* [PATCH RFT] arm64: kasan: Make KASAN work with 16K pages + 48 bit VA
From: Andrey Ryabinin @ 2015-11-26 13:14 UTC
  To: Catalin Marinas, Will Deacon, linux-arm-kernel
  Cc: Yury, Alexey Klimov, Arnd Bergmann, linux-mm, Linus Walleij,
	Ard Biesheuvel, linux-kernel, David Keitel, Alexander Potapenko,
	Dmitry Vyukov, Suzuki K. Poulose, Mark Rutland, Andrey Ryabinin

Currently kasan assumes that shadow memory covers one or more entire PGDs.
That's not true for 16K pages + 48bit VA space, where PGDIR_SIZE is bigger
than the whole shadow memory.
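
For reference, the arithmetic behind that (a sketch using the usual
arm64 constants, not lifted verbatim from the headers):

	/*
	 * 16K granule: PAGE_SHIFT = 14, so each table level resolves
	 * PAGE_SHIFT - 3 = 11 bits of VA.
	 *
	 * With 4 levels and a 48-bit VA:
	 *   PGDIR_SHIFT = 14 + 3 * 11 = 47
	 *   PGDIR_SIZE  = 1UL << 47	-> 128TB per pgd entry,
	 *				   and a 2-entry top-level table
	 *
	 * The shadow is 1/8th of the VA space:
	 *   KASAN_SHADOW_END - KASAN_SHADOW_START = (1UL << 48) / 8 -> 32TB
	 *
	 * So a single pgd entry (128TB) spans the whole shadow (32TB) plus
	 * neighbouring mappings, i.e. the shadow owns no entire pgd entry.
	 */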

This patch tries to fix that case.
clear_page_tables() is a new replacement for clear_pgds(). Instead of always
clearing pgds, it clears only the top-level page table entries that belong
entirely to shadow memory.
In addition to 'tmp_pg_dir' we now have 'tmp_pud', which is used to store
puds that might now be cleared by clear_page_tables().

Reported-by: Suzuki K. Poulose <Suzuki.Poulose@arm.com>
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
---

 *** THIS is not tested with 16k pages ***

 arch/arm64/mm/kasan_init.c | 87 ++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 76 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
index cf038c7..ea9f92a 100644
--- a/arch/arm64/mm/kasan_init.c
+++ b/arch/arm64/mm/kasan_init.c
@@ -22,6 +22,7 @@
 #include <asm/tlbflush.h>
 
 static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
+static pud_t tmp_pud[PAGE_SIZE/sizeof(pud_t)] __initdata __aligned(PAGE_SIZE);
 
 static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
 					unsigned long end)
@@ -92,20 +93,84 @@ asmlinkage void __init kasan_early_init(void)
 {
 	BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61));
 	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
-	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
+	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE));
 	kasan_map_early_shadow();
 }
 
-static void __init clear_pgds(unsigned long start,
-			unsigned long end)
+static void __init clear_pmds(pud_t *pud, unsigned long addr, unsigned long end)
 {
+	pmd_t *pmd;
+	unsigned long next;
+
+	pmd = pmd_offset(pud, addr);
+
+	do {
+		next = pmd_addr_end(addr, end);
+		if (IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE)
+			pmd_clear(pmd);
+
+	} while (pmd++, addr = next, addr != end);
+}
+
+static void __init clear_puds(pgd_t *pgd, unsigned long addr, unsigned long end)
+{
+	pud_t *pud;
+	unsigned long next;
+
+	pud = pud_offset(pgd, addr);
+
+	do {
+		next = pud_addr_end(addr, end);
+		if (IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE)
+			pud_clear(pud);
+
+		if (!pud_none(*pud))
+			clear_pmds(pud, addr, next);
+	} while (pud++, addr = next, addr != end);
+}
+
+static void __init clear_page_tables(unsigned long addr, unsigned long end)
+{
+	pgd_t *pgd;
+	unsigned long next;
+
+	pgd = pgd_offset_k(addr);
+
+	do {
+		next = pgd_addr_end(addr, end);
+		if (IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE)
+			pgd_clear(pgd);
+
+		if (!pgd_none(*pgd))
+			clear_puds(pgd, addr, next);
+	} while (pgd++, addr = next, addr != end);
+}
+
+static void copy_pagetables(void)
+{
+	pgd_t *pgd = tmp_pg_dir + pgd_index(KASAN_SHADOW_START);
+
+	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
+
 	/*
-	 * Remove references to kasan page tables from
-	 * swapper_pg_dir. pgd_clear() can't be used
-	 * here because it's nop on 2,3-level pagetable setups
+	 * If kasan shadow shares PGD with other mappings,
+	 * clear_page_tables() will clear puds instead of pgd,
+	 * so we need temporary pud table to keep early shadow mapped.
 	 */
-	for (; start < end; start += PGDIR_SIZE)
-		set_pgd(pgd_offset_k(start), __pgd(0));
+	if (PGDIR_SIZE > KASAN_SHADOW_END - KASAN_SHADOW_START) {
+		pud_t *pud;
+		pmd_t *pmd;
+		pte_t *pte;
+
+		memcpy(tmp_pud, pgd_page_vaddr(*pgd), sizeof(tmp_pud));
+
+		pgd_populate(&init_mm, pgd, tmp_pud);
+		pud = pud_offset(pgd, KASAN_SHADOW_START);
+		pmd = pmd_offset(pud, KASAN_SHADOW_START);
+		pud_populate(&init_mm, pud, pmd);
+		pte = pte_offset_kernel(pmd, KASAN_SHADOW_START);
+		pmd_populate_kernel(&init_mm, pmd, pte);
+	}
 }
 
 static void __init cpu_set_ttbr1(unsigned long ttbr1)
@@ -123,16 +188,16 @@ void __init kasan_init(void)
 
 	/*
 	 * We are going to perform proper setup of shadow memory.
-	 * At first we should unmap early shadow (clear_pgds() call bellow).
+	 * At first we should unmap early shadow (clear_page_tables()).
 	 * However, instrumented code couldn't execute without shadow memory.
 	 * tmp_pg_dir used to keep early shadow mapped until full shadow
 	 * setup will be finished.
 	 */
-	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
+	copy_pagetables();
 	cpu_set_ttbr1(__pa(tmp_pg_dir));
 	flush_tlb_all();
 
-	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+	clear_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END);
 
 	kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
 			kasan_mem_to_shadow((void *)MODULES_VADDR));
-- 
2.4.10



* Re: [PATCH RFT] arm64: kasan: Make KASAN work with 16K pages + 48 bit VA
From: Mark Rutland @ 2015-11-26 14:48 UTC
  To: Andrey Ryabinin
  Cc: Catalin Marinas, Will Deacon, linux-arm-kernel, Yury,
	Alexey Klimov, Arnd Bergmann, linux-mm, Linus Walleij,
	Ard Biesheuvel, linux-kernel, David Keitel, Alexander Potapenko,
	Dmitry Vyukov, Suzuki K. Poulose

Hi,

On Thu, Nov 26, 2015 at 04:14:46PM +0300, Andrey Ryabinin wrote:
> Currently kasan assumes that shadow memory covers one or more entire PGDs.
> That's not true for 16K pages + 48bit VA space, where PGDIR_SIZE is bigger
> than the whole shadow memory.
> 
> This patch tries to fix that case.
> clear_page_tables() is a new replacement for clear_pgds(). Instead of always
> clearing pgds, it clears only the top-level page table entries that belong
> entirely to shadow memory.
> In addition to 'tmp_pg_dir' we now have 'tmp_pud', which is used to store
> puds that might now be cleared by clear_page_tables().
> 
> Reported-by: Suzuki K. Poulose <Suzuki.Poulose@arm.com>
> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
> ---
> 
>  *** THIS is not tested with 16k pages ***
> 
>  arch/arm64/mm/kasan_init.c | 87 ++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 76 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
> index cf038c7..ea9f92a 100644
> --- a/arch/arm64/mm/kasan_init.c
> +++ b/arch/arm64/mm/kasan_init.c
> @@ -22,6 +22,7 @@
>  #include <asm/tlbflush.h>
>  
>  static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
> +static pud_t tmp_pud[PAGE_SIZE/sizeof(pud_t)] __initdata __aligned(PAGE_SIZE);
>  
>  static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
>  					unsigned long end)
> @@ -92,20 +93,84 @@ asmlinkage void __init kasan_early_init(void)
>  {
>  	BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61));
>  	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
> -	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
> +	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE));

We also assume that even in the shared PUD case, the shadow region falls
within the same PGD entry, or we would need more than a single tmp_pud.

It would be good to test for that.

>  	kasan_map_early_shadow();
>  }
>  
> -static void __init clear_pgds(unsigned long start,
> -			unsigned long end)
> +static void __init clear_pmds(pud_t *pud, unsigned long addr, unsigned long end)
>  {
> +	pmd_t *pmd;
> +	unsigned long next;
> +
> +	pmd = pmd_offset(pud, addr);
> +
> +	do {
> +		next = pmd_addr_end(addr, end);
> +		if (IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE)
> +			pmd_clear(pmd);
> +
> +	} while (pmd++, addr = next, addr != end);
> +}
> +
> +static void __init clear_puds(pgd_t *pgd, unsigned long addr, unsigned long end)
> +{
> +	pud_t *pud;
> +	unsigned long next;
> +
> +	pud = pud_offset(pgd, addr);
> +
> +	do {
> +		next = pud_addr_end(addr, end);
> +		if (IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE)
> +			pud_clear(pud);

I think this can just be:

		if (next - addr == PUD_SIZE)
			pud_clear(pud);

Given that next can be at most PUD_SIZE from addr, and if it is, we know
addr is aligned.

> +
> +		if (!pud_none(*pud))
> +			clear_pmds(pud, addr, next);

I don't understand this. The KASAN shadow region is PUD_SIZE aligned at
either end, so KASAN should never own a partial pud entry like this.

Regardless, were this case to occur, surely we'd be clearing pmd entries
in the active page tables? We didn't copy anything at the pmd level.

That doesn't seem right.

> +	} while (pud++, addr = next, addr != end);
> +}
> +
> +static void __init clear_page_tables(unsigned long addr, unsigned long end)
> +{
> +	pgd_t *pgd;
> +	unsigned long next;
> +
> +	pgd = pgd_offset_k(addr);
> +
> +	do {
> +		next = pgd_addr_end(addr, end);
> +		if (IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE)
> +			pgd_clear(pgd);

As with clear_puds, I think this can be:

		if (next - addr == PGDIR_SIZE)
			pgd_clear(pgd);

> +
> +		if (!pgd_none(*pgd))
> +			clear_puds(pgd, addr, next);
> +	} while (pgd++, addr = next, addr != end);
> +}
> +
> +static void copy_pagetables(void)
> +{
> +	pgd_t *pgd = tmp_pg_dir + pgd_index(KASAN_SHADOW_START);
> +
> +	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
> +
>  	/*
> -	 * Remove references to kasan page tables from
> -	 * swapper_pg_dir. pgd_clear() can't be used
> -	 * here because it's nop on 2,3-level pagetable setups
> +	 * If kasan shadow shares PGD with other mappings,
> +	 * clear_page_tables() will clear puds instead of pgd,
> +	 * so we need temporary pud table to keep early shadow mapped.
>  	 */
> -	for (; start < end; start += PGDIR_SIZE)
> -		set_pgd(pgd_offset_k(start), __pgd(0));
> +	if (PGDIR_SIZE > KASAN_SHADOW_END - KASAN_SHADOW_START) {
> +		pud_t *pud;
> +		pmd_t *pmd;
> +		pte_t *pte;
> +
> +		memcpy(tmp_pud, pgd_page_vaddr(*pgd), sizeof(tmp_pud));
> +
> +		pgd_populate(&init_mm, pgd, tmp_pud);
> +		pud = pud_offset(pgd, KASAN_SHADOW_START);
> +		pmd = pmd_offset(pud, KASAN_SHADOW_START);
> +		pud_populate(&init_mm, pud, pmd);
> +		pte = pte_offset_kernel(pmd, KASAN_SHADOW_START);
> +		pmd_populate_kernel(&init_mm, pmd, pte);

I don't understand why we need to do anything below the pud level here.
We only copy down to the pud level, and we already initialised the
shared ptes and pmds earlier.

Regardless of this patch, we currently initialise the shared tables
repeatedly, which is redundant after the first time we initialise them.
We could improve that.

> +	}
>  }
>  
>  static void __init cpu_set_ttbr1(unsigned long ttbr1)
> @@ -123,16 +188,16 @@ void __init kasan_init(void)
>  
>  	/*
>  	 * We are going to perform proper setup of shadow memory.
> -	 * At first we should unmap early shadow (clear_pgds() call bellow).
> +	 * At first we should unmap early shadow (clear_page_tables()).
>  	 * However, instrumented code couldn't execute without shadow memory.
>  	 * tmp_pg_dir used to keep early shadow mapped until full shadow
>  	 * setup will be finished.
>  	 */
> -	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
> +	copy_pagetables();
>  	cpu_set_ttbr1(__pa(tmp_pg_dir));
>  	flush_tlb_all();

As a heads-up, I will shortly have patches altering the swap of TTBR1,
as it risks conflicting TLB entries and misses barriers.

Otherwise, we need a dsb(ishst) between the memcpy and writing to the
TTBR to ensure that the tables are visible to the MMU.
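
I.e. something along these lines (just a sketch):

	copy_pagetables();
	dsb(ishst);	/* ensure the copied tables are visible to the MMU */
	cpu_set_ttbr1(__pa(tmp_pg_dir));
	flush_tlb_all();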

Thanks,
Mark.


* Re: [PATCH RFT] arm64: kasan: Make KASAN work with 16K pages + 48 bit VA
From: Andrey Ryabinin @ 2015-11-26 15:47 UTC
  To: Mark Rutland
  Cc: Catalin Marinas, Will Deacon, linux-arm-kernel, Yury,
	Alexey Klimov, Arnd Bergmann, linux-mm, Linus Walleij,
	Ard Biesheuvel, linux-kernel, David Keitel, Alexander Potapenko,
	Dmitry Vyukov, Suzuki K. Poulose



On 11/26/2015 05:48 PM, Mark Rutland wrote:
> Hi,
> 
> On Thu, Nov 26, 2015 at 04:14:46PM +0300, Andrey Ryabinin wrote:
>> Currently kasan assumes that shadow memory covers one or more entire PGDs.
>> That's not true for 16K pages + 48bit VA space, where PGDIR_SIZE is bigger
>> than the whole shadow memory.
>>
>> This patch tries to fix that case.
>> clear_page_tables() is a new replacement for clear_pgds(). Instead of always
>> clearing pgds, it clears only the top-level page table entries that belong
>> entirely to shadow memory.
>> In addition to 'tmp_pg_dir' we now have 'tmp_pud', which is used to store
>> puds that might now be cleared by clear_page_tables().
>>
>> Reported-by: Suzuki K. Poulose <Suzuki.Poulose@arm.com>
>> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
>> ---
>>
>>  *** THIS is not tested with 16k pages ***
>>
>>  arch/arm64/mm/kasan_init.c | 87 ++++++++++++++++++++++++++++++++++++++++------
>>  1 file changed, 76 insertions(+), 11 deletions(-)
>>
>> diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
>> index cf038c7..ea9f92a 100644
>> --- a/arch/arm64/mm/kasan_init.c
>> +++ b/arch/arm64/mm/kasan_init.c
>> @@ -22,6 +22,7 @@
>>  #include <asm/tlbflush.h>
>>  
>>  static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
>> +static pud_t tmp_pud[PAGE_SIZE/sizeof(pud_t)] __initdata __aligned(PAGE_SIZE);
>>  
>>  static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
>>  					unsigned long end)
>> @@ -92,20 +93,84 @@ asmlinkage void __init kasan_early_init(void)
>>  {
>>  	BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61));
>>  	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
>> -	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
>> +	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE));
> 
> We also assume that even in the shared PUD case, the shadow region falls
> within the same PGD entry, or we would need more than a single tmp_pud.
> 
> It would be good to test for that.
> 

Something like this:

	#define KASAN_SHADOW_SIZE (KASAN_SHADOW_END - KASAN_SHADOW_START)

	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)
			 && !((PGDIR_SIZE > KASAN_SHADOW_SIZE)
				 && IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE)));



>> +static void __init clear_puds(pgd_t *pgd, unsigned long addr, unsigned long end)
>> +{
>> +	pud_t *pud;
>> +	unsigned long next;
>> +
>> +	pud = pud_offset(pgd, addr);
>> +
>> +	do {
>> +		next = pud_addr_end(addr, end);
>> +		if (IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE)
>> +			pud_clear(pud);
> 
> I think this can just be:
> 
> 		if (next - addr == PUD_SIZE)
> 			pud_clear(pud);
> 
> Given that next can be at most PUD_SIZE from addr, and if it is, we know
> addr is aligned.
> 

Right.

>> +
>> +		if (!pud_none(*pud))
>> +			clear_pmds(pud, addr, next);
> 
> I don't understand this. The KASAN shadow region is PUD_SIZE aligned at
> either end, so KASAN should never own a partial pud entry like this.
> 
> Regardless, were this case to occur, surely we'd be clearing pmd entries
> in the active page tables? We didn't copy anything at the pmd level.
> 
> That doesn't seem right.
> 

Just take a look at the p?d_clear() macros, under CONFIG_PGTABLE_LEVELS=2 for example.
pgd_clear() and pud_clear() are nops, and pmd_clear() is what actually clears the pgd.

I could replace p?d_clear() with set_p?d(p?d, __p?d(0)).
In that case going down to pmds is not needed; the set_p?d() macro will do it for us.
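
Roughly what the generic folding headers boil down to (simplified here
for illustration, not the exact kernel code):

	/* CONFIG_PGTABLE_LEVELS=2: pud and pmd levels folded into the pgd */
	static inline void pgd_clear(pgd_t *pgd) { }	/* nop */
	static inline void pud_clear(pud_t *pud) { }	/* nop */
	/* ...while pmd_clear() is what zeroes the real (pgd) entry */

	/* set_pgd(), by contrast, is routed down to the real setter: */
	#define set_pgd(pgdptr, pgdval) \
		set_pud((pud_t *)(pgdptr), (pud_t) { pgdval })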


...

>> +static void copy_pagetables(void)
>> +{
>> +	pgd_t *pgd = tmp_pg_dir + pgd_index(KASAN_SHADOW_START);
>> +
>> +	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
>> +
>>  	/*
>> -	 * Remove references to kasan page tables from
>> -	 * swapper_pg_dir. pgd_clear() can't be used
>> -	 * here because it's nop on 2,3-level pagetable setups
>> +	 * If kasan shadow shares PGD with other mappings,
>> +	 * clear_page_tables() will clear puds instead of pgd,
>> +	 * so we need temporary pud table to keep early shadow mapped.
>>  	 */
>> -	for (; start < end; start += PGDIR_SIZE)
>> -		set_pgd(pgd_offset_k(start), __pgd(0));
>> +	if (PGDIR_SIZE > KASAN_SHADOW_END - KASAN_SHADOW_START) {
>> +		pud_t *pud;
>> +		pmd_t *pmd;
>> +		pte_t *pte;
>> +
>> +		memcpy(tmp_pud, pgd_page_vaddr(*pgd), sizeof(tmp_pud));
>> +
>> +		pgd_populate(&init_mm, pgd, tmp_pud);
>> +		pud = pud_offset(pgd, KASAN_SHADOW_START);
>> +		pmd = pmd_offset(pud, KASAN_SHADOW_START);
>> +		pud_populate(&init_mm, pud, pmd);
>> +		pte = pte_offset_kernel(pmd, KASAN_SHADOW_START);
>> +		pmd_populate_kernel(&init_mm, pmd, pte);
> 
> I don't understand why we need to do anything below the pud level here.
> We only copy down to the pud level, and we already initialised the
> shared ptes and pmds earlier.
> 
> Regardless of this patch, we currently initialise the shared tables
> repeatedly, which is redundant after the first time we initialise them.
> We could improve that.
> 

Sure, just pgd_populate() will work here, because this code is only for 16K+48-bit,
which has 4-level pagetables.
But it wouldn't work if 16k+48-bit were to have > 4 levels,
because pgd_populate() is a nop in that case, so we need to go down to actually set 'tmp_pud'.

I just tried to avoid assumptions about the number of page table levels in that code.




>> +	}
>>  }
>>  
>>  static void __init cpu_set_ttbr1(unsigned long ttbr1)
>> @@ -123,16 +188,16 @@ void __init kasan_init(void)
>>  
>>  	/*
>>  	 * We are going to perform proper setup of shadow memory.
>> -	 * At first we should unmap early shadow (clear_pgds() call bellow).
>> +	 * At first we should unmap early shadow (clear_page_tables()).
>>  	 * However, instrumented code couldn't execute without shadow memory.
>>  	 * tmp_pg_dir used to keep early shadow mapped until full shadow
>>  	 * setup will be finished.
>>  	 */
>> -	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
>> +	copy_pagetables();
>>  	cpu_set_ttbr1(__pa(tmp_pg_dir));
>>  	flush_tlb_all();
> 
> As a heads-up, I will shortly have patches altering the swap of TTBR1,
> as it risks conflicting TLB entries and misses barriers.
> 
> Otherwise, we need a dsb(ishst) between the memcpy and writing to the
> TTBR to ensure that the tables are visible to the MMU.
> 

Thanks.

> Thanks,
> Mark.
> 


* Re: [PATCH RFT] arm64: kasan: Make KASAN work with 16K pages + 48 bit VA
From: Mark Rutland @ 2015-11-26 16:21 UTC
  To: Andrey Ryabinin
  Cc: Catalin Marinas, Will Deacon, linux-arm-kernel, Yury,
	Alexey Klimov, Arnd Bergmann, linux-mm, Linus Walleij,
	Ard Biesheuvel, linux-kernel, David Keitel, Alexander Potapenko,
	Dmitry Vyukov, Suzuki K. Poulose

On Thu, Nov 26, 2015 at 06:47:36PM +0300, Andrey Ryabinin wrote:
> 
> 
> On 11/26/2015 05:48 PM, Mark Rutland wrote:
> > Hi,
> > 
> > On Thu, Nov 26, 2015 at 04:14:46PM +0300, Andrey Ryabinin wrote:
> >> Currently kasan assumes that shadow memory covers one or more entire PGDs.
> >> That's not true for 16K pages + 48bit VA space, where PGDIR_SIZE is bigger
> >> than the whole shadow memory.
> >>
> >> This patch tries to fix that case.
> >> clear_page_tables() is a new replacement for clear_pgds(). Instead of always
> >> clearing pgds, it clears only the top-level page table entries that belong
> >> entirely to shadow memory.
> >> In addition to 'tmp_pg_dir' we now have 'tmp_pud', which is used to store
> >> puds that might now be cleared by clear_page_tables().
> >>
> >> Reported-by: Suzuki K. Poulose <Suzuki.Poulose@arm.com>
> >> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
> >> ---
> >>
> >>  *** THIS is not tested with 16k pages ***
> >>
> >>  arch/arm64/mm/kasan_init.c | 87 ++++++++++++++++++++++++++++++++++++++++------
> >>  1 file changed, 76 insertions(+), 11 deletions(-)
> >>
> >> diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
> >> index cf038c7..ea9f92a 100644
> >> --- a/arch/arm64/mm/kasan_init.c
> >> +++ b/arch/arm64/mm/kasan_init.c
> >> @@ -22,6 +22,7 @@
> >>  #include <asm/tlbflush.h>
> >>  
> >>  static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
> >> +static pud_t tmp_pud[PAGE_SIZE/sizeof(pud_t)] __initdata __aligned(PAGE_SIZE);
> >>  
> >>  static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
> >>  					unsigned long end)
> >> @@ -92,20 +93,84 @@ asmlinkage void __init kasan_early_init(void)
> >>  {
> >>  	BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61));
> >>  	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
> >> -	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
> >> +	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE));
> > 
> > We also assume that even in the shared PUD case, the shadow region falls
> > within the same PGD entry, or we would need more than a single tmp_pud.
> > 
> > It would be good to test for that.
> > 
> 
> Something like this:
> 
> 	#define KASAN_SHADOW_SIZE (KASAN_SHADOW_END - KASAN_SHADOW_START)
> 
> 	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)
> 			 && !((PGDIR_SIZE > KASAN_SHADOW_SIZE)
> 				 && IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE)));

I was thinking something more like:

	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE));
	BUILD_BUG_ON(KASAN_SHADOW_START >> PGDIR_SHIFT !=
		     KASAN_SHADOW_END >> PGDIR_SHIFT);

> >> +		if (!pud_none(*pud))
> >> +			clear_pmds(pud, addr, next);
> > 
> > I don't understand this. The KASAN shadow region is PUD_SIZE aligned at
> > either end, so KASAN should never own a partial pud entry like this.
> > 
> > Regardless, were this case to occur, surely we'd be clearing pmd entries
> > in the active page tables? We didn't copy anything at the pmd level.
> > 
> > That doesn't seem right.
> > 
> 
> Just take a look at the p?d_clear() macros, under CONFIG_PGTABLE_LEVELS=2 for example.
> pgd_clear() and pud_clear() are nops, and pmd_clear() is what actually clears the pgd.

I see. Thanks for pointing that out.

I detest the weird folding behaviour we have in the p??_* macros. It
violates least surprise almost every time.

> I could replace p?d_clear() with set_p?d(p?d, __p?d(0)).
> In that case going down to pmds is not needed; the set_p?d() macro will do it for us.

I think it would be simpler to rely on the fact that we only use puds
with 4 levels of table (and hence the p??_* macros will operate at the
levels their names imply).

We can verify that at build time with:

BUILD_BUG_ON(CONFIG_PGTABLE_LEVELS != 4 &&
	     (!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE) ||
	      !IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)));

> >> +static void copy_pagetables(void)
> >> +{
> >> +	pgd_t *pgd = tmp_pg_dir + pgd_index(KASAN_SHADOW_START);
> >> +
> >> +	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
> >> +
> >>  	/*
> >> -	 * Remove references to kasan page tables from
> >> -	 * swapper_pg_dir. pgd_clear() can't be used
> >> -	 * here because it's nop on 2,3-level pagetable setups
> >> +	 * If kasan shadow shares PGD with other mappings,
> >> +	 * clear_page_tables() will clear puds instead of pgd,
> >> +	 * so we need temporary pud table to keep early shadow mapped.
> >>  	 */
> >> -	for (; start < end; start += PGDIR_SIZE)
> >> -		set_pgd(pgd_offset_k(start), __pgd(0));
> >> +	if (PGDIR_SIZE > KASAN_SHADOW_END - KASAN_SHADOW_START) {
> >> +		pud_t *pud;
> >> +		pmd_t *pmd;
> >> +		pte_t *pte;
> >> +
> >> +		memcpy(tmp_pud, pgd_page_vaddr(*pgd), sizeof(tmp_pud));
> >> +
> >> +		pgd_populate(&init_mm, pgd, tmp_pud);
> >> +		pud = pud_offset(pgd, KASAN_SHADOW_START);
> >> +		pmd = pmd_offset(pud, KASAN_SHADOW_START);
> >> +		pud_populate(&init_mm, pud, pmd);
> >> +		pte = pte_offset_kernel(pmd, KASAN_SHADOW_START);
> >> +		pmd_populate_kernel(&init_mm, pmd, pte);
> > 
> > I don't understand why we need to do anything below the pud level here.
> > We only copy down to the pud level, and we already initialised the
> > shared ptes and pmds earlier.
> > 
> > Regardless of this patch, we currently initialise the shared tables
> > repeatedly, which is redundant after the first time we initialise them.
> > We could improve that.
> > 
> 
> Sure, just pgd_populate() will work here, because this code is only for 16K+48-bit,
> which has 4-level pagetables.
> But it wouldn't work if 16k+48-bit were to have > 4 levels,
> because pgd_populate() is a nop in that case, so we need to go down to actually set 'tmp_pud'.

I don't follow.

16K + 48-bit will always require 4 levels given the page table format.
We never have more than 4 levels.

Thanks,
Mark.


* Re: [PATCH RFT] arm64: kasan: Make KASAN work with 16K pages + 48 bit VA
From: Andrey Ryabinin @ 2015-11-26 16:40 UTC
  To: Mark Rutland
  Cc: Catalin Marinas, Will Deacon, linux-arm-kernel, Yury,
	Alexey Klimov, Arnd Bergmann, linux-mm, Linus Walleij,
	Ard Biesheuvel, linux-kernel, David Keitel, Alexander Potapenko,
	Dmitry Vyukov, Suzuki K. Poulose

On 11/26/2015 07:21 PM, Mark Rutland wrote:
> On Thu, Nov 26, 2015 at 06:47:36PM +0300, Andrey Ryabinin wrote:
>>
>>
>> On 11/26/2015 05:48 PM, Mark Rutland wrote:
>>> Hi,
>>>
>>> On Thu, Nov 26, 2015 at 04:14:46PM +0300, Andrey Ryabinin wrote:
>>>> Currently kasan assumes that shadow memory covers one or more entire PGDs.
>>>> That's not true for 16K pages + 48bit VA space, where PGDIR_SIZE is bigger
>>>> than the whole shadow memory.
>>>>
>>>> This patch tries to fix that case.
>>>> clear_page_tables() is a new replacement for clear_pgds(). Instead of always
>>>> clearing pgds, it clears only the top-level page table entries that belong
>>>> entirely to shadow memory.
>>>> In addition to 'tmp_pg_dir' we now have 'tmp_pud', which is used to store
>>>> puds that might now be cleared by clear_page_tables().
>>>>
>>>> Reported-by: Suzuki K. Poulose <Suzuki.Poulose@arm.com>
>>>> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
>>>> ---
>>>>
>>>>  *** THIS is not tested with 16k pages ***
>>>>
>>>>  arch/arm64/mm/kasan_init.c | 87 ++++++++++++++++++++++++++++++++++++++++------
>>>>  1 file changed, 76 insertions(+), 11 deletions(-)
>>>>
>>>> diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
>>>> index cf038c7..ea9f92a 100644
>>>> --- a/arch/arm64/mm/kasan_init.c
>>>> +++ b/arch/arm64/mm/kasan_init.c
>>>> @@ -22,6 +22,7 @@
>>>>  #include <asm/tlbflush.h>
>>>>  
>>>>  static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
>>>> +static pud_t tmp_pud[PAGE_SIZE/sizeof(pud_t)] __initdata __aligned(PAGE_SIZE);
>>>>  
>>>>  static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
>>>>  					unsigned long end)
>>>> @@ -92,20 +93,84 @@ asmlinkage void __init kasan_early_init(void)
>>>>  {
>>>>  	BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61));
>>>>  	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
>>>> -	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
>>>> +	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE));
>>>
>>> We also assume that even in the shared PUD case, the shadow region falls
>>> within the same PGD entry, or we would need more than a single tmp_pud.
>>>
>>> It would be good to test for that.
>>>
>>
>> Something like this:
>>
>> 	#define KASAN_SHADOW_SIZE (KASAN_SHADOW_END - KASAN_SHADOW_START)
>>
>> 	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)
>> 			 && !((PGDIR_SIZE > KASAN_SHADOW_SIZE)
>> 				 && IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE)));
> 
> I was thinking something more like:
> 
> 	BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE));
> 	BUILD_BUG_ON(KASAN_SHADOW_START >> PGDIR_SHIFT !=
> 		     KASAN_SHADOW_END >> PGDIR_SHIFT);
> 
>>>> +		if (!pud_none(*pud))
>>>> +			clear_pmds(pud, addr, next);
>>>
>>> I don't understand this. The KASAN shadow region is PUD_SIZE aligned at
>>> either end, so KASAN should never own a partial pud entry like this.
>>>
>>> Regardless, were this case to occur, surely we'd be clearing pmd entries
>>> in the active page tables? We didn't copy anything at the pmd level.
>>>
>>> That doesn't seem right.
>>>
>>
>> Just take a look at the p?d_clear() macros, under CONFIG_PGTABLE_LEVELS=2 for example.
>> pgd_clear() and pud_clear() are nops, and pmd_clear() is what actually clears the pgd.
> 
> I see. Thanks for pointing that out.
> 
> I detest the weird folding behaviour we have in the p??_* macros. It
> violates least surprise almost every time.
> 
>> I could replace p?d_clear() with set_p?d(p?d, __p?d(0)).
>> In that case going down to pmds is not needed; the set_p?d() macro will do it for us.
> 
> I think it would be simpler to rely on the fact that we only use puds
> with 4 levels of table (and hence the p??_* macros will operate at the
> levels their names imply).
> 

It's not only about puds.
E.g. if we need to clear a PGD with 2-level page tables, then we need to call pmd_clear().

So we should either leave this code as is, or switch to set_pgd/set_pud.
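
E.g. the set_pgd/set_pud variant would look something like (sketch):

	/* in clear_page_tables() */
	if (next - addr == PGDIR_SIZE)
		set_pgd(pgd, __pgd(0));	/* a real write even when folded */

	/* in clear_puds() */
	if (next - addr == PUD_SIZE)
		set_pud(pud, __pud(0));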


> We can verify that at build time with:
> 
> BUILD_BUG_ON(CONFIG_PGTABLE_LEVELS != 4 &&
> 	     (!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE) ||
> 	      !IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)));
> 
>>>> +static void copy_pagetables(void)
>>>> +{
>>>> +	pgd_t *pgd = tmp_pg_dir + pgd_index(KASAN_SHADOW_START);
>>>> +
>>>> +	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
>>>> +
>>>>  	/*
>>>> -	 * Remove references to kasan page tables from
>>>> -	 * swapper_pg_dir. pgd_clear() can't be used
>>>> -	 * here because it's nop on 2,3-level pagetable setups
>>>> +	 * If kasan shadow shares PGD with other mappings,
>>>> +	 * clear_page_tables() will clear puds instead of pgd,
>>>> +	 * so we need temporary pud table to keep early shadow mapped.
>>>>  	 */
>>>> -	for (; start < end; start += PGDIR_SIZE)
>>>> -		set_pgd(pgd_offset_k(start), __pgd(0));
>>>> +	if (PGDIR_SIZE > KASAN_SHADOW_END - KASAN_SHADOW_START) {
>>>> +		pud_t *pud;
>>>> +		pmd_t *pmd;
>>>> +		pte_t *pte;
>>>> +
>>>> +		memcpy(tmp_pud, pgd_page_vaddr(*pgd), sizeof(tmp_pud));
>>>> +
>>>> +		pgd_populate(&init_mm, pgd, tmp_pud);
>>>> +		pud = pud_offset(pgd, KASAN_SHADOW_START);
>>>> +		pmd = pmd_offset(pud, KASAN_SHADOW_START);
>>>> +		pud_populate(&init_mm, pud, pmd);
>>>> +		pte = pte_offset_kernel(pmd, KASAN_SHADOW_START);
>>>> +		pmd_populate_kernel(&init_mm, pmd, pte);
>>>
>>> I don't understand why we need to do anything below the pud level here.
>>> We only copy down to the pud level, and we already initialised the
>>> shared ptes and pmds earlier.
>>>
>>> Regardless of this patch, we currently initialise the shared tables
>>> repeatedly, which is redundant after the first time we initialise them.
>>> We could improve that.
>>>
>>
>> Sure, just pgd_populate() will work here, because this code is only for 16K+48-bit,
>> which has 4-level pagetables.
>> But it wouldn't work if 16k+48-bit were to have > 4 levels,
>> because pgd_populate() is a nop in that case, so we need to go down to actually set 'tmp_pud'.
> 
> I don't follow.
> 
> 16K + 48-bit will always require 4 levels given the page table format.
> We never have more than 4 levels.
> 

Oh, it should be '< 4' of course.
Yes, 16K + 48-bit is always 4 levels, but I tried not to rely on this here.

But since we can rely on 4 levels here, I'm gonna leave only pgd_populate() and add your BUILD_BUG_ON().
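
I.e. copy_pagetables() would then boil down to something like (an
untested sketch):

	pgd_t *pgd = tmp_pg_dir + pgd_index(KASAN_SHADOW_START);

	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));

	if (PGDIR_SIZE > KASAN_SHADOW_END - KASAN_SHADOW_START) {
		/* keep the early shadow mapped via the temporary pud */
		memcpy(tmp_pud, pgd_page_vaddr(*pgd), sizeof(tmp_pud));
		pgd_populate(&init_mm, pgd, tmp_pud);
	}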


> Thanks,
> Mark.
> 


* Re: [PATCH RFT] arm64: kasan: Make KASAN work with 16K pages + 48 bit VA
From: Ard Biesheuvel @ 2015-11-26 16:40 UTC
  To: Andrey Ryabinin
  Cc: Catalin Marinas, Will Deacon, linux-arm-kernel, Yury,
	Alexey Klimov, Arnd Bergmann, linux-mm, Linus Walleij,
	linux-kernel, David Keitel, Alexander Potapenko, Dmitry Vyukov,
	Suzuki K. Poulose, Mark Rutland

On 26 November 2015 at 14:14, Andrey Ryabinin <aryabinin@virtuozzo.com> wrote:
> Currently kasan assumes that shadow memory covers one or more entire PGDs.
> That's not true for 16K pages + 48bit VA space, where PGDIR_SIZE is bigger
> than the whole shadow memory.
>
> This patch tries to fix that case.
> clear_page_tables() is a new replacement for clear_pgds(). Instead of always
> clearing pgds, it clears only the top-level page table entries that belong
> entirely to shadow memory.
> In addition to 'tmp_pg_dir' we now have 'tmp_pud', which is used to store
> puds that might now be cleared by clear_page_tables().
>
> Reported-by: Suzuki K. Poulose <Suzuki.Poulose@arm.com>
> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>

I would argue that the Kasan code is complicated enough, and we should
avoid complicating it even further for a configuration that is highly
theoretical in nature.

In a 16k configuration, the 4th level only adds a single bit of VA
space (which is, as I understand it, exactly the issue you need to
address here since the top level page table has only 2 entries and
hence does not divide by 8 cleanly), which means you are better off
using 3 levels unless you *really* need more than 128 TB of VA space.
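
(For reference, the arithmetic: with 16K pages each table level
resolves 11 bits, so n levels give 14 + n * 11 VA bits; 3 levels
-> 47 bits = 128 TB, and the 4th level only adds the one remaining
bit up to the architectural maximum of 48, leaving a 2-entry top
level table.)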

So can't we just live with the limitation, and keep the current code?

-- 
Ard.

> ---
>
>  *** THIS is not tested with 16k pages ***
>
>  arch/arm64/mm/kasan_init.c | 87 ++++++++++++++++++++++++++++++++++++++++------
>  1 file changed, 76 insertions(+), 11 deletions(-)
>
> diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c
> index cf038c7..ea9f92a 100644
> --- a/arch/arm64/mm/kasan_init.c
> +++ b/arch/arm64/mm/kasan_init.c
> @@ -22,6 +22,7 @@
>  #include <asm/tlbflush.h>
>
>  static pgd_t tmp_pg_dir[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
> +static pud_t tmp_pud[PAGE_SIZE/sizeof(pud_t)] __initdata __aligned(PAGE_SIZE);
>
>  static void __init kasan_early_pte_populate(pmd_t *pmd, unsigned long addr,
>                                         unsigned long end)
> @@ -92,20 +93,84 @@ asmlinkage void __init kasan_early_init(void)
>  {
>         BUILD_BUG_ON(KASAN_SHADOW_OFFSET != KASAN_SHADOW_END - (1UL << 61));
>         BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
> -       BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
> +       BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PUD_SIZE));
>         kasan_map_early_shadow();
>  }
>
> -static void __init clear_pgds(unsigned long start,
> -                       unsigned long end)
> +static void __init clear_pmds(pud_t *pud, unsigned long addr, unsigned long end)
>  {
> +       pmd_t *pmd;
> +       unsigned long next;
> +
> +       pmd = pmd_offset(pud, addr);
> +
> +       do {
> +               next = pmd_addr_end(addr, end);
> +               if (IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE)
> +                       pmd_clear(pmd);
> +
> +       } while (pmd++, addr = next, addr != end);
> +}
> +
> +static void __init clear_puds(pgd_t *pgd, unsigned long addr, unsigned long end)
> +{
> +       pud_t *pud;
> +       unsigned long next;
> +
> +       pud = pud_offset(pgd, addr);
> +
> +       do {
> +               next = pud_addr_end(addr, end);
> +               if (IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE)
> +                       pud_clear(pud);
> +
> +               if (!pud_none(*pud))
> +                       clear_pmds(pud, addr, next);
> +       } while (pud++, addr = next, addr != end);
> +}
> +
> +static void __init clear_page_tables(unsigned long addr, unsigned long end)
> +{
> +       pgd_t *pgd;
> +       unsigned long next;
> +
> +       pgd = pgd_offset_k(addr);
> +
> +       do {
> +               next = pgd_addr_end(addr, end);
> +               if (IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE)
> +                       pgd_clear(pgd);
> +
> +               if (!pgd_none(*pgd))
> +                       clear_puds(pgd, addr, next);
> +       } while (pgd++, addr = next, addr != end);
> +}
> +
> +static void copy_pagetables(void)
> +{
> +       pgd_t *pgd = tmp_pg_dir + pgd_index(KASAN_SHADOW_START);
> +
> +       memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
> +
>         /*
> -        * Remove references to kasan page tables from
> -        * swapper_pg_dir. pgd_clear() can't be used
> -        * here because it's nop on 2,3-level pagetable setups
> +        * If kasan shadow shares PGD with other mappings,
> +        * clear_page_tables() will clear puds instead of pgd,
> +        * so we need temporary pud table to keep early shadow mapped.
>          */
> -       for (; start < end; start += PGDIR_SIZE)
> -               set_pgd(pgd_offset_k(start), __pgd(0));
> +       if (PGDIR_SIZE > KASAN_SHADOW_END - KASAN_SHADOW_START) {
> +               pud_t *pud;
> +               pmd_t *pmd;
> +               pte_t *pte;
> +
> +               memcpy(tmp_pud, pgd_page_vaddr(*pgd), sizeof(tmp_pud));
> +
> +               pgd_populate(&init_mm, pgd, tmp_pud);
> +               pud = pud_offset(pgd, KASAN_SHADOW_START);
> +               pmd = pmd_offset(pud, KASAN_SHADOW_START);
> +               pud_populate(&init_mm, pud, pmd);
> +               pte = pte_offset_kernel(pmd, KASAN_SHADOW_START);
> +               pmd_populate_kernel(&init_mm, pmd, pte);
> +       }
>  }
>
>  static void __init cpu_set_ttbr1(unsigned long ttbr1)
> @@ -123,16 +188,16 @@ void __init kasan_init(void)
>
>         /*
>          * We are going to perform proper setup of shadow memory.
> -        * At first we should unmap early shadow (clear_pgds() call bellow).
> +        * At first we should unmap early shadow (clear_page_tables()).
>          * However, instrumented code couldn't execute without shadow memory.
>          * tmp_pg_dir used to keep early shadow mapped until full shadow
>          * setup will be finished.
>          */
> -       memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
> +       copy_pagetables();
>         cpu_set_ttbr1(__pa(tmp_pg_dir));
>         flush_tlb_all();
>
> -       clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
> +       clear_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END);
>
>         kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
>                         kasan_mem_to_shadow((void *)MODULES_VADDR));
> --
> 2.4.10
>


* Re: [PATCH RFT] arm64: kasan: Make KASAN work with 16K pages + 48 bit VA
From: Mark Rutland @ 2015-11-26 17:08 UTC
  To: Andrey Ryabinin
  Cc: Catalin Marinas, Will Deacon, linux-arm-kernel, Yury,
	Alexey Klimov, Arnd Bergmann, linux-mm, Linus Walleij,
	Ard Biesheuvel, linux-kernel, David Keitel, Alexander Potapenko,
	Dmitry Vyukov, Suzuki K. Poulose

> >>>> +		if (!pud_none(*pud))
> >>>> +			clear_pmds(pud, addr, next);
> >>>
> >>> I don't understand this. The KASAN shadow region is PUD_SIZE aligned at
> >>> either end, so KASAN should never own a partial pud entry like this.
> >>>
> >>> Regardless, were this case to occur, surely we'd be clearing pmd entries
> >>> in the active page tables? We didn't copy anything at the pmd level.
> >>>
> >>> That doesn't seem right.
> >>>
> >>
> >> Just take a look at the p?d_clear() macros, under CONFIG_PGTABLE_LEVELS=2 for example.
> >> pgd_clear() and pud_clear() are nops, and pmd_clear() is what actually clears the pgd.
> > 
> > I see. Thanks for pointing that out.
> > 
> > I detest the weird folding behaviour we have in the p??_* macros. It
> > violates least surprise almost every time.
> > 
> >> I could replace p?d_clear() with set_p?d(p?d, __p?d(0)).
> >> In that case going down to pmds is not needed; the set_p?d() macro will do it for us.
> > 
> > I think it would be simpler to rely on the fact that we only use puds
> > with 4 levels of table (and hence the p??_* macros will operate at the
> > levels their names imply).
> > 
> 
> It's not only about puds.
> E.g. if we need to clear a PGD with 2-level page tables, then we need to call pmd_clear().

Ah. Yes :(

I will reiterate that I hate the folding behaviour.

> So we should either leave this code as is, or switch to set_pgd/set_pud.

I think set_p?d is preferable.

> > We can verify that at build time with:
> > 
> > BUILD_BUG_ON(CONFIG_PGTABLE_LEVELS != 4 &&
> > 	     (!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE) ||
> > 	      !IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE)));
> > 
> >>>> +static void copy_pagetables(void)
> >>>> +{
> >>>> +	pgd_t *pgd = tmp_pg_dir + pgd_index(KASAN_SHADOW_START);
> >>>> +
> >>>> +	memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
> >>>> +
> >>>>  	/*
> >>>> -	 * Remove references to kasan page tables from
> >>>> -	 * swapper_pg_dir. pgd_clear() can't be used
> >>>> -	 * here because it's nop on 2,3-level pagetable setups
> >>>> +	 * If kasan shadow shares PGD with other mappings,
> >>>> +	 * clear_page_tables() will clear puds instead of pgd,
> >>>> +	 * so we need temporary pud table to keep early shadow mapped.
> >>>>  	 */
> >>>> -	for (; start < end; start += PGDIR_SIZE)
> >>>> -		set_pgd(pgd_offset_k(start), __pgd(0));
> >>>> +	if (PGDIR_SIZE > KASAN_SHADOW_END - KASAN_SHADOW_START) {
> >>>> +		pud_t *pud;
> >>>> +		pmd_t *pmd;
> >>>> +		pte_t *pte;
> >>>> +
> >>>> +		memcpy(tmp_pud, pgd_page_vaddr(*pgd), sizeof(tmp_pud));
> >>>> +
> >>>> +		pgd_populate(&init_mm, pgd, tmp_pud);
> >>>> +		pud = pud_offset(pgd, KASAN_SHADOW_START);
> >>>> +		pmd = pmd_offset(pud, KASAN_SHADOW_START);
> >>>> +		pud_populate(&init_mm, pud, pmd);
> >>>> +		pte = pte_offset_kernel(pmd, KASAN_SHADOW_START);
> >>>> +		pmd_populate_kernel(&init_mm, pmd, pte);
> >>>
> >>> I don't understand why we need to do anything below the pud level here.
> >>> We only copy down to the pud level, and we already initialised the
> >>> shared ptes and pmds earlier.
> >>>
> >>> Regardless of this patch, we currently initialise the shared tables
> >>> repeatedly, which is redundant after the first time we initialise them.
> >>> We could improve that.
> >>>
> >>
> >> Sure, just pgd_populate() will work here, because this code is only for 16K+48-bit,
> >> which has 4-level pagetables.
> >> But it wouldn't work if 16k+48-bit were to have > 4 levels,
> >> because pgd_populate() is a nop in that case, so we need to go down to actually set 'tmp_pud'.
> > 
> > I don't follow.
> > 
> > 16K + 48-bit will always require 4 levels given the page table format.
> > We never have more than 4 levels.
> > 
> 
> Oh, it should be '< 4' of course.
> Yes, 16K + 48-bit is always 4 levels, but I tried not to rely on this here.
> 
> But since we can rely on 4 levels here, I'm gonna leave only pgd_populate() and add your BUILD_BUG_ON().

Ok. That sounds good to me.

Thanks,
Mark.


* Re: [PATCH RFT] arm64: kasan: Make KASAN work with 16K pages + 48 bit VA
From: Andrey Ryabinin @ 2015-11-27  8:12 UTC
  To: Ard Biesheuvel
  Cc: Catalin Marinas, Will Deacon, linux-arm-kernel, Yury,
	Alexey Klimov, Arnd Bergmann, linux-mm, Linus Walleij,
	linux-kernel, David Keitel, Alexander Potapenko, Dmitry Vyukov,
	Suzuki K. Poulose, Mark Rutland

On 11/26/2015 07:40 PM, Ard Biesheuvel wrote:
> On 26 November 2015 at 14:14, Andrey Ryabinin <aryabinin@virtuozzo.com> wrote:
>> Currently kasan assumes that shadow memory covers one or more entire PGDs.
>> That's not true for 16K pages + 48bit VA space, where PGDIR_SIZE is bigger
>> than the whole shadow memory.
>>
>> This patch tries to fix that case.
>> clear_page_tables() is a new replacement for clear_pgds(). Instead of always
>> clearing pgds, it clears only the top-level page table entries that belong
>> entirely to shadow memory.
>> In addition to 'tmp_pg_dir' we now have 'tmp_pud', which is used to store
>> puds that might now be cleared by clear_page_tables().
>>
>> Reported-by: Suzuki K. Poulose <Suzuki.Poulose@arm.com>
>> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
> 
> I would argue that the Kasan code is complicated enough, and we should
> avoid complicating it even further for a configuration that is highly
> theoretical in nature.
> 
> In a 16k configuration, the 4th level only adds a single bit of VA
> space (which is, as I understand it, exactly the issue you need to
> address here since the top level page table has only 2 entries and
> hence does not divide by 8 cleanly), which means you are better off
> using 3 levels unless you *really* need more than 128 TB of VA space.
> 
> So can't we just live with the limitation, and keep the current code?
 

No objections from my side. Let's keep the current code.


* Re: [PATCH RFT] arm64: kasan: Make KASAN work with 16K pages + 48 bit VA
From: Catalin Marinas @ 2015-11-27  9:35 UTC
  To: Andrey Ryabinin
  Cc: Ard Biesheuvel, Mark Rutland, Yury, Arnd Bergmann, linux-mm,
	Linus Walleij, Suzuki K. Poulose, Will Deacon, linux-kernel,
	Alexey Klimov, Alexander Potapenko, Dmitry Vyukov, David Keitel,
	linux-arm-kernel

On Fri, Nov 27, 2015 at 11:12:28AM +0300, Andrey Ryabinin wrote:
> On 11/26/2015 07:40 PM, Ard Biesheuvel wrote:
> > On 26 November 2015 at 14:14, Andrey Ryabinin <aryabinin@virtuozzo.com> wrote:
> >> Currently kasan assumes that shadow memory covers one or more entire PGDs.
> >> That's not true for 16K pages + 48bit VA space, where PGDIR_SIZE is bigger
> >> than the whole shadow memory.
> >>
> >> This patch tries to fix that case.
> >> clear_page_tables() is a new replacement for clear_pgds(). Instead of always
> >> clearing pgds, it clears only the top-level page table entries that belong
> >> entirely to shadow memory.
> >> In addition to 'tmp_pg_dir' we now have 'tmp_pud', which is used to store
> >> puds that might now be cleared by clear_page_tables().
> >>
> >> Reported-by: Suzuki K. Poulose <Suzuki.Poulose@arm.com>
> >> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
> > 
> > I would argue that the Kasan code is complicated enough, and we should
> > avoid complicating it even further for a configuration that is highly
> > theoretical in nature.
> > 
> > In a 16k configuration, the 4th level only adds a single bit of VA
> > space (which is, as I understand it, exactly the issue you need to
> > address here since the top level page table has only 2 entries and
> > hence does not divide by 8 cleanly), which means you are better off
> > using 3 levels unless you *really* need more than 128 TB of VA space.
> > 
> > So can't we just live with the limitation, and keep the current code?
> 
> No objections from my side. Let's keep the current code.

Ard had a good point, so fine by me as well.

-- 
Catalin


* Re: [PATCH RFT] arm64: kasan: Make KASAN work with 16K pages + 48 bit VA
From: Will Deacon @ 2015-11-27 10:02 UTC
  To: Catalin Marinas
  Cc: Andrey Ryabinin, Ard Biesheuvel, Mark Rutland, Yury,
	Arnd Bergmann, linux-mm, Linus Walleij, Suzuki K. Poulose,
	linux-kernel, Alexey Klimov, Alexander Potapenko, Dmitry Vyukov,
	David Keitel, linux-arm-kernel

On Fri, Nov 27, 2015 at 09:35:29AM +0000, Catalin Marinas wrote:
> On Fri, Nov 27, 2015 at 11:12:28AM +0300, Andrey Ryabinin wrote:
> > On 11/26/2015 07:40 PM, Ard Biesheuvel wrote:
> > > On 26 November 2015 at 14:14, Andrey Ryabinin <aryabinin@virtuozzo.com> wrote:
> > >> Currently kasan assumes that shadow memory covers one or more entire PGDs.
> > >> That's not true for 16K pages + 48bit VA space, where PGDIR_SIZE is bigger
> > >> than the whole shadow memory.
> > >>
> > >> This patch tries to fix that case.
> > >> clear_page_tables() is a new replacement for clear_pgds(). Instead of always
> > >> clearing pgds, it clears only the top-level page table entries that belong
> > >> entirely to shadow memory.
> > >> In addition to 'tmp_pg_dir' we now have 'tmp_pud', which is used to store
> > >> puds that might now be cleared by clear_page_tables().
> > >>
> > >> Reported-by: Suzuki K. Poulose <Suzuki.Poulose@arm.com>
> > >> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
> > > 
> > > I would argue that the Kasan code is complicated enough, and we should
> > > avoid complicating it even further for a configuration that is highly
> > > theoretical in nature.
> > > 
> > > In a 16k configuration, the 4th level only adds a single bit of VA
> > > space (which is, as I understand it, exactly the issue you need to
> > > address here since the top level page table has only 2 entries and
> > > hence does not divide by 8 cleanly), which means you are better off
> > > using 3 levels unless you *really* need more than 128 TB of VA space.
> > > 
> > > So can't we just live with the limitation, and keep the current code?
> > 
> > No objections from my side. Let's keep the current code.
> 
> Ard had a good point, so fine by me as well.

Ok, so obvious follow-up question: why do we even support 48-bit + 16k
pages in the kernel? Either it's useful, and we make things work with it,
or it's not and we can drop it (or, at least, hide it behind EXPERT like
we do for 36-bit).

Will


* Re: [PATCH RFT] arm64: kasan: Make KASAN work with 16K pages + 48 bit VA
From: Ard Biesheuvel @ 2015-11-27 10:39 UTC
  To: Will Deacon
  Cc: Catalin Marinas, Andrey Ryabinin, Mark Rutland, Yury,
	Arnd Bergmann, linux-mm, Linus Walleij, Suzuki K. Poulose,
	linux-kernel, Alexey Klimov, Alexander Potapenko, Dmitry Vyukov,
	David Keitel, linux-arm-kernel

On 27 November 2015 at 11:02, Will Deacon <will.deacon@arm.com> wrote:
> On Fri, Nov 27, 2015 at 09:35:29AM +0000, Catalin Marinas wrote:
>> On Fri, Nov 27, 2015 at 11:12:28AM +0300, Andrey Ryabinin wrote:
>> > On 11/26/2015 07:40 PM, Ard Biesheuvel wrote:
>> > > On 26 November 2015 at 14:14, Andrey Ryabinin <aryabinin@virtuozzo.com> wrote:
>> > >> Currently kasan assumes that shadow memory covers one or more entire PGDs.
>> > >> That's not true for 16K pages + 48bit VA space, where PGDIR_SIZE is bigger
>> > >> than the whole shadow memory.
>> > >>
>> > >> This patch tries to fix that case.
>> > >> clear_page_tables() is a new replacement for clear_pgds(). Instead of always
>> > >> clearing pgds, it clears only the top-level page table entries that belong
>> > >> entirely to shadow memory.
>> > >> In addition to 'tmp_pg_dir' we now have 'tmp_pud', which is used to store
>> > >> puds that might now be cleared by clear_page_tables().
>> > >>
>> > >> Reported-by: Suzuki K. Poulose <Suzuki.Poulose@arm.com>
>> > >> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
>> > >
>> > > I would argue that the Kasan code is complicated enough, and we should
>> > > avoid complicating it even further for a configuration that is highly
>> > > theoretical in nature.
>> > >
>> > > In a 16k configuration, the 4th level only adds a single bit of VA
>> > > space (which is, as I understand it, exactly the issue you need to
>> > > address here since the top level page table has only 2 entries and
>> > > hence does not divide by 8 cleanly), which means you are better off
>> > > using 3 levels unless you *really* need more than 128 TB of VA space.
>> > >
>> > > So can't we just live with the limitation, and keep the current code?
>> >
>> > No objections from my side. Let's keep the current code.
>>
>> Ard had a good point, so fine by me as well.
>
> Ok, so obvious follow-up question: why do we even support 48-bit + 16k
> pages in the kernel? Either it's useful, and we make things work with it,
> or it's not and we can drop it (or, at least, hide it behind EXPERT like
> we do for 36-bit).
>

So there's 10 kinds of features in the world, useful ones and !useful ones? :-)

I think 48-bit/16k is somewhat useful, and I think we should support
it. But I also think we should be pragmatic, and not go out of our way
to support the combinatorial expansion of all niche features enabled
together. I think it is perfectly fine to limit kasan support to
configurations whose top level translation table divides by 8 cleanly
(which only excludes 16k/48-bit anyway).
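
(With a 48-bit VA the top level table has 512 entries at 4K and 64
at 64K, both divisible by 8, but only 2 at 16K, so 16k/48-bit is the
one case where the 1/8 shadow fraction cannot cover a whole number
of top level entries.)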

However, I think it deserves being hidden behind CONFIG_EXPERT more
than 36-bit/16k does.

-- 
Ard.


* Re: [PATCH RFT] arm64: kasan: Make KASAN work with 16K pages + 48 bit VA
From: Catalin Marinas @ 2015-11-27 14:11 UTC
  To: Will Deacon
  Cc: Mark Rutland, Yury, Arnd Bergmann, Ard Biesheuvel, Linus Walleij,
	Suzuki K. Poulose, linux-kernel, linux-mm, Alexander Potapenko,
	Alexey Klimov, David Keitel, Andrey Ryabinin, Dmitry Vyukov,
	linux-arm-kernel

On Fri, Nov 27, 2015 at 10:02:11AM +0000, Will Deacon wrote:
> On Fri, Nov 27, 2015 at 09:35:29AM +0000, Catalin Marinas wrote:
> > On Fri, Nov 27, 2015 at 11:12:28AM +0300, Andrey Ryabinin wrote:
> > > On 11/26/2015 07:40 PM, Ard Biesheuvel wrote:
> > > > On 26 November 2015 at 14:14, Andrey Ryabinin <aryabinin@virtuozzo.com> wrote:
> > > >> Currently kasan assumes that shadow memory covers one or more entire PGDs.
> > > >> That's not true for 16K pages + 48bit VA space, where PGDIR_SIZE is bigger
> > > >> than the whole shadow memory.
> > > >>
> > > >> This patch tries to fix that case.
> > > >> clear_page_tables() is a new replacement for clear_pgds(). Instead of always
> > > >> clearing pgds, it clears only the top-level page table entries that belong
> > > >> entirely to shadow memory.
> > > >> In addition to 'tmp_pg_dir' we now have 'tmp_pud', which is used to store
> > > >> puds that might now be cleared by clear_page_tables().
> > > >>
> > > >> Reported-by: Suzuki K. Poulose <Suzuki.Poulose@arm.com>
> > > >> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
> > > > 
> > > > I would argue that the Kasan code is complicated enough, and we should
> > > > avoid complicating it even further for a configuration that is highly
> > > > theoretical in nature.
> > > > 
> > > > In a 16k configuration, the 4th level only adds a single bit of VA
> > > > space (which is, as I understand it, exactly the issue you need to
> > > > address here since the top level page table has only 2 entries and
> > > > hence does not divide by 8 cleanly), which means you are better off
> > > > using 3 levels unless you *really* need more than 128 TB of VA space.
> > > > 
> > > > So can't we just live with the limitation, and keep the current code?
> > > 
> > > No objections from my side. Let's keep the current code.
> > 
> > Ard had a good point, so fine by me as well.
> 
> Ok, so obvious follow-up question: why do we even support 48-bit + 16k
> pages in the kernel? Either it's useful, and we make things work with it,
> or it's not and we can drop it (or, at least, hide it behind EXPERT like
> we do for 36-bit).

One reason is hardware validation (I guess that may be the only reason
for 16KB in general ;)). For each of the page sizes we support two VA
ranges: 48-bit (maximum) and a recommended one for the corresponding
granule. With 16K, the difference is not significant (47 to 48), so we
could follow Ard's suggestion and make it depend on EXPERT (we already
do this for 16KB and 36-bit VA).
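
(The usual pairs being, if I remember the configs right: 39 or 48
bits at 4K, 47 or 48 at 16K, and 42 or 48 at 64K.)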

-- 
Catalin
