From: Christophe Leroy <christophe.leroy@csgroup.eu>
To: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>,
	Steven Price <steven.price@arm.com>,
	akpm@linux-foundation.org, linux-mm@kvack.org
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Paul Mackerras <paulus@samba.org>,
	Michael Ellerman <mpe@ellerman.id.au>,
	dja@axtens.net, Oliver O'Halloran <oohall@gmail.com>,
	linux-arch@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH v3] mm: pagewalk: Fix walk for hugepage tables
Date: Mon, 28 Jun 2021 08:19:35 +0200	[thread overview]
Message-ID: <7bbf9c5e-b81d-d20c-7ba1-d50b10238d32@csgroup.eu> (raw)
In-Reply-To: <87bl7qle4o.fsf@linux.ibm.com>



On 28/06/2021 08:03, Aneesh Kumar K.V wrote:
> Christophe Leroy <christophe.leroy@csgroup.eu> writes:
> 
>> Pagewalk ignores hugepd entries and walks down the tables
>> as if they were traditional entries, leading to crazy results.
> 
> But we do handle hugetlb separately
> 
> 	if (vma && is_vm_hugetlb_page(vma)) {
> 		if (ops->hugetlb_entry)
> 			err = walk_hugetlb_range(start, end, walk);
> 	} else
> 		err = walk_pgd_range(start, end, walk);
> 
> Are we using hugepd format for non hugetlb entries?

Yes, on the 8xx we use hugepd for 8M pages for the linear mapping and for the kasan 
shadow mapping (see commit bb5f33c06940 ("Merge "Use hugepages to map kernel mem on 8xx" into next")).

And I'm working on implementing huge VMAP with 8M pages, which will also make use of hugepd.
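
Those mappings have no VMA, so a walk of them goes through walk_pgd_range() rather 
than walk_hugetlb_range(), which is why the generic walker has to recognise hugepd 
entries. As a minimal sketch (the dump_pte callback, its name and the printed format 
are illustrative only, not part of the patch), a pagewalk user just provides a 
->pte_entry handler, and with this fix it also gets called for hugepd-mapped pages:

#include <linux/pagewalk.h>

static int dump_pte(pte_t *pte, unsigned long addr, unsigned long next,
		    struct mm_walk *walk)
{
	/* With walk_hugepd_range(), [addr, next) spans the whole huge
	 * page (e.g. 8M on the 8xx) instead of a 4K entry. */
	pr_info("%08lx-%08lx: pte %lx\n", addr, next,
		(unsigned long)pte_val(*pte));
	return 0;
}

static const struct mm_walk_ops dump_ops = {
	.pte_entry	= dump_pte,
};

static void dump_kernel_range(unsigned long start, unsigned long end)
{
	/* Kernel mappings have no VMA: use the novma variant, which
	 * expects the mmap lock of the walked mm to be held. */
	mmap_read_lock(&init_mm);
	walk_page_range_novma(&init_mm, start, end, &dump_ops, NULL, NULL);
	mmap_read_unlock(&init_mm);
}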

> 
>>
>> Add walk_hugepd_range() and use it to walk hugepage tables.
>>
>> Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
>> Reviewed-by: Steven Price <steven.price@arm.com>
>> ---
>> v3:
>> - Rebased on next-20210624 (no change since v2)
>> - Added Steven's Reviewed-by
>> - Sent as standalone for merge via mm
>>
>> v2:
>> - Add a guard for NULL ops->pte_entry
>> - Take mm->page_table_lock when walking hugepage table, as suggested by follow_huge_pd()
>> ---
>>   mm/pagewalk.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++-----
>>   1 file changed, 53 insertions(+), 5 deletions(-)
>>
>> diff --git a/mm/pagewalk.c b/mm/pagewalk.c
>> index e81640d9f177..9b3db11a4d1d 100644
>> --- a/mm/pagewalk.c
>> +++ b/mm/pagewalk.c
>> @@ -58,6 +58,45 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
>>   	return err;
>>   }
>>   
>> +#ifdef CONFIG_ARCH_HAS_HUGEPD
>> +static int walk_hugepd_range(hugepd_t *phpd, unsigned long addr,
>> +			     unsigned long end, struct mm_walk *walk, int pdshift)
>> +{
>> +	int err = 0;
>> +	const struct mm_walk_ops *ops = walk->ops;
>> +	int shift = hugepd_shift(*phpd);
>> +	int page_size = 1 << shift;
>> +
>> +	if (!ops->pte_entry)
>> +		return 0;
>> +
>> +	if (addr & (page_size - 1))
>> +		return 0;
>> +
>> +	for (;;) {
>> +		pte_t *pte;
>> +
>> +		spin_lock(&walk->mm->page_table_lock);
>> +		pte = hugepte_offset(*phpd, addr, pdshift);
>> +		err = ops->pte_entry(pte, addr, addr + page_size, walk);
>> +		spin_unlock(&walk->mm->page_table_lock);
>> +
>> +		if (err)
>> +			break;
>> +		if (addr >= end - page_size)
>> +			break;
>> +		addr += page_size;
>> +	}
>> +	return err;
>> +}
>> +#else
>> +static int walk_hugepd_range(hugepd_t *phpd, unsigned long addr,
>> +			     unsigned long end, struct mm_walk *walk, int pdshift)
>> +{
>> +	return 0;
>> +}
>> +#endif
>> +
>>   static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
>>   			  struct mm_walk *walk)
>>   {
>> @@ -108,7 +147,10 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
>>   				goto again;
>>   		}
>>   
>> -		err = walk_pte_range(pmd, addr, next, walk);
>> +		if (is_hugepd(__hugepd(pmd_val(*pmd))))
>> +			err = walk_hugepd_range((hugepd_t *)pmd, addr, next, walk, PMD_SHIFT);
>> +		else
>> +			err = walk_pte_range(pmd, addr, next, walk);
>>   		if (err)
>>   			break;
>>   	} while (pmd++, addr = next, addr != end);
>> @@ -157,7 +199,10 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
>>   		if (pud_none(*pud))
>>   			goto again;
>>   
>> -		err = walk_pmd_range(pud, addr, next, walk);
>> +		if (is_hugepd(__hugepd(pud_val(*pud))))
>> +			err = walk_hugepd_range((hugepd_t *)pud, addr, next, walk, PUD_SHIFT);
>> +		else
>> +			err = walk_pmd_range(pud, addr, next, walk);
>>   		if (err)
>>   			break;
>>   	} while (pud++, addr = next, addr != end);
>> @@ -189,7 +234,9 @@ static int walk_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
>>   			if (err)
>>   				break;
>>   		}
>> -		if (ops->pud_entry || ops->pmd_entry || ops->pte_entry)
>> +		if (is_hugepd(__hugepd(p4d_val(*p4d))))
>> +			err = walk_hugepd_range((hugepd_t *)p4d, addr, next, walk, P4D_SHIFT);
>> +		else if (ops->pud_entry || ops->pmd_entry || ops->pte_entry)
>>   			err = walk_pud_range(p4d, addr, next, walk);
>>   		if (err)
>>   			break;
>> @@ -224,8 +271,9 @@ static int walk_pgd_range(unsigned long addr, unsigned long end,
>>   			if (err)
>>   				break;
>>   		}
>> -		if (ops->p4d_entry || ops->pud_entry || ops->pmd_entry ||
>> -		    ops->pte_entry)
>> +		if (is_hugepd(__hugepd(pgd_val(*pgd))))
>> +			err = walk_hugepd_range((hugepd_t *)pgd, addr, next, walk, PGDIR_SHIFT);
>> +		else if (ops->p4d_entry || ops->pud_entry || ops->pmd_entry || ops->pte_entry)
>>   			err = walk_p4d_range(pgd, addr, next, walk);
>>   		if (err)
>>   			break;
>> -- 
>> 2.25.0
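
Note that a ->pte_entry callback needs no hugepd awareness of its own: the page size 
is implied by the range it receives, since walk_pte_range() passes addr + PAGE_SIZE 
as 'next' while walk_hugepd_range() passes addr + (1 << hugepd_shift(*phpd)). A 
sketch (hypothetical callback, just to show the pattern):

static int note_mapping(pte_t *pte, unsigned long addr, unsigned long next,
			struct mm_walk *walk)
{
	unsigned long size = next - addr;	/* 4K, 512K, 8M, ... */

	if (size != PAGE_SIZE)
		pr_debug("huge mapping at %08lx, size %lu\n", addr, size);
	return 0;
}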

Thread overview: 5 messages

2021-06-25  5:10 [PATCH v3] mm: pagewalk: Fix walk for hugepage tables Christophe Leroy
2021-06-28  1:12 ` Andrew Morton
2021-06-28  5:56   ` Christophe Leroy
2021-06-28  6:03 ` Aneesh Kumar K.V
2021-06-28  6:19   ` Christophe Leroy [this message]