linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] mm: pgtable: add missing flag and statistics for kernel PTE page
@ 2024-02-01  8:05 Qi Zheng
  2024-02-01  8:05 ` [PATCH 2/2] mm: pgtable: remove unnecessary split ptlock for kernel PMD page Qi Zheng
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Qi Zheng @ 2024-02-01  8:05 UTC (permalink / raw)
  To: akpm, arnd
  Cc: muchun.song, david, willy, linux-mm, linux-kernel, linux-arch, Qi Zheng

For kernel PTE page, we do not need to allocate and initialize its split
ptlock, but as a page table page, it's still necessary to add PG_table
flag and NR_PAGETABLE statistics for it.

Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
---
 include/asm-generic/pgalloc.h |  7 ++++++-
 include/linux/mm.h            | 21 ++++++++++++++++-----
 2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
index 879e5f8aa5e9..908bd9140ac2 100644
--- a/include/asm-generic/pgalloc.h
+++ b/include/asm-generic/pgalloc.h
@@ -23,6 +23,8 @@ static inline pte_t *__pte_alloc_one_kernel(struct mm_struct *mm)
 
 	if (!ptdesc)
 		return NULL;
+
+	__pagetable_pte_ctor(ptdesc);
 	return ptdesc_address(ptdesc);
 }
 
@@ -46,7 +48,10 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
  */
 static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
 {
-	pagetable_free(virt_to_ptdesc(pte));
+	struct ptdesc *ptdesc = virt_to_ptdesc(pte);
+
+	__pagetable_pte_dtor(ptdesc);
+	pagetable_free(ptdesc);
 }
 
 /**
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e442fd0efdd9..e37db032764e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2922,26 +2922,37 @@ static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; }
 static inline void ptlock_free(struct ptdesc *ptdesc) {}
 #endif /* USE_SPLIT_PTE_PTLOCKS */
 
-static inline bool pagetable_pte_ctor(struct ptdesc *ptdesc)
+static inline void __pagetable_pte_ctor(struct ptdesc *ptdesc)
 {
 	struct folio *folio = ptdesc_folio(ptdesc);
 
-	if (!ptlock_init(ptdesc))
-		return false;
 	__folio_set_pgtable(folio);
 	lruvec_stat_add_folio(folio, NR_PAGETABLE);
+}
+
+static inline bool pagetable_pte_ctor(struct ptdesc *ptdesc)
+{
+	if (!ptlock_init(ptdesc))
+		return false;
+
+	__pagetable_pte_ctor(ptdesc);
 	return true;
 }
 
-static inline void pagetable_pte_dtor(struct ptdesc *ptdesc)
+static inline void __pagetable_pte_dtor(struct ptdesc *ptdesc)
 {
 	struct folio *folio = ptdesc_folio(ptdesc);
 
-	ptlock_free(ptdesc);
 	__folio_clear_pgtable(folio);
 	lruvec_stat_sub_folio(folio, NR_PAGETABLE);
 }
 
+static inline void pagetable_pte_dtor(struct ptdesc *ptdesc)
+{
+	ptlock_free(ptdesc);
+	__pagetable_pte_dtor(ptdesc);
+}
+
 pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp);
 static inline pte_t *pte_offset_map(pmd_t *pmd, unsigned long addr)
 {
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/2] mm: pgtable: remove unnecessary split ptlock for kernel PMD page
  2024-02-01  8:05 [PATCH 1/2] mm: pgtable: add missing flag and statistics for kernel PTE page Qi Zheng
@ 2024-02-01  8:05 ` Qi Zheng
  2024-02-02  3:16   ` Muchun Song
  2024-02-04 18:54   ` Matthew Wilcox
  2024-02-02  2:47 ` [PATCH 1/2] mm: pgtable: add missing flag and statistics for kernel PTE page Muchun Song
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 12+ messages in thread
From: Qi Zheng @ 2024-02-01  8:05 UTC (permalink / raw)
  To: akpm, arnd
  Cc: muchun.song, david, willy, linux-mm, linux-kernel, linux-arch, Qi Zheng

For kernel PMD entry, we use init_mm.page_table_lock to protect it, so
there is no need to allocate and initialize the split ptlock for kernel
PMD page.

Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
---
 include/asm-generic/pgalloc.h | 10 ++++++++--
 include/linux/mm.h            | 21 ++++++++++++++++-----
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
index 908bd9140ac2..57bd41adf760 100644
--- a/include/asm-generic/pgalloc.h
+++ b/include/asm-generic/pgalloc.h
@@ -139,7 +139,10 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 	ptdesc = pagetable_alloc(gfp, 0);
 	if (!ptdesc)
 		return NULL;
-	if (!pagetable_pmd_ctor(ptdesc)) {
+
+	if (mm == &init_mm) {
+		__pagetable_pmd_ctor(ptdesc);
+	} else if (!pagetable_pmd_ctor(ptdesc)) {
 		pagetable_free(ptdesc);
 		return NULL;
 	}
@@ -153,7 +156,10 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 	struct ptdesc *ptdesc = virt_to_ptdesc(pmd);
 
 	BUG_ON((unsigned long)pmd & (PAGE_SIZE-1));
-	pagetable_pmd_dtor(ptdesc);
+	if (mm == &init_mm)
+		__pagetable_pmd_dtor(ptdesc);
+	else
+		pagetable_pmd_dtor(ptdesc);
 	pagetable_free(ptdesc);
 }
 #endif
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e37db032764e..68ca71407177 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3048,26 +3048,37 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd)
 	return ptl;
 }
 
-static inline bool pagetable_pmd_ctor(struct ptdesc *ptdesc)
+static inline void __pagetable_pmd_ctor(struct ptdesc *ptdesc)
 {
 	struct folio *folio = ptdesc_folio(ptdesc);
 
-	if (!pmd_ptlock_init(ptdesc))
-		return false;
 	__folio_set_pgtable(folio);
 	lruvec_stat_add_folio(folio, NR_PAGETABLE);
+}
+
+static inline bool pagetable_pmd_ctor(struct ptdesc *ptdesc)
+{
+	if (!pmd_ptlock_init(ptdesc))
+		return false;
+
+	__pagetable_pmd_ctor(ptdesc);
 	return true;
 }
 
-static inline void pagetable_pmd_dtor(struct ptdesc *ptdesc)
+static inline void __pagetable_pmd_dtor(struct ptdesc *ptdesc)
 {
 	struct folio *folio = ptdesc_folio(ptdesc);
 
-	pmd_ptlock_free(ptdesc);
 	__folio_clear_pgtable(folio);
 	lruvec_stat_sub_folio(folio, NR_PAGETABLE);
 }
 
+static inline void pagetable_pmd_dtor(struct ptdesc *ptdesc)
+{
+	pmd_ptlock_free(ptdesc);
+	__pagetable_pmd_dtor(ptdesc);
+}
+
 /*
  * No scalability reason to split PUD locks yet, but follow the same pattern
  * as the PMD locks to make it easier if we decide to.  The VM should not be
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] mm: pgtable: add missing flag and statistics for kernel PTE page
  2024-02-01  8:05 [PATCH 1/2] mm: pgtable: add missing flag and statistics for kernel PTE page Qi Zheng
  2024-02-01  8:05 ` [PATCH 2/2] mm: pgtable: remove unnecessary split ptlock for kernel PMD page Qi Zheng
@ 2024-02-02  2:47 ` Muchun Song
  2024-02-04 10:58 ` Mike Rapoport
  2024-02-04 18:51 ` Matthew Wilcox
  3 siblings, 0 replies; 12+ messages in thread
From: Muchun Song @ 2024-02-02  2:47 UTC (permalink / raw)
  To: Qi Zheng
  Cc: Andrew Morton, Arnd Bergmann, david, willy, linux-mm,
	linux-kernel, linux-arch



> On Feb 1, 2024, at 16:05, Qi Zheng <zhengqi.arch@bytedance.com> wrote:
> 
> For kernel PTE page, we do not need to allocate and initialize its split
> ptlock, but as a page table page, it's still necessary to add PG_table
> flag and NR_PAGETABLE statistics for it.
> 
> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>

Reviewed-by: Muchun Song <muchun.song@linux.dev>

Thanks.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] mm: pgtable: remove unnecessary split ptlock for kernel PMD page
  2024-02-01  8:05 ` [PATCH 2/2] mm: pgtable: remove unnecessary split ptlock for kernel PMD page Qi Zheng
@ 2024-02-02  3:16   ` Muchun Song
  2024-02-04 18:54   ` Matthew Wilcox
  1 sibling, 0 replies; 12+ messages in thread
From: Muchun Song @ 2024-02-02  3:16 UTC (permalink / raw)
  To: Qi Zheng
  Cc: Andrew Morton, Arnd Bergmann, david, willy, linux-mm,
	linux-kernel, linux-arch



> On Feb 1, 2024, at 16:05, Qi Zheng <zhengqi.arch@bytedance.com> wrote:
> 
> For kernel PMD entry, we use init_mm.page_table_lock to protect it, so
> there is no need to allocate and initialize the split ptlock for kernel
> PMD page.
> 
> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>

Reviewed-by: Muchun Song <muchun.song@linux.dev>

Thanks.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] mm: pgtable: add missing flag and statistics for kernel PTE page
  2024-02-01  8:05 [PATCH 1/2] mm: pgtable: add missing flag and statistics for kernel PTE page Qi Zheng
  2024-02-01  8:05 ` [PATCH 2/2] mm: pgtable: remove unnecessary split ptlock for kernel PMD page Qi Zheng
  2024-02-02  2:47 ` [PATCH 1/2] mm: pgtable: add missing flag and statistics for kernel PTE page Muchun Song
@ 2024-02-04 10:58 ` Mike Rapoport
  2024-02-04 11:39   ` Qi Zheng
  2024-02-04 18:51 ` Matthew Wilcox
  3 siblings, 1 reply; 12+ messages in thread
From: Mike Rapoport @ 2024-02-04 10:58 UTC (permalink / raw)
  To: Qi Zheng
  Cc: akpm, arnd, muchun.song, david, willy, linux-mm, linux-kernel,
	linux-arch

On Thu, Feb 01, 2024 at 04:05:40PM +0800, Qi Zheng wrote:
> For kernel PTE page, we do not need to allocate and initialize its split
> ptlock, but as a page table page, it's still necessary to add PG_table
> flag and NR_PAGETABLE statistics for it.
> 
> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
> ---
>  include/asm-generic/pgalloc.h |  7 ++++++-
>  include/linux/mm.h            | 21 ++++++++++++++++-----
>  2 files changed, 22 insertions(+), 6 deletions(-)

This should also update the architectures that define
__HAVE_ARCH_PTE_ALLOC_ONE_KERNEL, otherwise NR_PAGETABLE counts will get
wrong.

Another related thing is that many architectures have custom allocations
for early page tables and these would also benefit form NR_PAGETABLE
accounting.
 
> diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
> index 879e5f8aa5e9..908bd9140ac2 100644
> --- a/include/asm-generic/pgalloc.h
> +++ b/include/asm-generic/pgalloc.h
> @@ -23,6 +23,8 @@ static inline pte_t *__pte_alloc_one_kernel(struct mm_struct *mm)
>  
>  	if (!ptdesc)
>  		return NULL;
> +
> +	__pagetable_pte_ctor(ptdesc);
>  	return ptdesc_address(ptdesc);
>  }
>  
> @@ -46,7 +48,10 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>   */
>  static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>  {
> -	pagetable_free(virt_to_ptdesc(pte));
> +	struct ptdesc *ptdesc = virt_to_ptdesc(pte);
> +
> +	__pagetable_pte_dtor(ptdesc);
> +	pagetable_free(ptdesc);
>  }
>  
>  /**
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index e442fd0efdd9..e37db032764e 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2922,26 +2922,37 @@ static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; }
>  static inline void ptlock_free(struct ptdesc *ptdesc) {}
>  #endif /* USE_SPLIT_PTE_PTLOCKS */
>  
> -static inline bool pagetable_pte_ctor(struct ptdesc *ptdesc)
> +static inline void __pagetable_pte_ctor(struct ptdesc *ptdesc)
>  {
>  	struct folio *folio = ptdesc_folio(ptdesc);
>  
> -	if (!ptlock_init(ptdesc))
> -		return false;
>  	__folio_set_pgtable(folio);
>  	lruvec_stat_add_folio(folio, NR_PAGETABLE);
> +}
> +
> +static inline bool pagetable_pte_ctor(struct ptdesc *ptdesc)
> +{
> +	if (!ptlock_init(ptdesc))
> +		return false;
> +
> +	__pagetable_pte_ctor(ptdesc);
>  	return true;
>  }
>  
> -static inline void pagetable_pte_dtor(struct ptdesc *ptdesc)
> +static inline void __pagetable_pte_dtor(struct ptdesc *ptdesc)
>  {
>  	struct folio *folio = ptdesc_folio(ptdesc);
>  
> -	ptlock_free(ptdesc);
>  	__folio_clear_pgtable(folio);
>  	lruvec_stat_sub_folio(folio, NR_PAGETABLE);
>  }
>  
> +static inline void pagetable_pte_dtor(struct ptdesc *ptdesc)
> +{
> +	ptlock_free(ptdesc);
> +	__pagetable_pte_dtor(ptdesc);
> +}
> +
>  pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp);
>  static inline pte_t *pte_offset_map(pmd_t *pmd, unsigned long addr)
>  {
> -- 
> 2.30.2
> 
> 

-- 
Sincerely yours,
Mike.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] mm: pgtable: add missing flag and statistics for kernel PTE page
  2024-02-04 10:58 ` Mike Rapoport
@ 2024-02-04 11:39   ` Qi Zheng
  2024-02-04 12:15     ` Mike Rapoport
  0 siblings, 1 reply; 12+ messages in thread
From: Qi Zheng @ 2024-02-04 11:39 UTC (permalink / raw)
  To: Mike Rapoport
  Cc: akpm, arnd, muchun.song, david, willy, linux-mm, linux-kernel,
	linux-arch

Hi Mike,

On 2024/2/4 18:58, Mike Rapoport wrote:
> On Thu, Feb 01, 2024 at 04:05:40PM +0800, Qi Zheng wrote:
>> For kernel PTE page, we do not need to allocate and initialize its split
>> ptlock, but as a page table page, it's still necessary to add PG_table
>> flag and NR_PAGETABLE statistics for it.
>>
>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>> ---
>>   include/asm-generic/pgalloc.h |  7 ++++++-
>>   include/linux/mm.h            | 21 ++++++++++++++++-----
>>   2 files changed, 22 insertions(+), 6 deletions(-)
> 
> This should also update the architectures that define
> __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL, otherwise NR_PAGETABLE counts will get
> wrong.

Yes, this patchset only focuses on the generic implementation. For those
architectures that define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL, some reuse
the generic __pte_alloc_one_kernel(), but some have their own customized
implementations, which indeed need to be fixed.

I wasn't familiar with those architectures and didn't investigate why
they couldn't reuse the generic __pte_alloc_one_kernel(), so I didn't
fix them. It would be better if there are maintainers corresponding to
the architecture who can help fix it. After all, they have a better
understanding of the historical background and have a testing
environment. ;)

> 
> Another related thing is that many architectures have custom allocations
> for early page tables and these would also benefit form NR_PAGETABLE
> accounting.

Indeed, this is also a point that can be optimized.

Thanks.

>   
>> diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
>> index 879e5f8aa5e9..908bd9140ac2 100644
>> --- a/include/asm-generic/pgalloc.h
>> +++ b/include/asm-generic/pgalloc.h
>> @@ -23,6 +23,8 @@ static inline pte_t *__pte_alloc_one_kernel(struct mm_struct *mm)
>>   
>>   	if (!ptdesc)
>>   		return NULL;
>> +
>> +	__pagetable_pte_ctor(ptdesc);
>>   	return ptdesc_address(ptdesc);
>>   }
>>   
>> @@ -46,7 +48,10 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
>>    */
>>   static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
>>   {
>> -	pagetable_free(virt_to_ptdesc(pte));
>> +	struct ptdesc *ptdesc = virt_to_ptdesc(pte);
>> +
>> +	__pagetable_pte_dtor(ptdesc);
>> +	pagetable_free(ptdesc);
>>   }
>>   
>>   /**
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index e442fd0efdd9..e37db032764e 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -2922,26 +2922,37 @@ static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; }
>>   static inline void ptlock_free(struct ptdesc *ptdesc) {}
>>   #endif /* USE_SPLIT_PTE_PTLOCKS */
>>   
>> -static inline bool pagetable_pte_ctor(struct ptdesc *ptdesc)
>> +static inline void __pagetable_pte_ctor(struct ptdesc *ptdesc)
>>   {
>>   	struct folio *folio = ptdesc_folio(ptdesc);
>>   
>> -	if (!ptlock_init(ptdesc))
>> -		return false;
>>   	__folio_set_pgtable(folio);
>>   	lruvec_stat_add_folio(folio, NR_PAGETABLE);
>> +}
>> +
>> +static inline bool pagetable_pte_ctor(struct ptdesc *ptdesc)
>> +{
>> +	if (!ptlock_init(ptdesc))
>> +		return false;
>> +
>> +	__pagetable_pte_ctor(ptdesc);
>>   	return true;
>>   }
>>   
>> -static inline void pagetable_pte_dtor(struct ptdesc *ptdesc)
>> +static inline void __pagetable_pte_dtor(struct ptdesc *ptdesc)
>>   {
>>   	struct folio *folio = ptdesc_folio(ptdesc);
>>   
>> -	ptlock_free(ptdesc);
>>   	__folio_clear_pgtable(folio);
>>   	lruvec_stat_sub_folio(folio, NR_PAGETABLE);
>>   }
>>   
>> +static inline void pagetable_pte_dtor(struct ptdesc *ptdesc)
>> +{
>> +	ptlock_free(ptdesc);
>> +	__pagetable_pte_dtor(ptdesc);
>> +}
>> +
>>   pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp);
>>   static inline pte_t *pte_offset_map(pmd_t *pmd, unsigned long addr)
>>   {
>> -- 
>> 2.30.2
>>
>>
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] mm: pgtable: add missing flag and statistics for kernel PTE page
  2024-02-04 11:39   ` Qi Zheng
@ 2024-02-04 12:15     ` Mike Rapoport
  2024-02-04 16:26       ` Qi Zheng
  0 siblings, 1 reply; 12+ messages in thread
From: Mike Rapoport @ 2024-02-04 12:15 UTC (permalink / raw)
  To: Qi Zheng
  Cc: akpm, arnd, muchun.song, david, willy, linux-mm, linux-kernel,
	linux-arch

On Sun, Feb 04, 2024 at 07:39:38PM +0800, Qi Zheng wrote:
> Hi Mike,
> 
> On 2024/2/4 18:58, Mike Rapoport wrote:
> > On Thu, Feb 01, 2024 at 04:05:40PM +0800, Qi Zheng wrote:
> > > For kernel PTE page, we do not need to allocate and initialize its split
> > > ptlock, but as a page table page, it's still necessary to add PG_table
> > > flag and NR_PAGETABLE statistics for it.
> > > 
> > > Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
> > > ---
> > >   include/asm-generic/pgalloc.h |  7 ++++++-
> > >   include/linux/mm.h            | 21 ++++++++++++++++-----
> > >   2 files changed, 22 insertions(+), 6 deletions(-)
> > 
> > This should also update the architectures that define
> > __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL, otherwise NR_PAGETABLE counts will get
> > wrong.
> 
> Yes, this patchset only focuses on the generic implementation. For those
> architectures that define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL, some reuse
> the generic __pte_alloc_one_kernel(), but some have their own customized
> implementations, which indeed need to be fixed.
> 
> I wasn't familiar with those architectures and didn't investigate why
> they couldn't reuse the generic __pte_alloc_one_kernel(), so I didn't
> fix them.

But with your patch NR_PAGETABLE will underflow e.g. on arm and it'd be a
regression for no good reason.

> It would be better if there are maintainers corresponding to
> the architecture who can help fix it. After all, they have a better
> understanding of the historical background and have a testing
> environment. ;)

-- 
Sincerely yours,
Mike.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] mm: pgtable: add missing flag and statistics for kernel PTE page
  2024-02-04 12:15     ` Mike Rapoport
@ 2024-02-04 16:26       ` Qi Zheng
  0 siblings, 0 replies; 12+ messages in thread
From: Qi Zheng @ 2024-02-04 16:26 UTC (permalink / raw)
  To: Mike Rapoport, akpm
  Cc: arnd, muchun.song, david, willy, linux-mm, linux-kernel, linux-arch



On 2024/2/4 20:15, Mike Rapoport wrote:
> On Sun, Feb 04, 2024 at 07:39:38PM +0800, Qi Zheng wrote:
>> Hi Mike,
>>
>> On 2024/2/4 18:58, Mike Rapoport wrote:
>>> On Thu, Feb 01, 2024 at 04:05:40PM +0800, Qi Zheng wrote:
>>>> For kernel PTE page, we do not need to allocate and initialize its split
>>>> ptlock, but as a page table page, it's still necessary to add PG_table
>>>> flag and NR_PAGETABLE statistics for it.
>>>>
>>>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>>>> ---
>>>>    include/asm-generic/pgalloc.h |  7 ++++++-
>>>>    include/linux/mm.h            | 21 ++++++++++++++++-----
>>>>    2 files changed, 22 insertions(+), 6 deletions(-)
>>>
>>> This should also update the architectures that define
>>> __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL, otherwise NR_PAGETABLE counts will get
>>> wrong.
>>
>> Yes, this patchset only focuses on the generic implementation. For those
>> architectures that define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL, some reuse
>> the generic __pte_alloc_one_kernel(), but some have their own customized
>> implementations, which indeed need to be fixed.
>>
>> I wasn't familiar with those architectures and didn't investigate why
>> they couldn't reuse the generic __pte_alloc_one_kernel(), so I didn't
>> fix them.
> 
> But with your patch NR_PAGETABLE will underflow e.g. on arm and it'd be a
> regression for no good reason.

Oh, I see. In some architectures, they implement their own
pte_alloc_one_kernel() and do not call generic __pte_alloc_one_kernel(),
but still reuse generic pte_free_kernel(). So it needs to be fixed
together.

I will try to fix them and send the v2. But since I'm on vacation
recently, updates may not be quick.

Hi Andrew, please help to temporarily remove this patchset from the
mm-unstable.

Thanks!

> 
>> It would be better if there are maintainers corresponding to
>> the architecture who can help fix it. After all, they have a better
>> understanding of the historical background and have a testing
>> environment. ;)
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] mm: pgtable: add missing flag and statistics for kernel PTE page
  2024-02-01  8:05 [PATCH 1/2] mm: pgtable: add missing flag and statistics for kernel PTE page Qi Zheng
                   ` (2 preceding siblings ...)
  2024-02-04 10:58 ` Mike Rapoport
@ 2024-02-04 18:51 ` Matthew Wilcox
  2024-02-05  2:05   ` Qi Zheng
  3 siblings, 1 reply; 12+ messages in thread
From: Matthew Wilcox @ 2024-02-04 18:51 UTC (permalink / raw)
  To: Qi Zheng
  Cc: akpm, arnd, muchun.song, david, linux-mm, linux-kernel, linux-arch

On Thu, Feb 01, 2024 at 04:05:40PM +0800, Qi Zheng wrote:
> For kernel PTE page, we do not need to allocate and initialize its split
> ptlock, but as a page table page, it's still necessary to add PG_table
> flag and NR_PAGETABLE statistics for it.

No, this is wrong.

We do not account _kernel_ page tables to the _user_.  Just because
the kernel, say, called vmalloc() doesn't mean we should charge the
task for it.  Moreover, one task may call vmalloc() and a different task
would then call vfree().

This is a can of worms you don't want to open.  Why did you want to do
this?

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] mm: pgtable: remove unnecessary split ptlock for kernel PMD page
  2024-02-01  8:05 ` [PATCH 2/2] mm: pgtable: remove unnecessary split ptlock for kernel PMD page Qi Zheng
  2024-02-02  3:16   ` Muchun Song
@ 2024-02-04 18:54   ` Matthew Wilcox
  2024-02-05  2:14     ` Qi Zheng
  1 sibling, 1 reply; 12+ messages in thread
From: Matthew Wilcox @ 2024-02-04 18:54 UTC (permalink / raw)
  To: Qi Zheng
  Cc: akpm, arnd, muchun.song, david, linux-mm, linux-kernel, linux-arch

On Thu, Feb 01, 2024 at 04:05:41PM +0800, Qi Zheng wrote:
> For kernel PMD entry, we use init_mm.page_table_lock to protect it, so
> there is no need to allocate and initialize the split ptlock for kernel
> PMD page.

I don't think this is a great idea.  Maybe there's no need to initialise
it, but keeping things the same between kernel & user page tables is a
usually better.  We don't normally allocate memory for the spinlock,
it's only in debugging scenarios like LOCKDEP.  I would drop this unless
you have a really compelling argument to make.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2] mm: pgtable: add missing flag and statistics for kernel PTE page
  2024-02-04 18:51 ` Matthew Wilcox
@ 2024-02-05  2:05   ` Qi Zheng
  0 siblings, 0 replies; 12+ messages in thread
From: Qi Zheng @ 2024-02-05  2:05 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: akpm, arnd, muchun.song, david, linux-mm, linux-kernel, linux-arch

Hi Matthew,

On 2024/2/5 02:51, Matthew Wilcox wrote:
> On Thu, Feb 01, 2024 at 04:05:40PM +0800, Qi Zheng wrote:
>> For kernel PTE page, we do not need to allocate and initialize its split
>> ptlock, but as a page table page, it's still necessary to add PG_table
>> flag and NR_PAGETABLE statistics for it.
> 
> No, this is wrong.
> 
> We do not account _kernel_ page tables to the _user_.  Just because
> the kernel, say, called vmalloc() doesn't mean we should charge the
> task for it.  Moreover, one task may call vmalloc() and a different task
> would then call vfree().
> 

Got it. Thanks for providing this information!

> This is a can of worms you don't want to open.  Why did you want to do
> this?

Ah, just because generic {pmd|pud}_alloc_one() has opened it. ;) And
When I looked through the commits (e.g. commit 1d40a5ea01d5), I couldn't
find the information you provided above. And that is why I CC'd you to
double check this, in case I might have overlooked some important
background information.

So we should actually fix generic {pmd|pud}_alloc_one() (and maybe some
implementation in the arch), right? And it would be better to add some
comments to clarify.

Thanks.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/2] mm: pgtable: remove unnecessary split ptlock for kernel PMD page
  2024-02-04 18:54   ` Matthew Wilcox
@ 2024-02-05  2:14     ` Qi Zheng
  0 siblings, 0 replies; 12+ messages in thread
From: Qi Zheng @ 2024-02-05  2:14 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: akpm, arnd, muchun.song, david, linux-mm, linux-kernel, linux-arch

Hi Matthew,

On 2024/2/5 02:54, Matthew Wilcox wrote:
> On Thu, Feb 01, 2024 at 04:05:41PM +0800, Qi Zheng wrote:
>> For kernel PMD entry, we use init_mm.page_table_lock to protect it, so
>> there is no need to allocate and initialize the split ptlock for kernel
>> PMD page.
> 
> I don't think this is a great idea.  Maybe there's no need to initialise
> it, but keeping things the same between kernel & user page tables is a
> usually better.  We don't normally allocate memory for the spinlock,
> it's only in debugging scenarios like LOCKDEP.  I would drop this unless
> you have a really compelling argument to make.

The reason I first noticed this is that we didn't allocate and
initialize the ptlock in __pte_alloc_one_kernel(). So in at the PTE
level, the implementation of kernel & user page tables is already
different.

Thanks.

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2024-02-05  2:14 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-01  8:05 [PATCH 1/2] mm: pgtable: add missing flag and statistics for kernel PTE page Qi Zheng
2024-02-01  8:05 ` [PATCH 2/2] mm: pgtable: remove unnecessary split ptlock for kernel PMD page Qi Zheng
2024-02-02  3:16   ` Muchun Song
2024-02-04 18:54   ` Matthew Wilcox
2024-02-05  2:14     ` Qi Zheng
2024-02-02  2:47 ` [PATCH 1/2] mm: pgtable: add missing flag and statistics for kernel PTE page Muchun Song
2024-02-04 10:58 ` Mike Rapoport
2024-02-04 11:39   ` Qi Zheng
2024-02-04 12:15     ` Mike Rapoport
2024-02-04 16:26       ` Qi Zheng
2024-02-04 18:51 ` Matthew Wilcox
2024-02-05  2:05   ` Qi Zheng

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).