* [PATCH] mm: hugetlb: kill set_huge_swap_pte_at()
@ 2022-06-26 14:57 ` Qi Zheng
  0 siblings, 0 replies; 28+ messages in thread
From: Qi Zheng @ 2022-06-26 14:57 UTC (permalink / raw)
  To: mike.kravetz, songmuchun, akpm, catalin.marinas, will
  Cc: linux-arm-kernel, linux-kernel, linux-mm, Qi Zheng

Commit e5251fd43007 ("mm/hugetlb: introduce set_huge_swap_pte_at()
helper") added set_huge_swap_pte_at() to handle swap entries on
architectures that support hugepages consisting of contiguous ptes.
Currently, set_huge_swap_pte_at() is only overridden by arm64.

set_huge_swap_pte_at() takes a sz parameter to help determine the
number of entries to be updated. But in fact, all hugetlb swap
entries contain pfn information, so we can find the corresponding
folio through the pfn recorded in the swap entry, and folio_size()
then gives the size from which the number of entries to update can
be derived.
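
For illustration, a minimal sketch of that lookup (it mirrors the arm64
hunk below; the standalone helper name here is only illustrative):

	/*
	 * Derive the number of contiguous ptes to update from the swap
	 * entry itself instead of from a caller-supplied sz.
	 */
	static int nr_hugetlb_swap_ptes(pte_t pte, size_t *pgsize)
	{
		swp_entry_t entry = pte_to_swp_entry(pte);
		struct folio *folio = page_folio(pfn_to_page(swp_offset(entry)));

		/* folio_size() recovers the huge page size, num_contig_ptes()
		 * converts it into the number of ptes covering it. */
		return num_contig_ptes(folio_size(folio), pgsize);
	}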

Considering that users can easily introduce bugs by ignoring the
difference between set_huge_swap_pte_at() and set_huge_pte_at(),
let's handle swap entries in set_huge_pte_at() and remove
set_huge_swap_pte_at(). Then set_huge_pte_at() can be called
anywhere, which simplifies the code.

Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
---
 arch/arm64/include/asm/hugetlb.h |  3 ---
 arch/arm64/mm/hugetlbpage.c      | 34 ++++++++++++++++----------------
 include/linux/hugetlb.h          | 13 ------------
 mm/hugetlb.c                     |  8 +++-----
 mm/rmap.c                        | 11 +++--------
 5 files changed, 23 insertions(+), 46 deletions(-)

diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index 1fd2846dbefe..d20f5da2d76f 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -46,9 +46,6 @@ extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
 			   pte_t *ptep, unsigned long sz);
 #define __HAVE_ARCH_HUGE_PTEP_GET
 extern pte_t huge_ptep_get(pte_t *ptep);
-extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
-				 pte_t *ptep, pte_t pte, unsigned long sz);
-#define set_huge_swap_pte_at set_huge_swap_pte_at
 
 void __init arm64_hugetlb_cma_reserve(void);
 
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index c9e076683e5d..58b89b9d13e0 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -238,6 +238,13 @@ static void clear_flush(struct mm_struct *mm,
 	flush_tlb_range(&vma, saddr, addr);
 }
 
+static inline struct folio *hugetlb_swap_entry_to_folio(swp_entry_t entry)
+{
+	VM_BUG_ON(!is_migration_entry(entry) && !is_hwpoison_entry(entry));
+
+	return page_folio(pfn_to_page(swp_offset(entry)));
+}
+
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 			    pte_t *ptep, pte_t pte)
 {
@@ -247,11 +254,16 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 	unsigned long pfn, dpfn;
 	pgprot_t hugeprot;
 
-	/*
-	 * Code needs to be expanded to handle huge swap and migration
-	 * entries. Needed for HUGETLB and MEMORY_FAILURE.
-	 */
-	WARN_ON(!pte_present(pte));
+	if (!pte_present(pte)) {
+		struct folio *folio;
+
+		folio = hugetlb_swap_entry_to_folio(pte_to_swp_entry(pte));
+		ncontig = num_contig_ptes(folio_size(folio), &pgsize);
+
+		for (i = 0; i < ncontig; i++, ptep++)
+			set_pte_at(mm, addr, ptep, pte);
+		return;
+	}
 
 	if (!pte_cont(pte)) {
 		set_pte_at(mm, addr, ptep, pte);
@@ -269,18 +281,6 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
 }
 
-void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
-			  pte_t *ptep, pte_t pte, unsigned long sz)
-{
-	int i, ncontig;
-	size_t pgsize;
-
-	ncontig = num_contig_ptes(sz, &pgsize);
-
-	for (i = 0; i < ncontig; i++, ptep++)
-		set_pte(ptep, pte);
-}
-
 pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
 		      unsigned long addr, unsigned long sz)
 {
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index ee9a28ef26ee..3bb98434550a 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -928,14 +928,6 @@ static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
 	atomic_long_sub(l, &mm->hugetlb_usage);
 }
 
-#ifndef set_huge_swap_pte_at
-static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
-					pte_t *ptep, pte_t pte, unsigned long sz)
-{
-	set_huge_pte_at(mm, addr, ptep, pte);
-}
-#endif
-
 #ifndef huge_ptep_modify_prot_start
 #define huge_ptep_modify_prot_start huge_ptep_modify_prot_start
 static inline pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
@@ -1119,11 +1111,6 @@ static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
 {
 }
 
-static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
-					pte_t *ptep, pte_t pte, unsigned long sz)
-{
-}
-
 static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
 					  unsigned long addr, pte_t *ptep)
 {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f338640fbe4a..559084d96082 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4813,12 +4813,11 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 				entry = swp_entry_to_pte(swp_entry);
 				if (userfaultfd_wp(src_vma) && uffd_wp)
 					entry = huge_pte_mkuffd_wp(entry);
-				set_huge_swap_pte_at(src, addr, src_pte,
-						     entry, sz);
+				set_huge_pte_at(src, addr, src_pte, entry);
 			}
 			if (!userfaultfd_wp(dst_vma) && uffd_wp)
 				entry = huge_pte_clear_uffd_wp(entry);
-			set_huge_swap_pte_at(dst, addr, dst_pte, entry, sz);
+			set_huge_pte_at(dst, addr, dst_pte, entry);
 		} else if (unlikely(is_pte_marker(entry))) {
 			/*
 			 * We copy the pte marker only if the dst vma has
@@ -6375,8 +6374,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
 					newpte = pte_swp_mkuffd_wp(newpte);
 				else if (uffd_wp_resolve)
 					newpte = pte_swp_clear_uffd_wp(newpte);
-				set_huge_swap_pte_at(mm, address, ptep,
-						     newpte, psize);
+				set_huge_pte_at(mm, address, ptep, newpte);
 				pages++;
 			}
 			spin_unlock(ptl);
diff --git a/mm/rmap.c b/mm/rmap.c
index 062e8655f337..338fbb24c602 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1618,9 +1618,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
 			if (folio_test_hugetlb(folio)) {
 				hugetlb_count_sub(folio_nr_pages(folio), mm);
-				set_huge_swap_pte_at(mm, address,
-						     pvmw.pte, pteval,
-						     vma_mmu_pagesize(vma));
+				set_huge_pte_at(mm, address, pvmw.pte, pteval);
 			} else {
 				dec_mm_counter(mm, mm_counter(&folio->page));
 				set_pte_at(mm, address, pvmw.pte, pteval);
@@ -2004,9 +2002,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
 			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
 			if (folio_test_hugetlb(folio)) {
 				hugetlb_count_sub(folio_nr_pages(folio), mm);
-				set_huge_swap_pte_at(mm, address,
-						     pvmw.pte, pteval,
-						     vma_mmu_pagesize(vma));
+				set_huge_pte_at(mm, address, pvmw.pte, pteval);
 			} else {
 				dec_mm_counter(mm, mm_counter(&folio->page));
 				set_pte_at(mm, address, pvmw.pte, pteval);
@@ -2074,8 +2070,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
 			if (pte_uffd_wp(pteval))
 				swp_pte = pte_swp_mkuffd_wp(swp_pte);
 			if (folio_test_hugetlb(folio))
-				set_huge_swap_pte_at(mm, address, pvmw.pte,
-						     swp_pte, vma_mmu_pagesize(vma));
+				set_huge_pte_at(mm, address, pvmw.pte, swp_pte);
 			else
 				set_pte_at(mm, address, pvmw.pte, swp_pte);
 			trace_set_migration_pte(address, pte_val(swp_pte),
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 28+ messages in thread

* Re: [PATCH] mm: hugetlb: kill set_huge_swap_pte_at()
  2022-06-26 14:57 ` Qi Zheng
@ 2022-06-27  3:32   ` Muchun Song
  -1 siblings, 0 replies; 28+ messages in thread
From: Muchun Song @ 2022-06-27  3:32 UTC (permalink / raw)
  To: Qi Zheng
  Cc: mike.kravetz, akpm, catalin.marinas, will, linux-arm-kernel,
	linux-kernel, linux-mm

On Sun, Jun 26, 2022 at 10:57:17PM +0800, Qi Zheng wrote:
> The commit e5251fd43007 ("mm/hugetlb: introduce set_huge_swap_pte_at()
> helper") add set_huge_swap_pte_at() to handle swap entries on
> architectures that support hugepages consisting of contiguous ptes.
> And currently the set_huge_swap_pte_at() is only overridden by arm64.
> 
> The set_huge_swap_pte_at() provide a sz parameter to help determine
> the number of entries to be updated. But in fact, all hugetlb swap
> entries contain pfn information, so we can find the corresponding
> folio through the pfn recorded in the swap entry, then the folio_size()
> is the number of entries that need to be updated.
> 
> And considering that users will easily cause bugs by ignoring the
> difference between set_huge_swap_pte_at() and set_huge_pte_at().
> Let's handle swap entries in set_huge_pte_at() and remove the
> set_huge_swap_pte_at(), then we can call set_huge_pte_at()
> anywhere, which simplifies our coding.
> 
> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>

I think it is a nice cleanup since it simplifies the code quite a bit.
We no longer need to choose between set_huge_swap_pte_at() and
set_huge_pte_at(), where it is very easy to make mistakes (see commits
5d4af6195c87 and e5251fd43007).

And since arm64 is the only user that needs a special
set_huge_swap_pte_at(), it is nicer if arm64 handles it
transparently. So

Acked-by: Muchun Song <songmuchun@bytedance.com>

> ---
>  arch/arm64/include/asm/hugetlb.h |  3 ---
>  arch/arm64/mm/hugetlbpage.c      | 34 ++++++++++++++++----------------
>  include/linux/hugetlb.h          | 13 ------------
>  mm/hugetlb.c                     |  8 +++-----
>  mm/rmap.c                        | 11 +++--------
>  5 files changed, 23 insertions(+), 46 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
> index 1fd2846dbefe..d20f5da2d76f 100644
> --- a/arch/arm64/include/asm/hugetlb.h
> +++ b/arch/arm64/include/asm/hugetlb.h
> @@ -46,9 +46,6 @@ extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
>  			   pte_t *ptep, unsigned long sz);
>  #define __HAVE_ARCH_HUGE_PTEP_GET
>  extern pte_t huge_ptep_get(pte_t *ptep);
> -extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
> -				 pte_t *ptep, pte_t pte, unsigned long sz);
> -#define set_huge_swap_pte_at set_huge_swap_pte_at
>  
>  void __init arm64_hugetlb_cma_reserve(void);
>  
> diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
> index c9e076683e5d..58b89b9d13e0 100644
> --- a/arch/arm64/mm/hugetlbpage.c
> +++ b/arch/arm64/mm/hugetlbpage.c
> @@ -238,6 +238,13 @@ static void clear_flush(struct mm_struct *mm,
>  	flush_tlb_range(&vma, saddr, addr);
>  }
>  
> +static inline struct folio *hugetlb_swap_entry_to_folio(swp_entry_t entry)
> +{
> +	VM_BUG_ON(!is_migration_entry(entry) && !is_hwpoison_entry(entry));
> +
> +	return page_folio(pfn_to_page(swp_offset(entry)));
> +}
> +
>  void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
>  			    pte_t *ptep, pte_t pte)
>  {
> @@ -247,11 +254,16 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
>  	unsigned long pfn, dpfn;
>  	pgprot_t hugeprot;
>  
> -	/*
> -	 * Code needs to be expanded to handle huge swap and migration
> -	 * entries. Needed for HUGETLB and MEMORY_FAILURE.
> -	 */

Looking at the comment being removed here, it seems this cleanup has
been on the plan for a few years?

> -	WARN_ON(!pte_present(pte));
> +	if (!pte_present(pte)) {
> +		struct folio *folio;
> +
> +		folio = hugetlb_swap_entry_to_folio(pte_to_swp_entry(pte));
> +		ncontig = num_contig_ptes(folio_size(folio), &pgsize);
> +
> +		for (i = 0; i < ncontig; i++, ptep++)
> +			set_pte_at(mm, addr, ptep, pte);
> +		return;
> +	}
>  
>  	if (!pte_cont(pte)) {
>  		set_pte_at(mm, addr, ptep, pte);
> @@ -269,18 +281,6 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
>  		set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
>  }
>  
> -void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
> -			  pte_t *ptep, pte_t pte, unsigned long sz)
> -{
> -	int i, ncontig;
> -	size_t pgsize;
> -
> -	ncontig = num_contig_ptes(sz, &pgsize);
> -
> -	for (i = 0; i < ncontig; i++, ptep++)
> -		set_pte(ptep, pte);
> -}
> -
>  pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
>  		      unsigned long addr, unsigned long sz)
>  {
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index ee9a28ef26ee..3bb98434550a 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -928,14 +928,6 @@ static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
>  	atomic_long_sub(l, &mm->hugetlb_usage);
>  }
>  
> -#ifndef set_huge_swap_pte_at
> -static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
> -					pte_t *ptep, pte_t pte, unsigned long sz)
> -{
> -	set_huge_pte_at(mm, addr, ptep, pte);
> -}
> -#endif
> -
>  #ifndef huge_ptep_modify_prot_start
>  #define huge_ptep_modify_prot_start huge_ptep_modify_prot_start
>  static inline pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
> @@ -1119,11 +1111,6 @@ static inline void hugetlb_count_sub(long l, struct mm_struct *mm)
>  {
>  }
>  
> -static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
> -					pte_t *ptep, pte_t pte, unsigned long sz)
> -{
> -}
> -
>  static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
>  					  unsigned long addr, pte_t *ptep)
>  {
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index f338640fbe4a..559084d96082 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -4813,12 +4813,11 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
>  				entry = swp_entry_to_pte(swp_entry);
>  				if (userfaultfd_wp(src_vma) && uffd_wp)
>  					entry = huge_pte_mkuffd_wp(entry);
> -				set_huge_swap_pte_at(src, addr, src_pte,
> -						     entry, sz);
> +				set_huge_pte_at(src, addr, src_pte, entry);
>  			}
>  			if (!userfaultfd_wp(dst_vma) && uffd_wp)
>  				entry = huge_pte_clear_uffd_wp(entry);
> -			set_huge_swap_pte_at(dst, addr, dst_pte, entry, sz);
> +			set_huge_pte_at(dst, addr, dst_pte, entry);
>  		} else if (unlikely(is_pte_marker(entry))) {
>  			/*
>  			 * We copy the pte marker only if the dst vma has
> @@ -6375,8 +6374,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
>  					newpte = pte_swp_mkuffd_wp(newpte);
>  				else if (uffd_wp_resolve)
>  					newpte = pte_swp_clear_uffd_wp(newpte);
> -				set_huge_swap_pte_at(mm, address, ptep,
> -						     newpte, psize);
> +				set_huge_pte_at(mm, address, ptep, newpte);
>  				pages++;
>  			}
>  			spin_unlock(ptl);
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 062e8655f337..338fbb24c602 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -1618,9 +1618,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
>  			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
>  			if (folio_test_hugetlb(folio)) {
>  				hugetlb_count_sub(folio_nr_pages(folio), mm);
> -				set_huge_swap_pte_at(mm, address,
> -						     pvmw.pte, pteval,
> -						     vma_mmu_pagesize(vma));
> +				set_huge_pte_at(mm, address, pvmw.pte, pteval);
>  			} else {
>  				dec_mm_counter(mm, mm_counter(&folio->page));
>  				set_pte_at(mm, address, pvmw.pte, pteval);
> @@ -2004,9 +2002,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
>  			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
>  			if (folio_test_hugetlb(folio)) {
>  				hugetlb_count_sub(folio_nr_pages(folio), mm);
> -				set_huge_swap_pte_at(mm, address,
> -						     pvmw.pte, pteval,
> -						     vma_mmu_pagesize(vma));
> +				set_huge_pte_at(mm, address, pvmw.pte, pteval);
>  			} else {
>  				dec_mm_counter(mm, mm_counter(&folio->page));
>  				set_pte_at(mm, address, pvmw.pte, pteval);
> @@ -2074,8 +2070,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
>  			if (pte_uffd_wp(pteval))
>  				swp_pte = pte_swp_mkuffd_wp(swp_pte);
>  			if (folio_test_hugetlb(folio))
> -				set_huge_swap_pte_at(mm, address, pvmw.pte,
> -						     swp_pte, vma_mmu_pagesize(vma));
> +				set_huge_pte_at(mm, address, pvmw.pte, swp_pte);
>  			else
>  				set_pte_at(mm, address, pvmw.pte, swp_pte);
>  			trace_set_migration_pte(address, pte_val(swp_pte),
> -- 
> 2.20.1
> 
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] mm: hugetlb: kill set_huge_swap_pte_at()
  2022-06-26 14:57 ` Qi Zheng
@ 2022-06-27  6:18   ` Anshuman Khandual
  -1 siblings, 0 replies; 28+ messages in thread
From: Anshuman Khandual @ 2022-06-27  6:18 UTC (permalink / raw)
  To: Qi Zheng, mike.kravetz, songmuchun, akpm, catalin.marinas, will
  Cc: linux-arm-kernel, linux-kernel, linux-mm



On 6/26/22 20:27, Qi Zheng wrote:
> The commit e5251fd43007 ("mm/hugetlb: introduce set_huge_swap_pte_at()
> helper") add set_huge_swap_pte_at() to handle swap entries on
> architectures that support hugepages consisting of contiguous ptes.
> And currently the set_huge_swap_pte_at() is only overridden by arm64.
> 
> The set_huge_swap_pte_at() provide a sz parameter to help determine
> the number of entries to be updated. But in fact, all hugetlb swap
> entries contain pfn information, so we can find the corresponding
> folio through the pfn recorded in the swap entry, then the folio_size()
> is the number of entries that need to be updated.
> 
> And considering that users will easily cause bugs by ignoring the
> difference between set_huge_swap_pte_at() and set_huge_pte_at().
> Let's handle swap entries in set_huge_pte_at() and remove the
> set_huge_swap_pte_at(), then we can call set_huge_pte_at()
> anywhere, which simplifies our coding.
> 
> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
> ---
>  arch/arm64/include/asm/hugetlb.h |  3 ---
>  arch/arm64/mm/hugetlbpage.c      | 34 ++++++++++++++++----------------
>  include/linux/hugetlb.h          | 13 ------------
>  mm/hugetlb.c                     |  8 +++-----
>  mm/rmap.c                        | 11 +++--------
>  5 files changed, 23 insertions(+), 46 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
> index 1fd2846dbefe..d20f5da2d76f 100644
> --- a/arch/arm64/include/asm/hugetlb.h
> +++ b/arch/arm64/include/asm/hugetlb.h
> @@ -46,9 +46,6 @@ extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
>  			   pte_t *ptep, unsigned long sz);
>  #define __HAVE_ARCH_HUGE_PTEP_GET
>  extern pte_t huge_ptep_get(pte_t *ptep);
> -extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
> -				 pte_t *ptep, pte_t pte, unsigned long sz);
> -#define set_huge_swap_pte_at set_huge_swap_pte_at
>  
>  void __init arm64_hugetlb_cma_reserve(void);
>  
> diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
> index c9e076683e5d..58b89b9d13e0 100644
> --- a/arch/arm64/mm/hugetlbpage.c
> +++ b/arch/arm64/mm/hugetlbpage.c
> @@ -238,6 +238,13 @@ static void clear_flush(struct mm_struct *mm,
>  	flush_tlb_range(&vma, saddr, addr);
>  }
>  
> +static inline struct folio *hugetlb_swap_entry_to_folio(swp_entry_t entry)
> +{
> +	VM_BUG_ON(!is_migration_entry(entry) && !is_hwpoison_entry(entry));
> +
> +	return page_folio(pfn_to_page(swp_offset(entry)));
> +}

Extracting the huge page size from the swap entry is an additional
operation which will increase the overall cost for set_huge_swap_pte_at().
At present the size value is readily available near the
set_huge_swap_pte_at() call sites.
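
Roughly, the extra work per !pte_present() call would be the swap entry
to folio lookup (a sketch based on the patch, not a measurement):

	/* before: the entry count comes straight from the caller-supplied sz */
	ncontig = num_contig_ptes(sz, &pgsize);

	/* after: the size has to be recovered from the swap entry first */
	folio = page_folio(pfn_to_page(swp_offset(pte_to_swp_entry(pte))));
	ncontig = num_contig_ptes(folio_size(folio), &pgsize);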

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] mm: hugetlb: kill set_huge_swap_pte_at()
  2022-06-27  6:18   ` Anshuman Khandual
@ 2022-06-27  6:55     ` Qi Zheng
  -1 siblings, 0 replies; 28+ messages in thread
From: Qi Zheng @ 2022-06-27  6:55 UTC (permalink / raw)
  To: Anshuman Khandual, mike.kravetz, songmuchun, akpm, catalin.marinas, will
  Cc: linux-arm-kernel, linux-kernel, linux-mm



On 2022/6/27 14:18, Anshuman Khandual wrote:
> 
> 
> On 6/26/22 20:27, Qi Zheng wrote:
>> The commit e5251fd43007 ("mm/hugetlb: introduce set_huge_swap_pte_at()
>> helper") add set_huge_swap_pte_at() to handle swap entries on
>> architectures that support hugepages consisting of contiguous ptes.
>> And currently the set_huge_swap_pte_at() is only overridden by arm64.
>>
>> The set_huge_swap_pte_at() provide a sz parameter to help determine
>> the number of entries to be updated. But in fact, all hugetlb swap
>> entries contain pfn information, so we can find the corresponding
>> folio through the pfn recorded in the swap entry, then the folio_size()
>> is the number of entries that need to be updated.
>>
>> And considering that users will easily cause bugs by ignoring the
>> difference between set_huge_swap_pte_at() and set_huge_pte_at().
>> Let's handle swap entries in set_huge_pte_at() and remove the
>> set_huge_swap_pte_at(), then we can call set_huge_pte_at()
>> anywhere, which simplifies our coding.
>>
>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>> ---
>>   arch/arm64/include/asm/hugetlb.h |  3 ---
>>   arch/arm64/mm/hugetlbpage.c      | 34 ++++++++++++++++----------------
>>   include/linux/hugetlb.h          | 13 ------------
>>   mm/hugetlb.c                     |  8 +++-----
>>   mm/rmap.c                        | 11 +++--------
>>   5 files changed, 23 insertions(+), 46 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
>> index 1fd2846dbefe..d20f5da2d76f 100644
>> --- a/arch/arm64/include/asm/hugetlb.h
>> +++ b/arch/arm64/include/asm/hugetlb.h
>> @@ -46,9 +46,6 @@ extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
>>   			   pte_t *ptep, unsigned long sz);
>>   #define __HAVE_ARCH_HUGE_PTEP_GET
>>   extern pte_t huge_ptep_get(pte_t *ptep);
>> -extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
>> -				 pte_t *ptep, pte_t pte, unsigned long sz);
>> -#define set_huge_swap_pte_at set_huge_swap_pte_at
>>   
>>   void __init arm64_hugetlb_cma_reserve(void);
>>   
>> diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
>> index c9e076683e5d..58b89b9d13e0 100644
>> --- a/arch/arm64/mm/hugetlbpage.c
>> +++ b/arch/arm64/mm/hugetlbpage.c
>> @@ -238,6 +238,13 @@ static void clear_flush(struct mm_struct *mm,
>>   	flush_tlb_range(&vma, saddr, addr);
>>   }
>>   
>> +static inline struct folio *hugetlb_swap_entry_to_folio(swp_entry_t entry)
>> +{
>> +	VM_BUG_ON(!is_migration_entry(entry) && !is_hwpoison_entry(entry));
>> +
>> +	return page_folio(pfn_to_page(swp_offset(entry)));
>> +}
> 
> Extracting this huge page size from swap entry is an additional operation which
> will increase the over all cost for set_huge_swap_pte_at(). At present the size

Hmm, I think this cost is very small. And replacing
set_huge_swap_pte_at() with transparent handling of swap entries helps
reduce possible bugs, which is worthwhile.

> value is readily available near set_huge_swap_pte_at() call sites.

-- 
Thanks,
Qi

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] mm: hugetlb: kill set_huge_swap_pte_at()
  2022-06-27  6:55     ` Qi Zheng
@ 2022-06-27  7:14       ` Anshuman Khandual
  -1 siblings, 0 replies; 28+ messages in thread
From: Anshuman Khandual @ 2022-06-27  7:14 UTC (permalink / raw)
  To: Qi Zheng, mike.kravetz, songmuchun, akpm, catalin.marinas, will
  Cc: linux-arm-kernel, linux-kernel, linux-mm



On 6/27/22 12:25, Qi Zheng wrote:
> 
> 
> On 2022/6/27 14:18, Anshuman Khandual wrote:
>>
>>
>> On 6/26/22 20:27, Qi Zheng wrote:
>>> The commit e5251fd43007 ("mm/hugetlb: introduce set_huge_swap_pte_at()
>>> helper") add set_huge_swap_pte_at() to handle swap entries on
>>> architectures that support hugepages consisting of contiguous ptes.
>>> And currently the set_huge_swap_pte_at() is only overridden by arm64.
>>>
>>> The set_huge_swap_pte_at() provide a sz parameter to help determine
>>> the number of entries to be updated. But in fact, all hugetlb swap
>>> entries contain pfn information, so we can find the corresponding
>>> folio through the pfn recorded in the swap entry, then the folio_size()
>>> is the number of entries that need to be updated.
>>>
>>> And considering that users will easily cause bugs by ignoring the
>>> difference between set_huge_swap_pte_at() and set_huge_pte_at().
>>> Let's handle swap entries in set_huge_pte_at() and remove the
>>> set_huge_swap_pte_at(), then we can call set_huge_pte_at()
>>> anywhere, which simplifies our coding.
>>>
>>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>>> ---
>>>   arch/arm64/include/asm/hugetlb.h |  3 ---
>>>   arch/arm64/mm/hugetlbpage.c      | 34 ++++++++++++++++----------------
>>>   include/linux/hugetlb.h          | 13 ------------
>>>   mm/hugetlb.c                     |  8 +++-----
>>>   mm/rmap.c                        | 11 +++--------
>>>   5 files changed, 23 insertions(+), 46 deletions(-)
>>>
>>> diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
>>> index 1fd2846dbefe..d20f5da2d76f 100644
>>> --- a/arch/arm64/include/asm/hugetlb.h
>>> +++ b/arch/arm64/include/asm/hugetlb.h
>>> @@ -46,9 +46,6 @@ extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
>>>                  pte_t *ptep, unsigned long sz);
>>>   #define __HAVE_ARCH_HUGE_PTEP_GET
>>>   extern pte_t huge_ptep_get(pte_t *ptep);
>>> -extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
>>> -                 pte_t *ptep, pte_t pte, unsigned long sz);
>>> -#define set_huge_swap_pte_at set_huge_swap_pte_at
>>>     void __init arm64_hugetlb_cma_reserve(void);
>>>   diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
>>> index c9e076683e5d..58b89b9d13e0 100644
>>> --- a/arch/arm64/mm/hugetlbpage.c
>>> +++ b/arch/arm64/mm/hugetlbpage.c
>>> @@ -238,6 +238,13 @@ static void clear_flush(struct mm_struct *mm,
>>>       flush_tlb_range(&vma, saddr, addr);
>>>   }
>>>   +static inline struct folio *hugetlb_swap_entry_to_folio(swp_entry_t entry)
>>> +{
>>> +    VM_BUG_ON(!is_migration_entry(entry) && !is_hwpoison_entry(entry));
>>> +
>>> +    return page_folio(pfn_to_page(swp_offset(entry)));
>>> +}
>>
>> Extracting this huge page size from swap entry is an additional operation which
>> will increase the over all cost for set_huge_swap_pte_at(). At present the size
> 
> Hmm, I think this cost is very small. And replacing
> set_huge_swap_pte_at() by transparently handling swap entries helps
> reduce possible bugs, which is worthwhile.

Possible bugs? There are just six call sites for this function.
Although this proposed patch is functionally correct, I don't see
a strong enough reason to increase the overall cost in this path.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] mm: hugetlb: kill set_huge_swap_pte_at()
  2022-06-27  7:14       ` Anshuman Khandual
@ 2022-06-27  7:29         ` Qi Zheng
  -1 siblings, 0 replies; 28+ messages in thread
From: Qi Zheng @ 2022-06-27  7:29 UTC (permalink / raw)
  To: Anshuman Khandual, mike.kravetz, songmuchun, akpm, catalin.marinas, will
  Cc: linux-arm-kernel, linux-kernel, linux-mm



On 2022/6/27 15:14, Anshuman Khandual wrote:
> 
> 
> On 6/27/22 12:25, Qi Zheng wrote:
>>
>>
>> On 2022/6/27 14:18, Anshuman Khandual wrote:
>>>
>>>
>>> On 6/26/22 20:27, Qi Zheng wrote:
>>>> The commit e5251fd43007 ("mm/hugetlb: introduce set_huge_swap_pte_at()
>>>> helper") add set_huge_swap_pte_at() to handle swap entries on
>>>> architectures that support hugepages consisting of contiguous ptes.
>>>> And currently the set_huge_swap_pte_at() is only overridden by arm64.
>>>>
>>>> The set_huge_swap_pte_at() provide a sz parameter to help determine
>>>> the number of entries to be updated. But in fact, all hugetlb swap
>>>> entries contain pfn information, so we can find the corresponding
>>>> folio through the pfn recorded in the swap entry, then the folio_size()
>>>> is the number of entries that need to be updated.
>>>>
>>>> And considering that users will easily cause bugs by ignoring the
>>>> difference between set_huge_swap_pte_at() and set_huge_pte_at().
>>>> Let's handle swap entries in set_huge_pte_at() and remove the
>>>> set_huge_swap_pte_at(), then we can call set_huge_pte_at()
>>>> anywhere, which simplifies our coding.
>>>>
>>>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>>>> ---
>>>>    arch/arm64/include/asm/hugetlb.h |  3 ---
>>>>    arch/arm64/mm/hugetlbpage.c      | 34 ++++++++++++++++----------------
>>>>    include/linux/hugetlb.h          | 13 ------------
>>>>    mm/hugetlb.c                     |  8 +++-----
>>>>    mm/rmap.c                        | 11 +++--------
>>>>    5 files changed, 23 insertions(+), 46 deletions(-)
>>>>
>>>> diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
>>>> index 1fd2846dbefe..d20f5da2d76f 100644
>>>> --- a/arch/arm64/include/asm/hugetlb.h
>>>> +++ b/arch/arm64/include/asm/hugetlb.h
>>>> @@ -46,9 +46,6 @@ extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
>>>>                   pte_t *ptep, unsigned long sz);
>>>>    #define __HAVE_ARCH_HUGE_PTEP_GET
>>>>    extern pte_t huge_ptep_get(pte_t *ptep);
>>>> -extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
>>>> -                 pte_t *ptep, pte_t pte, unsigned long sz);
>>>> -#define set_huge_swap_pte_at set_huge_swap_pte_at
>>>>      void __init arm64_hugetlb_cma_reserve(void);
>>>>    diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
>>>> index c9e076683e5d..58b89b9d13e0 100644
>>>> --- a/arch/arm64/mm/hugetlbpage.c
>>>> +++ b/arch/arm64/mm/hugetlbpage.c
>>>> @@ -238,6 +238,13 @@ static void clear_flush(struct mm_struct *mm,
>>>>        flush_tlb_range(&vma, saddr, addr);
>>>>    }
>>>>    +static inline struct folio *hugetlb_swap_entry_to_folio(swp_entry_t entry)
>>>> +{
>>>> +    VM_BUG_ON(!is_migration_entry(entry) && !is_hwpoison_entry(entry));
>>>> +
>>>> +    return page_folio(pfn_to_page(swp_offset(entry)));
>>>> +}
>>>
>>> Extracting this huge page size from swap entry is an additional operation which
>>> will increase the over all cost for set_huge_swap_pte_at(). At present the size
>>
>> Hmm, I think this cost is very small. And replacing
>> set_huge_swap_pte_at() by transparently handling swap entries helps
>> reduce possible bugs, which is worthwhile.
> 
> Possible bugs ? There are just six call sites for this function.

For the swap entry of hugetlb, we need to remember to call
set_huge_swap_pte_at() instead of set_huge_pte_at() every
time. And the difference between the two exists only on arm64;
other architectures should not need to pay attention to this.

> Although this proposed patch is functionally correct, I dont see
> a valid enough reason to increase the overall cost in the path.

-- 
Thanks,
Qi

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] mm: hugetlb: kill set_huge_swap_pte_at()
  2022-06-27  7:29         ` Qi Zheng
@ 2022-06-27  7:35           ` Anshuman Khandual
  -1 siblings, 0 replies; 28+ messages in thread
From: Anshuman Khandual @ 2022-06-27  7:35 UTC (permalink / raw)
  To: Qi Zheng, mike.kravetz, songmuchun, akpm, catalin.marinas, will
  Cc: linux-arm-kernel, linux-kernel, linux-mm



On 6/27/22 12:59, Qi Zheng wrote:
> 
> 
> On 2022/6/27 15:14, Anshuman Khandual wrote:
>>
>>
>> On 6/27/22 12:25, Qi Zheng wrote:
>>>
>>>
>>> On 2022/6/27 14:18, Anshuman Khandual wrote:
>>>>
>>>>
>>>> On 6/26/22 20:27, Qi Zheng wrote:
>>>>> The commit e5251fd43007 ("mm/hugetlb: introduce set_huge_swap_pte_at()
>>>>> helper") add set_huge_swap_pte_at() to handle swap entries on
>>>>> architectures that support hugepages consisting of contiguous ptes.
>>>>> And currently the set_huge_swap_pte_at() is only overridden by arm64.
>>>>>
>>>>> The set_huge_swap_pte_at() provide a sz parameter to help determine
>>>>> the number of entries to be updated. But in fact, all hugetlb swap
>>>>> entries contain pfn information, so we can find the corresponding
>>>>> folio through the pfn recorded in the swap entry, then the folio_size()
>>>>> is the number of entries that need to be updated.
>>>>>
>>>>> And considering that users will easily cause bugs by ignoring the
>>>>> difference between set_huge_swap_pte_at() and set_huge_pte_at().
>>>>> Let's handle swap entries in set_huge_pte_at() and remove the
>>>>> set_huge_swap_pte_at(), then we can call set_huge_pte_at()
>>>>> anywhere, which simplifies our coding.
>>>>>
>>>>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>>>>> ---
>>>>>    arch/arm64/include/asm/hugetlb.h |  3 ---
>>>>>    arch/arm64/mm/hugetlbpage.c      | 34 ++++++++++++++++----------------
>>>>>    include/linux/hugetlb.h          | 13 ------------
>>>>>    mm/hugetlb.c                     |  8 +++-----
>>>>>    mm/rmap.c                        | 11 +++--------
>>>>>    5 files changed, 23 insertions(+), 46 deletions(-)
>>>>>
>>>>> diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
>>>>> index 1fd2846dbefe..d20f5da2d76f 100644
>>>>> --- a/arch/arm64/include/asm/hugetlb.h
>>>>> +++ b/arch/arm64/include/asm/hugetlb.h
>>>>> @@ -46,9 +46,6 @@ extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
>>>>>                   pte_t *ptep, unsigned long sz);
>>>>>    #define __HAVE_ARCH_HUGE_PTEP_GET
>>>>>    extern pte_t huge_ptep_get(pte_t *ptep);
>>>>> -extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
>>>>> -                 pte_t *ptep, pte_t pte, unsigned long sz);
>>>>> -#define set_huge_swap_pte_at set_huge_swap_pte_at
>>>>>      void __init arm64_hugetlb_cma_reserve(void);
>>>>>    diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
>>>>> index c9e076683e5d..58b89b9d13e0 100644
>>>>> --- a/arch/arm64/mm/hugetlbpage.c
>>>>> +++ b/arch/arm64/mm/hugetlbpage.c
>>>>> @@ -238,6 +238,13 @@ static void clear_flush(struct mm_struct *mm,
>>>>>        flush_tlb_range(&vma, saddr, addr);
>>>>>    }
>>>>>    +static inline struct folio *hugetlb_swap_entry_to_folio(swp_entry_t entry)
>>>>> +{
>>>>> +    VM_BUG_ON(!is_migration_entry(entry) && !is_hwpoison_entry(entry));
>>>>> +
>>>>> +    return page_folio(pfn_to_page(swp_offset(entry)));
>>>>> +}
>>>>
>>>> Extracting this huge page size from swap entry is an additional operation which
>>>> will increase the over all cost for set_huge_swap_pte_at(). At present the size
>>>
>>> Hmm, I think this cost is very small. And replacing
>>> set_huge_swap_pte_at() by transparently handling swap entries helps
>>> reduce possible bugs, which is worthwhile.
>>
>> Possible bugs ? There are just six call sites for this function.
> 
> For the swap entry of hugetlb, we need to remember that we should
> call set_huge_swap_pte_at() instead of set_huge_pte_at() every

Which is intuitive, so what is the problem ?

> time. And the difference between the two is only on arm64, other
> architectures should not need to pay attention to this.

Hence there is a generic fallback for other platforms.
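
A minimal sketch of that generic fallback, paraphrasing from memory
the code in include/linux/hugetlb.h which this patch removes (treat
the exact signature and placement as approximate):

#ifndef set_huge_swap_pte_at
static inline void set_huge_swap_pte_at(struct mm_struct *mm,
					unsigned long addr, pte_t *ptep,
					pte_t pte, unsigned long sz)
{
	/* no contiguous ptes here, so sz is simply ignored */
	set_huge_pte_at(mm, addr, ptep, pte);
}
#endif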

> 
>> Although this proposed patch is functionally correct, I dont see
>> a valid enough reason to increase the overall cost in the path.
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] mm: hugetlb: kill set_huge_swap_pte_at()
  2022-06-27  7:14       ` Anshuman Khandual
@ 2022-06-27  7:44         ` Muchun Song
  -1 siblings, 0 replies; 28+ messages in thread
From: Muchun Song @ 2022-06-27  7:44 UTC (permalink / raw)
  To: Anshuman Khandual
  Cc: Qi Zheng, mike.kravetz, akpm, catalin.marinas, will,
	linux-arm-kernel, linux-kernel, linux-mm

On Mon, Jun 27, 2022 at 12:44:19PM +0530, Anshuman Khandual wrote:
> 
> 
> On 6/27/22 12:25, Qi Zheng wrote:
> > 
> > 
> > On 2022/6/27 14:18, Anshuman Khandual wrote:
> >>
> >>
> >> On 6/26/22 20:27, Qi Zheng wrote:
> >>> The commit e5251fd43007 ("mm/hugetlb: introduce set_huge_swap_pte_at()
> >>> helper") add set_huge_swap_pte_at() to handle swap entries on
> >>> architectures that support hugepages consisting of contiguous ptes.
> >>> And currently the set_huge_swap_pte_at() is only overridden by arm64.
> >>>
> >>> The set_huge_swap_pte_at() provide a sz parameter to help determine
> >>> the number of entries to be updated. But in fact, all hugetlb swap
> >>> entries contain pfn information, so we can find the corresponding
> >>> folio through the pfn recorded in the swap entry, then the folio_size()
> >>> is the number of entries that need to be updated.
> >>>
> >>> And considering that users will easily cause bugs by ignoring the
> >>> difference between set_huge_swap_pte_at() and set_huge_pte_at().
> >>> Let's handle swap entries in set_huge_pte_at() and remove the
> >>> set_huge_swap_pte_at(), then we can call set_huge_pte_at()
> >>> anywhere, which simplifies our coding.
> >>>
> >>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
> >>> ---
> >>>   arch/arm64/include/asm/hugetlb.h |  3 ---
> >>>   arch/arm64/mm/hugetlbpage.c      | 34 ++++++++++++++++----------------
> >>>   include/linux/hugetlb.h          | 13 ------------
> >>>   mm/hugetlb.c                     |  8 +++-----
> >>>   mm/rmap.c                        | 11 +++--------
> >>>   5 files changed, 23 insertions(+), 46 deletions(-)
> >>>
> >>> diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
> >>> index 1fd2846dbefe..d20f5da2d76f 100644
> >>> --- a/arch/arm64/include/asm/hugetlb.h
> >>> +++ b/arch/arm64/include/asm/hugetlb.h
> >>> @@ -46,9 +46,6 @@ extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
> >>>                  pte_t *ptep, unsigned long sz);
> >>>   #define __HAVE_ARCH_HUGE_PTEP_GET
> >>>   extern pte_t huge_ptep_get(pte_t *ptep);
> >>> -extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
> >>> -                 pte_t *ptep, pte_t pte, unsigned long sz);
> >>> -#define set_huge_swap_pte_at set_huge_swap_pte_at
> >>>     void __init arm64_hugetlb_cma_reserve(void);
> >>>   diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
> >>> index c9e076683e5d..58b89b9d13e0 100644
> >>> --- a/arch/arm64/mm/hugetlbpage.c
> >>> +++ b/arch/arm64/mm/hugetlbpage.c
> >>> @@ -238,6 +238,13 @@ static void clear_flush(struct mm_struct *mm,
> >>>       flush_tlb_range(&vma, saddr, addr);
> >>>   }
> >>>   +static inline struct folio *hugetlb_swap_entry_to_folio(swp_entry_t entry)
> >>> +{
> >>> +    VM_BUG_ON(!is_migration_entry(entry) && !is_hwpoison_entry(entry));
> >>> +
> >>> +    return page_folio(pfn_to_page(swp_offset(entry)));
> >>> +}
> >>
> >> Extracting this huge page size from swap entry is an additional operation which
> >> will increase the over all cost for set_huge_swap_pte_at(). At present the size
> > 
> > Hmm, I think this cost is very small. And replacing
> > set_huge_swap_pte_at() by transparently handling swap entries helps
> > reduce possible bugs, which is worthwhile.
> 
> Possible bugs ? There are just six call sites for this function.

I think it is easy to make mistakes (see commit 5d4af6195c87).
I often wonder why the swap entry is special for HugeTLB pages
compared to normal pages (why do we not have a set_swap_pte_at()?).
set_huge_swap_pte_at() makes HugeTLB more special; killing it
makes HugeTLB more consistent with normal pages. From the point
of view of code maintenance, I think it is better to kill it. What
do you think?

Thanks.

> Although this proposed patch is functionally correct, I dont see
> a valid enough reason to increase the overall cost in the path.
>



^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] mm: hugetlb: kill set_huge_swap_pte_at()
  2022-06-27  7:44         ` Muchun Song
@ 2022-06-27  8:27           ` Anshuman Khandual
  -1 siblings, 0 replies; 28+ messages in thread
From: Anshuman Khandual @ 2022-06-27  8:27 UTC (permalink / raw)
  To: Muchun Song
  Cc: Qi Zheng, mike.kravetz, akpm, catalin.marinas, will,
	linux-arm-kernel, linux-kernel, linux-mm



On 6/27/22 13:14, Muchun Song wrote:
> On Mon, Jun 27, 2022 at 12:44:19PM +0530, Anshuman Khandual wrote:
>>
>>
>> On 6/27/22 12:25, Qi Zheng wrote:
>>>
>>>
>>> On 2022/6/27 14:18, Anshuman Khandual wrote:
>>>>
>>>>
>>>> On 6/26/22 20:27, Qi Zheng wrote:
>>>>> The commit e5251fd43007 ("mm/hugetlb: introduce set_huge_swap_pte_at()
>>>>> helper") add set_huge_swap_pte_at() to handle swap entries on
>>>>> architectures that support hugepages consisting of contiguous ptes.
>>>>> And currently the set_huge_swap_pte_at() is only overridden by arm64.
>>>>>
>>>>> The set_huge_swap_pte_at() provide a sz parameter to help determine
>>>>> the number of entries to be updated. But in fact, all hugetlb swap
>>>>> entries contain pfn information, so we can find the corresponding
>>>>> folio through the pfn recorded in the swap entry, then the folio_size()
>>>>> is the number of entries that need to be updated.
>>>>>
>>>>> And considering that users will easily cause bugs by ignoring the
>>>>> difference between set_huge_swap_pte_at() and set_huge_pte_at().
>>>>> Let's handle swap entries in set_huge_pte_at() and remove the
>>>>> set_huge_swap_pte_at(), then we can call set_huge_pte_at()
>>>>> anywhere, which simplifies our coding.
>>>>>
>>>>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>>>>> ---
>>>>>   arch/arm64/include/asm/hugetlb.h |  3 ---
>>>>>   arch/arm64/mm/hugetlbpage.c      | 34 ++++++++++++++++----------------
>>>>>   include/linux/hugetlb.h          | 13 ------------
>>>>>   mm/hugetlb.c                     |  8 +++-----
>>>>>   mm/rmap.c                        | 11 +++--------
>>>>>   5 files changed, 23 insertions(+), 46 deletions(-)
>>>>>
>>>>> diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
>>>>> index 1fd2846dbefe..d20f5da2d76f 100644
>>>>> --- a/arch/arm64/include/asm/hugetlb.h
>>>>> +++ b/arch/arm64/include/asm/hugetlb.h
>>>>> @@ -46,9 +46,6 @@ extern void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
>>>>>                  pte_t *ptep, unsigned long sz);
>>>>>   #define __HAVE_ARCH_HUGE_PTEP_GET
>>>>>   extern pte_t huge_ptep_get(pte_t *ptep);
>>>>> -extern void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr,
>>>>> -                 pte_t *ptep, pte_t pte, unsigned long sz);
>>>>> -#define set_huge_swap_pte_at set_huge_swap_pte_at
>>>>>     void __init arm64_hugetlb_cma_reserve(void);
>>>>>   diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
>>>>> index c9e076683e5d..58b89b9d13e0 100644
>>>>> --- a/arch/arm64/mm/hugetlbpage.c
>>>>> +++ b/arch/arm64/mm/hugetlbpage.c
>>>>> @@ -238,6 +238,13 @@ static void clear_flush(struct mm_struct *mm,
>>>>>       flush_tlb_range(&vma, saddr, addr);
>>>>>   }
>>>>>   +static inline struct folio *hugetlb_swap_entry_to_folio(swp_entry_t entry)
>>>>> +{
>>>>> +    VM_BUG_ON(!is_migration_entry(entry) && !is_hwpoison_entry(entry));
>>>>> +
>>>>> +    return page_folio(pfn_to_page(swp_offset(entry)));
>>>>> +}
>>>>
>>>> Extracting this huge page size from swap entry is an additional operation which
>>>> will increase the over all cost for set_huge_swap_pte_at(). At present the size
>>>
>>> Hmm, I think this cost is very small. And replacing
>>> set_huge_swap_pte_at() by transparently handling swap entries helps
>>> reduce possible bugs, which is worthwhile.
>>
>> Possible bugs ? There are just six call sites for this function.
> 
> I think it is easy to make mistakes (see commit 5d4af6195c87).
> I usually think of why the swap entry is special for HugeTLB pages
> compared to normal pages (why we do not have set_swap_pte_at?).
> set_huge_swap_pte_at() make HugeTLB more special, killing it
> can make HugeTLB more consistent with normal page. From the point
> of view of code maintenance, I think it is better to kill it. What
> do you think?

Okay, alright. Let's drop set_huge_swap_pte_at(), which helps make
HugeTLB less special.

> 
> Thanks.
> 
>> Although this proposed patch is functionally correct, I dont see
>> a valid enough reason to increase the overall cost in the path.
>>
> 
> 

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] mm: hugetlb: kill set_huge_swap_pte_at()
  2022-06-27  7:35           ` Anshuman Khandual
@ 2022-06-27 14:34             ` Matthew Wilcox
  -1 siblings, 0 replies; 28+ messages in thread
From: Matthew Wilcox @ 2022-06-27 14:34 UTC (permalink / raw)
  To: Anshuman Khandual
  Cc: Qi Zheng, mike.kravetz, songmuchun, akpm, catalin.marinas, will,
	linux-arm-kernel, linux-kernel, linux-mm

On Mon, Jun 27, 2022 at 01:05:00PM +0530, Anshuman Khandual wrote:
> > For the swap entry of hugetlb, we need to remember that we should
> > call set_huge_swap_pte_at() instead of set_huge_pte_at() every
> 
> Which is intuitive, so what is the problem ?

The problem is that HugeTLB is DIFFERENT from the rest of the VM.
This has a huge cost in programmer time, which is infinitely more
valuable than the cycle shaving you're quibbling over.  We should take
any and every opportunity to make huge pages work the same way as THP
and order-0 pages.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] mm: hugetlb: kill set_huge_swap_pte_at()
  2022-06-26 14:57 ` Qi Zheng
@ 2022-06-27 14:41   ` Matthew Wilcox
  -1 siblings, 0 replies; 28+ messages in thread
From: Matthew Wilcox @ 2022-06-27 14:41 UTC (permalink / raw)
  To: Qi Zheng
  Cc: mike.kravetz, songmuchun, akpm, catalin.marinas, will,
	linux-arm-kernel, linux-kernel, linux-mm

On Sun, Jun 26, 2022 at 10:57:17PM +0800, Qi Zheng wrote:
> The commit e5251fd43007 ("mm/hugetlb: introduce set_huge_swap_pte_at()
> helper") add set_huge_swap_pte_at() to handle swap entries on
> architectures that support hugepages consisting of contiguous ptes.
> And currently the set_huge_swap_pte_at() is only overridden by arm64.

Bleh.  I hate the way we handle these currently.

> +static inline struct folio *hugetlb_swap_entry_to_folio(swp_entry_t entry)
> +{
> +	VM_BUG_ON(!is_migration_entry(entry) && !is_hwpoison_entry(entry));
> +
> +	return page_folio(pfn_to_page(swp_offset(entry)));
> +}

We haven't needed a pfn_to_folio() yet, but perhaps we should have one?

Related, how should we store migration entries for multi-order folios
in the page tables?  We can either encode the individual page in
question, or we can encode the folio.  Do we need to support folios
being mapped askew (ie unaligned), or will folios always be mapped
aligned?

> +	if (!pte_present(pte)) {
> +		struct folio *folio;
> +
> +		folio = hugetlb_swap_entry_to_folio(pte_to_swp_entry(pte));
> +		ncontig = num_contig_ptes(folio_size(folio), &pgsize);
> +
> +		for (i = 0; i < ncontig; i++, ptep++)
> +			set_pte_at(mm, addr, ptep, pte);
> +		return;
> +	}

It seems like a shame to calculate folio_size() only to turn it into a
number of pages.  Don't you want to just use:

		ncontig = folio_nr_pages(folio);


^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] mm: hugetlb: kill set_huge_swap_pte_at()
  2022-06-27 14:41   ` Matthew Wilcox
@ 2022-06-28  3:34     ` Qi Zheng
  -1 siblings, 0 replies; 28+ messages in thread
From: Qi Zheng @ 2022-06-28  3:34 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: mike.kravetz, songmuchun, akpm, catalin.marinas, will,
	linux-arm-kernel, linux-kernel, linux-mm



On 2022/6/27 22:41, Matthew Wilcox wrote:
> On Sun, Jun 26, 2022 at 10:57:17PM +0800, Qi Zheng wrote:
>> The commit e5251fd43007 ("mm/hugetlb: introduce set_huge_swap_pte_at()
>> helper") add set_huge_swap_pte_at() to handle swap entries on
>> architectures that support hugepages consisting of contiguous ptes.
>> And currently the set_huge_swap_pte_at() is only overridden by arm64.
> 
> Bleh.  I hate the way we handle these currently.
> 
>> +static inline struct folio *hugetlb_swap_entry_to_folio(swp_entry_t entry)
>> +{
>> +	VM_BUG_ON(!is_migration_entry(entry) && !is_hwpoison_entry(entry));
>> +
>> +	return page_folio(pfn_to_page(swp_offset(entry)));
>> +}
> 
> We haven't needed a pfn_to_folio() yet, but perhaps we should have one?

Hi,

IMO, it would be better to have a pfn_to_folio(), which can save the
redundant page_folio() call in the current case.

But this is not related to the current patch; maybe it can be a
separate optimization patch.
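
Something like the following, say (a hypothetical helper that does
not exist in the tree at this point; the name just follows the
suggestion above):

static inline struct folio *pfn_to_folio(unsigned long pfn)
{
	return page_folio(pfn_to_page(pfn));
}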

> 
> Related, how should we store migration entries for multi-order folios
> in the page tables?  We can either encode the individual page in
> question, or we can encode the folio.  Do we need to support folios
> being mapped askew (ie unaligned), or will folios always be mapped
> aligned?

Do we currently have a scenario where we need askew-mapped folios?
Maybe it could be used for pte-mapped THP? Hmm, I have no idea.

> 
>> +	if (!pte_present(pte)) {
>> +		struct folio *folio;
>> +
>> +		folio = hugetlb_swap_entry_to_folio(pte_to_swp_entry(pte));
>> +		ncontig = num_contig_ptes(folio_size(folio), &pgsize);
>> +
>> +		for (i = 0; i < ncontig; i++, ptep++)
>> +			set_pte_at(mm, addr, ptep, pte);
>> +		return;
>> +	}
> 
> It seems like a shame to calculate folio_size() only to turn it into a
> number of pages.  Don't you want to just use:
> 
> 		ncontig = folio_nr_pages(folio);

We can't use folio_nr_pages() here, because for PMD_SIZE we only need
one entry instead of the PTRS_PER_PTE entries returned by
folio_nr_pages().
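
To illustrate, here is a simplified sketch of arm64's
num_contig_ptes() (paraphrased from memory; the real function also
guards the PUD_SIZE case behind pud_sect_supported()):

static int num_contig_ptes(unsigned long size, size_t *pgsize)
{
	int contig_ptes = 0;

	*pgsize = size;

	switch (size) {
	case PUD_SIZE:
	case PMD_SIZE:
		/* a single block entry maps the whole hugepage */
		contig_ptes = 1;
		break;
	case CONT_PMD_SIZE:
		*pgsize = PMD_SIZE;
		contig_ptes = CONT_PMDS;
		break;
	case CONT_PTE_SIZE:
		*pgsize = PAGE_SIZE;
		contig_ptes = CONT_PTES;
		break;
	}

	return contig_ptes;
}

So for a 2M hugepage with 4K base pages, num_contig_ptes() returns 1,
while folio_nr_pages() would return 512.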

Thanks,
Qi

> 

-- 
Thanks,
Qi

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] mm: hugetlb: kill set_huge_swap_pte_at()
  2022-06-27 14:34             ` Matthew Wilcox
@ 2022-06-28  5:47               ` Anshuman Khandual
  -1 siblings, 0 replies; 28+ messages in thread
From: Anshuman Khandual @ 2022-06-28  5:47 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Qi Zheng, mike.kravetz, songmuchun, akpm, catalin.marinas, will,
	linux-arm-kernel, linux-kernel, linux-mm



On 6/27/22 20:04, Matthew Wilcox wrote:
> On Mon, Jun 27, 2022 at 01:05:00PM +0530, Anshuman Khandual wrote:
>>> For the swap entry of hugetlb, we need to remember that we should
>>> call set_huge_swap_pte_at() instead of set_huge_pte_at() every
>>
>> Which is intuitive, so what is the problem ?
> 
> The problem is that HugeTLB is DIFFERENT from the rest of the VM.

Agreed.

> This has a huge cost in programmer time, which is infinitely more
> valuable than the cycle shaving you're quibbling over.  We should take
> any and every opportunity to make huge pages work the same way as THP
> and order-0 pages.

Got it.

^ permalink raw reply	[flat|nested] 28+ messages in thread

* Re: [PATCH] mm: hugetlb: kill set_huge_swap_pte_at()
  2022-06-26 14:57 ` Qi Zheng
@ 2023-09-21 16:25   ` Ryan Roberts
  -1 siblings, 0 replies; 28+ messages in thread
From: Ryan Roberts @ 2023-09-21 16:25 UTC (permalink / raw)
  To: Qi Zheng, mike.kravetz, songmuchun, akpm, catalin.marinas, will
  Cc: linux-arm-kernel, linux-kernel, linux-mm

On 26/06/2022 15:57, Qi Zheng wrote:
> The commit e5251fd43007 ("mm/hugetlb: introduce set_huge_swap_pte_at()
> helper") add set_huge_swap_pte_at() to handle swap entries on
> architectures that support hugepages consisting of contiguous ptes.
> And currently the set_huge_swap_pte_at() is only overridden by arm64.
> 
> The set_huge_swap_pte_at() provide a sz parameter to help determine
> the number of entries to be updated. But in fact, all hugetlb swap
> entries contain pfn information, so we can find the corresponding
> folio through the pfn recorded in the swap entry, then the folio_size()
> is the number of entries that need to be updated.
> 
> And considering that users will easily cause bugs by ignoring the
> difference between set_huge_swap_pte_at() and set_huge_pte_at().
> Let's handle swap entries in set_huge_pte_at() and remove the
> set_huge_swap_pte_at(), then we can call set_huge_pte_at()
> anywhere, which simplifies our coding.
> 
> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>

Hi,

FYI, I discovered a bug in v6.6-rc1 that causes a kernel panic, which I believe
is caused by this change. I've posted a fix along with a detailed explanation at
[1].

[1]
https://lore.kernel.org/linux-arm-kernel/20230921162007.1630149-1-ryan.roberts@arm.com/

Thanks,
Ryan


^ permalink raw reply	[flat|nested] 28+ messages in thread

end of thread, other threads:[~2023-09-21 17:07 UTC | newest]

Thread overview: 28+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-26 14:57 [PATCH] mm: hugetlb: kill set_huge_swap_pte_at() Qi Zheng
2022-06-26 14:57 ` Qi Zheng
2022-06-27  3:32 ` Muchun Song
2022-06-27  3:32   ` Muchun Song
2022-06-27  6:18 ` Anshuman Khandual
2022-06-27  6:18   ` Anshuman Khandual
2022-06-27  6:55   ` Qi Zheng
2022-06-27  6:55     ` Qi Zheng
2022-06-27  7:14     ` Anshuman Khandual
2022-06-27  7:14       ` Anshuman Khandual
2022-06-27  7:29       ` Qi Zheng
2022-06-27  7:29         ` Qi Zheng
2022-06-27  7:35         ` Anshuman Khandual
2022-06-27  7:35           ` Anshuman Khandual
2022-06-27 14:34           ` Matthew Wilcox
2022-06-27 14:34             ` Matthew Wilcox
2022-06-28  5:47             ` Anshuman Khandual
2022-06-28  5:47               ` Anshuman Khandual
2022-06-27  7:44       ` Muchun Song
2022-06-27  7:44         ` Muchun Song
2022-06-27  8:27         ` Anshuman Khandual
2022-06-27  8:27           ` Anshuman Khandual
2022-06-27 14:41 ` Matthew Wilcox
2022-06-27 14:41   ` Matthew Wilcox
2022-06-28  3:34   ` Qi Zheng
2022-06-28  3:34     ` Qi Zheng
2023-09-21 16:25 ` Ryan Roberts
2023-09-21 16:25   ` Ryan Roberts
