linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [RFC v2 0/2] change ->index to PAGE_SIZE for hugetlb pages
@ 2023-05-19 22:01 Sidhartha Kumar
  2023-05-19 22:01 ` [RFC v2 1/2] mm/filemap: remove hugetlb special casing in filemap.c Sidhartha Kumar
  2023-05-19 22:01 ` [RFC v2 2/2] mm/hugetlb: add wrapper functions for interactions with page cache Sidhartha Kumar
  0 siblings, 2 replies; 3+ messages in thread
From: Sidhartha Kumar @ 2023-05-19 22:01 UTC (permalink / raw)
  To: linux-kernel, linux-mm
  Cc: akpm, songmuchun, mike.kravetz, willy, david, nphamcs, Sidhartha Kumar

This RFC patchset attempts to implement a listed filemap TODO, which is
changing hugetlb folios to have ->index in PAGE_SIZE units. This simplifies
many functions within filemap.c, which currently have to special-case
hugetlb pages.
From the last RFC[1], Mike pointed out that hugetlb will have to maintain
a huge page sized index as well because it is used for the reservation
map as well as the hash function for the hugetlb mutex table.

This patchset adds new wrappers for hugetlb code to interact with the
page cache. These wrappers calculate a linear page index as this is now
what the page cache expects for hugetlb pages as well.

This series passes the LTP hugetlb test cases. I will do more testing
and performance analysis but wanted to get some early feedback if this
approach is acceptable.

[1]:https://lore.kernel.org/lkml/20230425012721.GA6696@monkey/T/

Sidhartha Kumar (2):
  mm/filemap: remove hugetlb special casing in filemap.c
  mm/hugetlb: add wrapper functions for interactions with page cache

 fs/hugetlbfs/inode.c    | 14 +++++++-------
 include/linux/hugetlb.h | 21 +++++++++++++++++++--
 include/linux/pagemap.h |  8 --------
 mm/filemap.c            | 36 +++++++++++-------------------------
 mm/hugetlb.c            | 22 +++++++++++++---------
 5 files changed, 50 insertions(+), 51 deletions(-)

-- 
2.40.1



^ permalink raw reply	[flat|nested] 3+ messages in thread

* [RFC v2 1/2] mm/filemap: remove hugetlb special casing in filemap.c
  2023-05-19 22:01 [RFC v2 0/2] change ->index to PAGE_SIZE for hugetlb pages Sidhartha Kumar
@ 2023-05-19 22:01 ` Sidhartha Kumar
  2023-05-19 22:01 ` [RFC v2 2/2] mm/hugetlb: add wrapper functions for interactions with page cache Sidhartha Kumar
  1 sibling, 0 replies; 3+ messages in thread
From: Sidhartha Kumar @ 2023-05-19 22:01 UTC (permalink / raw)
  To: linux-kernel, linux-mm
  Cc: akpm, songmuchun, mike.kravetz, willy, david, nphamcs, Sidhartha Kumar

This patch removes special cased hugetlb handling code within the page
cache by changing the granularity of each index to the base page size
rather than the huge page size.

Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
---
 include/linux/pagemap.h |  6 ------
 mm/filemap.c            | 36 +++++++++++-------------------------
 2 files changed, 11 insertions(+), 31 deletions(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index c1ae5ebc375fe..7ca967849c2cc 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -719,9 +719,6 @@ static inline struct page *folio_file_page(struct folio *folio, pgoff_t index)
  */
 static inline bool folio_contains(struct folio *folio, pgoff_t index)
 {
-	/* HugeTLBfs indexes the page cache in units of hpage_size */
-	if (folio_test_hugetlb(folio))
-		return folio->index == index;
 	return index - folio_index(folio) < folio_nr_pages(folio);
 }
 
@@ -846,12 +843,9 @@ static inline loff_t folio_file_pos(struct folio *folio)
 
 /*
  * Get the offset in PAGE_SIZE (even for hugetlb folios).
- * (TODO: hugetlb folios should have ->index in PAGE_SIZE)
  */
 static inline pgoff_t folio_pgoff(struct folio *folio)
 {
-	if (unlikely(folio_test_hugetlb(folio)))
-		return hugetlb_basepage_index(&folio->page);
 	return folio->index;
 }
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 570bc8c3db878..12eee69240525 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -134,11 +134,8 @@ static void page_cache_delete(struct address_space *mapping,
 
 	mapping_set_update(&xas, mapping);
 
-	/* hugetlb pages are represented by a single entry in the xarray */
-	if (!folio_test_hugetlb(folio)) {
-		xas_set_order(&xas, folio->index, folio_order(folio));
-		nr = folio_nr_pages(folio);
-	}
+	xas_set_order(&xas, folio->index, folio_order(folio));
+	nr = folio_nr_pages(folio);
 
 	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
 
@@ -237,7 +234,7 @@ void filemap_free_folio(struct address_space *mapping, struct folio *folio)
 	if (free_folio)
 		free_folio(folio);
 
-	if (folio_test_large(folio) && !folio_test_hugetlb(folio))
+	if (folio_test_large(folio))
 		refs = folio_nr_pages(folio);
 	folio_put_refs(folio, refs);
 }
@@ -858,14 +855,15 @@ noinline int __filemap_add_folio(struct address_space *mapping,
 
 	if (!huge) {
 		int error = mem_cgroup_charge(folio, NULL, gfp);
-		VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
 		if (error)
 			return error;
 		charged = true;
-		xas_set_order(&xas, index, folio_order(folio));
-		nr = folio_nr_pages(folio);
 	}
 
+	VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
+	xas_set_order(&xas, index, folio_order(folio));
+	nr = folio_nr_pages(folio);
+
 	gfp &= GFP_RECLAIM_MASK;
 	folio_ref_add(folio, nr);
 	folio->mapping = mapping;
@@ -2042,7 +2040,7 @@ unsigned find_get_entries(struct address_space *mapping, pgoff_t *start,
 		int idx = folio_batch_count(fbatch) - 1;
 
 		folio = fbatch->folios[idx];
-		if (!xa_is_value(folio) && !folio_test_hugetlb(folio))
+		if (!xa_is_value(folio))
 			nr = folio_nr_pages(folio);
 		*start = indices[idx] + nr;
 	}
@@ -2106,7 +2104,7 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start,
 		int idx = folio_batch_count(fbatch) - 1;
 
 		folio = fbatch->folios[idx];
-		if (!xa_is_value(folio) && !folio_test_hugetlb(folio))
+		if (!xa_is_value(folio))
 			nr = folio_nr_pages(folio);
 		*start = indices[idx] + nr;
 	}
@@ -2147,9 +2145,6 @@ unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start,
 			continue;
 		if (!folio_batch_add(fbatch, folio)) {
 			unsigned long nr = folio_nr_pages(folio);
-
-			if (folio_test_hugetlb(folio))
-				nr = 1;
 			*start = folio->index + nr;
 			goto out;
 		}
@@ -2175,7 +2170,7 @@ EXPORT_SYMBOL(filemap_get_folios);
 static inline
 bool folio_more_pages(struct folio *folio, pgoff_t index, pgoff_t max)
 {
-	if (!folio_test_large(folio) || folio_test_hugetlb(folio))
+	if (!folio_test_large(folio))
 		return false;
 	if (index >= max)
 		return false;
@@ -2225,9 +2220,6 @@ unsigned filemap_get_folios_contig(struct address_space *mapping,
 
 		if (!folio_batch_add(fbatch, folio)) {
 			nr = folio_nr_pages(folio);
-
-			if (folio_test_hugetlb(folio))
-				nr = 1;
 			*start = folio->index + nr;
 			goto out;
 		}
@@ -2244,10 +2236,7 @@ unsigned filemap_get_folios_contig(struct address_space *mapping,
 
 	if (nr) {
 		folio = fbatch->folios[nr - 1];
-		if (folio_test_hugetlb(folio))
-			*start = folio->index + 1;
-		else
-			*start = folio->index + folio_nr_pages(folio);
+		*start = folio->index + folio_nr_pages(folio);
 	}
 out:
 	rcu_read_unlock();
@@ -2285,9 +2274,6 @@ unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start,
 			continue;
 		if (!folio_batch_add(fbatch, folio)) {
 			unsigned long nr = folio_nr_pages(folio);
-
-			if (folio_test_hugetlb(folio))
-				nr = 1;
 			*start = folio->index + nr;
 			goto out;
 		}
-- 
2.40.1



^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [RFC v2 2/2] mm/hugetlb: add wrapper functions for interactions with page cache
  2023-05-19 22:01 [RFC v2 0/2] change ->index to PAGE_SIZE for hugetlb pages Sidhartha Kumar
  2023-05-19 22:01 ` [RFC v2 1/2] mm/filemap: remove hugetlb special casing in filemap.c Sidhartha Kumar
@ 2023-05-19 22:01 ` Sidhartha Kumar
  1 sibling, 0 replies; 3+ messages in thread
From: Sidhartha Kumar @ 2023-05-19 22:01 UTC (permalink / raw)
  To: linux-kernel, linux-mm
  Cc: akpm, songmuchun, mike.kravetz, willy, david, nphamcs, Sidhartha Kumar

Add filemap_lock_hugetlb_folio() which wraps __filemap_get_folio()
and passes in a linear page index. hugetlb_add_to_page_cache() is modified
to also compute a linear page index before calling into page cache code.

linear_page_index() is modified to drop its hugetlb special case, so it
returns a PAGE_SIZE-based index for hugetlb VMAs as well and can be used
in the page cache wrappers.

Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
---
 fs/hugetlbfs/inode.c    | 14 +++++++-------
 include/linux/hugetlb.h | 21 +++++++++++++++++++--
 include/linux/pagemap.h |  2 --
 mm/hugetlb.c            | 22 +++++++++++++---------
 4 files changed, 39 insertions(+), 20 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 90361a922cec7..204a9510510f1 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -617,20 +617,19 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 	struct hstate *h = hstate_inode(inode);
 	struct address_space *mapping = &inode->i_data;
 	const pgoff_t start = lstart >> huge_page_shift(h);
-	const pgoff_t end = lend >> huge_page_shift(h);
 	struct folio_batch fbatch;
 	pgoff_t next, index;
 	int i, freed = 0;
 	bool truncate_op = (lend == LLONG_MAX);
 
 	folio_batch_init(&fbatch);
-	next = start;
-	while (filemap_get_folios(mapping, &next, end - 1, &fbatch)) {
+	next = lstart;
+	while (filemap_get_folios(mapping, &next, lend - 1, &fbatch)) {
 		for (i = 0; i < folio_batch_count(&fbatch); ++i) {
 			struct folio *folio = fbatch.folios[i];
 			u32 hash = 0;
 
-			index = folio->index;
+			index = folio->index >> huge_page_shift(h);
 			hash = hugetlb_fault_mutex_hash(mapping, index);
 			mutex_lock(&hugetlb_fault_mutex_table[hash]);
 
@@ -693,10 +692,11 @@ static void hugetlbfs_zero_partial_page(struct hstate *h,
 					loff_t start,
 					loff_t end)
 {
-	pgoff_t idx = start >> huge_page_shift(h);
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma = find_vma(mm, start);
 	struct folio *folio;
 
-	folio = filemap_lock_folio(mapping, idx);
+	folio = filemap_lock_hugetlb_folio(vma, start);
 	if (IS_ERR(folio))
 		return;
 
@@ -868,7 +868,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
 		}
 		clear_huge_page(&folio->page, addr, pages_per_huge_page(h));
 		__folio_mark_uptodate(folio);
-		error = hugetlb_add_to_page_cache(folio, mapping, index);
+		error = hugetlb_add_to_page_cache(folio, &pseudo_vma, mapping, addr);
 		if (unlikely(error)) {
 			restore_reserve_on_error(h, &pseudo_vma, addr, folio);
 			folio_put(folio);
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 21f942025fecd..55f90e051b7a2 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -727,8 +727,8 @@ struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
 				nodemask_t *nmask, gfp_t gfp_mask);
 struct folio *alloc_hugetlb_folio_vma(struct hstate *h, struct vm_area_struct *vma,
 				unsigned long address);
-int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping,
-			pgoff_t idx);
+int hugetlb_add_to_page_cache(struct folio *folio, struct vm_area_struct *vma,
+				struct address_space *mapping, unsigned long address);
 void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
 				unsigned long address, struct folio *folio);
 
@@ -755,6 +755,16 @@ static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio
 	return folio->_hugetlb_subpool;
 }
 
+/* Wrapper function for __filemap_get_folio*/
+static inline struct folio *filemap_lock_hugetlb_folio(struct vm_area_struct *vma,
+						unsigned long address)
+{
+	struct address_space *mapping = vma->vm_file->f_mapping;
+
+	pgoff_t idx = linear_page_index(vma, address);
+	return __filemap_get_folio(mapping, idx, FGP_LOCK, 0);
+}
+
 static inline void hugetlb_set_folio_subpool(struct folio *folio,
 					struct hugepage_subpool *subpool)
 {
@@ -1021,6 +1031,13 @@ static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio
 	return NULL;
 }
 
+/* Wrapper function for __filemap_get_folio*/
+static inline struct folio *filemap_lock_hugetlb_folio(struct vm_area_struct *vma,
+						unsigned long address)
+{
+	return NULL;
+}
+
 static inline int isolate_or_dissolve_huge_page(struct page *page,
 						struct list_head *list)
 {
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 7ca967849c2cc..86f7b180d2521 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -856,8 +856,6 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
 					unsigned long address)
 {
 	pgoff_t pgoff;
-	if (unlikely(is_vm_hugetlb_page(vma)))
-		return linear_hugepage_index(vma, address);
 	pgoff = (address - vma->vm_start) >> PAGE_SHIFT;
 	pgoff += vma->vm_pgoff;
 	return pgoff;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ea24718db4aff..5abab61af0ca5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -951,7 +951,7 @@ static long region_count(struct resv_map *resv, long f, long t)
 
 /*
  * Convert the address within this vma to the page offset within
- * the mapping, in pagecache page units; huge pages here.
+ * the mapping, huge page units here.
  */
 static pgoff_t vma_hugecache_offset(struct hstate *h,
 			struct vm_area_struct *vma, unsigned long address)
@@ -5730,7 +5730,7 @@ static bool hugetlbfs_pagecache_present(struct hstate *h,
 			struct vm_area_struct *vma, unsigned long address)
 {
 	struct address_space *mapping = vma->vm_file->f_mapping;
-	pgoff_t idx = vma_hugecache_offset(h, vma, address);
+	pgoff_t idx = linear_page_index(vma, address);
 	bool present;
 
 	rcu_read_lock();
@@ -5740,13 +5740,16 @@ static bool hugetlbfs_pagecache_present(struct hstate *h,
 	return present;
 }
 
-int hugetlb_add_to_page_cache(struct folio *folio, struct address_space *mapping,
-			   pgoff_t idx)
+int hugetlb_add_to_page_cache(struct folio *folio,
+			struct vm_area_struct *vma,
+			struct address_space *mapping,
+			unsigned long address)
 {
 	struct inode *inode = mapping->host;
 	struct hstate *h = hstate_inode(inode);
 	int err;
 
+	pgoff_t idx = linear_page_index(vma, address);
 	__folio_set_locked(folio);
 	err = __filemap_add_folio(mapping, folio, idx, GFP_KERNEL, NULL);
 
@@ -5854,7 +5857,8 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 	 * before we get page_table_lock.
 	 */
 	new_folio = false;
-	folio = filemap_lock_folio(mapping, idx);
+
+	folio = filemap_lock_hugetlb_folio(vma, address);
 	if (IS_ERR(folio)) {
 		size = i_size_read(mapping->host) >> huge_page_shift(h);
 		if (idx >= size)
@@ -5913,7 +5917,7 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 		new_folio = true;
 
 		if (vma->vm_flags & VM_MAYSHARE) {
-			int err = hugetlb_add_to_page_cache(folio, mapping, idx);
+			int err = hugetlb_add_to_page_cache(folio, vma, mapping, address);
 			if (err) {
 				/*
 				 * err can't be -EEXIST which implies someone
@@ -6145,7 +6149,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		/* Just decrements count, does not deallocate */
 		vma_end_reservation(h, vma, haddr);
 
-		pagecache_folio = filemap_lock_folio(mapping, idx);
+		pagecache_folio = filemap_lock_hugetlb_folio(vma, address);
 		if (IS_ERR(pagecache_folio))
 			pagecache_folio = NULL;
 	}
@@ -6258,7 +6262,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
 
 	if (is_continue) {
 		ret = -EFAULT;
-		folio = filemap_lock_folio(mapping, idx);
+		folio = filemap_lock_hugetlb_folio(dst_vma, dst_addr);
 		if (IS_ERR(folio))
 			goto out;
 		folio_in_pagecache = true;
@@ -6350,7 +6354,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte,
 		 * hugetlb_fault_mutex_table that here must be hold by
 		 * the caller.
 		 */
-		ret = hugetlb_add_to_page_cache(folio, mapping, idx);
+		ret = hugetlb_add_to_page_cache(folio, dst_vma, mapping, dst_addr);
 		if (ret)
 			goto out_release_nounlock;
 		folio_in_pagecache = true;
-- 
2.40.1



^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-05-19 22:02 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-19 22:01 [RFC v2 0/2] change ->index to PAGE_SIZE for hugetlb pages Sidhartha Kumar
2023-05-19 22:01 ` [RFC v2 1/2] mm/filemap: remove hugetlb special casing in filemap.c Sidhartha Kumar
2023-05-19 22:01 ` [RFC v2 2/2] mm/hugetlb: add wrapper functions for interactions with page cache Sidhartha Kumar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).