From: "Pankaj Raghav (Samsung)" <kernel@pankajraghav.com>
To: linux-xfs@vger.kernel.org, linux-fsdevel@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, david@fromorbit.com,
	chandan.babu@oracle.com, akpm@linux-foundation.org,
	mcgrof@kernel.org, ziy@nvidia.com, hare@suse.de,
	djwong@kernel.org, gost.dev@samsung.com, linux-mm@kvack.org,
	willy@infradead.org
Subject: [PATCH 01/13] mm: Support order-1 folios in the page cache
Date: Mon, 26 Feb 2024 10:49:24 +0100	[thread overview]
Message-ID: <20240226094936.2677493-2-kernel@pankajraghav.com> (raw)
In-Reply-To: <20240226094936.2677493-1-kernel@pankajraghav.com>

From: "Matthew Wilcox (Oracle)" <willy@infradead.org>

Folios of order 1 have no space to store the deferred list.  This is
not a problem for the page cache as file-backed folios are never
placed on the deferred list.  All we need to do is prevent the core
MM from touching the deferred list for order 1 folios and remove the
code which prevented us from allocating order 1 folios.
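
A minimal sketch of the constraint at play (illustrative only, not part of
this patch; the helper name below is hypothetical): the folio's
_deferred_list field overlays the second tail page of a compound page, so
only folios of order 2 or higher have room for it.

	/*
	 * Illustrative sketch, not part of this patch: _deferred_list
	 * overlays the folio's second tail page, which only exists for
	 * folios of order >= 2.  An order-1 folio has just two pages,
	 * so it must never be linked onto a deferred split queue.
	 * folio_has_deferred_list() is a hypothetical helper name;
	 * folio_order() is the existing helper from <linux/mm.h>.
	 */
	static inline bool folio_has_deferred_list(struct folio *folio)
	{
		return folio_order(folio) > 1;
	}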

Link: https://lore.kernel.org/linux-mm/90344ea7-4eec-47ee-5996-0c22f42d6a6a@google.com/
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/huge_mm.h |  7 +++++--
 mm/filemap.c            |  2 --
 mm/huge_memory.c        | 23 ++++++++++++++++++-----
 mm/internal.h           |  4 +---
 mm/readahead.c          |  3 ---
 5 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 5adb86af35fc..916a2a539517 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -263,7 +263,7 @@ unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
 unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
 		unsigned long len, unsigned long pgoff, unsigned long flags);
 
-void folio_prep_large_rmappable(struct folio *folio);
+struct folio *folio_prep_large_rmappable(struct folio *folio);
 bool can_split_folio(struct folio *folio, int *pextra_pins);
 int split_huge_page_to_list(struct page *page, struct list_head *list);
 static inline int split_huge_page(struct page *page)
@@ -410,7 +410,10 @@ static inline unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
 	return 0;
 }
 
-static inline void folio_prep_large_rmappable(struct folio *folio) {}
+static inline struct folio *folio_prep_large_rmappable(struct folio *folio)
+{
+	return folio;
+}
 
 #define transparent_hugepage_flags 0UL
 
diff --git a/mm/filemap.c b/mm/filemap.c
index 750e779c23db..2b00442b9d19 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1912,8 +1912,6 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
 			gfp_t alloc_gfp = gfp;
 
 			err = -ENOMEM;
-			if (order == 1)
-				order = 0;
 			if (order > 0)
 				alloc_gfp |= __GFP_NORETRY | __GFP_NOWARN;
 			folio = filemap_alloc_folio(alloc_gfp, order);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 94c958f7ebb5..81fd1ba57088 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -788,11 +788,15 @@ struct deferred_split *get_deferred_split_queue(struct folio *folio)
 }
 #endif
 
-void folio_prep_large_rmappable(struct folio *folio)
+struct folio *folio_prep_large_rmappable(struct folio *folio)
 {
-	VM_BUG_ON_FOLIO(folio_order(folio) < 2, folio);
-	INIT_LIST_HEAD(&folio->_deferred_list);
+	if (!folio || !folio_test_large(folio))
+		return folio;
+	if (folio_order(folio) > 1)
+		INIT_LIST_HEAD(&folio->_deferred_list);
 	folio_set_large_rmappable(folio);
+
+	return folio;
 }
 
 static inline bool is_transparent_hugepage(struct folio *folio)
@@ -3082,7 +3086,8 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 	/* Prevent deferred_split_scan() touching ->_refcount */
 	spin_lock(&ds_queue->split_queue_lock);
 	if (folio_ref_freeze(folio, 1 + extra_pins)) {
-		if (!list_empty(&folio->_deferred_list)) {
+		if (folio_order(folio) > 1 &&
+		    !list_empty(&folio->_deferred_list)) {
 			ds_queue->split_queue_len--;
 			list_del(&folio->_deferred_list);
 		}
@@ -3133,6 +3138,9 @@ void folio_undo_large_rmappable(struct folio *folio)
 	struct deferred_split *ds_queue;
 	unsigned long flags;
 
+	if (folio_order(folio) <= 1)
+		return;
+
 	/*
 	 * At this point, there is no one trying to add the folio to
 	 * deferred_list. If folio is not in deferred_list, it's safe
@@ -3158,7 +3166,12 @@ void deferred_split_folio(struct folio *folio)
 #endif
 	unsigned long flags;
 
-	VM_BUG_ON_FOLIO(folio_order(folio) < 2, folio);
+	/*
+	 * Order 1 folios have no space for a deferred list, but we also
+	 * won't waste much memory by not adding them to the deferred list.
+	 */
+	if (folio_order(folio) <= 1)
+		return;
 
 	/*
 	 * The try_to_unmap() in page reclaim path might reach here too,
diff --git a/mm/internal.h b/mm/internal.h
index f309a010d50f..5174b5b0c344 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -419,9 +419,7 @@ static inline struct folio *page_rmappable_folio(struct page *page)
 {
 	struct folio *folio = (struct folio *)page;
 
-	if (folio && folio_order(folio) > 1)
-		folio_prep_large_rmappable(folio);
-	return folio;
+	return folio_prep_large_rmappable(folio);
 }
 
 static inline void prep_compound_head(struct page *page, unsigned int order)
diff --git a/mm/readahead.c b/mm/readahead.c
index 2648ec4f0494..369c70e2be42 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -516,9 +516,6 @@ void page_cache_ra_order(struct readahead_control *ractl,
 		/* Don't allocate pages past EOF */
 		while (index + (1UL << order) - 1 > limit)
 			order--;
-		/* THP machinery does not support order-1 */
-		if (order == 1)
-			order = 0;
 		err = ra_alloc_folio(ractl, index, mark, order, gfp);
 		if (err)
 			break;
-- 
2.43.0


Thread overview: 35+ messages
2024-02-26  9:49 [PATCH 00/13] enable bs > ps in XFS Pankaj Raghav (Samsung)
2024-02-26  9:49 ` Pankaj Raghav (Samsung) [this message]
2024-02-26  9:49 ` [PATCH 02/13] fs: Allow fine-grained control of folio sizes Pankaj Raghav (Samsung)
2024-02-26  9:49 ` [PATCH 03/13] filemap: align the index to mapping_min_order in the page cache Pankaj Raghav (Samsung)
2024-02-26 14:40   ` Matthew Wilcox
2024-02-27 10:06     ` Pankaj Raghav (Samsung)
2024-02-27 16:22       ` Kent Overstreet
2024-02-27 16:36         ` Pankaj Raghav (Samsung)
2024-02-27 16:40           ` Kent Overstreet
2024-02-27 16:55             ` Pankaj Raghav (Samsung)
2024-02-27 17:02               ` Kent Overstreet
2024-02-27 17:09                 ` Pankaj Raghav (Samsung)
2024-02-26  9:49 ` [PATCH 04/13] filemap: use mapping_min_order while allocating folios Pankaj Raghav (Samsung)
2024-02-26 14:47   ` Matthew Wilcox
2024-02-27 12:09     ` Pankaj Raghav (Samsung)
2024-02-26  9:49 ` [PATCH 05/13] readahead: set file_ra_state->ra_pages to be at least mapping_min_order Pankaj Raghav (Samsung)
2024-02-26 14:49   ` Matthew Wilcox
2024-02-27 12:42     ` Pankaj Raghav (Samsung)
2024-02-26  9:49 ` [PATCH 06/13] readahead: align index to mapping_min_order in ondemand_ra and force_ra Pankaj Raghav (Samsung)
2024-02-26  9:49 ` [PATCH 07/13] readahead: rework loop in page_cache_ra_unbounded() Pankaj Raghav (Samsung)
2024-02-26  9:49 ` [PATCH 08/13] readahead: allocate folios with mapping_min_order in ra_(unbounded|order) Pankaj Raghav (Samsung)
2024-02-26  9:49 ` [PATCH 09/13] mm: do not split a folio if it has minimum folio order requirement Pankaj Raghav (Samsung)
2024-02-26  9:49 ` [PATCH 10/13] iomap: fix iomap_dio_zero() for fs bs > system page size Pankaj Raghav (Samsung)
2024-02-26 17:58   ` Matthew Wilcox
2024-02-27  9:33     ` Pankaj Raghav (Samsung)
2024-02-26  9:49 ` [PATCH 11/13] xfs: expose block size in stat Pankaj Raghav (Samsung)
2024-02-26 12:44   ` Dave Chinner
2024-02-27  8:53     ` Pankaj Raghav (Samsung)
2024-02-26  9:49 ` [PATCH 12/13] xfs: make the calculation generic in xfs_sb_validate_fsb_count() Pankaj Raghav (Samsung)
2024-02-26 12:47   ` Dave Chinner
2024-02-26 13:21   ` Matthew Wilcox
2024-02-27  8:44     ` Pankaj Raghav (Samsung)
2024-02-26  9:49 ` [PATCH 13/13] xfs: enable block size larger than page size support Pankaj Raghav (Samsung)
2024-02-26 13:26   ` Matthew Wilcox
2024-02-26 21:18     ` Dave Chinner
