* [PATCH 01/21] shmem: Convert shmem_alloc_hugepage() to use vma_alloc_folio()
From: Matthew Wilcox (Oracle) @ 2022-04-29 19:23 UTC
To: akpm; +Cc: Matthew Wilcox (Oracle), linux-mm
For now, return the head page of the folio, but remove use of the
old alloc_pages_vma() API.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
mm/shmem.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index 4b2fea33158e..c89394221a7e 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1527,7 +1527,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
struct vm_area_struct pvma;
struct address_space *mapping = info->vfs_inode.i_mapping;
pgoff_t hindex;
- struct page *page;
+ struct folio *folio;
hindex = round_down(index, HPAGE_PMD_NR);
if (xa_find(&mapping->i_pages, &hindex, hindex + HPAGE_PMD_NR - 1,
@@ -1535,13 +1535,11 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
return NULL;
shmem_pseudo_vma_init(&pvma, info, hindex);
- page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, &pvma, 0, true);
+ folio = vma_alloc_folio(gfp, HPAGE_PMD_ORDER, &pvma, 0, true);
shmem_pseudo_vma_destroy(&pvma);
- if (page)
- prep_transhuge_page(page);
- else
+ if (!folio)
count_vm_event(THP_FILE_FALLBACK);
- return page;
+ return &folio->page;
}
static struct page *shmem_alloc_page(gfp_t gfp,
--
2.34.1
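Not part of the patch, but a minimal sketch of why returning &folio->page is a safe interim step: the head page and the folio share the same memory, so page-based callers can convert back losslessly. Kernel context assumed; the function name is hypothetical.

/* Illustrative sketch: the folio <-> head page round trip relied on above. */
static void folio_head_page_example(struct folio *folio)
{
	struct page *head = &folio->page;	/* what shmem_alloc_hugepage() now returns */

	/* The head page is page 0 of the folio... */
	VM_BUG_ON_PAGE(folio_page(folio, 0) != head, head);
	/* ...and converting back recovers the original folio. */
	VM_BUG_ON_FOLIO(page_folio(head) != folio, folio);
}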
* [PATCH 02/21] mm/huge_memory: Convert do_huge_pmd_anonymous_page() to use vma_alloc_folio()
From: Matthew Wilcox (Oracle) @ 2022-04-29 19:23 UTC
To: akpm; +Cc: Matthew Wilcox (Oracle), linux-mm
Remove the use of the old alloc_hugepage_vma() API, eliminating a call to prep_transhuge_page().
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
mm/huge_memory.c | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index c468fee595ff..caf0e7d27337 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -725,7 +725,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
gfp_t gfp;
- struct page *page;
+ struct folio *folio;
unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
if (!transhuge_vma_suitable(vma, haddr))
@@ -774,13 +774,12 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
return ret;
}
gfp = vma_thp_gfp_mask(vma);
- page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
- if (unlikely(!page)) {
+ folio = vma_alloc_folio(gfp, HPAGE_PMD_ORDER, vma, haddr, true);
+ if (unlikely(!folio)) {
count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
}
- prep_transhuge_page(page);
- return __do_huge_pmd_anonymous_page(vmf, page, gfp);
+ return __do_huge_pmd_anonymous_page(vmf, &folio->page, gfp);
}
static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr,
--
2.34.1
* [PATCH 03/21] mm: Remove alloc_pages_vma()
From: Matthew Wilcox (Oracle) @ 2022-04-29 19:23 UTC
To: akpm; +Cc: Matthew Wilcox (Oracle), linux-mm
All callers have now been converted to use vma_alloc_folio(), so
convert the body of alloc_pages_vma() to allocate folios instead.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
include/linux/gfp.h | 18 +++++++---------
mm/mempolicy.c | 51 ++++++++++++++++++++++-----------------------
2 files changed, 32 insertions(+), 37 deletions(-)
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 3e3d36fc2109..2a08a3c4ba95 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -613,13 +613,8 @@ static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
#ifdef CONFIG_NUMA
struct page *alloc_pages(gfp_t gfp, unsigned int order);
struct folio *folio_alloc(gfp_t gfp, unsigned order);
-struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
- struct vm_area_struct *vma, unsigned long addr,
- bool hugepage);
struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma,
unsigned long addr, bool hugepage);
-#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
- alloc_pages_vma(gfp_mask, order, vma, addr, true)
#else
static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order)
{
@@ -629,16 +624,17 @@ static inline struct folio *folio_alloc(gfp_t gfp, unsigned int order)
{
return __folio_alloc_node(gfp, order, numa_node_id());
}
-#define alloc_pages_vma(gfp_mask, order, vma, addr, hugepage) \
- alloc_pages(gfp_mask, order)
#define vma_alloc_folio(gfp, order, vma, addr, hugepage) \
folio_alloc(gfp, order)
-#define alloc_hugepage_vma(gfp_mask, vma, addr, order) \
- alloc_pages(gfp_mask, order)
#endif
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
-#define alloc_page_vma(gfp_mask, vma, addr) \
- alloc_pages_vma(gfp_mask, 0, vma, addr, false)
+static inline struct page *alloc_page_vma(gfp_t gfp,
+ struct vm_area_struct *vma, unsigned long addr)
+{
+ struct folio *folio = vma_alloc_folio(gfp, 0, vma, addr, false);
+
+ return &folio->page;
+}
extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
extern unsigned long get_zeroed_page(gfp_t gfp_mask);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 8c74107a2b15..174efbee1cb5 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2135,44 +2135,55 @@ static struct page *alloc_pages_preferred_many(gfp_t gfp, unsigned int order,
}
/**
- * alloc_pages_vma - Allocate a page for a VMA.
+ * vma_alloc_folio - Allocate a folio for a VMA.
* @gfp: GFP flags.
- * @order: Order of the GFP allocation.
+ * @order: Order of the folio.
* @vma: Pointer to VMA or NULL if not available.
* @addr: Virtual address of the allocation. Must be inside @vma.
* @hugepage: For hugepages try only the preferred node if possible.
*
- * Allocate a page for a specific address in @vma, using the appropriate
+ * Allocate a folio for a specific address in @vma, using the appropriate
* NUMA policy. When @vma is not NULL the caller must hold the mmap_lock
* of the mm_struct of the VMA to prevent it from going away. Should be
- * used for all allocations for pages that will be mapped into user space.
+ * used for all allocations for folios that will be mapped into user space.
*
- * Return: The page on success or NULL if allocation fails.
+ * Return: The folio on success or NULL if allocation fails.
*/
-struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
+struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma,
unsigned long addr, bool hugepage)
{
struct mempolicy *pol;
int node = numa_node_id();
- struct page *page;
+ struct folio *folio;
int preferred_nid;
nodemask_t *nmask;
pol = get_vma_policy(vma, addr);
if (pol->mode == MPOL_INTERLEAVE) {
+ struct page *page;
unsigned nid;
nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
mpol_cond_put(pol);
+ gfp |= __GFP_COMP;
page = alloc_page_interleave(gfp, order, nid);
+ if (page && order > 1)
+ prep_transhuge_page(page);
+ folio = (struct folio *)page;
goto out;
}
if (pol->mode == MPOL_PREFERRED_MANY) {
+ struct page *page;
+
node = policy_node(gfp, pol, node);
+ gfp |= __GFP_COMP;
page = alloc_pages_preferred_many(gfp, order, node, pol);
mpol_cond_put(pol);
+ if (page && order > 1)
+ prep_transhuge_page(page);
+ folio = (struct folio *)page;
goto out;
}
@@ -2199,8 +2210,8 @@ struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
* First, try to allocate THP only on local node, but
* don't reclaim unnecessarily, just compact.
*/
- page = __alloc_pages_node(hpage_node,
- gfp | __GFP_THISNODE | __GFP_NORETRY, order);
+ folio = __folio_alloc_node(gfp | __GFP_THISNODE |
+ __GFP_NORETRY, order, hpage_node);
/*
* If hugepage allocations are configured to always
@@ -2208,8 +2219,9 @@ struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
* to prefer hugepage backing, retry allowing remote
* memory with both reclaim and compact as well.
*/
- if (!page && (gfp & __GFP_DIRECT_RECLAIM))
- page = __alloc_pages(gfp, order, hpage_node, nmask);
+ if (!folio && (gfp & __GFP_DIRECT_RECLAIM))
+ folio = __folio_alloc(gfp, order, hpage_node,
+ nmask);
goto out;
}
@@ -2217,25 +2229,12 @@ struct page *alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
nmask = policy_nodemask(gfp, pol);
preferred_nid = policy_node(gfp, pol, node);
- page = __alloc_pages(gfp, order, preferred_nid, nmask);
+ folio = __folio_alloc(gfp, order, preferred_nid, nmask);
mpol_cond_put(pol);
out:
- return page;
-}
-EXPORT_SYMBOL(alloc_pages_vma);
-
-struct folio *vma_alloc_folio(gfp_t gfp, int order, struct vm_area_struct *vma,
- unsigned long addr, bool hugepage)
-{
- struct folio *folio;
-
- folio = (struct folio *)alloc_pages_vma(gfp, order, vma, addr,
- hugepage);
- if (folio && order > 1)
- prep_transhuge_page(&folio->page);
-
return folio;
}
+EXPORT_SYMBOL(vma_alloc_folio);
/**
* alloc_pages - Allocate pages.
--
2.34.1
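Not part of the series, but a minimal caller-side sketch of the converted API, using only the signatures shown above; the function name and GFP choice are illustrative.

/*
 * Illustrative sketch: allocate a single-page folio for a faulting
 * address. With order 0 and hugepage == false this behaves like the
 * old alloc_page_vma().
 */
static struct folio *fault_alloc_folio_example(struct vm_fault *vmf)
{
	struct folio *folio;

	folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vmf->vma,
				vmf->address, false);
	if (!folio)
		return NULL;	/* caller falls back or fails the fault */

	/* Legacy code that still needs a struct page can use &folio->page. */
	return folio;
}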
* [PATCH 04/21] vmscan: Use folio_mapped() in shrink_page_list()
From: Matthew Wilcox (Oracle) @ 2022-04-29 19:23 UTC
To: akpm; +Cc: Matthew Wilcox (Oracle), linux-mm
Replace the remaining page_mapped(), PageSwapBacked() and PageTransHuge()/thp_order() calls in this part of shrink_page_list() with their folio equivalents.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
mm/vmscan.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1678802e03e7..27be6f9b2ba5 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1549,7 +1549,7 @@ static unsigned int shrink_page_list(struct list_head *page_list,
if (unlikely(!page_evictable(page)))
goto activate_locked;
- if (!sc->may_unmap && page_mapped(page))
+ if (!sc->may_unmap && folio_mapped(folio))
goto keep_locked;
may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
@@ -1743,21 +1743,21 @@ static unsigned int shrink_page_list(struct list_head *page_list,
}
/*
- * The page is mapped into the page tables of one or more
+ * The folio is mapped into the page tables of one or more
* processes. Try to unmap it here.
*/
- if (page_mapped(page)) {
+ if (folio_mapped(folio)) {
enum ttu_flags flags = TTU_BATCH_FLUSH;
- bool was_swapbacked = PageSwapBacked(page);
+ bool was_swapbacked = folio_test_swapbacked(folio);
- if (PageTransHuge(page) &&
- thp_order(page) >= HPAGE_PMD_ORDER)
+ if (folio_test_pmd_mappable(folio))
flags |= TTU_SPLIT_HUGE_PMD;
try_to_unmap(folio, flags);
- if (page_mapped(page)) {
+ if (folio_mapped(folio)) {
stat->nr_unmap_fail += nr_pages;
- if (!was_swapbacked && PageSwapBacked(page))
+ if (!was_swapbacked &&
+ folio_test_swapbacked(folio))
stat->nr_lazyfree_fail += nr_pages;
goto activate_locked;
}
--
2.34.1
* [PATCH 05/21] vmscan: Convert the writeback handling in shrink_page_list() to folios
From: Matthew Wilcox (Oracle) @ 2022-04-29 19:23 UTC
To: akpm; +Cc: Matthew Wilcox (Oracle), linux-mm
Slightly more efficient due to fewer calls to compound_head().
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
mm/vmscan.c | 77 ++++++++++++++++++++++++++++-------------------------
1 file changed, 41 insertions(+), 36 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 27be6f9b2ba5..19c1bcd886ef 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1578,40 +1578,42 @@ static unsigned int shrink_page_list(struct list_head *page_list,
stat->nr_congested += nr_pages;
/*
- * If a page at the tail of the LRU is under writeback, there
+ * If a folio at the tail of the LRU is under writeback, there
* are three cases to consider.
*
- * 1) If reclaim is encountering an excessive number of pages
- * under writeback and this page is both under writeback and
- * PageReclaim then it indicates that pages are being queued
- * for IO but are being recycled through the LRU before the
- * IO can complete. Waiting on the page itself risks an
- * indefinite stall if it is impossible to writeback the
- * page due to IO error or disconnected storage so instead
- * note that the LRU is being scanned too quickly and the
- * caller can stall after page list has been processed.
+ * 1) If reclaim is encountering an excessive number of folios
+ * under writeback and this folio is both under
+ * writeback and has the reclaim flag set then it
+ * indicates that folios are being queued for I/O but
+ * are being recycled through the LRU before the I/O
+ * can complete. Waiting on the folio itself risks an
+ * indefinite stall if it is impossible to writeback
+ * the folio due to I/O error or disconnected storage
+ * so instead note that the LRU is being scanned too
+ * quickly and the caller can stall after the folio
+ * list has been processed.
*
- * 2) Global or new memcg reclaim encounters a page that is
+ * 2) Global or new memcg reclaim encounters a folio that is
* not marked for immediate reclaim, or the caller does not
* have __GFP_FS (or __GFP_IO if it's simply going to swap,
- * not to fs). In this case mark the page for immediate
+ * not to fs). In this case mark the folio for immediate
* reclaim and continue scanning.
*
* Require may_enter_fs because we would wait on fs, which
- * may not have submitted IO yet. And the loop driver might
- * enter reclaim, and deadlock if it waits on a page for
+ * may not have submitted I/O yet. And the loop driver might
+ * enter reclaim, and deadlock if it waits on a folio for
* which it is needed to do the write (loop masks off
* __GFP_IO|__GFP_FS for this reason); but more thought
* would probably show more reasons.
*
- * 3) Legacy memcg encounters a page that is already marked
- * PageReclaim. memcg does not have any dirty pages
+ * 3) Legacy memcg encounters a folio that already has the
+ * reclaim flag set. memcg does not have any dirty folio
* throttling so we could easily OOM just because too many
- * pages are in writeback and there is nothing else to
+ * folios are in writeback and there is nothing else to
* reclaim. Wait for the writeback to complete.
*
- * In cases 1) and 2) we activate the pages to get them out of
- * the way while we continue scanning for clean pages on the
+ * In cases 1) and 2) we activate the folios to get them out of
+ * the way while we continue scanning for clean folios on the
* inactive list and refilling from the active list. The
* observation here is that waiting for disk writes is more
* expensive than potentially causing reloads down the line.
@@ -1619,38 +1621,41 @@ static unsigned int shrink_page_list(struct list_head *page_list,
* memory pressure on the cache working set any longer than it
* takes to write them to disk.
*/
- if (PageWriteback(page)) {
+ if (folio_test_writeback(folio)) {
/* Case 1 above */
if (current_is_kswapd() &&
- PageReclaim(page) &&
+ folio_test_reclaim(folio) &&
test_bit(PGDAT_WRITEBACK, &pgdat->flags)) {
stat->nr_immediate += nr_pages;
goto activate_locked;
/* Case 2 above */
} else if (writeback_throttling_sane(sc) ||
- !PageReclaim(page) || !may_enter_fs) {
+ !folio_test_reclaim(folio) || !may_enter_fs) {
/*
- * This is slightly racy - end_page_writeback()
- * might have just cleared PageReclaim, then
- * setting PageReclaim here end up interpreted
- * as PageReadahead - but that does not matter
- * enough to care. What we do want is for this
- * page to have PageReclaim set next time memcg
- * reclaim reaches the tests above, so it will
- * then wait_on_page_writeback() to avoid OOM;
- * and it's also appropriate in global reclaim.
+ * This is slightly racy -
+ * folio_end_writeback() might have just
+ * cleared the reclaim flag, then setting
+ * reclaim here ends up interpreted as
+ * the readahead flag - but that does
+ * not matter enough to care. What we
+ * do want is for this folio to have
+ * the reclaim flag set next time memcg
+ * reclaim reaches the tests above, so
+ * it will then folio_wait_writeback()
+ * to avoid OOM; and it's also appropriate
+ * in global reclaim.
*/
- SetPageReclaim(page);
+ folio_set_reclaim(folio);
stat->nr_writeback += nr_pages;
goto activate_locked;
/* Case 3 above */
} else {
- unlock_page(page);
- wait_on_page_writeback(page);
- /* then go back and try same page again */
- list_add_tail(&page->lru, page_list);
+ folio_unlock(folio);
+ folio_wait_writeback(folio);
+ /* then go back and try same folio again */
+ list_add_tail(&folio->lru, page_list);
continue;
}
}
--
2.34.1
* [PATCH 06/21] swap: Turn get_swap_page() into folio_alloc_swap()
From: Matthew Wilcox (Oracle) @ 2022-04-29 19:23 UTC
To: akpm; +Cc: Matthew Wilcox (Oracle), linux-mm
This removes an assumption that a large folio is HPAGE_PMD_NR pages
in size.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
include/linux/swap.h | 13 +++++++------
mm/memcontrol.c | 16 ++++++++--------
mm/shmem.c | 3 ++-
mm/swap_slots.c | 14 +++++++-------
mm/swap_state.c | 3 ++-
mm/swapfile.c | 17 +++++++++--------
6 files changed, 35 insertions(+), 31 deletions(-)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 27093b477c5f..147a9a173508 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -494,7 +494,7 @@ static inline long get_nr_swap_pages(void)
}
extern void si_swapinfo(struct sysinfo *);
-extern swp_entry_t get_swap_page(struct page *page);
+swp_entry_t folio_alloc_swap(struct folio *folio);
extern void put_swap_page(struct page *page, swp_entry_t entry);
extern swp_entry_t get_swap_page_of_type(int);
extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size);
@@ -685,7 +685,7 @@ static inline int try_to_free_swap(struct page *page)
return 0;
}
-static inline swp_entry_t get_swap_page(struct page *page)
+static inline swp_entry_t folio_alloc_swap(struct folio *folio)
{
swp_entry_t entry;
entry.val = 0;
@@ -739,12 +739,13 @@ static inline void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask)
#ifdef CONFIG_MEMCG_SWAP
void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry);
-extern int __mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry);
-static inline int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
+int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry);
+static inline int mem_cgroup_try_charge_swap(struct folio *folio,
+ swp_entry_t entry)
{
if (mem_cgroup_disabled())
return 0;
- return __mem_cgroup_try_charge_swap(page, entry);
+ return __mem_cgroup_try_charge_swap(folio, entry);
}
extern void __mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages);
@@ -762,7 +763,7 @@ static inline void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
{
}
-static inline int mem_cgroup_try_charge_swap(struct page *page,
+static inline int mem_cgroup_try_charge_swap(struct folio *folio,
swp_entry_t entry)
{
return 0;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 598fece89e2b..985eff804004 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -7125,17 +7125,17 @@ void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
}
/**
- * __mem_cgroup_try_charge_swap - try charging swap space for a page
- * @page: page being added to swap
+ * __mem_cgroup_try_charge_swap - try charging swap space for a folio
+ * @folio: folio being added to swap
* @entry: swap entry to charge
*
- * Try to charge @page's memcg for the swap space at @entry.
+ * Try to charge @folio's memcg for the swap space at @entry.
*
* Returns 0 on success, -ENOMEM on failure.
*/
-int __mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
+int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry)
{
- unsigned int nr_pages = thp_nr_pages(page);
+ unsigned int nr_pages = folio_nr_pages(folio);
struct page_counter *counter;
struct mem_cgroup *memcg;
unsigned short oldid;
@@ -7143,9 +7143,9 @@ int __mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
return 0;
- memcg = page_memcg(page);
+ memcg = folio_memcg(folio);
- VM_WARN_ON_ONCE_PAGE(!memcg, page);
+ VM_WARN_ON_ONCE_FOLIO(!memcg, folio);
if (!memcg)
return 0;
@@ -7168,7 +7168,7 @@ int __mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
if (nr_pages > 1)
mem_cgroup_id_get_many(memcg, nr_pages - 1);
oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg), nr_pages);
- VM_BUG_ON_PAGE(oldid, page);
+ VM_BUG_ON_FOLIO(oldid, folio);
mod_memcg_state(memcg, MEMCG_SWAP, nr_pages);
return 0;
diff --git a/mm/shmem.c b/mm/shmem.c
index c89394221a7e..85c23696efc6 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1312,6 +1312,7 @@ int shmem_unuse(unsigned int type)
*/
static int shmem_writepage(struct page *page, struct writeback_control *wbc)
{
+ struct folio *folio = page_folio(page);
struct shmem_inode_info *info;
struct address_space *mapping;
struct inode *inode;
@@ -1385,7 +1386,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
SetPageUptodate(page);
}
- swap = get_swap_page(page);
+ swap = folio_alloc_swap(folio);
if (!swap.val)
goto redirty;
diff --git a/mm/swap_slots.c b/mm/swap_slots.c
index 2b5531840583..0218ec1cd24c 100644
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -117,7 +117,7 @@ static int alloc_swap_slot_cache(unsigned int cpu)
/*
* Do allocation outside swap_slots_cache_mutex
- * as kvzalloc could trigger reclaim and get_swap_page,
+ * as kvzalloc could trigger reclaim and folio_alloc_swap,
* which can lock swap_slots_cache_mutex.
*/
slots = kvcalloc(SWAP_SLOTS_CACHE_SIZE, sizeof(swp_entry_t),
@@ -213,7 +213,7 @@ static void __drain_swap_slots_cache(unsigned int type)
* this function can be invoked in the cpu
* hot plug path:
* cpu_up -> lock cpu_hotplug -> cpu hotplug state callback
- * -> memory allocation -> direct reclaim -> get_swap_page
+ * -> memory allocation -> direct reclaim -> folio_alloc_swap
* -> drain_swap_slots_cache
*
* Hence the loop over current online cpu below could miss cpu that
@@ -301,16 +301,16 @@ int free_swap_slot(swp_entry_t entry)
return 0;
}
-swp_entry_t get_swap_page(struct page *page)
+swp_entry_t folio_alloc_swap(struct folio *folio)
{
swp_entry_t entry;
struct swap_slots_cache *cache;
entry.val = 0;
- if (PageTransHuge(page)) {
+ if (folio_test_large(folio)) {
if (IS_ENABLED(CONFIG_THP_SWAP))
- get_swap_pages(1, &entry, HPAGE_PMD_NR);
+ get_swap_pages(1, &entry, folio_nr_pages(folio));
goto out;
}
@@ -344,8 +344,8 @@ swp_entry_t get_swap_page(struct page *page)
get_swap_pages(1, &entry, 1);
out:
- if (mem_cgroup_try_charge_swap(page, entry)) {
- put_swap_page(page, entry);
+ if (mem_cgroup_try_charge_swap(folio, entry)) {
+ put_swap_page(&folio->page, entry);
entry.val = 0;
}
return entry;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 013856004825..989ad18f5468 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -183,13 +183,14 @@ void __delete_from_swap_cache(struct page *page,
*/
int add_to_swap(struct page *page)
{
+ struct folio *folio = page_folio(page);
swp_entry_t entry;
int err;
VM_BUG_ON_PAGE(!PageLocked(page), page);
VM_BUG_ON_PAGE(!PageUptodate(page), page);
- entry = get_swap_page(page);
+ entry = folio_alloc_swap(folio);
if (!entry.val)
return 0;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 63c61f8b2611..c34f41553144 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -76,9 +76,9 @@ static PLIST_HEAD(swap_active_head);
/*
* all available (active, not full) swap_info_structs
* protected with swap_avail_lock, ordered by priority.
- * This is used by get_swap_page() instead of swap_active_head
+ * This is used by folio_alloc_swap() instead of swap_active_head
* because swap_active_head includes all swap_info_structs,
- * but get_swap_page() doesn't need to look at full ones.
+ * but folio_alloc_swap() doesn't need to look at full ones.
* This uses its own lock instead of swap_lock because when a
* swap_info_struct changes between not-full/full, it needs to
* add/remove itself to/from this list, but the swap_info_struct->lock
@@ -2093,11 +2093,12 @@ static int try_to_unuse(unsigned int type)
* Under global memory pressure, swap entries can be reinserted back
* into process space after the mmlist loop above passes over them.
*
- * Limit the number of retries? No: when mmget_not_zero() above fails,
- * that mm is likely to be freeing swap from exit_mmap(), which proceeds
- * at its own independent pace; and even shmem_writepage() could have
- * been preempted after get_swap_page(), temporarily hiding that swap.
- * It's easy and robust (though cpu-intensive) just to keep retrying.
+ * Limit the number of retries? No: when mmget_not_zero()
+ * above fails, that mm is likely to be freeing swap from
+ * exit_mmap(), which proceeds at its own independent pace;
+ * and even shmem_writepage() could have been preempted after
+ * folio_alloc_swap(), temporarily hiding that swap. It's easy
+ * and robust (though cpu-intensive) just to keep retrying.
*/
if (READ_ONCE(si->inuse_pages)) {
if (!signal_pending(current))
@@ -2310,7 +2311,7 @@ static void _enable_swap_info(struct swap_info_struct *p)
* which on removal of any swap_info_struct with an auto-assigned
* (i.e. negative) priority increments the auto-assigned priority
* of any lower-priority swap_info_structs.
- * swap_avail_head needs to be priority ordered for get_swap_page(),
+ * swap_avail_head needs to be priority ordered for folio_alloc_swap(),
* which allocates swap pages from the highest available priority
* swap_info_struct.
*/
--
2.34.1
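Not part of the series, but a minimal sketch of the renamed allocator from the caller's side, mirroring the add_to_swap() usage converted in the next patch; the function name is hypothetical.

/*
 * Illustrative sketch: reserve swap space for a folio of any order.
 * The returned entry covers folio_nr_pages(folio) pages rather than
 * assuming HPAGE_PMD_NR.
 */
static bool swap_reserve_example(struct folio *folio)
{
	swp_entry_t entry = folio_alloc_swap(folio);

	if (!entry.val)
		return false;	/* no space, or the memcg swap charge failed */

	/* ... add the folio to the swap cache here ... */

	put_swap_page(&folio->page, entry);	/* release again: sketch only */
	return true;
}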
* [PATCH 07/21] swap: Convert add_to_swap() to take a folio
From: Matthew Wilcox (Oracle) @ 2022-04-29 19:23 UTC
To: akpm; +Cc: Matthew Wilcox (Oracle), linux-mm
The only caller already has a folio available, so this saves a conversion.
Also convert the return type to boolean.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
include/linux/swap.h | 6 +++---
mm/swap_state.c | 47 +++++++++++++++++++++++---------------------
mm/vmscan.c | 6 +++---
3 files changed, 31 insertions(+), 28 deletions(-)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 147a9a173508..f87bb495e482 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -449,7 +449,7 @@ static inline unsigned long total_swapcache_pages(void)
}
extern void show_swap_cache_info(void);
-extern int add_to_swap(struct page *page);
+bool add_to_swap(struct folio *folio);
extern void *get_shadow_from_swap_cache(swp_entry_t entry);
extern int add_to_swap_cache(struct page *page, swp_entry_t entry,
gfp_t gfp, void **shadowp);
@@ -630,9 +630,9 @@ struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index)
return find_get_page(mapping, index);
}
-static inline int add_to_swap(struct page *page)
+static inline bool add_to_swap(struct folio *folio)
{
- return 0;
+ return false;
}
static inline void *get_shadow_from_swap_cache(swp_entry_t entry)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 989ad18f5468..858d8904b06e 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -175,24 +175,26 @@ void __delete_from_swap_cache(struct page *page,
}
/**
- * add_to_swap - allocate swap space for a page
- * @page: page we want to move to swap
+ * add_to_swap - allocate swap space for a folio
+ * @folio: folio we want to move to swap
*
- * Allocate swap space for the page and add the page to the
- * swap cache. Caller needs to hold the page lock.
+ * Allocate swap space for the folio and add the folio to the
+ * swap cache.
+ *
+ * Context: Caller needs to hold the folio lock.
+ * Return: Whether the folio was added to the swap cache.
*/
-int add_to_swap(struct page *page)
+bool add_to_swap(struct folio *folio)
{
- struct folio *folio = page_folio(page);
swp_entry_t entry;
int err;
- VM_BUG_ON_PAGE(!PageLocked(page), page);
- VM_BUG_ON_PAGE(!PageUptodate(page), page);
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+ VM_BUG_ON_FOLIO(!folio_test_uptodate(folio), folio);
entry = folio_alloc_swap(folio);
if (!entry.val)
- return 0;
+ return false;
/*
* XArray node allocations from PF_MEMALLOC contexts could
@@ -205,7 +207,7 @@ int add_to_swap(struct page *page)
/*
* Add it to the swap cache.
*/
- err = add_to_swap_cache(page, entry,
+ err = add_to_swap_cache(&folio->page, entry,
__GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN, NULL);
if (err)
/*
@@ -214,22 +216,23 @@ int add_to_swap(struct page *page)
*/
goto fail;
/*
- * Normally the page will be dirtied in unmap because its pte should be
- * dirty. A special case is MADV_FREE page. The page's pte could have
- * dirty bit cleared but the page's SwapBacked bit is still set because
- * clearing the dirty bit and SwapBacked bit has no lock protected. For
- * such page, unmap will not set dirty bit for it, so page reclaim will
- * not write the page out. This can cause data corruption when the page
- * is swap in later. Always setting the dirty bit for the page solves
- * the problem.
+ * Normally the folio will be dirtied in unmap because its
+ * pte should be dirty. A special case is MADV_FREE page. The
+ * page's pte could have dirty bit cleared but the folio's
+ * SwapBacked flag is still set because clearing the dirty bit
+ * and SwapBacked flag has no lock protected. For such folio,
+ * unmap will not set dirty bit for it, so folio reclaim will
+ * not write the folio out. This can cause data corruption when
+ * the folio is swapped in later. Always setting the dirty flag
+ * for the folio solves the problem.
*/
- set_page_dirty(page);
+ folio_mark_dirty(folio);
- return 1;
+ return true;
fail:
- put_swap_page(page, entry);
- return 0;
+ put_swap_page(&folio->page, entry);
+ return false;
}
/*
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 19c1bcd886ef..8f7c32b3d65e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1710,8 +1710,8 @@ static unsigned int shrink_page_list(struct list_head *page_list,
page_list))
goto activate_locked;
}
- if (!add_to_swap(page)) {
- if (!PageTransHuge(page))
+ if (!add_to_swap(folio)) {
+ if (!folio_test_large(folio))
goto activate_locked_split;
/* Fallback to swap normal pages */
if (split_folio_to_list(folio,
@@ -1720,7 +1720,7 @@ static unsigned int shrink_page_list(struct list_head *page_list,
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
count_vm_event(THP_SWPOUT_FALLBACK);
#endif
- if (!add_to_swap(page))
+ if (!add_to_swap(folio))
goto activate_locked_split;
}
--
2.34.1
* [PATCH 08/21] vmscan: Convert dirty page handling to folios
From: Matthew Wilcox (Oracle) @ 2022-04-29 19:23 UTC
To: akpm; +Cc: Matthew Wilcox (Oracle), linux-mm
Mostly this just eliminates calls to compound_head(), but
NR_VMSCAN_IMMEDIATE was being incremented by 1 instead of by nr_pages.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
mm/vmscan.c | 48 ++++++++++++++++++++++++++----------------------
1 file changed, 26 insertions(+), 22 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8f7c32b3d65e..950eeb2f759b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1768,28 +1768,31 @@ static unsigned int shrink_page_list(struct list_head *page_list,
}
}
- if (PageDirty(page)) {
+ if (folio_test_dirty(folio)) {
/*
- * Only kswapd can writeback filesystem pages
+ * Only kswapd can writeback filesystem folios
* to avoid risk of stack overflow. But avoid
- * injecting inefficient single-page IO into
+ * injecting inefficient single-folio I/O into
* flusher writeback as much as possible: only
- * write pages when we've encountered many
- * dirty pages, and when we've already scanned
- * the rest of the LRU for clean pages and see
- * the same dirty pages again (PageReclaim).
+ * write folios when we've encountered many
+ * dirty folios, and when we've already scanned
+ * the rest of the LRU for clean folios and see
+ * the same dirty folios again (with the reclaim
+ * flag set).
*/
- if (page_is_file_lru(page) &&
- (!current_is_kswapd() || !PageReclaim(page) ||
+ if (folio_is_file_lru(folio) &&
+ (!current_is_kswapd() ||
+ !folio_test_reclaim(folio) ||
!test_bit(PGDAT_DIRTY, &pgdat->flags))) {
/*
* Immediately reclaim when written back.
- * Similar in principal to deactivate_page()
- * except we already have the page isolated
+ * Similar in principle to deactivate_page()
+ * except we already have the folio isolated
* and know it's dirty
*/
- inc_node_page_state(page, NR_VMSCAN_IMMEDIATE);
- SetPageReclaim(page);
+ node_stat_mod_folio(folio, NR_VMSCAN_IMMEDIATE,
+ nr_pages);
+ folio_set_reclaim(folio);
goto activate_locked;
}
@@ -1802,8 +1805,8 @@ static unsigned int shrink_page_list(struct list_head *page_list,
goto keep_locked;
/*
- * Page is dirty. Flush the TLB if a writable entry
- * potentially exists to avoid CPU writes after IO
+ * Folio is dirty. Flush the TLB if a writable entry
+ * potentially exists to avoid CPU writes after I/O
* starts and then write it out here.
*/
try_to_unmap_flush_dirty();
@@ -1815,23 +1818,24 @@ static unsigned int shrink_page_list(struct list_head *page_list,
case PAGE_SUCCESS:
stat->nr_pageout += nr_pages;
- if (PageWriteback(page))
+ if (folio_test_writeback(folio))
goto keep;
- if (PageDirty(page))
+ if (folio_test_dirty(folio))
goto keep;
/*
* A synchronous write - probably a ramdisk. Go
- * ahead and try to reclaim the page.
+ * ahead and try to reclaim the folio.
*/
- if (!trylock_page(page))
+ if (!folio_trylock(folio))
goto keep;
- if (PageDirty(page) || PageWriteback(page))
+ if (folio_test_dirty(folio) ||
+ folio_test_writeback(folio))
goto keep_locked;
- mapping = page_mapping(page);
+ mapping = folio_mapping(folio);
fallthrough;
case PAGE_CLEAN:
- ; /* try to free the page below */
+ ; /* try to free the folio below */
}
}
--
2.34.1
* [PATCH 09/21] vmscan: Convert page buffer handling to use folios
From: Matthew Wilcox (Oracle) @ 2022-04-29 19:23 UTC
To: akpm; +Cc: Matthew Wilcox (Oracle), linux-mm
This mostly just removes calls to compound_head(), although nr_reclaimed
should be incremented by the number of pages, not just 1.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
mm/vmscan.c | 50 ++++++++++++++++++++++++++------------------------
1 file changed, 26 insertions(+), 24 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 950eeb2f759b..cda43f0bb285 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1840,42 +1840,44 @@ static unsigned int shrink_page_list(struct list_head *page_list,
}
/*
- * If the page has buffers, try to free the buffer mappings
- * associated with this page. If we succeed we try to free
- * the page as well.
+ * If the folio has buffers, try to free the buffer
+ * mappings associated with this folio. If we succeed
+ * we try to free the folio as well.
*
- * We do this even if the page is PageDirty().
- * try_to_release_page() does not perform I/O, but it is
- * possible for a page to have PageDirty set, but it is actually
- * clean (all its buffers are clean). This happens if the
- * buffers were written out directly, with submit_bh(). ext3
- * will do this, as well as the blockdev mapping.
- * try_to_release_page() will discover that cleanness and will
- * drop the buffers and mark the page clean - it can be freed.
+ * We do this even if the folio is dirty.
+ * filemap_release_folio() does not perform I/O, but it
+ * is possible for a folio to have the dirty flag set,
+ * but it is actually clean (all its buffers are clean).
+ * This happens if the buffers were written out directly,
+ * with submit_bh(). ext3 will do this, as well as
+ * the blockdev mapping. filemap_release_folio() will
+ * discover that cleanness and will drop the buffers
+ * and mark the folio clean - it can be freed.
*
- * Rarely, pages can have buffers and no ->mapping. These are
- * the pages which were not successfully invalidated in
- * truncate_cleanup_page(). We try to drop those buffers here
- * and if that worked, and the page is no longer mapped into
- * process address space (page_count == 1) it can be freed.
- * Otherwise, leave the page on the LRU so it is swappable.
+ * Rarely, folios can have buffers and no ->mapping.
+ * These are the folios which were not successfully
+ * invalidated in truncate_cleanup_folio(). We try to
+ * drop those buffers here and if that worked, and the
+ * folio is no longer mapped into process address space
+ * (refcount == 1) it can be freed. Otherwise, leave
+ * the folio on the LRU so it is swappable.
*/
- if (page_has_private(page)) {
- if (!try_to_release_page(page, sc->gfp_mask))
+ if (folio_has_private(folio)) {
+ if (!filemap_release_folio(folio, sc->gfp_mask))
goto activate_locked;
- if (!mapping && page_count(page) == 1) {
- unlock_page(page);
- if (put_page_testzero(page))
+ if (!mapping && folio_ref_count(folio) == 1) {
+ folio_unlock(folio);
+ if (folio_put_testzero(folio))
goto free_it;
else {
/*
* rare race with speculative reference.
* the speculative reference will free
- * this page shortly, so we may
+ * this folio shortly, so we may
* increment nr_reclaimed here (and
* leave it off the LRU).
*/
- nr_reclaimed++;
+ nr_reclaimed += nr_pages;
continue;
}
}
--
2.34.1
* [PATCH 10/21] vmscan: Convert lazy freeing to folios
From: Matthew Wilcox (Oracle) @ 2022-04-29 19:23 UTC
To: akpm; +Cc: Matthew Wilcox (Oracle), linux-mm
Remove a hidden call to compound_head(), and account nr_pages instead
of a single page. This matches the code in lru_lazyfree_fn() that
accounts nr_pages to PGLAZYFREE.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
include/linux/memcontrol.h | 14 ++++++++++++++
mm/vmscan.c | 18 +++++++++---------
2 files changed, 23 insertions(+), 9 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 89b14729d59f..06a16c82558b 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1061,6 +1061,15 @@ static inline void count_memcg_page_event(struct page *page,
count_memcg_events(memcg, idx, 1);
}
+static inline void count_memcg_folio_events(struct folio *folio,
+ enum vm_event_item idx, unsigned long nr)
+{
+ struct mem_cgroup *memcg = folio_memcg(folio);
+
+ if (memcg)
+ count_memcg_events(memcg, idx, nr);
+}
+
static inline void count_memcg_event_mm(struct mm_struct *mm,
enum vm_event_item idx)
{
@@ -1498,6 +1507,11 @@ static inline void count_memcg_page_event(struct page *page,
{
}
+static inline void count_memcg_folio_events(struct folio *folio,
+ enum vm_event_item idx, unsigned long nr)
+{
+}
+
static inline
void count_memcg_event_mm(struct mm_struct *mm, enum vm_event_item idx)
{
diff --git a/mm/vmscan.c b/mm/vmscan.c
index cda43f0bb285..0368ea3e9880 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1883,20 +1883,20 @@ static unsigned int shrink_page_list(struct list_head *page_list,
}
}
- if (PageAnon(page) && !PageSwapBacked(page)) {
+ if (folio_test_anon(folio) && !folio_test_swapbacked(folio)) {
/* follow __remove_mapping for reference */
- if (!page_ref_freeze(page, 1))
+ if (!folio_ref_freeze(folio, 1))
goto keep_locked;
/*
- * The page has only one reference left, which is
+ * The folio has only one reference left, which is
* from the isolation. After the caller puts the
- * page back on lru and drops the reference, the
- * page will be freed anyway. It doesn't matter
- * which lru it goes. So we don't bother checking
- * PageDirty here.
+ * folio back on the lru and drops the reference, the
+ * folio will be freed anyway. It doesn't matter
+ * which lru it goes on. So we don't bother checking
+ * the dirty flag here.
*/
- count_vm_event(PGLAZYFREED);
- count_memcg_page_event(page, PGLAZYFREED);
+ count_vm_events(PGLAZYFREED, nr_pages);
+ count_memcg_folio_events(folio, PGLAZYFREED, nr_pages);
} else if (!mapping || !__remove_mapping(mapping, folio, true,
sc->target_mem_cgroup))
goto keep_locked;
--
2.34.1
* [PATCH 11/21] vmscan: Move initialisation of mapping down
From: Matthew Wilcox (Oracle) @ 2022-04-29 19:23 UTC
To: akpm; +Cc: Matthew Wilcox (Oracle), linux-mm
Now that we don't interrogate the BDI for congestion, we can delay looking
up the folio's mapping until we've got further through the function,
reducing register pressure and saving a call to folio_mapping() for folios
we're adding to the swap cache.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
mm/vmscan.c | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0368ea3e9880..9ac2583ca5e5 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1568,12 +1568,11 @@ static unsigned int shrink_page_list(struct list_head *page_list,
stat->nr_unqueued_dirty += nr_pages;
/*
- * Treat this page as congested if the underlying BDI is or if
+ * Treat this page as congested if
* pages are cycling through the LRU so quickly that the
* pages marked for immediate reclaim are making it to the
* end of the LRU a second time.
*/
- mapping = page_mapping(page);
if (writeback && PageReclaim(page))
stat->nr_congested += nr_pages;
@@ -1725,9 +1724,6 @@ static unsigned int shrink_page_list(struct list_head *page_list,
}
may_enter_fs = true;
-
- /* Adding to swap updated mapping */
- mapping = page_mapping(page);
}
} else if (PageSwapBacked(page) && PageTransHuge(page)) {
/* Split shmem THP */
@@ -1768,6 +1764,7 @@ static unsigned int shrink_page_list(struct list_head *page_list,
}
}
+ mapping = folio_mapping(folio);
if (folio_test_dirty(folio)) {
/*
* Only kswapd can writeback filesystem folios
--
2.34.1
* [PATCH 12/21] vmscan: Convert the activate_locked portion of shrink_page_list to folios
From: Matthew Wilcox (Oracle) @ 2022-04-29 19:23 UTC
To: akpm; +Cc: Matthew Wilcox (Oracle), linux-mm
This accounts the number of pages activated correctly for large folios.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
mm/vmscan.c | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9ac2583ca5e5..85c9758f6f32 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1927,15 +1927,16 @@ static unsigned int shrink_page_list(struct list_head *page_list,
}
activate_locked:
/* Not a candidate for swapping, so reclaim swap space. */
- if (PageSwapCache(page) && (mem_cgroup_swap_full(page) ||
- PageMlocked(page)))
- try_to_free_swap(page);
- VM_BUG_ON_PAGE(PageActive(page), page);
- if (!PageMlocked(page)) {
- int type = page_is_file_lru(page);
- SetPageActive(page);
+ if (folio_test_swapcache(folio) &&
+ (mem_cgroup_swap_full(&folio->page) ||
+ folio_test_mlocked(folio)))
+ try_to_free_swap(&folio->page);
+ VM_BUG_ON_FOLIO(folio_test_active(folio), folio);
+ if (!folio_test_mlocked(folio)) {
+ int type = folio_is_file_lru(folio);
+ folio_set_active(folio);
stat->nr_activate[type] += nr_pages;
- count_memcg_page_event(page, PGACTIVATE);
+ count_memcg_folio_events(folio, PGACTIVATE, nr_pages);
}
keep_locked:
unlock_page(page);
--
2.34.1
* [PATCH 13/21] vmscan: Remove remaining uses of page in shrink_page_list
From: Matthew Wilcox (Oracle) @ 2022-04-29 19:23 UTC
To: akpm; +Cc: Matthew Wilcox (Oracle), linux-mm
These are all straightforward conversions to the folio API.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
mm/vmscan.c | 115 ++++++++++++++++++++++++++--------------------------
1 file changed, 57 insertions(+), 58 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 85c9758f6f32..cc9b93c7fa0c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1524,7 +1524,6 @@ static unsigned int shrink_page_list(struct list_head *page_list,
retry:
while (!list_empty(page_list)) {
struct address_space *mapping;
- struct page *page;
struct folio *folio;
enum page_references references = PAGEREF_RECLAIM;
bool dirty, writeback, may_enter_fs;
@@ -1534,31 +1533,31 @@ static unsigned int shrink_page_list(struct list_head *page_list,
folio = lru_to_folio(page_list);
list_del(&folio->lru);
- page = &folio->page;
- if (!trylock_page(page))
+ if (!folio_trylock(folio))
goto keep;
- VM_BUG_ON_PAGE(PageActive(page), page);
+ VM_BUG_ON_FOLIO(folio_test_active(folio), folio);
- nr_pages = compound_nr(page);
+ nr_pages = folio_nr_pages(folio);
- /* Account the number of base pages even though THP */
+ /* Account the number of base pages */
sc->nr_scanned += nr_pages;
- if (unlikely(!page_evictable(page)))
+ if (unlikely(!folio_evictable(folio)))
goto activate_locked;
if (!sc->may_unmap && folio_mapped(folio))
goto keep_locked;
may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
- (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO));
+ (folio_test_swapcache(folio) &&
+ (sc->gfp_mask & __GFP_IO));
/*
* The number of dirty pages determines if a node is marked
* reclaim_congested. kswapd will stall and start writing
- * pages if the tail of the LRU is all dirty unqueued pages.
+ * folios if the tail of the LRU is all dirty unqueued folios.
*/
folio_check_dirty_writeback(folio, &dirty, &writeback);
if (dirty || writeback)
@@ -1568,21 +1567,21 @@ static unsigned int shrink_page_list(struct list_head *page_list,
stat->nr_unqueued_dirty += nr_pages;
/*
- * Treat this page as congested if
- * pages are cycling through the LRU so quickly that the
- * pages marked for immediate reclaim are making it to the
- * end of the LRU a second time.
+ * Treat this folio as congested if folios are cycling
+ * through the LRU so quickly that the folios marked
+ * for immediate reclaim are making it to the end of
+ * the LRU a second time.
*/
- if (writeback && PageReclaim(page))
+ if (writeback && folio_test_reclaim(folio))
stat->nr_congested += nr_pages;
/*
* If a folio at the tail of the LRU is under writeback, there
* are three cases to consider.
*
- * 1) If reclaim is encountering an excessive number of folios
- * under writeback and this folio is both under
- * writeback and has the reclaim flag set then it
+ * 1) If reclaim is encountering an excessive number
+ * of folios under writeback and this folio has both
+ * the writeback and reclaim flags set, then it
* indicates that folios are being queued for I/O but
* are being recycled through the LRU before the I/O
* can complete. Waiting on the folio itself risks an
@@ -1633,16 +1632,16 @@ static unsigned int shrink_page_list(struct list_head *page_list,
!folio_test_reclaim(folio) || !may_enter_fs) {
/*
* This is slightly racy -
- * folio_end_writeback() might have just
- * cleared the reclaim flag, then setting
- * reclaim here ends up interpreted as
- * the readahead flag - but that does
- * not matter enough to care. What we
- * do want is for this folio to have
- * the reclaim flag set next time memcg
- * reclaim reaches the tests above, so
- * it will then folio_wait_writeback()
- * to avoid OOM; and it's also appropriate
+ * folio_end_writeback() might have
+ * just cleared the reclaim flag, then
+ * setting the reclaim flag here ends up
+ * interpreted as the readahead flag - but
+ * that does not matter enough to care.
+ * What we do want is for this folio to
+ * have the reclaim flag set next time
+ * memcg reclaim reaches the tests above,
+ * so it will then wait for writeback to
+ * avoid OOM; and it's also appropriate
* in global reclaim.
*/
folio_set_reclaim(folio);
@@ -1670,37 +1669,37 @@ static unsigned int shrink_page_list(struct list_head *page_list,
goto keep_locked;
case PAGEREF_RECLAIM:
case PAGEREF_RECLAIM_CLEAN:
- ; /* try to reclaim the page below */
+ ; /* try to reclaim the folio below */
}
/*
- * Before reclaiming the page, try to relocate
+ * Before reclaiming the folio, try to relocate
* its contents to another node.
*/
if (do_demote_pass &&
- (thp_migration_supported() || !PageTransHuge(page))) {
- list_add(&page->lru, &demote_pages);
- unlock_page(page);
+ (thp_migration_supported() || !folio_test_large(folio))) {
+ list_add(&folio->lru, &demote_pages);
+ folio_unlock(folio);
continue;
}
/*
* Anonymous process memory has backing store?
* Try to allocate it some swap space here.
- * Lazyfree page could be freed directly
+ * Lazyfree folio could be freed directly
*/
- if (PageAnon(page) && PageSwapBacked(page)) {
- if (!PageSwapCache(page)) {
+ if (folio_test_anon(folio) && folio_test_swapbacked(folio)) {
+ if (!folio_test_swapcache(folio)) {
if (!(sc->gfp_mask & __GFP_IO))
goto keep_locked;
if (folio_maybe_dma_pinned(folio))
goto keep_locked;
- if (PageTransHuge(page)) {
- /* cannot split THP, skip it */
+ if (folio_test_large(folio)) {
+ /* cannot split folio, skip it */
if (!can_split_folio(folio, NULL))
goto activate_locked;
/*
- * Split pages without a PMD map right
+ * Split folios without a PMD map right
* away. Chances are some or all of the
* tail pages can be freed without IO.
*/
@@ -1725,20 +1724,19 @@ static unsigned int shrink_page_list(struct list_head *page_list,
may_enter_fs = true;
}
- } else if (PageSwapBacked(page) && PageTransHuge(page)) {
- /* Split shmem THP */
+ } else if (folio_test_swapbacked(folio) &&
+ folio_test_large(folio)) {
+ /* Split shmem folio */
if (split_folio_to_list(folio, page_list))
goto keep_locked;
}
/*
- * THP may get split above, need minus tail pages and update
- * nr_pages to avoid accounting tail pages twice.
- *
- * The tail pages that are added into swap cache successfully
- * reach here.
+ * If the folio was split above, the tail pages will make
+ * their own pass through this function and be accounted
+ * then.
*/
- if ((nr_pages > 1) && !PageTransHuge(page)) {
+ if ((nr_pages > 1) && !folio_test_large(folio)) {
sc->nr_scanned -= (nr_pages - 1);
nr_pages = 1;
}
@@ -1898,11 +1896,11 @@ static unsigned int shrink_page_list(struct list_head *page_list,
sc->target_mem_cgroup))
goto keep_locked;
- unlock_page(page);
+ folio_unlock(folio);
free_it:
/*
- * THP may get swapped out in a whole, need account
- * all base pages.
+ * Folio may get swapped out as a whole, need to account
+ * all pages in it.
*/
nr_reclaimed += nr_pages;
@@ -1910,10 +1908,10 @@ static unsigned int shrink_page_list(struct list_head *page_list,
* Is there need to periodically free_page_list? It would
* appear not as the counts should be low
*/
- if (unlikely(PageTransHuge(page)))
- destroy_compound_page(page);
+ if (unlikely(folio_test_large(folio)))
+ destroy_compound_page(&folio->page);
else
- list_add(&page->lru, &free_pages);
+ list_add(&folio->lru, &free_pages);
continue;
activate_locked_split:
@@ -1939,18 +1937,19 @@ static unsigned int shrink_page_list(struct list_head *page_list,
count_memcg_folio_events(folio, PGACTIVATE, nr_pages);
}
keep_locked:
- unlock_page(page);
+ folio_unlock(folio);
keep:
- list_add(&page->lru, &ret_pages);
- VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page);
+ list_add(&folio->lru, &ret_pages);
+ VM_BUG_ON_FOLIO(folio_test_lru(folio) ||
+ folio_test_unevictable(folio), folio);
}
/* 'page_list' is always empty here */
- /* Migrate pages selected for demotion */
+ /* Migrate folios selected for demotion */
nr_reclaimed += demote_page_list(&demote_pages, pgdat);
- /* Pages that could not be demoted are still in @demote_pages */
+ /* Folios that could not be demoted are still in @demote_pages */
if (!list_empty(&demote_pages)) {
- /* Pages which failed to demoted go back on @page_list for retry: */
+ /* Folios which weren't demoted go back on @page_list for retry: */
list_splice_init(&demote_pages, page_list);
do_demote_pass = false;
goto retry;
--
2.34.1
* [PATCH 14/21] mm/shmem: Use a folio in shmem_unused_huge_shrink
2022-04-29 19:23 [PATCH 00/21] Folio patches for 5.19 Matthew Wilcox (Oracle)
` (12 preceding siblings ...)
2022-04-29 19:23 ` [PATCH 13/21] vmscan: Remove remaining uses of page in shrink_page_list Matthew Wilcox (Oracle)
@ 2022-04-29 19:23 ` Matthew Wilcox (Oracle)
2022-04-29 19:23 ` [PATCH 15/21] mm/swap: Add folio_throttle_swaprate Matthew Wilcox (Oracle)
` (7 subsequent siblings)
21 siblings, 0 replies; 29+ messages in thread
From: Matthew Wilcox (Oracle) @ 2022-04-29 19:23 UTC (permalink / raw)
To: akpm; +Cc: Matthew Wilcox (Oracle), linux-mm
When calling split_huge_page() we usually have to find the precise page,
but that's not necessary here because we only need to unlock and put
the folio afterwards. Saves 231 bytes of text (20% of this function).
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
mm/shmem.c | 23 ++++++++++++-----------
1 file changed, 12 insertions(+), 11 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index 85c23696efc6..3461bdec6b38 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -553,7 +553,7 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
LIST_HEAD(to_remove);
struct inode *inode;
struct shmem_inode_info *info;
- struct page *page;
+ struct folio *folio;
unsigned long batch = sc ? sc->nr_to_scan : 128;
int split = 0;
@@ -597,6 +597,7 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
list_for_each_safe(pos, next, &list) {
int ret;
+ pgoff_t index;
info = list_entry(pos, struct shmem_inode_info, shrinklist);
inode = &info->vfs_inode;
@@ -604,14 +605,14 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
if (nr_to_split && split >= nr_to_split)
goto move_back;
- page = find_get_page(inode->i_mapping,
- (inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT);
- if (!page)
+ index = (inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT;
+ folio = filemap_get_folio(inode->i_mapping, index);
+ if (!folio)
goto drop;
/* No huge page at the end of the file: nothing to split */
- if (!PageTransHuge(page)) {
- put_page(page);
+ if (!folio_test_large(folio)) {
+ folio_put(folio);
goto drop;
}
@@ -622,14 +623,14 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
* Waiting for the lock may lead to deadlock in the
* reclaim path.
*/
- if (!trylock_page(page)) {
- put_page(page);
+ if (!folio_trylock(folio)) {
+ folio_put(folio);
goto move_back;
}
- ret = split_huge_page(page);
- unlock_page(page);
- put_page(page);
+ ret = split_huge_page(&folio->page);
+ folio_unlock(folio);
+ folio_put(folio);
/* If split failed move the inode on the list back to shrinklist */
if (ret)
--
2.34.1
* [PATCH 15/21] mm/swap: Add folio_throttle_swaprate
2022-04-29 19:23 [PATCH 00/21] Folio patches for 5.19 Matthew Wilcox (Oracle)
` (13 preceding siblings ...)
2022-04-29 19:23 ` [PATCH 14/21] mm/shmem: Use a folio in shmem_unused_huge_shrink Matthew Wilcox (Oracle)
@ 2022-04-29 19:23 ` Matthew Wilcox (Oracle)
2022-04-29 19:23 ` [PATCH 16/21] mm/shmem: Convert shmem_add_to_page_cache to take a folio Matthew Wilcox (Oracle)
` (6 subsequent siblings)
21 siblings, 0 replies; 29+ messages in thread
From: Matthew Wilcox (Oracle) @ 2022-04-29 19:23 UTC (permalink / raw)
To: akpm; +Cc: Matthew Wilcox (Oracle), linux-mm
The only use of the page argument to cgroup_throttle_swaprate() is to
get the node ID, and this will be the same for all pages in the folio,
so just pass in the first page of the folio.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
include/linux/swap.h | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index f87bb495e482..96f7129f6ee2 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -736,6 +736,10 @@ static inline void cgroup_throttle_swaprate(struct page *page, gfp_t gfp_mask)
{
}
#endif
+static inline void folio_throttle_swaprate(struct folio *folio, gfp_t gfp)
+{
+ cgroup_throttle_swaprate(&folio->page, gfp);
+}
#ifdef CONFIG_MEMCG_SWAP
void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry);
--
2.34.1
* [PATCH 16/21] mm/shmem: Convert shmem_add_to_page_cache to take a folio
2022-04-29 19:23 [PATCH 00/21] Folio patches for 5.19 Matthew Wilcox (Oracle)
` (14 preceding siblings ...)
2022-04-29 19:23 ` [PATCH 15/21] mm/swap: Add folio_throttle_swaprate Matthew Wilcox (Oracle)
@ 2022-04-29 19:23 ` Matthew Wilcox (Oracle)
2022-05-03 11:10 ` Sebastian Andrzej Siewior
2022-04-29 19:23 ` [PATCH 17/21] mm/shmem: Turn shmem_should_replace_page into shmem_should_replace_folio Matthew Wilcox (Oracle)
` (5 subsequent siblings)
21 siblings, 1 reply; 29+ messages in thread
From: Matthew Wilcox (Oracle) @ 2022-04-29 19:23 UTC (permalink / raw)
To: akpm; +Cc: Matthew Wilcox (Oracle), linux-mm
Shrinks shmem_add_to_page_cache() by 16 bytes. All the callers grow,
but this is temporary as they will all be converted to folios soon.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
mm/shmem.c | 57 +++++++++++++++++++++++++++++-------------------------
1 file changed, 31 insertions(+), 26 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index 3461bdec6b38..4331a4daac01 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -695,36 +695,35 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
/*
* Like add_to_page_cache_locked, but error if expected item has gone.
*/
-static int shmem_add_to_page_cache(struct page *page,
+static int shmem_add_to_page_cache(struct folio *folio,
struct address_space *mapping,
pgoff_t index, void *expected, gfp_t gfp,
struct mm_struct *charge_mm)
{
- XA_STATE_ORDER(xas, &mapping->i_pages, index, compound_order(page));
- unsigned long nr = compound_nr(page);
+ XA_STATE_ORDER(xas, &mapping->i_pages, index, folio_order(folio));
+ long nr = folio_nr_pages(folio);
int error;
- VM_BUG_ON_PAGE(PageTail(page), page);
- VM_BUG_ON_PAGE(index != round_down(index, nr), page);
- VM_BUG_ON_PAGE(!PageLocked(page), page);
- VM_BUG_ON_PAGE(!PageSwapBacked(page), page);
- VM_BUG_ON(expected && PageTransHuge(page));
+ VM_BUG_ON_FOLIO(index != round_down(index, nr), folio);
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+ VM_BUG_ON_FOLIO(!folio_test_swapbacked(folio), folio);
+ VM_BUG_ON(expected && folio_test_large(folio));
- page_ref_add(page, nr);
- page->mapping = mapping;
- page->index = index;
+ folio_ref_add(folio, nr);
+ folio->mapping = mapping;
+ folio->index = index;
- if (!PageSwapCache(page)) {
- error = mem_cgroup_charge(page_folio(page), charge_mm, gfp);
+ if (!folio_test_swapcache(folio)) {
+ error = mem_cgroup_charge(folio, charge_mm, gfp);
if (error) {
- if (PageTransHuge(page)) {
+ if (folio_test_large(folio)) {
count_vm_event(THP_FILE_FALLBACK);
count_vm_event(THP_FILE_FALLBACK_CHARGE);
}
goto error;
}
}
- cgroup_throttle_swaprate(page, gfp);
+ folio_throttle_swaprate(folio, gfp);
do {
xas_lock_irq(&xas);
@@ -736,16 +735,16 @@ static int shmem_add_to_page_cache(struct page *page,
xas_set_err(&xas, -EEXIST);
goto unlock;
}
- xas_store(&xas, page);
+ xas_store(&xas, folio);
if (xas_error(&xas))
goto unlock;
- if (PageTransHuge(page)) {
+ if (folio_test_large(folio)) {
count_vm_event(THP_FILE_ALLOC);
- __mod_lruvec_page_state(page, NR_SHMEM_THPS, nr);
+ __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, nr);
}
mapping->nrpages += nr;
- __mod_lruvec_page_state(page, NR_FILE_PAGES, nr);
- __mod_lruvec_page_state(page, NR_SHMEM, nr);
+ __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr);
+ __lruvec_stat_mod_folio(folio, NR_SHMEM, nr);
unlock:
xas_unlock_irq(&xas);
} while (xas_nomem(&xas, gfp));
@@ -757,8 +756,8 @@ static int shmem_add_to_page_cache(struct page *page,
return 0;
error:
- page->mapping = NULL;
- page_ref_sub(page, nr);
+ folio->mapping = NULL;
+ folio_ref_sub(folio, nr);
return error;
}
@@ -1690,7 +1689,8 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
struct address_space *mapping = inode->i_mapping;
struct shmem_inode_info *info = SHMEM_I(inode);
struct mm_struct *charge_mm = vma ? vma->vm_mm : NULL;
- struct page *page;
+ struct page *page = NULL;
+ struct folio *folio;
swp_entry_t swap;
int error;
@@ -1740,7 +1740,8 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
goto failed;
}
- error = shmem_add_to_page_cache(page, mapping, index,
+ folio = page_folio(page);
+ error = shmem_add_to_page_cache(folio, mapping, index,
swp_to_radix_entry(swap), gfp,
charge_mm);
if (error)
@@ -1791,6 +1792,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
struct shmem_inode_info *info = SHMEM_I(inode);
struct shmem_sb_info *sbinfo;
struct mm_struct *charge_mm;
+ struct folio *folio;
struct page *page;
pgoff_t hindex = index;
gfp_t huge_gfp;
@@ -1905,7 +1907,8 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
if (sgp == SGP_WRITE)
__SetPageReferenced(page);
- error = shmem_add_to_page_cache(page, mapping, hindex,
+ folio = page_folio(page);
+ error = shmem_add_to_page_cache(folio, mapping, hindex,
NULL, gfp & GFP_RECLAIM_MASK,
charge_mm);
if (error)
@@ -2327,6 +2330,7 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
gfp_t gfp = mapping_gfp_mask(mapping);
pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
void *page_kaddr;
+ struct folio *folio;
struct page *page;
int ret;
pgoff_t max_off;
@@ -2385,7 +2389,8 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
if (unlikely(pgoff >= max_off))
goto out_release;
- ret = shmem_add_to_page_cache(page, mapping, pgoff, NULL,
+ folio = page_folio(page);
+ ret = shmem_add_to_page_cache(folio, mapping, pgoff, NULL,
gfp & GFP_RECLAIM_MASK, dst_mm);
if (ret)
goto out_release;
--
2.34.1
* Re: [PATCH 16/21] mm/shmem: Convert shmem_add_to_page_cache to take a folio
2022-04-29 19:23 ` [PATCH 16/21] mm/shmem: Convert shmem_add_to_page_cache to take a folio Matthew Wilcox (Oracle)
@ 2022-05-03 11:10 ` Sebastian Andrzej Siewior
2022-05-03 12:48 ` Matthew Wilcox
0 siblings, 1 reply; 29+ messages in thread
From: Sebastian Andrzej Siewior @ 2022-05-03 11:10 UTC (permalink / raw)
To: Matthew Wilcox (Oracle); +Cc: akpm, linux-mm, tglx
On 2022-04-29 20:23:24 [+0100], Matthew Wilcox (Oracle) wrote:
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 3461bdec6b38..4331a4daac01 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -736,16 +735,16 @@ static int shmem_add_to_page_cache(struct page *page,
> xas_set_err(&xas, -EEXIST);
> goto unlock;
> }
> - xas_store(&xas, page);
> + xas_store(&xas, folio);
> if (xas_error(&xas))
> goto unlock;
> - if (PageTransHuge(page)) {
> + if (folio_test_large(folio)) {
> count_vm_event(THP_FILE_ALLOC);
> - __mod_lruvec_page_state(page, NR_SHMEM_THPS, nr);
> + __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, nr);
> }
| CC mm/shmem.o
|In file included from <command-line>:
|mm/shmem.c: In function ‘shmem_add_to_page_cache’:
|include/linux/compiler_types.h:352:45: error: call to ‘__compiletime_assert_262’ declared with attribute error: BUILD_BUG failed
| 352 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
| | ^
…
|mm/shmem.c:743:40: note: in expansion of macro ‘THP_FILE_ALLOC’
| 743 | count_vm_event(THP_FILE_ALLOC);
| | ^~~~~~~~~~~~~~
|
and
| $ git grep THP_FILE_ALLOC
| include/linux/vm_event_item.h: THP_FILE_ALLOC,
| include/linux/vm_event_item.h:#define THP_FILE_ALLOC ({ BUILD_BUG(); 0; })
| mm/shmem.c: count_vm_event(THP_FILE_ALLOC);
| $ grep CONFIG_TRANSPARENT_HUGEPAGE .config
| # CONFIG_TRANSPARENT_HUGEPAGE is not set
Sebastian
* Re: [PATCH 16/21] mm/shmem: Convert shmem_add_to_page_cache to take a folio
2022-05-03 11:10 ` Sebastian Andrzej Siewior
@ 2022-05-03 12:48 ` Matthew Wilcox
2022-05-03 13:00 ` Sebastian Andrzej Siewior
0 siblings, 1 reply; 29+ messages in thread
From: Matthew Wilcox @ 2022-05-03 12:48 UTC (permalink / raw)
To: Sebastian Andrzej Siewior; +Cc: akpm, linux-mm, tglx
On Tue, May 03, 2022 at 01:10:09PM +0200, Sebastian Andrzej Siewior wrote:
> On 2022-04-29 20:23:24 [+0100], Matthew Wilcox (Oracle) wrote:
> > diff --git a/mm/shmem.c b/mm/shmem.c
> > index 3461bdec6b38..4331a4daac01 100644
> > --- a/mm/shmem.c
> > +++ b/mm/shmem.c
> > @@ -736,16 +735,16 @@ static int shmem_add_to_page_cache(struct page *page,
> > xas_set_err(&xas, -EEXIST);
> > goto unlock;
> > }
> > - xas_store(&xas, page);
> > + xas_store(&xas, folio);
> > if (xas_error(&xas))
> > goto unlock;
> > - if (PageTransHuge(page)) {
> > + if (folio_test_large(folio)) {
> > count_vm_event(THP_FILE_ALLOC);
> > - __mod_lruvec_page_state(page, NR_SHMEM_THPS, nr);
> > + __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, nr);
> > }
>
> | CC mm/shmem.o
> |In file included from <command-line>:
> |mm/shmem.c: In function ‘shmem_add_to_page_cache’:
> |include/linux/compiler_types.h:352:45: error: call to ‘__compiletime_assert_262’ declared with attribute error: BUILD_BUG failed
> | 352 | _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
> | | ^
> …
> |mm/shmem.c:743:40: note: in expansion of macro ‘THP_FILE_ALLOC’
> | 743 | count_vm_event(THP_FILE_ALLOC);
Thanks. Stephen already reported that; fix here:
https://lore.kernel.org/all/Ym++SI1ftbRg+9zK@casper.infradead.org/
* Re: [PATCH 16/21] mm/shmem: Convert shmem_add_to_page_cache to take a folio
2022-05-03 12:48 ` Matthew Wilcox
@ 2022-05-03 13:00 ` Sebastian Andrzej Siewior
2022-05-03 13:05 ` Matthew Wilcox
0 siblings, 1 reply; 29+ messages in thread
From: Sebastian Andrzej Siewior @ 2022-05-03 13:00 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: akpm, linux-mm, tglx
On 2022-05-03 13:48:50 [+0100], Matthew Wilcox wrote:
> Thanks. Stephen already reported that; fix here:
>
> https://lore.kernel.org/all/Ym++SI1ftbRg+9zK@casper.infradead.org/
Stephen says "I applied the above patch to the mm tree merge today" and
I have here next-20220503. I don't have the BUILD_BUG() in
can_split_folio() anymore so I have this change.
My guess is that since THP_FILE_ALLOC is defined as BUILD_BUG() for
!CONFIG_TRANSPARENT_HUGEPAGE and there is nothing that removes that part
of the code, I end up in BUILD_BUG with CGROUP and no THP.
PageTransHuge() used to "return false" for !CONFIG_TRANSPARENT_HUGEPAGE
which isn't the case for folio_test_large().
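Roughly, with CONFIG_TRANSPARENT_HUGEPAGE disabled, the two helpers look
like this (paraphrased from the headers, not the literal text):
static inline int PageTransHuge(struct page *page)
{
	return 0;	/* compile-time constant: dead branches get folded away */
}
static inline bool folio_test_large(struct folio *folio)
{
	return folio_test_head(folio);	/* real runtime test of PG_head */
}
So with "if (PageTransHuge(page))" the compiler can prove the branch dead
and drop the count_vm_event(THP_FILE_ALLOC) call, which is why the
BUILD_BUG() hidden in THP_FILE_ALLOC never tripped before;
"if (folio_test_large(folio))" keeps the branch, so now it does.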
Sebastian
* Re: [PATCH 16/21] mm/shmem: Convert shmem_add_to_page_cache to take a folio
2022-05-03 13:00 ` Sebastian Andrzej Siewior
@ 2022-05-03 13:05 ` Matthew Wilcox
2022-05-03 13:09 ` Sebastian Andrzej Siewior
0 siblings, 1 reply; 29+ messages in thread
From: Matthew Wilcox @ 2022-05-03 13:05 UTC (permalink / raw)
To: Sebastian Andrzej Siewior; +Cc: akpm, linux-mm, tglx
On Tue, May 03, 2022 at 03:00:05PM +0200, Sebastian Andrzej Siewior wrote:
> On 2022-05-03 13:48:50 [+0100], Matthew Wilcox wrote:
> > Thanks. Stephen already reported that; fix here:
> >
> > https://lore.kernel.org/all/Ym++SI1ftbRg+9zK@casper.infradead.org/
>
> Stephen says "I applied the above patch to the mm tree merge today", and
> I am on next-20220503 here. I no longer have the BUILD_BUG() in
> can_split_folio(), so I already have that change.
Ah! I didn't realise you were testing next; I thought you'd picked up
these patches some other way.
> My guess is that since THP_FILE_ALLOC is defined as BUILD_BUG() for
> !CONFIG_TRANSPARENT_HUGEPAGE and there is nothing that removes that part
> of the code, I end up in BUILD_BUG with CGROUP and no THP.
>
> PageTransHuge() used to "return false" for !CONFIG_TRANSPARENT_HUGEPAGE
> which isn't the case for folio_test_large().
Indeed, indeed. I missed another case. This fixes it:
diff --git a/mm/shmem.c b/mm/shmem.c
index 5b161a92e6f1..019ad8bf0d21 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -716,7 +716,7 @@ static int shmem_add_to_page_cache(struct folio *folio,
if (!folio_test_swapcache(folio)) {
error = mem_cgroup_charge(folio, charge_mm, gfp);
if (error) {
- if (folio_test_large(folio)) {
+ if (folio_test_pmd_mappable(folio)) {
count_vm_event(THP_FILE_FALLBACK);
count_vm_event(THP_FILE_FALLBACK_CHARGE);
}
* [PATCH 17/21] mm/shmem: Turn shmem_should_replace_page into shmem_should_replace_folio
2022-04-29 19:23 [PATCH 00/21] Folio patches for 5.19 Matthew Wilcox (Oracle)
` (15 preceding siblings ...)
2022-04-29 19:23 ` [PATCH 16/21] mm/shmem: Convert shmem_add_to_page_cache to take a folio Matthew Wilcox (Oracle)
@ 2022-04-29 19:23 ` Matthew Wilcox (Oracle)
2022-04-29 19:23 ` [PATCH 18/21] mm/shmem: Turn shmem_alloc_page() into shmem_alloc_folio() Matthew Wilcox (Oracle)
` (4 subsequent siblings)
21 siblings, 0 replies; 29+ messages in thread
From: Matthew Wilcox (Oracle) @ 2022-04-29 19:23 UTC (permalink / raw)
To: akpm; +Cc: Matthew Wilcox (Oracle), linux-mm
This is a straightforward conversion.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
mm/shmem.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index 4331a4daac01..4b8d0972bf72 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1600,9 +1600,9 @@ static struct page *shmem_alloc_and_acct_page(gfp_t gfp,
* NUMA mempolicy, and applied also to anonymous pages in do_swap_page();
* but for now it is a simple matter of zone.
*/
-static bool shmem_should_replace_page(struct page *page, gfp_t gfp)
+static bool shmem_should_replace_folio(struct folio *folio, gfp_t gfp)
{
- return page_zonenum(page) > gfp_zone(gfp);
+ return folio_zonenum(folio) > gfp_zone(gfp);
}
static int shmem_replace_page(struct page **pagep, gfp_t gfp,
@@ -1734,13 +1734,13 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
*/
arch_swap_restore(swap, page);
- if (shmem_should_replace_page(page, gfp)) {
+ folio = page_folio(page);
+ if (shmem_should_replace_folio(folio, gfp)) {
error = shmem_replace_page(&page, gfp, info, index);
if (error)
goto failed;
}
- folio = page_folio(page);
error = shmem_add_to_page_cache(folio, mapping, index,
swp_to_radix_entry(swap), gfp,
charge_mm);
--
2.34.1
* [PATCH 18/21] mm/shmem: Turn shmem_alloc_page() into shmem_alloc_folio()
2022-04-29 19:23 [PATCH 00/21] Folio patches for 5.19 Matthew Wilcox (Oracle)
` (16 preceding siblings ...)
2022-04-29 19:23 ` [PATCH 17/21] mm/shmem: Turn shmem_should_replace_page into shmem_should_replace_folio Matthew Wilcox (Oracle)
@ 2022-04-29 19:23 ` Matthew Wilcox (Oracle)
2022-04-29 19:23 ` [PATCH 19/21] mm/shmem: Convert shmem_alloc_and_acct_page to use a folio Matthew Wilcox (Oracle)
` (3 subsequent siblings)
21 siblings, 0 replies; 29+ messages in thread
From: Matthew Wilcox (Oracle) @ 2022-04-29 19:23 UTC (permalink / raw)
To: akpm; +Cc: Matthew Wilcox (Oracle), linux-mm
Call vma_alloc_folio() directly instead of alloc_page_vma().
It's a bit messy in the callers, but they're about to be
cleaned up when they get converted to folios.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
mm/shmem.c | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index 4b8d0972bf72..afee80747647 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1543,17 +1543,17 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
return &folio->page;
}
-static struct page *shmem_alloc_page(gfp_t gfp,
+static struct folio *shmem_alloc_folio(gfp_t gfp,
struct shmem_inode_info *info, pgoff_t index)
{
struct vm_area_struct pvma;
- struct page *page;
+ struct folio *folio;
shmem_pseudo_vma_init(&pvma, info, index);
- page = alloc_page_vma(gfp, &pvma, 0);
+ folio = vma_alloc_folio(gfp, 0, &pvma, 0, false);
shmem_pseudo_vma_destroy(&pvma);
- return page;
+ return folio;
}
static struct page *shmem_alloc_and_acct_page(gfp_t gfp,
@@ -1575,7 +1575,7 @@ static struct page *shmem_alloc_and_acct_page(gfp_t gfp,
if (huge)
page = shmem_alloc_hugepage(gfp, info, index);
else
- page = shmem_alloc_page(gfp, info, index);
+ page = &shmem_alloc_folio(gfp, info, index)->page;
if (page) {
__SetPageLocked(page);
__SetPageSwapBacked(page);
@@ -1625,7 +1625,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
* limit chance of success by further cpuset and node constraints.
*/
gfp &= ~GFP_CONSTRAINT_MASK;
- newpage = shmem_alloc_page(gfp, info, index);
+ newpage = &shmem_alloc_folio(gfp, info, index)->page;
if (!newpage)
return -ENOMEM;
@@ -2350,7 +2350,6 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
if (!*pagep) {
ret = -ENOMEM;
- page = shmem_alloc_page(gfp, info, pgoff);
if (!page)
goto out_unacct_blocks;
--
2.34.1
* [PATCH 19/21] mm/shmem: Convert shmem_alloc_and_acct_page to use a folio
2022-04-29 19:23 [PATCH 00/21] Folio patches for 5.19 Matthew Wilcox (Oracle)
` (17 preceding siblings ...)
2022-04-29 19:23 ` [PATCH 18/21] mm/shmem: Turn shmem_alloc_page() into shmem_alloc_folio() Matthew Wilcox (Oracle)
@ 2022-04-29 19:23 ` Matthew Wilcox (Oracle)
2022-04-29 19:23 ` [PATCH 20/21] mm/shmem: Convert shmem_getpage_gfp " Matthew Wilcox (Oracle)
` (2 subsequent siblings)
21 siblings, 0 replies; 29+ messages in thread
From: Matthew Wilcox (Oracle) @ 2022-04-29 19:23 UTC (permalink / raw)
To: akpm; +Cc: Matthew Wilcox (Oracle), linux-mm
Convert shmem_alloc_hugepage() to return the folio that it uses
and use a folio throughout shmem_alloc_and_acct_page(). Continue
to return a page from shmem_alloc_and_acct_page() for now.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
mm/shmem.c | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index afee80747647..e65daf511a9b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1522,7 +1522,7 @@ static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
return result;
}
-static struct page *shmem_alloc_hugepage(gfp_t gfp,
+static struct folio *shmem_alloc_hugefolio(gfp_t gfp,
struct shmem_inode_info *info, pgoff_t index)
{
struct vm_area_struct pvma;
@@ -1540,7 +1540,7 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
shmem_pseudo_vma_destroy(&pvma);
if (!folio)
count_vm_event(THP_FILE_FALLBACK);
- return &folio->page;
+ return folio;
}
static struct folio *shmem_alloc_folio(gfp_t gfp,
@@ -1561,7 +1561,7 @@ static struct page *shmem_alloc_and_acct_page(gfp_t gfp,
pgoff_t index, bool huge)
{
struct shmem_inode_info *info = SHMEM_I(inode);
- struct page *page;
+ struct folio *folio;
int nr;
int err = -ENOSPC;
@@ -1573,13 +1573,13 @@ static struct page *shmem_alloc_and_acct_page(gfp_t gfp,
goto failed;
if (huge)
- page = shmem_alloc_hugepage(gfp, info, index);
+ folio = shmem_alloc_hugefolio(gfp, info, index);
else
- page = &shmem_alloc_folio(gfp, info, index)->page;
- if (page) {
- __SetPageLocked(page);
- __SetPageSwapBacked(page);
- return page;
+ folio = shmem_alloc_folio(gfp, info, index);
+ if (folio) {
+ __folio_set_locked(folio);
+ __folio_set_swapbacked(folio);
+ return &folio->page;
}
err = -ENOMEM;
--
2.34.1
* [PATCH 20/21] mm/shmem: Convert shmem_getpage_gfp to use a folio
2022-04-29 19:23 [PATCH 00/21] Folio patches for 5.19 Matthew Wilcox (Oracle)
` (18 preceding siblings ...)
2022-04-29 19:23 ` [PATCH 19/21] mm/shmem: Convert shmem_alloc_and_acct_page to use a folio Matthew Wilcox (Oracle)
@ 2022-04-29 19:23 ` Matthew Wilcox (Oracle)
2022-04-29 19:23 ` [PATCH 21/21] mm/shmem: Convert shmem_swapin_page() to shmem_swapin_folio() Matthew Wilcox (Oracle)
2022-05-03 15:14 ` [PATCH 00/21] Folio patches for 5.19 Nathan Chancellor
21 siblings, 0 replies; 29+ messages in thread
From: Matthew Wilcox (Oracle) @ 2022-04-29 19:23 UTC (permalink / raw)
To: akpm; +Cc: Matthew Wilcox (Oracle), linux-mm
Rename shmem_alloc_and_acct_page() to shmem_alloc_and_acct_folio() and
have it return a folio, then use a folio throughout shmem_getpage_gfp().
shmem_getpage_gfp() itself continues to return a struct page.
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
mm/shmem.c | 92 +++++++++++++++++++++++++-----------------------------
1 file changed, 43 insertions(+), 49 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index e65daf511a9b..7457f352cf9f 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1556,8 +1556,7 @@ static struct folio *shmem_alloc_folio(gfp_t gfp,
return folio;
}
-static struct page *shmem_alloc_and_acct_page(gfp_t gfp,
- struct inode *inode,
+static struct folio *shmem_alloc_and_acct_folio(gfp_t gfp, struct inode *inode,
pgoff_t index, bool huge)
{
struct shmem_inode_info *info = SHMEM_I(inode);
@@ -1579,7 +1578,7 @@ static struct page *shmem_alloc_and_acct_page(gfp_t gfp,
if (folio) {
__folio_set_locked(folio);
__folio_set_swapbacked(folio);
- return &folio->page;
+ return folio;
}
err = -ENOMEM;
@@ -1793,7 +1792,6 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
struct shmem_sb_info *sbinfo;
struct mm_struct *charge_mm;
struct folio *folio;
- struct page *page;
pgoff_t hindex = index;
gfp_t huge_gfp;
int error;
@@ -1811,19 +1809,18 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
sbinfo = SHMEM_SB(inode->i_sb);
charge_mm = vma ? vma->vm_mm : NULL;
- page = pagecache_get_page(mapping, index,
- FGP_ENTRY | FGP_HEAD | FGP_LOCK, 0);
-
- if (page && vma && userfaultfd_minor(vma)) {
- if (!xa_is_value(page)) {
- unlock_page(page);
- put_page(page);
+ folio = __filemap_get_folio(mapping, index, FGP_ENTRY | FGP_LOCK, 0);
+ if (folio && vma && userfaultfd_minor(vma)) {
+ if (!xa_is_value(folio)) {
+ folio_unlock(folio);
+ folio_put(folio);
}
*fault_type = handle_userfault(vmf, VM_UFFD_MINOR);
return 0;
}
- if (xa_is_value(page)) {
+ if (xa_is_value(folio)) {
+ struct page *page = &folio->page;
error = shmem_swapin_page(inode, index, &page,
sgp, gfp, vma, fault_type);
if (error == -EEXIST)
@@ -1833,17 +1830,17 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
return error;
}
- if (page) {
- hindex = page->index;
+ if (folio) {
+ hindex = folio->index;
if (sgp == SGP_WRITE)
- mark_page_accessed(page);
- if (PageUptodate(page))
+ folio_mark_accessed(folio);
+ if (folio_test_uptodate(folio))
goto out;
/* fallocated page */
if (sgp != SGP_READ)
goto clear;
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
}
/*
@@ -1870,17 +1867,16 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
huge_gfp = vma_thp_gfp_mask(vma);
huge_gfp = limit_gfp_mask(huge_gfp, gfp);
- page = shmem_alloc_and_acct_page(huge_gfp, inode, index, true);
- if (IS_ERR(page)) {
+ folio = shmem_alloc_and_acct_folio(huge_gfp, inode, index, true);
+ if (IS_ERR(folio)) {
alloc_nohuge:
- page = shmem_alloc_and_acct_page(gfp, inode,
- index, false);
+ folio = shmem_alloc_and_acct_folio(gfp, inode, index, false);
}
- if (IS_ERR(page)) {
+ if (IS_ERR(folio)) {
int retry = 5;
- error = PTR_ERR(page);
- page = NULL;
+ error = PTR_ERR(folio);
+ folio = NULL;
if (error != -ENOSPC)
goto unlock;
/*
@@ -1899,30 +1895,29 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
goto unlock;
}
- if (PageTransHuge(page))
+ if (folio_test_large(folio))
hindex = round_down(index, HPAGE_PMD_NR);
else
hindex = index;
if (sgp == SGP_WRITE)
- __SetPageReferenced(page);
+ __folio_set_referenced(folio);
- folio = page_folio(page);
error = shmem_add_to_page_cache(folio, mapping, hindex,
NULL, gfp & GFP_RECLAIM_MASK,
charge_mm);
if (error)
goto unacct;
- lru_cache_add(page);
+ folio_add_lru(folio);
spin_lock_irq(&info->lock);
- info->alloced += compound_nr(page);
- inode->i_blocks += BLOCKS_PER_PAGE << compound_order(page);
+ info->alloced += folio_nr_pages(folio);
+ inode->i_blocks += BLOCKS_PER_PAGE << folio_order(folio);
shmem_recalc_inode(inode);
spin_unlock_irq(&info->lock);
alloced = true;
- if (PageTransHuge(page) &&
+ if (folio_test_large(folio) &&
DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) <
hindex + HPAGE_PMD_NR - 1) {
/*
@@ -1953,22 +1948,21 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
* but SGP_FALLOC on a page fallocated earlier must initialize
* it now, lest undo on failure cancel our earlier guarantee.
*/
- if (sgp != SGP_WRITE && !PageUptodate(page)) {
- int i;
+ if (sgp != SGP_WRITE && !folio_test_uptodate(folio)) {
+ long i, n = folio_nr_pages(folio);
- for (i = 0; i < compound_nr(page); i++) {
- clear_highpage(page + i);
- flush_dcache_page(page + i);
- }
- SetPageUptodate(page);
+ for (i = 0; i < n; i++)
+ clear_highpage(folio_page(folio, i));
+ flush_dcache_folio(folio);
+ folio_mark_uptodate(folio);
}
/* Perhaps the file has been truncated since we checked */
if (sgp <= SGP_CACHE &&
((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) {
if (alloced) {
- ClearPageDirty(page);
- delete_from_page_cache(page);
+ folio_clear_dirty(folio);
+ filemap_remove_folio(folio);
spin_lock_irq(&info->lock);
shmem_recalc_inode(inode);
spin_unlock_irq(&info->lock);
@@ -1977,24 +1971,24 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
goto unlock;
}
out:
- *pagep = page + index - hindex;
+ *pagep = folio_page(folio, index - hindex);
return 0;
/*
* Error recovery.
*/
unacct:
- shmem_inode_unacct_blocks(inode, compound_nr(page));
+ shmem_inode_unacct_blocks(inode, folio_nr_pages(folio));
- if (PageTransHuge(page)) {
- unlock_page(page);
- put_page(page);
+ if (folio_test_large(folio)) {
+ folio_unlock(folio);
+ folio_put(folio);
goto alloc_nohuge;
}
unlock:
- if (page) {
- unlock_page(page);
- put_page(page);
+ if (folio) {
+ folio_unlock(folio);
+ folio_put(folio);
}
if (error == -ENOSPC && !once++) {
spin_lock_irq(&info->lock);
--
2.34.1
* [PATCH 21/21] mm/shmem: Convert shmem_swapin_page() to shmem_swapin_folio()
2022-04-29 19:23 [PATCH 00/21] Folio patches for 5.19 Matthew Wilcox (Oracle)
` (19 preceding siblings ...)
2022-04-29 19:23 ` [PATCH 20/21] mm/shmem: Convert shmem_getpage_gfp " Matthew Wilcox (Oracle)
@ 2022-04-29 19:23 ` Matthew Wilcox (Oracle)
2022-05-03 15:14 ` [PATCH 00/21] Folio patches for 5.19 Nathan Chancellor
21 siblings, 0 replies; 29+ messages in thread
From: Matthew Wilcox (Oracle) @ 2022-04-29 19:23 UTC (permalink / raw)
To: akpm; +Cc: Matthew Wilcox (Oracle), linux-mm
shmem_swapin_page() only brings in order-0 pages, which are folios
by definition.
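As a loose illustration (a hypothetical helper, not code from this patch):
for an order-0 page, the folio is the same object as the page, so the
conversion at the boundary is essentially free:
static struct folio *order0_page_to_folio(struct page *page)
{
	struct folio *folio = page_folio(page);
	/* An order-0 page and its folio describe the same memory. */
	VM_BUG_ON(folio_order(folio) != 0);
	VM_BUG_ON(&folio->page != page);
	return folio;
}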
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
arch/arm64/include/asm/pgtable.h | 6 +-
include/linux/pgtable.h | 2 +-
mm/shmem.c | 108 ++++++++++++++-----------------
3 files changed, 54 insertions(+), 62 deletions(-)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index dff2b483ea50..27cb6a355fb0 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -964,10 +964,10 @@ static inline void arch_swap_invalidate_area(int type)
}
#define __HAVE_ARCH_SWAP_RESTORE
-static inline void arch_swap_restore(swp_entry_t entry, struct page *page)
+static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
{
- if (system_supports_mte() && mte_restore_tags(entry, page))
- set_bit(PG_mte_tagged, &page->flags);
+ if (system_supports_mte() && mte_restore_tags(entry, &folio->page))
+ set_bit(PG_mte_tagged, &folio->flags);
}
#endif /* CONFIG_ARM64_MTE */
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index f4f4077b97aa..a1c44b015463 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -738,7 +738,7 @@ static inline void arch_swap_invalidate_area(int type)
#endif
#ifndef __HAVE_ARCH_SWAP_RESTORE
-static inline void arch_swap_restore(swp_entry_t entry, struct page *page)
+static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
{
}
#endif
diff --git a/mm/shmem.c b/mm/shmem.c
index 7457f352cf9f..673a0e783496 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -134,8 +134,8 @@ static unsigned long shmem_default_max_inodes(void)
}
#endif
-static int shmem_swapin_page(struct inode *inode, pgoff_t index,
- struct page **pagep, enum sgp_type sgp,
+static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
+ struct folio **foliop, enum sgp_type sgp,
gfp_t gfp, struct vm_area_struct *vma,
vm_fault_t *fault_type);
static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
@@ -1158,69 +1158,64 @@ static void shmem_evict_inode(struct inode *inode)
}
static int shmem_find_swap_entries(struct address_space *mapping,
- pgoff_t start, unsigned int nr_entries,
- struct page **entries, pgoff_t *indices,
- unsigned int type)
+ pgoff_t start, struct folio_batch *fbatch,
+ pgoff_t *indices, unsigned int type)
{
XA_STATE(xas, &mapping->i_pages, start);
- struct page *page;
+ struct folio *folio;
swp_entry_t entry;
unsigned int ret = 0;
- if (!nr_entries)
- return 0;
-
rcu_read_lock();
- xas_for_each(&xas, page, ULONG_MAX) {
- if (xas_retry(&xas, page))
+ xas_for_each(&xas, folio, ULONG_MAX) {
+ if (xas_retry(&xas, folio))
continue;
- if (!xa_is_value(page))
+ if (!xa_is_value(folio))
continue;
- entry = radix_to_swp_entry(page);
+ entry = radix_to_swp_entry(folio);
if (swp_type(entry) != type)
continue;
indices[ret] = xas.xa_index;
- entries[ret] = page;
+ if (!folio_batch_add(fbatch, folio))
+ break;
if (need_resched()) {
xas_pause(&xas);
cond_resched_rcu();
}
- if (++ret == nr_entries)
- break;
}
rcu_read_unlock();
- return ret;
+ return xas.xa_index;
}
/*
* Move the swapped pages for an inode to page cache. Returns the count
* of pages swapped in, or the error in case of failure.
*/
-static int shmem_unuse_swap_entries(struct inode *inode, struct pagevec pvec,
- pgoff_t *indices)
+static int shmem_unuse_swap_entries(struct inode *inode,
+ struct folio_batch *fbatch, pgoff_t *indices)
{
int i = 0;
int ret = 0;
int error = 0;
struct address_space *mapping = inode->i_mapping;
- for (i = 0; i < pvec.nr; i++) {
- struct page *page = pvec.pages[i];
+ for (i = 0; i < folio_batch_count(fbatch); i++) {
+ struct folio *folio = fbatch->folios[i];
- if (!xa_is_value(page))
+ if (!xa_is_value(folio))
continue;
- error = shmem_swapin_page(inode, indices[i],
- &page, SGP_CACHE,
+ error = shmem_swapin_folio(inode, indices[i],
+ &folio, SGP_CACHE,
mapping_gfp_mask(mapping),
NULL, NULL);
if (error == 0) {
- unlock_page(page);
- put_page(page);
+ folio_unlock(folio);
+ folio_put(folio);
ret++;
}
if (error == -ENOMEM)
@@ -1237,26 +1232,23 @@ static int shmem_unuse_inode(struct inode *inode, unsigned int type)
{
struct address_space *mapping = inode->i_mapping;
pgoff_t start = 0;
- struct pagevec pvec;
+ struct folio_batch fbatch;
pgoff_t indices[PAGEVEC_SIZE];
int ret = 0;
- pagevec_init(&pvec);
do {
- unsigned int nr_entries = PAGEVEC_SIZE;
-
- pvec.nr = shmem_find_swap_entries(mapping, start, nr_entries,
- pvec.pages, indices, type);
- if (pvec.nr == 0) {
+ folio_batch_init(&fbatch);
+ shmem_find_swap_entries(mapping, start, &fbatch, indices, type);
+ if (folio_batch_count(&fbatch) == 0) {
ret = 0;
break;
}
- ret = shmem_unuse_swap_entries(inode, pvec, indices);
+ ret = shmem_unuse_swap_entries(inode, &fbatch, indices);
if (ret < 0)
break;
- start = indices[pvec.nr - 1];
+ start = indices[folio_batch_count(&fbatch) - 1];
} while (true);
return ret;
@@ -1680,22 +1672,22 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
* Returns 0 and the page in pagep if success. On failure, returns the
* error code and NULL in *pagep.
*/
-static int shmem_swapin_page(struct inode *inode, pgoff_t index,
- struct page **pagep, enum sgp_type sgp,
+static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
+ struct folio **foliop, enum sgp_type sgp,
gfp_t gfp, struct vm_area_struct *vma,
vm_fault_t *fault_type)
{
struct address_space *mapping = inode->i_mapping;
struct shmem_inode_info *info = SHMEM_I(inode);
struct mm_struct *charge_mm = vma ? vma->vm_mm : NULL;
- struct page *page = NULL;
+ struct page *page;
struct folio *folio;
swp_entry_t swap;
int error;
- VM_BUG_ON(!*pagep || !xa_is_value(*pagep));
- swap = radix_to_swp_entry(*pagep);
- *pagep = NULL;
+ VM_BUG_ON(!*foliop || !xa_is_value(*foliop));
+ swap = radix_to_swp_entry(*foliop);
+ *foliop = NULL;
/* Look it up and read it in.. */
page = lookup_swap_cache(swap, NULL, 0);
@@ -1713,27 +1705,28 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
goto failed;
}
}
+ folio = page_folio(page);
/* We have to do this with page locked to prevent races */
- lock_page(page);
- if (!PageSwapCache(page) || page_private(page) != swap.val ||
+ folio_lock(folio);
+ if (!folio_test_swapcache(folio) ||
+ folio_swap_entry(folio).val != swap.val ||
!shmem_confirm_swap(mapping, index, swap)) {
error = -EEXIST;
goto unlock;
}
- if (!PageUptodate(page)) {
+ if (!folio_test_uptodate(folio)) {
error = -EIO;
goto failed;
}
- wait_on_page_writeback(page);
+ folio_wait_writeback(folio);
/*
* Some architectures may have to restore extra metadata to the
- * physical page after reading from swap.
+ * folio after reading from swap.
*/
- arch_swap_restore(swap, page);
+ arch_swap_restore(swap, folio);
- folio = page_folio(page);
if (shmem_should_replace_folio(folio, gfp)) {
error = shmem_replace_page(&page, gfp, info, index);
if (error)
@@ -1752,21 +1745,21 @@ static int shmem_swapin_page(struct inode *inode, pgoff_t index,
spin_unlock_irq(&info->lock);
if (sgp == SGP_WRITE)
- mark_page_accessed(page);
+ folio_mark_accessed(folio);
- delete_from_swap_cache(page);
- set_page_dirty(page);
+ delete_from_swap_cache(&folio->page);
+ folio_mark_dirty(folio);
swap_free(swap);
- *pagep = page;
+ *foliop = folio;
return 0;
failed:
if (!shmem_confirm_swap(mapping, index, swap))
error = -EEXIST;
unlock:
- if (page) {
- unlock_page(page);
- put_page(page);
+ if (folio) {
+ folio_unlock(folio);
+ folio_put(folio);
}
return error;
@@ -1820,13 +1813,12 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
}
if (xa_is_value(folio)) {
- struct page *page = &folio->page;
- error = shmem_swapin_page(inode, index, &page,
+ error = shmem_swapin_folio(inode, index, &folio,
sgp, gfp, vma, fault_type);
if (error == -EEXIST)
goto repeat;
- *pagep = page;
+ *pagep = &folio->page;
return error;
}
--
2.34.1
* Re: [PATCH 00/21] Folio patches for 5.19
2022-04-29 19:23 [PATCH 00/21] Folio patches for 5.19 Matthew Wilcox (Oracle)
` (20 preceding siblings ...)
2022-04-29 19:23 ` [PATCH 21/21] mm/shmem: Convert shmem_swapin_page() to shmem_swapin_folio() Matthew Wilcox (Oracle)
@ 2022-05-03 15:14 ` Nathan Chancellor
21 siblings, 0 replies; 29+ messages in thread
From: Nathan Chancellor @ 2022-05-03 15:14 UTC (permalink / raw)
To: Matthew Wilcox (Oracle); +Cc: akpm, linux-mm, llvm
On Fri, Apr 29, 2022 at 08:23:08PM +0100, Matthew Wilcox (Oracle) wrote:
> Andrew, do you want to include these patches in -mm?
>
> - Finish the conversion from alloc_pages_vma() to vma_alloc_folio()
> - Finish converting shrink_page_list() to folios
> - Start converting shmem from pages to folios (alas, not finished,
> I have simply run out of time with all the debugging/fixing needed
> for 5.18)
>
> Matthew Wilcox (Oracle) (21):
> shmem: Convert shmem_alloc_hugepage() to use vma_alloc_folio()
> mm/huge_memory: Convert do_huge_pmd_anonymous_page() to use
> vma_alloc_folio()
> mm: Remove alloc_pages_vma()
> vmscan: Use folio_mapped() in shrink_page_list()
> vmscan: Convert the writeback handling in shrink_page_list() to folios
> swap: Turn get_swap_page() into folio_alloc_swap()
> swap: Convert add_to_swap() to take a folio
> vmscan: Convert dirty page handling to folios
> vmscan: Convert page buffer handling to use folios
> vmscan: Convert lazy freeing to folios
> vmscan: Move initialisation of mapping down
> vmscan: Convert the activate_locked portion of shrink_page_list to
> folios
> vmscan: Remove remaining uses of page in shrink_page_list
> mm/shmem: Use a folio in shmem_unused_huge_shrink
> mm/swap: Add folio_throttle_swaprate
> mm/shmem: Convert shmem_add_to_page_cache to take a folio
> mm/shmem: Turn shmem_should_replace_page into
> shmem_should_replace_folio
> mm/shmem: Turn shmem_alloc_page() into shmem_alloc_folio()
> mm/shmem: Convert shmem_alloc_and_acct_page to use a folio
> mm/shmem: Convert shmem_getpage_gfp to use a folio
> mm/shmem: Convert shmem_swapin_page() to shmem_swapin_folio()
This series is now in next-20220503 and causes the following clang
warnings:
mm/shmem.c:1704:7: error: variable 'folio' is used uninitialized whenever 'if' condition is true [-Werror,-Wsometimes-uninitialized]
if (!page) {
^~~~~
mm/shmem.c:1761:6: note: uninitialized use occurs here
if (folio) {
^~~~~
mm/shmem.c:1704:3: note: remove the 'if' if its condition is always false
if (!page) {
^~~~~~~~~~~~
mm/shmem.c:1685:21: note: initialize the variable 'folio' to silence this warning
struct folio *folio;
^
= NULL
mm/shmem.c:2340:8: error: variable 'page' is uninitialized when used here [-Werror,-Wuninitialized]
if (!page)
^~~~
mm/shmem.c:2321:19: note: initialize the variable 'page' to silence this warning
struct page *page;
^
= NULL
2 errors generated.
The first warning is pretty simple as far as I can tell:
diff --git a/mm/shmem.c b/mm/shmem.c
index 820fde6c2ef6..6a18641a90ff 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1682,7 +1682,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
struct shmem_inode_info *info = SHMEM_I(inode);
struct mm_struct *charge_mm = vma ? vma->vm_mm : NULL;
struct page *page;
- struct folio *folio;
+ struct folio *folio = NULL;
swp_entry_t swap;
int error;
However, I am not sure about the second one. It appears to be caused by
patch 18 in this series. Should it have actually been:
diff --git a/mm/shmem.c b/mm/shmem.c
index 820fde6c2ef6..9e0bd0cffe30 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2337,6 +2337,7 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
if (!*pagep) {
ret = -ENOMEM;
+ page = &shmem_alloc_folio(gfp, info, pgoff)->page;
if (!page)
goto out_unacct_blocks;
?
Cheers,
Nathan