linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/2] swap readahead clean up
@ 2017-11-01  5:28 Minchan Kim
  2017-11-01  5:28 ` [PATCH 1/2] mm:swap: clean up swap readahead Minchan Kim
  2017-11-01  5:28 ` [PATCH 2/2] mm:swap: unify cluster-based and vma-based " Minchan Kim
  0 siblings, 2 replies; 8+ messages in thread
From: Minchan Kim @ 2017-11-01  5:28 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-kernel, linux-mm, Hugh Dickins, Huang Ying, kernel-team,
	Minchan Kim

This patchset cleans up recent added vma-based readahead code via
unifying cluster-based readahead.

Minchan Kim (2):
  mm:swap: clean up swap readahead
  mm:swap: unify cluster-based and vma-based swap readahead

 include/linux/swap.h |  32 +++++++--------
 mm/memory.c          |  24 +++--------
 mm/shmem.c           |   5 ++-
 mm/swap_state.c      | 110 +++++++++++++++++++++++++++++----------------------
 4 files changed, 87 insertions(+), 84 deletions(-)

-- 
2.7.4

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH 1/2] mm:swap: clean up swap readahead
  2017-11-01  5:28 [PATCH 0/2] swap readahead clean up Minchan Kim
@ 2017-11-01  5:28 ` Minchan Kim
  2017-11-01  5:41   ` Huang, Ying
  2017-11-01  5:28 ` [PATCH 2/2] mm:swap: unify cluster-based and vma-based " Minchan Kim
  1 sibling, 1 reply; 8+ messages in thread
From: Minchan Kim @ 2017-11-01  5:28 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-kernel, linux-mm, Hugh Dickins, Huang Ying, kernel-team,
	Minchan Kim

When I see recent change of swap readahead, I am very unhappy
about current code structure which diverges two swap readahead
algorithm in do_swap_page. This patch is to clean it up.

Main motivation is that fault handler doesn't need to be aware of
readahead algorithms but just should call swapin_readahead.

As first step, this patch cleans up a little bit but not perfect
(I just separate for review easier) so next patch will make the goal
complete.

Signed-off-by: Minchan Kim <minchan@kernel.org>
---
 include/linux/swap.h | 17 ++--------
 mm/memory.c          | 17 +++-------
 mm/swap_state.c      | 89 ++++++++++++++++++++++++++++------------------------
 3 files changed, 55 insertions(+), 68 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 84255b3da7c1..7c7c8b344bc9 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -427,12 +427,8 @@ extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t,
 			bool *new_page_allocated);
 extern struct page *swapin_readahead(swp_entry_t, gfp_t,
 			struct vm_area_struct *vma, unsigned long addr);
-
-extern struct page *swap_readahead_detect(struct vm_fault *vmf,
-					  struct vma_swap_readahead *swap_ra);
 extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
-					   struct vm_fault *vmf,
-					   struct vma_swap_readahead *swap_ra);
+					   struct vm_fault *vmf);
 
 /* linux/mm/swapfile.c */
 extern atomic_long_t nr_swap_pages;
@@ -551,15 +547,8 @@ static inline bool swap_use_vma_readahead(void)
 	return false;
 }
 
-static inline struct page *swap_readahead_detect(
-	struct vm_fault *vmf, struct vma_swap_readahead *swap_ra)
-{
-	return NULL;
-}
-
-static inline struct page *do_swap_page_readahead(
-	swp_entry_t fentry, gfp_t gfp_mask,
-	struct vm_fault *vmf, struct vma_swap_readahead *swap_ra)
+static inline struct page *do_swap_page_readahead(swp_entry_t fentry,
+				gfp_t gfp_mask, struct vm_fault *vmf)
 {
 	return NULL;
 }
diff --git a/mm/memory.c b/mm/memory.c
index 8a0c410037d2..e955298e4290 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2849,21 +2849,14 @@ int do_swap_page(struct vm_fault *vmf)
 	struct vm_area_struct *vma = vmf->vma;
 	struct page *page = NULL, *swapcache = NULL;
 	struct mem_cgroup *memcg;
-	struct vma_swap_readahead swap_ra;
 	swp_entry_t entry;
 	pte_t pte;
 	int locked;
 	int exclusive = 0;
 	int ret = 0;
-	bool vma_readahead = swap_use_vma_readahead();
 
-	if (vma_readahead)
-		page = swap_readahead_detect(vmf, &swap_ra);
-	if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) {
-		if (page)
-			put_page(page);
+	if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
 		goto out;
-	}
 
 	entry = pte_to_swp_entry(vmf->orig_pte);
 	if (unlikely(non_swap_entry(entry))) {
@@ -2889,9 +2882,7 @@ int do_swap_page(struct vm_fault *vmf)
 
 
 	delayacct_set_flag(DELAYACCT_PF_SWAPIN);
-	if (!page)
-		page = lookup_swap_cache(entry, vma_readahead ? vma : NULL,
-					 vmf->address);
+	page = lookup_swap_cache(entry, vma, vmf->address);
 	if (!page) {
 		struct swap_info_struct *si = swp_swap_info(entry);
 
@@ -2907,9 +2898,9 @@ int do_swap_page(struct vm_fault *vmf)
 				swap_readpage(page, true);
 			}
 		} else {
-			if (vma_readahead)
+			if (swap_use_vma_readahead())
 				page = do_swap_page_readahead(entry,
-					GFP_HIGHUSER_MOVABLE, vmf, &swap_ra);
+					GFP_HIGHUSER_MOVABLE, vmf);
 			else
 				page = swapin_readahead(entry,
 				       GFP_HIGHUSER_MOVABLE, vma, vmf->address);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 6c017ced11e6..e3c535fcd2df 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -331,32 +331,38 @@ struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
 			       unsigned long addr)
 {
 	struct page *page;
-	unsigned long ra_info;
-	int win, hits, readahead;
 
 	page = find_get_page(swap_address_space(entry), swp_offset(entry));
 
 	INC_CACHE_INFO(find_total);
 	if (page) {
+		bool vma_ra = swap_use_vma_readahead();
+		bool readahead = TestClearPageReadahead(page);
+
 		INC_CACHE_INFO(find_success);
 		if (unlikely(PageTransCompound(page)))
 			return page;
-		readahead = TestClearPageReadahead(page);
-		if (vma) {
-			ra_info = GET_SWAP_RA_VAL(vma);
-			win = SWAP_RA_WIN(ra_info);
-			hits = SWAP_RA_HITS(ra_info);
+
+		if (vma && vma_ra) {
+			unsigned long ra_val;
+			int win, hits;
+
+			ra_val = GET_SWAP_RA_VAL(vma);
+			win = SWAP_RA_WIN(ra_val);
+			hits = SWAP_RA_HITS(ra_val);
 			if (readahead)
 				hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
 			atomic_long_set(&vma->swap_readahead_info,
 					SWAP_RA_VAL(addr, win, hits));
 		}
+
 		if (readahead) {
 			count_vm_event(SWAP_RA_HIT);
-			if (!vma)
+			if (!vma || !vma_ra)
 				atomic_inc(&swapin_readahead_hits);
 		}
 	}
+
 	return page;
 }
 
@@ -645,16 +651,15 @@ static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma,
 		    PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
 }
 
-struct page *swap_readahead_detect(struct vm_fault *vmf,
-				   struct vma_swap_readahead *swap_ra)
+static void swap_ra_info(struct vm_fault *vmf,
+			struct vma_swap_readahead *ra_info)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	unsigned long swap_ra_info;
-	struct page *page;
+	unsigned long ra_val;
 	swp_entry_t entry;
 	unsigned long faddr, pfn, fpfn;
 	unsigned long start, end;
-	pte_t *pte;
+	pte_t *pte, *orig_pte;
 	unsigned int max_win, hits, prev_win, win, left;
 #ifndef CONFIG_64BIT
 	pte_t *tpte;
@@ -663,30 +668,32 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
 	max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
 			     SWAP_RA_ORDER_CEILING);
 	if (max_win == 1) {
-		swap_ra->win = 1;
-		return NULL;
+		ra_info->win = 1;
+		return;
 	}
 
 	faddr = vmf->address;
-	entry = pte_to_swp_entry(vmf->orig_pte);
-	if ((unlikely(non_swap_entry(entry))))
-		return NULL;
-	page = lookup_swap_cache(entry, vma, faddr);
-	if (page)
-		return page;
+	orig_pte = pte = pte_offset_map(vmf->pmd, faddr);
+	entry = pte_to_swp_entry(*pte);
+	if ((unlikely(non_swap_entry(entry)))) {
+		pte_unmap(orig_pte);
+		return;
+	}
 
 	fpfn = PFN_DOWN(faddr);
-	swap_ra_info = GET_SWAP_RA_VAL(vma);
-	pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info));
-	prev_win = SWAP_RA_WIN(swap_ra_info);
-	hits = SWAP_RA_HITS(swap_ra_info);
-	swap_ra->win = win = __swapin_nr_pages(pfn, fpfn, hits,
+	ra_val = GET_SWAP_RA_VAL(vma);
+	pfn = PFN_DOWN(SWAP_RA_ADDR(ra_val));
+	prev_win = SWAP_RA_WIN(ra_val);
+	hits = SWAP_RA_HITS(ra_val);
+	ra_info->win = win = __swapin_nr_pages(pfn, fpfn, hits,
 					       max_win, prev_win);
 	atomic_long_set(&vma->swap_readahead_info,
 			SWAP_RA_VAL(faddr, win, 0));
 
-	if (win == 1)
-		return NULL;
+	if (win == 1) {
+		pte_unmap(orig_pte);
+		return;
+	}
 
 	/* Copy the PTEs because the page table may be unmapped */
 	if (fpfn == pfn + 1)
@@ -699,23 +706,21 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
 		swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left,
 				  &start, &end);
 	}
-	swap_ra->nr_pte = end - start;
-	swap_ra->offset = fpfn - start;
-	pte = vmf->pte - swap_ra->offset;
+	ra_info->nr_pte = end - start;
+	ra_info->offset = fpfn - start;
+	pte -= ra_info->offset;
 #ifdef CONFIG_64BIT
-	swap_ra->ptes = pte;
+	ra_info->ptes = pte;
 #else
-	tpte = swap_ra->ptes;
+	tpte = ra_info->ptes;
 	for (pfn = start; pfn != end; pfn++)
 		*tpte++ = *pte++;
 #endif
-
-	return NULL;
+	pte_unmap(orig_pte);
 }
 
 struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
-				    struct vm_fault *vmf,
-				    struct vma_swap_readahead *swap_ra)
+				    struct vm_fault *vmf)
 {
 	struct blk_plug plug;
 	struct vm_area_struct *vma = vmf->vma;
@@ -724,12 +729,14 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
 	swp_entry_t entry;
 	unsigned int i;
 	bool page_allocated;
+	struct vma_swap_readahead ra_info = {0,};
 
-	if (swap_ra->win == 1)
+	swap_ra_info(vmf, &ra_info);
+	if (ra_info.win == 1)
 		goto skip;
 
 	blk_start_plug(&plug);
-	for (i = 0, pte = swap_ra->ptes; i < swap_ra->nr_pte;
+	for (i = 0, pte = ra_info.ptes; i < ra_info.nr_pte;
 	     i++, pte++) {
 		pentry = *pte;
 		if (pte_none(pentry))
@@ -745,7 +752,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
 			continue;
 		if (page_allocated) {
 			swap_readpage(page, false);
-			if (i != swap_ra->offset &&
+			if (i != ra_info.offset &&
 			    likely(!PageTransCompound(page))) {
 				SetPageReadahead(page);
 				count_vm_event(SWAP_RA);
@@ -757,7 +764,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
 	lru_add_drain();
 skip:
 	return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address,
-				     swap_ra->win == 1);
+				     ra_info.win == 1);
 }
 
 #ifdef CONFIG_SYSFS
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH 2/2] mm:swap: unify cluster-based and vma-based swap readahead
  2017-11-01  5:28 [PATCH 0/2] swap readahead clean up Minchan Kim
  2017-11-01  5:28 ` [PATCH 1/2] mm:swap: clean up swap readahead Minchan Kim
@ 2017-11-01  5:28 ` Minchan Kim
  2017-11-01  6:17   ` Huang, Ying
  1 sibling, 1 reply; 8+ messages in thread
From: Minchan Kim @ 2017-11-01  5:28 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-kernel, linux-mm, Hugh Dickins, Huang Ying, kernel-team,
	Minchan Kim

This patch makes do_swap_page no need to be aware of two different
swap readahead algorithm. Just unify cluster-based and vma-based
readahead function call.

Signed-off-by: Minchan Kim <minchan@kernel.org>
---
 include/linux/swap.h | 17 ++++++++++++-----
 mm/memory.c          | 11 ++++-------
 mm/shmem.c           |  5 ++++-
 mm/swap_state.c      | 21 +++++++++++++++------
 4 files changed, 35 insertions(+), 19 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 7c7c8b344bc9..9cc330360eac 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -425,9 +425,11 @@ extern struct page *read_swap_cache_async(swp_entry_t, gfp_t,
 extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t,
 			struct vm_area_struct *vma, unsigned long addr,
 			bool *new_page_allocated);
-extern struct page *swapin_readahead(swp_entry_t, gfp_t,
-			struct vm_area_struct *vma, unsigned long addr);
-extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
+extern struct page *cluster_readahead(swp_entry_t entry, gfp_t flag,
+				struct vm_fault *vmf);
+extern struct page *swapin_readahead(swp_entry_t entry, gfp_t flag,
+				struct vm_fault *vmf);
+extern struct page *vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
 					   struct vm_fault *vmf);
 
 /* linux/mm/swapfile.c */
@@ -536,8 +538,13 @@ static inline void put_swap_page(struct page *page, swp_entry_t swp)
 {
 }
 
+static inline struct page *cluster_readahead(swp_entry_t, gfp_t gfp_mask
+						struct vm_fault *vmf)
+{
+}
+
 static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
-			struct vm_area_struct *vma, unsigned long addr)
+			struct vm_fault *vmf)
 {
 	return NULL;
 }
@@ -547,7 +554,7 @@ static inline bool swap_use_vma_readahead(void)
 	return false;
 }
 
-static inline struct page *do_swap_page_readahead(swp_entry_t fentry,
+static inline struct page *vma_readahead(swp_entry_t fentry,
 				gfp_t gfp_mask, struct vm_fault *vmf)
 {
 	return NULL;
diff --git a/mm/memory.c b/mm/memory.c
index e955298e4290..ce5e3d7ccc5c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2889,7 +2889,8 @@ int do_swap_page(struct vm_fault *vmf)
 		if (si->flags & SWP_SYNCHRONOUS_IO &&
 				__swap_count(si, entry) == 1) {
 			/* skip swapcache */
-			page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address);
+			page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
+							vmf->address);
 			if (page) {
 				__SetPageLocked(page);
 				__SetPageSwapBacked(page);
@@ -2898,12 +2899,8 @@ int do_swap_page(struct vm_fault *vmf)
 				swap_readpage(page, true);
 			}
 		} else {
-			if (swap_use_vma_readahead())
-				page = do_swap_page_readahead(entry,
-					GFP_HIGHUSER_MOVABLE, vmf);
-			else
-				page = swapin_readahead(entry,
-				       GFP_HIGHUSER_MOVABLE, vma, vmf->address);
+			page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
+						vmf);
 			swapcache = page;
 		}
 
diff --git a/mm/shmem.c b/mm/shmem.c
index 62dfdc097e44..2522bc0958e1 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1413,9 +1413,12 @@ static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
 {
 	struct vm_area_struct pvma;
 	struct page *page;
+	struct vm_fault vmf;
 
 	shmem_pseudo_vma_init(&pvma, info, index);
-	page = swapin_readahead(swap, gfp, &pvma, 0);
+	vmf.vma = &pvma;
+	vmf.address = 0;
+	page = cluster_readahead(swap, gfp, &vmf);
 	shmem_pseudo_vma_destroy(&pvma);
 
 	return page;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index e3c535fcd2df..5ee53d4ee047 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -538,11 +538,10 @@ static unsigned long swapin_nr_pages(unsigned long offset)
 }
 
 /**
- * swapin_readahead - swap in pages in hope we need them soon
+ * cluster_readahead - swap in pages in hope we need them soon
  * @entry: swap entry of this memory
  * @gfp_mask: memory allocation flags
- * @vma: user vma this address belongs to
- * @addr: target address for mempolicy
+ * @vmf: fault information
  *
  * Returns the struct page for entry and addr, after queueing swapin.
  *
@@ -556,8 +555,8 @@ static unsigned long swapin_nr_pages(unsigned long offset)
  *
  * Caller must hold down_read on the vma->vm_mm if vma is not NULL.
  */
-struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
-			struct vm_area_struct *vma, unsigned long addr)
+struct page *cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
+				struct vm_fault *vmf)
 {
 	struct page *page;
 	unsigned long entry_offset = swp_offset(entry);
@@ -566,6 +565,8 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 	unsigned long mask;
 	struct blk_plug plug;
 	bool do_poll = true, page_allocated;
+	struct vm_area_struct *vma = vmf->vma;
+	unsigned long addr = vmf->address;
 
 	mask = swapin_nr_pages(offset) - 1;
 	if (!mask)
@@ -603,6 +604,14 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
 	return read_swap_cache_async(entry, gfp_mask, vma, addr, do_poll);
 }
 
+struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
+				struct vm_fault *vmf)
+{
+	return swap_use_vma_readahead() ?
+				vma_readahead(entry, gfp_mask, vmf) :
+				cluster_readahead(entry, gfp_mask, vmf);
+}
+
 int init_swap_address_space(unsigned int type, unsigned long nr_pages)
 {
 	struct address_space *spaces, *space;
@@ -719,7 +728,7 @@ static void swap_ra_info(struct vm_fault *vmf,
 	pte_unmap(orig_pte);
 }
 
-struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
+struct page *vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
 				    struct vm_fault *vmf)
 {
 	struct blk_plug plug;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] mm:swap: clean up swap readahead
  2017-11-01  5:28 ` [PATCH 1/2] mm:swap: clean up swap readahead Minchan Kim
@ 2017-11-01  5:41   ` Huang, Ying
  2017-11-01  5:45     ` Minchan Kim
  0 siblings, 1 reply; 8+ messages in thread
From: Huang, Ying @ 2017-11-01  5:41 UTC (permalink / raw)
  To: Minchan Kim
  Cc: Andrew Morton, linux-kernel, linux-mm, Hugh Dickins, Huang Ying,
	kernel-team

Hi, Minchan,

Minchan Kim <minchan@kernel.org> writes:

> When I see recent change of swap readahead, I am very unhappy
> about current code structure which diverges two swap readahead
> algorithm in do_swap_page. This patch is to clean it up.
>
> Main motivation is that fault handler doesn't need to be aware of
> readahead algorithms but just should call swapin_readahead.
>
> As first step, this patch cleans up a little bit but not perfect
> (I just separate for review easier) so next patch will make the goal
> complete.
>
> Signed-off-by: Minchan Kim <minchan@kernel.org>
> ---
>  include/linux/swap.h | 17 ++--------
>  mm/memory.c          | 17 +++-------
>  mm/swap_state.c      | 89 ++++++++++++++++++++++++++++------------------------
>  3 files changed, 55 insertions(+), 68 deletions(-)
>
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index 84255b3da7c1..7c7c8b344bc9 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -427,12 +427,8 @@ extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t,
>  			bool *new_page_allocated);
>  extern struct page *swapin_readahead(swp_entry_t, gfp_t,
>  			struct vm_area_struct *vma, unsigned long addr);
> -
> -extern struct page *swap_readahead_detect(struct vm_fault *vmf,
> -					  struct vma_swap_readahead *swap_ra);
>  extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
> -					   struct vm_fault *vmf,
> -					   struct vma_swap_readahead *swap_ra);
> +					   struct vm_fault *vmf);
>  
>  /* linux/mm/swapfile.c */
>  extern atomic_long_t nr_swap_pages;
> @@ -551,15 +547,8 @@ static inline bool swap_use_vma_readahead(void)
>  	return false;
>  }
>  
> -static inline struct page *swap_readahead_detect(
> -	struct vm_fault *vmf, struct vma_swap_readahead *swap_ra)
> -{
> -	return NULL;
> -}
> -
> -static inline struct page *do_swap_page_readahead(
> -	swp_entry_t fentry, gfp_t gfp_mask,
> -	struct vm_fault *vmf, struct vma_swap_readahead *swap_ra)
> +static inline struct page *do_swap_page_readahead(swp_entry_t fentry,
> +				gfp_t gfp_mask, struct vm_fault *vmf)
>  {
>  	return NULL;
>  }
> diff --git a/mm/memory.c b/mm/memory.c
> index 8a0c410037d2..e955298e4290 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -2849,21 +2849,14 @@ int do_swap_page(struct vm_fault *vmf)
>  	struct vm_area_struct *vma = vmf->vma;
>  	struct page *page = NULL, *swapcache = NULL;
>  	struct mem_cgroup *memcg;
> -	struct vma_swap_readahead swap_ra;
>  	swp_entry_t entry;
>  	pte_t pte;
>  	int locked;
>  	int exclusive = 0;
>  	int ret = 0;
> -	bool vma_readahead = swap_use_vma_readahead();
>  
> -	if (vma_readahead)
> -		page = swap_readahead_detect(vmf, &swap_ra);
> -	if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) {
> -		if (page)
> -			put_page(page);
> +	if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
>  		goto out;
> -	}

The page table holding PTE may be unmapped in pte_unmap_same(), so is it
safe for us to access page table after this in do_swap_page_readahead()?

Best Regards,
Huang, Ying

>  	entry = pte_to_swp_entry(vmf->orig_pte);
>  	if (unlikely(non_swap_entry(entry))) {
> @@ -2889,9 +2882,7 @@ int do_swap_page(struct vm_fault *vmf)
>  
>  
>  	delayacct_set_flag(DELAYACCT_PF_SWAPIN);
> -	if (!page)
> -		page = lookup_swap_cache(entry, vma_readahead ? vma : NULL,
> -					 vmf->address);
> +	page = lookup_swap_cache(entry, vma, vmf->address);
>  	if (!page) {
>  		struct swap_info_struct *si = swp_swap_info(entry);
>  
> @@ -2907,9 +2898,9 @@ int do_swap_page(struct vm_fault *vmf)
>  				swap_readpage(page, true);
>  			}
>  		} else {
> -			if (vma_readahead)
> +			if (swap_use_vma_readahead())
>  				page = do_swap_page_readahead(entry,
> -					GFP_HIGHUSER_MOVABLE, vmf, &swap_ra);
> +					GFP_HIGHUSER_MOVABLE, vmf);
>  			else
>  				page = swapin_readahead(entry,
>  				       GFP_HIGHUSER_MOVABLE, vma, vmf->address);
> diff --git a/mm/swap_state.c b/mm/swap_state.c
> index 6c017ced11e6..e3c535fcd2df 100644
> --- a/mm/swap_state.c
> +++ b/mm/swap_state.c
> @@ -331,32 +331,38 @@ struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
>  			       unsigned long addr)
>  {
>  	struct page *page;
> -	unsigned long ra_info;
> -	int win, hits, readahead;
>  
>  	page = find_get_page(swap_address_space(entry), swp_offset(entry));
>  
>  	INC_CACHE_INFO(find_total);
>  	if (page) {
> +		bool vma_ra = swap_use_vma_readahead();
> +		bool readahead = TestClearPageReadahead(page);
> +
>  		INC_CACHE_INFO(find_success);
>  		if (unlikely(PageTransCompound(page)))
>  			return page;
> -		readahead = TestClearPageReadahead(page);
> -		if (vma) {
> -			ra_info = GET_SWAP_RA_VAL(vma);
> -			win = SWAP_RA_WIN(ra_info);
> -			hits = SWAP_RA_HITS(ra_info);
> +
> +		if (vma && vma_ra) {
> +			unsigned long ra_val;
> +			int win, hits;
> +
> +			ra_val = GET_SWAP_RA_VAL(vma);
> +			win = SWAP_RA_WIN(ra_val);
> +			hits = SWAP_RA_HITS(ra_val);
>  			if (readahead)
>  				hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
>  			atomic_long_set(&vma->swap_readahead_info,
>  					SWAP_RA_VAL(addr, win, hits));
>  		}
> +
>  		if (readahead) {
>  			count_vm_event(SWAP_RA_HIT);
> -			if (!vma)
> +			if (!vma || !vma_ra)
>  				atomic_inc(&swapin_readahead_hits);
>  		}
>  	}
> +
>  	return page;
>  }
>  
> @@ -645,16 +651,15 @@ static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma,
>  		    PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
>  }
>  
> -struct page *swap_readahead_detect(struct vm_fault *vmf,
> -				   struct vma_swap_readahead *swap_ra)
> +static void swap_ra_info(struct vm_fault *vmf,
> +			struct vma_swap_readahead *ra_info)
>  {
>  	struct vm_area_struct *vma = vmf->vma;
> -	unsigned long swap_ra_info;
> -	struct page *page;
> +	unsigned long ra_val;
>  	swp_entry_t entry;
>  	unsigned long faddr, pfn, fpfn;
>  	unsigned long start, end;
> -	pte_t *pte;
> +	pte_t *pte, *orig_pte;
>  	unsigned int max_win, hits, prev_win, win, left;
>  #ifndef CONFIG_64BIT
>  	pte_t *tpte;
> @@ -663,30 +668,32 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
>  	max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
>  			     SWAP_RA_ORDER_CEILING);
>  	if (max_win == 1) {
> -		swap_ra->win = 1;
> -		return NULL;
> +		ra_info->win = 1;
> +		return;
>  	}
>  
>  	faddr = vmf->address;
> -	entry = pte_to_swp_entry(vmf->orig_pte);
> -	if ((unlikely(non_swap_entry(entry))))
> -		return NULL;
> -	page = lookup_swap_cache(entry, vma, faddr);
> -	if (page)
> -		return page;
> +	orig_pte = pte = pte_offset_map(vmf->pmd, faddr);
> +	entry = pte_to_swp_entry(*pte);
> +	if ((unlikely(non_swap_entry(entry)))) {
> +		pte_unmap(orig_pte);
> +		return;
> +	}
>  
>  	fpfn = PFN_DOWN(faddr);
> -	swap_ra_info = GET_SWAP_RA_VAL(vma);
> -	pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info));
> -	prev_win = SWAP_RA_WIN(swap_ra_info);
> -	hits = SWAP_RA_HITS(swap_ra_info);
> -	swap_ra->win = win = __swapin_nr_pages(pfn, fpfn, hits,
> +	ra_val = GET_SWAP_RA_VAL(vma);
> +	pfn = PFN_DOWN(SWAP_RA_ADDR(ra_val));
> +	prev_win = SWAP_RA_WIN(ra_val);
> +	hits = SWAP_RA_HITS(ra_val);
> +	ra_info->win = win = __swapin_nr_pages(pfn, fpfn, hits,
>  					       max_win, prev_win);
>  	atomic_long_set(&vma->swap_readahead_info,
>  			SWAP_RA_VAL(faddr, win, 0));
>  
> -	if (win == 1)
> -		return NULL;
> +	if (win == 1) {
> +		pte_unmap(orig_pte);
> +		return;
> +	}
>  
>  	/* Copy the PTEs because the page table may be unmapped */
>  	if (fpfn == pfn + 1)
> @@ -699,23 +706,21 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
>  		swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left,
>  				  &start, &end);
>  	}
> -	swap_ra->nr_pte = end - start;
> -	swap_ra->offset = fpfn - start;
> -	pte = vmf->pte - swap_ra->offset;
> +	ra_info->nr_pte = end - start;
> +	ra_info->offset = fpfn - start;
> +	pte -= ra_info->offset;
>  #ifdef CONFIG_64BIT
> -	swap_ra->ptes = pte;
> +	ra_info->ptes = pte;
>  #else
> -	tpte = swap_ra->ptes;
> +	tpte = ra_info->ptes;
>  	for (pfn = start; pfn != end; pfn++)
>  		*tpte++ = *pte++;
>  #endif
> -
> -	return NULL;
> +	pte_unmap(orig_pte);
>  }
>  
>  struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
> -				    struct vm_fault *vmf,
> -				    struct vma_swap_readahead *swap_ra)
> +				    struct vm_fault *vmf)
>  {
>  	struct blk_plug plug;
>  	struct vm_area_struct *vma = vmf->vma;
> @@ -724,12 +729,14 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
>  	swp_entry_t entry;
>  	unsigned int i;
>  	bool page_allocated;
> +	struct vma_swap_readahead ra_info = {0,};
>  
> -	if (swap_ra->win == 1)
> +	swap_ra_info(vmf, &ra_info);
> +	if (ra_info.win == 1)
>  		goto skip;
>  
>  	blk_start_plug(&plug);
> -	for (i = 0, pte = swap_ra->ptes; i < swap_ra->nr_pte;
> +	for (i = 0, pte = ra_info.ptes; i < ra_info.nr_pte;
>  	     i++, pte++) {
>  		pentry = *pte;
>  		if (pte_none(pentry))
> @@ -745,7 +752,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
>  			continue;
>  		if (page_allocated) {
>  			swap_readpage(page, false);
> -			if (i != swap_ra->offset &&
> +			if (i != ra_info.offset &&
>  			    likely(!PageTransCompound(page))) {
>  				SetPageReadahead(page);
>  				count_vm_event(SWAP_RA);
> @@ -757,7 +764,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
>  	lru_add_drain();
>  skip:
>  	return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address,
> -				     swap_ra->win == 1);
> +				     ra_info.win == 1);
>  }
>  
>  #ifdef CONFIG_SYSFS

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] mm:swap: clean up swap readahead
  2017-11-01  5:41   ` Huang, Ying
@ 2017-11-01  5:45     ` Minchan Kim
  2017-11-01  6:04       ` Huang, Ying
  0 siblings, 1 reply; 8+ messages in thread
From: Minchan Kim @ 2017-11-01  5:45 UTC (permalink / raw)
  To: Huang, Ying
  Cc: Andrew Morton, linux-kernel, linux-mm, Hugh Dickins, kernel-team

Hi Huang,

On Wed, Nov 01, 2017 at 01:41:00PM +0800, Huang, Ying wrote:
> Hi, Minchan,
> 
> Minchan Kim <minchan@kernel.org> writes:
> 
> > When I see recent change of swap readahead, I am very unhappy
> > about current code structure which diverges two swap readahead
> > algorithm in do_swap_page. This patch is to clean it up.
> >
> > Main motivation is that fault handler doesn't need to be aware of
> > readahead algorithms but just should call swapin_readahead.
> >
> > As first step, this patch cleans up a little bit but not perfect
> > (I just separate for review easier) so next patch will make the goal
> > complete.
> >
> > Signed-off-by: Minchan Kim <minchan@kernel.org>
> > ---
> >  include/linux/swap.h | 17 ++--------
> >  mm/memory.c          | 17 +++-------
> >  mm/swap_state.c      | 89 ++++++++++++++++++++++++++++------------------------
> >  3 files changed, 55 insertions(+), 68 deletions(-)
> >
> > diff --git a/include/linux/swap.h b/include/linux/swap.h
> > index 84255b3da7c1..7c7c8b344bc9 100644
> > --- a/include/linux/swap.h
> > +++ b/include/linux/swap.h
> > @@ -427,12 +427,8 @@ extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t,
> >  			bool *new_page_allocated);
> >  extern struct page *swapin_readahead(swp_entry_t, gfp_t,
> >  			struct vm_area_struct *vma, unsigned long addr);
> > -
> > -extern struct page *swap_readahead_detect(struct vm_fault *vmf,
> > -					  struct vma_swap_readahead *swap_ra);
> >  extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
> > -					   struct vm_fault *vmf,
> > -					   struct vma_swap_readahead *swap_ra);
> > +					   struct vm_fault *vmf);
> >  
> >  /* linux/mm/swapfile.c */
> >  extern atomic_long_t nr_swap_pages;
> > @@ -551,15 +547,8 @@ static inline bool swap_use_vma_readahead(void)
> >  	return false;
> >  }
> >  
> > -static inline struct page *swap_readahead_detect(
> > -	struct vm_fault *vmf, struct vma_swap_readahead *swap_ra)
> > -{
> > -	return NULL;
> > -}
> > -
> > -static inline struct page *do_swap_page_readahead(
> > -	swp_entry_t fentry, gfp_t gfp_mask,
> > -	struct vm_fault *vmf, struct vma_swap_readahead *swap_ra)
> > +static inline struct page *do_swap_page_readahead(swp_entry_t fentry,
> > +				gfp_t gfp_mask, struct vm_fault *vmf)
> >  {
> >  	return NULL;
> >  }
> > diff --git a/mm/memory.c b/mm/memory.c
> > index 8a0c410037d2..e955298e4290 100644
> > --- a/mm/memory.c
> > +++ b/mm/memory.c
> > @@ -2849,21 +2849,14 @@ int do_swap_page(struct vm_fault *vmf)
> >  	struct vm_area_struct *vma = vmf->vma;
> >  	struct page *page = NULL, *swapcache = NULL;
> >  	struct mem_cgroup *memcg;
> > -	struct vma_swap_readahead swap_ra;
> >  	swp_entry_t entry;
> >  	pte_t pte;
> >  	int locked;
> >  	int exclusive = 0;
> >  	int ret = 0;
> > -	bool vma_readahead = swap_use_vma_readahead();
> >  
> > -	if (vma_readahead)
> > -		page = swap_readahead_detect(vmf, &swap_ra);
> > -	if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) {
> > -		if (page)
> > -			put_page(page);
> > +	if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
> >  		goto out;
> > -	}
> 
> The page table holding PTE may be unmapped in pte_unmap_same(), so is it
> safe for us to access page table after this in do_swap_page_readahead()?

That's why I calls pte_offset_map in swap_ra_info before the access.

> 
> Best Regards,
> Huang, Ying
> 
> >  	entry = pte_to_swp_entry(vmf->orig_pte);
> >  	if (unlikely(non_swap_entry(entry))) {
> > @@ -2889,9 +2882,7 @@ int do_swap_page(struct vm_fault *vmf)
> >  
> >  
> >  	delayacct_set_flag(DELAYACCT_PF_SWAPIN);
> > -	if (!page)
> > -		page = lookup_swap_cache(entry, vma_readahead ? vma : NULL,
> > -					 vmf->address);
> > +	page = lookup_swap_cache(entry, vma, vmf->address);
> >  	if (!page) {
> >  		struct swap_info_struct *si = swp_swap_info(entry);
> >  
> > @@ -2907,9 +2898,9 @@ int do_swap_page(struct vm_fault *vmf)
> >  				swap_readpage(page, true);
> >  			}
> >  		} else {
> > -			if (vma_readahead)
> > +			if (swap_use_vma_readahead())
> >  				page = do_swap_page_readahead(entry,
> > -					GFP_HIGHUSER_MOVABLE, vmf, &swap_ra);
> > +					GFP_HIGHUSER_MOVABLE, vmf);
> >  			else
> >  				page = swapin_readahead(entry,
> >  				       GFP_HIGHUSER_MOVABLE, vma, vmf->address);
> > diff --git a/mm/swap_state.c b/mm/swap_state.c
> > index 6c017ced11e6..e3c535fcd2df 100644
> > --- a/mm/swap_state.c
> > +++ b/mm/swap_state.c
> > @@ -331,32 +331,38 @@ struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
> >  			       unsigned long addr)
> >  {
> >  	struct page *page;
> > -	unsigned long ra_info;
> > -	int win, hits, readahead;
> >  
> >  	page = find_get_page(swap_address_space(entry), swp_offset(entry));
> >  
> >  	INC_CACHE_INFO(find_total);
> >  	if (page) {
> > +		bool vma_ra = swap_use_vma_readahead();
> > +		bool readahead = TestClearPageReadahead(page);
> > +
> >  		INC_CACHE_INFO(find_success);
> >  		if (unlikely(PageTransCompound(page)))
> >  			return page;
> > -		readahead = TestClearPageReadahead(page);
> > -		if (vma) {
> > -			ra_info = GET_SWAP_RA_VAL(vma);
> > -			win = SWAP_RA_WIN(ra_info);
> > -			hits = SWAP_RA_HITS(ra_info);
> > +
> > +		if (vma && vma_ra) {
> > +			unsigned long ra_val;
> > +			int win, hits;
> > +
> > +			ra_val = GET_SWAP_RA_VAL(vma);
> > +			win = SWAP_RA_WIN(ra_val);
> > +			hits = SWAP_RA_HITS(ra_val);
> >  			if (readahead)
> >  				hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
> >  			atomic_long_set(&vma->swap_readahead_info,
> >  					SWAP_RA_VAL(addr, win, hits));
> >  		}
> > +
> >  		if (readahead) {
> >  			count_vm_event(SWAP_RA_HIT);
> > -			if (!vma)
> > +			if (!vma || !vma_ra)
> >  				atomic_inc(&swapin_readahead_hits);
> >  		}
> >  	}
> > +
> >  	return page;
> >  }
> >  
> > @@ -645,16 +651,15 @@ static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma,
> >  		    PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
> >  }
> >  
> > -struct page *swap_readahead_detect(struct vm_fault *vmf,
> > -				   struct vma_swap_readahead *swap_ra)
> > +static void swap_ra_info(struct vm_fault *vmf,
> > +			struct vma_swap_readahead *ra_info)
> >  {
> >  	struct vm_area_struct *vma = vmf->vma;
> > -	unsigned long swap_ra_info;
> > -	struct page *page;
> > +	unsigned long ra_val;
> >  	swp_entry_t entry;
> >  	unsigned long faddr, pfn, fpfn;
> >  	unsigned long start, end;
> > -	pte_t *pte;
> > +	pte_t *pte, *orig_pte;
> >  	unsigned int max_win, hits, prev_win, win, left;
> >  #ifndef CONFIG_64BIT
> >  	pte_t *tpte;
> > @@ -663,30 +668,32 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
> >  	max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
> >  			     SWAP_RA_ORDER_CEILING);
> >  	if (max_win == 1) {
> > -		swap_ra->win = 1;
> > -		return NULL;
> > +		ra_info->win = 1;
> > +		return;
> >  	}
> >  
> >  	faddr = vmf->address;
> > -	entry = pte_to_swp_entry(vmf->orig_pte);
> > -	if ((unlikely(non_swap_entry(entry))))
> > -		return NULL;
> > -	page = lookup_swap_cache(entry, vma, faddr);
> > -	if (page)
> > -		return page;
> > +	orig_pte = pte = pte_offset_map(vmf->pmd, faddr);
> > +	entry = pte_to_swp_entry(*pte);
> > +	if ((unlikely(non_swap_entry(entry)))) {
> > +		pte_unmap(orig_pte);
> > +		return;
> > +	}
> >  
> >  	fpfn = PFN_DOWN(faddr);
> > -	swap_ra_info = GET_SWAP_RA_VAL(vma);
> > -	pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info));
> > -	prev_win = SWAP_RA_WIN(swap_ra_info);
> > -	hits = SWAP_RA_HITS(swap_ra_info);
> > -	swap_ra->win = win = __swapin_nr_pages(pfn, fpfn, hits,
> > +	ra_val = GET_SWAP_RA_VAL(vma);
> > +	pfn = PFN_DOWN(SWAP_RA_ADDR(ra_val));
> > +	prev_win = SWAP_RA_WIN(ra_val);
> > +	hits = SWAP_RA_HITS(ra_val);
> > +	ra_info->win = win = __swapin_nr_pages(pfn, fpfn, hits,
> >  					       max_win, prev_win);
> >  	atomic_long_set(&vma->swap_readahead_info,
> >  			SWAP_RA_VAL(faddr, win, 0));
> >  
> > -	if (win == 1)
> > -		return NULL;
> > +	if (win == 1) {
> > +		pte_unmap(orig_pte);
> > +		return;
> > +	}
> >  
> >  	/* Copy the PTEs because the page table may be unmapped */
> >  	if (fpfn == pfn + 1)
> > @@ -699,23 +706,21 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
> >  		swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left,
> >  				  &start, &end);
> >  	}
> > -	swap_ra->nr_pte = end - start;
> > -	swap_ra->offset = fpfn - start;
> > -	pte = vmf->pte - swap_ra->offset;
> > +	ra_info->nr_pte = end - start;
> > +	ra_info->offset = fpfn - start;
> > +	pte -= ra_info->offset;
> >  #ifdef CONFIG_64BIT
> > -	swap_ra->ptes = pte;
> > +	ra_info->ptes = pte;
> >  #else
> > -	tpte = swap_ra->ptes;
> > +	tpte = ra_info->ptes;
> >  	for (pfn = start; pfn != end; pfn++)
> >  		*tpte++ = *pte++;
> >  #endif
> > -
> > -	return NULL;
> > +	pte_unmap(orig_pte);
> >  }
> >  
> >  struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
> > -				    struct vm_fault *vmf,
> > -				    struct vma_swap_readahead *swap_ra)
> > +				    struct vm_fault *vmf)
> >  {
> >  	struct blk_plug plug;
> >  	struct vm_area_struct *vma = vmf->vma;
> > @@ -724,12 +729,14 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
> >  	swp_entry_t entry;
> >  	unsigned int i;
> >  	bool page_allocated;
> > +	struct vma_swap_readahead ra_info = {0,};
> >  
> > -	if (swap_ra->win == 1)
> > +	swap_ra_info(vmf, &ra_info);
> > +	if (ra_info.win == 1)
> >  		goto skip;
> >  
> >  	blk_start_plug(&plug);
> > -	for (i = 0, pte = swap_ra->ptes; i < swap_ra->nr_pte;
> > +	for (i = 0, pte = ra_info.ptes; i < ra_info.nr_pte;
> >  	     i++, pte++) {
> >  		pentry = *pte;
> >  		if (pte_none(pentry))
> > @@ -745,7 +752,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
> >  			continue;
> >  		if (page_allocated) {
> >  			swap_readpage(page, false);
> > -			if (i != swap_ra->offset &&
> > +			if (i != ra_info.offset &&
> >  			    likely(!PageTransCompound(page))) {
> >  				SetPageReadahead(page);
> >  				count_vm_event(SWAP_RA);
> > @@ -757,7 +764,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
> >  	lru_add_drain();
> >  skip:
> >  	return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address,
> > -				     swap_ra->win == 1);
> > +				     ra_info.win == 1);
> >  }
> >  
> >  #ifdef CONFIG_SYSFS
> 
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org.  For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 1/2] mm:swap: clean up swap readahead
  2017-11-01  5:45     ` Minchan Kim
@ 2017-11-01  6:04       ` Huang, Ying
  0 siblings, 0 replies; 8+ messages in thread
From: Huang, Ying @ 2017-11-01  6:04 UTC (permalink / raw)
  To: Minchan Kim
  Cc: Huang, Ying, Andrew Morton, linux-kernel, linux-mm, Hugh Dickins,
	kernel-team

Minchan Kim <minchan@kernel.org> writes:

> Hi Huang,
>
> On Wed, Nov 01, 2017 at 01:41:00PM +0800, Huang, Ying wrote:
>> Hi, Minchan,
>> 
>> Minchan Kim <minchan@kernel.org> writes:
>> 
>> > When I see recent change of swap readahead, I am very unhappy
>> > about current code structure which diverges two swap readahead
>> > algorithm in do_swap_page. This patch is to clean it up.
>> >
>> > Main motivation is that fault handler doesn't need to be aware of
>> > readahead algorithms but just should call swapin_readahead.
>> >
>> > As first step, this patch cleans up a little bit but not perfect
>> > (I just separate for review easier) so next patch will make the goal
>> > complete.
>> >
>> > Signed-off-by: Minchan Kim <minchan@kernel.org>
>> > ---
>> >  include/linux/swap.h | 17 ++--------
>> >  mm/memory.c          | 17 +++-------
>> >  mm/swap_state.c      | 89 ++++++++++++++++++++++++++++------------------------
>> >  3 files changed, 55 insertions(+), 68 deletions(-)
>> >
>> > diff --git a/include/linux/swap.h b/include/linux/swap.h
>> > index 84255b3da7c1..7c7c8b344bc9 100644
>> > --- a/include/linux/swap.h
>> > +++ b/include/linux/swap.h
>> > @@ -427,12 +427,8 @@ extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t,
>> >  			bool *new_page_allocated);
>> >  extern struct page *swapin_readahead(swp_entry_t, gfp_t,
>> >  			struct vm_area_struct *vma, unsigned long addr);
>> > -
>> > -extern struct page *swap_readahead_detect(struct vm_fault *vmf,
>> > -					  struct vma_swap_readahead *swap_ra);
>> >  extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
>> > -					   struct vm_fault *vmf,
>> > -					   struct vma_swap_readahead *swap_ra);
>> > +					   struct vm_fault *vmf);
>> >  
>> >  /* linux/mm/swapfile.c */
>> >  extern atomic_long_t nr_swap_pages;
>> > @@ -551,15 +547,8 @@ static inline bool swap_use_vma_readahead(void)
>> >  	return false;
>> >  }
>> >  
>> > -static inline struct page *swap_readahead_detect(
>> > -	struct vm_fault *vmf, struct vma_swap_readahead *swap_ra)
>> > -{
>> > -	return NULL;
>> > -}
>> > -
>> > -static inline struct page *do_swap_page_readahead(
>> > -	swp_entry_t fentry, gfp_t gfp_mask,
>> > -	struct vm_fault *vmf, struct vma_swap_readahead *swap_ra)
>> > +static inline struct page *do_swap_page_readahead(swp_entry_t fentry,
>> > +				gfp_t gfp_mask, struct vm_fault *vmf)
>> >  {
>> >  	return NULL;
>> >  }
>> > diff --git a/mm/memory.c b/mm/memory.c
>> > index 8a0c410037d2..e955298e4290 100644
>> > --- a/mm/memory.c
>> > +++ b/mm/memory.c
>> > @@ -2849,21 +2849,14 @@ int do_swap_page(struct vm_fault *vmf)
>> >  	struct vm_area_struct *vma = vmf->vma;
>> >  	struct page *page = NULL, *swapcache = NULL;
>> >  	struct mem_cgroup *memcg;
>> > -	struct vma_swap_readahead swap_ra;
>> >  	swp_entry_t entry;
>> >  	pte_t pte;
>> >  	int locked;
>> >  	int exclusive = 0;
>> >  	int ret = 0;
>> > -	bool vma_readahead = swap_use_vma_readahead();
>> >  
>> > -	if (vma_readahead)
>> > -		page = swap_readahead_detect(vmf, &swap_ra);
>> > -	if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) {
>> > -		if (page)
>> > -			put_page(page);
>> > +	if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
>> >  		goto out;
>> > -	}
>> 
>> The page table holding PTE may be unmapped in pte_unmap_same(), so is it
>> safe for us to access page table after this in do_swap_page_readahead()?
>
> That's why I calls pte_offset_map in swap_ra_info before the access.

Oh, I found it!  Thanks for explanation!

Best Regards,
Huang, Ying

>> 
>> Best Regards,
>> Huang, Ying
>> 
>> >  	entry = pte_to_swp_entry(vmf->orig_pte);
>> >  	if (unlikely(non_swap_entry(entry))) {
>> > @@ -2889,9 +2882,7 @@ int do_swap_page(struct vm_fault *vmf)
>> >  
>> >  
>> >  	delayacct_set_flag(DELAYACCT_PF_SWAPIN);
>> > -	if (!page)
>> > -		page = lookup_swap_cache(entry, vma_readahead ? vma : NULL,
>> > -					 vmf->address);
>> > +	page = lookup_swap_cache(entry, vma, vmf->address);
>> >  	if (!page) {
>> >  		struct swap_info_struct *si = swp_swap_info(entry);
>> >  
>> > @@ -2907,9 +2898,9 @@ int do_swap_page(struct vm_fault *vmf)
>> >  				swap_readpage(page, true);
>> >  			}
>> >  		} else {
>> > -			if (vma_readahead)
>> > +			if (swap_use_vma_readahead())
>> >  				page = do_swap_page_readahead(entry,
>> > -					GFP_HIGHUSER_MOVABLE, vmf, &swap_ra);
>> > +					GFP_HIGHUSER_MOVABLE, vmf);
>> >  			else
>> >  				page = swapin_readahead(entry,
>> >  				       GFP_HIGHUSER_MOVABLE, vma, vmf->address);
>> > diff --git a/mm/swap_state.c b/mm/swap_state.c
>> > index 6c017ced11e6..e3c535fcd2df 100644
>> > --- a/mm/swap_state.c
>> > +++ b/mm/swap_state.c
>> > @@ -331,32 +331,38 @@ struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
>> >  			       unsigned long addr)
>> >  {
>> >  	struct page *page;
>> > -	unsigned long ra_info;
>> > -	int win, hits, readahead;
>> >  
>> >  	page = find_get_page(swap_address_space(entry), swp_offset(entry));
>> >  
>> >  	INC_CACHE_INFO(find_total);
>> >  	if (page) {
>> > +		bool vma_ra = swap_use_vma_readahead();
>> > +		bool readahead = TestClearPageReadahead(page);
>> > +
>> >  		INC_CACHE_INFO(find_success);
>> >  		if (unlikely(PageTransCompound(page)))
>> >  			return page;
>> > -		readahead = TestClearPageReadahead(page);
>> > -		if (vma) {
>> > -			ra_info = GET_SWAP_RA_VAL(vma);
>> > -			win = SWAP_RA_WIN(ra_info);
>> > -			hits = SWAP_RA_HITS(ra_info);
>> > +
>> > +		if (vma && vma_ra) {
>> > +			unsigned long ra_val;
>> > +			int win, hits;
>> > +
>> > +			ra_val = GET_SWAP_RA_VAL(vma);
>> > +			win = SWAP_RA_WIN(ra_val);
>> > +			hits = SWAP_RA_HITS(ra_val);
>> >  			if (readahead)
>> >  				hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
>> >  			atomic_long_set(&vma->swap_readahead_info,
>> >  					SWAP_RA_VAL(addr, win, hits));
>> >  		}
>> > +
>> >  		if (readahead) {
>> >  			count_vm_event(SWAP_RA_HIT);
>> > -			if (!vma)
>> > +			if (!vma || !vma_ra)
>> >  				atomic_inc(&swapin_readahead_hits);
>> >  		}
>> >  	}
>> > +
>> >  	return page;
>> >  }
>> >  
>> > @@ -645,16 +651,15 @@ static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma,
>> >  		    PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
>> >  }
>> >  
>> > -struct page *swap_readahead_detect(struct vm_fault *vmf,
>> > -				   struct vma_swap_readahead *swap_ra)
>> > +static void swap_ra_info(struct vm_fault *vmf,
>> > +			struct vma_swap_readahead *ra_info)
>> >  {
>> >  	struct vm_area_struct *vma = vmf->vma;
>> > -	unsigned long swap_ra_info;
>> > -	struct page *page;
>> > +	unsigned long ra_val;
>> >  	swp_entry_t entry;
>> >  	unsigned long faddr, pfn, fpfn;
>> >  	unsigned long start, end;
>> > -	pte_t *pte;
>> > +	pte_t *pte, *orig_pte;
>> >  	unsigned int max_win, hits, prev_win, win, left;
>> >  #ifndef CONFIG_64BIT
>> >  	pte_t *tpte;
>> > @@ -663,30 +668,32 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
>> >  	max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
>> >  			     SWAP_RA_ORDER_CEILING);
>> >  	if (max_win == 1) {
>> > -		swap_ra->win = 1;
>> > -		return NULL;
>> > +		ra_info->win = 1;
>> > +		return;
>> >  	}
>> >  
>> >  	faddr = vmf->address;
>> > -	entry = pte_to_swp_entry(vmf->orig_pte);
>> > -	if ((unlikely(non_swap_entry(entry))))
>> > -		return NULL;
>> > -	page = lookup_swap_cache(entry, vma, faddr);
>> > -	if (page)
>> > -		return page;
>> > +	orig_pte = pte = pte_offset_map(vmf->pmd, faddr);
>> > +	entry = pte_to_swp_entry(*pte);
>> > +	if ((unlikely(non_swap_entry(entry)))) {
>> > +		pte_unmap(orig_pte);
>> > +		return;
>> > +	}
>> >  
>> >  	fpfn = PFN_DOWN(faddr);
>> > -	swap_ra_info = GET_SWAP_RA_VAL(vma);
>> > -	pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info));
>> > -	prev_win = SWAP_RA_WIN(swap_ra_info);
>> > -	hits = SWAP_RA_HITS(swap_ra_info);
>> > -	swap_ra->win = win = __swapin_nr_pages(pfn, fpfn, hits,
>> > +	ra_val = GET_SWAP_RA_VAL(vma);
>> > +	pfn = PFN_DOWN(SWAP_RA_ADDR(ra_val));
>> > +	prev_win = SWAP_RA_WIN(ra_val);
>> > +	hits = SWAP_RA_HITS(ra_val);
>> > +	ra_info->win = win = __swapin_nr_pages(pfn, fpfn, hits,
>> >  					       max_win, prev_win);
>> >  	atomic_long_set(&vma->swap_readahead_info,
>> >  			SWAP_RA_VAL(faddr, win, 0));
>> >  
>> > -	if (win == 1)
>> > -		return NULL;
>> > +	if (win == 1) {
>> > +		pte_unmap(orig_pte);
>> > +		return;
>> > +	}
>> >  
>> >  	/* Copy the PTEs because the page table may be unmapped */
>> >  	if (fpfn == pfn + 1)
>> > @@ -699,23 +706,21 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
>> >  		swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left,
>> >  				  &start, &end);
>> >  	}
>> > -	swap_ra->nr_pte = end - start;
>> > -	swap_ra->offset = fpfn - start;
>> > -	pte = vmf->pte - swap_ra->offset;
>> > +	ra_info->nr_pte = end - start;
>> > +	ra_info->offset = fpfn - start;
>> > +	pte -= ra_info->offset;
>> >  #ifdef CONFIG_64BIT
>> > -	swap_ra->ptes = pte;
>> > +	ra_info->ptes = pte;
>> >  #else
>> > -	tpte = swap_ra->ptes;
>> > +	tpte = ra_info->ptes;
>> >  	for (pfn = start; pfn != end; pfn++)
>> >  		*tpte++ = *pte++;
>> >  #endif
>> > -
>> > -	return NULL;
>> > +	pte_unmap(orig_pte);
>> >  }
>> >  
>> >  struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
>> > -				    struct vm_fault *vmf,
>> > -				    struct vma_swap_readahead *swap_ra)
>> > +				    struct vm_fault *vmf)
>> >  {
>> >  	struct blk_plug plug;
>> >  	struct vm_area_struct *vma = vmf->vma;
>> > @@ -724,12 +729,14 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
>> >  	swp_entry_t entry;
>> >  	unsigned int i;
>> >  	bool page_allocated;
>> > +	struct vma_swap_readahead ra_info = {0,};
>> >  
>> > -	if (swap_ra->win == 1)
>> > +	swap_ra_info(vmf, &ra_info);
>> > +	if (ra_info.win == 1)
>> >  		goto skip;
>> >  
>> >  	blk_start_plug(&plug);
>> > -	for (i = 0, pte = swap_ra->ptes; i < swap_ra->nr_pte;
>> > +	for (i = 0, pte = ra_info.ptes; i < ra_info.nr_pte;
>> >  	     i++, pte++) {
>> >  		pentry = *pte;
>> >  		if (pte_none(pentry))
>> > @@ -745,7 +752,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
>> >  			continue;
>> >  		if (page_allocated) {
>> >  			swap_readpage(page, false);
>> > -			if (i != swap_ra->offset &&
>> > +			if (i != ra_info.offset &&
>> >  			    likely(!PageTransCompound(page))) {
>> >  				SetPageReadahead(page);
>> >  				count_vm_event(SWAP_RA);
>> > @@ -757,7 +764,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
>> >  	lru_add_drain();
>> >  skip:
>> >  	return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address,
>> > -				     swap_ra->win == 1);
>> > +				     ra_info.win == 1);
>> >  }
>> >  
>> >  #ifdef CONFIG_SYSFS
>> 
>> --
>> To unsubscribe, send a message with 'unsubscribe linux-mm' in
>> the body to majordomo@kvack.org.  For more info on Linux MM,
>> see: http://www.linux-mm.org/ .
>> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] mm:swap: unify cluster-based and vma-based swap readahead
  2017-11-01  5:28 ` [PATCH 2/2] mm:swap: unify cluster-based and vma-based " Minchan Kim
@ 2017-11-01  6:17   ` Huang, Ying
  2017-11-01  6:25     ` Minchan Kim
  0 siblings, 1 reply; 8+ messages in thread
From: Huang, Ying @ 2017-11-01  6:17 UTC (permalink / raw)
  To: Minchan Kim
  Cc: Andrew Morton, linux-kernel, linux-mm, Hugh Dickins, Huang Ying,
	kernel-team

Minchan Kim <minchan@kernel.org> writes:

> This patch makes do_swap_page no need to be aware of two different
> swap readahead algorithm. Just unify cluster-based and vma-based
> readahead function call.
>
> Signed-off-by: Minchan Kim <minchan@kernel.org>
> ---
>  include/linux/swap.h | 17 ++++++++++++-----
>  mm/memory.c          | 11 ++++-------
>  mm/shmem.c           |  5 ++++-
>  mm/swap_state.c      | 21 +++++++++++++++------
>  4 files changed, 35 insertions(+), 19 deletions(-)
>
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index 7c7c8b344bc9..9cc330360eac 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -425,9 +425,11 @@ extern struct page *read_swap_cache_async(swp_entry_t, gfp_t,
>  extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t,
>  			struct vm_area_struct *vma, unsigned long addr,
>  			bool *new_page_allocated);
> -extern struct page *swapin_readahead(swp_entry_t, gfp_t,
> -			struct vm_area_struct *vma, unsigned long addr);
> -extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
> +extern struct page *cluster_readahead(swp_entry_t entry, gfp_t flag,
> +				struct vm_fault *vmf);

In addition to swap readahead, there are file readahead too.  So better
add swap in name, such as swap_cluster_readahead()?

> +extern struct page *swapin_readahead(swp_entry_t entry, gfp_t flag,
> +				struct vm_fault *vmf);
> +extern struct page *vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
>  					   struct vm_fault *vmf);

I don't find vma_readahead() is used outside of page_state.c, why
declare it here?

>  
>  /* linux/mm/swapfile.c */
> @@ -536,8 +538,13 @@ static inline void put_swap_page(struct page *page, swp_entry_t swp)
>  {
>  }
>  
> +static inline struct page *cluster_readahead(swp_entry_t, gfp_t gfp_mask
> +						struct vm_fault *vmf)
> +{
> +}
> +
>  static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
> -			struct vm_area_struct *vma, unsigned long addr)
> +			struct vm_fault *vmf)
>  {
>  	return NULL;
>  }
> @@ -547,7 +554,7 @@ static inline bool swap_use_vma_readahead(void)
>  	return false;
>  }

Now swap_use_vma_readahead() is used in swap_state.c only, so we can
remove it from the header file?

> -static inline struct page *do_swap_page_readahead(swp_entry_t fentry,
> +static inline struct page *vma_readahead(swp_entry_t fentry,
>  				gfp_t gfp_mask, struct vm_fault *vmf)
>  {
>  	return NULL;
> diff --git a/mm/memory.c b/mm/memory.c
> index e955298e4290..ce5e3d7ccc5c 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -2889,7 +2889,8 @@ int do_swap_page(struct vm_fault *vmf)
>  		if (si->flags & SWP_SYNCHRONOUS_IO &&
>  				__swap_count(si, entry) == 1) {
>  			/* skip swapcache */
> -			page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address);
> +			page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
> +							vmf->address);
>  			if (page) {
>  				__SetPageLocked(page);
>  				__SetPageSwapBacked(page);
> @@ -2898,12 +2899,8 @@ int do_swap_page(struct vm_fault *vmf)
>  				swap_readpage(page, true);
>  			}
>  		} else {
> -			if (swap_use_vma_readahead())
> -				page = do_swap_page_readahead(entry,
> -					GFP_HIGHUSER_MOVABLE, vmf);
> -			else
> -				page = swapin_readahead(entry,
> -				       GFP_HIGHUSER_MOVABLE, vma, vmf->address);
> +			page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
> +						vmf);
>  			swapcache = page;
>  		}
>  
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 62dfdc097e44..2522bc0958e1 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -1413,9 +1413,12 @@ static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
>  {
>  	struct vm_area_struct pvma;
>  	struct page *page;
> +	struct vm_fault vmf;
>  
>  	shmem_pseudo_vma_init(&pvma, info, index);
> -	page = swapin_readahead(swap, gfp, &pvma, 0);
> +	vmf.vma = &pvma;
> +	vmf.address = 0;
> +	page = cluster_readahead(swap, gfp, &vmf);
>  	shmem_pseudo_vma_destroy(&pvma);
>  
>  	return page;
> diff --git a/mm/swap_state.c b/mm/swap_state.c
> index e3c535fcd2df..5ee53d4ee047 100644
> --- a/mm/swap_state.c
> +++ b/mm/swap_state.c
> @@ -538,11 +538,10 @@ static unsigned long swapin_nr_pages(unsigned long offset)
>  }
>  
>  /**
> - * swapin_readahead - swap in pages in hope we need them soon
> + * cluster_readahead - swap in pages in hope we need them soon
>   * @entry: swap entry of this memory
>   * @gfp_mask: memory allocation flags
> - * @vma: user vma this address belongs to
> - * @addr: target address for mempolicy
> + * @vmf: fault information
>   *
>   * Returns the struct page for entry and addr, after queueing swapin.
>   *
> @@ -556,8 +555,8 @@ static unsigned long swapin_nr_pages(unsigned long offset)
>   *
>   * Caller must hold down_read on the vma->vm_mm if vma is not NULL.
>   */
> -struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
> -			struct vm_area_struct *vma, unsigned long addr)
> +struct page *cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
> +				struct vm_fault *vmf)
>  {
>  	struct page *page;
>  	unsigned long entry_offset = swp_offset(entry);
> @@ -566,6 +565,8 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
>  	unsigned long mask;
>  	struct blk_plug plug;
>  	bool do_poll = true, page_allocated;
> +	struct vm_area_struct *vma = vmf->vma;
> +	unsigned long addr = vmf->address;
>  
>  	mask = swapin_nr_pages(offset) - 1;
>  	if (!mask)
> @@ -603,6 +604,14 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
>  	return read_swap_cache_async(entry, gfp_mask, vma, addr, do_poll);
>  }
>

No function document for swapin_readahead()?  It is the main interface
for swap readahead.

> +struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
> +				struct vm_fault *vmf)
> +{
> +	return swap_use_vma_readahead() ?
> +				vma_readahead(entry, gfp_mask, vmf) :
> +				cluster_readahead(entry, gfp_mask, vmf);
> +}
> +
>  int init_swap_address_space(unsigned int type, unsigned long nr_pages)
>  {
>  	struct address_space *spaces, *space;
> @@ -719,7 +728,7 @@ static void swap_ra_info(struct vm_fault *vmf,
>  	pte_unmap(orig_pte);
>  }
>  
> -struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
> +struct page *vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
>  				    struct vm_fault *vmf)
>  {
>  	struct blk_plug plug;

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH 2/2] mm:swap: unify cluster-based and vma-based swap readahead
  2017-11-01  6:17   ` Huang, Ying
@ 2017-11-01  6:25     ` Minchan Kim
  0 siblings, 0 replies; 8+ messages in thread
From: Minchan Kim @ 2017-11-01  6:25 UTC (permalink / raw)
  To: Huang, Ying
  Cc: Andrew Morton, linux-kernel, linux-mm, Hugh Dickins, kernel-team

On Wed, Nov 01, 2017 at 02:17:17PM +0800, Huang, Ying wrote:
> Minchan Kim <minchan@kernel.org> writes:
> 
> > This patch makes do_swap_page no need to be aware of two different
> > swap readahead algorithm. Just unify cluster-based and vma-based
> > readahead function call.
> >
> > Signed-off-by: Minchan Kim <minchan@kernel.org>
> > ---
> >  include/linux/swap.h | 17 ++++++++++++-----
> >  mm/memory.c          | 11 ++++-------
> >  mm/shmem.c           |  5 ++++-
> >  mm/swap_state.c      | 21 +++++++++++++++------
> >  4 files changed, 35 insertions(+), 19 deletions(-)
> >
> > diff --git a/include/linux/swap.h b/include/linux/swap.h
> > index 7c7c8b344bc9..9cc330360eac 100644
> > --- a/include/linux/swap.h
> > +++ b/include/linux/swap.h
> > @@ -425,9 +425,11 @@ extern struct page *read_swap_cache_async(swp_entry_t, gfp_t,
> >  extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t,
> >  			struct vm_area_struct *vma, unsigned long addr,
> >  			bool *new_page_allocated);
> > -extern struct page *swapin_readahead(swp_entry_t, gfp_t,
> > -			struct vm_area_struct *vma, unsigned long addr);
> > -extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
> > +extern struct page *cluster_readahead(swp_entry_t entry, gfp_t flag,
> > +				struct vm_fault *vmf);
> 
> In addition to swap readahead, there are file readahead too.  So better
> add swap in name, such as swap_cluster_readahead()?

Yub.

> 
> > +extern struct page *swapin_readahead(swp_entry_t entry, gfp_t flag,
> > +				struct vm_fault *vmf);
> > +extern struct page *vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
> >  					   struct vm_fault *vmf);
> 
> I don't find vma_readahead() is used outside of page_state.c, why
> declare it here?

By wrapping function, it's pointless to declare.
Yub, Let's drop it.


> 
> >  
> >  /* linux/mm/swapfile.c */
> > @@ -536,8 +538,13 @@ static inline void put_swap_page(struct page *page, swp_entry_t swp)
> >  {
> >  }
> >  
> > +static inline struct page *cluster_readahead(swp_entry_t, gfp_t gfp_mask
> > +						struct vm_fault *vmf)
> > +{
> > +}
> > +
> >  static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
> > -			struct vm_area_struct *vma, unsigned long addr)
> > +			struct vm_fault *vmf)
> >  {
> >  	return NULL;
> >  }
> > @@ -547,7 +554,7 @@ static inline bool swap_use_vma_readahead(void)
> >  	return false;
> >  }
> 
> Now swap_use_vma_readahead() is used in swap_state.c only, so we can
> remove it from the header file?

Will do.

> 
> > -static inline struct page *do_swap_page_readahead(swp_entry_t fentry,
> > +static inline struct page *vma_readahead(swp_entry_t fentry,
> >  				gfp_t gfp_mask, struct vm_fault *vmf)
> >  {
> >  	return NULL;
> > diff --git a/mm/memory.c b/mm/memory.c
> > index e955298e4290..ce5e3d7ccc5c 100644
> > --- a/mm/memory.c
> > +++ b/mm/memory.c
> > @@ -2889,7 +2889,8 @@ int do_swap_page(struct vm_fault *vmf)
> >  		if (si->flags & SWP_SYNCHRONOUS_IO &&
> >  				__swap_count(si, entry) == 1) {
> >  			/* skip swapcache */
> > -			page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address);
> > +			page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
> > +							vmf->address);
> >  			if (page) {
> >  				__SetPageLocked(page);
> >  				__SetPageSwapBacked(page);
> > @@ -2898,12 +2899,8 @@ int do_swap_page(struct vm_fault *vmf)
> >  				swap_readpage(page, true);
> >  			}
> >  		} else {
> > -			if (swap_use_vma_readahead())
> > -				page = do_swap_page_readahead(entry,
> > -					GFP_HIGHUSER_MOVABLE, vmf);
> > -			else
> > -				page = swapin_readahead(entry,
> > -				       GFP_HIGHUSER_MOVABLE, vma, vmf->address);
> > +			page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
> > +						vmf);
> >  			swapcache = page;
> >  		}
> >  
> > diff --git a/mm/shmem.c b/mm/shmem.c
> > index 62dfdc097e44..2522bc0958e1 100644
> > --- a/mm/shmem.c
> > +++ b/mm/shmem.c
> > @@ -1413,9 +1413,12 @@ static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
> >  {
> >  	struct vm_area_struct pvma;
> >  	struct page *page;
> > +	struct vm_fault vmf;
> >  
> >  	shmem_pseudo_vma_init(&pvma, info, index);
> > -	page = swapin_readahead(swap, gfp, &pvma, 0);
> > +	vmf.vma = &pvma;
> > +	vmf.address = 0;
> > +	page = cluster_readahead(swap, gfp, &vmf);
> >  	shmem_pseudo_vma_destroy(&pvma);
> >  
> >  	return page;
> > diff --git a/mm/swap_state.c b/mm/swap_state.c
> > index e3c535fcd2df..5ee53d4ee047 100644
> > --- a/mm/swap_state.c
> > +++ b/mm/swap_state.c
> > @@ -538,11 +538,10 @@ static unsigned long swapin_nr_pages(unsigned long offset)
> >  }
> >  
> >  /**
> > - * swapin_readahead - swap in pages in hope we need them soon
> > + * cluster_readahead - swap in pages in hope we need them soon
> >   * @entry: swap entry of this memory
> >   * @gfp_mask: memory allocation flags
> > - * @vma: user vma this address belongs to
> > - * @addr: target address for mempolicy
> > + * @vmf: fault information
> >   *
> >   * Returns the struct page for entry and addr, after queueing swapin.
> >   *
> > @@ -556,8 +555,8 @@ static unsigned long swapin_nr_pages(unsigned long offset)
> >   *
> >   * Caller must hold down_read on the vma->vm_mm if vma is not NULL.
> >   */
> > -struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
> > -			struct vm_area_struct *vma, unsigned long addr)
> > +struct page *cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
> > +				struct vm_fault *vmf)
> >  {
> >  	struct page *page;
> >  	unsigned long entry_offset = swp_offset(entry);
> > @@ -566,6 +565,8 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
> >  	unsigned long mask;
> >  	struct blk_plug plug;
> >  	bool do_poll = true, page_allocated;
> > +	struct vm_area_struct *vma = vmf->vma;
> > +	unsigned long addr = vmf->address;
> >  
> >  	mask = swapin_nr_pages(offset) - 1;
> >  	if (!mask)
> > @@ -603,6 +604,14 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
> >  	return read_swap_cache_async(entry, gfp_mask, vma, addr, do_poll);
> >  }
> >
> 
> No function document for swapin_readahead()?  It is the main interface
> for swap readahead.

Okay, I will remain original desciription and append some more.

Thanks for the quick review, Huang.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2017-11-01  6:25 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-01  5:28 [PATCH 0/2] swap readahead clean up Minchan Kim
2017-11-01  5:28 ` [PATCH 1/2] mm:swap: clean up swap readahead Minchan Kim
2017-11-01  5:41   ` Huang, Ying
2017-11-01  5:45     ` Minchan Kim
2017-11-01  6:04       ` Huang, Ying
2017-11-01  5:28 ` [PATCH 2/2] mm:swap: unify cluster-based and vma-based " Minchan Kim
2017-11-01  6:17   ` Huang, Ying
2017-11-01  6:25     ` Minchan Kim

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).