* [PATCH 0/2] swap readahead clean up
@ 2017-11-01 5:28 Minchan Kim
2017-11-01 5:28 ` [PATCH 1/2] mm:swap: clean up swap readahead Minchan Kim
2017-11-01 5:28 ` [PATCH 2/2] mm:swap: unify cluster-based and vma-based " Minchan Kim
0 siblings, 2 replies; 8+ messages in thread
From: Minchan Kim @ 2017-11-01 5:28 UTC (permalink / raw)
To: Andrew Morton
Cc: linux-kernel, linux-mm, Hugh Dickins, Huang Ying, kernel-team,
Minchan Kim
This patchset cleans up the recently added vma-based readahead code by
unifying it with the cluster-based readahead.
Minchan Kim (2):
mm:swap: clean up swap readahead
mm:swap: unify cluster-based and vma-based swap readahead
include/linux/swap.h | 32 +++++++--------
mm/memory.c | 24 +++--------
mm/shmem.c | 5 ++-
mm/swap_state.c | 110 +++++++++++++++++++++++++++++----------------------
4 files changed, 87 insertions(+), 84 deletions(-)
--
2.7.4
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 1/2] mm:swap: clean up swap readahead
2017-11-01 5:28 [PATCH 0/2] swap readahead clean up Minchan Kim
@ 2017-11-01 5:28 ` Minchan Kim
2017-11-01 5:41 ` Huang, Ying
2017-11-01 5:28 ` [PATCH 2/2] mm:swap: unify cluster-based and vma-based " Minchan Kim
1 sibling, 1 reply; 8+ messages in thread
From: Minchan Kim @ 2017-11-01 5:28 UTC (permalink / raw)
To: Andrew Morton
Cc: linux-kernel, linux-mm, Hugh Dickins, Huang Ying, kernel-team,
Minchan Kim
When I saw the recent changes to swap readahead, I was very unhappy
with the current code structure, which diverges into two swap readahead
algorithms in do_swap_page. This patch cleans it up.
The main motivation is that the fault handler shouldn't need to be aware
of readahead algorithms; it should just call swapin_readahead.
As a first step, this patch cleans things up a little but is not complete
(I split it out to make review easier); the next patch will make the goal
complete.
Signed-off-by: Minchan Kim <minchan@kernel.org>
---
include/linux/swap.h | 17 ++--------
mm/memory.c | 17 +++-------
mm/swap_state.c | 89 ++++++++++++++++++++++++++++------------------------
3 files changed, 55 insertions(+), 68 deletions(-)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 84255b3da7c1..7c7c8b344bc9 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -427,12 +427,8 @@ extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t,
bool *new_page_allocated);
extern struct page *swapin_readahead(swp_entry_t, gfp_t,
struct vm_area_struct *vma, unsigned long addr);
-
-extern struct page *swap_readahead_detect(struct vm_fault *vmf,
- struct vma_swap_readahead *swap_ra);
extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
- struct vm_fault *vmf,
- struct vma_swap_readahead *swap_ra);
+ struct vm_fault *vmf);
/* linux/mm/swapfile.c */
extern atomic_long_t nr_swap_pages;
@@ -551,15 +547,8 @@ static inline bool swap_use_vma_readahead(void)
return false;
}
-static inline struct page *swap_readahead_detect(
- struct vm_fault *vmf, struct vma_swap_readahead *swap_ra)
-{
- return NULL;
-}
-
-static inline struct page *do_swap_page_readahead(
- swp_entry_t fentry, gfp_t gfp_mask,
- struct vm_fault *vmf, struct vma_swap_readahead *swap_ra)
+static inline struct page *do_swap_page_readahead(swp_entry_t fentry,
+ gfp_t gfp_mask, struct vm_fault *vmf)
{
return NULL;
}
diff --git a/mm/memory.c b/mm/memory.c
index 8a0c410037d2..e955298e4290 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2849,21 +2849,14 @@ int do_swap_page(struct vm_fault *vmf)
struct vm_area_struct *vma = vmf->vma;
struct page *page = NULL, *swapcache = NULL;
struct mem_cgroup *memcg;
- struct vma_swap_readahead swap_ra;
swp_entry_t entry;
pte_t pte;
int locked;
int exclusive = 0;
int ret = 0;
- bool vma_readahead = swap_use_vma_readahead();
- if (vma_readahead)
- page = swap_readahead_detect(vmf, &swap_ra);
- if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) {
- if (page)
- put_page(page);
+ if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
goto out;
- }
entry = pte_to_swp_entry(vmf->orig_pte);
if (unlikely(non_swap_entry(entry))) {
@@ -2889,9 +2882,7 @@ int do_swap_page(struct vm_fault *vmf)
delayacct_set_flag(DELAYACCT_PF_SWAPIN);
- if (!page)
- page = lookup_swap_cache(entry, vma_readahead ? vma : NULL,
- vmf->address);
+ page = lookup_swap_cache(entry, vma, vmf->address);
if (!page) {
struct swap_info_struct *si = swp_swap_info(entry);
@@ -2907,9 +2898,9 @@ int do_swap_page(struct vm_fault *vmf)
swap_readpage(page, true);
}
} else {
- if (vma_readahead)
+ if (swap_use_vma_readahead())
page = do_swap_page_readahead(entry,
- GFP_HIGHUSER_MOVABLE, vmf, &swap_ra);
+ GFP_HIGHUSER_MOVABLE, vmf);
else
page = swapin_readahead(entry,
GFP_HIGHUSER_MOVABLE, vma, vmf->address);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 6c017ced11e6..e3c535fcd2df 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -331,32 +331,38 @@ struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
unsigned long addr)
{
struct page *page;
- unsigned long ra_info;
- int win, hits, readahead;
page = find_get_page(swap_address_space(entry), swp_offset(entry));
INC_CACHE_INFO(find_total);
if (page) {
+ bool vma_ra = swap_use_vma_readahead();
+ bool readahead = TestClearPageReadahead(page);
+
INC_CACHE_INFO(find_success);
if (unlikely(PageTransCompound(page)))
return page;
- readahead = TestClearPageReadahead(page);
- if (vma) {
- ra_info = GET_SWAP_RA_VAL(vma);
- win = SWAP_RA_WIN(ra_info);
- hits = SWAP_RA_HITS(ra_info);
+
+ if (vma && vma_ra) {
+ unsigned long ra_val;
+ int win, hits;
+
+ ra_val = GET_SWAP_RA_VAL(vma);
+ win = SWAP_RA_WIN(ra_val);
+ hits = SWAP_RA_HITS(ra_val);
if (readahead)
hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
atomic_long_set(&vma->swap_readahead_info,
SWAP_RA_VAL(addr, win, hits));
}
+
if (readahead) {
count_vm_event(SWAP_RA_HIT);
- if (!vma)
+ if (!vma || !vma_ra)
atomic_inc(&swapin_readahead_hits);
}
}
+
return page;
}
@@ -645,16 +651,15 @@ static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma,
PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
}
-struct page *swap_readahead_detect(struct vm_fault *vmf,
- struct vma_swap_readahead *swap_ra)
+static void swap_ra_info(struct vm_fault *vmf,
+ struct vma_swap_readahead *ra_info)
{
struct vm_area_struct *vma = vmf->vma;
- unsigned long swap_ra_info;
- struct page *page;
+ unsigned long ra_val;
swp_entry_t entry;
unsigned long faddr, pfn, fpfn;
unsigned long start, end;
- pte_t *pte;
+ pte_t *pte, *orig_pte;
unsigned int max_win, hits, prev_win, win, left;
#ifndef CONFIG_64BIT
pte_t *tpte;
@@ -663,30 +668,32 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
SWAP_RA_ORDER_CEILING);
if (max_win == 1) {
- swap_ra->win = 1;
- return NULL;
+ ra_info->win = 1;
+ return;
}
faddr = vmf->address;
- entry = pte_to_swp_entry(vmf->orig_pte);
- if ((unlikely(non_swap_entry(entry))))
- return NULL;
- page = lookup_swap_cache(entry, vma, faddr);
- if (page)
- return page;
+ orig_pte = pte = pte_offset_map(vmf->pmd, faddr);
+ entry = pte_to_swp_entry(*pte);
+ if ((unlikely(non_swap_entry(entry)))) {
+ pte_unmap(orig_pte);
+ return;
+ }
fpfn = PFN_DOWN(faddr);
- swap_ra_info = GET_SWAP_RA_VAL(vma);
- pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info));
- prev_win = SWAP_RA_WIN(swap_ra_info);
- hits = SWAP_RA_HITS(swap_ra_info);
- swap_ra->win = win = __swapin_nr_pages(pfn, fpfn, hits,
+ ra_val = GET_SWAP_RA_VAL(vma);
+ pfn = PFN_DOWN(SWAP_RA_ADDR(ra_val));
+ prev_win = SWAP_RA_WIN(ra_val);
+ hits = SWAP_RA_HITS(ra_val);
+ ra_info->win = win = __swapin_nr_pages(pfn, fpfn, hits,
max_win, prev_win);
atomic_long_set(&vma->swap_readahead_info,
SWAP_RA_VAL(faddr, win, 0));
- if (win == 1)
- return NULL;
+ if (win == 1) {
+ pte_unmap(orig_pte);
+ return;
+ }
/* Copy the PTEs because the page table may be unmapped */
if (fpfn == pfn + 1)
@@ -699,23 +706,21 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left,
&start, &end);
}
- swap_ra->nr_pte = end - start;
- swap_ra->offset = fpfn - start;
- pte = vmf->pte - swap_ra->offset;
+ ra_info->nr_pte = end - start;
+ ra_info->offset = fpfn - start;
+ pte -= ra_info->offset;
#ifdef CONFIG_64BIT
- swap_ra->ptes = pte;
+ ra_info->ptes = pte;
#else
- tpte = swap_ra->ptes;
+ tpte = ra_info->ptes;
for (pfn = start; pfn != end; pfn++)
*tpte++ = *pte++;
#endif
-
- return NULL;
+ pte_unmap(orig_pte);
}
struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
- struct vm_fault *vmf,
- struct vma_swap_readahead *swap_ra)
+ struct vm_fault *vmf)
{
struct blk_plug plug;
struct vm_area_struct *vma = vmf->vma;
@@ -724,12 +729,14 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
swp_entry_t entry;
unsigned int i;
bool page_allocated;
+ struct vma_swap_readahead ra_info = {0,};
- if (swap_ra->win == 1)
+ swap_ra_info(vmf, &ra_info);
+ if (ra_info.win == 1)
goto skip;
blk_start_plug(&plug);
- for (i = 0, pte = swap_ra->ptes; i < swap_ra->nr_pte;
+ for (i = 0, pte = ra_info.ptes; i < ra_info.nr_pte;
i++, pte++) {
pentry = *pte;
if (pte_none(pentry))
@@ -745,7 +752,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
continue;
if (page_allocated) {
swap_readpage(page, false);
- if (i != swap_ra->offset &&
+ if (i != ra_info.offset &&
likely(!PageTransCompound(page))) {
SetPageReadahead(page);
count_vm_event(SWAP_RA);
@@ -757,7 +764,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
lru_add_drain();
skip:
return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address,
- swap_ra->win == 1);
+ ra_info.win == 1);
}
#ifdef CONFIG_SYSFS
--
2.7.4
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 2/2] mm:swap: unify cluster-based and vma-based swap readahead
2017-11-01 5:28 [PATCH 0/2] swap readahead clean up Minchan Kim
2017-11-01 5:28 ` [PATCH 1/2] mm:swap: clean up swap readahead Minchan Kim
@ 2017-11-01 5:28 ` Minchan Kim
2017-11-01 6:17 ` Huang, Ying
1 sibling, 1 reply; 8+ messages in thread
From: Minchan Kim @ 2017-11-01 5:28 UTC (permalink / raw)
To: Andrew Morton
Cc: linux-kernel, linux-mm, Hugh Dickins, Huang Ying, kernel-team,
Minchan Kim
This patch removes do_swap_page's need to be aware of the two different
swap readahead algorithms by unifying the cluster-based and vma-based
readahead function calls.
Signed-off-by: Minchan Kim <minchan@kernel.org>
---
include/linux/swap.h | 17 ++++++++++++-----
mm/memory.c | 11 ++++-------
mm/shmem.c | 5 ++++-
mm/swap_state.c | 21 +++++++++++++++------
4 files changed, 35 insertions(+), 19 deletions(-)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 7c7c8b344bc9..9cc330360eac 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -425,9 +425,11 @@ extern struct page *read_swap_cache_async(swp_entry_t, gfp_t,
extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t,
struct vm_area_struct *vma, unsigned long addr,
bool *new_page_allocated);
-extern struct page *swapin_readahead(swp_entry_t, gfp_t,
- struct vm_area_struct *vma, unsigned long addr);
-extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
+extern struct page *cluster_readahead(swp_entry_t entry, gfp_t flag,
+ struct vm_fault *vmf);
+extern struct page *swapin_readahead(swp_entry_t entry, gfp_t flag,
+ struct vm_fault *vmf);
+extern struct page *vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
struct vm_fault *vmf);
/* linux/mm/swapfile.c */
@@ -536,8 +538,13 @@ static inline void put_swap_page(struct page *page, swp_entry_t swp)
{
}
+static inline struct page *cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
+			struct vm_fault *vmf)
+{
+	return NULL;
+}
+
static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
- struct vm_area_struct *vma, unsigned long addr)
+ struct vm_fault *vmf)
{
return NULL;
}
@@ -547,7 +554,7 @@ static inline bool swap_use_vma_readahead(void)
return false;
}
-static inline struct page *do_swap_page_readahead(swp_entry_t fentry,
+static inline struct page *vma_readahead(swp_entry_t fentry,
gfp_t gfp_mask, struct vm_fault *vmf)
{
return NULL;
diff --git a/mm/memory.c b/mm/memory.c
index e955298e4290..ce5e3d7ccc5c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2889,7 +2889,8 @@ int do_swap_page(struct vm_fault *vmf)
if (si->flags & SWP_SYNCHRONOUS_IO &&
__swap_count(si, entry) == 1) {
/* skip swapcache */
- page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address);
+ page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
+ vmf->address);
if (page) {
__SetPageLocked(page);
__SetPageSwapBacked(page);
@@ -2898,12 +2899,8 @@ int do_swap_page(struct vm_fault *vmf)
swap_readpage(page, true);
}
} else {
- if (swap_use_vma_readahead())
- page = do_swap_page_readahead(entry,
- GFP_HIGHUSER_MOVABLE, vmf);
- else
- page = swapin_readahead(entry,
- GFP_HIGHUSER_MOVABLE, vma, vmf->address);
+ page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
+ vmf);
swapcache = page;
}
diff --git a/mm/shmem.c b/mm/shmem.c
index 62dfdc097e44..2522bc0958e1 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1413,9 +1413,12 @@ static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
{
struct vm_area_struct pvma;
struct page *page;
+ struct vm_fault vmf;
shmem_pseudo_vma_init(&pvma, info, index);
- page = swapin_readahead(swap, gfp, &pvma, 0);
+ vmf.vma = &pvma;
+ vmf.address = 0;
+ page = cluster_readahead(swap, gfp, &vmf);
shmem_pseudo_vma_destroy(&pvma);
return page;
diff --git a/mm/swap_state.c b/mm/swap_state.c
index e3c535fcd2df..5ee53d4ee047 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -538,11 +538,10 @@ static unsigned long swapin_nr_pages(unsigned long offset)
}
/**
- * swapin_readahead - swap in pages in hope we need them soon
+ * cluster_readahead - swap in pages in hope we need them soon
* @entry: swap entry of this memory
* @gfp_mask: memory allocation flags
- * @vma: user vma this address belongs to
- * @addr: target address for mempolicy
+ * @vmf: fault information
*
* Returns the struct page for entry and addr, after queueing swapin.
*
@@ -556,8 +555,8 @@ static unsigned long swapin_nr_pages(unsigned long offset)
*
* Caller must hold down_read on the vma->vm_mm if vma is not NULL.
*/
-struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
- struct vm_area_struct *vma, unsigned long addr)
+struct page *cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
+ struct vm_fault *vmf)
{
struct page *page;
unsigned long entry_offset = swp_offset(entry);
@@ -566,6 +565,8 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
unsigned long mask;
struct blk_plug plug;
bool do_poll = true, page_allocated;
+ struct vm_area_struct *vma = vmf->vma;
+ unsigned long addr = vmf->address;
mask = swapin_nr_pages(offset) - 1;
if (!mask)
@@ -603,6 +604,14 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
return read_swap_cache_async(entry, gfp_mask, vma, addr, do_poll);
}
+struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
+ struct vm_fault *vmf)
+{
+ return swap_use_vma_readahead() ?
+ vma_readahead(entry, gfp_mask, vmf) :
+ cluster_readahead(entry, gfp_mask, vmf);
+}
+
int init_swap_address_space(unsigned int type, unsigned long nr_pages)
{
struct address_space *spaces, *space;
@@ -719,7 +728,7 @@ static void swap_ra_info(struct vm_fault *vmf,
pte_unmap(orig_pte);
}
-struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
+struct page *vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
struct vm_fault *vmf)
{
struct blk_plug plug;
--
2.7.4
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 1/2] mm:swap: clean up swap readahead
2017-11-01 5:28 ` [PATCH 1/2] mm:swap: clean up swap readahead Minchan Kim
@ 2017-11-01 5:41 ` Huang, Ying
2017-11-01 5:45 ` Minchan Kim
0 siblings, 1 reply; 8+ messages in thread
From: Huang, Ying @ 2017-11-01 5:41 UTC (permalink / raw)
To: Minchan Kim
Cc: Andrew Morton, linux-kernel, linux-mm, Hugh Dickins, Huang Ying,
kernel-team
Hi, Minchan,
Minchan Kim <minchan@kernel.org> writes:
> When I see recent change of swap readahead, I am very unhappy
> about current code structure which diverges two swap readahead
> algorithm in do_swap_page. This patch is to clean it up.
>
> Main motivation is that fault handler doesn't need to be aware of
> readahead algorithms but just should call swapin_readahead.
>
> As first step, this patch cleans up a little bit but not perfect
> (I just separate for review easier) so next patch will make the goal
> complete.
>
> Signed-off-by: Minchan Kim <minchan@kernel.org>
> ---
> include/linux/swap.h | 17 ++--------
> mm/memory.c | 17 +++-------
> mm/swap_state.c | 89 ++++++++++++++++++++++++++++------------------------
> 3 files changed, 55 insertions(+), 68 deletions(-)
>
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index 84255b3da7c1..7c7c8b344bc9 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -427,12 +427,8 @@ extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t,
> bool *new_page_allocated);
> extern struct page *swapin_readahead(swp_entry_t, gfp_t,
> struct vm_area_struct *vma, unsigned long addr);
> -
> -extern struct page *swap_readahead_detect(struct vm_fault *vmf,
> - struct vma_swap_readahead *swap_ra);
> extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
> - struct vm_fault *vmf,
> - struct vma_swap_readahead *swap_ra);
> + struct vm_fault *vmf);
>
> /* linux/mm/swapfile.c */
> extern atomic_long_t nr_swap_pages;
> @@ -551,15 +547,8 @@ static inline bool swap_use_vma_readahead(void)
> return false;
> }
>
> -static inline struct page *swap_readahead_detect(
> - struct vm_fault *vmf, struct vma_swap_readahead *swap_ra)
> -{
> - return NULL;
> -}
> -
> -static inline struct page *do_swap_page_readahead(
> - swp_entry_t fentry, gfp_t gfp_mask,
> - struct vm_fault *vmf, struct vma_swap_readahead *swap_ra)
> +static inline struct page *do_swap_page_readahead(swp_entry_t fentry,
> + gfp_t gfp_mask, struct vm_fault *vmf)
> {
> return NULL;
> }
> diff --git a/mm/memory.c b/mm/memory.c
> index 8a0c410037d2..e955298e4290 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -2849,21 +2849,14 @@ int do_swap_page(struct vm_fault *vmf)
> struct vm_area_struct *vma = vmf->vma;
> struct page *page = NULL, *swapcache = NULL;
> struct mem_cgroup *memcg;
> - struct vma_swap_readahead swap_ra;
> swp_entry_t entry;
> pte_t pte;
> int locked;
> int exclusive = 0;
> int ret = 0;
> - bool vma_readahead = swap_use_vma_readahead();
>
> - if (vma_readahead)
> - page = swap_readahead_detect(vmf, &swap_ra);
> - if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) {
> - if (page)
> - put_page(page);
> + if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
> goto out;
> - }
The page table holding PTE may be unmapped in pte_unmap_same(), so is it
safe for us to access page table after this in do_swap_page_readahead()?
Best Regards,
Huang, Ying
> entry = pte_to_swp_entry(vmf->orig_pte);
> if (unlikely(non_swap_entry(entry))) {
> @@ -2889,9 +2882,7 @@ int do_swap_page(struct vm_fault *vmf)
>
>
> delayacct_set_flag(DELAYACCT_PF_SWAPIN);
> - if (!page)
> - page = lookup_swap_cache(entry, vma_readahead ? vma : NULL,
> - vmf->address);
> + page = lookup_swap_cache(entry, vma, vmf->address);
> if (!page) {
> struct swap_info_struct *si = swp_swap_info(entry);
>
> @@ -2907,9 +2898,9 @@ int do_swap_page(struct vm_fault *vmf)
> swap_readpage(page, true);
> }
> } else {
> - if (vma_readahead)
> + if (swap_use_vma_readahead())
> page = do_swap_page_readahead(entry,
> - GFP_HIGHUSER_MOVABLE, vmf, &swap_ra);
> + GFP_HIGHUSER_MOVABLE, vmf);
> else
> page = swapin_readahead(entry,
> GFP_HIGHUSER_MOVABLE, vma, vmf->address);
> diff --git a/mm/swap_state.c b/mm/swap_state.c
> index 6c017ced11e6..e3c535fcd2df 100644
> --- a/mm/swap_state.c
> +++ b/mm/swap_state.c
> @@ -331,32 +331,38 @@ struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
> unsigned long addr)
> {
> struct page *page;
> - unsigned long ra_info;
> - int win, hits, readahead;
>
> page = find_get_page(swap_address_space(entry), swp_offset(entry));
>
> INC_CACHE_INFO(find_total);
> if (page) {
> + bool vma_ra = swap_use_vma_readahead();
> + bool readahead = TestClearPageReadahead(page);
> +
> INC_CACHE_INFO(find_success);
> if (unlikely(PageTransCompound(page)))
> return page;
> - readahead = TestClearPageReadahead(page);
> - if (vma) {
> - ra_info = GET_SWAP_RA_VAL(vma);
> - win = SWAP_RA_WIN(ra_info);
> - hits = SWAP_RA_HITS(ra_info);
> +
> + if (vma && vma_ra) {
> + unsigned long ra_val;
> + int win, hits;
> +
> + ra_val = GET_SWAP_RA_VAL(vma);
> + win = SWAP_RA_WIN(ra_val);
> + hits = SWAP_RA_HITS(ra_val);
> if (readahead)
> hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
> atomic_long_set(&vma->swap_readahead_info,
> SWAP_RA_VAL(addr, win, hits));
> }
> +
> if (readahead) {
> count_vm_event(SWAP_RA_HIT);
> - if (!vma)
> + if (!vma || !vma_ra)
> atomic_inc(&swapin_readahead_hits);
> }
> }
> +
> return page;
> }
>
> @@ -645,16 +651,15 @@ static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma,
> PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
> }
>
> -struct page *swap_readahead_detect(struct vm_fault *vmf,
> - struct vma_swap_readahead *swap_ra)
> +static void swap_ra_info(struct vm_fault *vmf,
> + struct vma_swap_readahead *ra_info)
> {
> struct vm_area_struct *vma = vmf->vma;
> - unsigned long swap_ra_info;
> - struct page *page;
> + unsigned long ra_val;
> swp_entry_t entry;
> unsigned long faddr, pfn, fpfn;
> unsigned long start, end;
> - pte_t *pte;
> + pte_t *pte, *orig_pte;
> unsigned int max_win, hits, prev_win, win, left;
> #ifndef CONFIG_64BIT
> pte_t *tpte;
> @@ -663,30 +668,32 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
> max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
> SWAP_RA_ORDER_CEILING);
> if (max_win == 1) {
> - swap_ra->win = 1;
> - return NULL;
> + ra_info->win = 1;
> + return;
> }
>
> faddr = vmf->address;
> - entry = pte_to_swp_entry(vmf->orig_pte);
> - if ((unlikely(non_swap_entry(entry))))
> - return NULL;
> - page = lookup_swap_cache(entry, vma, faddr);
> - if (page)
> - return page;
> + orig_pte = pte = pte_offset_map(vmf->pmd, faddr);
> + entry = pte_to_swp_entry(*pte);
> + if ((unlikely(non_swap_entry(entry)))) {
> + pte_unmap(orig_pte);
> + return;
> + }
>
> fpfn = PFN_DOWN(faddr);
> - swap_ra_info = GET_SWAP_RA_VAL(vma);
> - pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info));
> - prev_win = SWAP_RA_WIN(swap_ra_info);
> - hits = SWAP_RA_HITS(swap_ra_info);
> - swap_ra->win = win = __swapin_nr_pages(pfn, fpfn, hits,
> + ra_val = GET_SWAP_RA_VAL(vma);
> + pfn = PFN_DOWN(SWAP_RA_ADDR(ra_val));
> + prev_win = SWAP_RA_WIN(ra_val);
> + hits = SWAP_RA_HITS(ra_val);
> + ra_info->win = win = __swapin_nr_pages(pfn, fpfn, hits,
> max_win, prev_win);
> atomic_long_set(&vma->swap_readahead_info,
> SWAP_RA_VAL(faddr, win, 0));
>
> - if (win == 1)
> - return NULL;
> + if (win == 1) {
> + pte_unmap(orig_pte);
> + return;
> + }
>
> /* Copy the PTEs because the page table may be unmapped */
> if (fpfn == pfn + 1)
> @@ -699,23 +706,21 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
> swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left,
> &start, &end);
> }
> - swap_ra->nr_pte = end - start;
> - swap_ra->offset = fpfn - start;
> - pte = vmf->pte - swap_ra->offset;
> + ra_info->nr_pte = end - start;
> + ra_info->offset = fpfn - start;
> + pte -= ra_info->offset;
> #ifdef CONFIG_64BIT
> - swap_ra->ptes = pte;
> + ra_info->ptes = pte;
> #else
> - tpte = swap_ra->ptes;
> + tpte = ra_info->ptes;
> for (pfn = start; pfn != end; pfn++)
> *tpte++ = *pte++;
> #endif
> -
> - return NULL;
> + pte_unmap(orig_pte);
> }
>
> struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
> - struct vm_fault *vmf,
> - struct vma_swap_readahead *swap_ra)
> + struct vm_fault *vmf)
> {
> struct blk_plug plug;
> struct vm_area_struct *vma = vmf->vma;
> @@ -724,12 +729,14 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
> swp_entry_t entry;
> unsigned int i;
> bool page_allocated;
> + struct vma_swap_readahead ra_info = {0,};
>
> - if (swap_ra->win == 1)
> + swap_ra_info(vmf, &ra_info);
> + if (ra_info.win == 1)
> goto skip;
>
> blk_start_plug(&plug);
> - for (i = 0, pte = swap_ra->ptes; i < swap_ra->nr_pte;
> + for (i = 0, pte = ra_info.ptes; i < ra_info.nr_pte;
> i++, pte++) {
> pentry = *pte;
> if (pte_none(pentry))
> @@ -745,7 +752,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
> continue;
> if (page_allocated) {
> swap_readpage(page, false);
> - if (i != swap_ra->offset &&
> + if (i != ra_info.offset &&
> likely(!PageTransCompound(page))) {
> SetPageReadahead(page);
> count_vm_event(SWAP_RA);
> @@ -757,7 +764,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
> lru_add_drain();
> skip:
> return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address,
> - swap_ra->win == 1);
> + ra_info.win == 1);
> }
>
> #ifdef CONFIG_SYSFS
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/2] mm:swap: clean up swap readahead
2017-11-01 5:41 ` Huang, Ying
@ 2017-11-01 5:45 ` Minchan Kim
2017-11-01 6:04 ` Huang, Ying
0 siblings, 1 reply; 8+ messages in thread
From: Minchan Kim @ 2017-11-01 5:45 UTC (permalink / raw)
To: Huang, Ying
Cc: Andrew Morton, linux-kernel, linux-mm, Hugh Dickins, kernel-team
Hi Huang,
On Wed, Nov 01, 2017 at 01:41:00PM +0800, Huang, Ying wrote:
> Hi, Minchan,
>
> Minchan Kim <minchan@kernel.org> writes:
>
> > When I see recent change of swap readahead, I am very unhappy
> > about current code structure which diverges two swap readahead
> > algorithm in do_swap_page. This patch is to clean it up.
> >
> > Main motivation is that fault handler doesn't need to be aware of
> > readahead algorithms but just should call swapin_readahead.
> >
> > As first step, this patch cleans up a little bit but not perfect
> > (I just separate for review easier) so next patch will make the goal
> > complete.
> >
> > Signed-off-by: Minchan Kim <minchan@kernel.org>
> > ---
> > include/linux/swap.h | 17 ++--------
> > mm/memory.c | 17 +++-------
> > mm/swap_state.c | 89 ++++++++++++++++++++++++++++------------------------
> > 3 files changed, 55 insertions(+), 68 deletions(-)
> >
> > diff --git a/include/linux/swap.h b/include/linux/swap.h
> > index 84255b3da7c1..7c7c8b344bc9 100644
> > --- a/include/linux/swap.h
> > +++ b/include/linux/swap.h
> > @@ -427,12 +427,8 @@ extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t,
> > bool *new_page_allocated);
> > extern struct page *swapin_readahead(swp_entry_t, gfp_t,
> > struct vm_area_struct *vma, unsigned long addr);
> > -
> > -extern struct page *swap_readahead_detect(struct vm_fault *vmf,
> > - struct vma_swap_readahead *swap_ra);
> > extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
> > - struct vm_fault *vmf,
> > - struct vma_swap_readahead *swap_ra);
> > + struct vm_fault *vmf);
> >
> > /* linux/mm/swapfile.c */
> > extern atomic_long_t nr_swap_pages;
> > @@ -551,15 +547,8 @@ static inline bool swap_use_vma_readahead(void)
> > return false;
> > }
> >
> > -static inline struct page *swap_readahead_detect(
> > - struct vm_fault *vmf, struct vma_swap_readahead *swap_ra)
> > -{
> > - return NULL;
> > -}
> > -
> > -static inline struct page *do_swap_page_readahead(
> > - swp_entry_t fentry, gfp_t gfp_mask,
> > - struct vm_fault *vmf, struct vma_swap_readahead *swap_ra)
> > +static inline struct page *do_swap_page_readahead(swp_entry_t fentry,
> > + gfp_t gfp_mask, struct vm_fault *vmf)
> > {
> > return NULL;
> > }
> > diff --git a/mm/memory.c b/mm/memory.c
> > index 8a0c410037d2..e955298e4290 100644
> > --- a/mm/memory.c
> > +++ b/mm/memory.c
> > @@ -2849,21 +2849,14 @@ int do_swap_page(struct vm_fault *vmf)
> > struct vm_area_struct *vma = vmf->vma;
> > struct page *page = NULL, *swapcache = NULL;
> > struct mem_cgroup *memcg;
> > - struct vma_swap_readahead swap_ra;
> > swp_entry_t entry;
> > pte_t pte;
> > int locked;
> > int exclusive = 0;
> > int ret = 0;
> > - bool vma_readahead = swap_use_vma_readahead();
> >
> > - if (vma_readahead)
> > - page = swap_readahead_detect(vmf, &swap_ra);
> > - if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) {
> > - if (page)
> > - put_page(page);
> > + if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
> > goto out;
> > - }
>
> The page table holding PTE may be unmapped in pte_unmap_same(), so is it
> safe for us to access page table after this in do_swap_page_readahead()?
That's why I call pte_offset_map in swap_ra_info before the access.
>
> Best Regards,
> Huang, Ying
>
> > entry = pte_to_swp_entry(vmf->orig_pte);
> > if (unlikely(non_swap_entry(entry))) {
> > @@ -2889,9 +2882,7 @@ int do_swap_page(struct vm_fault *vmf)
> >
> >
> > delayacct_set_flag(DELAYACCT_PF_SWAPIN);
> > - if (!page)
> > - page = lookup_swap_cache(entry, vma_readahead ? vma : NULL,
> > - vmf->address);
> > + page = lookup_swap_cache(entry, vma, vmf->address);
> > if (!page) {
> > struct swap_info_struct *si = swp_swap_info(entry);
> >
> > @@ -2907,9 +2898,9 @@ int do_swap_page(struct vm_fault *vmf)
> > swap_readpage(page, true);
> > }
> > } else {
> > - if (vma_readahead)
> > + if (swap_use_vma_readahead())
> > page = do_swap_page_readahead(entry,
> > - GFP_HIGHUSER_MOVABLE, vmf, &swap_ra);
> > + GFP_HIGHUSER_MOVABLE, vmf);
> > else
> > page = swapin_readahead(entry,
> > GFP_HIGHUSER_MOVABLE, vma, vmf->address);
> > diff --git a/mm/swap_state.c b/mm/swap_state.c
> > index 6c017ced11e6..e3c535fcd2df 100644
> > --- a/mm/swap_state.c
> > +++ b/mm/swap_state.c
> > @@ -331,32 +331,38 @@ struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
> > unsigned long addr)
> > {
> > struct page *page;
> > - unsigned long ra_info;
> > - int win, hits, readahead;
> >
> > page = find_get_page(swap_address_space(entry), swp_offset(entry));
> >
> > INC_CACHE_INFO(find_total);
> > if (page) {
> > + bool vma_ra = swap_use_vma_readahead();
> > + bool readahead = TestClearPageReadahead(page);
> > +
> > INC_CACHE_INFO(find_success);
> > if (unlikely(PageTransCompound(page)))
> > return page;
> > - readahead = TestClearPageReadahead(page);
> > - if (vma) {
> > - ra_info = GET_SWAP_RA_VAL(vma);
> > - win = SWAP_RA_WIN(ra_info);
> > - hits = SWAP_RA_HITS(ra_info);
> > +
> > + if (vma && vma_ra) {
> > + unsigned long ra_val;
> > + int win, hits;
> > +
> > + ra_val = GET_SWAP_RA_VAL(vma);
> > + win = SWAP_RA_WIN(ra_val);
> > + hits = SWAP_RA_HITS(ra_val);
> > if (readahead)
> > hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
> > atomic_long_set(&vma->swap_readahead_info,
> > SWAP_RA_VAL(addr, win, hits));
> > }
> > +
> > if (readahead) {
> > count_vm_event(SWAP_RA_HIT);
> > - if (!vma)
> > + if (!vma || !vma_ra)
> > atomic_inc(&swapin_readahead_hits);
> > }
> > }
> > +
> > return page;
> > }
> >
> > @@ -645,16 +651,15 @@ static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma,
> > PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
> > }
> >
> > -struct page *swap_readahead_detect(struct vm_fault *vmf,
> > - struct vma_swap_readahead *swap_ra)
> > +static void swap_ra_info(struct vm_fault *vmf,
> > + struct vma_swap_readahead *ra_info)
> > {
> > struct vm_area_struct *vma = vmf->vma;
> > - unsigned long swap_ra_info;
> > - struct page *page;
> > + unsigned long ra_val;
> > swp_entry_t entry;
> > unsigned long faddr, pfn, fpfn;
> > unsigned long start, end;
> > - pte_t *pte;
> > + pte_t *pte, *orig_pte;
> > unsigned int max_win, hits, prev_win, win, left;
> > #ifndef CONFIG_64BIT
> > pte_t *tpte;
> > @@ -663,30 +668,32 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
> > max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
> > SWAP_RA_ORDER_CEILING);
> > if (max_win == 1) {
> > - swap_ra->win = 1;
> > - return NULL;
> > + ra_info->win = 1;
> > + return;
> > }
> >
> > faddr = vmf->address;
> > - entry = pte_to_swp_entry(vmf->orig_pte);
> > - if ((unlikely(non_swap_entry(entry))))
> > - return NULL;
> > - page = lookup_swap_cache(entry, vma, faddr);
> > - if (page)
> > - return page;
> > + orig_pte = pte = pte_offset_map(vmf->pmd, faddr);
> > + entry = pte_to_swp_entry(*pte);
> > + if ((unlikely(non_swap_entry(entry)))) {
> > + pte_unmap(orig_pte);
> > + return;
> > + }
> >
> > fpfn = PFN_DOWN(faddr);
> > - swap_ra_info = GET_SWAP_RA_VAL(vma);
> > - pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info));
> > - prev_win = SWAP_RA_WIN(swap_ra_info);
> > - hits = SWAP_RA_HITS(swap_ra_info);
> > - swap_ra->win = win = __swapin_nr_pages(pfn, fpfn, hits,
> > + ra_val = GET_SWAP_RA_VAL(vma);
> > + pfn = PFN_DOWN(SWAP_RA_ADDR(ra_val));
> > + prev_win = SWAP_RA_WIN(ra_val);
> > + hits = SWAP_RA_HITS(ra_val);
> > + ra_info->win = win = __swapin_nr_pages(pfn, fpfn, hits,
> > max_win, prev_win);
> > atomic_long_set(&vma->swap_readahead_info,
> > SWAP_RA_VAL(faddr, win, 0));
> >
> > - if (win == 1)
> > - return NULL;
> > + if (win == 1) {
> > + pte_unmap(orig_pte);
> > + return;
> > + }
> >
> > /* Copy the PTEs because the page table may be unmapped */
> > if (fpfn == pfn + 1)
> > @@ -699,23 +706,21 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
> > swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left,
> > &start, &end);
> > }
> > - swap_ra->nr_pte = end - start;
> > - swap_ra->offset = fpfn - start;
> > - pte = vmf->pte - swap_ra->offset;
> > + ra_info->nr_pte = end - start;
> > + ra_info->offset = fpfn - start;
> > + pte -= ra_info->offset;
> > #ifdef CONFIG_64BIT
> > - swap_ra->ptes = pte;
> > + ra_info->ptes = pte;
> > #else
> > - tpte = swap_ra->ptes;
> > + tpte = ra_info->ptes;
> > for (pfn = start; pfn != end; pfn++)
> > *tpte++ = *pte++;
> > #endif
> > -
> > - return NULL;
> > + pte_unmap(orig_pte);
> > }
> >
> > struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
> > - struct vm_fault *vmf,
> > - struct vma_swap_readahead *swap_ra)
> > + struct vm_fault *vmf)
> > {
> > struct blk_plug plug;
> > struct vm_area_struct *vma = vmf->vma;
> > @@ -724,12 +729,14 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
> > swp_entry_t entry;
> > unsigned int i;
> > bool page_allocated;
> > + struct vma_swap_readahead ra_info = {0,};
> >
> > - if (swap_ra->win == 1)
> > + swap_ra_info(vmf, &ra_info);
> > + if (ra_info.win == 1)
> > goto skip;
> >
> > blk_start_plug(&plug);
> > - for (i = 0, pte = swap_ra->ptes; i < swap_ra->nr_pte;
> > + for (i = 0, pte = ra_info.ptes; i < ra_info.nr_pte;
> > i++, pte++) {
> > pentry = *pte;
> > if (pte_none(pentry))
> > @@ -745,7 +752,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
> > continue;
> > if (page_allocated) {
> > swap_readpage(page, false);
> > - if (i != swap_ra->offset &&
> > + if (i != ra_info.offset &&
> > likely(!PageTransCompound(page))) {
> > SetPageReadahead(page);
> > count_vm_event(SWAP_RA);
> > @@ -757,7 +764,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
> > lru_add_drain();
> > skip:
> > return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address,
> > - swap_ra->win == 1);
> > + ra_info.win == 1);
> > }
> >
> > #ifdef CONFIG_SYSFS
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org. For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/2] mm:swap: clean up swap readahead
2017-11-01 5:45 ` Minchan Kim
@ 2017-11-01 6:04 ` Huang, Ying
0 siblings, 0 replies; 8+ messages in thread
From: Huang, Ying @ 2017-11-01 6:04 UTC (permalink / raw)
To: Minchan Kim
Cc: Huang, Ying, Andrew Morton, linux-kernel, linux-mm, Hugh Dickins,
kernel-team
Minchan Kim <minchan@kernel.org> writes:
> Hi Huang,
>
> On Wed, Nov 01, 2017 at 01:41:00PM +0800, Huang, Ying wrote:
>> Hi, Minchan,
>>
>> Minchan Kim <minchan@kernel.org> writes:
>>
>> > When I see recent change of swap readahead, I am very unhappy
>> > about current code structure which diverges two swap readahead
>> > algorithm in do_swap_page. This patch is to clean it up.
>> >
>> > Main motivation is that fault handler doesn't need to be aware of
>> > readahead algorithms but just should call swapin_readahead.
>> >
>> > As first step, this patch cleans up a little bit but not perfect
>> > (I just separate for review easier) so next patch will make the goal
>> > complete.
>> >
>> > Signed-off-by: Minchan Kim <minchan@kernel.org>
>> > ---
>> > include/linux/swap.h | 17 ++--------
>> > mm/memory.c | 17 +++-------
>> > mm/swap_state.c | 89 ++++++++++++++++++++++++++++------------------------
>> > 3 files changed, 55 insertions(+), 68 deletions(-)
>> >
>> > diff --git a/include/linux/swap.h b/include/linux/swap.h
>> > index 84255b3da7c1..7c7c8b344bc9 100644
>> > --- a/include/linux/swap.h
>> > +++ b/include/linux/swap.h
>> > @@ -427,12 +427,8 @@ extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t,
>> > bool *new_page_allocated);
>> > extern struct page *swapin_readahead(swp_entry_t, gfp_t,
>> > struct vm_area_struct *vma, unsigned long addr);
>> > -
>> > -extern struct page *swap_readahead_detect(struct vm_fault *vmf,
>> > - struct vma_swap_readahead *swap_ra);
>> > extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
>> > - struct vm_fault *vmf,
>> > - struct vma_swap_readahead *swap_ra);
>> > + struct vm_fault *vmf);
>> >
>> > /* linux/mm/swapfile.c */
>> > extern atomic_long_t nr_swap_pages;
>> > @@ -551,15 +547,8 @@ static inline bool swap_use_vma_readahead(void)
>> > return false;
>> > }
>> >
>> > -static inline struct page *swap_readahead_detect(
>> > - struct vm_fault *vmf, struct vma_swap_readahead *swap_ra)
>> > -{
>> > - return NULL;
>> > -}
>> > -
>> > -static inline struct page *do_swap_page_readahead(
>> > - swp_entry_t fentry, gfp_t gfp_mask,
>> > - struct vm_fault *vmf, struct vma_swap_readahead *swap_ra)
>> > +static inline struct page *do_swap_page_readahead(swp_entry_t fentry,
>> > + gfp_t gfp_mask, struct vm_fault *vmf)
>> > {
>> > return NULL;
>> > }
>> > diff --git a/mm/memory.c b/mm/memory.c
>> > index 8a0c410037d2..e955298e4290 100644
>> > --- a/mm/memory.c
>> > +++ b/mm/memory.c
>> > @@ -2849,21 +2849,14 @@ int do_swap_page(struct vm_fault *vmf)
>> > struct vm_area_struct *vma = vmf->vma;
>> > struct page *page = NULL, *swapcache = NULL;
>> > struct mem_cgroup *memcg;
>> > - struct vma_swap_readahead swap_ra;
>> > swp_entry_t entry;
>> > pte_t pte;
>> > int locked;
>> > int exclusive = 0;
>> > int ret = 0;
>> > - bool vma_readahead = swap_use_vma_readahead();
>> >
>> > - if (vma_readahead)
>> > - page = swap_readahead_detect(vmf, &swap_ra);
>> > - if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte)) {
>> > - if (page)
>> > - put_page(page);
>> > + if (!pte_unmap_same(vma->vm_mm, vmf->pmd, vmf->pte, vmf->orig_pte))
>> > goto out;
>> > - }
>>
>> The page table holding PTE may be unmapped in pte_unmap_same(), so is it
>> safe for us to access page table after this in do_swap_page_readahead()?
>
> That's why I call pte_offset_map in swap_ra_info before the access.
Oh, I found it! Thanks for the explanation!
Best Regards,
Huang, Ying
>>
>> Best Regards,
>> Huang, Ying
>>
>> > entry = pte_to_swp_entry(vmf->orig_pte);
>> > if (unlikely(non_swap_entry(entry))) {
>> > @@ -2889,9 +2882,7 @@ int do_swap_page(struct vm_fault *vmf)
>> >
>> >
>> > delayacct_set_flag(DELAYACCT_PF_SWAPIN);
>> > - if (!page)
>> > - page = lookup_swap_cache(entry, vma_readahead ? vma : NULL,
>> > - vmf->address);
>> > + page = lookup_swap_cache(entry, vma, vmf->address);
>> > if (!page) {
>> > struct swap_info_struct *si = swp_swap_info(entry);
>> >
>> > @@ -2907,9 +2898,9 @@ int do_swap_page(struct vm_fault *vmf)
>> > swap_readpage(page, true);
>> > }
>> > } else {
>> > - if (vma_readahead)
>> > + if (swap_use_vma_readahead())
>> > page = do_swap_page_readahead(entry,
>> > - GFP_HIGHUSER_MOVABLE, vmf, &swap_ra);
>> > + GFP_HIGHUSER_MOVABLE, vmf);
>> > else
>> > page = swapin_readahead(entry,
>> > GFP_HIGHUSER_MOVABLE, vma, vmf->address);
>> > diff --git a/mm/swap_state.c b/mm/swap_state.c
>> > index 6c017ced11e6..e3c535fcd2df 100644
>> > --- a/mm/swap_state.c
>> > +++ b/mm/swap_state.c
>> > @@ -331,32 +331,38 @@ struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma,
>> > unsigned long addr)
>> > {
>> > struct page *page;
>> > - unsigned long ra_info;
>> > - int win, hits, readahead;
>> >
>> > page = find_get_page(swap_address_space(entry), swp_offset(entry));
>> >
>> > INC_CACHE_INFO(find_total);
>> > if (page) {
>> > + bool vma_ra = swap_use_vma_readahead();
>> > + bool readahead = TestClearPageReadahead(page);
>> > +
>> > INC_CACHE_INFO(find_success);
>> > if (unlikely(PageTransCompound(page)))
>> > return page;
>> > - readahead = TestClearPageReadahead(page);
>> > - if (vma) {
>> > - ra_info = GET_SWAP_RA_VAL(vma);
>> > - win = SWAP_RA_WIN(ra_info);
>> > - hits = SWAP_RA_HITS(ra_info);
>> > +
>> > + if (vma && vma_ra) {
>> > + unsigned long ra_val;
>> > + int win, hits;
>> > +
>> > + ra_val = GET_SWAP_RA_VAL(vma);
>> > + win = SWAP_RA_WIN(ra_val);
>> > + hits = SWAP_RA_HITS(ra_val);
>> > if (readahead)
>> > hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
>> > atomic_long_set(&vma->swap_readahead_info,
>> > SWAP_RA_VAL(addr, win, hits));
>> > }
>> > +
>> > if (readahead) {
>> > count_vm_event(SWAP_RA_HIT);
>> > - if (!vma)
>> > + if (!vma || !vma_ra)
>> > atomic_inc(&swapin_readahead_hits);
>> > }
>> > }
>> > +
>> > return page;
>> > }
>> >
>> > @@ -645,16 +651,15 @@ static inline void swap_ra_clamp_pfn(struct vm_area_struct *vma,
>> > PFN_DOWN((faddr & PMD_MASK) + PMD_SIZE));
>> > }
>> >
>> > -struct page *swap_readahead_detect(struct vm_fault *vmf,
>> > - struct vma_swap_readahead *swap_ra)
>> > +static void swap_ra_info(struct vm_fault *vmf,
>> > + struct vma_swap_readahead *ra_info)
>> > {
>> > struct vm_area_struct *vma = vmf->vma;
>> > - unsigned long swap_ra_info;
>> > - struct page *page;
>> > + unsigned long ra_val;
>> > swp_entry_t entry;
>> > unsigned long faddr, pfn, fpfn;
>> > unsigned long start, end;
>> > - pte_t *pte;
>> > + pte_t *pte, *orig_pte;
>> > unsigned int max_win, hits, prev_win, win, left;
>> > #ifndef CONFIG_64BIT
>> > pte_t *tpte;
>> > @@ -663,30 +668,32 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
>> > max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
>> > SWAP_RA_ORDER_CEILING);
>> > if (max_win == 1) {
>> > - swap_ra->win = 1;
>> > - return NULL;
>> > + ra_info->win = 1;
>> > + return;
>> > }
>> >
>> > faddr = vmf->address;
>> > - entry = pte_to_swp_entry(vmf->orig_pte);
>> > - if ((unlikely(non_swap_entry(entry))))
>> > - return NULL;
>> > - page = lookup_swap_cache(entry, vma, faddr);
>> > - if (page)
>> > - return page;
>> > + orig_pte = pte = pte_offset_map(vmf->pmd, faddr);
>> > + entry = pte_to_swp_entry(*pte);
>> > + if ((unlikely(non_swap_entry(entry)))) {
>> > + pte_unmap(orig_pte);
>> > + return;
>> > + }
>> >
>> > fpfn = PFN_DOWN(faddr);
>> > - swap_ra_info = GET_SWAP_RA_VAL(vma);
>> > - pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info));
>> > - prev_win = SWAP_RA_WIN(swap_ra_info);
>> > - hits = SWAP_RA_HITS(swap_ra_info);
>> > - swap_ra->win = win = __swapin_nr_pages(pfn, fpfn, hits,
>> > + ra_val = GET_SWAP_RA_VAL(vma);
>> > + pfn = PFN_DOWN(SWAP_RA_ADDR(ra_val));
>> > + prev_win = SWAP_RA_WIN(ra_val);
>> > + hits = SWAP_RA_HITS(ra_val);
>> > + ra_info->win = win = __swapin_nr_pages(pfn, fpfn, hits,
>> > max_win, prev_win);
>> > atomic_long_set(&vma->swap_readahead_info,
>> > SWAP_RA_VAL(faddr, win, 0));
>> >
>> > - if (win == 1)
>> > - return NULL;
>> > + if (win == 1) {
>> > + pte_unmap(orig_pte);
>> > + return;
>> > + }
>> >
>> > /* Copy the PTEs because the page table may be unmapped */
>> > if (fpfn == pfn + 1)
>> > @@ -699,23 +706,21 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
>> > swap_ra_clamp_pfn(vma, faddr, fpfn - left, fpfn + win - left,
>> > &start, &end);
>> > }
>> > - swap_ra->nr_pte = end - start;
>> > - swap_ra->offset = fpfn - start;
>> > - pte = vmf->pte - swap_ra->offset;
>> > + ra_info->nr_pte = end - start;
>> > + ra_info->offset = fpfn - start;
>> > + pte -= ra_info->offset;
>> > #ifdef CONFIG_64BIT
>> > - swap_ra->ptes = pte;
>> > + ra_info->ptes = pte;
>> > #else
>> > - tpte = swap_ra->ptes;
>> > + tpte = ra_info->ptes;
>> > for (pfn = start; pfn != end; pfn++)
>> > *tpte++ = *pte++;
>> > #endif
>> > -
>> > - return NULL;
>> > + pte_unmap(orig_pte);
>> > }
>> >
>> > struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
>> > - struct vm_fault *vmf,
>> > - struct vma_swap_readahead *swap_ra)
>> > + struct vm_fault *vmf)
>> > {
>> > struct blk_plug plug;
>> > struct vm_area_struct *vma = vmf->vma;
>> > @@ -724,12 +729,14 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
>> > swp_entry_t entry;
>> > unsigned int i;
>> > bool page_allocated;
>> > + struct vma_swap_readahead ra_info = {0,};
>> >
>> > - if (swap_ra->win == 1)
>> > + swap_ra_info(vmf, &ra_info);
>> > + if (ra_info.win == 1)
>> > goto skip;
>> >
>> > blk_start_plug(&plug);
>> > - for (i = 0, pte = swap_ra->ptes; i < swap_ra->nr_pte;
>> > + for (i = 0, pte = ra_info.ptes; i < ra_info.nr_pte;
>> > i++, pte++) {
>> > pentry = *pte;
>> > if (pte_none(pentry))
>> > @@ -745,7 +752,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
>> > continue;
>> > if (page_allocated) {
>> > swap_readpage(page, false);
>> > - if (i != swap_ra->offset &&
>> > + if (i != ra_info.offset &&
>> > likely(!PageTransCompound(page))) {
>> > SetPageReadahead(page);
>> > count_vm_event(SWAP_RA);
>> > @@ -757,7 +764,7 @@ struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
>> > lru_add_drain();
>> > skip:
>> > return read_swap_cache_async(fentry, gfp_mask, vma, vmf->address,
>> > - swap_ra->win == 1);
>> > + ra_info.win == 1);
>> > }
>> >
>> > #ifdef CONFIG_SYSFS
>>
>> --
>> To unsubscribe, send a message with 'unsubscribe linux-mm' in
>> the body to majordomo@kvack.org. For more info on Linux MM,
>> see: http://www.linux-mm.org/ .
>> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/2] mm:swap: unify cluster-based and vma-based swap readahead
2017-11-01 5:28 ` [PATCH 2/2] mm:swap: unify cluster-based and vma-based " Minchan Kim
@ 2017-11-01 6:17 ` Huang, Ying
2017-11-01 6:25 ` Minchan Kim
0 siblings, 1 reply; 8+ messages in thread
From: Huang, Ying @ 2017-11-01 6:17 UTC (permalink / raw)
To: Minchan Kim
Cc: Andrew Morton, linux-kernel, linux-mm, Hugh Dickins, Huang Ying,
kernel-team
Minchan Kim <minchan@kernel.org> writes:
> This patch makes do_swap_page no need to be aware of two different
> swap readahead algorithm. Just unify cluster-based and vma-based
> readahead function call.
>
> Signed-off-by: Minchan Kim <minchan@kernel.org>
> ---
> include/linux/swap.h | 17 ++++++++++++-----
> mm/memory.c | 11 ++++-------
> mm/shmem.c | 5 ++++-
> mm/swap_state.c | 21 +++++++++++++++------
> 4 files changed, 35 insertions(+), 19 deletions(-)
>
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index 7c7c8b344bc9..9cc330360eac 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -425,9 +425,11 @@ extern struct page *read_swap_cache_async(swp_entry_t, gfp_t,
> extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t,
> struct vm_area_struct *vma, unsigned long addr,
> bool *new_page_allocated);
> -extern struct page *swapin_readahead(swp_entry_t, gfp_t,
> - struct vm_area_struct *vma, unsigned long addr);
> -extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
> +extern struct page *cluster_readahead(swp_entry_t entry, gfp_t flag,
> + struct vm_fault *vmf);
In addition to swap readahead, there are file readahead too. So better
add swap in name, such as swap_cluster_readahead()?
> +extern struct page *swapin_readahead(swp_entry_t entry, gfp_t flag,
> + struct vm_fault *vmf);
> +extern struct page *vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
> struct vm_fault *vmf);
I don't find vma_readahead() is used outside of swap_state.c, why
declare it here?
>
> /* linux/mm/swapfile.c */
> @@ -536,8 +538,13 @@ static inline void put_swap_page(struct page *page, swp_entry_t swp)
> {
> }
>
> +static inline struct page *cluster_readahead(swp_entry_t, gfp_t gfp_mask
> + struct vm_fault *vmf)
> +{
> +}
> +
> static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
> - struct vm_area_struct *vma, unsigned long addr)
> + struct vm_fault *vmf)
> {
> return NULL;
> }
> @@ -547,7 +554,7 @@ static inline bool swap_use_vma_readahead(void)
> return false;
> }
Now swap_use_vma_readahead() is used in swap_state.c only, so we can
remove it from the header file?
> -static inline struct page *do_swap_page_readahead(swp_entry_t fentry,
> +static inline struct page *vma_readahead(swp_entry_t fentry,
> gfp_t gfp_mask, struct vm_fault *vmf)
> {
> return NULL;
> diff --git a/mm/memory.c b/mm/memory.c
> index e955298e4290..ce5e3d7ccc5c 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -2889,7 +2889,8 @@ int do_swap_page(struct vm_fault *vmf)
> if (si->flags & SWP_SYNCHRONOUS_IO &&
> __swap_count(si, entry) == 1) {
> /* skip swapcache */
> - page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address);
> + page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
> + vmf->address);
> if (page) {
> __SetPageLocked(page);
> __SetPageSwapBacked(page);
> @@ -2898,12 +2899,8 @@ int do_swap_page(struct vm_fault *vmf)
> swap_readpage(page, true);
> }
> } else {
> - if (swap_use_vma_readahead())
> - page = do_swap_page_readahead(entry,
> - GFP_HIGHUSER_MOVABLE, vmf);
> - else
> - page = swapin_readahead(entry,
> - GFP_HIGHUSER_MOVABLE, vma, vmf->address);
> + page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
> + vmf);
> swapcache = page;
> }
>
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 62dfdc097e44..2522bc0958e1 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -1413,9 +1413,12 @@ static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
> {
> struct vm_area_struct pvma;
> struct page *page;
> + struct vm_fault vmf;
>
> shmem_pseudo_vma_init(&pvma, info, index);
> - page = swapin_readahead(swap, gfp, &pvma, 0);
> + vmf.vma = &pvma;
> + vmf.address = 0;
> + page = cluster_readahead(swap, gfp, &vmf);
> shmem_pseudo_vma_destroy(&pvma);
>
> return page;
> diff --git a/mm/swap_state.c b/mm/swap_state.c
> index e3c535fcd2df..5ee53d4ee047 100644
> --- a/mm/swap_state.c
> +++ b/mm/swap_state.c
> @@ -538,11 +538,10 @@ static unsigned long swapin_nr_pages(unsigned long offset)
> }
>
> /**
> - * swapin_readahead - swap in pages in hope we need them soon
> + * cluster_readahead - swap in pages in hope we need them soon
> * @entry: swap entry of this memory
> * @gfp_mask: memory allocation flags
> - * @vma: user vma this address belongs to
> - * @addr: target address for mempolicy
> + * @vmf: fault information
> *
> * Returns the struct page for entry and addr, after queueing swapin.
> *
> @@ -556,8 +555,8 @@ static unsigned long swapin_nr_pages(unsigned long offset)
> *
> * Caller must hold down_read on the vma->vm_mm if vma is not NULL.
> */
> -struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
> - struct vm_area_struct *vma, unsigned long addr)
> +struct page *cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
> + struct vm_fault *vmf)
> {
> struct page *page;
> unsigned long entry_offset = swp_offset(entry);
> @@ -566,6 +565,8 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
> unsigned long mask;
> struct blk_plug plug;
> bool do_poll = true, page_allocated;
> + struct vm_area_struct *vma = vmf->vma;
> + unsigned long addr = vmf->address;
>
> mask = swapin_nr_pages(offset) - 1;
> if (!mask)
> @@ -603,6 +604,14 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
> return read_swap_cache_async(entry, gfp_mask, vma, addr, do_poll);
> }
>
No function document for swapin_readahead()? It is the main interface
for swap readahead.
> +struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
> + struct vm_fault *vmf)
> +{
> + return swap_use_vma_readahead() ?
> + vma_readahead(entry, gfp_mask, vmf) :
> + cluster_readahead(entry, gfp_mask, vmf);
> +}
> +
> int init_swap_address_space(unsigned int type, unsigned long nr_pages)
> {
> struct address_space *spaces, *space;
> @@ -719,7 +728,7 @@ static void swap_ra_info(struct vm_fault *vmf,
> pte_unmap(orig_pte);
> }
>
> -struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
> +struct page *vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
> struct vm_fault *vmf)
> {
> struct blk_plug plug;
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 2/2] mm:swap: unify cluster-based and vma-based swap readahead
2017-11-01 6:17 ` Huang, Ying
@ 2017-11-01 6:25 ` Minchan Kim
0 siblings, 0 replies; 8+ messages in thread
From: Minchan Kim @ 2017-11-01 6:25 UTC (permalink / raw)
To: Huang, Ying
Cc: Andrew Morton, linux-kernel, linux-mm, Hugh Dickins, kernel-team
On Wed, Nov 01, 2017 at 02:17:17PM +0800, Huang, Ying wrote:
> Minchan Kim <minchan@kernel.org> writes:
>
> > This patch makes do_swap_page no need to be aware of two different
> > swap readahead algorithm. Just unify cluster-based and vma-based
> > readahead function call.
> >
> > Signed-off-by: Minchan Kim <minchan@kernel.org>
> > ---
> > include/linux/swap.h | 17 ++++++++++++-----
> > mm/memory.c | 11 ++++-------
> > mm/shmem.c | 5 ++++-
> > mm/swap_state.c | 21 +++++++++++++++------
> > 4 files changed, 35 insertions(+), 19 deletions(-)
> >
> > diff --git a/include/linux/swap.h b/include/linux/swap.h
> > index 7c7c8b344bc9..9cc330360eac 100644
> > --- a/include/linux/swap.h
> > +++ b/include/linux/swap.h
> > @@ -425,9 +425,11 @@ extern struct page *read_swap_cache_async(swp_entry_t, gfp_t,
> > extern struct page *__read_swap_cache_async(swp_entry_t, gfp_t,
> > struct vm_area_struct *vma, unsigned long addr,
> > bool *new_page_allocated);
> > -extern struct page *swapin_readahead(swp_entry_t, gfp_t,
> > - struct vm_area_struct *vma, unsigned long addr);
> > -extern struct page *do_swap_page_readahead(swp_entry_t fentry, gfp_t gfp_mask,
> > +extern struct page *cluster_readahead(swp_entry_t entry, gfp_t flag,
> > + struct vm_fault *vmf);
>
> In addition to swap readahead, there are file readahead too. So better
> add swap in name, such as swap_cluster_readahead()?
Yub.
>
> > +extern struct page *swapin_readahead(swp_entry_t entry, gfp_t flag,
> > + struct vm_fault *vmf);
> > +extern struct page *vma_readahead(swp_entry_t fentry, gfp_t gfp_mask,
> > struct vm_fault *vmf);
>
> I don't find vma_readahead() is used outside of swap_state.c, why
> declare it here?
By wrapping function, it's pointless to declare.
Yub, Let's drop it.
>
> >
> > /* linux/mm/swapfile.c */
> > @@ -536,8 +538,13 @@ static inline void put_swap_page(struct page *page, swp_entry_t swp)
> > {
> > }
> >
> > +static inline struct page *cluster_readahead(swp_entry_t, gfp_t gfp_mask
> > + struct vm_fault *vmf)
> > +{
> > +}
> > +
> > static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
> > - struct vm_area_struct *vma, unsigned long addr)
> > + struct vm_fault *vmf)
> > {
> > return NULL;
> > }
> > @@ -547,7 +554,7 @@ static inline bool swap_use_vma_readahead(void)
> > return false;
> > }
>
> Now swap_use_vma_readahead() is used in swap_state.c only, so we can
> remove it from the header file?
Will do.
>
> > -static inline struct page *do_swap_page_readahead(swp_entry_t fentry,
> > +static inline struct page *vma_readahead(swp_entry_t fentry,
> > gfp_t gfp_mask, struct vm_fault *vmf)
> > {
> > return NULL;
> > diff --git a/mm/memory.c b/mm/memory.c
> > index e955298e4290..ce5e3d7ccc5c 100644
> > --- a/mm/memory.c
> > +++ b/mm/memory.c
> > @@ -2889,7 +2889,8 @@ int do_swap_page(struct vm_fault *vmf)
> > if (si->flags & SWP_SYNCHRONOUS_IO &&
> > __swap_count(si, entry) == 1) {
> > /* skip swapcache */
> > - page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vmf->address);
> > + page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma,
> > + vmf->address);
> > if (page) {
> > __SetPageLocked(page);
> > __SetPageSwapBacked(page);
> > @@ -2898,12 +2899,8 @@ int do_swap_page(struct vm_fault *vmf)
> > swap_readpage(page, true);
> > }
> > } else {
> > - if (swap_use_vma_readahead())
> > - page = do_swap_page_readahead(entry,
> > - GFP_HIGHUSER_MOVABLE, vmf);
> > - else
> > - page = swapin_readahead(entry,
> > - GFP_HIGHUSER_MOVABLE, vma, vmf->address);
> > + page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
> > + vmf);
> > swapcache = page;
> > }
> >
> > diff --git a/mm/shmem.c b/mm/shmem.c
> > index 62dfdc097e44..2522bc0958e1 100644
> > --- a/mm/shmem.c
> > +++ b/mm/shmem.c
> > @@ -1413,9 +1413,12 @@ static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
> > {
> > struct vm_area_struct pvma;
> > struct page *page;
> > + struct vm_fault vmf;
> >
> > shmem_pseudo_vma_init(&pvma, info, index);
> > - page = swapin_readahead(swap, gfp, &pvma, 0);
> > + vmf.vma = &pvma;
> > + vmf.address = 0;
> > + page = cluster_readahead(swap, gfp, &vmf);
> > shmem_pseudo_vma_destroy(&pvma);
> >
> > return page;
> > diff --git a/mm/swap_state.c b/mm/swap_state.c
> > index e3c535fcd2df..5ee53d4ee047 100644
> > --- a/mm/swap_state.c
> > +++ b/mm/swap_state.c
> > @@ -538,11 +538,10 @@ static unsigned long swapin_nr_pages(unsigned long offset)
> > }
> >
> > /**
> > - * swapin_readahead - swap in pages in hope we need them soon
> > + * cluster_readahead - swap in pages in hope we need them soon
> > * @entry: swap entry of this memory
> > * @gfp_mask: memory allocation flags
> > - * @vma: user vma this address belongs to
> > - * @addr: target address for mempolicy
> > + * @vmf: fault information
> > *
> > * Returns the struct page for entry and addr, after queueing swapin.
> > *
> > @@ -556,8 +555,8 @@ static unsigned long swapin_nr_pages(unsigned long offset)
> > *
> > * Caller must hold down_read on the vma->vm_mm if vma is not NULL.
> > */
> > -struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
> > - struct vm_area_struct *vma, unsigned long addr)
> > +struct page *cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
> > + struct vm_fault *vmf)
> > {
> > struct page *page;
> > unsigned long entry_offset = swp_offset(entry);
> > @@ -566,6 +565,8 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
> > unsigned long mask;
> > struct blk_plug plug;
> > bool do_poll = true, page_allocated;
> > + struct vm_area_struct *vma = vmf->vma;
> > + unsigned long addr = vmf->address;
> >
> > mask = swapin_nr_pages(offset) - 1;
> > if (!mask)
> > @@ -603,6 +604,14 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
> > return read_swap_cache_async(entry, gfp_mask, vma, addr, do_poll);
> > }
> >
>
> No function document for swapin_readahead()? It is the main interface
> for swap readahead.
Okay, I will retain the original description and append some more.
Thanks for the quick review, Huang.
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2017-11-01 6:25 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-11-01 5:28 [PATCH 0/2] swap readahead clean up Minchan Kim
2017-11-01 5:28 ` [PATCH 1/2] mm:swap: clean up swap readahead Minchan Kim
2017-11-01 5:41 ` Huang, Ying
2017-11-01 5:45 ` Minchan Kim
2017-11-01 6:04 ` Huang, Ying
2017-11-01 5:28 ` [PATCH 2/2] mm:swap: unify cluster-based and vma-based " Minchan Kim
2017-11-01 6:17 ` Huang, Ying
2017-11-01 6:25 ` Minchan Kim
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).