diff for duplicates of <20190520035254.57579-2-minchan@kernel.org>
diff --git a/a/1.txt b/N1/1.txt
index c3147ed..a706a63 100644
--- a/a/1.txt
+++ b/N1/1.txt
@@ -1,338 +1,133 @@
-When a process expects no accesses to a certain memory range
-it could hint kernel that the pages can be reclaimed
-when memory pressure happens but data should be preserved
-for future use. This could reduce workingset eviction so it
-ends up increasing performance.
-This patch introduces the new MADV_COOL hint to madvise(2)
-syscall. MADV_COOL can be used by a process to mark a memory range
-as not expected to be used in the near future. The hint can help
-kernel in deciding which pages to evict early during memory
-pressure.
+On Mon, 20 May 2019 12:52:48 +0900 Minchan Kim wrote:
+> +static int madvise_cool_pte_range(pmd_t *pmd, unsigned long addr,
+> + unsigned long end, struct mm_walk *walk)
+> +{
+> + pte_t *orig_pte, *pte, ptent;
+> + spinlock_t *ptl;
+> + struct page *page;
+> + struct vm_area_struct *vma = walk->vma;
+> + unsigned long next;
+> +
+> + next = pmd_addr_end(addr, end);
+> + if (pmd_trans_huge(*pmd)) {
+> + spinlock_t *ptl;
-Internally, it works via deactivating memory from active list to
-inactive's head so when the memory pressure happens, they will be
-reclaimed earlier than other active pages unless there is no
-access until the time.
+This inner declaration seems unnecessary; it shadows the ptl already declared above.
+> +
+> + ptl = pmd_trans_huge_lock(pmd, vma);
+> + if (!ptl)
+> + return 0;
+> +
+> + if (is_huge_zero_pmd(*pmd))
+> + goto huge_unlock;
+> +
+> + page = pmd_page(*pmd);
+> + if (page_mapcount(page) > 1)
+> + goto huge_unlock;
+> +
+> + if (next - addr != HPAGE_PMD_SIZE) {
+> + int err;
-* v1r2
- * use clear_page_young in deactivate_page - joelaf
+Alternatively, we could deactivate the THP only if the address range from
+userspace is sane enough, in order to avoid the complex work we have to do here.
+> +
+> + get_page(page);
+> + spin_unlock(ptl);
+> + lock_page(page);
+> + err = split_huge_page(page);
+> + unlock_page(page);
+> + put_page(page);
+> + if (!err)
+> + goto regular_page;
+> + return 0;
+> + }
+> +
+> + pmdp_test_and_clear_young(vma, addr, pmd);
+> + deactivate_page(page);
+> +huge_unlock:
+> + spin_unlock(ptl);
+> + return 0;
+> + }
+> +
+> + if (pmd_trans_unstable(pmd))
+> + return 0;
+> +
+> +regular_page:
-* v1r1
- * Revise the description - surenb
- * Renaming from MADV_WARM to MADV_COOL - surenb
+Should we check for a pending signal here?
-Signed-off-by: Minchan Kim <minchan@kernel.org>
----
- include/linux/page-flags.h | 1 +
- include/linux/page_idle.h | 15 ++++
- include/linux/swap.h | 1 +
- include/uapi/asm-generic/mman-common.h | 1 +
- mm/madvise.c | 112 +++++++++++++++++++++++++
- mm/swap.c | 43 ++++++++++
- 6 files changed, 173 insertions(+)
+> + orig_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+> + for (pte = orig_pte; addr < end; pte++, addr += PAGE_SIZE) {
-diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
-index 9f8712a4b1a5..58b06654c8dd 100644
---- a/include/linux/page-flags.h
-+++ b/include/linux/page-flags.h
-@@ -424,6 +424,7 @@ static inline bool set_hwpoison_free_buddy_page(struct page *page)
- TESTPAGEFLAG(Young, young, PF_ANY)
- SETPAGEFLAG(Young, young, PF_ANY)
- TESTCLEARFLAG(Young, young, PF_ANY)
-+CLEARPAGEFLAG(Young, young, PF_ANY)
- PAGEFLAG(Idle, idle, PF_ANY)
- #endif
-
-diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h
-index 1e894d34bdce..f3f43b317150 100644
---- a/include/linux/page_idle.h
-+++ b/include/linux/page_idle.h
-@@ -19,6 +19,11 @@ static inline void set_page_young(struct page *page)
- SetPageYoung(page);
- }
-
-+static inline void clear_page_young(struct page *page)
-+{
-+ ClearPageYoung(page);
-+}
-+
- static inline bool test_and_clear_page_young(struct page *page)
- {
- return TestClearPageYoung(page);
-@@ -65,6 +70,16 @@ static inline void set_page_young(struct page *page)
- set_bit(PAGE_EXT_YOUNG, &page_ext->flags);
- }
-
-+static void clear_page_young(struct page *page)
-+{
-+ struct page_ext *page_ext = lookup_page_ext(page);
-+
-+ if (unlikely(!page_ext))
-+ return;
-+
-+ clear_bit(PAGE_EXT_YOUNG, &page_ext->flags);
-+}
-+
- static inline bool test_and_clear_page_young(struct page *page)
- {
- struct page_ext *page_ext = lookup_page_ext(page);
-diff --git a/include/linux/swap.h b/include/linux/swap.h
-index 4bfb5c4ac108..64795abea003 100644
---- a/include/linux/swap.h
-+++ b/include/linux/swap.h
-@@ -340,6 +340,7 @@ extern void lru_add_drain_cpu(int cpu);
- extern void lru_add_drain_all(void);
- extern void rotate_reclaimable_page(struct page *page);
- extern void deactivate_file_page(struct page *page);
-+extern void deactivate_page(struct page *page);
- extern void mark_page_lazyfree(struct page *page);
- extern void swap_setup(void);
-
-diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
-index abd238d0f7a4..f7a4a5d4b642 100644
---- a/include/uapi/asm-generic/mman-common.h
-+++ b/include/uapi/asm-generic/mman-common.h
-@@ -42,6 +42,7 @@
- #define MADV_SEQUENTIAL 2 /* expect sequential page references */
- #define MADV_WILLNEED 3 /* will need these pages */
- #define MADV_DONTNEED 4 /* don't need these pages */
-+#define MADV_COOL 5 /* deactivate these pages */
-
- /* common parameters: try to keep these consistent across architectures */
- #define MADV_FREE 8 /* free pages only if memory pressure */
-diff --git a/mm/madvise.c b/mm/madvise.c
-index 628022e674a7..c05817fb570d 100644
---- a/mm/madvise.c
-+++ b/mm/madvise.c
-@@ -8,6 +8,7 @@
-
- #include <linux/mman.h>
- #include <linux/pagemap.h>
-+#include <linux/page_idle.h>
- #include <linux/syscalls.h>
- #include <linux/mempolicy.h>
- #include <linux/page-isolation.h>
-@@ -40,6 +41,7 @@ static int madvise_need_mmap_write(int behavior)
- case MADV_REMOVE:
- case MADV_WILLNEED:
- case MADV_DONTNEED:
-+ case MADV_COOL:
- case MADV_FREE:
- return 0;
- default:
-@@ -307,6 +309,113 @@ static long madvise_willneed(struct vm_area_struct *vma,
- return 0;
- }
-
-+static int madvise_cool_pte_range(pmd_t *pmd, unsigned long addr,
-+ unsigned long end, struct mm_walk *walk)
-+{
-+ pte_t *orig_pte, *pte, ptent;
-+ spinlock_t *ptl;
-+ struct page *page;
-+ struct vm_area_struct *vma = walk->vma;
-+ unsigned long next;
-+
-+ next = pmd_addr_end(addr, end);
-+ if (pmd_trans_huge(*pmd)) {
-+ spinlock_t *ptl;
-+
-+ ptl = pmd_trans_huge_lock(pmd, vma);
-+ if (!ptl)
-+ return 0;
-+
-+ if (is_huge_zero_pmd(*pmd))
-+ goto huge_unlock;
-+
-+ page = pmd_page(*pmd);
-+ if (page_mapcount(page) > 1)
-+ goto huge_unlock;
-+
-+ if (next - addr != HPAGE_PMD_SIZE) {
-+ int err;
-+
-+ get_page(page);
-+ spin_unlock(ptl);
-+ lock_page(page);
-+ err = split_huge_page(page);
-+ unlock_page(page);
-+ put_page(page);
-+ if (!err)
-+ goto regular_page;
-+ return 0;
-+ }
-+
-+ pmdp_test_and_clear_young(vma, addr, pmd);
-+ deactivate_page(page);
-+huge_unlock:
-+ spin_unlock(ptl);
-+ return 0;
-+ }
-+
-+ if (pmd_trans_unstable(pmd))
-+ return 0;
-+
-+regular_page:
-+ orig_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
-+ for (pte = orig_pte; addr < end; pte++, addr += PAGE_SIZE) {
-+ ptent = *pte;
-+
-+ if (pte_none(ptent))
-+ continue;
-+
-+ if (!pte_present(ptent))
-+ continue;
-+
-+ page = vm_normal_page(vma, addr, ptent);
-+ if (!page)
-+ continue;
-+
-+ if (page_mapcount(page) > 1)
-+ continue;
-+
-+ ptep_test_and_clear_young(vma, addr, pte);
-+ deactivate_page(page);
-+ }
-+
-+ pte_unmap_unlock(orig_pte, ptl);
-+ cond_resched();
-+
-+ return 0;
-+}
-+
-+static void madvise_cool_page_range(struct mmu_gather *tlb,
-+ struct vm_area_struct *vma,
-+ unsigned long addr, unsigned long end)
-+{
-+ struct mm_walk cool_walk = {
-+ .pmd_entry = madvise_cool_pte_range,
-+ .mm = vma->vm_mm,
-+ };
-+
-+ tlb_start_vma(tlb, vma);
-+ walk_page_range(addr, end, &cool_walk);
-+ tlb_end_vma(tlb, vma);
-+}
-+
-+static long madvise_cool(struct vm_area_struct *vma,
-+ unsigned long start_addr, unsigned long end_addr)
-+{
-+ struct mm_struct *mm = vma->vm_mm;
-+ struct mmu_gather tlb;
-+
-+ if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
-+ return -EINVAL;
-+
-+ lru_add_drain();
-+ tlb_gather_mmu(&tlb, mm, start_addr, end_addr);
-+ madvise_cool_page_range(&tlb, vma, start_addr, end_addr);
-+ tlb_finish_mmu(&tlb, start_addr, end_addr);
-+
-+ return 0;
-+}
-+
- static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
- unsigned long end, struct mm_walk *walk)
-
-@@ -695,6 +804,8 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
- return madvise_remove(vma, prev, start, end);
- case MADV_WILLNEED:
- return madvise_willneed(vma, prev, start, end);
-+ case MADV_COOL:
-+ return madvise_cool(vma, start, end);
- case MADV_FREE:
- case MADV_DONTNEED:
- return madvise_dontneed_free(vma, prev, start, end, behavior);
-@@ -716,6 +827,7 @@ madvise_behavior_valid(int behavior)
- case MADV_WILLNEED:
- case MADV_DONTNEED:
- case MADV_FREE:
-+ case MADV_COOL:
- #ifdef CONFIG_KSM
- case MADV_MERGEABLE:
- case MADV_UNMERGEABLE:
-diff --git a/mm/swap.c b/mm/swap.c
-index 3a75722e68a9..0f94c3b5397d 100644
---- a/mm/swap.c
-+++ b/mm/swap.c
-@@ -46,6 +46,7 @@ int page_cluster;
- static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);
- static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
- static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs);
-+static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
- static DEFINE_PER_CPU(struct pagevec, lru_lazyfree_pvecs);
- #ifdef CONFIG_SMP
- static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
-@@ -537,6 +538,23 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,
- update_page_reclaim_stat(lruvec, file, 0);
- }
-
-+static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,
-+ void *arg)
-+{
-+ if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
-+ int file = page_is_file_cache(page);
-+ int lru = page_lru_base_type(page);
-+
-+ del_page_from_lru_list(page, lruvec, lru + LRU_ACTIVE);
-+ ClearPageActive(page);
-+ ClearPageReferenced(page);
-+ clear_page_young(page);
-+ add_page_to_lru_list(page, lruvec, lru);
-+
-+ __count_vm_events(PGDEACTIVATE, hpage_nr_pages(page));
-+ update_page_reclaim_stat(lruvec, file, 0);
-+ }
-+}
-
- static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec,
- void *arg)
-@@ -589,6 +607,10 @@ void lru_add_drain_cpu(int cpu)
- if (pagevec_count(pvec))
- pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
-
-+ pvec = &per_cpu(lru_deactivate_pvecs, cpu);
-+ if (pagevec_count(pvec))
-+ pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
-+
- pvec = &per_cpu(lru_lazyfree_pvecs, cpu);
- if (pagevec_count(pvec))
- pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL);
-@@ -622,6 +644,26 @@ void deactivate_file_page(struct page *page)
- }
- }
-
-+/*
-+ * deactivate_page - deactivate a page
-+ * @page: page to deactivate
-+ *
-+ * deactivate_page() moves @page to the inactive list if @page was on the active
-+ * list and was not an unevictable page. This is done to accelerate the reclaim
-+ * of @page.
-+ */
-+void deactivate_page(struct page *page)
-+{
-+ if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
-+ struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
-+
-+ get_page(page);
-+ if (!pagevec_add(pvec, page) || PageCompound(page))
-+ pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
-+ put_cpu_var(lru_deactivate_pvecs);
-+ }
-+}
-+
- /**
- * mark_page_lazyfree - make an anon page lazyfree
- * @page: page to deactivate
-@@ -686,6 +728,7 @@ void lru_add_drain_all(void)
- if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
- pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
- pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
-+ pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
- pagevec_count(&per_cpu(lru_lazyfree_pvecs, cpu)) ||
- need_activate_page_drain(cpu)) {
- INIT_WORK(work, lru_add_drain_per_cpu);
---
-2.21.0.1020.gf2820cf01a-goog
\ No newline at end of file
+s/end/next/ ?
+> + ptent = *pte;
+> +
+> + if (pte_none(ptent))
+> + continue;
+> +
+> + if (!pte_present(ptent))
+> + continue;
+> +
+> + page = vm_normal_page(vma, addr, ptent);
+> + if (!page)
+> + continue;
+> +
+> + if (page_mapcount(page) > 1)
+> + continue;
+> +
+> + ptep_test_and_clear_young(vma, addr, pte);
+> + deactivate_page(page);
+> + }
+> +
+> + pte_unmap_unlock(orig_pte, ptl);
+> + cond_resched();
+> +
+> + return 0;
+> +}
+> +
+> +static long madvise_cool(struct vm_area_struct *vma,
+> + unsigned long start_addr, unsigned long end_addr)
+> +{
+> + struct mm_struct *mm = vma->vm_mm;
+> + struct mmu_gather tlb;
+> +
+> + if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
+> + return -EINVAL;
+
+Should VM_IO mappings be refused service here as well?
+> +
+> + lru_add_drain();
+> + tlb_gather_mmu(&tlb, mm, start_addr, end_addr);
+> + madvise_cool_page_range(&tlb, vma, start_addr, end_addr);
+> + tlb_finish_mmu(&tlb, start_addr, end_addr);
+> +
+> + return 0;
+> +}
+> +
+> +/*
+> + * deactivate_page - deactivate a page
+> + * @page: page to deactivate
+> + *
+> + * deactivate_page() moves @page to the inactive list if @page was on the active
+> + * list and was not an unevictable page. This is done to accelerate the reclaim
+> + * of @page.
+> + */
+> +void deactivate_page(struct page *page)
+> +{
+> + if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
+> + struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
+> +
+> + get_page(page);
+
+A one-line comment explaining why the page is pinned here seems needed.
+
+> + if (!pagevec_add(pvec, page) || PageCompound(page))
+> + pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
+> + put_cpu_var(lru_deactivate_pvecs);
+> + }
+> +}
+> +
+
+--
+Hillf
\ No newline at end of file
diff --git a/a/content_digest b/N1/content_digest
index da3243e..d2328d2 100644
--- a/a/content_digest
+++ b/N1/content_digest
@@ -2,19 +2,20 @@
"ref\00020190520035254.57579-1-minchan\@kernel.org\0"
]
[
- "From\0Minchan Kim <minchan\@kernel.org>\0"
+ "From\0Hillf Danton <hdanton\@sina.com>\0"
]
[
- "Subject\0[RFC 1/7] mm: introduce MADV_COOL\0"
+ "Subject\0Re: [RFC 1/7] mm: introduce MADV_COOL\0"
]
[
- "Date\0Mon, 20 May 2019 12:52:48 +0900\0"
+ "Date\0Tue, 28 May 2019 16:53:01 +0800\0"
]
[
- "To\0Andrew Morton <akpm\@linux-foundation.org>\0"
+ "To\0Minchan Kim <minchan\@kernel.org>\0"
]
[
- "Cc\0LKML <linux-kernel\@vger.kernel.org>",
+ "Cc\0Andrew Morton <akpm\@linux-foundation.org>",
+ " LKML <linux-kernel\@vger.kernel.org>",
" linux-mm <linux-mm\@kvack.org>",
" Michal Hocko <mhocko\@suse.com>",
" Johannes Weiner <hannes\@cmpxchg.org>",
@@ -24,8 +25,7 @@
" Daniel Colascione <dancol\@google.com>",
" Shakeel Butt <shakeelb\@google.com>",
" Sonny Rao <sonnyrao\@google.com>",
- " Brian Geffon <bgeffon\@google.com>",
- " Minchan Kim <minchan\@kernel.org>\0"
+ " Brian Geffon <bgeffon\@google.com>\0"
]
[
"\0000:1\0"
@@ -34,344 +34,139 @@
"b\0"
]
[
- "When a process expects no accesses to a certain memory range\n",
- "it could hint kernel that the pages can be reclaimed\n",
- "when memory pressure happens but data should be preserved\n",
- "for future use. This could reduce workingset eviction so it\n",
- "ends up increasing performance.\n",
"\n",
- "This patch introduces the new MADV_COOL hint to madvise(2)\n",
- "syscall. MADV_COOL can be used by a process to mark a memory range\n",
- "as not expected to be used in the near future. The hint can help\n",
- "kernel in deciding which pages to evict early during memory\n",
- "pressure.\n",
+ "On Mon, 20 May 2019 12:52:48 +0900 Minchan Kim wrote:\n",
+ "> +static int madvise_cool_pte_range(pmd_t *pmd, unsigned long addr,\n",
+ "> +\t\t\t\tunsigned long end, struct mm_walk *walk)\n",
+ "> +{\n",
+ "> +\tpte_t *orig_pte, *pte, ptent;\n",
+ "> +\tspinlock_t *ptl;\n",
+ "> +\tstruct page *page;\n",
+ "> +\tstruct vm_area_struct *vma = walk->vma;\n",
+ "> +\tunsigned long next;\n",
+ "> +\n",
+ "> +\tnext = pmd_addr_end(addr, end);\n",
+ "> +\tif (pmd_trans_huge(*pmd)) {\n",
+ "> +\t\tspinlock_t *ptl;\n",
"\n",
- "Internally, it works via deactivating memory from active list to\n",
- "inactive's head so when the memory pressure happens, they will be\n",
- "reclaimed earlier than other active pages unless there is no\n",
- "access until the time.\n",
+ "Seems not needed with another ptl declared above.\n",
+ "> +\n",
+ "> +\t\tptl = pmd_trans_huge_lock(pmd, vma);\n",
+ "> +\t\tif (!ptl)\n",
+ "> +\t\t\treturn 0;\n",
+ "> +\n",
+ "> +\t\tif (is_huge_zero_pmd(*pmd))\n",
+ "> +\t\t\tgoto huge_unlock;\n",
+ "> +\n",
+ "> +\t\tpage = pmd_page(*pmd);\n",
+ "> +\t\tif (page_mapcount(page) > 1)\n",
+ "> +\t\t\tgoto huge_unlock;\n",
+ "> +\n",
+ "> +\t\tif (next - addr != HPAGE_PMD_SIZE) {\n",
+ "> +\t\t\tint err;\n",
"\n",
- "* v1r2\n",
- " * use clear_page_young in deactivate_page - joelaf\n",
+ "Alternately, we deactivate thp only if the address range from userspace\n",
+ "is sane enough, in order to avoid complex works we have to do here.\n",
+ "> +\n",
+ "> +\t\t\tget_page(page);\n",
+ "> +\t\t\tspin_unlock(ptl);\n",
+ "> +\t\t\tlock_page(page);\n",
+ "> +\t\t\terr = split_huge_page(page);\n",
+ "> +\t\t\tunlock_page(page);\n",
+ "> +\t\t\tput_page(page);\n",
+ "> +\t\t\tif (!err)\n",
+ "> +\t\t\t\tgoto regular_page;\n",
+ "> +\t\t\treturn 0;\n",
+ "> +\t\t}\n",
+ "> +\n",
+ "> +\t\tpmdp_test_and_clear_young(vma, addr, pmd);\n",
+ "> +\t\tdeactivate_page(page);\n",
+ "> +huge_unlock:\n",
+ "> +\t\tspin_unlock(ptl);\n",
+ "> +\t\treturn 0;\n",
+ "> +\t}\n",
+ "> +\n",
+ "> +\tif (pmd_trans_unstable(pmd))\n",
+ "> +\t\treturn 0;\n",
+ "> +\n",
+ "> +regular_page:\n",
"\n",
- "* v1r1\n",
- " * Revise the description - surenb\n",
- " * Renaming from MADV_WARM to MADV_COOL - surenb\n",
+ "Take a look at pending signal?\n",
"\n",
- "Signed-off-by: Minchan Kim <minchan\@kernel.org>\n",
- "---\n",
- " include/linux/page-flags.h | 1 +\n",
- " include/linux/page_idle.h | 15 ++++\n",
- " include/linux/swap.h | 1 +\n",
- " include/uapi/asm-generic/mman-common.h | 1 +\n",
- " mm/madvise.c | 112 +++++++++++++++++++++++++\n",
- " mm/swap.c | 43 ++++++++++\n",
- " 6 files changed, 173 insertions(+)\n",
+ "> +\torig_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);\n",
+ "> +\tfor (pte = orig_pte; addr < end; pte++, addr += PAGE_SIZE) {\n",
"\n",
- "diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h\n",
- "index 9f8712a4b1a5..58b06654c8dd 100644\n",
- "--- a/include/linux/page-flags.h\n",
- "+++ b/include/linux/page-flags.h\n",
- "\@\@ -424,6 +424,7 \@\@ static inline bool set_hwpoison_free_buddy_page(struct page *page)\n",
- " TESTPAGEFLAG(Young, young, PF_ANY)\n",
- " SETPAGEFLAG(Young, young, PF_ANY)\n",
- " TESTCLEARFLAG(Young, young, PF_ANY)\n",
- "+CLEARPAGEFLAG(Young, young, PF_ANY)\n",
- " PAGEFLAG(Idle, idle, PF_ANY)\n",
- " #endif\n",
- " \n",
- "diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h\n",
- "index 1e894d34bdce..f3f43b317150 100644\n",
- "--- a/include/linux/page_idle.h\n",
- "+++ b/include/linux/page_idle.h\n",
- "\@\@ -19,6 +19,11 \@\@ static inline void set_page_young(struct page *page)\n",
- " \tSetPageYoung(page);\n",
- " }\n",
- " \n",
- "+static inline void clear_page_young(struct page *page)\n",
- "+{\n",
- "+\tClearPageYoung(page);\n",
- "+}\n",
- "+\n",
- " static inline bool test_and_clear_page_young(struct page *page)\n",
- " {\n",
- " \treturn TestClearPageYoung(page);\n",
- "\@\@ -65,6 +70,16 \@\@ static inline void set_page_young(struct page *page)\n",
- " \tset_bit(PAGE_EXT_YOUNG, &page_ext->flags);\n",
- " }\n",
- " \n",
- "+static void clear_page_young(struct page *page)\n",
- "+{\n",
- "+\tstruct page_ext *page_ext = lookup_page_ext(page);\n",
- "+\n",
- "+\tif (unlikely(!page_ext))\n",
- "+\t\treturn;\n",
- "+\n",
- "+\tclear_bit(PAGE_EXT_YOUNG, &page_ext->flags);\n",
- "+}\n",
- "+\n",
- " static inline bool test_and_clear_page_young(struct page *page)\n",
- " {\n",
- " \tstruct page_ext *page_ext = lookup_page_ext(page);\n",
- "diff --git a/include/linux/swap.h b/include/linux/swap.h\n",
- "index 4bfb5c4ac108..64795abea003 100644\n",
- "--- a/include/linux/swap.h\n",
- "+++ b/include/linux/swap.h\n",
- "\@\@ -340,6 +340,7 \@\@ extern void lru_add_drain_cpu(int cpu);\n",
- " extern void lru_add_drain_all(void);\n",
- " extern void rotate_reclaimable_page(struct page *page);\n",
- " extern void deactivate_file_page(struct page *page);\n",
- "+extern void deactivate_page(struct page *page);\n",
- " extern void mark_page_lazyfree(struct page *page);\n",
- " extern void swap_setup(void);\n",
- " \n",
- "diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h\n",
- "index abd238d0f7a4..f7a4a5d4b642 100644\n",
- "--- a/include/uapi/asm-generic/mman-common.h\n",
- "+++ b/include/uapi/asm-generic/mman-common.h\n",
- "\@\@ -42,6 +42,7 \@\@\n",
- " #define MADV_SEQUENTIAL\t2\t\t/* expect sequential page references */\n",
- " #define MADV_WILLNEED\t3\t\t/* will need these pages */\n",
- " #define MADV_DONTNEED\t4\t\t/* don't need these pages */\n",
- "+#define MADV_COOL\t5\t\t/* deactivatie these pages */\n",
- " \n",
- " /* common parameters: try to keep these consistent across architectures */\n",
- " #define MADV_FREE\t8\t\t/* free pages only if memory pressure */\n",
- "diff --git a/mm/madvise.c b/mm/madvise.c\n",
- "index 628022e674a7..c05817fb570d 100644\n",
- "--- a/mm/madvise.c\n",
- "+++ b/mm/madvise.c\n",
- "\@\@ -8,6 +8,7 \@\@\n",
- " \n",
- " #include <linux/mman.h>\n",
- " #include <linux/pagemap.h>\n",
- "+#include <linux/page_idle.h>\n",
- " #include <linux/syscalls.h>\n",
- " #include <linux/mempolicy.h>\n",
- " #include <linux/page-isolation.h>\n",
- "\@\@ -40,6 +41,7 \@\@ static int madvise_need_mmap_write(int behavior)\n",
- " \tcase MADV_REMOVE:\n",
- " \tcase MADV_WILLNEED:\n",
- " \tcase MADV_DONTNEED:\n",
- "+\tcase MADV_COOL:\n",
- " \tcase MADV_FREE:\n",
- " \t\treturn 0;\n",
- " \tdefault:\n",
- "\@\@ -307,6 +309,113 \@\@ static long madvise_willneed(struct vm_area_struct *vma,\n",
- " \treturn 0;\n",
- " }\n",
- " \n",
- "+static int madvise_cool_pte_range(pmd_t *pmd, unsigned long addr,\n",
- "+\t\t\t\tunsigned long end, struct mm_walk *walk)\n",
- "+{\n",
- "+\tpte_t *orig_pte, *pte, ptent;\n",
- "+\tspinlock_t *ptl;\n",
- "+\tstruct page *page;\n",
- "+\tstruct vm_area_struct *vma = walk->vma;\n",
- "+\tunsigned long next;\n",
- "+\n",
- "+\tnext = pmd_addr_end(addr, end);\n",
- "+\tif (pmd_trans_huge(*pmd)) {\n",
- "+\t\tspinlock_t *ptl;\n",
- "+\n",
- "+\t\tptl = pmd_trans_huge_lock(pmd, vma);\n",
- "+\t\tif (!ptl)\n",
- "+\t\t\treturn 0;\n",
- "+\n",
- "+\t\tif (is_huge_zero_pmd(*pmd))\n",
- "+\t\t\tgoto huge_unlock;\n",
- "+\n",
- "+\t\tpage = pmd_page(*pmd);\n",
- "+\t\tif (page_mapcount(page) > 1)\n",
- "+\t\t\tgoto huge_unlock;\n",
- "+\n",
- "+\t\tif (next - addr != HPAGE_PMD_SIZE) {\n",
- "+\t\t\tint err;\n",
- "+\n",
- "+\t\t\tget_page(page);\n",
- "+\t\t\tspin_unlock(ptl);\n",
- "+\t\t\tlock_page(page);\n",
- "+\t\t\terr = split_huge_page(page);\n",
- "+\t\t\tunlock_page(page);\n",
- "+\t\t\tput_page(page);\n",
- "+\t\t\tif (!err)\n",
- "+\t\t\t\tgoto regular_page;\n",
- "+\t\t\treturn 0;\n",
- "+\t\t}\n",
- "+\n",
- "+\t\tpmdp_test_and_clear_young(vma, addr, pmd);\n",
- "+\t\tdeactivate_page(page);\n",
- "+huge_unlock:\n",
- "+\t\tspin_unlock(ptl);\n",
- "+\t\treturn 0;\n",
- "+\t}\n",
- "+\n",
- "+\tif (pmd_trans_unstable(pmd))\n",
- "+\t\treturn 0;\n",
- "+\n",
- "+regular_page:\n",
- "+\torig_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);\n",
- "+\tfor (pte = orig_pte; addr < end; pte++, addr += PAGE_SIZE) {\n",
- "+\t\tptent = *pte;\n",
- "+\n",
- "+\t\tif (pte_none(ptent))\n",
- "+\t\t\tcontinue;\n",
- "+\n",
- "+\t\tif (!pte_present(ptent))\n",
- "+\t\t\tcontinue;\n",
- "+\n",
- "+\t\tpage = vm_normal_page(vma, addr, ptent);\n",
- "+\t\tif (!page)\n",
- "+\t\t\tcontinue;\n",
- "+\n",
- "+\t\tif (page_mapcount(page) > 1)\n",
- "+\t\t\tcontinue;\n",
- "+\n",
- "+\t\tptep_test_and_clear_young(vma, addr, pte);\n",
- "+\t\tdeactivate_page(page);\n",
- "+\t}\n",
- "+\n",
- "+\tpte_unmap_unlock(orig_pte, ptl);\n",
- "+\tcond_resched();\n",
- "+\n",
- "+\treturn 0;\n",
- "+}\n",
- "+\n",
- "+static void madvise_cool_page_range(struct mmu_gather *tlb,\n",
- "+\t\t\t struct vm_area_struct *vma,\n",
- "+\t\t\t unsigned long addr, unsigned long end)\n",
- "+{\n",
- "+\tstruct mm_walk cool_walk = {\n",
- "+\t\t.pmd_entry = madvise_cool_pte_range,\n",
- "+\t\t.mm = vma->vm_mm,\n",
- "+\t};\n",
- "+\n",
- "+\ttlb_start_vma(tlb, vma);\n",
- "+\twalk_page_range(addr, end, &cool_walk);\n",
- "+\ttlb_end_vma(tlb, vma);\n",
- "+}\n",
- "+\n",
- "+static long madvise_cool(struct vm_area_struct *vma,\n",
- "+\t\t\tunsigned long start_addr, unsigned long end_addr)\n",
- "+{\n",
- "+\tstruct mm_struct *mm = vma->vm_mm;\n",
- "+\tstruct mmu_gather tlb;\n",
- "+\n",
- "+\tif (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))\n",
- "+\t\treturn -EINVAL;\n",
- "+\n",
- "+\tlru_add_drain();\n",
- "+\ttlb_gather_mmu(&tlb, mm, start_addr, end_addr);\n",
- "+\tmadvise_cool_page_range(&tlb, vma, start_addr, end_addr);\n",
- "+\ttlb_finish_mmu(&tlb, start_addr, end_addr);\n",
- "+\n",
- "+\treturn 0;\n",
- "+}\n",
- "+\n",
- " static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,\n",
- " \t\t\t\tunsigned long end, struct mm_walk *walk)\n",
- " \n",
- "\@\@ -695,6 +804,8 \@\@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,\n",
- " \t\treturn madvise_remove(vma, prev, start, end);\n",
- " \tcase MADV_WILLNEED:\n",
- " \t\treturn madvise_willneed(vma, prev, start, end);\n",
- "+\tcase MADV_COOL:\n",
- "+\t\treturn madvise_cool(vma, start, end);\n",
- " \tcase MADV_FREE:\n",
- " \tcase MADV_DONTNEED:\n",
- " \t\treturn madvise_dontneed_free(vma, prev, start, end, behavior);\n",
- "\@\@ -716,6 +827,7 \@\@ madvise_behavior_valid(int behavior)\n",
- " \tcase MADV_WILLNEED:\n",
- " \tcase MADV_DONTNEED:\n",
- " \tcase MADV_FREE:\n",
- "+\tcase MADV_COOL:\n",
- " #ifdef CONFIG_KSM\n",
- " \tcase MADV_MERGEABLE:\n",
- " \tcase MADV_UNMERGEABLE:\n",
- "diff --git a/mm/swap.c b/mm/swap.c\n",
- "index 3a75722e68a9..0f94c3b5397d 100644\n",
- "--- a/mm/swap.c\n",
- "+++ b/mm/swap.c\n",
- "\@\@ -46,6 +46,7 \@\@ int page_cluster;\n",
- " static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);\n",
- " static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);\n",
- " static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs);\n",
- "+static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);\n",
- " static DEFINE_PER_CPU(struct pagevec, lru_lazyfree_pvecs);\n",
- " #ifdef CONFIG_SMP\n",
- " static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);\n",
- "\@\@ -537,6 +538,23 \@\@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,\n",
- " \tupdate_page_reclaim_stat(lruvec, file, 0);\n",
- " }\n",
- " \n",
- "+static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,\n",
- "+\t\t\t void *arg)\n",
- "+{\n",
- "+\tif (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {\n",
- "+\t\tint file = page_is_file_cache(page);\n",
- "+\t\tint lru = page_lru_base_type(page);\n",
- "+\n",
- "+\t\tdel_page_from_lru_list(page, lruvec, lru + LRU_ACTIVE);\n",
- "+\t\tClearPageActive(page);\n",
- "+\t\tClearPageReferenced(page);\n",
- "+\t\tclear_page_young(page);\n",
- "+\t\tadd_page_to_lru_list(page, lruvec, lru);\n",
- "+\n",
- "+\t\t__count_vm_events(PGDEACTIVATE, hpage_nr_pages(page));\n",
- "+\t\tupdate_page_reclaim_stat(lruvec, file, 0);\n",
- "+\t}\n",
- "+}\n",
- " \n",
- " static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec,\n",
- " \t\t\t void *arg)\n",
- "\@\@ -589,6 +607,10 \@\@ void lru_add_drain_cpu(int cpu)\n",
- " \tif (pagevec_count(pvec))\n",
- " \t\tpagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);\n",
- " \n",
- "+\tpvec = &per_cpu(lru_deactivate_pvecs, cpu);\n",
- "+\tif (pagevec_count(pvec))\n",
- "+\t\tpagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);\n",
- "+\n",
- " \tpvec = &per_cpu(lru_lazyfree_pvecs, cpu);\n",
- " \tif (pagevec_count(pvec))\n",
- " \t\tpagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL);\n",
- "\@\@ -622,6 +644,26 \@\@ void deactivate_file_page(struct page *page)\n",
- " \t}\n",
- " }\n",
- " \n",
- "+/*\n",
- "+ * deactivate_page - deactivate a page\n",
- "+ * \@page: page to deactivate\n",
- "+ *\n",
- "+ * deactivate_page() moves \@page to the inactive list if \@page was on the active\n",
- "+ * list and was not an unevictable page. This is done to accelerate the reclaim\n",
- "+ * of \@page.\n",
- "+ */\n",
- "+void deactivate_page(struct page *page)\n",
- "+{\n",
- "+\tif (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {\n",
- "+\t\tstruct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);\n",
- "+\n",
- "+\t\tget_page(page);\n",
- "+\t\tif (!pagevec_add(pvec, page) || PageCompound(page))\n",
- "+\t\t\tpagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);\n",
- "+\t\tput_cpu_var(lru_deactivate_pvecs);\n",
- "+\t}\n",
- "+}\n",
- "+\n",
- " /**\n",
- " * mark_page_lazyfree - make an anon page lazyfree\n",
- " * \@page: page to deactivate\n",
- "\@\@ -686,6 +728,7 \@\@ void lru_add_drain_all(void)\n",
- " \t\tif (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||\n",
- " \t\t pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||\n",
- " \t\t pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||\n",
- "+\t\t pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||\n",
- " \t\t pagevec_count(&per_cpu(lru_lazyfree_pvecs, cpu)) ||\n",
- " \t\t need_activate_page_drain(cpu)) {\n",
- " \t\t\tINIT_WORK(work, lru_add_drain_per_cpu);\n",
- "-- \n",
- "2.21.0.1020.gf2820cf01a-goog"
+ "s/end/next/ ?\n",
+ "> +\t\tptent = *pte;\n",
+ "> +\n",
+ "> +\t\tif (pte_none(ptent))\n",
+ "> +\t\t\tcontinue;\n",
+ "> +\n",
+ "> +\t\tif (!pte_present(ptent))\n",
+ "> +\t\t\tcontinue;\n",
+ "> +\n",
+ "> +\t\tpage = vm_normal_page(vma, addr, ptent);\n",
+ "> +\t\tif (!page)\n",
+ "> +\t\t\tcontinue;\n",
+ "> +\n",
+ "> +\t\tif (page_mapcount(page) > 1)\n",
+ "> +\t\t\tcontinue;\n",
+ "> +\n",
+ "> +\t\tptep_test_and_clear_young(vma, addr, pte);\n",
+ "> +\t\tdeactivate_page(page);\n",
+ "> +\t}\n",
+ "> +\n",
+ "> +\tpte_unmap_unlock(orig_pte, ptl);\n",
+ "> +\tcond_resched();\n",
+ "> +\n",
+ "> +\treturn 0;\n",
+ "> +}\n",
+ "> +\n",
+ "> +static long madvise_cool(struct vm_area_struct *vma,\n",
+ "> +\t\t\tunsigned long start_addr, unsigned long end_addr)\n",
+ "> +{\n",
+ "> +\tstruct mm_struct *mm = vma->vm_mm;\n",
+ "> +\tstruct mmu_gather tlb;\n",
+ "> +\n",
+ "> +\tif (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))\n",
+ "> +\t\treturn -EINVAL;\n",
+ "\n",
+ "No service in case of VM_IO?\n",
+ "> +\n",
+ "> +\tlru_add_drain();\n",
+ "> +\ttlb_gather_mmu(&tlb, mm, start_addr, end_addr);\n",
+ "> +\tmadvise_cool_page_range(&tlb, vma, start_addr, end_addr);\n",
+ "> +\ttlb_finish_mmu(&tlb, start_addr, end_addr);\n",
+ "> +\n",
+ "> +\treturn 0;\n",
+ "> +}\n",
+ "> +\n",
+ "> +/*\n",
+ "> + * deactivate_page - deactivate a page\n",
+ "> + * \@page: page to deactivate\n",
+ "> + *\n",
+ "> + * deactivate_page() moves \@page to the inactive list if \@page was on the active\n",
+ "> + * list and was not an unevictable page. This is done to accelerate the reclaim\n",
+ "> + * of \@page.\n",
+ "> + */\n",
+ "> +void deactivate_page(struct page *page)\n",
+ "> +{\n",
+ "> +\tif (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {\n",
+ "> +\t\tstruct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);\n",
+ "> +\n",
+ "> +\t\tget_page(page);\n",
+ "\n",
+ "A line of comment seems needed for pinning the page.\n",
+ "\n",
+ "> +\t\tif (!pagevec_add(pvec, page) || PageCompound(page))\n",
+ "> +\t\t\tpagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);\n",
+ "> +\t\tput_cpu_var(lru_deactivate_pvecs);\n",
+ "> +\t}\n",
+ "> +}\n",
+ "> +\n",
+ "\n",
+ "--\n",
+ "Hillf"
]
-76714939b2b300d19638111661c4e1e3f756b169a376ced66bb0a12185ab9e8f
+3f726f038c347818f2b246f151934fc80ce11befed99d28ed4eadc3835104774
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).