linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
diff for duplicates of <20190520035254.57579-2-minchan@kernel.org>

diff --git a/a/1.txt b/N1/1.txt
index c3147ed..a706a63 100644
--- a/a/1.txt
+++ b/N1/1.txt
@@ -1,338 +1,133 @@
-When a process expects no accesses to a certain memory range,
-it could hint to the kernel that the pages can be reclaimed
-when memory pressure happens, but that the data should be preserved
-for future use.  This could reduce workingset eviction, so it
-ends up increasing performance.
 
-This patch introduces the new MADV_COOL hint to madvise(2)
-syscall. MADV_COOL can be used by a process to mark a memory range
-as not expected to be used in the near future. The hint can help
-kernel in deciding which pages to evict early during memory
-pressure.
+On Mon, 20 May 2019 12:52:48 +0900 Minchan Kim wrote:
+> +static int madvise_cool_pte_range(pmd_t *pmd, unsigned long addr,
+> +				unsigned long end, struct mm_walk *walk)
+> +{
+> +	pte_t *orig_pte, *pte, ptent;
+> +	spinlock_t *ptl;
+> +	struct page *page;
+> +	struct vm_area_struct *vma = walk->vma;
+> +	unsigned long next;
+> +
+> +	next = pmd_addr_end(addr, end);
+> +	if (pmd_trans_huge(*pmd)) {
+> +		spinlock_t *ptl;
 
-Internally, it works by moving pages from the active list to the
-head of the inactive list, so when memory pressure happens they will
-be reclaimed earlier than other active pages, unless they are
-accessed again before then.
+This inner declaration seems unnecessary; it shadows the ptl already declared above.
+> +
+> +		ptl = pmd_trans_huge_lock(pmd, vma);
+> +		if (!ptl)
+> +			return 0;
+> +
+> +		if (is_huge_zero_pmd(*pmd))
+> +			goto huge_unlock;
+> +
+> +		page = pmd_page(*pmd);
+> +		if (page_mapcount(page) > 1)
+> +			goto huge_unlock;
+> +
+> +		if (next - addr != HPAGE_PMD_SIZE) {
+> +			int err;
 
-* v1r2
- * use clear_page_young in deactivate_page - joelaf
+Alternatively, we could deactivate the THP only if the address range from
+userspace is sane enough, to avoid the complex work we have to do here.
+> +
+> +			get_page(page);
+> +			spin_unlock(ptl);
+> +			lock_page(page);
+> +			err = split_huge_page(page);
+> +			unlock_page(page);
+> +			put_page(page);
+> +			if (!err)
+> +				goto regular_page;
+> +			return 0;
+> +		}
+> +
+> +		pmdp_test_and_clear_young(vma, addr, pmd);
+> +		deactivate_page(page);
+> +huge_unlock:
+> +		spin_unlock(ptl);
+> +		return 0;
+> +	}
+> +
+> +	if (pmd_trans_unstable(pmd))
+> +		return 0;
+> +
+> +regular_page:
 
-* v1r1
- * Revise the description - surenb
- * Renaming from MADV_WARM to MADV_COOL - surenb
+Should we check for a pending signal here?
 
-Signed-off-by: Minchan Kim <minchan@kernel.org>
----
- include/linux/page-flags.h             |   1 +
- include/linux/page_idle.h              |  15 ++++
- include/linux/swap.h                   |   1 +
- include/uapi/asm-generic/mman-common.h |   1 +
- mm/madvise.c                           | 112 +++++++++++++++++++++++++
- mm/swap.c                              |  43 ++++++++++
- 6 files changed, 173 insertions(+)
+> +	orig_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+> +	for (pte = orig_pte; addr < end; pte++, addr += PAGE_SIZE) {
 
-diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
-index 9f8712a4b1a5..58b06654c8dd 100644
---- a/include/linux/page-flags.h
-+++ b/include/linux/page-flags.h
-@@ -424,6 +424,7 @@ static inline bool set_hwpoison_free_buddy_page(struct page *page)
- TESTPAGEFLAG(Young, young, PF_ANY)
- SETPAGEFLAG(Young, young, PF_ANY)
- TESTCLEARFLAG(Young, young, PF_ANY)
-+CLEARPAGEFLAG(Young, young, PF_ANY)
- PAGEFLAG(Idle, idle, PF_ANY)
- #endif
- 
-diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h
-index 1e894d34bdce..f3f43b317150 100644
---- a/include/linux/page_idle.h
-+++ b/include/linux/page_idle.h
-@@ -19,6 +19,11 @@ static inline void set_page_young(struct page *page)
- 	SetPageYoung(page);
- }
- 
-+static inline void clear_page_young(struct page *page)
-+{
-+	ClearPageYoung(page);
-+}
-+
- static inline bool test_and_clear_page_young(struct page *page)
- {
- 	return TestClearPageYoung(page);
-@@ -65,6 +70,16 @@ static inline void set_page_young(struct page *page)
- 	set_bit(PAGE_EXT_YOUNG, &page_ext->flags);
- }
- 
-+static void clear_page_young(struct page *page)
-+{
-+	struct page_ext *page_ext = lookup_page_ext(page);
-+
-+	if (unlikely(!page_ext))
-+		return;
-+
-+	clear_bit(PAGE_EXT_YOUNG, &page_ext->flags);
-+}
-+
- static inline bool test_and_clear_page_young(struct page *page)
- {
- 	struct page_ext *page_ext = lookup_page_ext(page);
-diff --git a/include/linux/swap.h b/include/linux/swap.h
-index 4bfb5c4ac108..64795abea003 100644
---- a/include/linux/swap.h
-+++ b/include/linux/swap.h
-@@ -340,6 +340,7 @@ extern void lru_add_drain_cpu(int cpu);
- extern void lru_add_drain_all(void);
- extern void rotate_reclaimable_page(struct page *page);
- extern void deactivate_file_page(struct page *page);
-+extern void deactivate_page(struct page *page);
- extern void mark_page_lazyfree(struct page *page);
- extern void swap_setup(void);
- 
-diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
-index abd238d0f7a4..f7a4a5d4b642 100644
---- a/include/uapi/asm-generic/mman-common.h
-+++ b/include/uapi/asm-generic/mman-common.h
-@@ -42,6 +42,7 @@
- #define MADV_SEQUENTIAL	2		/* expect sequential page references */
- #define MADV_WILLNEED	3		/* will need these pages */
- #define MADV_DONTNEED	4		/* don't need these pages */
-+#define MADV_COOL	5		/* deactivate these pages */
- 
- /* common parameters: try to keep these consistent across architectures */
- #define MADV_FREE	8		/* free pages only if memory pressure */
-diff --git a/mm/madvise.c b/mm/madvise.c
-index 628022e674a7..c05817fb570d 100644
---- a/mm/madvise.c
-+++ b/mm/madvise.c
-@@ -8,6 +8,7 @@
- 
- #include <linux/mman.h>
- #include <linux/pagemap.h>
-+#include <linux/page_idle.h>
- #include <linux/syscalls.h>
- #include <linux/mempolicy.h>
- #include <linux/page-isolation.h>
-@@ -40,6 +41,7 @@ static int madvise_need_mmap_write(int behavior)
- 	case MADV_REMOVE:
- 	case MADV_WILLNEED:
- 	case MADV_DONTNEED:
-+	case MADV_COOL:
- 	case MADV_FREE:
- 		return 0;
- 	default:
-@@ -307,6 +309,113 @@ static long madvise_willneed(struct vm_area_struct *vma,
- 	return 0;
- }
- 
-+static int madvise_cool_pte_range(pmd_t *pmd, unsigned long addr,
-+				unsigned long end, struct mm_walk *walk)
-+{
-+	pte_t *orig_pte, *pte, ptent;
-+	spinlock_t *ptl;
-+	struct page *page;
-+	struct vm_area_struct *vma = walk->vma;
-+	unsigned long next;
-+
-+	next = pmd_addr_end(addr, end);
-+	if (pmd_trans_huge(*pmd)) {
-+		spinlock_t *ptl;
-+
-+		ptl = pmd_trans_huge_lock(pmd, vma);
-+		if (!ptl)
-+			return 0;
-+
-+		if (is_huge_zero_pmd(*pmd))
-+			goto huge_unlock;
-+
-+		page = pmd_page(*pmd);
-+		if (page_mapcount(page) > 1)
-+			goto huge_unlock;
-+
-+		if (next - addr != HPAGE_PMD_SIZE) {
-+			int err;
-+
-+			get_page(page);
-+			spin_unlock(ptl);
-+			lock_page(page);
-+			err = split_huge_page(page);
-+			unlock_page(page);
-+			put_page(page);
-+			if (!err)
-+				goto regular_page;
-+			return 0;
-+		}
-+
-+		pmdp_test_and_clear_young(vma, addr, pmd);
-+		deactivate_page(page);
-+huge_unlock:
-+		spin_unlock(ptl);
-+		return 0;
-+	}
-+
-+	if (pmd_trans_unstable(pmd))
-+		return 0;
-+
-+regular_page:
-+	orig_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
-+	for (pte = orig_pte; addr < end; pte++, addr += PAGE_SIZE) {
-+		ptent = *pte;
-+
-+		if (pte_none(ptent))
-+			continue;
-+
-+		if (!pte_present(ptent))
-+			continue;
-+
-+		page = vm_normal_page(vma, addr, ptent);
-+		if (!page)
-+			continue;
-+
-+		if (page_mapcount(page) > 1)
-+			continue;
-+
-+		ptep_test_and_clear_young(vma, addr, pte);
-+		deactivate_page(page);
-+	}
-+
-+	pte_unmap_unlock(orig_pte, ptl);
-+	cond_resched();
-+
-+	return 0;
-+}
-+
-+static void madvise_cool_page_range(struct mmu_gather *tlb,
-+			     struct vm_area_struct *vma,
-+			     unsigned long addr, unsigned long end)
-+{
-+	struct mm_walk cool_walk = {
-+		.pmd_entry = madvise_cool_pte_range,
-+		.mm = vma->vm_mm,
-+	};
-+
-+	tlb_start_vma(tlb, vma);
-+	walk_page_range(addr, end, &cool_walk);
-+	tlb_end_vma(tlb, vma);
-+}
-+
-+static long madvise_cool(struct vm_area_struct *vma,
-+			unsigned long start_addr, unsigned long end_addr)
-+{
-+	struct mm_struct *mm = vma->vm_mm;
-+	struct mmu_gather tlb;
-+
-+	if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
-+		return -EINVAL;
-+
-+	lru_add_drain();
-+	tlb_gather_mmu(&tlb, mm, start_addr, end_addr);
-+	madvise_cool_page_range(&tlb, vma, start_addr, end_addr);
-+	tlb_finish_mmu(&tlb, start_addr, end_addr);
-+
-+	return 0;
-+}
-+
- static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
- 				unsigned long end, struct mm_walk *walk)
- 
-@@ -695,6 +804,8 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
- 		return madvise_remove(vma, prev, start, end);
- 	case MADV_WILLNEED:
- 		return madvise_willneed(vma, prev, start, end);
-+	case MADV_COOL:
-+		return madvise_cool(vma, start, end);
- 	case MADV_FREE:
- 	case MADV_DONTNEED:
- 		return madvise_dontneed_free(vma, prev, start, end, behavior);
-@@ -716,6 +827,7 @@ madvise_behavior_valid(int behavior)
- 	case MADV_WILLNEED:
- 	case MADV_DONTNEED:
- 	case MADV_FREE:
-+	case MADV_COOL:
- #ifdef CONFIG_KSM
- 	case MADV_MERGEABLE:
- 	case MADV_UNMERGEABLE:
-diff --git a/mm/swap.c b/mm/swap.c
-index 3a75722e68a9..0f94c3b5397d 100644
---- a/mm/swap.c
-+++ b/mm/swap.c
-@@ -46,6 +46,7 @@ int page_cluster;
- static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);
- static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
- static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs);
-+static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
- static DEFINE_PER_CPU(struct pagevec, lru_lazyfree_pvecs);
- #ifdef CONFIG_SMP
- static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
-@@ -537,6 +538,23 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,
- 	update_page_reclaim_stat(lruvec, file, 0);
- }
- 
-+static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,
-+			    void *arg)
-+{
-+	if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
-+		int file = page_is_file_cache(page);
-+		int lru = page_lru_base_type(page);
-+
-+		del_page_from_lru_list(page, lruvec, lru + LRU_ACTIVE);
-+		ClearPageActive(page);
-+		ClearPageReferenced(page);
-+		clear_page_young(page);
-+		add_page_to_lru_list(page, lruvec, lru);
-+
-+		__count_vm_events(PGDEACTIVATE, hpage_nr_pages(page));
-+		update_page_reclaim_stat(lruvec, file, 0);
-+	}
-+}
- 
- static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec,
- 			    void *arg)
-@@ -589,6 +607,10 @@ void lru_add_drain_cpu(int cpu)
- 	if (pagevec_count(pvec))
- 		pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
- 
-+	pvec = &per_cpu(lru_deactivate_pvecs, cpu);
-+	if (pagevec_count(pvec))
-+		pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
-+
- 	pvec = &per_cpu(lru_lazyfree_pvecs, cpu);
- 	if (pagevec_count(pvec))
- 		pagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL);
-@@ -622,6 +644,26 @@ void deactivate_file_page(struct page *page)
- 	}
- }
- 
-+/*
-+ * deactivate_page - deactivate a page
-+ * @page: page to deactivate
-+ *
-+ * deactivate_page() moves @page to the inactive list if @page was on the active
-+ * list and was not an unevictable page.  This is done to accelerate the reclaim
-+ * of @page.
-+ */
-+void deactivate_page(struct page *page)
-+{
-+	if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
-+		struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
-+
-+		get_page(page);
-+		if (!pagevec_add(pvec, page) || PageCompound(page))
-+			pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
-+		put_cpu_var(lru_deactivate_pvecs);
-+	}
-+}
-+
- /**
-  * mark_page_lazyfree - make an anon page lazyfree
-  * @page: page to deactivate
-@@ -686,6 +728,7 @@ void lru_add_drain_all(void)
- 		if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
- 		    pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
- 		    pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
-+		    pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
- 		    pagevec_count(&per_cpu(lru_lazyfree_pvecs, cpu)) ||
- 		    need_activate_page_drain(cpu)) {
- 			INIT_WORK(work, lru_add_drain_per_cpu);
--- 
-2.21.0.1020.gf2820cf01a-goog
\ No newline at end of file
+s/end/next/ ?
+> +		ptent = *pte;
+> +
+> +		if (pte_none(ptent))
+> +			continue;
+> +
+> +		if (!pte_present(ptent))
+> +			continue;
+> +
+> +		page = vm_normal_page(vma, addr, ptent);
+> +		if (!page)
+> +			continue;
+> +
+> +		if (page_mapcount(page) > 1)
+> +			continue;
+> +
+> +		ptep_test_and_clear_young(vma, addr, pte);
+> +		deactivate_page(page);
+> +	}
+> +
+> +	pte_unmap_unlock(orig_pte, ptl);
+> +	cond_resched();
+> +
+> +	return 0;
+> +}
+> +
+> +static long madvise_cool(struct vm_area_struct *vma,
+> +			unsigned long start_addr, unsigned long end_addr)
+> +{
+> +	struct mm_struct *mm = vma->vm_mm;
+> +	struct mmu_gather tlb;
+> +
+> +	if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
+> +		return -EINVAL;
+
+No service in case of VM_IO?
+> +
+> +	lru_add_drain();
+> +	tlb_gather_mmu(&tlb, mm, start_addr, end_addr);
+> +	madvise_cool_page_range(&tlb, vma, start_addr, end_addr);
+> +	tlb_finish_mmu(&tlb, start_addr, end_addr);
+> +
+> +	return 0;
+> +}
+> +
+> +/*
+> + * deactivate_page - deactivate a page
+> + * @page: page to deactivate
+> + *
+> + * deactivate_page() moves @page to the inactive list if @page was on the active
+> + * list and was not an unevictable page.  This is done to accelerate the reclaim
+> + * of @page.
+> + */
+> +void deactivate_page(struct page *page)
+> +{
+> +	if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
+> +		struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
+> +
+> +		get_page(page);
+
+A one-line comment explaining why the page is pinned here seems needed.
+
+> +		if (!pagevec_add(pvec, page) || PageCompound(page))
+> +			pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
+> +		put_cpu_var(lru_deactivate_pvecs);
+> +	}
+> +}
+> +
+
+--
+Hillf
\ No newline at end of file
diff --git a/a/content_digest b/N1/content_digest
index da3243e..d2328d2 100644
--- a/a/content_digest
+++ b/N1/content_digest
@@ -2,19 +2,20 @@
   "ref\00020190520035254.57579-1-minchan\@kernel.org\0"
 ]
 [
-  "From\0Minchan Kim <minchan\@kernel.org>\0"
+  "From\0Hillf Danton <hdanton\@sina.com>\0"
 ]
 [
-  "Subject\0[RFC 1/7] mm: introduce MADV_COOL\0"
+  "Subject\0Re: [RFC 1/7] mm: introduce MADV_COOL\0"
 ]
 [
-  "Date\0Mon, 20 May 2019 12:52:48 +0900\0"
+  "Date\0Tue, 28 May 2019 16:53:01 +0800\0"
 ]
 [
-  "To\0Andrew Morton <akpm\@linux-foundation.org>\0"
+  "To\0Minchan Kim <minchan\@kernel.org>\0"
 ]
 [
-  "Cc\0LKML <linux-kernel\@vger.kernel.org>",
+  "Cc\0Andrew Morton <akpm\@linux-foundation.org>",
+  " LKML <linux-kernel\@vger.kernel.org>",
   " linux-mm <linux-mm\@kvack.org>",
   " Michal Hocko <mhocko\@suse.com>",
   " Johannes Weiner <hannes\@cmpxchg.org>",
@@ -24,8 +25,7 @@
   " Daniel Colascione <dancol\@google.com>",
   " Shakeel Butt <shakeelb\@google.com>",
   " Sonny Rao <sonnyrao\@google.com>",
-  " Brian Geffon <bgeffon\@google.com>",
-  " Minchan Kim <minchan\@kernel.org>\0"
+  " Brian Geffon <bgeffon\@google.com>\0"
 ]
 [
   "\0000:1\0"
@@ -34,344 +34,139 @@
   "b\0"
 ]
 [
-  "When a process expects no accesses to a certain memory range\n",
-  "it could hint kernel that the pages can be reclaimed\n",
-  "when memory pressure happens but data should be preserved\n",
-  "for future use.  This could reduce workingset eviction so it\n",
-  "ends up increasing performance.\n",
   "\n",
-  "This patch introduces the new MADV_COOL hint to madvise(2)\n",
-  "syscall. MADV_COOL can be used by a process to mark a memory range\n",
-  "as not expected to be used in the near future. The hint can help\n",
-  "kernel in deciding which pages to evict early during memory\n",
-  "pressure.\n",
+  "On Mon, 20 May 2019 12:52:48 +0900 Minchan Kim wrote:\n",
+  "> +static int madvise_cool_pte_range(pmd_t *pmd, unsigned long addr,\n",
+  "> +\t\t\t\tunsigned long end, struct mm_walk *walk)\n",
+  "> +{\n",
+  "> +\tpte_t *orig_pte, *pte, ptent;\n",
+  "> +\tspinlock_t *ptl;\n",
+  "> +\tstruct page *page;\n",
+  "> +\tstruct vm_area_struct *vma = walk->vma;\n",
+  "> +\tunsigned long next;\n",
+  "> +\n",
+  "> +\tnext = pmd_addr_end(addr, end);\n",
+  "> +\tif (pmd_trans_huge(*pmd)) {\n",
+  "> +\t\tspinlock_t *ptl;\n",
   "\n",
-  "Internally, it works via deactivating memory from active list to\n",
-  "inactive's head so when the memory pressure happens, they will be\n",
-  "reclaimed earlier than other active pages unless there is no\n",
-  "access until the time.\n",
+  "Seems not needed with another ptl declared above.\n",
+  "> +\n",
+  "> +\t\tptl = pmd_trans_huge_lock(pmd, vma);\n",
+  "> +\t\tif (!ptl)\n",
+  "> +\t\t\treturn 0;\n",
+  "> +\n",
+  "> +\t\tif (is_huge_zero_pmd(*pmd))\n",
+  "> +\t\t\tgoto huge_unlock;\n",
+  "> +\n",
+  "> +\t\tpage = pmd_page(*pmd);\n",
+  "> +\t\tif (page_mapcount(page) > 1)\n",
+  "> +\t\t\tgoto huge_unlock;\n",
+  "> +\n",
+  "> +\t\tif (next - addr != HPAGE_PMD_SIZE) {\n",
+  "> +\t\t\tint err;\n",
   "\n",
-  "* v1r2\n",
-  " * use clear_page_young in deactivate_page - joelaf\n",
+  "Alternately, we deactivate thp only if the address range from userspace\n",
+  "is sane enough, in order to avoid complex works we have to do here.\n",
+  "> +\n",
+  "> +\t\t\tget_page(page);\n",
+  "> +\t\t\tspin_unlock(ptl);\n",
+  "> +\t\t\tlock_page(page);\n",
+  "> +\t\t\terr = split_huge_page(page);\n",
+  "> +\t\t\tunlock_page(page);\n",
+  "> +\t\t\tput_page(page);\n",
+  "> +\t\t\tif (!err)\n",
+  "> +\t\t\t\tgoto regular_page;\n",
+  "> +\t\t\treturn 0;\n",
+  "> +\t\t}\n",
+  "> +\n",
+  "> +\t\tpmdp_test_and_clear_young(vma, addr, pmd);\n",
+  "> +\t\tdeactivate_page(page);\n",
+  "> +huge_unlock:\n",
+  "> +\t\tspin_unlock(ptl);\n",
+  "> +\t\treturn 0;\n",
+  "> +\t}\n",
+  "> +\n",
+  "> +\tif (pmd_trans_unstable(pmd))\n",
+  "> +\t\treturn 0;\n",
+  "> +\n",
+  "> +regular_page:\n",
   "\n",
-  "* v1r1\n",
-  " * Revise the description - surenb\n",
-  " * Renaming from MADV_WARM to MADV_COOL - surenb\n",
+  "Take a look at pending signal?\n",
   "\n",
-  "Signed-off-by: Minchan Kim <minchan\@kernel.org>\n",
-  "---\n",
-  " include/linux/page-flags.h             |   1 +\n",
-  " include/linux/page_idle.h              |  15 ++++\n",
-  " include/linux/swap.h                   |   1 +\n",
-  " include/uapi/asm-generic/mman-common.h |   1 +\n",
-  " mm/madvise.c                           | 112 +++++++++++++++++++++++++\n",
-  " mm/swap.c                              |  43 ++++++++++\n",
-  " 6 files changed, 173 insertions(+)\n",
+  "> +\torig_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);\n",
+  "> +\tfor (pte = orig_pte; addr < end; pte++, addr += PAGE_SIZE) {\n",
   "\n",
-  "diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h\n",
-  "index 9f8712a4b1a5..58b06654c8dd 100644\n",
-  "--- a/include/linux/page-flags.h\n",
-  "+++ b/include/linux/page-flags.h\n",
-  "\@\@ -424,6 +424,7 \@\@ static inline bool set_hwpoison_free_buddy_page(struct page *page)\n",
-  " TESTPAGEFLAG(Young, young, PF_ANY)\n",
-  " SETPAGEFLAG(Young, young, PF_ANY)\n",
-  " TESTCLEARFLAG(Young, young, PF_ANY)\n",
-  "+CLEARPAGEFLAG(Young, young, PF_ANY)\n",
-  " PAGEFLAG(Idle, idle, PF_ANY)\n",
-  " #endif\n",
-  " \n",
-  "diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h\n",
-  "index 1e894d34bdce..f3f43b317150 100644\n",
-  "--- a/include/linux/page_idle.h\n",
-  "+++ b/include/linux/page_idle.h\n",
-  "\@\@ -19,6 +19,11 \@\@ static inline void set_page_young(struct page *page)\n",
-  " \tSetPageYoung(page);\n",
-  " }\n",
-  " \n",
-  "+static inline void clear_page_young(struct page *page)\n",
-  "+{\n",
-  "+\tClearPageYoung(page);\n",
-  "+}\n",
-  "+\n",
-  " static inline bool test_and_clear_page_young(struct page *page)\n",
-  " {\n",
-  " \treturn TestClearPageYoung(page);\n",
-  "\@\@ -65,6 +70,16 \@\@ static inline void set_page_young(struct page *page)\n",
-  " \tset_bit(PAGE_EXT_YOUNG, &page_ext->flags);\n",
-  " }\n",
-  " \n",
-  "+static void clear_page_young(struct page *page)\n",
-  "+{\n",
-  "+\tstruct page_ext *page_ext = lookup_page_ext(page);\n",
-  "+\n",
-  "+\tif (unlikely(!page_ext))\n",
-  "+\t\treturn;\n",
-  "+\n",
-  "+\tclear_bit(PAGE_EXT_YOUNG, &page_ext->flags);\n",
-  "+}\n",
-  "+\n",
-  " static inline bool test_and_clear_page_young(struct page *page)\n",
-  " {\n",
-  " \tstruct page_ext *page_ext = lookup_page_ext(page);\n",
-  "diff --git a/include/linux/swap.h b/include/linux/swap.h\n",
-  "index 4bfb5c4ac108..64795abea003 100644\n",
-  "--- a/include/linux/swap.h\n",
-  "+++ b/include/linux/swap.h\n",
-  "\@\@ -340,6 +340,7 \@\@ extern void lru_add_drain_cpu(int cpu);\n",
-  " extern void lru_add_drain_all(void);\n",
-  " extern void rotate_reclaimable_page(struct page *page);\n",
-  " extern void deactivate_file_page(struct page *page);\n",
-  "+extern void deactivate_page(struct page *page);\n",
-  " extern void mark_page_lazyfree(struct page *page);\n",
-  " extern void swap_setup(void);\n",
-  " \n",
-  "diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h\n",
-  "index abd238d0f7a4..f7a4a5d4b642 100644\n",
-  "--- a/include/uapi/asm-generic/mman-common.h\n",
-  "+++ b/include/uapi/asm-generic/mman-common.h\n",
-  "\@\@ -42,6 +42,7 \@\@\n",
-  " #define MADV_SEQUENTIAL\t2\t\t/* expect sequential page references */\n",
-  " #define MADV_WILLNEED\t3\t\t/* will need these pages */\n",
-  " #define MADV_DONTNEED\t4\t\t/* don't need these pages */\n",
-  "+#define MADV_COOL\t5\t\t/* deactivatie these pages */\n",
-  " \n",
-  " /* common parameters: try to keep these consistent across architectures */\n",
-  " #define MADV_FREE\t8\t\t/* free pages only if memory pressure */\n",
-  "diff --git a/mm/madvise.c b/mm/madvise.c\n",
-  "index 628022e674a7..c05817fb570d 100644\n",
-  "--- a/mm/madvise.c\n",
-  "+++ b/mm/madvise.c\n",
-  "\@\@ -8,6 +8,7 \@\@\n",
-  " \n",
-  " #include <linux/mman.h>\n",
-  " #include <linux/pagemap.h>\n",
-  "+#include <linux/page_idle.h>\n",
-  " #include <linux/syscalls.h>\n",
-  " #include <linux/mempolicy.h>\n",
-  " #include <linux/page-isolation.h>\n",
-  "\@\@ -40,6 +41,7 \@\@ static int madvise_need_mmap_write(int behavior)\n",
-  " \tcase MADV_REMOVE:\n",
-  " \tcase MADV_WILLNEED:\n",
-  " \tcase MADV_DONTNEED:\n",
-  "+\tcase MADV_COOL:\n",
-  " \tcase MADV_FREE:\n",
-  " \t\treturn 0;\n",
-  " \tdefault:\n",
-  "\@\@ -307,6 +309,113 \@\@ static long madvise_willneed(struct vm_area_struct *vma,\n",
-  " \treturn 0;\n",
-  " }\n",
-  " \n",
-  "+static int madvise_cool_pte_range(pmd_t *pmd, unsigned long addr,\n",
-  "+\t\t\t\tunsigned long end, struct mm_walk *walk)\n",
-  "+{\n",
-  "+\tpte_t *orig_pte, *pte, ptent;\n",
-  "+\tspinlock_t *ptl;\n",
-  "+\tstruct page *page;\n",
-  "+\tstruct vm_area_struct *vma = walk->vma;\n",
-  "+\tunsigned long next;\n",
-  "+\n",
-  "+\tnext = pmd_addr_end(addr, end);\n",
-  "+\tif (pmd_trans_huge(*pmd)) {\n",
-  "+\t\tspinlock_t *ptl;\n",
-  "+\n",
-  "+\t\tptl = pmd_trans_huge_lock(pmd, vma);\n",
-  "+\t\tif (!ptl)\n",
-  "+\t\t\treturn 0;\n",
-  "+\n",
-  "+\t\tif (is_huge_zero_pmd(*pmd))\n",
-  "+\t\t\tgoto huge_unlock;\n",
-  "+\n",
-  "+\t\tpage = pmd_page(*pmd);\n",
-  "+\t\tif (page_mapcount(page) > 1)\n",
-  "+\t\t\tgoto huge_unlock;\n",
-  "+\n",
-  "+\t\tif (next - addr != HPAGE_PMD_SIZE) {\n",
-  "+\t\t\tint err;\n",
-  "+\n",
-  "+\t\t\tget_page(page);\n",
-  "+\t\t\tspin_unlock(ptl);\n",
-  "+\t\t\tlock_page(page);\n",
-  "+\t\t\terr = split_huge_page(page);\n",
-  "+\t\t\tunlock_page(page);\n",
-  "+\t\t\tput_page(page);\n",
-  "+\t\t\tif (!err)\n",
-  "+\t\t\t\tgoto regular_page;\n",
-  "+\t\t\treturn 0;\n",
-  "+\t\t}\n",
-  "+\n",
-  "+\t\tpmdp_test_and_clear_young(vma, addr, pmd);\n",
-  "+\t\tdeactivate_page(page);\n",
-  "+huge_unlock:\n",
-  "+\t\tspin_unlock(ptl);\n",
-  "+\t\treturn 0;\n",
-  "+\t}\n",
-  "+\n",
-  "+\tif (pmd_trans_unstable(pmd))\n",
-  "+\t\treturn 0;\n",
-  "+\n",
-  "+regular_page:\n",
-  "+\torig_pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);\n",
-  "+\tfor (pte = orig_pte; addr < end; pte++, addr += PAGE_SIZE) {\n",
-  "+\t\tptent = *pte;\n",
-  "+\n",
-  "+\t\tif (pte_none(ptent))\n",
-  "+\t\t\tcontinue;\n",
-  "+\n",
-  "+\t\tif (!pte_present(ptent))\n",
-  "+\t\t\tcontinue;\n",
-  "+\n",
-  "+\t\tpage = vm_normal_page(vma, addr, ptent);\n",
-  "+\t\tif (!page)\n",
-  "+\t\t\tcontinue;\n",
-  "+\n",
-  "+\t\tif (page_mapcount(page) > 1)\n",
-  "+\t\t\tcontinue;\n",
-  "+\n",
-  "+\t\tptep_test_and_clear_young(vma, addr, pte);\n",
-  "+\t\tdeactivate_page(page);\n",
-  "+\t}\n",
-  "+\n",
-  "+\tpte_unmap_unlock(orig_pte, ptl);\n",
-  "+\tcond_resched();\n",
-  "+\n",
-  "+\treturn 0;\n",
-  "+}\n",
-  "+\n",
-  "+static void madvise_cool_page_range(struct mmu_gather *tlb,\n",
-  "+\t\t\t     struct vm_area_struct *vma,\n",
-  "+\t\t\t     unsigned long addr, unsigned long end)\n",
-  "+{\n",
-  "+\tstruct mm_walk cool_walk = {\n",
-  "+\t\t.pmd_entry = madvise_cool_pte_range,\n",
-  "+\t\t.mm = vma->vm_mm,\n",
-  "+\t};\n",
-  "+\n",
-  "+\ttlb_start_vma(tlb, vma);\n",
-  "+\twalk_page_range(addr, end, &cool_walk);\n",
-  "+\ttlb_end_vma(tlb, vma);\n",
-  "+}\n",
-  "+\n",
-  "+static long madvise_cool(struct vm_area_struct *vma,\n",
-  "+\t\t\tunsigned long start_addr, unsigned long end_addr)\n",
-  "+{\n",
-  "+\tstruct mm_struct *mm = vma->vm_mm;\n",
-  "+\tstruct mmu_gather tlb;\n",
-  "+\n",
-  "+\tif (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))\n",
-  "+\t\treturn -EINVAL;\n",
-  "+\n",
-  "+\tlru_add_drain();\n",
-  "+\ttlb_gather_mmu(&tlb, mm, start_addr, end_addr);\n",
-  "+\tmadvise_cool_page_range(&tlb, vma, start_addr, end_addr);\n",
-  "+\ttlb_finish_mmu(&tlb, start_addr, end_addr);\n",
-  "+\n",
-  "+\treturn 0;\n",
-  "+}\n",
-  "+\n",
-  " static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,\n",
-  " \t\t\t\tunsigned long end, struct mm_walk *walk)\n",
-  " \n",
-  "\@\@ -695,6 +804,8 \@\@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,\n",
-  " \t\treturn madvise_remove(vma, prev, start, end);\n",
-  " \tcase MADV_WILLNEED:\n",
-  " \t\treturn madvise_willneed(vma, prev, start, end);\n",
-  "+\tcase MADV_COOL:\n",
-  "+\t\treturn madvise_cool(vma, start, end);\n",
-  " \tcase MADV_FREE:\n",
-  " \tcase MADV_DONTNEED:\n",
-  " \t\treturn madvise_dontneed_free(vma, prev, start, end, behavior);\n",
-  "\@\@ -716,6 +827,7 \@\@ madvise_behavior_valid(int behavior)\n",
-  " \tcase MADV_WILLNEED:\n",
-  " \tcase MADV_DONTNEED:\n",
-  " \tcase MADV_FREE:\n",
-  "+\tcase MADV_COOL:\n",
-  " #ifdef CONFIG_KSM\n",
-  " \tcase MADV_MERGEABLE:\n",
-  " \tcase MADV_UNMERGEABLE:\n",
-  "diff --git a/mm/swap.c b/mm/swap.c\n",
-  "index 3a75722e68a9..0f94c3b5397d 100644\n",
-  "--- a/mm/swap.c\n",
-  "+++ b/mm/swap.c\n",
-  "\@\@ -46,6 +46,7 \@\@ int page_cluster;\n",
-  " static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);\n",
-  " static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);\n",
-  " static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs);\n",
-  "+static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);\n",
-  " static DEFINE_PER_CPU(struct pagevec, lru_lazyfree_pvecs);\n",
-  " #ifdef CONFIG_SMP\n",
-  " static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);\n",
-  "\@\@ -537,6 +538,23 \@\@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,\n",
-  " \tupdate_page_reclaim_stat(lruvec, file, 0);\n",
-  " }\n",
-  " \n",
-  "+static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,\n",
-  "+\t\t\t    void *arg)\n",
-  "+{\n",
-  "+\tif (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {\n",
-  "+\t\tint file = page_is_file_cache(page);\n",
-  "+\t\tint lru = page_lru_base_type(page);\n",
-  "+\n",
-  "+\t\tdel_page_from_lru_list(page, lruvec, lru + LRU_ACTIVE);\n",
-  "+\t\tClearPageActive(page);\n",
-  "+\t\tClearPageReferenced(page);\n",
-  "+\t\tclear_page_young(page);\n",
-  "+\t\tadd_page_to_lru_list(page, lruvec, lru);\n",
-  "+\n",
-  "+\t\t__count_vm_events(PGDEACTIVATE, hpage_nr_pages(page));\n",
-  "+\t\tupdate_page_reclaim_stat(lruvec, file, 0);\n",
-  "+\t}\n",
-  "+}\n",
-  " \n",
-  " static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec,\n",
-  " \t\t\t    void *arg)\n",
-  "\@\@ -589,6 +607,10 \@\@ void lru_add_drain_cpu(int cpu)\n",
-  " \tif (pagevec_count(pvec))\n",
-  " \t\tpagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);\n",
-  " \n",
-  "+\tpvec = &per_cpu(lru_deactivate_pvecs, cpu);\n",
-  "+\tif (pagevec_count(pvec))\n",
-  "+\t\tpagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);\n",
-  "+\n",
-  " \tpvec = &per_cpu(lru_lazyfree_pvecs, cpu);\n",
-  " \tif (pagevec_count(pvec))\n",
-  " \t\tpagevec_lru_move_fn(pvec, lru_lazyfree_fn, NULL);\n",
-  "\@\@ -622,6 +644,26 \@\@ void deactivate_file_page(struct page *page)\n",
-  " \t}\n",
-  " }\n",
-  " \n",
-  "+/*\n",
-  "+ * deactivate_page - deactivate a page\n",
-  "+ * \@page: page to deactivate\n",
-  "+ *\n",
-  "+ * deactivate_page() moves \@page to the inactive list if \@page was on the active\n",
-  "+ * list and was not an unevictable page.  This is done to accelerate the reclaim\n",
-  "+ * of \@page.\n",
-  "+ */\n",
-  "+void deactivate_page(struct page *page)\n",
-  "+{\n",
-  "+\tif (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {\n",
-  "+\t\tstruct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);\n",
-  "+\n",
-  "+\t\tget_page(page);\n",
-  "+\t\tif (!pagevec_add(pvec, page) || PageCompound(page))\n",
-  "+\t\t\tpagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);\n",
-  "+\t\tput_cpu_var(lru_deactivate_pvecs);\n",
-  "+\t}\n",
-  "+}\n",
-  "+\n",
-  " /**\n",
-  "  * mark_page_lazyfree - make an anon page lazyfree\n",
-  "  * \@page: page to deactivate\n",
-  "\@\@ -686,6 +728,7 \@\@ void lru_add_drain_all(void)\n",
-  " \t\tif (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||\n",
-  " \t\t    pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||\n",
-  " \t\t    pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||\n",
-  "+\t\t    pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||\n",
-  " \t\t    pagevec_count(&per_cpu(lru_lazyfree_pvecs, cpu)) ||\n",
-  " \t\t    need_activate_page_drain(cpu)) {\n",
-  " \t\t\tINIT_WORK(work, lru_add_drain_per_cpu);\n",
-  "-- \n",
-  "2.21.0.1020.gf2820cf01a-goog"
+  "s/end/next/ ?\n",
+  "> +\t\tptent = *pte;\n",
+  "> +\n",
+  "> +\t\tif (pte_none(ptent))\n",
+  "> +\t\t\tcontinue;\n",
+  "> +\n",
+  "> +\t\tif (!pte_present(ptent))\n",
+  "> +\t\t\tcontinue;\n",
+  "> +\n",
+  "> +\t\tpage = vm_normal_page(vma, addr, ptent);\n",
+  "> +\t\tif (!page)\n",
+  "> +\t\t\tcontinue;\n",
+  "> +\n",
+  "> +\t\tif (page_mapcount(page) > 1)\n",
+  "> +\t\t\tcontinue;\n",
+  "> +\n",
+  "> +\t\tptep_test_and_clear_young(vma, addr, pte);\n",
+  "> +\t\tdeactivate_page(page);\n",
+  "> +\t}\n",
+  "> +\n",
+  "> +\tpte_unmap_unlock(orig_pte, ptl);\n",
+  "> +\tcond_resched();\n",
+  "> +\n",
+  "> +\treturn 0;\n",
+  "> +}\n",
+  "> +\n",
+  "> +static long madvise_cool(struct vm_area_struct *vma,\n",
+  "> +\t\t\tunsigned long start_addr, unsigned long end_addr)\n",
+  "> +{\n",
+  "> +\tstruct mm_struct *mm = vma->vm_mm;\n",
+  "> +\tstruct mmu_gather tlb;\n",
+  "> +\n",
+  "> +\tif (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))\n",
+  "> +\t\treturn -EINVAL;\n",
+  "\n",
+  "No service in case of VM_IO?\n",
+  "> +\n",
+  "> +\tlru_add_drain();\n",
+  "> +\ttlb_gather_mmu(&tlb, mm, start_addr, end_addr);\n",
+  "> +\tmadvise_cool_page_range(&tlb, vma, start_addr, end_addr);\n",
+  "> +\ttlb_finish_mmu(&tlb, start_addr, end_addr);\n",
+  "> +\n",
+  "> +\treturn 0;\n",
+  "> +}\n",
+  "> +\n",
+  "> +/*\n",
+  "> + * deactivate_page - deactivate a page\n",
+  "> + * \@page: page to deactivate\n",
+  "> + *\n",
+  "> + * deactivate_page() moves \@page to the inactive list if \@page was on the active\n",
+  "> + * list and was not an unevictable page.  This is done to accelerate the reclaim\n",
+  "> + * of \@page.\n",
+  "> + */\n",
+  "> +void deactivate_page(struct page *page)\n",
+  "> +{\n",
+  "> +\tif (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {\n",
+  "> +\t\tstruct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);\n",
+  "> +\n",
+  "> +\t\tget_page(page);\n",
+  "\n",
+  "A line of comment seems needed for pinning the page.\n",
+  "\n",
+  "> +\t\tif (!pagevec_add(pvec, page) || PageCompound(page))\n",
+  "> +\t\t\tpagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);\n",
+  "> +\t\tput_cpu_var(lru_deactivate_pvecs);\n",
+  "> +\t}\n",
+  "> +}\n",
+  "> +\n",
+  "\n",
+  "--\n",
+  "Hillf"
 ]
 
-76714939b2b300d19638111661c4e1e3f756b169a376ced66bb0a12185ab9e8f
+3f726f038c347818f2b246f151934fc80ce11befed99d28ed4eadc3835104774

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).