linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH v2 0/3] mm, thp: convert from optimistic swapin collapsing to conservative
@ 2016-06-22 11:15 Ebru Akagunduz
  2016-06-22 11:15 ` [RFC PATCH v2 1/3] mm, thp: revert allocstall comparing Ebru Akagunduz
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Ebru Akagunduz @ 2016-06-22 11:15 UTC (permalink / raw)
  To: linux-mm
  Cc: hughd, riel, akpm, kirill.shutemov, n-horiguchi, aarcange,
	iamjoonsoo.kim, gorcunov, linux-kernel, mgorman, rientjes,
	vbabka, aneesh.kumar, hannes, mhocko, boaz, Ebru Akagunduz

This patch series makes khugepaged decide whether to swap in by looking at
the number of young pages. It also removes the allocstall comparison and
fixes a comment inconsistency.

Ebru Akagunduz (3):
  mm, thp: revert allocstall comparing
  mm, thp: convert from optimistic swapin collapsing to conservative
  mm, thp: fix comment inconsistency for swapin readahead functions

 include/trace/events/huge_memory.h | 19 ++++++-----
 mm/huge_memory.c                   | 70 +++++++++++++++++---------------------
 2 files changed, 43 insertions(+), 46 deletions(-)

-- 
1.9.1

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [RFC PATCH v2 1/3] mm, thp: revert allocstall comparing
  2016-06-22 11:15 [RFC PATCH v2 0/3] mm, thp: convert from optimistic swapin collapsing to conservative Ebru Akagunduz
@ 2016-06-22 11:15 ` Ebru Akagunduz
  2016-06-22 11:15 ` [RFC PATCH v2 2/3] mm, thp: convert from optimistic swapin collapsing to conservative Ebru Akagunduz
  2016-06-22 11:17 ` [RFC PATCH v2 3/3] mm, thp: fix comment inconsistency for swapin readahead functions Ebru Akagunduz
  2 siblings, 0 replies; 4+ messages in thread
From: Ebru Akagunduz @ 2016-06-22 11:15 UTC (permalink / raw)
  To: linux-mm
  Cc: hughd, riel, akpm, kirill.shutemov, n-horiguchi, aarcange,
	iamjoonsoo.kim, gorcunov, linux-kernel, mgorman, rientjes,
	vbabka, aneesh.kumar, hannes, mhocko, boaz, Ebru Akagunduz

This patch reverts the allocstall comparison used when deciding
whether swapin is worthwhile, because it does not work
if vmevent is disabled.

Related commit:
http://git.kernel.org/cgit/linux/kernel/git/next/linux-next.git/commit/?id=2548306628308aa6a326640d345a737bc898941d

Signed-off-by: Ebru Akagunduz <ebru.akagunduz@gmail.com>
Suggested-by: Minchan Kim <minchan@kernel.org>
Suggested-by: Michal Hocko <mhocko@kernel.org>
---
Changes in v2:
 - Add Suggested-by tag (Minchan Kim)

 mm/huge_memory.c | 30 ++++++++----------------------
 1 file changed, 8 insertions(+), 22 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index acd374e..34fec1f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -102,7 +102,6 @@ static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait);
  */
 static unsigned int khugepaged_max_ptes_none __read_mostly;
 static unsigned int khugepaged_max_ptes_swap __read_mostly;
-static unsigned long allocstall;
 
 static int khugepaged(void *none);
 static int khugepaged_slab_init(void);
@@ -2465,7 +2464,6 @@ static void collapse_huge_page(struct mm_struct *mm,
 	struct page *new_page;
 	spinlock_t *pmd_ptl, *pte_ptl;
 	int isolated = 0, result = 0;
-	unsigned long swap, curr_allocstall;
 	struct mem_cgroup *memcg;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;		/* For mmu_notifiers */
@@ -2488,8 +2486,6 @@ static void collapse_huge_page(struct mm_struct *mm,
 		goto out_nolock;
 	}
 
-	swap = get_mm_counter(mm, MM_SWAPENTS);
-	curr_allocstall = sum_vm_event(ALLOCSTALL);
 	down_read(&mm->mmap_sem);
 	result = hugepage_vma_revalidate(mm, address);
 	if (result) {
@@ -2507,20 +2503,14 @@ static void collapse_huge_page(struct mm_struct *mm,
 	}
 
 	/*
-	 * Don't perform swapin readahead when the system is under pressure,
-	 * to avoid unnecessary resource consumption.
+	 * __collapse_huge_page_swapin always returns with mmap_sem
+	 * locked.  If it fails, release mmap_sem and jump directly
+	 * out.  Continuing to collapse causes inconsistency.
 	 */
-	if (allocstall == curr_allocstall && swap != 0) {
-		/*
-		 * __collapse_huge_page_swapin always returns with mmap_sem
-		 * locked.  If it fails, release mmap_sem and jump directly
-		 * out.  Continuing to collapse causes inconsistency.
-		 */
-		if (!__collapse_huge_page_swapin(mm, vma, address, pmd)) {
-			mem_cgroup_cancel_charge(new_page, memcg, true);
-			up_read(&mm->mmap_sem);
-			goto out_nolock;
-		}
+	if (!__collapse_huge_page_swapin(mm, vma, address, pmd)) {
+		mem_cgroup_cancel_charge(new_page, memcg, true);
+		up_read(&mm->mmap_sem);
+		goto out_nolock;
 	}
 
 	up_read(&mm->mmap_sem);
@@ -2935,7 +2925,6 @@ static void khugepaged_wait_work(void)
 		if (!scan_sleep_jiffies)
 			return;
 
-		allocstall = sum_vm_event(ALLOCSTALL);
 		khugepaged_sleep_expire = jiffies + scan_sleep_jiffies;
 		wait_event_freezable_timeout(khugepaged_wait,
 					     khugepaged_should_wakeup(),
@@ -2943,10 +2932,8 @@ static void khugepaged_wait_work(void)
 		return;
 	}
 
-	if (khugepaged_enabled()) {
-		allocstall = sum_vm_event(ALLOCSTALL);
+	if (khugepaged_enabled())
 		wait_event_freezable(khugepaged_wait, khugepaged_wait_event());
-	}
 }
 
 static int khugepaged(void *none)
@@ -2955,7 +2942,6 @@ static int khugepaged(void *none)
 
 	set_freezable();
 	set_user_nice(current, MAX_NICE);
-	allocstall = sum_vm_event(ALLOCSTALL);
 
 	while (!kthread_should_stop()) {
 		khugepaged_do_scan();
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [RFC PATCH v2 2/3] mm, thp: convert from optimistic swapin collapsing to conservative
  2016-06-22 11:15 [RFC PATCH v2 0/3] mm, thp: convert from optimistic swapin collapsing to conservative Ebru Akagunduz
  2016-06-22 11:15 ` [RFC PATCH v2 1/3] mm, thp: revert allocstall comparing Ebru Akagunduz
@ 2016-06-22 11:15 ` Ebru Akagunduz
  2016-06-22 11:17 ` [RFC PATCH v2 3/3] mm, thp: fix comment inconsistency for swapin readahead functions Ebru Akagunduz
  2 siblings, 0 replies; 4+ messages in thread
From: Ebru Akagunduz @ 2016-06-22 11:15 UTC (permalink / raw)
  To: linux-mm
  Cc: hughd, riel, akpm, kirill.shutemov, n-horiguchi, aarcange,
	iamjoonsoo.kim, gorcunov, linux-kernel, mgorman, rientjes,
	vbabka, aneesh.kumar, hannes, mhocko, boaz, Ebru Akagunduz

To detect whether khugepaged swapin is worthwhile, this patch checks
the number of young pages. At least half of the HPAGE_PMD_NR pages
must be young for swapin to proceed.

Signed-off-by: Ebru Akagunduz <ebru.akagunduz@gmail.com>
Suggested-by: Minchan Kim <minchan@kernel.org>
---
Changes in v2:
 - Don't change the THP design; only take the number of young
   pages into account if khugepaged needs to swap in (Minchan Kim).
 - Print out count of referenced pages in
   __collapse_huge_page_swapin() (Ebru Akagunduz)

 include/trace/events/huge_memory.h | 19 ++++++++++--------
 mm/huge_memory.c                   | 40 +++++++++++++++++++++++---------------
 2 files changed, 35 insertions(+), 24 deletions(-)

diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h
index bda2118..9f69a72 100644
--- a/include/trace/events/huge_memory.h
+++ b/include/trace/events/huge_memory.h
@@ -13,7 +13,7 @@
 	EM( SCAN_EXCEED_NONE_PTE,	"exceed_none_pte")		\
 	EM( SCAN_PTE_NON_PRESENT,	"pte_non_present")		\
 	EM( SCAN_PAGE_RO,		"no_writable_page")		\
-	EM( SCAN_NO_REFERENCED_PAGE,	"no_referenced_page")		\
+	EM( SCAN_LACK_REFERENCED_PAGE,	"lack_referenced_page")		\
 	EM( SCAN_PAGE_NULL,		"page_null")			\
 	EM( SCAN_SCAN_ABORT,		"scan_aborted")			\
 	EM( SCAN_PAGE_COUNT,		"not_suitable_page_count")	\
@@ -46,7 +46,7 @@ SCAN_STATUS
 TRACE_EVENT(mm_khugepaged_scan_pmd,
 
 	TP_PROTO(struct mm_struct *mm, struct page *page, bool writable,
-		 bool referenced, int none_or_zero, int status, int unmapped),
+		 int referenced, int none_or_zero, int status, int unmapped),
 
 	TP_ARGS(mm, page, writable, referenced, none_or_zero, status, unmapped),
 
@@ -54,7 +54,7 @@ TRACE_EVENT(mm_khugepaged_scan_pmd,
 		__field(struct mm_struct *, mm)
 		__field(unsigned long, pfn)
 		__field(bool, writable)
-		__field(bool, referenced)
+		__field(int, referenced)
 		__field(int, none_or_zero)
 		__field(int, status)
 		__field(int, unmapped)
@@ -107,14 +107,14 @@ TRACE_EVENT(mm_collapse_huge_page,
 TRACE_EVENT(mm_collapse_huge_page_isolate,
 
 	TP_PROTO(struct page *page, int none_or_zero,
-		 bool referenced, bool  writable, int status),
+		 int referenced, bool  writable, int status),
 
 	TP_ARGS(page, none_or_zero, referenced, writable, status),
 
 	TP_STRUCT__entry(
 		__field(unsigned long, pfn)
 		__field(int, none_or_zero)
-		__field(bool, referenced)
+		__field(int, referenced)
 		__field(bool, writable)
 		__field(int, status)
 	),
@@ -137,25 +137,28 @@ TRACE_EVENT(mm_collapse_huge_page_isolate,
 
 TRACE_EVENT(mm_collapse_huge_page_swapin,
 
-	TP_PROTO(struct mm_struct *mm, int swapped_in, int ret),
+	TP_PROTO(struct mm_struct *mm, int swapped_in, int referenced, int ret),
 
-	TP_ARGS(mm, swapped_in, ret),
+	TP_ARGS(mm, swapped_in, referenced, ret),
 
 	TP_STRUCT__entry(
 		__field(struct mm_struct *, mm)
 		__field(int, swapped_in)
+		__field(int, referenced)
 		__field(int, ret)
 	),
 
 	TP_fast_assign(
 		__entry->mm = mm;
 		__entry->swapped_in = swapped_in;
+		__entry->referenced = referenced;
 		__entry->ret = ret;
 	),
 
-	TP_printk("mm=%p, swapped_in=%d, ret=%d",
+	TP_printk("mm=%p, swapped_in=%d, referenced=%d, ret=%d",
 		__entry->mm,
 		__entry->swapped_in,
+		__entry->referenced,
 		__entry->ret)
 );
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 34fec1f..ff96765 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -42,7 +42,7 @@ enum scan_result {
 	SCAN_EXCEED_NONE_PTE,
 	SCAN_PTE_NON_PRESENT,
 	SCAN_PAGE_RO,
-	SCAN_NO_REFERENCED_PAGE,
+	SCAN_LACK_REFERENCED_PAGE,
 	SCAN_PAGE_NULL,
 	SCAN_SCAN_ABORT,
 	SCAN_PAGE_COUNT,
@@ -2048,8 +2048,8 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 {
 	struct page *page = NULL;
 	pte_t *_pte;
-	int none_or_zero = 0, result = 0;
-	bool referenced = false, writable = false;
+	int none_or_zero = 0, result = 0, referenced = 0;
+	bool writable = false;
 
 	for (_pte = pte; _pte < pte+HPAGE_PMD_NR;
 	     _pte++, address += PAGE_SIZE) {
@@ -2128,11 +2128,11 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 		VM_BUG_ON_PAGE(!PageLocked(page), page);
 		VM_BUG_ON_PAGE(PageLRU(page), page);
 
-		/* If there is no mapped pte young don't collapse the page */
+		/* There should be enough young pte to collapse the page */
 		if (pte_young(pteval) ||
 		    page_is_young(page) || PageReferenced(page) ||
 		    mmu_notifier_test_young(vma->vm_mm, address))
-			referenced = true;
+			referenced++;
 	}
 	if (likely(writable)) {
 		if (likely(referenced)) {
@@ -2416,7 +2416,8 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address)
 
 static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 					struct vm_area_struct *vma,
-					unsigned long address, pmd_t *pmd)
+					unsigned long address,
+					pmd_t *pmd, int referenced)
 {
 	unsigned long _address;
 	pte_t *pte, pteval;
@@ -2429,6 +2430,11 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 		if (!is_swap_pte(pteval))
 			continue;
 		swapped_in++;
+		/* we only decide to swapin, if there is enough young ptes */
+		if (referenced < HPAGE_PMD_NR/2) {
+			trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
+			return false;
+		}
 		ret = do_swap_page(mm, vma, _address, pte, pmd,
 				   FAULT_FLAG_ALLOW_RETRY,
 				   pteval);
@@ -2436,11 +2442,13 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 		if (ret & VM_FAULT_RETRY) {
 			down_read(&mm->mmap_sem);
 			/* vma is no longer available, don't continue to swapin */
-			if (hugepage_vma_revalidate(mm, address))
+			if (hugepage_vma_revalidate(mm, address)) {
+				trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
 				return false;
+			}
 		}
 		if (ret & VM_FAULT_ERROR) {
-			trace_mm_collapse_huge_page_swapin(mm, swapped_in, 0);
+			trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
 			return false;
 		}
 		/* pte is unmapped now, we need to map it */
@@ -2448,7 +2456,7 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 	}
 	pte--;
 	pte_unmap(pte);
-	trace_mm_collapse_huge_page_swapin(mm, swapped_in, 1);
+	trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 1);
 	return true;
 }
 
@@ -2456,7 +2464,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 				   unsigned long address,
 				   struct page **hpage,
 				   struct vm_area_struct *vma,
-				   int node)
+				   int node, int referenced)
 {
 	pmd_t *pmd, _pmd;
 	pte_t *pte;
@@ -2507,7 +2515,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	 * locked.  If it fails, release mmap_sem and jump directly
 	 * out.  Continuing to collapse causes inconsistency.
 	 */
-	if (!__collapse_huge_page_swapin(mm, vma, address, pmd)) {
+	if (!__collapse_huge_page_swapin(mm, vma, address, pmd, referenced)) {
 		mem_cgroup_cancel_charge(new_page, memcg, true);
 		up_read(&mm->mmap_sem);
 		goto out_nolock;
@@ -2615,12 +2623,12 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 {
 	pmd_t *pmd;
 	pte_t *pte, *_pte;
-	int ret = 0, none_or_zero = 0, result = 0;
+	int ret = 0, none_or_zero = 0, result = 0, referenced = 0;
 	struct page *page = NULL;
 	unsigned long _address;
 	spinlock_t *ptl;
 	int node = NUMA_NO_NODE, unmapped = 0;
-	bool writable = false, referenced = false;
+	bool writable = false;
 
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 
@@ -2708,14 +2716,14 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 		if (pte_young(pteval) ||
 		    page_is_young(page) || PageReferenced(page) ||
 		    mmu_notifier_test_young(vma->vm_mm, address))
-			referenced = true;
+			referenced++;
 	}
 	if (writable) {
 		if (referenced) {
 			result = SCAN_SUCCEED;
 			ret = 1;
 		} else {
-			result = SCAN_NO_REFERENCED_PAGE;
+			result = SCAN_LACK_REFERENCED_PAGE;
 		}
 	} else {
 		result = SCAN_PAGE_RO;
@@ -2725,7 +2733,7 @@ out_unmap:
 	if (ret) {
 		node = khugepaged_find_target_node();
 		/* collapse_huge_page will return with the mmap_sem released */
-		collapse_huge_page(mm, address, hpage, vma, node);
+		collapse_huge_page(mm, address, hpage, vma, node, referenced);
 	}
 out:
 	trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced,
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [RFC PATCH v2 3/3] mm, thp: fix comment inconsistency for swapin readahead functions
  2016-06-22 11:15 [RFC PATCH v2 0/3] mm, thp: convert from optimistic swapin collapsing to conservative Ebru Akagunduz
  2016-06-22 11:15 ` [RFC PATCH v2 1/3] mm, thp: revert allocstall comparing Ebru Akagunduz
  2016-06-22 11:15 ` [RFC PATCH v2 2/3] mm, thp: convert from optimistic swapin collapsing to conservative Ebru Akagunduz
@ 2016-06-22 11:17 ` Ebru Akagunduz
  2 siblings, 0 replies; 4+ messages in thread
From: Ebru Akagunduz @ 2016-06-22 11:17 UTC (permalink / raw)
  To: linux-mm
  Cc: hughd, riel, akpm, kirill.shutemov, n-horiguchi, aarcange,
	iamjoonsoo.kim, gorcunov, linux-kernel, mgorman, rientjes,
	vbabka, aneesh.kumar, hannes, mhocko, boaz, hillf.zj,
	Ebru Akagunduz

After the swapin issues were fixed, the comment lines were left as they
were in the old version. This patch updates the comments.

Signed-off-by: Ebru Akagunduz <ebru.akagunduz@gmail.com>
Reported-by: Hillf Danton <hillf.zj@alibaba-inc.com>
---
Changes in v2:
 - Newly created in this version.

 mm/huge_memory.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index ff96765..5cb0fd9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2441,8 +2441,8 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 		/* do_swap_page returns VM_FAULT_RETRY with released mmap_sem */
 		if (ret & VM_FAULT_RETRY) {
 			down_read(&mm->mmap_sem);
-			/* vma is no longer available, don't continue to swapin */
 			if (hugepage_vma_revalidate(mm, address)) {
+				/* vma is no longer available, don't continue to swapin */
 				trace_mm_collapse_huge_page_swapin(mm, swapped_in, referenced, 0);
 				return false;
 			}
@@ -2512,8 +2512,8 @@ static void collapse_huge_page(struct mm_struct *mm,
 
 	/*
 	 * __collapse_huge_page_swapin always returns with mmap_sem
-	 * locked.  If it fails, release mmap_sem and jump directly
-	 * out.  Continuing to collapse causes inconsistency.
+	 * locked. If it fails, we release mmap_sem and jump out_nolock.
+	 * Continuing to collapse causes inconsistency.
 	 */
 	if (!__collapse_huge_page_swapin(mm, vma, address, pmd, referenced)) {
 		mem_cgroup_cancel_charge(new_page, memcg, true);
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-06-22 11:30 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-06-22 11:15 [RFC PATCH v2 0/3] mm, thp: convert from optimistic swapin collapsing to conservative Ebru Akagunduz
2016-06-22 11:15 ` [RFC PATCH v2 1/3] mm, thp: revert allocstall comparing Ebru Akagunduz
2016-06-22 11:15 ` [RFC PATCH v2 2/3] mm, thp: convert from optimistic swapin collapsing to conservative Ebru Akagunduz
2016-06-22 11:17 ` [RFC PATCH v2 3/3] mm, thp: fix comment inconsistency for swapin readahead functions Ebru Akagunduz

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).