linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/4] mm: drop zap_details::ignore_dirty
@ 2016-12-19 17:17 Kirill A. Shutemov
  2016-12-19 17:17 ` [PATCH 2/4] mm: drop zap_details::check_swap_entries Kirill A. Shutemov
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Kirill A. Shutemov @ 2016-12-19 17:17 UTC (permalink / raw)
  To: Michal Hocko, Tetsuo Handa, Peter Zijlstra, Rik van Riel, Andrew Morton
  Cc: linux-mm, linux-kernel, Kirill A. Shutemov

The only user of ignore_dirty is oom-reaper. But it doesn't really use
it.

ignore_dirty only has effect on file pages mapped with dirty pte.
But oom-repear skips shared VMAs, so there's no way we can dirty file
pte in them.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
---
 include/linux/mm.h | 1 -
 mm/memory.c        | 6 ------
 mm/oom_kill.c      | 3 +--
 3 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4424784ac374..7b8e425ac41c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1148,7 +1148,6 @@ struct zap_details {
 	struct address_space *check_mapping;	/* Check page->mapping if set */
 	pgoff_t	first_index;			/* Lowest page->index to unmap */
 	pgoff_t last_index;			/* Highest page->index to unmap */
-	bool ignore_dirty;			/* Ignore dirty pages */
 	bool check_swap_entries;		/* Check also swap entries */
 };
 
diff --git a/mm/memory.c b/mm/memory.c
index 455c3e628d52..6ac8fa56080f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1155,12 +1155,6 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 
 			if (!PageAnon(page)) {
 				if (pte_dirty(ptent)) {
-					/*
-					 * oom_reaper cannot tear down dirty
-					 * pages
-					 */
-					if (unlikely(details && details->ignore_dirty))
-						continue;
 					force_flush = 1;
 					set_page_dirty(page);
 				}
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index ec9f11d4f094..f101db68e760 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -465,8 +465,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 {
 	struct mmu_gather tlb;
 	struct vm_area_struct *vma;
-	struct zap_details details = {.check_swap_entries = true,
-				      .ignore_dirty = true};
+	struct zap_details details = {.check_swap_entries = true};
 	bool ret = true;
 
 	/*
-- 
2.10.2

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/4] mm: drop zap_details::check_swap_entries
  2016-12-19 17:17 [PATCH 1/4] mm: drop zap_details::ignore_dirty Kirill A. Shutemov
@ 2016-12-19 17:17 ` Kirill A. Shutemov
  2016-12-19 17:17 ` [PATCH 3/4] mm: drop unused argument of zap_page_range() Kirill A. Shutemov
  2016-12-19 17:17 ` [PATCH 4/4] oom-reaper: use madvise_dontneed() logic to decide if unmap the VMA Kirill A. Shutemov
  2 siblings, 0 replies; 10+ messages in thread
From: Kirill A. Shutemov @ 2016-12-19 17:17 UTC (permalink / raw)
  To: Michal Hocko, Tetsuo Handa, Peter Zijlstra, Rik van Riel, Andrew Morton
  Cc: linux-mm, linux-kernel, Kirill A. Shutemov

detail == NULL would give the same functionality as
.check_swap_entries==true.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
---
 include/linux/mm.h | 1 -
 mm/memory.c        | 4 ++--
 mm/oom_kill.c      | 3 +--
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7b8e425ac41c..5f6bea4c9d41 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1148,7 +1148,6 @@ struct zap_details {
 	struct address_space *check_mapping;	/* Check page->mapping if set */
 	pgoff_t	first_index;			/* Lowest page->index to unmap */
 	pgoff_t last_index;			/* Highest page->index to unmap */
-	bool check_swap_entries;		/* Check also swap entries */
 };
 
 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
diff --git a/mm/memory.c b/mm/memory.c
index 6ac8fa56080f..c03b18f13619 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1173,8 +1173,8 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 			}
 			continue;
 		}
-		/* only check swap_entries if explicitly asked for in details */
-		if (unlikely(details && !details->check_swap_entries))
+		/* If details->check_mapping, we leave swap entries. */
+		if (unlikely(details))
 			continue;
 
 		entry = pte_to_swp_entry(ptent);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index f101db68e760..96a53ab0c9eb 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -465,7 +465,6 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 {
 	struct mmu_gather tlb;
 	struct vm_area_struct *vma;
-	struct zap_details details = {.check_swap_entries = true};
 	bool ret = true;
 
 	/*
@@ -531,7 +530,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 		 */
 		if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED))
 			unmap_page_range(&tlb, vma, vma->vm_start, vma->vm_end,
-					 &details);
+					 NULL);
 	}
 	tlb_finish_mmu(&tlb, 0, -1);
 	pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
-- 
2.10.2

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 3/4] mm: drop unused argument of zap_page_range()
  2016-12-19 17:17 [PATCH 1/4] mm: drop zap_details::ignore_dirty Kirill A. Shutemov
  2016-12-19 17:17 ` [PATCH 2/4] mm: drop zap_details::check_swap_entries Kirill A. Shutemov
@ 2016-12-19 17:17 ` Kirill A. Shutemov
  2016-12-19 17:17 ` [PATCH 4/4] oom-reaper: use madvise_dontneed() logic to decide if unmap the VMA Kirill A. Shutemov
  2 siblings, 0 replies; 10+ messages in thread
From: Kirill A. Shutemov @ 2016-12-19 17:17 UTC (permalink / raw)
  To: Michal Hocko, Tetsuo Handa, Peter Zijlstra, Rik van Riel, Andrew Morton
  Cc: linux-mm, linux-kernel, Kirill A. Shutemov

There's no users of zap_page_range() who wants non-NULL 'details'.
Let's drop it.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
---
 arch/s390/mm/gmap.c               | 2 +-
 arch/x86/mm/mpx.c                 | 2 +-
 drivers/android/binder.c          | 2 +-
 drivers/staging/android/ion/ion.c | 3 +--
 include/linux/mm.h                | 2 +-
 mm/madvise.c                      | 2 +-
 mm/memory.c                       | 5 ++---
 7 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index ec1f0dedb948..59ac93714fa4 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -687,7 +687,7 @@ void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
 		/* Find vma in the parent mm */
 		vma = find_vma(gmap->mm, vmaddr);
 		size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
-		zap_page_range(vma, vmaddr, size, NULL);
+		zap_page_range(vma, vmaddr, size);
 	}
 	up_read(&gmap->mm->mmap_sem);
 }
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index e4f800999b32..4bfb31e79d5d 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -796,7 +796,7 @@ static noinline int zap_bt_entries_mapping(struct mm_struct *mm,
 			return -EINVAL;
 
 		len = min(vma->vm_end, end) - addr;
-		zap_page_range(vma, addr, len, NULL);
+		zap_page_range(vma, addr, len);
 		trace_mpx_unmap_zap(addr, addr+len);
 
 		vma = vma->vm_next;
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 3c71b982bf2a..d97f6725cf8c 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -629,7 +629,7 @@ static int binder_update_page_range(struct binder_proc *proc, int allocate,
 		page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE];
 		if (vma)
 			zap_page_range(vma, (uintptr_t)page_addr +
-				proc->user_buffer_offset, PAGE_SIZE, NULL);
+				proc->user_buffer_offset, PAGE_SIZE);
 err_vm_insert_page_failed:
 		unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE);
 err_map_kernel_failed:
diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c
index b653451843c8..0fb0e28ace70 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -865,8 +865,7 @@ static void ion_buffer_sync_for_device(struct ion_buffer *buffer,
 	list_for_each_entry(vma_list, &buffer->vmas, list) {
 		struct vm_area_struct *vma = vma_list->vma;
 
-		zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start,
-			       NULL);
+		zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start);
 	}
 	mutex_unlock(&buffer->lock);
 }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5f6bea4c9d41..92dcada8caaf 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1158,7 +1158,7 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
 int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
 		unsigned long size);
 void zap_page_range(struct vm_area_struct *vma, unsigned long address,
-		unsigned long size, struct zap_details *);
+		unsigned long size);
 void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 		unsigned long start, unsigned long end);
 
diff --git a/mm/madvise.c b/mm/madvise.c
index 0e3828eae9f8..aa4c502caecb 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -476,7 +476,7 @@ static long madvise_dontneed(struct vm_area_struct *vma,
 	if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
 		return -EINVAL;
 
-	zap_page_range(vma, start, end - start, NULL);
+	zap_page_range(vma, start, end - start);
 	return 0;
 }
 
diff --git a/mm/memory.c b/mm/memory.c
index c03b18f13619..6192ca3e45dc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1370,12 +1370,11 @@ void unmap_vmas(struct mmu_gather *tlb,
  * @vma: vm_area_struct holding the applicable pages
  * @start: starting address of pages to zap
  * @size: number of bytes to zap
- * @details: details of shared cache invalidation
  *
  * Caller must protect the VMA list
  */
 void zap_page_range(struct vm_area_struct *vma, unsigned long start,
-		unsigned long size, struct zap_details *details)
+		unsigned long size)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct mmu_gather tlb;
@@ -1386,7 +1385,7 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start,
 	update_hiwater_rss(mm);
 	mmu_notifier_invalidate_range_start(mm, start, end);
 	for ( ; vma && vma->vm_start < end; vma = vma->vm_next)
-		unmap_single_vma(&tlb, vma, start, end, details);
+		unmap_single_vma(&tlb, vma, start, end, NULL);
 	mmu_notifier_invalidate_range_end(mm, start, end);
 	tlb_finish_mmu(&tlb, start, end);
 }
-- 
2.10.2

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 4/4] oom-reaper: use madvise_dontneed() logic to decide if unmap the VMA
  2016-12-19 17:17 [PATCH 1/4] mm: drop zap_details::ignore_dirty Kirill A. Shutemov
  2016-12-19 17:17 ` [PATCH 2/4] mm: drop zap_details::check_swap_entries Kirill A. Shutemov
  2016-12-19 17:17 ` [PATCH 3/4] mm: drop unused argument of zap_page_range() Kirill A. Shutemov
@ 2016-12-19 17:17 ` Kirill A. Shutemov
  2016-12-20  1:44   ` kbuild test robot
                     ` (3 more replies)
  2 siblings, 4 replies; 10+ messages in thread
From: Kirill A. Shutemov @ 2016-12-19 17:17 UTC (permalink / raw)
  To: Michal Hocko, Tetsuo Handa, Peter Zijlstra, Rik van Riel, Andrew Morton
  Cc: linux-mm, linux-kernel, Kirill A. Shutemov

Logic on whether we can reap pages from the VMA should match what we
have in madvise_dontneed(). In particular, we should skip, VM_PFNMAP
VMAs, but we don't now.

Let's just extract condition on which we can shoot down pagesi from a
VMA with MADV_DONTNEED into separate function and use it in both places.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 mm/internal.h | 5 +++++
 mm/madvise.c  | 2 +-
 mm/oom_kill.c | 9 +--------
 3 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index 44d68895a9b9..7430628bff34 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -41,6 +41,11 @@ int do_swap_page(struct vm_fault *vmf);
 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 		unsigned long floor, unsigned long ceiling);
 
+static inline bool can_madv_dontneed_vma(struct vm_area_struct *vma)
+{
+	return !(vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP));
+}
+
 void unmap_page_range(struct mmu_gather *tlb,
 			     struct vm_area_struct *vma,
 			     unsigned long addr, unsigned long end,
diff --git a/mm/madvise.c b/mm/madvise.c
index aa4c502caecb..20200dfbd1bb 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -473,7 +473,7 @@ static long madvise_dontneed(struct vm_area_struct *vma,
 			     unsigned long start, unsigned long end)
 {
 	*prev = vma;
-	if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
+	if (!can_madv_dontneed_vma(vma))
 		return -EINVAL;
 
 	zap_page_range(vma, start, end - start);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 96a53ab0c9eb..b6d8ac4948db 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -508,14 +508,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 
 	tlb_gather_mmu(&tlb, mm, 0, -1);
 	for (vma = mm->mmap ; vma; vma = vma->vm_next) {
-		if (is_vm_hugetlb_page(vma))
-			continue;
-
-		/*
-		 * mlocked VMAs require explicit munlocking before unmap.
-		 * Let's keep it simple here and skip such VMAs.
-		 */
-		if (vma->vm_flags & VM_LOCKED)
+		if (!can_madv_dontneed_vma(vma))
 			continue;
 
 		/*
-- 
2.10.2

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 4/4] oom-reaper: use madvise_dontneed() logic to decide if unmap the VMA
  2016-12-19 17:17 ` [PATCH 4/4] oom-reaper: use madvise_dontneed() logic to decide if unmap the VMA Kirill A. Shutemov
@ 2016-12-20  1:44   ` kbuild test robot
  2016-12-20  2:22   ` kbuild test robot
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 10+ messages in thread
From: kbuild test robot @ 2016-12-20  1:44 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: kbuild-all, Michal Hocko, Tetsuo Handa, Peter Zijlstra,
	Rik van Riel, Andrew Morton, linux-mm, linux-kernel,
	Kirill A. Shutemov

[-- Attachment #1: Type: text/plain, Size: 1433 bytes --]

Hi Kirill,

[auto build test ERROR on mmotm/master]
[also build test ERROR on v4.9 next-20161219]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Kirill-A-Shutemov/mm-drop-zap_details-ignore_dirty/20161220-092938
base:   git://git.cmpxchg.org/linux-mmotm.git master
config: i386-randconfig-x003-201651 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All errors (new ones prefixed by >>):

   mm/madvise.c: In function 'madvise_dontneed':
>> mm/madvise.c:476:7: error: implicit declaration of function 'can_madv_dontneed_vma' [-Werror=implicit-function-declaration]
     if (!can_madv_dontneed_vma(vma))
          ^~~~~~~~~~~~~~~~~~~~~
   cc1: some warnings being treated as errors

vim +/can_madv_dontneed_vma +476 mm/madvise.c

   470	 */
   471	static long madvise_dontneed(struct vm_area_struct *vma,
   472				     struct vm_area_struct **prev,
   473				     unsigned long start, unsigned long end)
   474	{
   475		*prev = vma;
 > 476		if (!can_madv_dontneed_vma(vma))
   477			return -EINVAL;
   478	
   479		zap_page_range(vma, start, end - start);

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 24716 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 4/4] oom-reaper: use madvise_dontneed() logic to decide if unmap the VMA
  2016-12-19 17:17 ` [PATCH 4/4] oom-reaper: use madvise_dontneed() logic to decide if unmap the VMA Kirill A. Shutemov
  2016-12-20  1:44   ` kbuild test robot
@ 2016-12-20  2:22   ` kbuild test robot
  2016-12-20  9:57   ` Michal Hocko
  2016-12-20 10:46   ` Kirill A. Shutemov
  3 siblings, 0 replies; 10+ messages in thread
From: kbuild test robot @ 2016-12-20  2:22 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: kbuild-all, Michal Hocko, Tetsuo Handa, Peter Zijlstra,
	Rik van Riel, Andrew Morton, linux-mm, linux-kernel,
	Kirill A. Shutemov

[-- Attachment #1: Type: text/plain, Size: 2768 bytes --]

Hi Kirill,

[auto build test WARNING on mmotm/master]
[also build test WARNING on v4.9 next-20161219]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Kirill-A-Shutemov/mm-drop-zap_details-ignore_dirty/20161220-092938
base:   git://git.cmpxchg.org/linux-mmotm.git master
config: x86_64-randconfig-x004-201651 (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=x86_64 

All warnings (new ones prefixed by >>):

   In file included from include/asm-generic/bug.h:4:0,
                    from arch/x86/include/asm/bug.h:35,
                    from include/linux/bug.h:4,
                    from include/linux/mmdebug.h:4,
                    from include/linux/mm.h:8,
                    from include/linux/mman.h:4,
                    from mm/madvise.c:8:
   mm/madvise.c: In function 'madvise_dontneed':
   mm/madvise.c:476:7: error: implicit declaration of function 'can_madv_dontneed_vma' [-Werror=implicit-function-declaration]
     if (!can_madv_dontneed_vma(vma))
          ^
   include/linux/compiler.h:149:30: note: in definition of macro '__trace_if'
     if (__builtin_constant_p(!!(cond)) ? !!(cond) :   \
                                 ^~~~
>> mm/madvise.c:476:2: note: in expansion of macro 'if'
     if (!can_madv_dontneed_vma(vma))
     ^~
   cc1: some warnings being treated as errors

vim +/if +476 mm/madvise.c

   460	 *
   461	 * NB: This interface discards data rather than pushes it out to swap,
   462	 * as some implementations do.  This has performance implications for
   463	 * applications like large transactional databases which want to discard
   464	 * pages in anonymous maps after committing to backing store the data
   465	 * that was kept in them.  There is no reason to write this data out to
   466	 * the swap area if the application is discarding it.
   467	 *
   468	 * An interface that causes the system to free clean pages and flush
   469	 * dirty pages is already available as msync(MS_INVALIDATE).
   470	 */
   471	static long madvise_dontneed(struct vm_area_struct *vma,
   472				     struct vm_area_struct **prev,
   473				     unsigned long start, unsigned long end)
   474	{
   475		*prev = vma;
 > 476		if (!can_madv_dontneed_vma(vma))
   477			return -EINVAL;
   478	
   479		zap_page_range(vma, start, end - start);
   480		return 0;
   481	}
   482	
   483	/*
   484	 * Application wants to free up the pages and associated backing store.

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/gzip, Size: 27297 bytes --]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 4/4] oom-reaper: use madvise_dontneed() logic to decide if unmap the VMA
  2016-12-19 17:17 ` [PATCH 4/4] oom-reaper: use madvise_dontneed() logic to decide if unmap the VMA Kirill A. Shutemov
  2016-12-20  1:44   ` kbuild test robot
  2016-12-20  2:22   ` kbuild test robot
@ 2016-12-20  9:57   ` Michal Hocko
  2016-12-20 10:46   ` Kirill A. Shutemov
  3 siblings, 0 replies; 10+ messages in thread
From: Michal Hocko @ 2016-12-20  9:57 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Tetsuo Handa, Peter Zijlstra, Rik van Riel, Andrew Morton,
	linux-mm, linux-kernel

On Mon 19-12-16 20:17:22, Kirill A. Shutemov wrote:
> Logic on whether we can reap pages from the VMA should match what we
> have in madvise_dontneed(). In particular, we should skip, VM_PFNMAP
> VMAs, but we don't now.
> 
> Let's just extract condition on which we can shoot down pagesi from a
> VMA with MADV_DONTNEED into separate function and use it in both places.
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>

you need to include internal.h in madvise.c

diff --git a/mm/madvise.c b/mm/madvise.c
index 20200dfbd1bb..c53d8da9c8e6 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -24,6 +24,8 @@
 
 #include <asm/tlb.h>
 
+#include "internal.h"
+
 /*
  * Any behaviour which results in changes to the vma->vm_flags needs to
  * take mmap_sem for writing. Others, which simply traverse vmas, need

otherwise it won't compile. Then you can add
Acked-by: Michal Hocko <mhocko@suse.com>

> ---
>  mm/internal.h | 5 +++++
>  mm/madvise.c  | 2 +-
>  mm/oom_kill.c | 9 +--------
>  3 files changed, 7 insertions(+), 9 deletions(-)
> 
> diff --git a/mm/internal.h b/mm/internal.h
> index 44d68895a9b9..7430628bff34 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -41,6 +41,11 @@ int do_swap_page(struct vm_fault *vmf);
>  void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
>  		unsigned long floor, unsigned long ceiling);
>  
> +static inline bool can_madv_dontneed_vma(struct vm_area_struct *vma)
> +{
> +	return !(vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP));
> +}
> +
>  void unmap_page_range(struct mmu_gather *tlb,
>  			     struct vm_area_struct *vma,
>  			     unsigned long addr, unsigned long end,
> diff --git a/mm/madvise.c b/mm/madvise.c
> index aa4c502caecb..20200dfbd1bb 100644
> --- a/mm/madvise.c
> +++ b/mm/madvise.c
> @@ -473,7 +473,7 @@ static long madvise_dontneed(struct vm_area_struct *vma,
>  			     unsigned long start, unsigned long end)
>  {
>  	*prev = vma;
> -	if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
> +	if (!can_madv_dontneed_vma(vma))
>  		return -EINVAL;
>  
>  	zap_page_range(vma, start, end - start);
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> index 96a53ab0c9eb..b6d8ac4948db 100644
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -508,14 +508,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
>  
>  	tlb_gather_mmu(&tlb, mm, 0, -1);
>  	for (vma = mm->mmap ; vma; vma = vma->vm_next) {
> -		if (is_vm_hugetlb_page(vma))
> -			continue;
> -
> -		/*
> -		 * mlocked VMAs require explicit munlocking before unmap.
> -		 * Let's keep it simple here and skip such VMAs.
> -		 */
> -		if (vma->vm_flags & VM_LOCKED)
> +		if (!can_madv_dontneed_vma(vma))
>  			continue;
>  
>  		/*
> -- 
> 2.10.2
> 

-- 
Michal Hocko
SUSE Labs

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 4/4] oom-reaper: use madvise_dontneed() logic to decide if unmap the VMA
  2016-12-19 17:17 ` [PATCH 4/4] oom-reaper: use madvise_dontneed() logic to decide if unmap the VMA Kirill A. Shutemov
                     ` (2 preceding siblings ...)
  2016-12-20  9:57   ` Michal Hocko
@ 2016-12-20 10:46   ` Kirill A. Shutemov
  3 siblings, 0 replies; 10+ messages in thread
From: Kirill A. Shutemov @ 2016-12-20 10:46 UTC (permalink / raw)
  To: Michal Hocko, Tetsuo Handa, Peter Zijlstra, Rik van Riel, Andrew Morton
  Cc: linux-mm, linux-kernel, Kirill A. Shutemov

Logic on whether we can reap pages from the VMA should match what we
have in madvise_dontneed(). In particular, we should skip, VM_PFNMAP
VMAs, but we don't now.

Let's just extract condition on which we can shoot down pagesi from a
VMA with MADV_DONTNEED into separate function and use it in both places.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Acked-by: Michal Hocko <mhocko@suse.com>
---
 mm/internal.h | 5 +++++
 mm/madvise.c  | 4 +++-
 mm/oom_kill.c | 9 +--------
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/mm/internal.h b/mm/internal.h
index 44d68895a9b9..7430628bff34 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -41,6 +41,11 @@ int do_swap_page(struct vm_fault *vmf);
 void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *start_vma,
 		unsigned long floor, unsigned long ceiling);
 
+static inline bool can_madv_dontneed_vma(struct vm_area_struct *vma)
+{
+	return !(vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP));
+}
+
 void unmap_page_range(struct mmu_gather *tlb,
 			     struct vm_area_struct *vma,
 			     unsigned long addr, unsigned long end,
diff --git a/mm/madvise.c b/mm/madvise.c
index aa4c502caecb..c53d8da9c8e6 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -24,6 +24,8 @@
 
 #include <asm/tlb.h>
 
+#include "internal.h"
+
 /*
  * Any behaviour which results in changes to the vma->vm_flags needs to
  * take mmap_sem for writing. Others, which simply traverse vmas, need
@@ -473,7 +475,7 @@ static long madvise_dontneed(struct vm_area_struct *vma,
 			     unsigned long start, unsigned long end)
 {
 	*prev = vma;
-	if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_PFNMAP))
+	if (!can_madv_dontneed_vma(vma))
 		return -EINVAL;
 
 	zap_page_range(vma, start, end - start);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 96a53ab0c9eb..b6d8ac4948db 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -508,14 +508,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 
 	tlb_gather_mmu(&tlb, mm, 0, -1);
 	for (vma = mm->mmap ; vma; vma = vma->vm_next) {
-		if (is_vm_hugetlb_page(vma))
-			continue;
-
-		/*
-		 * mlocked VMAs require explicit munlocking before unmap.
-		 * Let's keep it simple here and skip such VMAs.
-		 */
-		if (vma->vm_flags & VM_LOCKED)
+		if (!can_madv_dontneed_vma(vma))
 			continue;
 
 		/*
-- 
2.10.2

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 1/4] mm: drop zap_details::ignore_dirty
  2016-12-16 14:15 [PATCH 1/4] mm: drop zap_details::ignore_dirty Kirill A. Shutemov
@ 2016-12-19 14:22 ` Michal Hocko
  0 siblings, 0 replies; 10+ messages in thread
From: Michal Hocko @ 2016-12-19 14:22 UTC (permalink / raw)
  To: Kirill A. Shutemov
  Cc: Peter Zijlstra, Rik van Riel, Andrew Morton, linux-mm, linux-kernel

On Fri 16-12-16 17:15:53, Kirill A. Shutemov wrote:
> The only user of ignore_dirty is oom-reaper. But it doesn't really use
> it.
> 
> ignore_dirty only has effect on file pages mapped with dirty pte.
> But oom-repear skips shared VMAs, so there's no way we can dirty file
> pte in them.

Hmm, I am trying to rememeber why I've done that and it seems I was just
too paranoid and wanted to make sure that we never touch dirty mapped
pages. As you say this is not possible with the current implementation
so the patch should be OK.

> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>

Acked-by: Michal Hocko <mhocko@suse.com>

> ---
>  include/linux/mm.h | 1 -
>  mm/memory.c        | 6 ------
>  mm/oom_kill.c      | 3 +--
>  3 files changed, 1 insertion(+), 9 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 4424784ac374..7b8e425ac41c 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1148,7 +1148,6 @@ struct zap_details {
>  	struct address_space *check_mapping;	/* Check page->mapping if set */
>  	pgoff_t	first_index;			/* Lowest page->index to unmap */
>  	pgoff_t last_index;			/* Highest page->index to unmap */
> -	bool ignore_dirty;			/* Ignore dirty pages */
>  	bool check_swap_entries;		/* Check also swap entries */
>  };
>  
> diff --git a/mm/memory.c b/mm/memory.c
> index 455c3e628d52..6ac8fa56080f 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -1155,12 +1155,6 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
>  
>  			if (!PageAnon(page)) {
>  				if (pte_dirty(ptent)) {
> -					/*
> -					 * oom_reaper cannot tear down dirty
> -					 * pages
> -					 */
> -					if (unlikely(details && details->ignore_dirty))
> -						continue;
>  					force_flush = 1;
>  					set_page_dirty(page);
>  				}
> diff --git a/mm/oom_kill.c b/mm/oom_kill.c
> index ec9f11d4f094..f101db68e760 100644
> --- a/mm/oom_kill.c
> +++ b/mm/oom_kill.c
> @@ -465,8 +465,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
>  {
>  	struct mmu_gather tlb;
>  	struct vm_area_struct *vma;
> -	struct zap_details details = {.check_swap_entries = true,
> -				      .ignore_dirty = true};
> +	struct zap_details details = {.check_swap_entries = true};
>  	bool ret = true;
>  
>  	/*
> -- 
> 2.10.2
> 

-- 
Michal Hocko
SUSE Labs

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH 1/4] mm: drop zap_details::ignore_dirty
@ 2016-12-16 14:15 Kirill A. Shutemov
  2016-12-19 14:22 ` Michal Hocko
  0 siblings, 1 reply; 10+ messages in thread
From: Kirill A. Shutemov @ 2016-12-16 14:15 UTC (permalink / raw)
  To: Michal Hocko, Peter Zijlstra, Rik van Riel, Andrew Morton
  Cc: linux-mm, linux-kernel, Kirill A. Shutemov

The only user of ignore_dirty is oom-reaper. But it doesn't really use
it.

ignore_dirty only has effect on file pages mapped with dirty pte.
But oom-repear skips shared VMAs, so there's no way we can dirty file
pte in them.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 include/linux/mm.h | 1 -
 mm/memory.c        | 6 ------
 mm/oom_kill.c      | 3 +--
 3 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4424784ac374..7b8e425ac41c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1148,7 +1148,6 @@ struct zap_details {
 	struct address_space *check_mapping;	/* Check page->mapping if set */
 	pgoff_t	first_index;			/* Lowest page->index to unmap */
 	pgoff_t last_index;			/* Highest page->index to unmap */
-	bool ignore_dirty;			/* Ignore dirty pages */
 	bool check_swap_entries;		/* Check also swap entries */
 };
 
diff --git a/mm/memory.c b/mm/memory.c
index 455c3e628d52..6ac8fa56080f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1155,12 +1155,6 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 
 			if (!PageAnon(page)) {
 				if (pte_dirty(ptent)) {
-					/*
-					 * oom_reaper cannot tear down dirty
-					 * pages
-					 */
-					if (unlikely(details && details->ignore_dirty))
-						continue;
 					force_flush = 1;
 					set_page_dirty(page);
 				}
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index ec9f11d4f094..f101db68e760 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -465,8 +465,7 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
 {
 	struct mmu_gather tlb;
 	struct vm_area_struct *vma;
-	struct zap_details details = {.check_swap_entries = true,
-				      .ignore_dirty = true};
+	struct zap_details details = {.check_swap_entries = true};
 	bool ret = true;
 
 	/*
-- 
2.10.2

^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2016-12-20 10:46 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-12-19 17:17 [PATCH 1/4] mm: drop zap_details::ignore_dirty Kirill A. Shutemov
2016-12-19 17:17 ` [PATCH 2/4] mm: drop zap_details::check_swap_entries Kirill A. Shutemov
2016-12-19 17:17 ` [PATCH 3/4] mm: drop unused argument of zap_page_range() Kirill A. Shutemov
2016-12-19 17:17 ` [PATCH 4/4] oom-reaper: use madvise_dontneed() logic to decide if unmap the VMA Kirill A. Shutemov
2016-12-20  1:44   ` kbuild test robot
2016-12-20  2:22   ` kbuild test robot
2016-12-20  9:57   ` Michal Hocko
2016-12-20 10:46   ` Kirill A. Shutemov
  -- strict thread matches above, loose matches on Subject: below --
2016-12-16 14:15 [PATCH 1/4] mm: drop zap_details::ignore_dirty Kirill A. Shutemov
2016-12-19 14:22 ` Michal Hocko

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).