All of lore.kernel.org
 help / color / mirror / Atom feed
* [patch] mm, madvise: fail with ENOMEM when splitting vma will hit max_map_count
@ 2017-01-24 22:32 ` David Rientjes
  0 siblings, 0 replies; 10+ messages in thread
From: David Rientjes @ 2017-01-24 22:32 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Jonathan Corbet, Johannes Weiner, mtk.manpages, Jerome Marchand,
	Kirill A. Shutemov, linux-man, linux-kernel, linux-mm

If madvise(2) advice will result in the underlying vma being split and
the number of areas mapped by the process will exceed
/proc/sys/vm/max_map_count as a result, return ENOMEM instead of EAGAIN.

EAGAIN is returned by madvise(2) when a kernel resource, such as slab,
is temporarily unavailable.  It indicates that userspace should retry the
advice in the near future.  This is important for advice such as
MADV_DONTNEED which is often used by malloc implementations to free
memory back to the system: we really do want to free memory back when
madvise(2) returns EAGAIN because slab objects (for vmas, anon_vmas,
or mempolicies) cannot be allocated.

Encountering /proc/sys/vm/max_map_count is not a temporary failure,
however, so return ENOMEM to indicate this is a more serious issue.  A
followup patch to the man page will specify this behavior.

Signed-off-by: David Rientjes <rientjes@google.com>
---
 Documentation/sysctl/vm.txt |  4 ++--
 Documentation/vm/ksm.txt    |  4 ++++
 include/linux/mm.h          |  6 ++++--
 mm/madvise.c                | 51 +++++++++++++++++++++++++++++++++++++--------
 mm/mmap.c                   |  8 +++----
 5 files changed, 56 insertions(+), 17 deletions(-)

diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -376,8 +376,8 @@ max_map_count:
 
 This file contains the maximum number of memory map areas a process
 may have. Memory map areas are used as a side-effect of calling
-malloc, directly by mmap and mprotect, and also when loading shared
-libraries.
+malloc, directly by mmap, mprotect, and madvise, and also when loading
+shared libraries.
 
 While most applications need less than a thousand maps, certain
 programs, particularly malloc debuggers, may consume lots of them,
diff --git a/Documentation/vm/ksm.txt b/Documentation/vm/ksm.txt
--- a/Documentation/vm/ksm.txt
+++ b/Documentation/vm/ksm.txt
@@ -38,6 +38,10 @@ the range for whenever the KSM daemon is started; even if the range
 cannot contain any pages which KSM could actually merge; even if
 MADV_UNMERGEABLE is applied to a range which was never MADV_MERGEABLE.
 
+If a region of memory must be split into at least one new MADV_MERGEABLE
+or MADV_UNMERGEABLE region, the madvise may return ENOMEM if the process
+will exceed vm.max_map_count (see Documentation/sysctl/vm.txt).
+
 Like other madvise calls, they are intended for use on mapped areas of
 the user address space: they will report ENOMEM if the specified range
 includes unmapped gaps (though working on the intervening mapped areas),
diff --git a/include/linux/mm.h b/include/linux/mm.h
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1968,8 +1968,10 @@ extern struct vm_area_struct *vma_merge(struct mm_struct *,
 	unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
 	struct mempolicy *, struct vm_userfaultfd_ctx);
 extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
-extern int split_vma(struct mm_struct *,
-	struct vm_area_struct *, unsigned long addr, int new_below);
+extern int __split_vma(struct mm_struct *, struct vm_area_struct *,
+	unsigned long addr, int new_below);
+extern int split_vma(struct mm_struct *, struct vm_area_struct *,
+	unsigned long addr, int new_below);
 extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
 extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
 	struct rb_node **, struct rb_node *);
diff --git a/mm/madvise.c b/mm/madvise.c
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -89,14 +89,28 @@ static long madvise_behavior(struct vm_area_struct *vma,
 	case MADV_MERGEABLE:
 	case MADV_UNMERGEABLE:
 		error = ksm_madvise(vma, start, end, behavior, &new_flags);
-		if (error)
+		if (error) {
+			/*
+			 * madvise() returns EAGAIN if kernel resources, such as
+			 * slab, are temporarily unavailable.
+			 */
+			if (error == -ENOMEM)
+				error = -EAGAIN;
 			goto out;
+		}
 		break;
 	case MADV_HUGEPAGE:
 	case MADV_NOHUGEPAGE:
 		error = hugepage_madvise(vma, &new_flags, behavior);
-		if (error)
+		if (error) {
+			/*
+			 * madvise() returns EAGAIN if kernel resources, such as
+			 * slab, are temporarily unavailable.
+			 */
+			if (error == -ENOMEM)
+				error = -EAGAIN;
 			goto out;
+		}
 		break;
 	}
 
@@ -117,15 +131,37 @@ static long madvise_behavior(struct vm_area_struct *vma,
 	*prev = vma;
 
 	if (start != vma->vm_start) {
-		error = split_vma(mm, vma, start, 1);
-		if (error)
+		if (unlikely(mm->map_count >= sysctl_max_map_count)) {
+			error = -ENOMEM;
 			goto out;
+		}
+		error = __split_vma(mm, vma, start, 1);
+		if (error) {
+			/*
+			 * madvise() returns EAGAIN if kernel resources, such as
+			 * slab, are temporarily unavailable.
+			 */
+			if (error == -ENOMEM)
+				error = -EAGAIN;
+			goto out;
+		}
 	}
 
 	if (end != vma->vm_end) {
-		error = split_vma(mm, vma, end, 0);
-		if (error)
+		if (unlikely(mm->map_count >= sysctl_max_map_count)) {
+			error = -ENOMEM;
+			goto out;
+		}
+		error = __split_vma(mm, vma, end, 0);
+		if (error) {
+			/*
+			 * madvise() returns EAGAIN if kernel resources, such as
+			 * slab, are temporarily unavailable.
+			 */
+			if (error == -ENOMEM)
+				error = -EAGAIN;
 			goto out;
+		}
 	}
 
 success:
@@ -133,10 +169,7 @@ static long madvise_behavior(struct vm_area_struct *vma,
 	 * vm_flags is protected by the mmap_sem held in write mode.
 	 */
 	vma->vm_flags = new_flags;
-
 out:
-	if (error == -ENOMEM)
-		error = -EAGAIN;
 	return error;
 }
 
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2495,11 +2495,11 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 /*
- * __split_vma() bypasses sysctl_max_map_count checking.  We use this on the
- * munmap path where it doesn't make sense to fail.
+ * __split_vma() bypasses sysctl_max_map_count checking.  We use this where it
+ * has already been checked or doesn't make sense to fail.
  */
-static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
-	      unsigned long addr, int new_below)
+int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
+		unsigned long addr, int new_below)
 {
 	struct vm_area_struct *new;
 	int err;

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [patch] mm, madvise: fail with ENOMEM when splitting vma will hit max_map_count
@ 2017-01-24 22:32 ` David Rientjes
  0 siblings, 0 replies; 10+ messages in thread
From: David Rientjes @ 2017-01-24 22:32 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Jonathan Corbet, Johannes Weiner, mtk.manpages, Jerome Marchand,
	Kirill A. Shutemov, linux-man, linux-kernel, linux-mm

If madvise(2) advice will result in the underlying vma being split and
the number of areas mapped by the process will exceed
/proc/sys/vm/max_map_count as a result, return ENOMEM instead of EAGAIN.

EAGAIN is returned by madvise(2) when a kernel resource, such as slab,
is temporarily unavailable.  It indicates that userspace should retry the
advice in the near future.  This is important for advice such as
MADV_DONTNEED which is often used by malloc implementations to free
memory back to the system: we really do want to free memory back when
madvise(2) returns EAGAIN because slab objects (for vmas, anon_vmas,
or mempolicies) cannot be allocated.

Encountering /proc/sys/vm/max_map_count is not a temporary failure,
however, so return ENOMEM to indicate this is a more serious issue.  A
followup patch to the man page will specify this behavior.

Signed-off-by: David Rientjes <rientjes@google.com>
---
 Documentation/sysctl/vm.txt |  4 ++--
 Documentation/vm/ksm.txt    |  4 ++++
 include/linux/mm.h          |  6 ++++--
 mm/madvise.c                | 51 +++++++++++++++++++++++++++++++++++++--------
 mm/mmap.c                   |  8 +++----
 5 files changed, 56 insertions(+), 17 deletions(-)

diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -376,8 +376,8 @@ max_map_count:
 
 This file contains the maximum number of memory map areas a process
 may have. Memory map areas are used as a side-effect of calling
-malloc, directly by mmap and mprotect, and also when loading shared
-libraries.
+malloc, directly by mmap, mprotect, and madvise, and also when loading
+shared libraries.
 
 While most applications need less than a thousand maps, certain
 programs, particularly malloc debuggers, may consume lots of them,
diff --git a/Documentation/vm/ksm.txt b/Documentation/vm/ksm.txt
--- a/Documentation/vm/ksm.txt
+++ b/Documentation/vm/ksm.txt
@@ -38,6 +38,10 @@ the range for whenever the KSM daemon is started; even if the range
 cannot contain any pages which KSM could actually merge; even if
 MADV_UNMERGEABLE is applied to a range which was never MADV_MERGEABLE.
 
+If a region of memory must be split into at least one new MADV_MERGEABLE
+or MADV_UNMERGEABLE region, the madvise may return ENOMEM if the process
+will exceed vm.max_map_count (see Documentation/sysctl/vm.txt).
+
 Like other madvise calls, they are intended for use on mapped areas of
 the user address space: they will report ENOMEM if the specified range
 includes unmapped gaps (though working on the intervening mapped areas),
diff --git a/include/linux/mm.h b/include/linux/mm.h
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1968,8 +1968,10 @@ extern struct vm_area_struct *vma_merge(struct mm_struct *,
 	unsigned long vm_flags, struct anon_vma *, struct file *, pgoff_t,
 	struct mempolicy *, struct vm_userfaultfd_ctx);
 extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *);
-extern int split_vma(struct mm_struct *,
-	struct vm_area_struct *, unsigned long addr, int new_below);
+extern int __split_vma(struct mm_struct *, struct vm_area_struct *,
+	unsigned long addr, int new_below);
+extern int split_vma(struct mm_struct *, struct vm_area_struct *,
+	unsigned long addr, int new_below);
 extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
 extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
 	struct rb_node **, struct rb_node *);
diff --git a/mm/madvise.c b/mm/madvise.c
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -89,14 +89,28 @@ static long madvise_behavior(struct vm_area_struct *vma,
 	case MADV_MERGEABLE:
 	case MADV_UNMERGEABLE:
 		error = ksm_madvise(vma, start, end, behavior, &new_flags);
-		if (error)
+		if (error) {
+			/*
+			 * madvise() returns EAGAIN if kernel resources, such as
+			 * slab, are temporarily unavailable.
+			 */
+			if (error == -ENOMEM)
+				error = -EAGAIN;
 			goto out;
+		}
 		break;
 	case MADV_HUGEPAGE:
 	case MADV_NOHUGEPAGE:
 		error = hugepage_madvise(vma, &new_flags, behavior);
-		if (error)
+		if (error) {
+			/*
+			 * madvise() returns EAGAIN if kernel resources, such as
+			 * slab, are temporarily unavailable.
+			 */
+			if (error == -ENOMEM)
+				error = -EAGAIN;
 			goto out;
+		}
 		break;
 	}
 
@@ -117,15 +131,37 @@ static long madvise_behavior(struct vm_area_struct *vma,
 	*prev = vma;
 
 	if (start != vma->vm_start) {
-		error = split_vma(mm, vma, start, 1);
-		if (error)
+		if (unlikely(mm->map_count >= sysctl_max_map_count)) {
+			error = -ENOMEM;
 			goto out;
+		}
+		error = __split_vma(mm, vma, start, 1);
+		if (error) {
+			/*
+			 * madvise() returns EAGAIN if kernel resources, such as
+			 * slab, are temporarily unavailable.
+			 */
+			if (error == -ENOMEM)
+				error = -EAGAIN;
+			goto out;
+		}
 	}
 
 	if (end != vma->vm_end) {
-		error = split_vma(mm, vma, end, 0);
-		if (error)
+		if (unlikely(mm->map_count >= sysctl_max_map_count)) {
+			error = -ENOMEM;
+			goto out;
+		}
+		error = __split_vma(mm, vma, end, 0);
+		if (error) {
+			/*
+			 * madvise() returns EAGAIN if kernel resources, such as
+			 * slab, are temporarily unavailable.
+			 */
+			if (error == -ENOMEM)
+				error = -EAGAIN;
 			goto out;
+		}
 	}
 
 success:
@@ -133,10 +169,7 @@ static long madvise_behavior(struct vm_area_struct *vma,
 	 * vm_flags is protected by the mmap_sem held in write mode.
 	 */
 	vma->vm_flags = new_flags;
-
 out:
-	if (error == -ENOMEM)
-		error = -EAGAIN;
 	return error;
 }
 
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2495,11 +2495,11 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 /*
- * __split_vma() bypasses sysctl_max_map_count checking.  We use this on the
- * munmap path where it doesn't make sense to fail.
+ * __split_vma() bypasses sysctl_max_map_count checking.  We use this where it
+ * has already been checked or doesn't make sense to fail.
  */
-static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
-	      unsigned long addr, int new_below)
+int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
+		unsigned long addr, int new_below)
 {
 	struct vm_area_struct *new;
 	int err;

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [patch -man] madvise.2: Specify new ENOMEM return value
@ 2017-01-24 22:32   ` David Rientjes
  0 siblings, 0 replies; 10+ messages in thread
From: David Rientjes @ 2017-01-24 22:32 UTC (permalink / raw)
  To: mtk.manpages
  Cc: Andrew Morton, Jonathan Corbet, Johannes Weiner, Jerome Marchand,
	Kirill A. Shutemov, linux-man, linux-kernel, linux-mm

madvise(2) may return ENOMEM if the advice acts on a vma that must be
split and creating the new vma will result in the process exceeding
/proc/sys/vm/max_map_count.

Specify this additional possibility.

Signed-off-by: David Rientjes <rientjes@google.com>
---
 man2/madvise.2 | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/man2/madvise.2 b/man2/madvise.2
--- a/man2/madvise.2
+++ b/man2/madvise.2
@@ -467,7 +467,12 @@ Not enough memory: paging in failed.
 .TP
 .B ENOMEM
 Addresses in the specified range are not currently
-mapped, or are outside the address space of the process.
+mapped, are outside the address space of the process, or would cause the
+number of areas mapped by this process to exceed
+.I /proc/sys/vm/max_map_count
+(see the Linux kernel source file
+.I Documentation/sysctl/vm.txt
+for more details).
 .TP
 .B EPERM
 .I advice

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [patch -man] madvise.2: Specify new ENOMEM return value
@ 2017-01-24 22:32   ` David Rientjes
  0 siblings, 0 replies; 10+ messages in thread
From: David Rientjes @ 2017-01-24 22:32 UTC (permalink / raw)
  To: mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w
  Cc: Andrew Morton, Jonathan Corbet, Johannes Weiner, Jerome Marchand,
	Kirill A. Shutemov, linux-man-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-mm-Bw31MaZKKs3YtjvyW6yDsg

madvise(2) may return ENOMEM if the advice acts on a vma that must be
split and creating the new vma will result in the process exceeding
/proc/sys/vm/max_map_count.

Specify this additional possibility.

Signed-off-by: David Rientjes <rientjes-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>
---
 man2/madvise.2 | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/man2/madvise.2 b/man2/madvise.2
--- a/man2/madvise.2
+++ b/man2/madvise.2
@@ -467,7 +467,12 @@ Not enough memory: paging in failed.
 .TP
 .B ENOMEM
 Addresses in the specified range are not currently
-mapped, or are outside the address space of the process.
+mapped, are outside the address space of the process, or would cause the
+number of areas mapped by this process to exceed
+.I /proc/sys/vm/max_map_count
+(see the Linux kernel source file
+.I Documentation/sysctl/vm.txt
+for more details).
 .TP
 .B EPERM
 .I advice
--
To unsubscribe from this list: send the line "unsubscribe linux-man" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [patch -man] madvise.2: Specify new ENOMEM return value
@ 2017-01-24 22:32   ` David Rientjes
  0 siblings, 0 replies; 10+ messages in thread
From: David Rientjes @ 2017-01-24 22:32 UTC (permalink / raw)
  To: mtk.manpages
  Cc: Andrew Morton, Jonathan Corbet, Johannes Weiner, Jerome Marchand,
	Kirill A. Shutemov, linux-man, linux-kernel, linux-mm

madvise(2) may return ENOMEM if the advice acts on a vma that must be
split and creating the new vma will result in the process exceeding
/proc/sys/vm/max_map_count.

Specify this additional possibility.

Signed-off-by: David Rientjes <rientjes@google.com>
---
 man2/madvise.2 | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/man2/madvise.2 b/man2/madvise.2
--- a/man2/madvise.2
+++ b/man2/madvise.2
@@ -467,7 +467,12 @@ Not enough memory: paging in failed.
 .TP
 .B ENOMEM
 Addresses in the specified range are not currently
-mapped, or are outside the address space of the process.
+mapped, are outside the address space of the process, or would cause the
+number of areas mapped by this process to exceed
+.I /proc/sys/vm/max_map_count
+(see the Linux kernel source file
+.I Documentation/sysctl/vm.txt
+for more details).
 .TP
 .B EPERM
 .I advice

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [patch] mm, madvise: fail with ENOMEM when splitting vma will hit max_map_count
  2017-01-24 22:32 ` David Rientjes
@ 2017-01-25 10:30   ` Anshuman Khandual
  -1 siblings, 0 replies; 10+ messages in thread
From: Anshuman Khandual @ 2017-01-25 10:30 UTC (permalink / raw)
  To: David Rientjes, Andrew Morton
  Cc: Jonathan Corbet, Johannes Weiner, mtk.manpages, Jerome Marchand,
	Kirill A. Shutemov, linux-man, linux-kernel, linux-mm

On 01/25/2017 04:02 AM, David Rientjes wrote:
> If madvise(2) advice will result in the underlying vma being split and
> the number of areas mapped by the process will exceed
> /proc/sys/vm/max_map_count as a result, return ENOMEM instead of EAGAIN.
> 
> EAGAIN is returned by madvise(2) when a kernel resource, such as slab,
> is temporarily unavailable.  It indicates that userspace should retry the
> advice in the near future.  This is important for advice such as
> MADV_DONTNEED which is often used by malloc implementations to free
> memory back to the system: we really do want to free memory back when
> madvise(2) returns EAGAIN because slab allocations (for vmas, anon_vmas,
> or mempolicies) cannot be allocated.
> 
> Encountering /proc/sys/vm/max_map_count is not a temporary failure,
> however, so return ENOMEM to indicate this is a more serious issue.  A
> followup patch to the man page will specify this behavior.

But in due course there might be other changes in the number of VMAs of
the process because of unmap() or merge() which could reduce the total
number of VMAs and hence this condition may not exist afterwards. In
that case EAGAIN still makes sense.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [patch] mm, madvise: fail with ENOMEM when splitting vma will hit max_map_count
@ 2017-01-25 10:30   ` Anshuman Khandual
  0 siblings, 0 replies; 10+ messages in thread
From: Anshuman Khandual @ 2017-01-25 10:30 UTC (permalink / raw)
  To: David Rientjes, Andrew Morton
  Cc: Jonathan Corbet, Johannes Weiner, mtk.manpages, Jerome Marchand,
	Kirill A. Shutemov, linux-man, linux-kernel, linux-mm

On 01/25/2017 04:02 AM, David Rientjes wrote:
> If madvise(2) advice will result in the underlying vma being split and
> the number of areas mapped by the process will exceed
> /proc/sys/vm/max_map_count as a result, return ENOMEM instead of EAGAIN.
> 
> EAGAIN is returned by madvise(2) when a kernel resource, such as slab,
> is temporarily unavailable.  It indicates that userspace should retry the
> advice in the near future.  This is important for advice such as
> MADV_DONTNEED which is often used by malloc implementations to free
> memory back to the system: we really do want to free memory back when
> madvise(2) returns EAGAIN because slab allocations (for vmas, anon_vmas,
> or mempolicies) cannot be allocated.
> 
> Encountering /proc/sys/vm/max_map_count is not a temporary failure,
> however, so return ENOMEM to indicate this is a more serious issue.  A
> followup patch to the man page will specify this behavior.

But in due course there might be other changes in the number of VMAs of
the process because of unmap() or merge() which could reduce the total
number of VMAs and hence this condition may not exist afterwards. In
that case EAGAIN still makes sense.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [patch] mm, madvise: fail with ENOMEM when splitting vma will hit max_map_count
@ 2017-01-25 22:14     ` David Rientjes
  0 siblings, 0 replies; 10+ messages in thread
From: David Rientjes @ 2017-01-25 22:14 UTC (permalink / raw)
  To: Anshuman Khandual
  Cc: Andrew Morton, Jonathan Corbet, Johannes Weiner, mtk.manpages,
	Jerome Marchand, Kirill A. Shutemov, linux-man, linux-kernel,
	linux-mm

On Wed, 25 Jan 2017, Anshuman Khandual wrote:

> But in the due course there might be other changes in number of VMAs of
> the process because of unmap() or merge() which could reduce the total
> number of VMAs and hence this condition may not exist afterwards. In
> that case EAGAIN still makes sense.
> 

Imagine a singlethreaded process that is operating on its own privately 
mapped memory.  Attempting to split an existing vma and meeting 
vm.max_map_count is not something that will be fixed by trying again, i.e. 
it is not helpful to loop when madvise() returns -1 with errno EAGAIN if 
vm.max_map_count will always be encountered.  The other cases where ENOMEM 
is blindly converted to EAGAIN are when slab allocation fails, which can 
be relieved by external freeing: the meaning of "kernel resource is temporarily 
unavailable."  There is no such guarantee for vm.max_map_count, so ENOMEM 
clearly indicates the failure.

After this, it makes sense for userspace to loop for advice such as 
MADV_DONTNEED because we are actively freeing memory when EAGAIN is 
returned.  If we are meeting vm.max_map_count, this will infinitely loop.  
This is the case in tcmalloc and this patch addresses the issue when 
vm.max_map_count is low.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [patch] mm, madvise: fail with ENOMEM when splitting vma will hit max_map_count
@ 2017-01-25 22:14     ` David Rientjes
  0 siblings, 0 replies; 10+ messages in thread
From: David Rientjes @ 2017-01-25 22:14 UTC (permalink / raw)
  To: Anshuman Khandual
  Cc: Andrew Morton, Jonathan Corbet, Johannes Weiner,
	mtk.manpages-Re5JQEeQqe8AvxtiuMwx3w, Jerome Marchand,
	Kirill A. Shutemov, linux-man-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-mm-Bw31MaZKKs3YtjvyW6yDsg

On Wed, 25 Jan 2017, Anshuman Khandual wrote:

> But in the due course there might be other changes in number of VMAs of
> the process because of unmap() or merge() which could reduce the total
> number of VMAs and hence this condition may not exist afterwards. In
> that case EAGAIN still makes sense.
> 

Imagine a singlethreaded process that is operating on its own privately 
mapped memory.  Attempting to split an existing vma and meeting 
vm.max_map_count is not something that will be fixed by trying again, i.e. 
it is not helpful to loop when madvise() returns -1 with errno EAGAIN if 
vm.max_map_count will always be encountered.  The other cases where ENOMEM 
is blindly converted to EAGAIN are when slab allocation fails, which can 
be relieved by external freeing: the meaning of "kernel resource is temporarily 
unavailable."  There is no such guarantee for vm.max_map_count, so ENOMEM 
clearly indicates the failure.

After this, it makes sense for userspace to loop for advice such as 
MADV_DONTNEED because we are actively freeing memory when EAGAIN is 
returned.  If we are meeting vm.max_map_count, this will infinitely loop.  
This is the case in tcmalloc and this patch addresses the issue when 
vm.max_map_count is low.
--
To unsubscribe from this list: send the line "unsubscribe linux-man" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [patch] mm, madvise: fail with ENOMEM when splitting vma will hit max_map_count
@ 2017-01-25 22:14     ` David Rientjes
  0 siblings, 0 replies; 10+ messages in thread
From: David Rientjes @ 2017-01-25 22:14 UTC (permalink / raw)
  To: Anshuman Khandual
  Cc: Andrew Morton, Jonathan Corbet, Johannes Weiner, mtk.manpages,
	Jerome Marchand, Kirill A. Shutemov, linux-man, linux-kernel,
	linux-mm

On Wed, 25 Jan 2017, Anshuman Khandual wrote:

> But in the due course there might be other changes in number of VMAs of
> the process because of unmap() or merge() which could reduce the total
> number of VMAs and hence this condition may not exist afterwards. In
> that case EAGAIN still makes sense.
> 

Imagine a singlethreaded process that is operating on its own privately 
mapped memory.  Attempting to split an existing vma and meeting 
vm.max_map_count is not something that will be fixed by trying again, i.e. 
it is not helpful to loop when madvise() returns -1 with errno EAGAIN if 
vm.max_map_count will always be encountered.  The other cases where ENOMEM 
is blindly converted to EAGAIN are when slab allocation fails, which can 
be relieved by external freeing: the meaning of "kernel resource is temporarily 
unavailable."  There is no such guarantee for vm.max_map_count, so ENOMEM 
clearly indicates the failure.

After this, it makes sense for userspace to loop for advice such as 
MADV_DONTNEED because we are actively freeing memory when EAGAIN is 
returned.  If we are meeting vm.max_map_count, this will infinitely loop.  
This is the case in tcmalloc and this patch addresses the issue when 
vm.max_map_count is low.

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2017-01-25 22:15 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-01-24 22:32 [patch] mm, madvise: fail with ENOMEM when splitting vma will hit max_map_count David Rientjes
2017-01-24 22:32 ` David Rientjes
2017-01-24 22:32 ` [patch -man] madvise.2: Specify new ENOMEM return value David Rientjes
2017-01-24 22:32   ` David Rientjes
2017-01-24 22:32   ` David Rientjes
2017-01-25 10:30 ` [patch] mm, madvise: fail with ENOMEM when splitting vma will hit max_map_count Anshuman Khandual
2017-01-25 10:30   ` Anshuman Khandual
2017-01-25 22:14   ` David Rientjes
2017-01-25 22:14     ` David Rientjes
2017-01-25 22:14     ` David Rientjes

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.