All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] mm: mempolicy: turn vma_set_policy() into vma_dup_policy()
@ 2013-07-10 17:02 Oleg Nesterov
  2013-07-11 22:13 ` David Rientjes
  0 siblings, 1 reply; 7+ messages in thread
From: Oleg Nesterov @ 2013-07-10 17:02 UTC (permalink / raw)
  To: Andrew Morton, KOSAKI Motohiro
  Cc: Mel Gorman, Rik van Riel, Andi Kleen, linux-kernel

Simple cleanup. Every user of vma_set_policy() does the same work,
which looks a bit annoying imho. Add the new trivial helper which
does mpol_dup() + vma_set_policy() to simplify the callers.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 include/linux/mempolicy.h |    9 +++++++--
 kernel/fork.c             |    9 +++------
 mm/mempolicy.c            |   10 ++++++++++
 mm/mmap.c                 |   17 +++++------------
 4 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 0d7df39..b2f8977 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -91,7 +91,6 @@ static inline struct mempolicy *mpol_dup(struct mempolicy *pol)
 }
 
 #define vma_policy(vma) ((vma)->vm_policy)
-#define vma_set_policy(vma, pol) ((vma)->vm_policy = (pol))
 
 static inline void mpol_get(struct mempolicy *pol)
 {
@@ -126,6 +125,7 @@ struct shared_policy {
 	spinlock_t lock;
 };
 
+int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst);
 void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol);
 int mpol_set_shared_policy(struct shared_policy *info,
 				struct vm_area_struct *vma,
@@ -240,7 +240,12 @@ mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
 }
 
 #define vma_policy(vma) NULL
-#define vma_set_policy(vma, pol) do {} while(0)
+
+static inline int
+vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
+{
+	return 0;
+}
 
 static inline void numa_policy_init(void)
 {
diff --git a/kernel/fork.c b/kernel/fork.c
index 987b28a..1c214fe 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -351,7 +351,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 	struct rb_node **rb_link, *rb_parent;
 	int retval;
 	unsigned long charge;
-	struct mempolicy *pol;
 
 	uprobe_start_dup_mmap();
 	down_write(&oldmm->mmap_sem);
@@ -402,11 +401,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 			goto fail_nomem;
 		*tmp = *mpnt;
 		INIT_LIST_HEAD(&tmp->anon_vma_chain);
-		pol = mpol_dup(vma_policy(mpnt));
-		retval = PTR_ERR(pol);
-		if (IS_ERR(pol))
+		retval = vma_dup_policy(mpnt, tmp);
+		if (retval)
 			goto fail_nomem_policy;
-		vma_set_policy(tmp, pol);
 		tmp->vm_mm = mm;
 		if (anon_vma_fork(tmp, mpnt))
 			goto fail_nomem_anon_vma_fork;
@@ -474,7 +471,7 @@ out:
 	uprobe_end_dup_mmap();
 	return retval;
 fail_nomem_anon_vma_fork:
-	mpol_put(pol);
+	mpol_put(vma_policy(tmp));
 fail_nomem_policy:
 	kmem_cache_free(vm_area_cachep, tmp);
 fail_nomem:
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 4baf12e..6b1d426 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2065,6 +2065,16 @@ retry_cpuset:
 }
 EXPORT_SYMBOL(alloc_pages_current);
 
+int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
+{
+	struct mempolicy *pol = mpol_dup(vma_policy(src));
+
+	if (IS_ERR(pol))
+		return PTR_ERR(pol);
+	dst->vm_policy = pol;
+	return 0;
+}
+
 /*
  * If mpol_dup() sees current->cpuset == cpuset_being_rebound, then it
  * rebinds the mempolicy its copying by calling mpol_rebind_policy()
diff --git a/mm/mmap.c b/mm/mmap.c
index 7a1ba76..d8a310b 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2481,7 +2481,6 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 	      unsigned long addr, int new_below)
 {
-	struct mempolicy *pol;
 	struct vm_area_struct *new;
 	int err = -ENOMEM;
 
@@ -2505,12 +2504,9 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 		new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
 	}
 
-	pol = mpol_dup(vma_policy(vma));
-	if (IS_ERR(pol)) {
-		err = PTR_ERR(pol);
+	err = vma_dup_policy(vma, new);
+	if (err)
 		goto out_free_vma;
-	}
-	vma_set_policy(new, pol);
 
 	if (anon_vma_clone(new, vma))
 		goto out_free_mpol;
@@ -2538,7 +2534,7 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 		fput(new->vm_file);
 	unlink_anon_vmas(new);
  out_free_mpol:
-	mpol_put(pol);
+	mpol_put(vma_policy(new));
  out_free_vma:
 	kmem_cache_free(vm_area_cachep, new);
  out_err:
@@ -2881,7 +2877,6 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 	struct mm_struct *mm = vma->vm_mm;
 	struct vm_area_struct *new_vma, *prev;
 	struct rb_node **rb_link, *rb_parent;
-	struct mempolicy *pol;
 	bool faulted_in_anon_vma = true;
 
 	/*
@@ -2926,10 +2921,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 			new_vma->vm_start = addr;
 			new_vma->vm_end = addr + len;
 			new_vma->vm_pgoff = pgoff;
-			pol = mpol_dup(vma_policy(vma));
-			if (IS_ERR(pol))
+			if (vma_dup_policy(vma, new_vma))
 				goto out_free_vma;
-			vma_set_policy(new_vma, pol);
 			INIT_LIST_HEAD(&new_vma->anon_vma_chain);
 			if (anon_vma_clone(new_vma, vma))
 				goto out_free_mempol;
@@ -2944,7 +2937,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 	return new_vma;
 
  out_free_mempol:
-	mpol_put(pol);
+	mpol_put(vma_policy(new_vma));
  out_free_vma:
 	kmem_cache_free(vm_area_cachep, new_vma);
 	return NULL;
-- 
1.5.5.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] mm: mempolicy: turn vma_set_policy() into vma_dup_policy()
  2013-07-10 17:02 [PATCH] mm: mempolicy: turn vma_set_policy() into vma_dup_policy() Oleg Nesterov
@ 2013-07-11 22:13 ` David Rientjes
  2013-07-11 22:20   ` Andrew Morton
  2013-07-12 15:32   ` Oleg Nesterov
  0 siblings, 2 replies; 7+ messages in thread
From: David Rientjes @ 2013-07-11 22:13 UTC (permalink / raw)
  To: Oleg Nesterov
  Cc: Andrew Morton, KOSAKI Motohiro, Mel Gorman, Rik van Riel,
	Andi Kleen, linux-kernel

On Wed, 10 Jul 2013, Oleg Nesterov wrote:

> Simple cleanup. Every user of vma_set_policy() does the same work,
> which looks a bit annoying imho. Add the new trivial helper which
> does mpol_dup() + vma_set_policy() to simplify the callers.
> 

Good idea, just a few simple issues to fix.

> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index 4baf12e..6b1d426 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -2065,6 +2065,16 @@ retry_cpuset:
>  }
>  EXPORT_SYMBOL(alloc_pages_current);
>  
> +int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
> +{
> +	struct mempolicy *pol = mpol_dup(vma_policy(src));
> +
> +	if (IS_ERR(pol))
> +		return PTR_ERR(pol);

PTR_ERR() returns long, so vma_dup_policy() needs to return long.

> +	dst->vm_policy = pol;
> +	return 0;
> +}
> +
>  /*
>   * If mpol_dup() sees current->cpuset == cpuset_being_rebound, then it
>   * rebinds the mempolicy its copying by calling mpol_rebind_policy()
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 7a1ba76..d8a310b 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -2481,7 +2481,6 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
>  static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
>  	      unsigned long addr, int new_below)
>  {
> -	struct mempolicy *pol;
>  	struct vm_area_struct *new;
>  	int err = -ENOMEM;
>  
> @@ -2505,12 +2504,9 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
>  		new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
>  	}
>  
> -	pol = mpol_dup(vma_policy(vma));
> -	if (IS_ERR(pol)) {
> -		err = PTR_ERR(pol);
> +	err = vma_dup_policy(vma, new);
> +	if (err)
>  		goto out_free_vma;
> -	}
> -	vma_set_policy(new, pol);
>  
>  	if (anon_vma_clone(new, vma))
>  		goto out_free_mpol;

This isn't the first occurrence in mm/mmap.c, what about vma_adjust()?  
Probably need to patch 3.10 or later.

Otherwise looks good.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] mm: mempolicy: turn vma_set_policy() into vma_dup_policy()
  2013-07-11 22:13 ` David Rientjes
@ 2013-07-11 22:20   ` Andrew Morton
  2013-07-11 22:27     ` David Rientjes
  2013-07-12 15:32   ` Oleg Nesterov
  1 sibling, 1 reply; 7+ messages in thread
From: Andrew Morton @ 2013-07-11 22:20 UTC (permalink / raw)
  To: David Rientjes
  Cc: Oleg Nesterov, KOSAKI Motohiro, Mel Gorman, Rik van Riel,
	Andi Kleen, linux-kernel

On Thu, 11 Jul 2013 15:13:03 -0700 (PDT) David Rientjes <rientjes@google.com> wrote:

> > +int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
> > +{
> > +	struct mempolicy *pol = mpol_dup(vma_policy(src));
> > +
> > +	if (IS_ERR(pol))
> > +		return PTR_ERR(pol);
> 
> PTR_ERR() returns long, so vma_dup_policy() needs to return long.

hm, really?  vma_dup_policy() returns an errno and errnos have type "int".

Arguably it is PTR_ERR() which returns the wrong type...

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] mm: mempolicy: turn vma_set_policy() into vma_dup_policy()
  2013-07-11 22:20   ` Andrew Morton
@ 2013-07-11 22:27     ` David Rientjes
  2013-07-11 22:33       ` Andrew Morton
  0 siblings, 1 reply; 7+ messages in thread
From: David Rientjes @ 2013-07-11 22:27 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Oleg Nesterov, KOSAKI Motohiro, Mel Gorman, Rik van Riel,
	Andi Kleen, linux-kernel

On Thu, 11 Jul 2013, Andrew Morton wrote:

> On Thu, 11 Jul 2013 15:13:03 -0700 (PDT) David Rientjes <rientjes@google.com> wrote:
> 
> > > +int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
> > > +{
> > > +	struct mempolicy *pol = mpol_dup(vma_policy(src));
> > > +
> > > +	if (IS_ERR(pol))
> > > +		return PTR_ERR(pol);
> > 
> > PTR_ERR() returns long, so vma_dup_policy() needs to return long.
> 
> hm, really?  vma_dup_policy() returns an errno and errnos have type "int".
> 
> Arguably it is PTR_ERR() which returns the wrong type...
> 

PTR_ERR() may not imply IS_ERR(), which I believe Rusty is trying to fix 
with his series that fixes up abuses of PTR_ERR().  But I agree that 
vma_dup_policy() can return int because of the IS_ERR() check as written.  

For complete correctness there should probably be a build error if 
MAX_ERRNO cannot fit in an int and then this should do 
return (int)PTR_ERR(pol) to make it clear.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] mm: mempolicy: turn vma_set_policy() into vma_dup_policy()
  2013-07-11 22:27     ` David Rientjes
@ 2013-07-11 22:33       ` Andrew Morton
  2013-07-11 22:54         ` David Rientjes
  0 siblings, 1 reply; 7+ messages in thread
From: Andrew Morton @ 2013-07-11 22:33 UTC (permalink / raw)
  To: David Rientjes
  Cc: Oleg Nesterov, KOSAKI Motohiro, Mel Gorman, Rik van Riel,
	Andi Kleen, linux-kernel, Rusty Russell

On Thu, 11 Jul 2013 15:27:46 -0700 (PDT) David Rientjes <rientjes@google.com> wrote:

> On Thu, 11 Jul 2013, Andrew Morton wrote:
> 
> > On Thu, 11 Jul 2013 15:13:03 -0700 (PDT) David Rientjes <rientjes@google.com> wrote:
> > 
> > > > +int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
> > > > +{
> > > > +	struct mempolicy *pol = mpol_dup(vma_policy(src));
> > > > +
> > > > +	if (IS_ERR(pol))
> > > > +		return PTR_ERR(pol);
> > > 
> > > PTR_ERR() returns long, so vma_dup_policy() needs to return long.
> > 
> > hm, really?  vma_dup_policy() returns an errno and errnos have type "int".
> > 
> > Arguably it is PTR_ERR() which returns the wrong type...
> > 
> 
> PTR_ERR() may not imply IS_ERR(),

Well why not.  Are we saying that code can legitimately convert the
PTR_ERR() return value back into a pointer?  If so that sounds nuts.

> which I believe Rusty is trying to fix 
> with his series that fixes up abuses of PTR_ERR().

Well I hope all of this will be completely documented in err.h. 
Otherwise the abuses will simply continue.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] mm: mempolicy: turn vma_set_policy() into vma_dup_policy()
  2013-07-11 22:33       ` Andrew Morton
@ 2013-07-11 22:54         ` David Rientjes
  0 siblings, 0 replies; 7+ messages in thread
From: David Rientjes @ 2013-07-11 22:54 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Oleg Nesterov, KOSAKI Motohiro, Mel Gorman, Rik van Riel,
	Andi Kleen, linux-kernel, Rusty Russell

On Thu, 11 Jul 2013, Andrew Morton wrote:

> > PTR_ERR() may not imply IS_ERR(),
> 
> Well why not.  Are we saying that code can legitimately convert the
> PTR_ERR() return value back into a pointer?  If so that sounds nuts.
> 

ERR_PTR() is just delivering a payload that can be interpreted by 
PTR_ERR(), Rusty has spotted places in the kernel that do this without 
actual errno.  The most obvious case for me is the ERR_PTR(-1UL) in 
mm/oom_kill.c.

People delivering a non-errno payload shouldn't be using ERR_PTR(), but 
nothing enforces that.  You could add a WARN_ON_ONCE(error >= MAX_ERRNO).  
But PTR_ERR() will still need to rely on IS_ERR().

I agree that these longs should be converted to ints, since errno is 
defined to be int by the C standard.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] mm: mempolicy: turn vma_set_policy() into vma_dup_policy()
  2013-07-11 22:13 ` David Rientjes
  2013-07-11 22:20   ` Andrew Morton
@ 2013-07-12 15:32   ` Oleg Nesterov
  1 sibling, 0 replies; 7+ messages in thread
From: Oleg Nesterov @ 2013-07-12 15:32 UTC (permalink / raw)
  To: David Rientjes
  Cc: Andrew Morton, KOSAKI Motohiro, Mel Gorman, Rik van Riel,
	Andi Kleen, linux-kernel

On 07/11, David Rientjes wrote:
>
> On Wed, 10 Jul 2013, Oleg Nesterov wrote:
>
> > +int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
> > +{
> > +	struct mempolicy *pol = mpol_dup(vma_policy(src));
> > +
> > +	if (IS_ERR(pol))
> > +		return PTR_ERR(pol);
>
> PTR_ERR() returns long, so vma_dup_policy() needs to return long.

I think that "int" should be fine, or we should fix IS_ERR/ERR_PTR. If
nothing else, the changed code did the same. And there are a lot of other
"int" functions which return PTR_ERR().

But I agree, this is only correct because vma_dup_policy() checks IS_ERR()
before PTR_ERR(), and because mpol_dup() doesn't do the wrong things with
ERR_PTR().

For example, ERR_PTR(args->err) in hw_breakpoint_handler() looks really
strange and imho should be killed. But you are right, that value is not
actually an error.

> > @@ -2505,12 +2504,9 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
> >  		new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
> >  	}
> >
> > -	pol = mpol_dup(vma_policy(vma));
> > -	if (IS_ERR(pol)) {
> > -		err = PTR_ERR(pol);
> > +	err = vma_dup_policy(vma, new);
> > +	if (err)
> >  		goto out_free_vma;
> > -	}
> > -	vma_set_policy(new, pol);
> >
> >  	if (anon_vma_clone(new, vma))
> >  		goto out_free_mpol;
>
> This isn't the first occurrence in mm/mmap.c, what about vma_adjust()?
> Probably need to patch 3.10 or later.

Ah, sorry for the confusion, I forgot to mention that this is on top of
another -mm patch,

	mm-mempolicy-fix-mbind_range-vma_adjust-interaction.patch

attached below just in case.

> Otherwise looks good.

Thanks for review ;)

Oleg.

-----------------------------------------------------------------------
[PATCH] mm: mempolicy: fix mbind_range() && vma_adjust() interaction

vma_adjust() does vma_set_policy(vma, vma_policy(next)) and this
is doubly wrong:

1. This leaks vma->vm_policy if it is not NULL and not equal to
   next->vm_policy.

   This can happen if vma_merge() expands "area", not prev (case 8).

2. This sets the wrong policy if vma_merge() joins prev and area:
   area is the vma the caller needs to update, and it still has the
   old policy.

Revert 1444f92c "mm: merging memory blocks resets mempolicy" which
introduced these problems.

Change mbind_range() to recheck mpol_equal() after vma_merge() to
fix the problem 1444f92c tried to address.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: <stable@vger.kernel.org>
---
 mm/mempolicy.c |    6 +++++-
 mm/mmap.c      |    2 +-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 7431001..4baf12e 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -732,7 +732,10 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
 		if (prev) {
 			vma = prev;
 			next = vma->vm_next;
-			continue;
+			if (mpol_equal(vma_policy(vma), new_pol))
+				continue;
+			/* vma_merge() joined vma && vma->next, case 8 */
+			goto replace;
 		}
 		if (vma->vm_start != vmstart) {
 			err = split_vma(vma->vm_mm, vma, vmstart, 1);
@@ -744,6 +747,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start,
 			if (err)
 				goto out;
 		}
+ replace:
 		err = vma_replace_policy(vma, new_pol);
 		if (err)
 			goto out;
diff --git a/mm/mmap.c b/mm/mmap.c
index 7fe7f0b..42234b8 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -865,7 +865,7 @@ again:			remove_next = 1 + (end > next->vm_end);
 		if (next->anon_vma)
 			anon_vma_merge(vma, next);
 		mm->map_count--;
-		vma_set_policy(vma, vma_policy(next));
+		mpol_put(vma_policy(next));
 		kmem_cache_free(vm_area_cachep, next);
 		/*
 		 * In mprotect's case 6 (see comments on vma_merge),
-- 
1.5.5.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2013-07-12 15:37 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-07-10 17:02 [PATCH] mm: mempolicy: turn vma_set_policy() into vma_dup_policy() Oleg Nesterov
2013-07-11 22:13 ` David Rientjes
2013-07-11 22:20   ` Andrew Morton
2013-07-11 22:27     ` David Rientjes
2013-07-11 22:33       ` Andrew Morton
2013-07-11 22:54         ` David Rientjes
2013-07-12 15:32   ` Oleg Nesterov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.