linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Oleg Nesterov <oleg@redhat.com>
To: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Colin Cross <ccross@android.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Hugh Dickins <hughd@google.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	"Hampson, Steven T" <steven.t.hampson@intel.com>,
	lkml <linux-kernel@vger.kernel.org>,
	Kyungmin Park <kmpark@infradead.org>,
	Christoph Hellwig <hch@infradead.org>,
	John Stultz <john.stultz@linaro.org>,
	Rob Landley <rob@landley.net>, Arnd Bergmann <arnd@arndb.de>,
	Cyrill Gorcunov <gorcunov@openvz.org>,
	David Rientjes <rientjes@google.com>,
	Davidlohr Bueso <dave@gnu.org>, Kees Cook <keescook@chromium.org>,
	Al Viro <viro@zeniv.linux.org.uk>, Mel Gorman <mgorman@suse.de>,
	Michel Lespinasse <walken@google.com>,
	Rik van Riel <riel@redhat.com>,
	Konstantin Khlebnikov <khlebnikov@openvz.org>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Rusty Russell <rusty@rustcorp.com.au>,
	"Eric W. Biederman" <ebiederm@xmission.com>,
	Srikar Dronamraju <srikar@linux.vnet.ibm.com>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
	Michal Hocko <mhocko@suse.cz>,
	Anton Vorontsov <anton.vorontsov@linaro.org>,
	Pekka Enberg <penberg@kernel.org>, Shaohua Li <shli@fusionio.com>,
	Sasha Levin <sasha.levin@oracle.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Ingo Molnar <mingo@kernel.org>,
	"open list:DOCUMENTATION" <linux-doc@vger.kernel.org>,
	"open list:MEMORY MANAGEMENT" <linux-mm@kvack.org>,
	"open list:GENERIC INCLUDE/A..." <linux-arch@vger.kernel.org>
Subject: Re: [PATCH 1/1] mm: mempolicy: fix mbind_range() && vma_adjust() interaction
Date: Tue, 9 Jul 2013 21:43:21 +0200	[thread overview]
Message-ID: <20130709194321.GA31104@redhat.com> (raw)
In-Reply-To: <20130709152836.GA10033@redhat.com>

On 07/09, Oleg Nesterov wrote:
>
> I can be easily wrong, but to me vma_adjust() and its usage looks a bit
> overcomplicated. Perhaps it makes sense to distinguish mmapped/hole cases.
> mbind_range/madvise/etc need vma_join(vma, ...), not prev/anon_vma/file.
> Perhaps. not sure.

And I am just curious if something like below makes any sense...

Suppose we add the new helper, vma_update(vma, new). Note that "new" is
the fake vma, it just describes how we want to change vma.

/*
 * Report whether "prev" and "next" could be merged into a single vma.
 *
 * Every condition below must hold; evaluation stops at the first one
 * that fails:
 *   - next starts exactly where prev ends,
 *   - next's vm_pgoff equals prev's vm_pgoff plus vma_pages(prev),
 *   - both have the same vm_file, vm_flags and an equal memory policy,
 *   - both have the same anon_vma (the author flags this test as wrong),
 *   - next has no ->close() method in its vm_ops.
 */
static bool
can_merge_vma(struct vm_area_struct *prev, struct vm_area_struct *next)
{
	return prev->vm_end == next->vm_start &&
	       prev->vm_pgoff + vma_pages(prev) == next->vm_pgoff &&
	       prev->vm_file == next->vm_file &&
	       prev->vm_flags == next->vm_flags &&
	       mpol_equal(vma_policy(prev), vma_policy(next)) &&
	       /* WRONG, FIXME !!! */
	       prev->anon_vma == next->anon_vma &&
	       !(next->vm_ops && next->vm_ops->close);
}

/*
 * vma_update - change part or all of @vma as described by @new.
 *
 * @vma: the existing vma to change.
 * @new: a fake vma that only describes the wanted result: its
 *       vm_start/vm_end give the range to change, and its remaining
 *       fields are what can_merge_vma() compares against the neighbours.
 *
 * The range is merged into vma->vm_prev and/or vma->vm_next when
 * can_merge_vma() allows it; otherwise @vma is split at the range
 * boundaries with split_vma().
 *
 * Returns the vma that covers [new->vm_start, new->vm_end) afterwards,
 * so the caller can store the new attributes into it, or ERR_PTR(err)
 * when split_vma() or vma_adjust() fails.
 *
 * The "case N" comments presumably refer to the numbered cases of
 * vma_merge() -- confirm against mm/mmap.c.
 */
struct vm_area_struct *
vma_update(struct vm_area_struct *vma, struct vm_area_struct *new)
{
	struct vm_area_struct *prev = vma->vm_prev;
	struct vm_area_struct *next = vma->vm_next;
	struct vm_area_struct *new_ret;
	unsigned long new_end;
	int err;

	/* prev/next != vma means we can merge with prev/next */
	if (!prev || !can_merge_vma(prev, new))
		prev = vma;
	if (!next || !can_merge_vma(new, next))
		next = vma;

	if (new->vm_start > vma->vm_start) {	/* prev == vma */
		if (next != vma) {
			/* vma shrinks, next grows, case 4 */
			new_end = new->vm_start;
			/* the range ends up inside next */
			new_ret = next;
			goto adjust;
		}
		/* new_below == 1: vma keeps the upper part, [new->vm_start, ...) */
		err = split_vma(vma->vm_mm, vma, new->vm_start, 1);
		if (err)
			return ERR_PTR(err);
	}

	if (new->vm_end < vma->vm_end) {	/* next == vma */
		if (prev != vma) {
			/* prev grows, vma shrinks, case 5 */
			new_end = new->vm_end;
			/*
			 * The range ends up inside prev; what is left of vma
			 * is the untouched remainder and must not be returned,
			 * or the caller would change the wrong range.
			 */
			new_ret = prev;
			goto adjust;
		}
		/* new_below == 0: vma keeps the lower part, [..., new->vm_end) */
		err = split_vma(vma->vm_mm, vma, new->vm_end, 0);
		if (err)
			return ERR_PTR(err);
	}

	/*
	 * No neighbour can be merged (always the case if split_vma() was
	 * called): vma now covers exactly the wanted range.
	 */
	if (prev == next)
		return vma;

	/* full merge: prev swallows everything up to the end of next */
	new_end = next->vm_end;
	new_ret = prev;
adjust:
	err = vma_adjust(prev, prev->vm_start, new_end, prev->vm_pgoff, NULL);
	if (err)
		return ERR_PTR(err);
	khugepaged_enter_vma_merge(new_ret);
	return new_ret;
}

Now we can change madvise_behavior() and other similar users as below.

As for mmap_region() we can add another helper which simply tries to
expand prev/next (case 1-3).

Oleg.

--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -46,10 +46,9 @@ static long madvise_behavior(struct vm_area_struct * vma,
 		     struct vm_area_struct **prev,
 		     unsigned long start, unsigned long end, int behavior)
 {
-	struct mm_struct * mm = vma->vm_mm;
-	int error = 0;
-	pgoff_t pgoff;
 	unsigned long new_flags = vma->vm_flags;
+	struct vm_area_struct new;
+	int error = 0;
 
 	switch (behavior) {
 	case MADV_NORMAL:
@@ -100,34 +99,21 @@ static long madvise_behavior(struct vm_area_struct * vma,
 		goto out;
 	}
 
-	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
-	*prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma,
-				vma->vm_file, pgoff, vma_policy(vma));
-	if (*prev) {
-		vma = *prev;
-		goto success;
-	}
-
-	*prev = vma;
+	new = *vma;
+	new.vm_flags = new_flags;
+	new.vm_start = start;
+	new.vm_end = end;
+	vma = vma_update(vma, &new);
 
-	if (start != vma->vm_start) {
-		error = split_vma(mm, vma, start, 1);
-		if (error)
-			goto out;
-	}
-
-	if (end != vma->vm_end) {
-		error = split_vma(mm, vma, end, 0);
-		if (error)
-			goto out;
+	if (IS_ERR(vma)) {
+		error = PTR_ERR(vma);
+		goto out;
 	}
-
-success:
 	/*
 	 * vm_flags is protected by the mmap_sem held in write mode.
 	 */
 	vma->vm_flags = new_flags;
-
+	*prev = vma;
 out:
 	if (error == -ENOMEM)
 		error = -EAGAIN;

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2013-07-09 19:49 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-07-04  1:31 [PATCH] mm: add sys_madvise2 and MADV_NAME to name vmas Colin Cross
2013-07-04  4:54 ` Eric W. Biederman
2013-07-04  6:32   ` Colin Cross
2013-07-05 16:52     ` Oleg Nesterov
2013-07-06  6:33   ` Pekka Enberg
2013-07-06 11:53     ` Eric W. Biederman
2013-07-07 18:35       ` Colin Cross
2013-07-14  1:38   ` Simon Jeons
2013-07-04  8:56 ` Peter Zijlstra
2013-07-05 20:25   ` Colin Cross
2013-07-10 23:20     ` Dave Hansen
2013-07-04 20:22 ` Oleg Nesterov
2013-07-05 19:40   ` Colin Cross
2013-07-08 18:04     ` [PATCH 0/1] mm: mempolicy: (Was: add sys_madvise2 and MADV_NAME to name vmas) Oleg Nesterov
2013-07-08 18:05       ` [PATCH 1/1] mm: mempolicy: fix mbind_range() && vma_adjust() interaction Oleg Nesterov
2013-07-08 22:29         ` KOSAKI Motohiro
2013-07-09 15:28           ` Oleg Nesterov
2013-07-09 19:43             ` Oleg Nesterov [this message]
2013-07-10  2:49             ` KOSAKI Motohiro
2013-07-09 21:56         ` Andrew Morton
2013-07-10 15:45           ` Oleg Nesterov
2013-07-24  9:40     ` [PATCH] mm: add sys_madvise2 and MADV_NAME to name vmas Jan Glauber
2013-07-24 20:05       ` Colin Cross
2013-07-10 23:08 ` Dave Hansen
     [not found]   ` <CAMbhsRTio2mS=azWTxSdRdaZJRRf5FfMNoQUZmrFjkB7kv9LSQ@mail.gmail.com>
2013-07-10 23:38     ` Dave Hansen
     [not found]       ` <CAMbhsRTs45QE1ze6mvdiL2QYKD0dHjXoRk7o1h2Y_rYP80ckDg@mail.gmail.com>
2013-07-11  0:19         ` Dave Hansen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130709194321.GA31104@redhat.com \
    --to=oleg@redhat.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=akpm@linux-foundation.org \
    --cc=anton.vorontsov@linaro.org \
    --cc=arnd@arndb.de \
    --cc=ccross@android.com \
    --cc=dave@gnu.org \
    --cc=ebiederm@xmission.com \
    --cc=gorcunov@openvz.org \
    --cc=hannes@cmpxchg.org \
    --cc=hch@infradead.org \
    --cc=hughd@google.com \
    --cc=john.stultz@linaro.org \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=keescook@chromium.org \
    --cc=khlebnikov@openvz.org \
    --cc=kmpark@infradead.org \
    --cc=kosaki.motohiro@jp.fujitsu.com \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@suse.de \
    --cc=mhocko@suse.cz \
    --cc=mingo@kernel.org \
    --cc=penberg@kernel.org \
    --cc=riel@redhat.com \
    --cc=rientjes@google.com \
    --cc=rob@landley.net \
    --cc=rusty@rustcorp.com.au \
    --cc=sasha.levin@oracle.com \
    --cc=shli@fusionio.com \
    --cc=srikar@linux.vnet.ibm.com \
    --cc=steven.t.hampson@intel.com \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    --cc=walken@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).