All of lore.kernel.org
 help / color / mirror / Atom feed
From: Minchan Kim <minchan@kernel.org>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: LKML <linux-kernel@vger.kernel.org>,
	linux-mm <linux-mm@kvack.org>,
	linux-api@vger.kernel.org, oleksandr@redhat.com,
	Suren Baghdasaryan <surenb@google.com>,
	Tim Murray <timmurray@google.com>,
	Daniel Colascione <dancol@google.com>,
	Sandeep Patil <sspatil@google.com>,
	Sonny Rao <sonnyrao@google.com>,
	Brian Geffon <bgeffon@google.com>, Michal Hocko <mhocko@suse.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Shakeel Butt <shakeelb@google.com>,
	John Dias <joaodias@google.com>,
	Joel Fernandes <joel@joelfernandes.org>,
	Alexander Duyck <alexander.h.duyck@linux.intel.com>,
	Minchan Kim <minchan@kernel.org>
Subject: [PATCH v4 1/8] mm: pass task to do_madvise
Date: Wed, 12 Feb 2020 15:39:39 -0800	[thread overview]
Message-ID: <20200212233946.246210-2-minchan@kernel.org> (raw)
In-Reply-To: <20200212233946.246210-1-minchan@kernel.org>

In upcoming patches, do_madvise will be called from external process
context so it shouldn't asssume "current" is always hinted process's
task_struct. Thus, let's get the mm_struct from vma->vm_mm, not
current because vma is always hinted process's one. And let's pass
*current* as new task argument of do_madvise so it shouldn't change
existing behavior.

Signed-off-by: Minchan Kim <minchan@kernel.org>
---
 fs/io_uring.c      |  2 +-
 include/linux/mm.h |  3 ++-
 mm/madvise.c       | 37 ++++++++++++++++++++-----------------
 3 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 63beda9bafc5..6307206b970f 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2736,7 +2736,7 @@ static int io_madvise(struct io_kiocb *req, struct io_kiocb **nxt,
 	if (force_nonblock)
 		return -EAGAIN;
 
-	ret = do_madvise(ma->addr, ma->len, ma->advice);
+	ret = do_madvise(current, ma->addr, ma->len, ma->advice);
 	if (ret < 0)
 		req_set_fail_links(req);
 	io_cqring_add_event(req, ret);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 52269e56c514..8cb41131ec96 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2323,7 +2323,8 @@ extern int __do_munmap(struct mm_struct *, unsigned long, size_t,
 		       struct list_head *uf, bool downgrade);
 extern int do_munmap(struct mm_struct *, unsigned long, size_t,
 		     struct list_head *uf);
-extern int do_madvise(unsigned long start, size_t len_in, int behavior);
+extern int do_madvise(struct task_struct *task, unsigned long start,
+			size_t len_in, int behavior);
 
 static inline unsigned long
 do_mmap_pgoff(struct file *file, unsigned long addr,
diff --git a/mm/madvise.c b/mm/madvise.c
index 43b47d3fae02..ab4011ba2d9e 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -256,6 +256,7 @@ static long madvise_willneed(struct vm_area_struct *vma,
 {
 	struct file *file = vma->vm_file;
 	loff_t offset;
+	struct mm_struct *mm = vma->vm_mm;
 
 	*prev = vma;
 #ifdef CONFIG_SWAP
@@ -288,12 +289,12 @@ static long madvise_willneed(struct vm_area_struct *vma,
 	 */
 	*prev = NULL;	/* tell sys_madvise we drop mmap_sem */
 	get_file(file);
-	up_read(&current->mm->mmap_sem);
+	up_read(&mm->mmap_sem);
 	offset = (loff_t)(start - vma->vm_start)
 			+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
 	vfs_fadvise(file, offset, end - start, POSIX_FADV_WILLNEED);
 	fput(file);
-	down_read(&current->mm->mmap_sem);
+	down_read(&mm->mmap_sem);
 	return 0;
 }
 
@@ -674,9 +675,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 	}
 out:
 	if (nr_swap) {
-		if (current->mm == mm)
-			sync_mm_rss(mm);
-
+		sync_mm_rss(mm);
 		add_mm_counter(mm, MM_SWAPENTS, nr_swap);
 	}
 	arch_leave_lazy_mmu_mode();
@@ -756,6 +755,7 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
 				  unsigned long start, unsigned long end,
 				  int behavior)
 {
+	struct mm_struct *mm = vma->vm_mm;
 	*prev = vma;
 	if (!can_madv_lru_vma(vma))
 		return -EINVAL;
@@ -763,8 +763,8 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
 	if (!userfaultfd_remove(vma, start, end)) {
 		*prev = NULL; /* mmap_sem has been dropped, prev is stale */
 
-		down_read(&current->mm->mmap_sem);
-		vma = find_vma(current->mm, start);
+		down_read(&mm->mmap_sem);
+		vma = find_vma(mm, start);
 		if (!vma)
 			return -ENOMEM;
 		if (start < vma->vm_start) {
@@ -818,6 +818,7 @@ static long madvise_remove(struct vm_area_struct *vma,
 	loff_t offset;
 	int error;
 	struct file *f;
+	struct mm_struct *mm = vma->vm_mm;
 
 	*prev = NULL;	/* tell sys_madvise we drop mmap_sem */
 
@@ -845,13 +846,13 @@ static long madvise_remove(struct vm_area_struct *vma,
 	get_file(f);
 	if (userfaultfd_remove(vma, start, end)) {
 		/* mmap_sem was not released by userfaultfd_remove() */
-		up_read(&current->mm->mmap_sem);
+		up_read(&mm->mmap_sem);
 	}
 	error = vfs_fallocate(f,
 				FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
 				offset, end - start);
 	fput(f);
-	down_read(&current->mm->mmap_sem);
+	down_read(&mm->mmap_sem);
 	return error;
 }
 
@@ -1044,7 +1045,8 @@ madvise_behavior_valid(int behavior)
  *  -EBADF  - map exists, but area maps something that isn't a file.
  *  -EAGAIN - a kernel resource was temporarily unavailable.
  */
-int do_madvise(unsigned long start, size_t len_in, int behavior)
+int do_madvise(struct task_struct *task, unsigned long start,
+					size_t len_in, int behavior)
 {
 	unsigned long end, tmp;
 	struct vm_area_struct *vma, *prev;
@@ -1053,6 +1055,7 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
 	int write;
 	size_t len;
 	struct blk_plug plug;
+	struct mm_struct *mm = task->mm;
 
 	start = untagged_addr(start);
 
@@ -1082,10 +1085,10 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
 
 	write = madvise_need_mmap_write(behavior);
 	if (write) {
-		if (down_write_killable(&current->mm->mmap_sem))
+		if (down_write_killable(&mm->mmap_sem))
 			return -EINTR;
 	} else {
-		down_read(&current->mm->mmap_sem);
+		down_read(&mm->mmap_sem);
 	}
 
 	/*
@@ -1093,7 +1096,7 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
 	 * ranges, just ignore them, but return -ENOMEM at the end.
 	 * - different from the way of handling in mlock etc.
 	 */
-	vma = find_vma_prev(current->mm, start, &prev);
+	vma = find_vma_prev(mm, start, &prev);
 	if (vma && start > vma->vm_start)
 		prev = vma;
 
@@ -1130,19 +1133,19 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
 		if (prev)
 			vma = prev->vm_next;
 		else	/* madvise_remove dropped mmap_sem */
-			vma = find_vma(current->mm, start);
+			vma = find_vma(mm, start);
 	}
 out:
 	blk_finish_plug(&plug);
 	if (write)
-		up_write(&current->mm->mmap_sem);
+		up_write(&mm->mmap_sem);
 	else
-		up_read(&current->mm->mmap_sem);
+		up_read(&mm->mmap_sem);
 
 	return error;
 }
 
 SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
 {
-	return do_madvise(start, len_in, behavior);
+	return do_madvise(current, start, len_in, behavior);
 }
-- 
2.25.0.225.g125e21ebc7-goog


WARNING: multiple messages have this Message-ID (diff)
From: Minchan Kim <minchan-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
To: Andrew Morton <akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org>
Cc: LKML <linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org>,
	linux-mm <linux-mm-Bw31MaZKKs3YtjvyW6yDsg@public.gmane.org>,
	linux-api-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	oleksandr-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
	Suren Baghdasaryan
	<surenb-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>,
	Tim Murray <timmurray-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>,
	Daniel Colascione
	<dancol-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>,
	Sandeep Patil <sspatil-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>,
	Sonny Rao <sonnyrao-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>,
	Brian Geffon <bgeffon-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>,
	Michal Hocko <mhocko-IBi9RG/b67k@public.gmane.org>,
	Johannes Weiner <hannes-druUgvl0LCNAfugRpC6u6w@public.gmane.org>,
	Shakeel Butt <shakeelb-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>,
	John Dias <joaodias-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org>,
	Joel Fernandes
	<joel-QYYGw3jwrUn5owFQY34kdNi2O/JbrIOy@public.gmane.org>,
	Alexander Duyck
	<alexander.h.duyck-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>,
	Minchan Kim <minchan-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Subject: [PATCH v4 1/8] mm: pass task to do_madvise
Date: Wed, 12 Feb 2020 15:39:39 -0800	[thread overview]
Message-ID: <20200212233946.246210-2-minchan@kernel.org> (raw)
In-Reply-To: <20200212233946.246210-1-minchan-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>

In upcoming patches, do_madvise will be called from external process
context so it shouldn't asssume "current" is always hinted process's
task_struct. Thus, let's get the mm_struct from vma->vm_mm, not
current because vma is always hinted process's one. And let's pass
*current* as new task argument of do_madvise so it shouldn't change
existing behavior.

Signed-off-by: Minchan Kim <minchan-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
 fs/io_uring.c      |  2 +-
 include/linux/mm.h |  3 ++-
 mm/madvise.c       | 37 ++++++++++++++++++++-----------------
 3 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 63beda9bafc5..6307206b970f 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2736,7 +2736,7 @@ static int io_madvise(struct io_kiocb *req, struct io_kiocb **nxt,
 	if (force_nonblock)
 		return -EAGAIN;
 
-	ret = do_madvise(ma->addr, ma->len, ma->advice);
+	ret = do_madvise(current, ma->addr, ma->len, ma->advice);
 	if (ret < 0)
 		req_set_fail_links(req);
 	io_cqring_add_event(req, ret);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 52269e56c514..8cb41131ec96 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2323,7 +2323,8 @@ extern int __do_munmap(struct mm_struct *, unsigned long, size_t,
 		       struct list_head *uf, bool downgrade);
 extern int do_munmap(struct mm_struct *, unsigned long, size_t,
 		     struct list_head *uf);
-extern int do_madvise(unsigned long start, size_t len_in, int behavior);
+extern int do_madvise(struct task_struct *task, unsigned long start,
+			size_t len_in, int behavior);
 
 static inline unsigned long
 do_mmap_pgoff(struct file *file, unsigned long addr,
diff --git a/mm/madvise.c b/mm/madvise.c
index 43b47d3fae02..ab4011ba2d9e 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -256,6 +256,7 @@ static long madvise_willneed(struct vm_area_struct *vma,
 {
 	struct file *file = vma->vm_file;
 	loff_t offset;
+	struct mm_struct *mm = vma->vm_mm;
 
 	*prev = vma;
 #ifdef CONFIG_SWAP
@@ -288,12 +289,12 @@ static long madvise_willneed(struct vm_area_struct *vma,
 	 */
 	*prev = NULL;	/* tell sys_madvise we drop mmap_sem */
 	get_file(file);
-	up_read(&current->mm->mmap_sem);
+	up_read(&mm->mmap_sem);
 	offset = (loff_t)(start - vma->vm_start)
 			+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
 	vfs_fadvise(file, offset, end - start, POSIX_FADV_WILLNEED);
 	fput(file);
-	down_read(&current->mm->mmap_sem);
+	down_read(&mm->mmap_sem);
 	return 0;
 }
 
@@ -674,9 +675,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 	}
 out:
 	if (nr_swap) {
-		if (current->mm == mm)
-			sync_mm_rss(mm);
-
+		sync_mm_rss(mm);
 		add_mm_counter(mm, MM_SWAPENTS, nr_swap);
 	}
 	arch_leave_lazy_mmu_mode();
@@ -756,6 +755,7 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
 				  unsigned long start, unsigned long end,
 				  int behavior)
 {
+	struct mm_struct *mm = vma->vm_mm;
 	*prev = vma;
 	if (!can_madv_lru_vma(vma))
 		return -EINVAL;
@@ -763,8 +763,8 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
 	if (!userfaultfd_remove(vma, start, end)) {
 		*prev = NULL; /* mmap_sem has been dropped, prev is stale */
 
-		down_read(&current->mm->mmap_sem);
-		vma = find_vma(current->mm, start);
+		down_read(&mm->mmap_sem);
+		vma = find_vma(mm, start);
 		if (!vma)
 			return -ENOMEM;
 		if (start < vma->vm_start) {
@@ -818,6 +818,7 @@ static long madvise_remove(struct vm_area_struct *vma,
 	loff_t offset;
 	int error;
 	struct file *f;
+	struct mm_struct *mm = vma->vm_mm;
 
 	*prev = NULL;	/* tell sys_madvise we drop mmap_sem */
 
@@ -845,13 +846,13 @@ static long madvise_remove(struct vm_area_struct *vma,
 	get_file(f);
 	if (userfaultfd_remove(vma, start, end)) {
 		/* mmap_sem was not released by userfaultfd_remove() */
-		up_read(&current->mm->mmap_sem);
+		up_read(&mm->mmap_sem);
 	}
 	error = vfs_fallocate(f,
 				FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
 				offset, end - start);
 	fput(f);
-	down_read(&current->mm->mmap_sem);
+	down_read(&mm->mmap_sem);
 	return error;
 }
 
@@ -1044,7 +1045,8 @@ madvise_behavior_valid(int behavior)
  *  -EBADF  - map exists, but area maps something that isn't a file.
  *  -EAGAIN - a kernel resource was temporarily unavailable.
  */
-int do_madvise(unsigned long start, size_t len_in, int behavior)
+int do_madvise(struct task_struct *task, unsigned long start,
+					size_t len_in, int behavior)
 {
 	unsigned long end, tmp;
 	struct vm_area_struct *vma, *prev;
@@ -1053,6 +1055,7 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
 	int write;
 	size_t len;
 	struct blk_plug plug;
+	struct mm_struct *mm = task->mm;
 
 	start = untagged_addr(start);
 
@@ -1082,10 +1085,10 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
 
 	write = madvise_need_mmap_write(behavior);
 	if (write) {
-		if (down_write_killable(&current->mm->mmap_sem))
+		if (down_write_killable(&mm->mmap_sem))
 			return -EINTR;
 	} else {
-		down_read(&current->mm->mmap_sem);
+		down_read(&mm->mmap_sem);
 	}
 
 	/*
@@ -1093,7 +1096,7 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
 	 * ranges, just ignore them, but return -ENOMEM at the end.
 	 * - different from the way of handling in mlock etc.
 	 */
-	vma = find_vma_prev(current->mm, start, &prev);
+	vma = find_vma_prev(mm, start, &prev);
 	if (vma && start > vma->vm_start)
 		prev = vma;
 
@@ -1130,19 +1133,19 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
 		if (prev)
 			vma = prev->vm_next;
 		else	/* madvise_remove dropped mmap_sem */
-			vma = find_vma(current->mm, start);
+			vma = find_vma(mm, start);
 	}
 out:
 	blk_finish_plug(&plug);
 	if (write)
-		up_write(&current->mm->mmap_sem);
+		up_write(&mm->mmap_sem);
 	else
-		up_read(&current->mm->mmap_sem);
+		up_read(&mm->mmap_sem);
 
 	return error;
 }
 
 SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
 {
-	return do_madvise(start, len_in, behavior);
+	return do_madvise(current, start, len_in, behavior);
 }
-- 
2.25.0.225.g125e21ebc7-goog

  reply	other threads:[~2020-02-12 23:39 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-12 23:39 [PATCH v4 0/8] introduce memory hinting API for external process Minchan Kim
2020-02-12 23:39 ` Minchan Kim
2020-02-12 23:39 ` Minchan Kim [this message]
2020-02-12 23:39   ` [PATCH v4 1/8] mm: pass task to do_madvise Minchan Kim
2020-02-13  0:21   ` Alexander Duyck
2020-02-13  0:21     ` Alexander Duyck
2020-02-13  0:21     ` Alexander Duyck
2020-02-13 17:02     ` Minchan Kim
2020-02-13 17:35       ` Jann Horn
2020-02-13 17:35         ` Jann Horn
2020-02-12 23:39 ` [PATCH v4 2/8] mm: introduce external memory hinting API Minchan Kim
2020-02-13 14:08   ` Jann Horn
2020-02-13 14:08     ` Jann Horn
2020-02-13 16:10     ` Minchan Kim
2020-02-12 23:39 ` [PATCH v4 3/8] mm: validate mm in do_madvise Minchan Kim
2020-02-12 23:39 ` [PATCH v4 4/8] mm: check fatal signal pending of target process Minchan Kim
2020-02-12 23:39   ` Minchan Kim
2020-02-12 23:39 ` [PATCH v4 5/8] mm/madvise: employ mmget_still_valid for write lock Minchan Kim
2020-02-12 23:39 ` [PATCH v4 6/8] mm/madvise: allow KSM hints for remote API Minchan Kim
2020-02-12 23:39 ` [PATCH v4 7/8] pid: export pidfd_get_pid Minchan Kim
2020-02-12 23:39   ` Minchan Kim
2020-02-13  0:25   ` Alexander Duyck
2020-02-13  0:25     ` Alexander Duyck
2020-02-13  0:25     ` Alexander Duyck
2020-02-13 17:08     ` Minchan Kim
2020-02-12 23:39 ` [PATCH v4 8/8] mm: support both pid and pidfd for process_madvise Minchan Kim
2020-02-13  0:28   ` Alexander Duyck
2020-02-13  0:28     ` Alexander Duyck
2020-02-13  0:28     ` Alexander Duyck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200212233946.246210-2-minchan@kernel.org \
    --to=minchan@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=alexander.h.duyck@linux.intel.com \
    --cc=bgeffon@google.com \
    --cc=dancol@google.com \
    --cc=hannes@cmpxchg.org \
    --cc=joaodias@google.com \
    --cc=joel@joelfernandes.org \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.com \
    --cc=oleksandr@redhat.com \
    --cc=shakeelb@google.com \
    --cc=sonnyrao@google.com \
    --cc=sspatil@google.com \
    --cc=surenb@google.com \
    --cc=timmurray@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.