All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jann Horn <jannh@google.com>
To: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
	Christoph Hellwig <hch@lst.de>,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	linux-fsdevel@vger.kernel.org,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	"Eric W . Biederman" <ebiederm@xmission.com>,
	Oleg Nesterov <oleg@redhat.com>
Subject: [PATCH v5 7/7] mm: Remove the now-unnecessary mmget_still_valid() hack
Date: Thu, 27 Aug 2020 13:49:32 +0200	[thread overview]
Message-ID: <20200827114932.3572699-8-jannh@google.com> (raw)
In-Reply-To: <20200827114932.3572699-1-jannh@google.com>

The preceding patches have ensured that core dumping properly takes the
mmap_lock. Thanks to that, we can now remove mmget_still_valid() and all
its users.

Signed-off-by: Jann Horn <jannh@google.com>
---
 drivers/infiniband/core/uverbs_main.c |  3 ---
 drivers/vfio/pci/vfio_pci.c           | 38 +++++++++++++--------------
 fs/proc/task_mmu.c                    | 18 -------------
 fs/userfaultfd.c                      | 28 +++++++-------------
 include/linux/sched/mm.h              | 25 ------------------
 mm/khugepaged.c                       |  2 +-
 mm/madvise.c                          | 17 ------------
 mm/mmap.c                             |  5 +---
 8 files changed, 29 insertions(+), 107 deletions(-)

diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 37794d88b1f3..a4ba0b87d6de 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -845,8 +845,6 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
 		 * will only be one mm, so no big deal.
 		 */
 		mmap_read_lock(mm);
-		if (!mmget_still_valid(mm))
-			goto skip_mm;
 		mutex_lock(&ufile->umap_lock);
 		list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
 					  list) {
@@ -865,7 +863,6 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
 			}
 		}
 		mutex_unlock(&ufile->umap_lock);
-	skip_mm:
 		mmap_read_unlock(mm);
 		mmput(mm);
 	}
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 620465c2a1da..27f11cc7ba6c 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -1480,31 +1480,29 @@ static int vfio_pci_zap_and_vma_lock(struct vfio_pci_device *vdev, bool try)
 		} else {
 			mmap_read_lock(mm);
 		}
-		if (mmget_still_valid(mm)) {
-			if (try) {
-				if (!mutex_trylock(&vdev->vma_lock)) {
-					mmap_read_unlock(mm);
-					mmput(mm);
-					return 0;
-				}
-			} else {
-				mutex_lock(&vdev->vma_lock);
+		if (try) {
+			if (!mutex_trylock(&vdev->vma_lock)) {
+				mmap_read_unlock(mm);
+				mmput(mm);
+				return 0;
 			}
-			list_for_each_entry_safe(mmap_vma, tmp,
-						 &vdev->vma_list, vma_next) {
-				struct vm_area_struct *vma = mmap_vma->vma;
+		} else {
+			mutex_lock(&vdev->vma_lock);
+		}
+		list_for_each_entry_safe(mmap_vma, tmp,
+					 &vdev->vma_list, vma_next) {
+			struct vm_area_struct *vma = mmap_vma->vma;
 
-				if (vma->vm_mm != mm)
-					continue;
+			if (vma->vm_mm != mm)
+				continue;
 
-				list_del(&mmap_vma->vma_next);
-				kfree(mmap_vma);
+			list_del(&mmap_vma->vma_next);
+			kfree(mmap_vma);
 
-				zap_vma_ptes(vma, vma->vm_start,
-					     vma->vm_end - vma->vm_start);
-			}
-			mutex_unlock(&vdev->vma_lock);
+			zap_vma_ptes(vma, vma->vm_start,
+				     vma->vm_end - vma->vm_start);
 		}
+		mutex_unlock(&vdev->vma_lock);
 		mmap_read_unlock(mm);
 		mmput(mm);
 	}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 5066b0251ed8..c43490aec95d 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1168,24 +1168,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 					count = -EINTR;
 					goto out_mm;
 				}
-				/*
-				 * Avoid to modify vma->vm_flags
-				 * without locked ops while the
-				 * coredump reads the vm_flags.
-				 */
-				if (!mmget_still_valid(mm)) {
-					/*
-					 * Silently return "count"
-					 * like if get_task_mm()
-					 * failed. FIXME: should this
-					 * function have returned
-					 * -ESRCH if get_task_mm()
-					 * failed like if
-					 * get_proc_task() fails?
-					 */
-					mmap_write_unlock(mm);
-					goto out_mm;
-				}
 				for (vma = mm->mmap; vma; vma = vma->vm_next) {
 					vma->vm_flags &= ~VM_SOFTDIRTY;
 					vma_set_page_prot(vma);
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 0e4a3837da52..000b457ad087 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -601,8 +601,6 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
 
 		/* the various vma->vm_userfaultfd_ctx still points to it */
 		mmap_write_lock(mm);
-		/* no task can run (and in turn coredump) yet */
-		VM_WARN_ON(!mmget_still_valid(mm));
 		for (vma = mm->mmap; vma; vma = vma->vm_next)
 			if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
 				vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
@@ -842,7 +840,6 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
 	/* len == 0 means wake all */
 	struct userfaultfd_wake_range range = { .len = 0, };
 	unsigned long new_flags;
-	bool still_valid;
 
 	WRITE_ONCE(ctx->released, true);
 
@@ -858,7 +855,6 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
 	 * taking the mmap_lock for writing.
 	 */
 	mmap_write_lock(mm);
-	still_valid = mmget_still_valid(mm);
 	prev = NULL;
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		cond_resched();
@@ -869,17 +865,15 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
 			continue;
 		}
 		new_flags = vma->vm_flags & ~(VM_UFFD_MISSING | VM_UFFD_WP);
-		if (still_valid) {
-			prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end,
-					 new_flags, vma->anon_vma,
-					 vma->vm_file, vma->vm_pgoff,
-					 vma_policy(vma),
-					 NULL_VM_UFFD_CTX);
-			if (prev)
-				vma = prev;
-			else
-				prev = vma;
-		}
+		prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end,
+				 new_flags, vma->anon_vma,
+				 vma->vm_file, vma->vm_pgoff,
+				 vma_policy(vma),
+				 NULL_VM_UFFD_CTX);
+		if (prev)
+			vma = prev;
+		else
+			prev = vma;
 		vma->vm_flags = new_flags;
 		vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
 	}
@@ -1309,8 +1303,6 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
 		goto out;
 
 	mmap_write_lock(mm);
-	if (!mmget_still_valid(mm))
-		goto out_unlock;
 	vma = find_vma_prev(mm, start, &prev);
 	if (!vma)
 		goto out_unlock;
@@ -1511,8 +1503,6 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
 		goto out;
 
 	mmap_write_lock(mm);
-	if (!mmget_still_valid(mm))
-		goto out_unlock;
 	vma = find_vma_prev(mm, start, &prev);
 	if (!vma)
 		goto out_unlock;
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index f889e332912f..e9cd1e637d76 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -49,31 +49,6 @@ static inline void mmdrop(struct mm_struct *mm)
 		__mmdrop(mm);
 }
 
-/*
- * This has to be called after a get_task_mm()/mmget_not_zero()
- * followed by taking the mmap_lock for writing before modifying the
- * vmas or anything the coredump pretends not to change from under it.
- *
- * It also has to be called when mmgrab() is used in the context of
- * the process, but then the mm_count refcount is transferred outside
- * the context of the process to run down_write() on that pinned mm.
- *
- * NOTE: find_extend_vma() called from GUP context is the only place
- * that can modify the "mm" (notably the vm_start/end) under mmap_lock
- * for reading and outside the context of the process, so it is also
- * the only case that holds the mmap_lock for reading that must call
- * this function. Generally if the mmap_lock is hold for reading
- * there's no need of this check after get_task_mm()/mmget_not_zero().
- *
- * This function can be obsoleted and the check can be removed, after
- * the coredump code will hold the mmap_lock for writing before
- * invoking the ->core_dump methods.
- */
-static inline bool mmget_still_valid(struct mm_struct *mm)
-{
-	return likely(!mm->core_state);
-}
-
 /**
  * mmget() - Pin the address space associated with a &struct mm_struct.
  * @mm: The address space to pin.
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 15a9af791014..101b636c72b5 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -431,7 +431,7 @@ static void insert_to_mm_slots_hash(struct mm_struct *mm,
 
 static inline int khugepaged_test_exit(struct mm_struct *mm)
 {
-	return atomic_read(&mm->mm_users) == 0 || !mmget_still_valid(mm);
+	return atomic_read(&mm->mm_users) == 0;
 }
 
 static bool hugepage_vma_check(struct vm_area_struct *vma,
diff --git a/mm/madvise.c b/mm/madvise.c
index dd1d43cf026d..d5b33d9011f0 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1091,23 +1091,6 @@ int do_madvise(unsigned long start, size_t len_in, int behavior)
 	if (write) {
 		if (mmap_write_lock_killable(current->mm))
 			return -EINTR;
-
-		/*
-		 * We may have stolen the mm from another process
-		 * that is undergoing core dumping.
-		 *
-		 * Right now that's io_ring, in the future it may
-		 * be remote process management and not "current"
-		 * at all.
-		 *
-		 * We need to fix core dumping to not do this,
-		 * but for now we have the mmget_still_valid()
-		 * model.
-		 */
-		if (!mmget_still_valid(current->mm)) {
-			mmap_write_unlock(current->mm);
-			return -EINTR;
-		}
 	} else {
 		mmap_read_lock(current->mm);
 	}
diff --git a/mm/mmap.c b/mm/mmap.c
index 40248d84ad5f..c47abe460439 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2552,7 +2552,7 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
 	if (vma && (vma->vm_start <= addr))
 		return vma;
 	/* don't alter vm_end if the coredump is running */
-	if (!prev || !mmget_still_valid(mm) || expand_stack(prev, addr))
+	if (!prev || expand_stack(prev, addr))
 		return NULL;
 	if (prev->vm_flags & VM_LOCKED)
 		populate_vma_page_range(prev, addr, prev->vm_end, NULL);
@@ -2578,9 +2578,6 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
 		return vma;
 	if (!(vma->vm_flags & VM_GROWSDOWN))
 		return NULL;
-	/* don't alter vm_start if the coredump is running */
-	if (!mmget_still_valid(mm))
-		return NULL;
 	start = vma->vm_start;
 	if (expand_stack(vma, addr))
 		return NULL;
-- 
2.28.0.297.g1956fa8f8d-goog


  parent reply	other threads:[~2020-08-27 12:10 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-27 11:49 [PATCH v5 0/7] Fix ELF / FDPIC ELF core dumping, and use mmap_lock properly in there Jann Horn
2020-08-27 11:49 ` Jann Horn
2020-08-27 11:49 ` [PATCH v5 1/7] binfmt_elf_fdpic: Stop using dump_emit() on user pointers on !MMU Jann Horn
2020-08-27 11:49   ` Jann Horn
2020-08-27 11:49 ` [PATCH v5 2/7] coredump: Let dump_emit() bail out on short writes Jann Horn
2020-08-27 11:49   ` Jann Horn
2020-08-27 11:49 ` [PATCH v5 3/7] coredump: Refactor page range dumping into common helper Jann Horn
2020-08-27 11:49   ` Jann Horn
2020-08-27 11:49 ` [PATCH v5 4/7] coredump: Rework elf/elf_fdpic vma_dump_size() " Jann Horn
2020-08-27 11:49   ` Jann Horn
2020-08-27 11:49 ` [PATCH v5 5/7] binfmt_elf, binfmt_elf_fdpic: Use a VMA list snapshot Jann Horn
2020-08-27 11:49   ` Jann Horn
2020-08-27 11:49 ` [PATCH v5 6/7] mm/gup: Take mmap_lock in get_dump_page() Jann Horn
2020-08-27 11:49   ` Jann Horn
2020-08-27 17:13   ` Linus Torvalds
2020-08-27 17:13     ` Linus Torvalds
2020-08-27 11:49 ` Jann Horn [this message]
2020-08-27 11:49   ` [PATCH v5 7/7] mm: Remove the now-unnecessary mmget_still_valid() hack Jann Horn
2020-08-31  6:06   ` Hugh Dickins
2020-08-31  6:06     ` Hugh Dickins
2020-08-31  9:58     ` Jann Horn
2020-08-31  9:58       ` Jann Horn
2020-08-31 20:36       ` Hugh Dickins
2020-08-31 20:36         ` Hugh Dickins
2020-08-31 21:30       ` Hugh Dickins
2020-08-31 21:30         ` Hugh Dickins
2020-08-27 17:15 ` [PATCH v5 0/7] Fix ELF / FDPIC ELF core dumping, and use mmap_lock properly in there Linus Torvalds
2020-08-27 17:15   ` Linus Torvalds

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200827114932.3572699-8-jannh@google.com \
    --to=jannh@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=ebiederm@xmission.com \
    --cc=hch@lst.de \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=oleg@redhat.com \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.