All of lore.kernel.org
 help / color / mirror / Atom feed
From: Liam Howlett <liam.howlett@oracle.com>
To: "maple-tree@lists.infradead.org" <maple-tree@lists.infradead.org>,
	"linux-mm@kvack.org" <linux-mm@kvack.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>
Subject: [PATCH v5 25/70] mm/mmap: Use advanced maple tree API for mmap_region()
Date: Wed, 2 Feb 2022 02:42:09 +0000	[thread overview]
Message-ID: <20220202024137.2516438-26-Liam.Howlett@oracle.com> (raw)
In-Reply-To: <20220202024137.2516438-1-Liam.Howlett@oracle.com>

From: "Liam R. Howlett" <Liam.Howlett@Oracle.com>

Changing mmap_region() to use the maple tree state and the advanced
maple tree interface allows for a lot less tree walking.

This change removes the last caller of munmap_vma_range(), so drop this
unused function.

Add vma_expand() to expand a VMA in place when possible.  It performs the
necessary hugepage adjustment, uprobe_munmap() of file mappings, dcache
flush and interval tree detach, modifies the VMA, and then undoes the
detaches.

Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
---
 mm/mmap.c | 230 +++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 183 insertions(+), 47 deletions(-)

diff --git a/mm/mmap.c b/mm/mmap.c
index 8bfbaea0cffd..d70900dbcbea 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -518,28 +518,6 @@ static inline struct vm_area_struct *__vma_next(struct mm_struct *mm,
 	return vma->vm_next;
 }
 
-/*
- * munmap_vma_range() - munmap VMAs that overlap a range.
- * @mm: The mm struct
- * @start: The start of the range.
- * @len: The length of the range.
- * @pprev: pointer to the pointer that will be set to previous vm_area_struct
- *
- * Find all the vm_area_struct that overlap from @start to
- * @end and munmap them.  Set @pprev to the previous vm_area_struct.
- *
- * Returns: -ENOMEM on munmap failure or 0 on success.
- */
-static inline int
-munmap_vma_range(struct mm_struct *mm, unsigned long start, unsigned long len,
-		 struct vm_area_struct **pprev, struct list_head *uf)
-{
-	while (range_has_overlap(mm, start, start + len, pprev))
-		if (do_munmap(mm, start, len, uf))
-			return -ENOMEM;
-	return 0;
-}
-
 static unsigned long count_vma_pages_range(struct mm_struct *mm,
 		unsigned long addr, unsigned long end)
 {
@@ -641,6 +619,117 @@ static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma,
 	mm->map_count++;
 }
 
+/*
+ * vma_expand - Expand an existing VMA
+ *
+ * @mas: The maple state; must be pointing to the expanding VMA
+ * @vma: The vma to expand
+ * @start: The new start of the vma (must be <= the current vm_start)
+ * @end: The new exclusive end of the vma (must be >= the current vm_end)
+ * @pgoff: The new page offset of the vma
+ * @next: The current or next vma, may be NULL.
+ *
+ * Expand @vma to [@start, @end).  @next is removed and freed if it differs
+ * from @vma and @end == @next->vm_end; mergeability is the caller's job.
+ * NOTE(review): with both a file and an anon_vma, the anon_vma lock is
+ * unlocked below but never taken ("else if" vs. "if") - confirm/fix.
+ *
+ * Returns: 0 on success, or the error from anon_vma_clone().
+ */
+inline int vma_expand(struct ma_state *mas, struct vm_area_struct *vma,
+		      unsigned long start, unsigned long end, pgoff_t pgoff,
+		      struct vm_area_struct *next)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	struct address_space *mapping = NULL;
+	struct rb_root_cached *root = NULL;
+	struct anon_vma *anon_vma = vma->anon_vma;
+	struct file *file = vma->vm_file;
+	bool remove_next = false;
+
+	if (next && (vma != next) && (end == next->vm_end)) {
+		remove_next = true;
+		if (next->anon_vma && !vma->anon_vma) {
+			int error;
+
+			vma->anon_vma = next->anon_vma;
+			error = anon_vma_clone(vma, next);
+			if (error)
+				return error;
+		}
+	}
+
+	/* Not merging but overwriting any part of next is not handled. */
+	VM_BUG_ON(next && (vma != next) && (end > next->vm_start));
+	/* Only handles expanding */
+	VM_BUG_ON(vma->vm_start < start || vma->vm_end > end);
+
+	vma_adjust_trans_huge(vma, start, end, 0);
+
+	if (file) {
+		mapping = file->f_mapping;
+		root = &mapping->i_mmap;
+		uprobe_munmap(vma, vma->vm_start, vma->vm_end);
+		i_mmap_lock_write(mapping);
+		flush_dcache_mmap_lock(mapping);
+		vma_interval_tree_remove(vma, root);
+	} else if (anon_vma) {
+		anon_vma_lock_write(anon_vma);
+		anon_vma_interval_tree_pre_update_vma(vma);
+	}
+
+	vma->vm_start = start;
+	vma->vm_end = end;
+	vma->vm_pgoff = pgoff;
+	/* Note: mas must be pointing to the expanding VMA */
+	vma_mas_store(vma, mas);
+
+	if (file) {
+		vma_interval_tree_insert(vma, root);
+		flush_dcache_mmap_unlock(mapping);
+	}
+
+	/* Expanding over the next vma */
+	if (remove_next) {
+		/* Remove from mm linked list - also updates highest_vm_end */
+		__vma_unlink_list(mm, next);
+
+		/* Kill the cache */
+		vmacache_invalidate(mm);
+
+		if (file)
+			__remove_shared_vm_struct(next, file, mapping);
+
+	} else if (!next) {
+		mm->highest_vm_end = vm_end_gap(vma);
+	}
+
+	if (anon_vma) {
+		anon_vma_interval_tree_post_update_vma(vma);
+		anon_vma_unlock_write(anon_vma);
+	}
+
+	if (file) {
+		i_mmap_unlock_write(mapping);
+		uprobe_mmap(vma);
+	}
+
+	if (remove_next) {
+		if (file) {
+			uprobe_munmap(next, next->vm_start, next->vm_end);
+			fput(file);
+		}
+		if (next->anon_vma)
+			anon_vma_merge(vma, next);
+		mm->map_count--;
+		mpol_put(vma_policy(next));
+		vm_area_free(next);
+	}
+
+	validate_mm(mm);
+	return 0;
+}
+
+
 /*
  * We cannot adjust vm_start, vm_end, vm_pgoff fields of a vma that
  * is already present in an i_mmap tree without adjusting the tree.
@@ -1632,9 +1721,15 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		struct list_head *uf)
 {
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma, *prev, *merge;
-	int error;
+	struct vm_area_struct *vma = NULL;
+	struct vm_area_struct *prev, *next;
+	pgoff_t pglen = len >> PAGE_SHIFT;
 	unsigned long charged = 0;
+	unsigned long end = addr + len;
+	unsigned long merge_start = addr, merge_end = end;
+	pgoff_t vm_pgoff;
+	int error;
+	MA_STATE(mas, &mm->mm_mt, addr, end - 1);
 
 	/* Check against address space limit. */
 	if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
@@ -1644,16 +1739,17 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		 * MAP_FIXED may remove pages of mappings that intersects with
 		 * requested mapping. Account for the pages it would unmap.
 		 */
-		nr_pages = count_vma_pages_range(mm, addr, addr + len);
+		nr_pages = count_vma_pages_range(mm, addr, end);
 
 		if (!may_expand_vm(mm, vm_flags,
 					(len >> PAGE_SHIFT) - nr_pages))
 			return -ENOMEM;
 	}
 
-	/* Clear old maps, set up prev and uf */
-	if (munmap_vma_range(mm, addr, len, &prev, uf))
+	/* Unmap any existing mapping in the area */
+	if (do_munmap(mm, addr, len, uf))
 		return -ENOMEM;
+
 	/*
 	 * Private writable mapping: check memory availability
 	 */
@@ -1664,14 +1760,41 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		vm_flags |= VM_ACCOUNT;
 	}
 
-	/*
-	 * Can we just expand an old mapping?
-	 */
-	vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
-			NULL, file, pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
-	if (vma)
-		goto out;
+	next = mas_next(&mas, ULONG_MAX);
+	prev = mas_prev(&mas, 0);
+	if (vm_flags & VM_SPECIAL)
+		goto cannot_expand;
+
+	/* Attempt to expand an old mapping */
+	/* Check next */
+	if (next && next->vm_start == end && !vma_policy(next) &&
+	    can_vma_merge_before(next, vm_flags, NULL, file, pgoff+pglen,
+				 NULL_VM_UFFD_CTX, NULL)) {
+		merge_end = next->vm_end;
+		vma = next;
+		vm_pgoff = next->vm_pgoff - pglen;
+	}
 
+	/* Check prev */
+	if (prev && prev->vm_end == addr && !vma_policy(prev) &&
+	    can_vma_merge_after(prev, vm_flags, NULL, file, pgoff,
+				NULL_VM_UFFD_CTX, NULL)) {
+		merge_start = prev->vm_start;
+		vma = prev;
+		vm_pgoff = prev->vm_pgoff;
+	}
+
+
+	/* Actually expand, if possible */
+	if (vma &&
+	    !vma_expand(&mas, vma, merge_start, merge_end, vm_pgoff, next)) {
+		khugepaged_enter_vma_merge(prev, vm_flags);
+		goto expanded;
+	}
+
+	mas.index = addr;
+	mas.last = end - 1;
+cannot_expand:
 	/*
 	 * Determine the object being mapped and call the appropriate
 	 * specific mapper. the address has already been validated, but
@@ -1684,7 +1807,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	}
 
 	vma->vm_start = addr;
-	vma->vm_end = addr + len;
+	vma->vm_end = end;
 	vma->vm_flags = vm_flags;
 	vma->vm_page_prot = vm_get_page_prot(vm_flags);
 	vma->vm_pgoff = pgoff;
@@ -1705,8 +1828,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		 *
 		 * Answer: Yes, several device drivers can do it in their
 		 *         f_op->mmap method. -DaveM
-		 * Bug: If addr is changed, prev, rb_link, rb_parent should
-		 *      be updated for vma_link()
 		 */
 		WARN_ON_ONCE(addr != vma->vm_start);
 
@@ -1716,22 +1837,26 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		 * as we may succeed this time.
 		 */
 		if (unlikely(vm_flags != vma->vm_flags && prev)) {
-			merge = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags,
+			next = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags,
 				NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
-			if (merge) {
+			if (next) {
 				/* ->mmap() can change vma->vm_file and fput the original file. So
 				 * fput the vma->vm_file here or we would add an extra fput for file
 				 * and cause general protection fault ultimately.
 				 */
 				fput(vma->vm_file);
 				vm_area_free(vma);
-				vma = merge;
-				/* Update vm_flags to pick up the change. */
+				vma = prev;
+				/* Update vm_flags and possible addr to pick up the change. We don't
+				 * warn here if addr changed as the vma is not linked by vma_link().
+				 */
+				addr = vma->vm_start;
 				vm_flags = vma->vm_flags;
 				goto unmap_writable;
 			}
 		}
 
+		mas_set(&mas, addr);
 		vm_flags = vma->vm_flags;
 	} else if (vm_flags & VM_SHARED) {
 		error = shmem_zero_setup(vma);
@@ -1750,13 +1875,28 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 			goto free_vma;
 	}
 
-	vma_link(mm, vma, prev);
+	if (vma->vm_file)
+		i_mmap_lock_write(vma->vm_file->f_mapping);
+
+	vma_mas_store(vma, &mas);
+	__vma_link_list(mm, vma, prev);
+	mm->map_count++;
+	if (vma->vm_file) {
+		if (vma->vm_flags & VM_SHARED)
+			mapping_allow_writable(vma->vm_file->f_mapping);
+
+		flush_dcache_mmap_lock(vma->vm_file->f_mapping);
+		vma_interval_tree_insert(vma, &vma->vm_file->f_mapping->i_mmap);
+		flush_dcache_mmap_unlock(vma->vm_file->f_mapping);
+		i_mmap_unlock_write(vma->vm_file->f_mapping);
+	}
+
 	/* Once vma denies write, undo our temporary denial count */
 unmap_writable:
 	if (file && vm_flags & VM_SHARED)
 		mapping_unmap_writable(file->f_mapping);
 	file = vma->vm_file;
-out:
+expanded:
 	perf_event_mmap(vma);
 
 	vm_stat_account(mm, vm_flags, len >> PAGE_SHIFT);
@@ -2606,10 +2746,6 @@ int __do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
 	prev = vma->vm_prev;
 	/* we have start < vma->vm_end  */
 
-	/* if it doesn't overlap, we have nothing.. */
-	if (vma->vm_start >= end)
-		return 0;
-
 	/*
 	 * If we need to split any vma, do it now to save pain later.
 	 *
-- 
2.34.1

  parent reply	other threads:[~2022-02-02  2:43 UTC|newest]

Thread overview: 83+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-02  2:41 [PATCH v5 00/70] Introducing the Maple Tree Liam Howlett
2022-02-02  2:41 ` [PATCH v5 01/70] binfmt_elf: Take the mmap lock when walking the VMA list Liam Howlett
2022-02-02  2:41 ` [PATCH v5 02/70] radix tree test suite: Add pr_err define Liam Howlett
2022-02-02  2:41 ` [PATCH v5 04/70] radix tree test suite: Add allocation counts and size to kmem_cache Liam Howlett
2022-02-02  2:41 ` [PATCH v5 05/70] radix tree test suite: Add support for slab bulk APIs Liam Howlett
2022-02-02  2:41 ` [PATCH v5 03/70] radix tree test suite: Add kmem_cache_set_non_kernel() Liam Howlett
2022-02-02  2:41 ` [PATCH v5 06/70] radix tree test suite: Add lockdep_is_held to header Liam Howlett
2022-02-02  2:41 ` [PATCH v5 07/70] Maple Tree: Add new data structure Liam Howlett
2022-02-02 17:11   ` Mike Rapoport
2022-02-03  2:38     ` Liam Howlett
2022-02-03  8:57       ` Mike Rapoport
2022-02-03 16:02   ` Mark Hemment
2022-02-03 17:29     ` Liam Howlett
2022-02-04 11:24   ` David Howells
2022-02-02  2:42 ` [PATCH v5 08/70] lib/test_maple_tree: Add testing for maple tree Liam Howlett
2022-02-02  2:42 ` [PATCH v5 11/70] mmap: Use the VMA iterator in count_vma_pages_range() Liam Howlett
2022-02-02  2:42 ` [PATCH v5 10/70] mm: Add VMA iterator Liam Howlett
2022-02-02  2:42 ` [PATCH v5 09/70] mm: Start tracking VMAs with maple tree Liam Howlett
2022-02-02  2:42 ` [PATCH v5 13/70] mm/mmap: Use the maple tree for find_vma_prev() instead of the rbtree Liam Howlett
2022-02-02  2:42 ` [PATCH v5 12/70] mm/mmap: Use the maple tree in find_vma() " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 14/70] mm/mmap: Use maple tree for unmapped_area{_topdown} Liam Howlett
2022-02-02  2:42 ` [PATCH v5 15/70] kernel/fork: Use maple tree for dup_mmap() during forking Liam Howlett
2022-02-03 11:59   ` Mark Hemment
2022-02-03 17:20     ` Liam Howlett
2022-02-02  2:42 ` [PATCH v5 16/70] damon: Convert __damon_va_three_regions to use the VMA iterator Liam Howlett
2022-02-02  2:42 ` [PATCH v5 18/70] mm: Remove rb tree Liam Howlett
2022-02-02  2:42 ` [PATCH v5 17/70] proc: Remove VMA rbtree use from nommu Liam Howlett
2022-02-02  2:42 ` [PATCH v5 19/70] mmap: Change zeroing of maple tree in __vma_adjust() Liam Howlett
2022-02-02  2:42 ` [PATCH v5 21/70] mm: Optimize find_exact_vma() to use vma_lookup() Liam Howlett
2022-02-02  2:42 ` [PATCH v5 20/70] xen: Use vma_lookup() in privcmd_ioctl_mmap() Liam Howlett
2022-02-02  2:42 ` [PATCH v5 22/70] mm/khugepaged: Optimize collapse_pte_mapped_thp() by using vma_lookup() Liam Howlett
2022-02-02  2:42 ` [PATCH v5 23/70] mm/mmap: Change do_brk_flags() to expand existing VMA and add do_brk_munmap() Liam Howlett
2022-02-02  2:42 ` [PATCH v5 24/70] mm: Use maple tree operations for find_vma_intersection() Liam Howlett
2022-02-02  2:42 ` [PATCH v5 26/70] mm: Remove vmacache Liam Howlett
2022-02-02  2:42 ` Liam Howlett [this message]
2022-02-02  2:42 ` [PATCH v5 27/70] mm: Convert vma_lookup() to use mtree_load() Liam Howlett
2022-02-02  2:42 ` [PATCH v5 28/70] mm/mmap: Move mmap_region() below do_munmap() Liam Howlett
2022-02-02  2:42 ` [PATCH v5 31/70] arm64: Remove mmap linked list from vdso Liam Howlett
2022-02-02  2:42 ` [PATCH v5 29/70] mm/mmap: Reorganize munmap to use maple states Liam Howlett
2022-02-02  2:42 ` [PATCH v5 30/70] mm/mmap: Change do_brk_munmap() to use do_mas_align_munmap() Liam Howlett
2022-02-02  2:42 ` [PATCH v5 32/70] parisc: Remove mmap linked list from cache handling Liam Howlett
2022-02-02  2:42 ` [PATCH v5 33/70] powerpc: Remove mmap linked list walks Liam Howlett
2022-02-02  2:42 ` [PATCH v5 34/70] s390: Remove vma " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 37/70] cxl: Remove vma linked list walk Liam Howlett
2022-02-02  2:42 ` [PATCH v5 35/70] x86: Remove vma linked list walks Liam Howlett
2022-02-02  2:42 ` [PATCH v5 36/70] xtensa: " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 39/70] um: Remove vma linked list walk Liam Howlett
2022-02-02  2:42 ` [PATCH v5 38/70] optee: " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 41/70] coredump: " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 42/70] exec: Use VMA iterator instead of linked list Liam Howlett
2022-02-02  2:42 ` [PATCH v5 40/70] binfmt_elf: Remove vma linked list walk Liam Howlett
2022-02-02  2:42 ` [PATCH v5 44/70] fs/proc/task_mmu: Stop using linked list and highest_vm_end Liam Howlett
2022-02-02  2:42 ` [PATCH v5 43/70] fs/proc/base: Use maple tree iterators in place of linked list Liam Howlett
2022-02-02  2:42 ` [PATCH v5 47/70] acct: Use VMA iterator instead " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 46/70] ipc/shm: " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 45/70] userfaultfd: Use maple tree iterator to iterate VMAs Liam Howlett
2022-02-02  2:42 ` [PATCH v5 49/70] sched: Use maple tree iterator to walk VMAs Liam Howlett
2022-02-02  2:42 ` [PATCH v5 48/70] perf: Use VMA iterator Liam Howlett
2022-02-02  2:42 ` [PATCH v5 52/70] mm/gup: Use maple tree navigation instead of linked list Liam Howlett
2022-02-02  2:42 ` [PATCH v5 50/70] fork: Use VMA iterator Liam Howlett
2022-02-02  2:42 ` [PATCH v5 51/70] bpf: Remove VMA linked list Liam Howlett
2022-02-02  2:42 ` [PATCH v5 53/70] mm/khugepaged: Stop using vma " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 54/70] mm/ksm: Use vma iterators instead of " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 57/70] mm/mempolicy: Use vma iterator & maple state " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 55/70] mm/madvise: Use vma_find() " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 56/70] mm/memcontrol: Stop using mm->highest_vm_end Liam Howlett
2022-02-02  2:42 ` [PATCH v5 59/70] mm/mprotect: Use maple tree navigation instead of vma linked list Liam Howlett
2022-02-02  2:42 ` [PATCH v5 58/70] mm/mlock: Use vma iterator and " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 60/70] mm/mremap: Use vma_find_intersection() " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 62/70] mm/oom_kill: Use maple tree iterators " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 61/70] mm/msync: Use vma_find() " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 63/70] mm/pagewalk: " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 64/70] mm/swapfile: Use vma iterator " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 65/70] i915: Use the VMA iterator Liam Howlett
2022-02-02  2:42 ` [PATCH v5 66/70] nommu: Remove uses of VMA linked list Liam Howlett
2022-02-02  2:42 ` [PATCH v5 67/70] riscv: Use vma iterator for vdso Liam Howlett
2022-02-02  2:42 ` [PATCH v5 68/70] mm: Remove the vma linked list Liam Howlett
2022-02-03 12:08   ` Mark Hemment
2022-02-03 17:25     ` Liam Howlett
2022-02-02  2:42 ` [PATCH v5 69/70] mm/mmap: Drop range_has_overlap() function Liam Howlett
2022-02-02  2:42 ` [PATCH v5 70/70] mm/mmap.c: Pass in mapping to __vma_link_file() Liam Howlett
2022-02-04 11:28 ` [PATCH v5 09/70] mm: Start tracking VMAs with maple tree David Howells
2022-02-11 18:23   ` Liam Howlett

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220202024137.2516438-26-Liam.Howlett@oracle.com \
    --to=liam.howlett@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=maple-tree@lists.infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.