All of lore.kernel.org
 help / color / mirror / Atom feed
From: Liam Howlett <liam.howlett@oracle.com>
To: "maple-tree@lists.infradead.org" <maple-tree@lists.infradead.org>,
	"linux-mm@kvack.org" <linux-mm@kvack.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>
Subject: [PATCH v5 09/70] mm: Start tracking VMAs with maple tree
Date: Wed, 2 Feb 2022 02:42:01 +0000	[thread overview]
Message-ID: <20220202024137.2516438-10-Liam.Howlett@oracle.com> (raw)
In-Reply-To: <20220202024137.2516438-1-Liam.Howlett@oracle.com>

From: "Liam R. Howlett" <Liam.Howlett@Oracle.com>

Start tracking the VMAs with the new maple tree structure in parallel
with the rb_tree.  Add debug and trace events for maple tree operations
and duplicate the rb_tree that is created on forks into the maple tree.

The maple tree is added to the mm_struct including the mm_init struct,
added support in required mm/mmap functions, added tracking in
kernel/fork for process forking, and used to find the unmapped_area and
checked against what the rbtree finds.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
---
 arch/x86/kernel/tboot.c     |   1 +
 drivers/firmware/efi/efi.c  |   1 +
 include/linux/mm.h          |   2 +
 include/linux/mm_types.h    |   3 +
 include/trace/events/mmap.h |  71 ++++++++++++
 init/main.c                 |   2 +
 kernel/fork.c               |   5 +
 mm/init-mm.c                |   2 +
 mm/internal.h               |  70 ++++++++++++
 mm/mmap.c                   | 212 +++++++++++++++++++++++++++++++++++-
 10 files changed, 365 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index f9af561c3cd4..859e8d2ea070 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -98,6 +98,7 @@ void __init tboot_probe(void)
 static pgd_t *tboot_pg_dir;
 static struct mm_struct tboot_mm = {
 	.mm_rb          = RB_ROOT,
+	.mm_mt          = MTREE_INIT_EXT(mm_mt, MM_MT_FLAGS, tboot_mm.mmap_lock),
 	.pgd            = swapper_pg_dir,
 	.mm_users       = ATOMIC_INIT(2),
 	.mm_count       = ATOMIC_INIT(1),
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 7de3f5b6e8d0..c41efbff237c 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -55,6 +55,7 @@ static unsigned long __initdata rt_prop = EFI_INVALID_TABLE_ADDR;
 
 struct mm_struct efi_mm = {
 	.mm_rb			= RB_ROOT,
+	.mm_mt			= MTREE_INIT_EXT(mm_mt, MM_MT_FLAGS, efi_mm.mmap_lock),
 	.mm_users		= ATOMIC_INIT(2),
 	.mm_count		= ATOMIC_INIT(1),
 	.write_protect_seq      = SEQCNT_ZERO(efi_mm.write_protect_seq),
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 213cc569b192..c71cf425048e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2578,6 +2578,8 @@ extern bool arch_has_descending_max_zone_pfns(void);
 /* nommu.c */
 extern atomic_long_t mmap_pages_allocated;
 extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
+/* mmap.c */
+void vma_store(struct mm_struct *mm, struct vm_area_struct *vma);
 
 /* interval_tree.c */
 void vma_interval_tree_insert(struct vm_area_struct *node,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 5140e5feb486..687cb463ff6e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -9,6 +9,7 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/rbtree.h>
+#include <linux/maple_tree.h>
 #include <linux/rwsem.h>
 #include <linux/completion.h>
 #include <linux/cpumask.h>
@@ -455,6 +456,7 @@ struct kioctx_table;
 struct mm_struct {
 	struct {
 		struct vm_area_struct *mmap;		/* list of VMAs */
+		struct maple_tree mm_mt;
 		struct rb_root mm_rb;
 		u64 vmacache_seqnum;                   /* per-thread vmacache */
 #ifdef CONFIG_MMU
@@ -643,6 +645,7 @@ struct mm_struct {
 	unsigned long cpu_bitmap[];
 };
 
+#define MM_MT_FLAGS	(MT_FLAGS_ALLOC_RANGE | MT_FLAGS_LOCK_EXTERN)
 extern struct mm_struct init_mm;
 
 /* Pointer magic because the dynamic array size confuses some compilers. */
diff --git a/include/trace/events/mmap.h b/include/trace/events/mmap.h
index 4661f7ba07c0..f5c1626f3bbb 100644
--- a/include/trace/events/mmap.h
+++ b/include/trace/events/mmap.h
@@ -42,6 +42,77 @@ TRACE_EVENT(vm_unmapped_area,
 		__entry->low_limit, __entry->high_limit, __entry->align_mask,
 		__entry->align_offset)
 );
+
+TRACE_EVENT(vma_mt_szero,
+	TP_PROTO(struct mm_struct *mm, unsigned long start,
+		 unsigned long end),
+
+	TP_ARGS(mm, start, end),
+
+	TP_STRUCT__entry(
+			__field(struct mm_struct *, mm)
+			__field(unsigned long, start)
+			__field(unsigned long, end)
+	),
+
+	TP_fast_assign(
+			__entry->mm		= mm;
+			__entry->start		= start;
+			__entry->end		= end - 1;
+	),
+
+	TP_printk("mt_mod %p, (NULL), SNULL, %lu, %lu,",
+		  __entry->mm,
+		  (unsigned long) __entry->start,
+		  (unsigned long) __entry->end
+	)
+);
+
+TRACE_EVENT(vma_store,
+	TP_PROTO(struct mm_struct *mm, struct vm_area_struct *vma),
+
+	TP_ARGS(mm, vma),
+
+	TP_STRUCT__entry(
+			__field(struct mm_struct *, mm)
+			__field(struct vm_area_struct *, vma)
+			__field(unsigned long, vm_start)
+			__field(unsigned long, vm_end)
+	),
+
+	TP_fast_assign(
+			__entry->mm		= mm;
+			__entry->vma		= vma;
+			__entry->vm_start	= vma->vm_start;
+			__entry->vm_end		= vma->vm_end - 1;
+	),
+
+	TP_printk("mt_mod %p, (%p), STORE, %lu, %lu,",
+		  __entry->mm, __entry->vma,
+		  (unsigned long) __entry->vm_start,
+		  (unsigned long) __entry->vm_end
+	)
+);
+
+
+TRACE_EVENT(exit_mmap,
+	TP_PROTO(struct mm_struct *mm),
+
+	TP_ARGS(mm),
+
+	TP_STRUCT__entry(
+			__field(struct mm_struct *, mm)
+	),
+
+	TP_fast_assign(
+			__entry->mm		= mm;
+	),
+
+	TP_printk("mt_mod %p, DESTROY\n",
+		  __entry->mm
+	)
+);
+
 #endif
 
 /* This part must be outside protection */
diff --git a/init/main.c b/init/main.c
index 65fa2e41a9c0..67e91430471a 100644
--- a/init/main.c
+++ b/init/main.c
@@ -115,6 +115,7 @@ static int kernel_init(void *);
 
 extern void init_IRQ(void);
 extern void radix_tree_init(void);
+extern void maple_tree_init(void);
 
 /*
  * Debug helper: via this flag we know that we are in 'early bootup code'
@@ -1000,6 +1001,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
 		 "Interrupts were enabled *very* early, fixing it\n"))
 		local_irq_disable();
 	radix_tree_init();
+	maple_tree_init();
 
 	/*
 	 * Set up housekeeping before setting up workqueues to allow the unbound
diff --git a/kernel/fork.c b/kernel/fork.c
index d75a528f7b21..51a7971651ef 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -607,6 +607,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 		rb_link = &tmp->vm_rb.rb_right;
 		rb_parent = &tmp->vm_rb;
 
+		/* Link the vma into the MT */
+		vma_store(mm, tmp);
+
 		mm->map_count++;
 		if (!(tmp->vm_flags & VM_WIPEONFORK))
 			retval = copy_page_range(tmp, mpnt);
@@ -1038,6 +1041,8 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 {
 	mm->mmap = NULL;
 	mm->mm_rb = RB_ROOT;
+	mt_init_flags(&mm->mm_mt, MM_MT_FLAGS);
+	mt_set_external_lock(&mm->mm_mt, &mm->mmap_lock);
 	mm->vmacache_seqnum = 0;
 	atomic_set(&mm->mm_users, 1);
 	atomic_set(&mm->mm_count, 1);
diff --git a/mm/init-mm.c b/mm/init-mm.c
index b4a6f38fb51d..7622ca24eeb7 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/mm_types.h>
 #include <linux/rbtree.h>
+#include <linux/maple_tree.h>
 #include <linux/rwsem.h>
 #include <linux/spinlock.h>
 #include <linux/list.h>
@@ -28,6 +29,7 @@
  */
 struct mm_struct init_mm = {
 	.mm_rb		= RB_ROOT,
+	.mm_mt		= MTREE_INIT_EXT(mm_mt, MM_MT_FLAGS, init_mm.mmap_lock),
 	.pgd		= swapper_pg_dir,
 	.mm_users	= ATOMIC_INIT(2),
 	.mm_count	= ATOMIC_INIT(1),
diff --git a/mm/internal.h b/mm/internal.h
index d80300392a19..d229b4c2f393 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -386,6 +386,76 @@ static inline bool is_data_mapping(vm_flags_t flags)
 	return (flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE;
 }
 
+/* Maple tree operations using VMAs */
+/*
+ * vma_mas_store() - Store a VMA in the maple tree.
+ * @vma: The vm_area_struct
+ * @mas: The maple state
+ *
+ * Efficient way to store a VMA in the maple tree when the @mas has already
+ * walked to the correct location.
+ *
+ * Note: the end address is inclusive in the maple tree.
+ */
+static inline int vma_mas_store(struct vm_area_struct *vma, struct ma_state *mas)
+{
+#ifdef CONFIG_DEBUG_MAPLE_TREE
+	/* Make sure no VMAs are about to be lost. */
+	MA_STATE(test, mas->tree, vma->vm_start, vma->vm_end - 1);
+	struct vm_area_struct *vma_mas;
+	int count = 0;
+
+	mas_for_each(&test, vma_mas, vma->vm_end - 1) {
+		/* Rule out vma_expand */
+		if ((vma->vm_start != vma_mas->vm_start) &&
+		    (vma->vm_end != vma_mas->vm_end))
+			count++;
+	}
+
+	BUG_ON(count);
+
+	BUG_ON(mas->min > vma->vm_start);
+	BUG_ON(mas->index > vma->vm_start);
+#endif
+	mas->index = vma->vm_start;
+	mas->last = vma->vm_end - 1;
+	return mas_store_gfp(mas, vma, GFP_KERNEL);
+}
+
+/*
+ * vma_mas_remove() - Remove a VMA from the maple tree.
+ * @vma: The vm_area_struct
+ * @mas: The maple state
+ *
+ * Efficient way to remove a VMA from the maple tree when the @mas has already
+ * been established and points to the correct location.
+ * Note: the end address is inclusive in the maple tree.
+ */
+static inline int vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas)
+{
+	int ret;
+
+#ifdef CONFIG_DEBUG_MAPLE_TREE
+	/* Make sure no VMAs are about to be lost. */
+	MA_STATE(test, mas->tree, vma->vm_start, vma->vm_end - 1);
+	struct vm_area_struct *vma_mas;
+	int count = 0;
+
+	mas_for_each(&test, vma_mas, vma->vm_end - 1)
+		count++;
+
+	BUG_ON(count != 1);
+
+	BUG_ON(mas->min > vma->vm_start);
+	BUG_ON(mas->index > vma->vm_start);
+	BUG_ON(mas->min > mas->index);
+#endif
+	mas->index = vma->vm_start;
+	mas->last = vma->vm_end - 1;
+	ret = mas_store_gfp(mas, NULL, GFP_KERNEL);
+	return ret;
+}
+
 /* mm/util.c */
 void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
 		struct vm_area_struct *prev);
diff --git a/mm/mmap.c b/mm/mmap.c
index 1e8fdb0b51ed..05d3b63c7bed 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -376,7 +376,70 @@ static int browse_rb(struct mm_struct *mm)
 	}
 	return bug ? -1 : i;
 }
+#if defined(CONFIG_DEBUG_MAPLE_TREE)
+extern void mt_validate(struct maple_tree *mt);
+extern void mt_dump(const struct maple_tree *mt);
 
+/* Validate the maple tree */
+static void validate_mm_mt(struct mm_struct *mm)
+{
+	struct maple_tree *mt = &mm->mm_mt;
+	struct vm_area_struct *vma_mt, *vma = mm->mmap;
+
+	MA_STATE(mas, mt, 0, 0);
+	mas_for_each(&mas, vma_mt, ULONG_MAX) {
+		if (xa_is_zero(vma_mt))
+			continue;
+
+		if (!vma)
+			break;
+
+		if ((vma != vma_mt) ||
+		    (vma->vm_start != vma_mt->vm_start) ||
+		    (vma->vm_end != vma_mt->vm_end) ||
+		    (vma->vm_start != mas.index) ||
+		    (vma->vm_end - 1 != mas.last)) {
+			pr_emerg("issue in %s\n", current->comm);
+			dump_stack();
+#ifdef CONFIG_DEBUG_VM
+			dump_vma(vma_mt);
+			pr_emerg("and next in rb\n");
+			dump_vma(vma->vm_next);
+#endif
+			pr_emerg("mt piv: %px %lu - %lu\n", vma_mt,
+				 mas.index, mas.last);
+			pr_emerg("mt vma: %px %lu - %lu\n", vma_mt,
+				 vma_mt->vm_start, vma_mt->vm_end);
+			pr_emerg("rb vma: %px %lu - %lu\n", vma,
+				 vma->vm_start, vma->vm_end);
+			pr_emerg("rb->next = %px %lu - %lu\n", vma->vm_next,
+					vma->vm_next->vm_start, vma->vm_next->vm_end);
+
+			mt_dump(mas.tree);
+			if (vma_mt->vm_end != mas.last + 1) {
+				pr_err("vma: %px vma_mt %lu-%lu\tmt %lu-%lu\n",
+						mm, vma_mt->vm_start, vma_mt->vm_end,
+						mas.index, mas.last);
+				mt_dump(mas.tree);
+			}
+			VM_BUG_ON_MM(vma_mt->vm_end != mas.last + 1, mm);
+			if (vma_mt->vm_start != mas.index) {
+				pr_err("vma: %px vma_mt %px %lu - %lu doesn't match\n",
+						mm, vma_mt, vma_mt->vm_start, vma_mt->vm_end);
+				mt_dump(mas.tree);
+			}
+			VM_BUG_ON_MM(vma_mt->vm_start != mas.index, mm);
+		}
+		VM_BUG_ON(vma != vma_mt);
+		vma = vma->vm_next;
+
+	}
+	VM_BUG_ON(vma);
+	mt_validate(&mm->mm_mt);
+}
+#else
+#define validate_mm_mt(root) do { } while (0)
+#endif
 static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
 {
 	struct rb_node *nd;
@@ -431,6 +494,7 @@ static void validate_mm(struct mm_struct *mm)
 }
 #else
 #define validate_mm_rb(root, ignore) do { } while (0)
+#define validate_mm_mt(root) do { } while (0)
 #define validate_mm(mm) do { } while (0)
 #endif
 
@@ -675,11 +739,42 @@ static void __vma_link_file(struct vm_area_struct *vma)
 	}
 }
 
+/*
+ * vma_mt_szero() - Set a given range to zero.  Used when modifying a
+ * vm_area_struct start or end.
+ *
+ * @mm: The struct_mm
+ * @start: The start address to zero
+ * @end: The end address to zero.
+ */
+static inline void vma_mt_szero(struct mm_struct *mm, unsigned long start,
+				unsigned long end)
+{
+	MA_STATE(mas, &mm->mm_mt, start, end - 1);
+
+	trace_vma_mt_szero(mm, start, end);
+	mas_store_gfp(&mas, NULL, GFP_KERNEL);
+}
+
+/*
+ * vma_store() - Store a given vm_area_struct in the maple tree.
+ * @mm: The struct_mm
+ * @vma: The vm_area_struct to store in the maple tree.
+ */
+void vma_store(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+	MA_STATE(mas, &mm->mm_mt, 0, 0);
+
+	trace_vma_store(vma->vm_mm, vma);
+	vma_mas_store(vma, &mas);
+}
+
 static void
 __vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct vm_area_struct *prev, struct rb_node **rb_link,
 	struct rb_node *rb_parent)
 {
+	vma_store(mm, vma);
 	__vma_link_list(mm, vma, prev);
 	__vma_link_rb(mm, vma, rb_link, rb_parent);
 }
@@ -752,6 +847,9 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	long adjust_next = 0;
 	int remove_next = 0;
 
+	validate_mm(mm);
+	validate_mm_mt(mm);
+
 	if (next && !insert) {
 		struct vm_area_struct *exporter = NULL, *importer = NULL;
 
@@ -877,17 +975,28 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	}
 
 	if (start != vma->vm_start) {
+		unsigned long old_start = vma->vm_start;
 		vma->vm_start = start;
+		if (old_start < start)
+			vma_mt_szero(mm, old_start, start);
 		start_changed = true;
 	}
 	if (end != vma->vm_end) {
+		unsigned long old_end = vma->vm_end;
 		vma->vm_end = end;
+		if (old_end > end)
+			vma_mt_szero(mm, end, old_end);
 		end_changed = true;
 	}
+
+	if (end_changed || start_changed)
+		vma_store(mm, vma);
+
 	vma->vm_pgoff = pgoff;
 	if (adjust_next) {
 		next->vm_start += adjust_next;
 		next->vm_pgoff += adjust_next >> PAGE_SHIFT;
+		vma_store(mm, next);
 	}
 
 	if (file) {
@@ -901,6 +1010,8 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 		/*
 		 * vma_merge has merged next into vma, and needs
 		 * us to remove next before dropping the locks.
+		 * Since we have expanded over this vma, the maple tree will
+		 * have overwritten by storing the value
 		 */
 		if (remove_next != 3)
 			__vma_unlink(mm, next, next);
@@ -1020,6 +1131,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 		uprobe_mmap(insert);
 
 	validate_mm(mm);
+	validate_mm_mt(mm);
 
 	return 0;
 }
@@ -1173,6 +1285,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 	struct vm_area_struct *area, *next;
 	int err;
 
+	validate_mm_mt(mm);
 	/*
 	 * We later require that vma->vm_flags == vm_flags,
 	 * so this tests vma->vm_flags & VM_SPECIAL, too.
@@ -1248,6 +1361,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 		khugepaged_enter_vma_merge(area, vm_flags);
 		return area;
 	}
+	validate_mm_mt(mm);
 
 	return NULL;
 }
@@ -1729,6 +1843,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	struct rb_node **rb_link, *rb_parent;
 	unsigned long charged = 0;
 
+	validate_mm_mt(mm);
 	/* Check against address space limit. */
 	if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
 		unsigned long nr_pages;
@@ -1876,6 +1991,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 
 	vma_set_page_prot(vma);
 
+	validate_mm_mt(mm);
 	return addr;
 
 unmap_and_free_vma:
@@ -1892,6 +2008,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 unacct_error:
 	if (charged)
 		vm_unacct_memory(charged);
+	validate_mm_mt(mm);
 	return error;
 }
 
@@ -1908,12 +2025,19 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	unsigned long length, low_limit, high_limit, gap_start, gap_end;
+	unsigned long gap;
+	MA_STATE(mas, &mm->mm_mt, 0, 0);
 
 	/* Adjust search length to account for worst case alignment overhead */
 	length = info->length + info->align_mask;
 	if (length < info->length)
 		return -ENOMEM;
 
+	mas_empty_area(&mas, info->low_limit, info->high_limit - 1,
+			   length);
+	gap = mas.index;
+	gap += (info->align_offset - gap) & info->align_mask;
+
 	/* Adjust search limits by the desired length */
 	if (info->high_limit < length)
 		return -ENOMEM;
@@ -1995,20 +2119,31 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 
 	VM_BUG_ON(gap_start + info->length > info->high_limit);
 	VM_BUG_ON(gap_start + info->length > gap_end);
+
+	VM_BUG_ON(gap != gap_start);
 	return gap_start;
 }
 
 static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
 {
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma = NULL;
 	unsigned long length, low_limit, high_limit, gap_start, gap_end;
+	unsigned long gap;
+
+	MA_STATE(mas, &mm->mm_mt, 0, 0);
+	validate_mm_mt(mm);
 
 	/* Adjust search length to account for worst case alignment overhead */
 	length = info->length + info->align_mask;
 	if (length < info->length)
 		return -ENOMEM;
 
+	mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1,
+		   length);
+	gap = (mas.index + info->align_mask) & ~info->align_mask;
+	gap -= info->align_offset & info->align_mask;
+
 	/*
 	 * Adjust search limits by the desired length.
 	 * See implementation comment at top of unmapped_area().
@@ -2094,6 +2229,32 @@ static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
 
 	VM_BUG_ON(gap_end < info->low_limit);
 	VM_BUG_ON(gap_end < gap_start);
+
+	if (gap != gap_end) {
+		pr_err("%s: %px Gap was found: mt %lu gap_end %lu\n", __func__,
+		       mm, gap, gap_end);
+		pr_err("window was %lu - %lu size %lu\n", info->high_limit,
+		       info->low_limit, length);
+		pr_err("mas.min %lu max %lu mas.last %lu\n", mas.min, mas.max,
+		       mas.last);
+		pr_err("mas.index %lu align mask %lu offset %lu\n", mas.index,
+		       info->align_mask, info->align_offset);
+		pr_err("rb_find_vma find on %lu => %px (%px)\n", mas.index,
+		       find_vma(mm, mas.index), vma);
+#if defined(CONFIG_DEBUG_MAPLE_TREE)
+		mt_dump(&mm->mm_mt);
+#endif
+		{
+			struct vm_area_struct *dv = mm->mmap;
+
+			while (dv) {
+				printk("vma %px %lu-%lu\n", dv, dv->vm_start, dv->vm_end);
+				dv = dv->vm_next;
+			}
+		}
+		VM_BUG_ON(gap != gap_end);
+	}
+
 	return gap_end;
 }
 
@@ -2307,7 +2468,6 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
 		vmacache_update(addr, vma);
 	return vma;
 }
-
 EXPORT_SYMBOL(find_vma);
 
 /*
@@ -2388,6 +2548,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 	unsigned long gap_addr;
 	int error = 0;
 
+	validate_mm_mt(mm);
 	if (!(vma->vm_flags & VM_GROWSUP))
 		return -EFAULT;
 
@@ -2464,6 +2625,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 	anon_vma_unlock_write(vma->anon_vma);
 	khugepaged_enter_vma_merge(vma, vma->vm_flags);
 	validate_mm(mm);
+	validate_mm_mt(mm);
 	return error;
 }
 #endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
@@ -2478,6 +2640,7 @@ int expand_downwards(struct vm_area_struct *vma,
 	struct vm_area_struct *prev;
 	int error = 0;
 
+	validate_mm(mm);
 	address &= PAGE_MASK;
 	if (address < mmap_min_addr)
 		return -EPERM;
@@ -2531,6 +2694,8 @@ int expand_downwards(struct vm_area_struct *vma,
 				anon_vma_interval_tree_pre_update_vma(vma);
 				vma->vm_start = address;
 				vma->vm_pgoff -= grow;
+				/* Overwrite old entry in mtree. */
+				vma_store(mm, vma);
 				anon_vma_interval_tree_post_update_vma(vma);
 				vma_gap_update(vma);
 				spin_unlock(&mm->page_table_lock);
@@ -2672,6 +2837,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	insertion_point = (prev ? &prev->vm_next : &mm->mmap);
 	vma->vm_prev = NULL;
+	vma_mt_szero(mm, vma->vm_start, end);
 	do {
 		vma_rb_erase(vma, &mm->mm_rb);
 		mm->map_count--;
@@ -2710,6 +2876,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	struct vm_area_struct *new;
 	int err;
+	validate_mm_mt(mm);
 
 	if (vma->vm_ops && vma->vm_ops->may_split) {
 		err = vma->vm_ops->may_split(vma, addr);
@@ -2762,6 +2929,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 	mpol_put(vma_policy(new));
  out_free_vma:
 	vm_area_free(new);
+	validate_mm_mt(mm);
 	return err;
 }
 
@@ -3032,6 +3200,7 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla
 	pgoff_t pgoff = addr >> PAGE_SHIFT;
 	int error;
 	unsigned long mapped_addr;
+	validate_mm_mt(mm);
 
 	/* Until we need other flags, refuse anything except VM_EXEC. */
 	if ((flags & (~VM_EXEC)) != 0)
@@ -3089,6 +3258,7 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla
 	if (flags & VM_LOCKED)
 		mm->locked_vm += (len >> PAGE_SHIFT);
 	vma->vm_flags |= VM_SOFTDIRTY;
+	validate_mm_mt(mm);
 	return 0;
 }
 
@@ -3157,7 +3327,13 @@ void exit_mmap(struct mm_struct *mm)
 		set_bit(MMF_OOM_SKIP, &mm->flags);
 	}
 
-	mmap_write_lock(mm);
+	/*
+	 * Actually taking the mmap semaphore here costs 3% performance on
+	 * a large machine:
+	 * https://lore.kernel.org/lkml/20170725151754.3txp44a2kbffsxdg@node.shutemov.name/
+	 * Lockdep will complain about not holding the mmap_lock, so we lie.
+	 */
+	rwsem_acquire(&mm->mmap_lock.dep_map, 0, 0, _THIS_IP_);
 	if (mm->locked_vm)
 		unlock_range(mm->mmap, ULONG_MAX);
 
@@ -3186,7 +3362,11 @@ void exit_mmap(struct mm_struct *mm)
 		vma = remove_vma(vma);
 		cond_resched();
 	}
-	mmap_write_unlock(mm);
+
+	trace_exit_mmap(mm);
+	__mt_destroy(&mm->mm_mt);
+	rwsem_release(&mm->mmap_lock.dep_map, _THIS_IP_);
+
 	vm_unacct_memory(nr_accounted);
 }
 
@@ -3198,10 +3378,25 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
 {
 	struct vm_area_struct *prev;
 	struct rb_node **rb_link, *rb_parent;
+	unsigned long start = vma->vm_start;
+	struct vm_area_struct *overlap = NULL;
 
 	if (find_vma_links(mm, vma->vm_start, vma->vm_end,
 			   &prev, &rb_link, &rb_parent))
 		return -ENOMEM;
+
+	overlap = mt_find(&mm->mm_mt, &start, vma->vm_end - 1);
+	if (overlap) {
+
+		pr_err("Found vma ending at %lu\n", start - 1);
+		pr_err("vma : %lu => %lu-%lu\n", (unsigned long)overlap,
+				overlap->vm_start, overlap->vm_end - 1);
+#if defined(CONFIG_DEBUG_MAPLE_TREE)
+		mt_dump(&mm->mm_mt);
+#endif
+		BUG();
+	}
+
 	if ((vma->vm_flags & VM_ACCOUNT) &&
 	     security_vm_enough_memory_mm(mm, vma_pages(vma)))
 		return -ENOMEM;
@@ -3241,7 +3436,9 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 	struct vm_area_struct *new_vma, *prev;
 	struct rb_node **rb_link, *rb_parent;
 	bool faulted_in_anon_vma = true;
+	unsigned long index = addr;
 
+	validate_mm_mt(mm);
 	/*
 	 * If anonymous vma has not yet been faulted, update new pgoff
 	 * to match new location, to increase its chance of merging.
@@ -3253,6 +3450,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 
 	if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
 		return NULL;	/* should never get here */
+	if (mt_find(&mm->mm_mt, &index, addr+len - 1))
+		BUG();
 	new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
 			    vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
 			    vma->vm_userfaultfd_ctx, vma_anon_name(vma));
@@ -3296,6 +3495,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 		vma_link(mm, new_vma, prev, rb_link, rb_parent);
 		*need_rmap_locks = false;
 	}
+	validate_mm_mt(mm);
 	return new_vma;
 
 out_free_mempol:
@@ -3303,6 +3503,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 out_free_vma:
 	vm_area_free(new_vma);
 out:
+	validate_mm_mt(mm);
 	return NULL;
 }
 
@@ -3439,6 +3640,7 @@ static struct vm_area_struct *__install_special_mapping(
 	int ret;
 	struct vm_area_struct *vma;
 
+	validate_mm_mt(mm);
 	vma = vm_area_alloc(mm);
 	if (unlikely(vma == NULL))
 		return ERR_PTR(-ENOMEM);
@@ -3460,10 +3662,12 @@ static struct vm_area_struct *__install_special_mapping(
 
 	perf_event_mmap(vma);
 
+	validate_mm_mt(mm);
 	return vma;
 
 out:
 	vm_area_free(vma);
+	validate_mm_mt(mm);
 	return ERR_PTR(ret);
 }
 
-- 
2.34.1

  parent reply	other threads:[~2022-02-02  2:42 UTC|newest]

Thread overview: 83+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-02  2:41 [PATCH v5 00/70] Introducing the Maple Tree Liam Howlett
2022-02-02  2:41 ` [PATCH v5 01/70] binfmt_elf: Take the mmap lock when walking the VMA list Liam Howlett
2022-02-02  2:41 ` [PATCH v5 02/70] radix tree test suite: Add pr_err define Liam Howlett
2022-02-02  2:41 ` [PATCH v5 04/70] radix tree test suite: Add allocation counts and size to kmem_cache Liam Howlett
2022-02-02  2:41 ` [PATCH v5 05/70] radix tree test suite: Add support for slab bulk APIs Liam Howlett
2022-02-02  2:41 ` [PATCH v5 03/70] radix tree test suite: Add kmem_cache_set_non_kernel() Liam Howlett
2022-02-02  2:41 ` [PATCH v5 06/70] radix tree test suite: Add lockdep_is_held to header Liam Howlett
2022-02-02  2:41 ` [PATCH v5 07/70] Maple Tree: Add new data structure Liam Howlett
2022-02-02 17:11   ` Mike Rapoport
2022-02-03  2:38     ` Liam Howlett
2022-02-03  8:57       ` Mike Rapoport
2022-02-03 16:02   ` Mark Hemment
2022-02-03 17:29     ` Liam Howlett
2022-02-04 11:24   ` David Howells
2022-02-02  2:42 ` [PATCH v5 08/70] lib/test_maple_tree: Add testing for maple tree Liam Howlett
2022-02-02  2:42 ` [PATCH v5 11/70] mmap: Use the VMA iterator in count_vma_pages_range() Liam Howlett
2022-02-02  2:42 ` [PATCH v5 10/70] mm: Add VMA iterator Liam Howlett
2022-02-02  2:42 ` Liam Howlett [this message]
2022-02-02  2:42 ` [PATCH v5 13/70] mm/mmap: Use the maple tree for find_vma_prev() instead of the rbtree Liam Howlett
2022-02-02  2:42 ` [PATCH v5 12/70] mm/mmap: Use the maple tree in find_vma() " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 14/70] mm/mmap: Use maple tree for unmapped_area{_topdown} Liam Howlett
2022-02-02  2:42 ` [PATCH v5 15/70] kernel/fork: Use maple tree for dup_mmap() during forking Liam Howlett
2022-02-03 11:59   ` Mark Hemment
2022-02-03 17:20     ` Liam Howlett
2022-02-02  2:42 ` [PATCH v5 16/70] damon: Convert __damon_va_three_regions to use the VMA iterator Liam Howlett
2022-02-02  2:42 ` [PATCH v5 18/70] mm: Remove rb tree Liam Howlett
2022-02-02  2:42 ` [PATCH v5 17/70] proc: Remove VMA rbtree use from nommu Liam Howlett
2022-02-02  2:42 ` [PATCH v5 19/70] mmap: Change zeroing of maple tree in __vma_adjust() Liam Howlett
2022-02-02  2:42 ` [PATCH v5 21/70] mm: Optimize find_exact_vma() to use vma_lookup() Liam Howlett
2022-02-02  2:42 ` [PATCH v5 20/70] xen: Use vma_lookup() in privcmd_ioctl_mmap() Liam Howlett
2022-02-02  2:42 ` [PATCH v5 22/70] mm/khugepaged: Optimize collapse_pte_mapped_thp() by using vma_lookup() Liam Howlett
2022-02-02  2:42 ` [PATCH v5 23/70] mm/mmap: Change do_brk_flags() to expand existing VMA and add do_brk_munmap() Liam Howlett
2022-02-02  2:42 ` [PATCH v5 24/70] mm: Use maple tree operations for find_vma_intersection() Liam Howlett
2022-02-02  2:42 ` [PATCH v5 26/70] mm: Remove vmacache Liam Howlett
2022-02-02  2:42 ` [PATCH v5 25/70] mm/mmap: Use advanced maple tree API for mmap_region() Liam Howlett
2022-02-02  2:42 ` [PATCH v5 27/70] mm: Convert vma_lookup() to use mtree_load() Liam Howlett
2022-02-02  2:42 ` [PATCH v5 28/70] mm/mmap: Move mmap_region() below do_munmap() Liam Howlett
2022-02-02  2:42 ` [PATCH v5 31/70] arm64: Remove mmap linked list from vdso Liam Howlett
2022-02-02  2:42 ` [PATCH v5 29/70] mm/mmap: Reorganize munmap to use maple states Liam Howlett
2022-02-02  2:42 ` [PATCH v5 30/70] mm/mmap: Change do_brk_munmap() to use do_mas_align_munmap() Liam Howlett
2022-02-02  2:42 ` [PATCH v5 32/70] parisc: Remove mmap linked list from cache handling Liam Howlett
2022-02-02  2:42 ` [PATCH v5 33/70] powerpc: Remove mmap linked list walks Liam Howlett
2022-02-02  2:42 ` [PATCH v5 34/70] s390: Remove vma " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 37/70] cxl: Remove vma linked list walk Liam Howlett
2022-02-02  2:42 ` [PATCH v5 35/70] x86: Remove vma linked list walks Liam Howlett
2022-02-02  2:42 ` [PATCH v5 36/70] xtensa: " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 39/70] um: Remove vma linked list walk Liam Howlett
2022-02-02  2:42 ` [PATCH v5 38/70] optee: " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 41/70] coredump: " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 42/70] exec: Use VMA iterator instead of linked list Liam Howlett
2022-02-02  2:42 ` [PATCH v5 40/70] binfmt_elf: Remove vma linked list walk Liam Howlett
2022-02-02  2:42 ` [PATCH v5 44/70] fs/proc/task_mmu: Stop using linked list and highest_vm_end Liam Howlett
2022-02-02  2:42 ` [PATCH v5 43/70] fs/proc/base: Use maple tree iterators in place of linked list Liam Howlett
2022-02-02  2:42 ` [PATCH v5 47/70] acct: Use VMA iterator instead " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 46/70] ipc/shm: " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 45/70] userfaultfd: Use maple tree iterator to iterate VMAs Liam Howlett
2022-02-02  2:42 ` [PATCH v5 49/70] sched: Use maple tree iterator to walk VMAs Liam Howlett
2022-02-02  2:42 ` [PATCH v5 48/70] perf: Use VMA iterator Liam Howlett
2022-02-02  2:42 ` [PATCH v5 52/70] mm/gup: Use maple tree navigation instead of linked list Liam Howlett
2022-02-02  2:42 ` [PATCH v5 50/70] fork: Use VMA iterator Liam Howlett
2022-02-02  2:42 ` [PATCH v5 51/70] bpf: Remove VMA linked list Liam Howlett
2022-02-02  2:42 ` [PATCH v5 53/70] mm/khugepaged: Stop using vma " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 54/70] mm/ksm: Use vma iterators instead of " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 57/70] mm/mempolicy: Use vma iterator & maple state " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 55/70] mm/madvise: Use vma_find() " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 56/70] mm/memcontrol: Stop using mm->highest_vm_end Liam Howlett
2022-02-02  2:42 ` [PATCH v5 59/70] mm/mprotect: Use maple tree navigation instead of vma linked list Liam Howlett
2022-02-02  2:42 ` [PATCH v5 58/70] mm/mlock: Use vma iterator and " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 60/70] mm/mremap: Use vma_find_intersection() " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 62/70] mm/oom_kill: Use maple tree iterators " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 61/70] mm/msync: Use vma_find() " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 63/70] mm/pagewalk: " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 64/70] mm/swapfile: Use vma iterator " Liam Howlett
2022-02-02  2:42 ` [PATCH v5 65/70] i915: Use the VMA iterator Liam Howlett
2022-02-02  2:42 ` [PATCH v5 66/70] nommu: Remove uses of VMA linked list Liam Howlett
2022-02-02  2:42 ` [PATCH v5 67/70] riscv: Use vma iterator for vdso Liam Howlett
2022-02-02  2:42 ` [PATCH v5 68/70] mm: Remove the vma linked list Liam Howlett
2022-02-03 12:08   ` Mark Hemment
2022-02-03 17:25     ` Liam Howlett
2022-02-02  2:42 ` [PATCH v5 69/70] mm/mmap: Drop range_has_overlap() function Liam Howlett
2022-02-02  2:42 ` [PATCH v5 70/70] mm/mmap.c: Pass in mapping to __vma_link_file() Liam Howlett
2022-02-04 11:28 ` [PATCH v5 09/70] mm: Start tracking VMAs with maple tree David Howells
2022-02-11 18:23   ` Liam Howlett

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220202024137.2516438-10-Liam.Howlett@oracle.com \
    --to=liam.howlett@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=maple-tree@lists.infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.