All of lore.kernel.org
 help / color / mirror / Atom feed
From: Liam Howlett <liam.howlett@oracle.com>
To: "maple-tree@lists.infradead.org" <maple-tree@lists.infradead.org>,
	"linux-mm@kvack.org" <linux-mm@kvack.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	David Hildenbrand <david@redhat.com>,
	Douglas Gilbert <dgilbert@interlog.com>
Cc: Song Liu <songliubraving@fb.com>,
	Davidlohr Bueso <dave@stgolabs.net>,
	"Paul E . McKenney" <paulmck@kernel.org>,
	Matthew Wilcox <willy@infradead.org>,
	David Rientjes <rientjes@google.com>,
	Axel Rasmussen <axelrasmussen@google.com>,
	Suren Baghdasaryan <surenb@google.com>,
	Vlastimil Babka <vbabka@suse.cz>, Rik van Riel <riel@surriel.com>,
	Peter Zijlstra <peterz@infradead.org>
Subject: [PATCH v3 06/66] mm: Start tracking VMAs with maple tree
Date: Tue, 5 Oct 2021 01:30:33 +0000	[thread overview]
Message-ID: <20211005012959.1110504-7-Liam.Howlett@oracle.com> (raw)
In-Reply-To: <20211005012959.1110504-1-Liam.Howlett@oracle.com>

From: "Liam R. Howlett" <Liam.Howlett@Oracle.com>

Start tracking the VMAs with the new maple tree structure in parallel
with the rb_tree.  Add debug and trace events for maple tree operations
and duplicate the rb_tree that is created on forks into the maple tree.

In this commit, the maple tree is added to the mm_struct including the
mm_init struct, added support in required mm/mmap functions, added
tracking in kernel/fork for process forking, and used to find the
unmapped_area and checked against what the rbtree finds.

Signed-off-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
---
 arch/x86/kernel/tboot.c     |   1 +
 drivers/firmware/efi/efi.c  |   1 +
 include/linux/mm.h          |   2 +
 include/linux/mm_types.h    |   2 +
 include/trace/events/mmap.h |  71 +++++++++++
 init/main.c                 |   2 +
 kernel/fork.c               |   4 +
 mm/init-mm.c                |   2 +
 mm/internal.h               |  77 ++++++++++++
 mm/mmap.c                   | 229 +++++++++++++++++++++++++++++++++++-
 10 files changed, 389 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index f9af561c3cd4..6f978f722dff 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -98,6 +98,7 @@ void __init tboot_probe(void)
 static pgd_t *tboot_pg_dir;
 static struct mm_struct tboot_mm = {
 	.mm_rb          = RB_ROOT,
+	.mm_mt          = MTREE_INIT(mm_mt, MAPLE_ALLOC_RANGE),
 	.pgd            = swapper_pg_dir,
 	.mm_users       = ATOMIC_INIT(2),
 	.mm_count       = ATOMIC_INIT(1),
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 847f33ffc4ae..1aaecca36e23 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -55,6 +55,7 @@ static unsigned long __initdata rt_prop = EFI_INVALID_TABLE_ADDR;
 
 struct mm_struct efi_mm = {
 	.mm_rb			= RB_ROOT,
+	.mm_mt			= MTREE_INIT(mm_mt, MAPLE_ALLOC_RANGE),
 	.mm_users		= ATOMIC_INIT(2),
 	.mm_count		= ATOMIC_INIT(1),
 	.write_protect_seq      = SEQCNT_ZERO(efi_mm.write_protect_seq),
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 73a52aba448f..df592bf937f9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2500,6 +2500,8 @@ extern bool arch_has_descending_max_zone_pfns(void);
 /* nommu.c */
 extern atomic_long_t mmap_pages_allocated;
 extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
+/* maple_tree */
+void vma_store(struct mm_struct *mm, struct vm_area_struct *vma);
 
 /* interval_tree.c */
 void vma_interval_tree_insert(struct vm_area_struct *node,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7f8ee09c711f..2fc27dff8b6c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -8,6 +8,7 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/rbtree.h>
+#include <linux/maple_tree.h>
 #include <linux/rwsem.h>
 #include <linux/completion.h>
 #include <linux/cpumask.h>
@@ -402,6 +403,7 @@ struct kioctx_table;
 struct mm_struct {
 	struct {
 		struct vm_area_struct *mmap;		/* list of VMAs */
+		struct maple_tree mm_mt;
 		struct rb_root mm_rb;
 		u64 vmacache_seqnum;                   /* per-thread vmacache */
 #ifdef CONFIG_MMU
diff --git a/include/trace/events/mmap.h b/include/trace/events/mmap.h
index 4661f7ba07c0..4ffe3d348966 100644
--- a/include/trace/events/mmap.h
+++ b/include/trace/events/mmap.h
@@ -42,6 +42,77 @@ TRACE_EVENT(vm_unmapped_area,
 		__entry->low_limit, __entry->high_limit, __entry->align_mask,
 		__entry->align_offset)
 );
+
+TRACE_EVENT(vma_mt_szero,
+	TP_PROTO(struct mm_struct *mm, unsigned long start,
+		 unsigned long end),
+
+	TP_ARGS(mm, start, end),
+
+	TP_STRUCT__entry(
+			__field(struct mm_struct*, mm)
+			__field(unsigned long, start)
+			__field(unsigned long, end)
+	),
+
+	TP_fast_assign(
+			__entry->mm		= mm;
+			__entry->start		= start;
+			__entry->end		= end - 1;
+	),
+
+	TP_printk("mt_mod %px, (NULL), SNULL, %lu, %lu,",
+		  __entry->mm,
+		  (unsigned long) __entry->start,
+		  (unsigned long) __entry->end
+	)
+);
+
+TRACE_EVENT(vma_mt_store,
+	TP_PROTO(struct mm_struct *mm, struct vm_area_struct *vma),
+
+	TP_ARGS(mm, vma),
+
+	TP_STRUCT__entry(
+			__field(struct mm_struct*, mm)
+			__field(struct vm_area_struct*, vma)
+			__field(unsigned long, vm_start)
+			__field(unsigned long, vm_end)
+	),
+
+	TP_fast_assign(
+			__entry->mm		= mm;
+			__entry->vma		= vma;
+			__entry->vm_start	= vma->vm_start;
+			__entry->vm_end		= vma->vm_end - 1;
+	),
+
+	TP_printk("mt_mod %px, (%px), STORE, %lu, %lu,",
+		  __entry->mm, __entry->vma,
+		  (unsigned long) __entry->vm_start,
+		  (unsigned long) __entry->vm_end
+	)
+);
+
+
+TRACE_EVENT(exit_mmap,
+	TP_PROTO(struct mm_struct *mm),
+
+	TP_ARGS(mm),
+
+	TP_STRUCT__entry(
+			__field(struct mm_struct*, mm)
+	),
+
+	TP_fast_assign(
+			__entry->mm		= mm;
+	),
+
+	TP_printk("mt_mod %px, DESTROY\n",
+		  __entry->mm
+	)
+);
+
 #endif
 
 /* This part must be outside protection */
diff --git a/init/main.c b/init/main.c
index 81a79a77db46..1cc4c69a5a55 100644
--- a/init/main.c
+++ b/init/main.c
@@ -116,6 +116,7 @@ static int kernel_init(void *);
 
 extern void init_IRQ(void);
 extern void radix_tree_init(void);
+extern void maple_tree_init(void);
 
 /*
  * Debug helper: via this flag we know that we are in 'early bootup code'
@@ -1003,6 +1004,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
 		 "Interrupts were enabled *very* early, fixing it\n"))
 		local_irq_disable();
 	radix_tree_init();
+	maple_tree_init();
 
 	/*
 	 * Set up housekeeping before setting up workqueues to allow the unbound
diff --git a/kernel/fork.c b/kernel/fork.c
index 38681ad44c76..e8d602de40e8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -605,6 +605,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 		rb_link = &tmp->vm_rb.rb_right;
 		rb_parent = &tmp->vm_rb;
 
+		/* Link the vma into the MT */
+		vma_store(mm, tmp);
+
 		mm->map_count++;
 		if (!(tmp->vm_flags & VM_WIPEONFORK))
 			retval = copy_page_range(tmp, mpnt);
@@ -1038,6 +1041,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 {
 	mm->mmap = NULL;
 	mm->mm_rb = RB_ROOT;
+	mt_init_flags(&mm->mm_mt, MAPLE_ALLOC_RANGE);
 	mm->vmacache_seqnum = 0;
 	atomic_set(&mm->mm_users, 1);
 	atomic_set(&mm->mm_count, 1);
diff --git a/mm/init-mm.c b/mm/init-mm.c
index b4a6f38fb51d..e4a058f7e8b6 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/mm_types.h>
 #include <linux/rbtree.h>
+#include <linux/maple_tree.h>
 #include <linux/rwsem.h>
 #include <linux/spinlock.h>
 #include <linux/list.h>
@@ -28,6 +29,7 @@
  */
 struct mm_struct init_mm = {
 	.mm_rb		= RB_ROOT,
+	.mm_mt		= MTREE_INIT(mm_mt, MAPLE_ALLOC_RANGE),
 	.pgd		= swapper_pg_dir,
 	.mm_users	= ATOMIC_INIT(2),
 	.mm_count	= ATOMIC_INIT(1),
diff --git a/mm/internal.h b/mm/internal.h
index cf3cb933eba3..3f45e1d0c02b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -340,6 +340,83 @@ static inline bool is_data_mapping(vm_flags_t flags)
 	return (flags & (VM_WRITE | VM_SHARED | VM_STACK)) == VM_WRITE;
 }
 
+/* Maple tree operations using VMAs */
+/*
+ * vma_mas_store() - Store a VMA in the maple tree.
+ * @vma: The vm_area_struct
+ * @mas: The maple state
+ *
+ * Efficient way to store a VMA in the maple tree when the @mas has already
+ * walked to the correct location.
+ *
+ * Note: the end address is inclusive in the maple tree.
+ */
+static inline int vma_mas_store(struct vm_area_struct *vma, struct ma_state *mas)
+{
+	int ret;
+
+#ifdef CONFIG_DEBUG_MAPLE_TREE
+	/* Make sure no VMAs are about to be lost. */
+	MA_STATE(test, mas->tree, vma->vm_start, vma->vm_end - 1);
+	struct vm_area_struct *vma_mas;
+	int count = 0;
+
+	mas_for_each(&test, vma_mas, vma->vm_end - 1) {
+		/* Rule out vma_expand */
+		if ((vma->vm_start != vma_mas->vm_start) &&
+		    (vma->vm_end != vma_mas->vm_end))
+			count++;
+	}
+
+	BUG_ON(count);
+
+	BUG_ON(mas->min > vma->vm_start);
+	BUG_ON(mas->index > vma->vm_start);
+#endif
+	mas->index = vma->vm_start;
+	mas->last = vma->vm_end - 1;
+	mas_lock(mas);
+	ret = mas_store_gfp(mas, vma, GFP_KERNEL);
+	mas_unlock(mas);
+	return ret;
+}
+
+/*
+ * vma_mas_remove() - Remove a VMA from the maple tree.
+ * @vma: The vm_area_struct
+ * @mas: The maple state
+ *
+ * Efficient way to remove a VMA from the maple tree when the @mas has already
+ * been established and points to the correct location.
+ * Note: the end address is inclusive in the maple tree.
+ */
+static inline int vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas)
+{
+	int ret;
+
+#ifdef CONFIG_DEBUG_MAPLE_TREE
+	/* Make sure no VMAs are about to be lost. */
+	MA_STATE(test, mas->tree, vma->vm_start, vma->vm_end - 1);
+	struct vm_area_struct *vma_mas;
+	int count = 0;
+
+	mas_for_each(&test, vma_mas, vma->vm_end - 1)
+		count++;
+
+	BUG_ON(count != 1);
+
+	BUG_ON(mas->min > vma->vm_start);
+	BUG_ON(mas->index > vma->vm_start);
+	BUG_ON(mas->min > mas->index);
+#endif
+	mas->index = vma->vm_start;
+	mas->last = vma->vm_end - 1;
+	mas_lock(mas);
+	ret = mas_store_gfp(mas, NULL, GFP_KERNEL);
+	mas_unlock(mas);
+	return ret;
+}
+
 /* mm/util.c */
 void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
 		struct vm_area_struct *prev);
diff --git a/mm/mmap.c b/mm/mmap.c
index 88dcc5c25225..b9078e6d7d61 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -375,7 +375,73 @@ static int browse_rb(struct mm_struct *mm)
 	}
 	return bug ? -1 : i;
 }
+#if defined(CONFIG_DEBUG_MAPLE_TREE)
+extern void mt_validate(struct maple_tree *mt);
+extern void mt_dump(const struct maple_tree *mt);
 
+/* Validate the maple tree */
+static void validate_mm_mt(struct mm_struct *mm)
+{
+	struct maple_tree *mt = &mm->mm_mt;
+	struct vm_area_struct *vma_mt, *vma = mm->mmap;
+
+	MA_STATE(mas, mt, 0, 0);
+	rcu_read_lock();
+	mas_for_each(&mas, vma_mt, ULONG_MAX) {
+		if (xa_is_zero(vma_mt))
+			continue;
+
+		if (!vma)
+			break;
+
+		if ((vma != vma_mt) ||
+		    (vma->vm_start != vma_mt->vm_start) ||
+		    (vma->vm_end != vma_mt->vm_end) ||
+		    (vma->vm_start != mas.index) ||
+		    (vma->vm_end - 1 != mas.last)) {
+			pr_emerg("issue in %s\n", current->comm);
+			dump_stack();
+#ifdef CONFIG_DEBUG_VM
+			dump_vma(vma_mt);
+			pr_emerg("and next in rb\n");
+			dump_vma(vma->vm_next);
+#endif
+			pr_emerg("mt piv: %px %lu - %lu\n", vma_mt,
+				 mas.index, mas.last);
+			pr_emerg("mt vma: %px %lu - %lu\n", vma_mt,
+				 vma_mt->vm_start, vma_mt->vm_end);
+			pr_emerg("rb vma: %px %lu - %lu\n", vma,
+				 vma->vm_start, vma->vm_end);
+			pr_emerg("rb->next = %px %lu - %lu\n", vma->vm_next,
+					vma->vm_next->vm_start, vma->vm_next->vm_end);
+
+			mt_dump(mas.tree);
+			if (vma_mt->vm_end != mas.last + 1) {
+				pr_err("vma: %px vma_mt %lu-%lu\tmt %lu-%lu\n",
+						mm, vma_mt->vm_start, vma_mt->vm_end,
+						mas.index, mas.last);
+				mt_dump(mas.tree);
+			}
+			VM_BUG_ON_MM(vma_mt->vm_end != mas.last + 1, mm);
+			if (vma_mt->vm_start != mas.index) {
+				pr_err("vma: %px vma_mt %px %lu - %lu doesn't match\n",
+						mm, vma_mt, vma_mt->vm_start, vma_mt->vm_end);
+				mt_dump(mas.tree);
+			}
+			VM_BUG_ON_MM(vma_mt->vm_start != mas.index, mm);
+		}
+		VM_BUG_ON(vma != vma_mt);
+		vma = vma->vm_next;
+
+	}
+	VM_BUG_ON(vma);
+
+	rcu_read_unlock();
+	mt_validate(&mm->mm_mt);
+}
+#else
+#define validate_mm_mt(root) do { } while (0)
+#endif
 static void validate_mm_rb(struct rb_root *root, struct vm_area_struct *ignore)
 {
 	struct rb_node *nd;
@@ -430,6 +496,7 @@ static void validate_mm(struct mm_struct *mm)
 }
 #else
 #define validate_mm_rb(root, ignore) do { } while (0)
+#define validate_mm_mt(root) do { } while (0)
 #define validate_mm(mm) do { } while (0)
 #endif
 
@@ -609,6 +676,7 @@ static unsigned long count_vma_pages_range(struct mm_struct *mm,
 		unsigned long addr, unsigned long end)
 {
 	unsigned long nr_pages = 0;
+	unsigned long nr_mt_pages = 0;
 	struct vm_area_struct *vma;
 
 	/* Find first overlapping mapping */
@@ -630,6 +698,13 @@ static unsigned long count_vma_pages_range(struct mm_struct *mm,
 		nr_pages += overlap_len >> PAGE_SHIFT;
 	}
 
+	mt_for_each(&mm->mm_mt, vma, addr, end) {
+		nr_mt_pages +=
+			(min(end, vma->vm_end) - vma->vm_start) >> PAGE_SHIFT;
+	}
+
+	VM_BUG_ON_MM(nr_pages != nr_mt_pages, mm);
+
 	return nr_pages;
 }
 
@@ -674,11 +749,49 @@ static void __vma_link_file(struct vm_area_struct *vma)
 	}
 }
 
+/*
+ * vma_mt_szero() - Set a given range to zero.  Used when modifying a
+ * vm_area_struct start or end.
+ *
+ * @mm: The struct_mm
+ * @start: The start address to zero
+ * @end: The end address to zero.
+ *
+ * Must not hold the maple tree lock.
+ */
+static inline void vma_mt_szero(struct mm_struct *mm, unsigned long start,
+				unsigned long end)
+{
+	trace_vma_mt_szero(mm, start, end);
+	mtree_store_range(&mm->mm_mt, start, end - 1, NULL, GFP_KERNEL);
+}
+
+/*
+ * vma_mt_store() - Store a given vm_area_struct in the maple tree.
+ *
+ * @mm: The struct_mm
+ * @vma: The vm_area_struct to store in the maple tree.
+ *
+ * Must not hold the maple tree lock.
+ */
+static inline
+void vma_mt_store(struct mm_struct *mm, struct vm_area_struct *vma)
+{
+	trace_vma_mt_store(mm, vma);
+	mtree_store_range(&mm->mm_mt, vma->vm_start, vma->vm_end - 1, vma,
+		GFP_KERNEL);
+}
+
+void vma_store(struct mm_struct *mm, struct vm_area_struct *vma) {
+	vma_mt_store(mm, vma);
+}
+
 static void
 __vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct vm_area_struct *prev, struct rb_node **rb_link,
 	struct rb_node *rb_parent)
 {
+	vma_mt_store(mm, vma);
 	__vma_link_list(mm, vma, prev);
 	__vma_link_rb(mm, vma, rb_link, rb_parent);
 }
@@ -751,6 +864,9 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	long adjust_next = 0;
 	int remove_next = 0;
 
+	validate_mm(mm);
+	validate_mm_mt(mm);
+
 	if (next && !insert) {
 		struct vm_area_struct *exporter = NULL, *importer = NULL;
 
@@ -876,17 +992,28 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	}
 
 	if (start != vma->vm_start) {
+		unsigned long old_start = vma->vm_start;
 		vma->vm_start = start;
+		if (old_start < start)
+			vma_mt_szero(mm, old_start, start);
 		start_changed = true;
 	}
 	if (end != vma->vm_end) {
+		unsigned long old_end = vma->vm_end;
 		vma->vm_end = end;
+		if (old_end > end)
+			vma_mt_szero(mm, end, old_end);
 		end_changed = true;
 	}
+
+	if (end_changed || start_changed)
+		vma_mt_store(mm, vma);
+
 	vma->vm_pgoff = pgoff;
 	if (adjust_next) {
 		next->vm_start += adjust_next;
 		next->vm_pgoff += adjust_next >> PAGE_SHIFT;
+		vma_mt_store(mm, next);
 	}
 
 	if (file) {
@@ -900,6 +1027,8 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 		/*
 		 * vma_merge has merged next into vma, and needs
 		 * us to remove next before dropping the locks.
+		 * Since we have expanded over this vma, the maple tree will
+		 * have overwritten by storing the value
 		 */
 		if (remove_next != 3)
 			__vma_unlink(mm, next, next);
@@ -1019,6 +1148,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
 		uprobe_mmap(insert);
 
 	validate_mm(mm);
+	validate_mm_mt(mm);
 
 	return 0;
 }
@@ -1166,6 +1296,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 	struct vm_area_struct *area, *next;
 	int err;
 
+	validate_mm_mt(mm);
 	/*
 	 * We later require that vma->vm_flags == vm_flags,
 	 * so this tests vma->vm_flags & VM_SPECIAL, too.
@@ -1241,6 +1372,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 		khugepaged_enter_vma_merge(area, vm_flags);
 		return area;
 	}
+	validate_mm_mt(mm);
 
 	return NULL;
 }
@@ -1723,6 +1855,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	struct rb_node **rb_link, *rb_parent;
 	unsigned long charged = 0;
 
+	validate_mm_mt(mm);
 	/* Check against address space limit. */
 	if (!may_expand_vm(mm, vm_flags, len >> PAGE_SHIFT)) {
 		unsigned long nr_pages;
@@ -1870,6 +2003,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 
 	vma_set_page_prot(vma);
 
+	validate_mm_mt(mm);
 	return addr;
 
 unmap_and_free_vma:
@@ -1886,6 +2020,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 unacct_error:
 	if (charged)
 		vm_unacct_memory(charged);
+	validate_mm_mt(mm);
 	return error;
 }
 
@@ -1902,12 +2037,21 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma;
 	unsigned long length, low_limit, high_limit, gap_start, gap_end;
+	unsigned long gap;
+	MA_STATE(mas, &mm->mm_mt, 0, 0);
 
 	/* Adjust search length to account for worst case alignment overhead */
 	length = info->length + info->align_mask;
 	if (length < info->length)
 		return -ENOMEM;
 
+	rcu_read_lock();
+	mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1,
+			   length);
+	rcu_read_unlock();
+	gap = mas.index;
+	gap += (info->align_offset - gap) & info->align_mask;
+
 	/* Adjust search limits by the desired length */
 	if (info->high_limit < length)
 		return -ENOMEM;
@@ -1989,20 +2133,39 @@ static unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 
 	VM_BUG_ON(gap_start + info->length > info->high_limit);
 	VM_BUG_ON(gap_start + info->length > gap_end);
+
+	VM_BUG_ON(gap != gap_start);
 	return gap_start;
 }
 
+static inline unsigned long top_area_aligned(struct vm_unmapped_area_info *info,
+					     unsigned long end)
+{
+	return (end - info->length - info->align_offset) & (~info->align_mask);
+}
+
 static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
 {
 	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma = NULL;
 	unsigned long length, low_limit, high_limit, gap_start, gap_end;
+	unsigned long gap;
+
+	MA_STATE(mas, &mm->mm_mt, 0, 0);
+	validate_mm_mt(mm);
 
 	/* Adjust search length to account for worst case alignment overhead */
 	length = info->length + info->align_mask;
 	if (length < info->length)
 		return -ENOMEM;
 
+	rcu_read_lock();
+	mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1,
+		   length);
+	rcu_read_unlock();
+	gap = (mas.index + info->align_mask) & ~info->align_mask;
+	gap -= info->align_offset & info->align_mask;
+
 	/*
 	 * Adjust search limits by the desired length.
 	 * See implementation comment at top of unmapped_area().
@@ -2088,6 +2251,32 @@ static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info)
 
 	VM_BUG_ON(gap_end < info->low_limit);
 	VM_BUG_ON(gap_end < gap_start);
+
+	if (gap != gap_end) {
+		pr_err("%s: %px Gap was found: mt %lu gap_end %lu\n", __func__,
+		       mm, gap, gap_end);
+		pr_err("window was %lu - %lu size %lu\n", info->high_limit,
+		       info->low_limit, length);
+		pr_err("mas.min %lu max %lu mas.last %lu\n", mas.min, mas.max,
+		       mas.last);
+		pr_err("mas.index %lu align mask %lu offset %lu\n", mas.index,
+		       info->align_mask, info->align_offset);
+		pr_err("rb_find_vma find on %lu => %px (%px)\n", mas.index,
+		       find_vma(mm, mas.index), vma);
+#if defined(CONFIG_DEBUG_MAPLE_TREE)
+		mt_dump(&mm->mm_mt);
+#endif
+		{
+			struct vm_area_struct *dv = mm->mmap;
+
+			while (dv) {
+				printk("vma %px %lu-%lu\n", dv, dv->vm_start, dv->vm_end);
+				dv = dv->vm_next;
+			}
+		}
+		VM_BUG_ON(gap != gap_end);
+	}
+
 	return gap_end;
 }
 
@@ -2301,7 +2490,6 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
 		vmacache_update(addr, vma);
 	return vma;
 }
-
 EXPORT_SYMBOL(find_vma);
 
 /*
@@ -2382,6 +2570,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 	unsigned long gap_addr;
 	int error = 0;
 
+	validate_mm_mt(mm);
 	if (!(vma->vm_flags & VM_GROWSUP))
 		return -EFAULT;
 
@@ -2458,6 +2647,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 	anon_vma_unlock_write(vma->anon_vma);
 	khugepaged_enter_vma_merge(vma, vma->vm_flags);
 	validate_mm(mm);
+	validate_mm_mt(mm);
 	return error;
 }
 #endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
@@ -2472,6 +2662,7 @@ int expand_downwards(struct vm_area_struct *vma,
 	struct vm_area_struct *prev;
 	int error = 0;
 
+	validate_mm(mm);
 	address &= PAGE_MASK;
 	if (address < mmap_min_addr)
 		return -EPERM;
@@ -2525,6 +2716,8 @@ int expand_downwards(struct vm_area_struct *vma,
 				anon_vma_interval_tree_pre_update_vma(vma);
 				vma->vm_start = address;
 				vma->vm_pgoff -= grow;
+				/* Overwrite old entry in mtree. */
+				vma_mt_store(mm, vma);
 				anon_vma_interval_tree_post_update_vma(vma);
 				vma_gap_update(vma);
 				spin_unlock(&mm->page_table_lock);
@@ -2666,6 +2859,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	insertion_point = (prev ? &prev->vm_next : &mm->mmap);
 	vma->vm_prev = NULL;
+	vma_mt_szero(mm, vma->vm_start, end);
 	do {
 		vma_rb_erase(vma, &mm->mm_rb);
 		mm->map_count--;
@@ -2704,6 +2898,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	struct vm_area_struct *new;
 	int err;
+	validate_mm_mt(mm);
 
 	if (vma->vm_ops && vma->vm_ops->may_split) {
 		err = vma->vm_ops->may_split(vma, addr);
@@ -2756,6 +2951,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 	mpol_put(vma_policy(new));
  out_free_vma:
 	vm_area_free(new);
+	validate_mm_mt(mm);
 	return err;
 }
 
@@ -3027,6 +3223,7 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla
 	pgoff_t pgoff = addr >> PAGE_SHIFT;
 	int error;
 	unsigned long mapped_addr;
+	validate_mm_mt(mm);
 
 	/* Until we need other flags, refuse anything except VM_EXEC. */
 	if ((flags & (~VM_EXEC)) != 0)
@@ -3084,6 +3281,7 @@ static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long fla
 	if (flags & VM_LOCKED)
 		mm->locked_vm += (len >> PAGE_SHIFT);
 	vma->vm_flags |= VM_SOFTDIRTY;
+	validate_mm_mt(mm);
 	return 0;
 }
 
@@ -3182,6 +3380,9 @@ void exit_mmap(struct mm_struct *mm)
 		vma = remove_vma(vma);
 		cond_resched();
 	}
+
+	trace_exit_mmap(mm);
+	mtree_destroy(&mm->mm_mt);
 	vm_unacct_memory(nr_accounted);
 }
 
@@ -3193,10 +3394,25 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
 {
 	struct vm_area_struct *prev;
 	struct rb_node **rb_link, *rb_parent;
+	unsigned long start = vma->vm_start;
+	struct vm_area_struct *overlap = NULL;
 
 	if (find_vma_links(mm, vma->vm_start, vma->vm_end,
 			   &prev, &rb_link, &rb_parent))
 		return -ENOMEM;
+
+	overlap = mt_find(&mm->mm_mt, &start, vma->vm_end - 1);
+	if (overlap) {
+
+		pr_err("Found vma ending at %lu\n", start - 1);
+		pr_err("vma : %lu => %lu-%lu\n", (unsigned long)overlap,
+				overlap->vm_start, overlap->vm_end - 1);
+#if defined(CONFIG_DEBUG_MAPLE_TREE)
+		mt_dump(&mm->mm_mt);
+#endif
+		BUG();
+	}
+
 	if ((vma->vm_flags & VM_ACCOUNT) &&
 	     security_vm_enough_memory_mm(mm, vma_pages(vma)))
 		return -ENOMEM;
@@ -3236,7 +3452,9 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 	struct vm_area_struct *new_vma, *prev;
 	struct rb_node **rb_link, *rb_parent;
 	bool faulted_in_anon_vma = true;
+	unsigned long index = addr;
 
+	validate_mm_mt(mm);
 	/*
 	 * If anonymous vma has not yet been faulted, update new pgoff
 	 * to match new location, to increase its chance of merging.
@@ -3248,6 +3466,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 
 	if (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent))
 		return NULL;	/* should never get here */
+	if (mt_find(&mm->mm_mt, &index, addr+len - 1))
+		BUG();
 	new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
 			    vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma),
 			    vma->vm_userfaultfd_ctx);
@@ -3291,6 +3511,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 		vma_link(mm, new_vma, prev, rb_link, rb_parent);
 		*need_rmap_locks = false;
 	}
+	validate_mm_mt(mm);
 	return new_vma;
 
 out_free_mempol:
@@ -3298,6 +3519,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 out_free_vma:
 	vm_area_free(new_vma);
 out:
+	validate_mm_mt(mm);
 	return NULL;
 }
 
@@ -3434,6 +3656,7 @@ static struct vm_area_struct *__install_special_mapping(
 	int ret;
 	struct vm_area_struct *vma;
 
+	validate_mm_mt(mm);
 	vma = vm_area_alloc(mm);
 	if (unlikely(vma == NULL))
 		return ERR_PTR(-ENOMEM);
@@ -3455,10 +3678,12 @@ static struct vm_area_struct *__install_special_mapping(
 
 	perf_event_mmap(vma);
 
+	validate_mm_mt(mm);
 	return vma;
 
 out:
 	vm_area_free(vma);
+	validate_mm_mt(mm);
 	return ERR_PTR(ret);
 }
 
-- 
2.30.2

  parent reply	other threads:[~2021-10-05  1:32 UTC|newest]

Thread overview: 67+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-05  1:30 [PATCH v3 00/66] Introducing the Maple Tree Liam Howlett
2021-10-05  1:30 ` [PATCH v3 01/66] radix tree test suite: Add pr_err define Liam Howlett
2021-10-05  1:30 ` [PATCH v3 02/66] radix tree test suite: Add kmem_cache_set_non_kernel() Liam Howlett
2021-10-05  1:30 ` [PATCH v3 04/66] radix tree test suite: Add support for slab bulk APIs Liam Howlett
2021-10-05  1:30 ` [PATCH v3 03/66] radix tree test suite: Add allocation counts and size to kmem_cache Liam Howlett
2021-10-05  1:30 ` [PATCH v3 05/66] Maple Tree: Add new data structure Liam Howlett
2021-10-05  1:30 ` Liam Howlett [this message]
2021-10-05  1:30 ` [PATCH v3 07/66] mm/mmap: Use the maple tree in find_vma() instead of the rbtree Liam Howlett
2021-10-05  1:30 ` [PATCH v3 08/66] mm/mmap: Use the maple tree for find_vma_prev() " Liam Howlett
2021-10-05  1:30 ` [PATCH v3 09/66] mm/mmap: Use maple tree for unmapped_area{_topdown} Liam Howlett
2021-10-05  1:30 ` [PATCH v3 10/66] kernel/fork: Use maple tree for dup_mmap() during forking Liam Howlett
2021-10-05  1:30 ` [PATCH v3 11/66] mm: Remove rb tree Liam Howlett
2021-10-05  1:30 ` [PATCH v3 14/66] mm: Optimize find_exact_vma() to use vma_lookup() Liam Howlett
2021-10-05  1:30 ` [PATCH v3 12/66] mmap: Change zeroing of maple tree in __vma_adjust Liam Howlett
2021-10-05  1:30 ` [PATCH v3 13/66] xen/privcmd: Optimized privcmd_ioctl_mmap() by using vma_lookup() Liam Howlett
2021-10-05  1:30 ` [PATCH v3 16/66] mm/mmap: Change do_brk_flags() to expand existing VMA and add do_brk_munmap() Liam Howlett
2021-10-05  1:30 ` [PATCH v3 15/66] mm/khugepaged: Optimize collapse_pte_mapped_thp() by using vma_lookup() Liam Howlett
2021-10-05  1:30 ` [PATCH v3 17/66] mm: Use maple tree operations for find_vma_intersection() and find_vma() Liam Howlett
2021-10-05  1:30 ` [PATCH v3 18/66] mm/mmap: Use advanced maple tree API for mmap_region() Liam Howlett
2021-10-05  1:30 ` [PATCH v3 19/66] mm: Remove vmacache Liam Howlett
2021-10-05  1:30 ` [PATCH v3 20/66] mm/mmap: Move mmap_region() below do_munmap() Liam Howlett
2021-10-05  1:30 ` [PATCH v3 21/66] mm/mmap: Convert count_vma_pages_range() to use ma_state Liam Howlett
2021-10-05  1:30 ` [PATCH v3 22/66] mm/mmap: Reorganize munmap to use maple states Liam Howlett
2021-10-05  1:30 ` [PATCH v3 24/66] mm: Introduce vma_next() and vma_prev() Liam Howlett
2021-10-05  1:30 ` [PATCH v3 23/66] mm/mmap: Change do_brk_munmap() to use do_mas_align_munmap() Liam Howlett
2021-10-05  1:30 ` [PATCH v3 25/66] arch/arm64: Remove mmap linked list from vdso Liam Howlett
2021-10-05  1:30 ` [PATCH v3 26/66] arch/parisc: Remove mmap linked list from kernel/cache Liam Howlett
2021-10-05  1:30 ` [PATCH v3 27/66] arch/powerpc: Remove mmap linked list from mm/book3s32/tlb Liam Howlett
2021-10-05  1:30 ` [PATCH v3 28/66] arch/powerpc: Remove mmap linked list from mm/book3s64/subpage_prot Liam Howlett
2021-10-05  1:30 ` [PATCH v3 29/66] arch/s390: Use maple tree iterators instead of linked list Liam Howlett
2021-10-05  1:30 ` [PATCH v3 31/66] arch/xtensa: Use maple tree iterators for unmapped area Liam Howlett
2021-10-05  1:30 ` [PATCH v3 30/66] arch/x86: Use maple tree iterators for vdso/vma Liam Howlett
2021-10-05  1:30 ` [PATCH v3 34/66] fs/binfmt_elf: Use maple tree iterators for fill_files_note() Liam Howlett
2021-10-05  1:30 ` [PATCH v3 32/66] drivers/misc/cxl: Use maple tree iterators for cxl_prefault_vma() Liam Howlett
2021-10-05  1:30 ` [PATCH v3 33/66] drivers/tee/optee: Use maple tree iterators for __check_mem_type() Liam Howlett
2021-10-05  1:30 ` [PATCH v3 35/66] fs/coredump: Use maple tree iterators in place of linked list Liam Howlett
2021-10-05  1:30 ` [PATCH v3 36/66] fs/exec: Use vma_next() instead " Liam Howlett
2021-10-05  1:30 ` [PATCH v3 37/66] fs/proc/base: Use maple tree iterators in place " Liam Howlett
2021-10-05  1:30 ` [PATCH v3 38/66] fs/proc/task_mmu: Stop using linked list and highest_vm_end Liam Howlett
2021-10-05  1:30 ` [PATCH v3 39/66] fs/userfaultfd: Stop using vma linked list Liam Howlett
2021-10-05  1:30 ` [PATCH v3 40/66] ipc/shm: Stop using the " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 41/66] kernel/acct: Use maple tree iterators instead of " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 43/66] kernel/events/uprobes: " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 42/66] kernel/events/core: " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 46/66] arch/um/kernel/tlb: Stop using " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 44/66] kernel/sched/fair: Use maple tree iterators instead of " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 45/66] kernel/fork: " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 47/66] bpf: Remove VMA " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 48/66] mm/gup: Use maple tree navigation instead of " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 51/66] mm/madvise: Use vma_next instead of vma " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 49/66] mm/khugepaged: Use maple tree iterators " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 50/66] mm/ksm: " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 52/66] mm/memcontrol: Stop using mm->highest_vm_end Liam Howlett
2021-10-05  1:31 ` [PATCH v3 53/66] mm/mempolicy: Use maple tree iterators instead of vma linked list Liam Howlett
2021-10-05  1:31 ` [PATCH v3 54/66] mm/mlock: " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 56/66] mm/mremap: Use vma_next() " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 55/66] mm/mprotect: Use maple tree navigation " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 57/66] mm/msync: Use vma_next() " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 58/66] mm/oom_kill: Use maple tree iterators " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 59/66] mm/pagewalk: Use vma_next() " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 61/66] damon: Change vma iterator to mas_for_each Liam Howlett
2021-10-05  1:31 ` [PATCH v3 60/66] mm/swapfile: Use maple tree iterator instead of vma linked list Liam Howlett
2021-10-05  1:31 ` [PATCH v3 63/66] s390: Use the maple tree iterator for vdso Liam Howlett
2021-10-05  1:31 ` [PATCH v3 62/66] powerpc: Use " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 64/66] i915: Use the " Liam Howlett
2021-10-05  1:31 ` [PATCH v3 65/66] mm: Remove the vma linked list Liam Howlett
2021-10-05  1:31 ` [PATCH v3 66/66] mm/mmap: Drop range_has_overlap() function Liam Howlett

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211005012959.1110504-7-Liam.Howlett@oracle.com \
    --to=liam.howlett@oracle.com \
    --cc=akpm@linux-foundation.org \
    --cc=axelrasmussen@google.com \
    --cc=dave@stgolabs.net \
    --cc=david@redhat.com \
    --cc=dgilbert@interlog.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=maple-tree@lists.infradead.org \
    --cc=paulmck@kernel.org \
    --cc=peterz@infradead.org \
    --cc=riel@surriel.com \
    --cc=rientjes@google.com \
    --cc=songliubraving@fb.com \
    --cc=surenb@google.com \
    --cc=vbabka@suse.cz \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.