* [RFC] reduce latency in __purge_vmap_area_lazy
@ 2016-10-18  6:56 Christoph Hellwig
  2016-10-18  6:56 ` [PATCH 1/6] mm: refactor __purge_vmap_area_lazy Christoph Hellwig
                   ` (8 more replies)
  0 siblings, 9 replies; 24+ messages in thread
From: Christoph Hellwig @ 2016-10-18  6:56 UTC (permalink / raw)
  To: akpm
  Cc: joelaf, jszhang, chris, joaodias, linux-mm, linux-rt-users, linux-kernel

Hi all,

this is my spin at sorting out the long lock hold times in
__purge_vmap_area_lazy.  It is based on the patch from Joel sent this
week.  I don't have any good numbers for it, but it survived an
xfstests run on XFS which is a significant vmalloc user.  The
changelogs could still be improved as well, but I'd rather get it
out quickly for feedback and testing.

* [PATCH 1/6] mm: refactor __purge_vmap_area_lazy
  2016-10-18  6:56 [RFC] reduce latency in __purge_vmap_area_lazy Christoph Hellwig
@ 2016-10-18  6:56 ` Christoph Hellwig
  2016-10-18  6:56 ` [PATCH 2/6] mm: mark all calls into the vmalloc subsystem as potentially sleeping Christoph Hellwig
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 24+ messages in thread
From: Christoph Hellwig @ 2016-10-18  6:56 UTC (permalink / raw)
  To: akpm
  Cc: joelaf, jszhang, chris, joaodias, linux-mm, linux-rt-users, linux-kernel

Move the purge_lock synchronization to the callers, move the call to
purge_fragmented_blocks_allcpus at the beginning of the function to
the callers that need it, move the force_flush behavior to the caller
that needs it, and pass start and end by value instead of by reference.

No change in behavior.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 mm/vmalloc.c | 80 ++++++++++++++++++++++++++----------------------------------
 1 file changed, 35 insertions(+), 45 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f2481cb..d045a10 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -601,6 +601,13 @@ static unsigned long lazy_max_pages(void)
 
 static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
 
+/*
+ * Serialize vmap purging.  There is no actual critical section protected
+ * by this lock, but we want to avoid concurrent calls for performance
+ * reasons and to make pcpu_get_vm_areas() more deterministic.
+ */
+static DEFINE_SPINLOCK(vmap_purge_lock);
+
 /* for per-CPU blocks */
 static void purge_fragmented_blocks_allcpus(void);
 
@@ -615,59 +622,36 @@ void set_iounmap_nonlazy(void)
 
 /*
  * Purges all lazily-freed vmap areas.
- *
- * If sync is 0 then don't purge if there is already a purge in progress.
- * If force_flush is 1, then flush kernel TLBs between *start and *end even
- * if we found no lazy vmap areas to unmap (callers can use this to optimise
- * their own TLB flushing).
- * Returns with *start = min(*start, lowest purged address)
- *              *end = max(*end, highest purged address)
  */
-static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
-					int sync, int force_flush)
+static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
 {
-	static DEFINE_SPINLOCK(purge_lock);
 	struct llist_node *valist;
 	struct vmap_area *va;
 	struct vmap_area *n_va;
 	int nr = 0;
 
-	/*
-	 * If sync is 0 but force_flush is 1, we'll go sync anyway but callers
-	 * should not expect such behaviour. This just simplifies locking for
-	 * the case that isn't actually used at the moment anyway.
-	 */
-	if (!sync && !force_flush) {
-		if (!spin_trylock(&purge_lock))
-			return;
-	} else
-		spin_lock(&purge_lock);
-
-	if (sync)
-		purge_fragmented_blocks_allcpus();
+	lockdep_assert_held(&vmap_purge_lock);
 
 	valist = llist_del_all(&vmap_purge_list);
 	llist_for_each_entry(va, valist, purge_list) {
-		if (va->va_start < *start)
-			*start = va->va_start;
-		if (va->va_end > *end)
-			*end = va->va_end;
+		if (va->va_start < start)
+			start = va->va_start;
+		if (va->va_end > end)
+			end = va->va_end;
 		nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
 	}
 
-	if (nr)
-		atomic_sub(nr, &vmap_lazy_nr);
+	if (!nr)
+		return false;
 
-	if (nr || force_flush)
-		flush_tlb_kernel_range(*start, *end);
+	atomic_sub(nr, &vmap_lazy_nr);
+	flush_tlb_kernel_range(start, end);
 
-	if (nr) {
-		spin_lock(&vmap_area_lock);
-		llist_for_each_entry_safe(va, n_va, valist, purge_list)
-			__free_vmap_area(va);
-		spin_unlock(&vmap_area_lock);
-	}
-	spin_unlock(&purge_lock);
+	spin_lock(&vmap_area_lock);
+	llist_for_each_entry_safe(va, n_va, valist, purge_list)
+		__free_vmap_area(va);
+	spin_unlock(&vmap_area_lock);
+	return true;
 }
 
 /*
@@ -676,9 +660,10 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
  */
 static void try_purge_vmap_area_lazy(void)
 {
-	unsigned long start = ULONG_MAX, end = 0;
-
-	__purge_vmap_area_lazy(&start, &end, 0, 0);
+	if (spin_trylock(&vmap_purge_lock)) {
+		__purge_vmap_area_lazy(ULONG_MAX, 0);
+		spin_unlock(&vmap_purge_lock);
+	}
 }
 
 /*
@@ -686,9 +671,10 @@ static void try_purge_vmap_area_lazy(void)
  */
 static void purge_vmap_area_lazy(void)
 {
-	unsigned long start = ULONG_MAX, end = 0;
-
-	__purge_vmap_area_lazy(&start, &end, 1, 0);
+	spin_lock(&vmap_purge_lock);
+	purge_fragmented_blocks_allcpus();
+	__purge_vmap_area_lazy(ULONG_MAX, 0);
+	spin_unlock(&vmap_purge_lock);
 }
 
 /*
@@ -1094,7 +1080,11 @@ void vm_unmap_aliases(void)
 		rcu_read_unlock();
 	}
 
-	__purge_vmap_area_lazy(&start, &end, 1, flush);
+	spin_lock(&vmap_purge_lock);
+	purge_fragmented_blocks_allcpus();
+	if (!__purge_vmap_area_lazy(start, end) && flush)
+		flush_tlb_kernel_range(start, end);
+	spin_unlock(&vmap_purge_lock);
 }
 EXPORT_SYMBOL_GPL(vm_unmap_aliases);
 
-- 
2.1.4

* [PATCH 2/6] mm: mark all calls into the vmalloc subsystem as potentially sleeping
  2016-10-18  6:56 [RFC] reduce latency in __purge_vmap_area_lazy Christoph Hellwig
  2016-10-18  6:56 ` [PATCH 1/6] mm: refactor __purge_vmap_area_lazy Christoph Hellwig
@ 2016-10-18  6:56 ` Christoph Hellwig
  2016-10-18 10:33   ` Chris Wilson
  2016-10-19 11:15   ` Chris Wilson
  2016-10-18  6:56 ` [PATCH 3/6] mm: remove free_unmap_vmap_area_noflush Christoph Hellwig
                   ` (6 subsequent siblings)
  8 siblings, 2 replies; 24+ messages in thread
From: Christoph Hellwig @ 2016-10-18  6:56 UTC (permalink / raw)
  To: akpm
  Cc: joelaf, jszhang, chris, joaodias, linux-mm, linux-rt-users, linux-kernel

This is how everyone seems to already use them, but let's make that
explicit.
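
With CONFIG_DEBUG_ATOMIC_SLEEP, might_sleep() splats when the annotated
function is called from atomic context.  Roughly, from
include/linux/kernel.h:

	# define might_sleep() \
		do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)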

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 mm/vmalloc.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d045a10..9830514 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -365,7 +365,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
 	BUG_ON(offset_in_page(size));
 	BUG_ON(!is_power_of_2(align));
 
-	might_sleep_if(gfpflags_allow_blocking(gfp_mask));
+	might_sleep();
 
 	va = kmalloc_node(sizeof(struct vmap_area),
 			gfp_mask & GFP_RECLAIM_MASK, node);
@@ -1056,6 +1056,8 @@ void vm_unmap_aliases(void)
 	if (unlikely(!vmap_initialized))
 		return;
 
+	might_sleep();
+
 	for_each_possible_cpu(cpu) {
 		struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, cpu);
 		struct vmap_block *vb;
@@ -1098,6 +1100,7 @@ void vm_unmap_ram(const void *mem, unsigned int count)
 	unsigned long size = (unsigned long)count << PAGE_SHIFT;
 	unsigned long addr = (unsigned long)mem;
 
+	might_sleep();
 	BUG_ON(!addr);
 	BUG_ON(addr < VMALLOC_START);
 	BUG_ON(addr > VMALLOC_END);
@@ -1445,6 +1448,8 @@ struct vm_struct *remove_vm_area(const void *addr)
 {
 	struct vmap_area *va;
 
+	might_sleep();
+
 	va = find_vmap_area((unsigned long)addr);
 	if (va && va->flags & VM_VM_AREA) {
 		struct vm_struct *vm = va->vm;
-- 
2.1.4

* [PATCH 3/6] mm: remove free_unmap_vmap_area_noflush
  2016-10-18  6:56 [RFC] reduce latency in __purge_vmap_area_lazy Christoph Hellwig
  2016-10-18  6:56 ` [PATCH 1/6] mm: refactor __purge_vmap_area_lazy Christoph Hellwig
  2016-10-18  6:56 ` [PATCH 2/6] mm: mark all calls into the vmalloc subsystem as potentially sleeping Christoph Hellwig
@ 2016-10-18  6:56 ` Christoph Hellwig
  2016-10-18  6:56 ` [PATCH 4/6] mm: remove free_unmap_vmap_area_addr Christoph Hellwig
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 24+ messages in thread
From: Christoph Hellwig @ 2016-10-18  6:56 UTC (permalink / raw)
  To: akpm
  Cc: joelaf, jszhang, chris, joaodias, linux-mm, linux-rt-users, linux-kernel

Just inline it into the only caller.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 mm/vmalloc.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 9830514..8cedfa0 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -697,22 +697,13 @@ static void free_vmap_area_noflush(struct vmap_area *va)
 }
 
 /*
- * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been
- * called for the correct range previously.
- */
-static void free_unmap_vmap_area_noflush(struct vmap_area *va)
-{
-	unmap_vmap_area(va);
-	free_vmap_area_noflush(va);
-}
-
-/*
  * Free and unmap a vmap area
  */
 static void free_unmap_vmap_area(struct vmap_area *va)
 {
 	flush_cache_vunmap(va->va_start, va->va_end);
-	free_unmap_vmap_area_noflush(va);
+	unmap_vmap_area(va);
+	free_vmap_area_noflush(va);
 }
 
 static struct vmap_area *find_vmap_area(unsigned long addr)
-- 
2.1.4

* [PATCH 4/6] mm: remove free_unmap_vmap_area_addr
  2016-10-18  6:56 [RFC] reduce latency in __purge_vmap_area_lazy Christoph Hellwig
                   ` (2 preceding siblings ...)
  2016-10-18  6:56 ` [PATCH 3/6] mm: remove free_unmap_vmap_area_noflush Christoph Hellwig
@ 2016-10-18  6:56 ` Christoph Hellwig
  2016-10-21  0:46   ` Joel Fernandes
  2016-10-18  6:56 ` [PATCH 5/6] mm: turn vmap_purge_lock into a mutex Christoph Hellwig
                   ` (4 subsequent siblings)
  8 siblings, 1 reply; 24+ messages in thread
From: Christoph Hellwig @ 2016-10-18  6:56 UTC (permalink / raw)
  To: akpm
  Cc: joelaf, jszhang, chris, joaodias, linux-mm, linux-rt-users, linux-kernel

Just inline it into the only caller.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 mm/vmalloc.c | 21 ++++++++-------------
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 8cedfa0..2af2921 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -717,16 +717,6 @@ static struct vmap_area *find_vmap_area(unsigned long addr)
 	return va;
 }
 
-static void free_unmap_vmap_area_addr(unsigned long addr)
-{
-	struct vmap_area *va;
-
-	va = find_vmap_area(addr);
-	BUG_ON(!va);
-	free_unmap_vmap_area(va);
-}
-
-
 /*** Per cpu kva allocator ***/
 
 /*
@@ -1090,6 +1080,7 @@ void vm_unmap_ram(const void *mem, unsigned int count)
 {
 	unsigned long size = (unsigned long)count << PAGE_SHIFT;
 	unsigned long addr = (unsigned long)mem;
+	struct vmap_area *va;
 
 	might_sleep();
 	BUG_ON(!addr);
@@ -1100,10 +1091,14 @@ void vm_unmap_ram(const void *mem, unsigned int count)
 	debug_check_no_locks_freed(mem, size);
 	vmap_debug_free_range(addr, addr+size);
 
-	if (likely(count <= VMAP_MAX_ALLOC))
+	if (likely(count <= VMAP_MAX_ALLOC)) {
 		vb_free(mem, size);
-	else
-		free_unmap_vmap_area_addr(addr);
+		return;
+	}
+
+	va = find_vmap_area(addr);
+	BUG_ON(!va);
+	free_unmap_vmap_area(va);
 }
 EXPORT_SYMBOL(vm_unmap_ram);
 
-- 
2.1.4

* [PATCH 5/6] mm: turn vmap_purge_lock into a mutex
  2016-10-18  6:56 [RFC] reduce latency in __purge_vmap_area_lazy Christoph Hellwig
                   ` (3 preceding siblings ...)
  2016-10-18  6:56 ` [PATCH 4/6] mm: remove free_unmap_vmap_area_addr Christoph Hellwig
@ 2016-10-18  6:56 ` Christoph Hellwig
  2016-10-18  6:56 ` [PATCH 6/6] mm: add preempt points into __purge_vmap_area_lazy Christoph Hellwig
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 24+ messages in thread
From: Christoph Hellwig @ 2016-10-18  6:56 UTC (permalink / raw)
  To: akpm
  Cc: joelaf, jszhang, chris, joaodias, linux-mm, linux-rt-users, linux-kernel

Holding a spinlock over the whole purge keeps the purge path
non-preemptible for a long time, and nothing in it requires atomic
context any more, so turn the lock into a mutex.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 mm/vmalloc.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 2af2921..6c7eb8d 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -606,7 +606,7 @@ static atomic_t vmap_lazy_nr = ATOMIC_INIT(0);
  * by this lock, but we want to avoid concurrent calls for performance
  * reasons and to make pcpu_get_vm_areas() more deterministic.
  */
-static DEFINE_SPINLOCK(vmap_purge_lock);
+static DEFINE_MUTEX(vmap_purge_lock);
 
 /* for per-CPU blocks */
 static void purge_fragmented_blocks_allcpus(void);
@@ -660,9 +660,9 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
  */
 static void try_purge_vmap_area_lazy(void)
 {
-	if (spin_trylock(&vmap_purge_lock)) {
+	if (mutex_trylock(&vmap_purge_lock)) {
 		__purge_vmap_area_lazy(ULONG_MAX, 0);
-		spin_unlock(&vmap_purge_lock);
+		mutex_unlock(&vmap_purge_lock);
 	}
 }
 
@@ -671,10 +671,10 @@ static void try_purge_vmap_area_lazy(void)
  */
 static void purge_vmap_area_lazy(void)
 {
-	spin_lock(&vmap_purge_lock);
+	mutex_lock(&vmap_purge_lock);
 	purge_fragmented_blocks_allcpus();
 	__purge_vmap_area_lazy(ULONG_MAX, 0);
-	spin_unlock(&vmap_purge_lock);
+	mutex_unlock(&vmap_purge_lock);
 }
 
 /*
@@ -1063,11 +1063,11 @@ void vm_unmap_aliases(void)
 		rcu_read_unlock();
 	}
 
-	spin_lock(&vmap_purge_lock);
+	mutex_lock(&vmap_purge_lock);
 	purge_fragmented_blocks_allcpus();
 	if (!__purge_vmap_area_lazy(start, end) && flush)
 		flush_tlb_kernel_range(start, end);
-	spin_unlock(&vmap_purge_lock);
+	mutex_unlock(&vmap_purge_lock);
 }
 EXPORT_SYMBOL_GPL(vm_unmap_aliases);
 
-- 
2.1.4

* [PATCH 6/6] mm: add preempt points into __purge_vmap_area_lazy
  2016-10-18  6:56 [RFC] reduce latency in __purge_vmap_area_lazy Christoph Hellwig
                   ` (4 preceding siblings ...)
  2016-10-18  6:56 ` [PATCH 5/6] mm: turn vmap_purge_lock into a mutex Christoph Hellwig
@ 2016-10-18  6:56 ` Christoph Hellwig
  2016-10-18 20:56   ` Steven Rostedt
  2016-10-18 10:40 ` [RFC] reduce latency in __purge_vmap_area_lazy Nicholas Piggin
                   ` (2 subsequent siblings)
  8 siblings, 1 reply; 24+ messages in thread
From: Christoph Hellwig @ 2016-10-18  6:56 UTC (permalink / raw)
  To: akpm
  Cc: joelaf, jszhang, chris, joaodias, linux-mm, linux-rt-users, linux-kernel

From: Joel Fernandes <joelaf@google.com>

Use cond_resched_lock to avoid holding the vmap_area_lock for a
potentially long time.

Signed-off-by: Joel Fernandes <joelaf@google.com>
[hch: split from a larger patch by Joel, wrote the crappy changelog]
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 mm/vmalloc.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 6c7eb8d..98b19ea 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -628,7 +628,7 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
 	struct llist_node *valist;
 	struct vmap_area *va;
 	struct vmap_area *n_va;
-	int nr = 0;
+	bool do_free = false;
 
 	lockdep_assert_held(&vmap_purge_lock);
 
@@ -638,18 +638,22 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
 			start = va->va_start;
 		if (va->va_end > end)
 			end = va->va_end;
-		nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
+		do_free = true;
 	}
 
-	if (!nr)
+	if (!do_free)
 		return false;
 
-	atomic_sub(nr, &vmap_lazy_nr);
 	flush_tlb_kernel_range(start, end);
 
 	spin_lock(&vmap_area_lock);
-	llist_for_each_entry_safe(va, n_va, valist, purge_list)
+	llist_for_each_entry_safe(va, n_va, valist, purge_list) {
+		int nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
+
 		__free_vmap_area(va);
+		atomic_sub(nr, &vmap_lazy_nr);
+		cond_resched_lock(&vmap_area_lock);
+	}
 	spin_unlock(&vmap_area_lock);
 	return true;
 }
-- 
2.1.4

* Re: [PATCH 2/6] mm: mark all calls into the vmalloc subsystem as potentially sleeping
  2016-10-18  6:56 ` [PATCH 2/6] mm: mark all calls into the vmalloc subsystem as potentially sleeping Christoph Hellwig
@ 2016-10-18 10:33   ` Chris Wilson
  2016-10-18 10:38     ` Christoph Hellwig
  2016-10-19 11:15   ` Chris Wilson
  1 sibling, 1 reply; 24+ messages in thread
From: Chris Wilson @ 2016-10-18 10:33 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: akpm, joelaf, jszhang, joaodias, linux-mm, linux-rt-users, linux-kernel

On Tue, Oct 18, 2016 at 08:56:07AM +0200, Christoph Hellwig wrote:
> This is how everyone seems to already use them, but let's make that
> explicit.

mm/page_alloc.c: alloc_large_system_hash() is perhaps the exception to
the rule.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 2/6] mm: mark all calls into the vmalloc subsystem as potentially sleeping
  2016-10-18 10:33   ` Chris Wilson
@ 2016-10-18 10:38     ` Christoph Hellwig
  0 siblings, 0 replies; 24+ messages in thread
From: Christoph Hellwig @ 2016-10-18 10:38 UTC (permalink / raw)
  To: Chris Wilson
  Cc: Christoph Hellwig, akpm, joelaf, jszhang, joaodias, linux-mm,
	linux-rt-users, linux-kernel

On Tue, Oct 18, 2016 at 11:33:59AM +0100, Chris Wilson wrote:
> On Tue, Oct 18, 2016 at 08:56:07AM +0200, Christoph Hellwig wrote:
> > This is how everyone seems to already use them, but let's make that
> > explicit.
> 
> mm/page_alloc.c: alloc_large_system_hash() is perhaps the exception to
> the rule.

While alloc_large_system_hash passes GFP_ATOMIC, it is still called
from a context where it can sleep - I think it just abuses GFP_ATOMIC
to get an "early" failure.  GFP_ATOMIC isn't exactly a good choice for
that, as it also dips into additional reserves; GFP_NOWAIT would
probably have been the better choice.
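
For reference, the 4.9-era definitions in include/linux/gfp.h show the
difference - both flags forbid sleeping, but only GFP_ATOMIC marks the
allocation as high priority, and its __GFP_HIGH lets it dip into the
emergency reserves.  Roughly:

	#define GFP_ATOMIC	(__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
	#define GFP_NOWAIT	(__GFP_KSWAPD_RECLAIM)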

* Re: [RFC] reduce latency in __purge_vmap_area_lazy
  2016-10-18  6:56 [RFC] reduce latency in __purge_vmap_area_lazy Christoph Hellwig
                   ` (5 preceding siblings ...)
  2016-10-18  6:56 ` [PATCH 6/6] mm: add preempt points into __purge_vmap_area_lazy Christoph Hellwig
@ 2016-10-18 10:40 ` Nicholas Piggin
  2016-10-18 11:21 ` Jisheng Zhang
  2016-10-21  1:08 ` Joel Fernandes
  8 siblings, 0 replies; 24+ messages in thread
From: Nicholas Piggin @ 2016-10-18 10:40 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: akpm, joelaf, jszhang, chris, joaodias, linux-mm, linux-rt-users,
	linux-kernel

On Tue, 18 Oct 2016 08:56:05 +0200
Christoph Hellwig <hch@lst.de> wrote:

> Hi all,
> 
> this is my spin at sorting out the long lock hold times in
> __purge_vmap_area_lazy.  It is based on the patch from Joel sent this
> week.  I don't have any good numbers for it, but it survived an
> xfstests run on XFS which is a significant vmalloc user.  The
> changelogs could still be improved as well, but I'd rather get it
> out quickly for feedback and testing.

All seems pretty good to me.

Thanks,
Nick

* Re: [RFC] reduce latency in __purge_vmap_area_lazy
  2016-10-18  6:56 [RFC] reduce latency in __purge_vmap_area_lazy Christoph Hellwig
                   ` (6 preceding siblings ...)
  2016-10-18 10:40 ` [RFC] reduce latency in __purge_vmap_area_lazy Nicholas Piggin
@ 2016-10-18 11:21 ` Jisheng Zhang
  2016-10-21  1:08 ` Joel Fernandes
  8 siblings, 0 replies; 24+ messages in thread
From: Jisheng Zhang @ 2016-10-18 11:21 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: akpm, joelaf, chris, joaodias, linux-mm, linux-rt-users,
	linux-kernel, linux-arm-kernel

On Tue, 18 Oct 2016 08:56:05 +0200 Christoph Hellwig wrote:

> Hi all,
> 
> this is my spin at sorting out the long lock hold times in
> __purge_vmap_area_lazy.  It is based on the patch from Joel sent this
> week.  I don't have any good numbers for it, but it survived an
> xfstests run on XFS which is a significant vmalloc user.  The
> changelogs could still be improved as well, but I'd rather get it
> out quickly for feedback and testing.

I just tested this series.  The preempt-off ftrace log doesn't complain
about __purge_vmap_area_lazy any more in my test case; this latency is
removed!

So feel free to add

Tested-by: Jisheng Zhang <jszhang@marvell.com>

* Re: [PATCH 6/6] mm: add preempt points into __purge_vmap_area_lazy
  2016-10-18  6:56 ` [PATCH 6/6] mm: add preempt points into __purge_vmap_area_lazy Christoph Hellwig
@ 2016-10-18 20:56   ` Steven Rostedt
  2016-10-18 21:01     ` Steven Rostedt
  0 siblings, 1 reply; 24+ messages in thread
From: Steven Rostedt @ 2016-10-18 20:56 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: akpm, joelaf, jszhang, chris, joaodias, linux-mm, linux-rt-users,
	linux-kernel

On Tue, Oct 18, 2016 at 08:56:11AM +0200, Christoph Hellwig wrote:
> From: Joel Fernandes <joelaf@google.com>
> 
> Use cond_resched_lock to avoid holding the vmap_area_lock for a
> potentially long time.
> 
> Signed-off-by: Joel Fernandes <joelaf@google.com>
> [hch: split from a larger patch by Joel, wrote the crappy changelog]
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  mm/vmalloc.c | 14 +++++++++-----
>  1 file changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 6c7eb8d..98b19ea 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -628,7 +628,7 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
>  	struct llist_node *valist;
>  	struct vmap_area *va;
>  	struct vmap_area *n_va;
> -	int nr = 0;
> +	bool do_free = false;
>  
>  	lockdep_assert_held(&vmap_purge_lock);
>  
> @@ -638,18 +638,22 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
>  			start = va->va_start;
>  		if (va->va_end > end)
>  			end = va->va_end;
> -		nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
> +		do_free = true;
>  	}
>  
> -	if (!nr)
> +	if (!do_free)
>  		return false;
>  
> -	atomic_sub(nr, &vmap_lazy_nr);
>  	flush_tlb_kernel_range(start, end);
>  
>  	spin_lock(&vmap_area_lock);
> -	llist_for_each_entry_safe(va, n_va, valist, purge_list)
> +	llist_for_each_entry_safe(va, n_va, valist, purge_list) {
> +		int nr = (va->va_end - va->va_start) >> PAGE_SHIFT;
> +
>  		__free_vmap_area(va);
> +		atomic_sub(nr, &vmap_lazy_nr);
> +		cond_resched_lock(&vmap_area_lock);

Is releasing the lock within a llist_for_each_entry_safe() actually safe? Is
vmap_area_lock the one to protect the valist?

That is llist_for_each_entry_safe(va, n_va, valist, purge_list) does:

	for (va = llist_entry(valist, typeof(*va), purge_list);
	     &va->purge_list != NULL &&
	        (n_va = llist_entry(va->purge_list.next, typeof(*n_va),
				    purge_list), true);
	     va = n_va)

Thus n_va is pointing to the next element to process when we release the
lock. Is it possible for another task to get into this same path and process
the item that n_va is pointing to? Then when the preempted task comes back,
grabs the vmap_area_lock, and then continues the loop with what n_va has,
could that cause problems? That is, the next iteration after releasing the
lock does va = n_va. What happens if n_va no longer exists?

I don't know this code that well, and perhaps vmap_area_lock is not protecting
the list and this is all fine.

-- Steve


> +	}
>  	spin_unlock(&vmap_area_lock);
>  	return true;
>  }
> -- 
> 2.1.4

* Re: [PATCH 6/6] mm: add preempt points into __purge_vmap_area_lazy
  2016-10-18 20:56   ` Steven Rostedt
@ 2016-10-18 21:01     ` Steven Rostedt
  0 siblings, 0 replies; 24+ messages in thread
From: Steven Rostedt @ 2016-10-18 21:01 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: akpm, joelaf, jszhang, chris, joaodias, linux-mm, linux-rt-users,
	linux-kernel

On Tue, 18 Oct 2016 16:56:48 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:


> Is releasing the lock within a llist_for_each_entry_safe() actually safe? Is
> vmap_area_lock the one to protect the valist?
> 
> That is llist_for_each_entry_safe(va, n_va, valist, purge_list) does:
> 
> 	for (va = llist_entry(valist, typeof(*va), purge_list);
> 	     &va->purge_list != NULL &&
> 	        (n_va = llist_entry(va->purge_list.next, typeof(*n_va),
> 				    purge_list), true);
> 	     va = n_va)
> 
> Thus n_va is pointing to the next element to process when we release the
> lock. Is it possible for another task to get into this same path and process
> the item that n_va is pointing to? Then when the preempted task comes back,
> grabs the vmap_area_lock, and then continues the loop with what n_va has,
> could that cause problems? That is, the next iteration after releasing the
> lock does va = n_va. What happens if n_va no longer exists?
> 
> I don't know this code that well, and perhaps vmap_area_lock is not protecting
> the list and this is all fine.
> 

Bah, nevermind. I missed the:

	valist = llist_del_all(&vmap_purge_list);

so yeah, all should be good.
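
Roughly, from include/linux/llist.h, the detach is a single atomic
xchg, so the detached list is private to whoever emptied it and can be
walked with the lock dropped:

	static inline struct llist_node *llist_del_all(struct llist_head *head)
	{
		return xchg(&head->first, NULL);
	}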

Nothing to see here, move along please.

-- Steve

* Re: [PATCH 2/6] mm: mark all calls into the vmalloc subsystem as potentially sleeping
  2016-10-18  6:56 ` [PATCH 2/6] mm: mark all calls into the vmalloc subsystem as potentially sleeping Christoph Hellwig
  2016-10-18 10:33   ` Chris Wilson
@ 2016-10-19 11:15   ` Chris Wilson
  2016-10-19 13:05     ` Christoph Hellwig
  2016-11-08 13:24     ` Joel Fernandes
  1 sibling, 2 replies; 24+ messages in thread
From: Chris Wilson @ 2016-10-19 11:15 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: akpm, joelaf, jszhang, joaodias, linux-mm, linux-rt-users, linux-kernel

On Tue, Oct 18, 2016 at 08:56:07AM +0200, Christoph Hellwig wrote:
> This is how everyone seems to already use them, but let's make that
> explicit.

Ah, found an exception, vmapped stacks:

[  696.928541] BUG: sleeping function called from invalid context at mm/vmalloc.c:615
[  696.928576] in_atomic(): 1, irqs_disabled(): 0, pid: 30521, name: bash
[  696.928590] 1 lock held by bash/30521:
[  696.928600]  #0:  (vmap_area_lock){+.+...}, at: [<ffffffff8115f0cf>] __purge_vmap_area_lazy+0x30f/0x370
[  696.928656] CPU: 0 PID: 30521 Comm: bash Tainted: G        W       4.9.0-rc1+ #124
[  696.928672] Hardware name:                  /        , BIOS PYBSWCEL.86A.0027.2015.0507.1758 05/07/2015
[  696.928690]  ffffc900070f7c70 ffffffff812be1f5 ffff8802750b6680 ffffffff819650a6
[  696.928717]  ffffc900070f7c98 ffffffff810a3216 0000000000004001 ffff8802726e16c0
[  696.928743]  ffff8802726e19a0 ffffc900070f7d08 ffffffff8115f0f3 ffff8802750b6680
[  696.928768] Call Trace:
[  696.928782]  [<ffffffff812be1f5>] dump_stack+0x68/0x93
[  696.928796]  [<ffffffff810a3216>] ___might_sleep+0x166/0x220
[  696.928809]  [<ffffffff8115f0f3>] __purge_vmap_area_lazy+0x333/0x370
[  696.928823]  [<ffffffff8115ea68>] ? vunmap_page_range+0x1e8/0x350
[  696.928837]  [<ffffffff8115f1b3>] free_vmap_area_noflush+0x83/0x90
[  696.928850]  [<ffffffff81160931>] remove_vm_area+0x71/0xb0
[  696.928863]  [<ffffffff81160999>] __vunmap+0x29/0xf0
[  696.928875]  [<ffffffff81160ab9>] vfree+0x29/0x70
[  696.928888]  [<ffffffff81071746>] put_task_stack+0x76/0x120
[  696.928901]  [<ffffffff8109a943>] finish_task_switch+0x163/0x1e0
[  696.928914]  [<ffffffff8109a845>] ? finish_task_switch+0x65/0x1e0
[  696.928928]  [<ffffffff816125f5>] __schedule+0x1f5/0x7c0
[  696.928940]  [<ffffffff81612c28>] schedule+0x38/0x90
[  696.928953]  [<ffffffff810787b1>] do_wait+0x1d1/0x200
[  696.928966]  [<ffffffff810799b1>] SyS_wait4+0x61/0xc0
[  696.928979]  [<ffffffff81076e50>] ? task_stopped_code+0x50/0x50
[  696.928992]  [<ffffffff81618e6e>] entry_SYSCALL_64_fastpath+0x1c/0xb1

[This was triggered by earlier patch to remove the serialisation and add
cond_resched_lock(&vmap_area_lock)]
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 2/6] mm: mark all calls into the vmalloc subsystem as potentially sleeping
  2016-10-19 11:15   ` Chris Wilson
@ 2016-10-19 13:05     ` Christoph Hellwig
  2016-10-19 15:34       ` Andy Lutomirski
  2016-11-08 13:24     ` Joel Fernandes
  1 sibling, 1 reply; 24+ messages in thread
From: Christoph Hellwig @ 2016-10-19 13:05 UTC (permalink / raw)
  To: Chris Wilson
  Cc: Christoph Hellwig, akpm, joelaf, jszhang, joaodias, linux-mm,
	linux-rt-users, linux-kernel, Andy Lutomirski

On Wed, Oct 19, 2016 at 12:15:41PM +0100, Chris Wilson wrote:
> On Tue, Oct 18, 2016 at 08:56:07AM +0200, Christoph Hellwig wrote:
> > This is how everyone seems to already use them, but let's make that
> > explicit.
> 
> Ah, found an exception, vmapped stacks:

Oh, fun.  So if we can't require vfree to be called from process context
we also can't use a mutex to wait for the vmap flushing.  Given that we
free stacks from the scheduler context switch I also fear there is no
good way to get a sleepable context there.

The only other idea I had was to use vmap_area_lock for the protection
that purge_lock currently provides, but that would require some serious
refactoring to avoid recursive locking first.

* Re: [PATCH 2/6] mm: mark all calls into the vmalloc subsystem as potentially sleeping
  2016-10-19 13:05     ` Christoph Hellwig
@ 2016-10-19 15:34       ` Andy Lutomirski
  2016-10-19 16:31         ` Christoph Hellwig
  0 siblings, 1 reply; 24+ messages in thread
From: Andy Lutomirski @ 2016-10-19 15:34 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Chris Wilson, Andrew Morton, joelaf, jszhang, joaodias, linux-mm,
	linux-rt-users, linux-kernel

On Wed, Oct 19, 2016 at 6:05 AM, Christoph Hellwig <hch@lst.de> wrote:
> On Wed, Oct 19, 2016 at 12:15:41PM +0100, Chris Wilson wrote:
>> On Tue, Oct 18, 2016 at 08:56:07AM +0200, Christoph Hellwig wrote:
>> > This is how everyone seems to already use them, but let's make that
>> > explicit.
>>
>> Ah, found an exception, vmapped stacks:
>
> Oh, fun.  So if we can't require vfree to be called from process context
> we also can't use a mutex to wait for the vmap flushing.  Given that we
> free stacks from the scheduler context switch I also fear there is no
> good way to get a sleepable context there.
>
> The only other idea I had was to use vmap_area_lock for the protection
> that purge_lock currently provides, but that would require some serious
> refactoring to avoid recursive locking first.

It would be quite awkward for a task stack to get freed from a
sleepable context, because the obvious sleepable context is the task
itself, and it still needs its stack.  This was true even in the old
regime when task stacks were freed from RCU context.

But vfree has a magic automatic deferral mechanism.  Couldn't you make
the non-deferred case might_sleep()?

-- 
Andy Lutomirski
AMA Capital Management, LLC

* Re: [PATCH 2/6] mm: mark all calls into the vmalloc subsystem as potentially sleeping
  2016-10-19 15:34       ` Andy Lutomirski
@ 2016-10-19 16:31         ` Christoph Hellwig
  2016-10-19 19:43           ` Chris Wilson
  2016-10-21  0:32           ` Joel Fernandes
  0 siblings, 2 replies; 24+ messages in thread
From: Christoph Hellwig @ 2016-10-19 16:31 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Christoph Hellwig, Chris Wilson, Andrew Morton, joelaf, jszhang,
	joaodias, linux-mm, linux-rt-users, linux-kernel

On Wed, Oct 19, 2016 at 08:34:40AM -0700, Andy Lutomirski wrote:
> 
> It would be quite awkward for a task stack to get freed from a
> sleepable context, because the obvious sleepable context is the task
> itself, and it still needs its stack.  This was true even in the old
> regime when task stacks were freed from RCU context.
> 
> But vfree has a magic automatic deferral mechanism.  Couldn't you make
> the non-deferred case might_sleep()?

But it's only magic from interrupt context..

Chris, does this patch make virtually mapped stack work for you again?

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f2481cb..942e02d 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1533,7 +1533,7 @@ void vfree(const void *addr)
 
 	if (!addr)
 		return;
-	if (unlikely(in_interrupt())) {
+	if (in_interrupt() || in_atomic()) {
 		struct vfree_deferred *p = this_cpu_ptr(&vfree_deferred);
 		if (llist_add((struct llist_node *)addr, &p->list))
 			schedule_work(&p->wq);

* Re: [PATCH 2/6] mm: mark all calls into the vmalloc subsystem as potentially sleeping
  2016-10-19 16:31         ` Christoph Hellwig
@ 2016-10-19 19:43           ` Chris Wilson
  2016-10-21  0:32           ` Joel Fernandes
  1 sibling, 0 replies; 24+ messages in thread
From: Chris Wilson @ 2016-10-19 19:43 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Andy Lutomirski, Andrew Morton, joelaf, jszhang, joaodias,
	linux-mm, linux-rt-users, linux-kernel

On Wed, Oct 19, 2016 at 06:31:12PM +0200, Christoph Hellwig wrote:
> On Wed, Oct 19, 2016 at 08:34:40AM -0700, Andy Lutomirski wrote:
> > 
> > It would be quite awkward for a task stack to get freed from a
> > sleepable context, because the obvious sleepable context is the task
> > itself, and it still needs its stack.  This was true even in the old
> > regime when task stacks were freed from RCU context.
> > 
> > But vfree has a magic automatic deferral mechanism.  Couldn't you make
> > the non-deferred case might_sleep()?
> 
> But it's only magic from interrupt context..
> 
> Chris, does this patch make virtually mapped stack work for you again?

So far, so good. No warns from anyone else.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 2/6] mm: mark all calls into the vmalloc subsystem as potentially sleeping
  2016-10-19 16:31         ` Christoph Hellwig
  2016-10-19 19:43           ` Chris Wilson
@ 2016-10-21  0:32           ` Joel Fernandes
  1 sibling, 0 replies; 24+ messages in thread
From: Joel Fernandes @ 2016-10-21  0:32 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Andy Lutomirski, Chris Wilson, Andrew Morton, Jisheng Zhang,
	John Dias, linux-mm, linux-rt-users, linux-kernel

Hi Christoph,

On Wed, Oct 19, 2016 at 9:31 AM, Christoph Hellwig <hch@lst.de> wrote:
> On Wed, Oct 19, 2016 at 08:34:40AM -0700, Andy Lutomirski wrote:
>>
>> It would be quite awkward for a task stack to get freed from a
>> sleepable context, because the obvious sleepable context is the task
>> itself, and it still needs its stack.  This was true even in the old
>> regime when task stacks were freed from RCU context.
>>
>> But vfree has a magic automatic deferral mechanism.  Couldn't you make
>> the non-deferred case might_sleep()?
>
> But it's only magic from interrupt context..
>
> Chris, does this patch make virtually mapped stack work for you again?
>
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index f2481cb..942e02d 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -1533,7 +1533,7 @@ void vfree(const void *addr)
>
>         if (!addr)
>                 return;
> -       if (unlikely(in_interrupt())) {
> +       if (in_interrupt() || in_atomic()) {

in_atomic() also checks in_interrupt() cases so only in_atomic() should suffice.
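
Roughly, from the 4.9-era include/linux/preempt.h - the hardirq,
softirq and NMI counts all live in preempt_count, so in_interrupt()
implies in_atomic():

	#define in_interrupt() \
		(preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK | NMI_MASK))
	#define in_atomic()	(preempt_count() != 0)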

Thanks,

Joel

* Re: [PATCH 4/6] mm: remove free_unmap_vmap_area_addr
  2016-10-18  6:56 ` [PATCH 4/6] mm: remove free_unmap_vmap_area_addr Christoph Hellwig
@ 2016-10-21  0:46   ` Joel Fernandes
  2016-10-21  1:58     ` Nicholas Piggin
  0 siblings, 1 reply; 24+ messages in thread
From: Joel Fernandes @ 2016-10-21  0:46 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Andrew Morton, Jisheng Zhang, Chris Wilson, John Dias,
	open list:MEMORY MANAGEMENT, linux-rt-users, LKML

Hi Christoph,

On Mon, Oct 17, 2016 at 11:56 PM, Christoph Hellwig <hch@lst.de> wrote:
> Just inline it into the only caller.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  mm/vmalloc.c | 21 ++++++++-------------
>  1 file changed, 8 insertions(+), 13 deletions(-)
>
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 8cedfa0..2af2921 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -717,16 +717,6 @@ static struct vmap_area *find_vmap_area(unsigned long addr)
>         return va;
>  }
>
> -static void free_unmap_vmap_area_addr(unsigned long addr)
> -{
> -       struct vmap_area *va;
> -
> -       va = find_vmap_area(addr);
> -       BUG_ON(!va);
> -       free_unmap_vmap_area(va);
> -}
> -
> -
>  /*** Per cpu kva allocator ***/
>
>  /*
> @@ -1090,6 +1080,7 @@ void vm_unmap_ram(const void *mem, unsigned int count)
>  {
>         unsigned long size = (unsigned long)count << PAGE_SHIFT;
>         unsigned long addr = (unsigned long)mem;
> +       struct vmap_area *va;
>
>         might_sleep();
>         BUG_ON(!addr);
> @@ -1100,10 +1091,14 @@ void vm_unmap_ram(const void *mem, unsigned int count)
>         debug_check_no_locks_freed(mem, size);
>         vmap_debug_free_range(addr, addr+size);
>
> -       if (likely(count <= VMAP_MAX_ALLOC))
> +       if (likely(count <= VMAP_MAX_ALLOC)) {
>                 vb_free(mem, size);
> -       else
> -               free_unmap_vmap_area_addr(addr);
> +               return;
> +       }
> +
> +       va = find_vmap_area(addr);
> +       BUG_ON(!va);

Considering recent objections to BUG_ON [1], let's make this a WARN_ON
while we're moving the code?

> +       free_unmap_vmap_area(va);
>  }
>  EXPORT_SYMBOL(vm_unmap_ram);
>
> --
> 2.1.4

Thanks,

Joel

[1] https://lkml.org/lkml/2016/10/6/65

* Re: [RFC] reduce latency in __purge_vmap_area_lazy
  2016-10-18  6:56 [RFC] reduce latency in __purge_vmap_area_lazy Christoph Hellwig
                   ` (7 preceding siblings ...)
  2016-10-18 11:21 ` Jisheng Zhang
@ 2016-10-21  1:08 ` Joel Fernandes
  8 siblings, 0 replies; 24+ messages in thread
From: Joel Fernandes @ 2016-10-21  1:08 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Andrew Morton, Jisheng Zhang, Chris Wilson, John Dias,
	open list:MEMORY MANAGEMENT, linux-rt-users, LKML

On Mon, Oct 17, 2016 at 11:56 PM, Christoph Hellwig <hch@lst.de> wrote:
> Hi all,
>
> this is my spin at sorting out the long lock hold times in
> __purge_vmap_area_lazy.  It is based on the patch from Joel sent this
> week.  I don't have any good numbers for it, but it survived an
> xfstests run on XFS which is a significant vmalloc user.  The
> changelogs could still be improved as well, but I'd rather get it
> out quickly for feedback and testing.

All patches look great to me, and thanks a lot.

Regards,
Joel

* Re: [PATCH 4/6] mm: remove free_unmap_vmap_area_addr
  2016-10-21  0:46   ` Joel Fernandes
@ 2016-10-21  1:58     ` Nicholas Piggin
  0 siblings, 0 replies; 24+ messages in thread
From: Nicholas Piggin @ 2016-10-21  1:58 UTC (permalink / raw)
  To: Joel Fernandes
  Cc: Christoph Hellwig, Andrew Morton, Jisheng Zhang, Chris Wilson,
	John Dias, open list:MEMORY MANAGEMENT, linux-rt-users, LKML

On Thu, 20 Oct 2016 17:46:36 -0700
Joel Fernandes <joelaf@google.com> wrote:

> > @@ -1100,10 +1091,14 @@ void vm_unmap_ram(const void *mem, unsigned int count)
> >         debug_check_no_locks_freed(mem, size);
> >         vmap_debug_free_range(addr, addr+size);
> >
> > -       if (likely(count <= VMAP_MAX_ALLOC))
> > +       if (likely(count <= VMAP_MAX_ALLOC)) {
> >                 vb_free(mem, size);
> > -       else
> > -               free_unmap_vmap_area_addr(addr);
> > +               return;
> > +       }
> > +
> > +       va = find_vmap_area(addr);
> > +       BUG_ON(!va);  
> 
> Considering recent objections to BUG_ON [1], let's make this a WARN_ON
> while we're moving the code?

If you lost track of your kernel memory mappings, you are in big trouble,
and fail-stop is really the best course of action for containing the
problem, which could have security and data corruption implications. This
is covered by Linus' last paragraph in that commit.

Thanks,
Nick

* Re: [PATCH 2/6] mm: mark all calls into the vmalloc subsystem as potentially sleeping
  2016-10-19 11:15   ` Chris Wilson
  2016-10-19 13:05     ` Christoph Hellwig
@ 2016-11-08 13:24     ` Joel Fernandes
  2016-11-08 14:32       ` Andrey Ryabinin
  1 sibling, 1 reply; 24+ messages in thread
From: Joel Fernandes @ 2016-11-08 13:24 UTC (permalink / raw)
  To: Chris Wilson
  Cc: Christoph Hellwig, Andrew Morton, Jisheng Zhang, John Dias,
	open list:MEMORY MANAGEMENT, linux-rt-users, LKML

On Wed, Oct 19, 2016 at 4:15 AM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Tue, Oct 18, 2016 at 08:56:07AM +0200, Christoph Hellwig wrote:
>> This is how everyone seems to already use them, but let's make that
>> explicit.
>
> Ah, found an exception, vmapped stacks:
>
> [  696.928541] BUG: sleeping function called from invalid context at mm/vmalloc.c:615
> [  696.928576] in_atomic(): 1, irqs_disabled(): 0, pid: 30521, name: bash
> [  696.928590] 1 lock held by bash/30521:
> [  696.928600]  #0:  (vmap_area_lock){+.+...}, at: [<ffffffff8115f0cf>] __purge_vmap_area_lazy+0x30f/0x370
> [  696.928656] CPU: 0 PID: 30521 Comm: bash Tainted: G        W       4.9.0-rc1+ #124
> [  696.928672] Hardware name:                  /        , BIOS PYBSWCEL.86A.0027.2015.0507.1758 05/07/2015
> [  696.928690]  ffffc900070f7c70 ffffffff812be1f5 ffff8802750b6680 ffffffff819650a6
> [  696.928717]  ffffc900070f7c98 ffffffff810a3216 0000000000004001 ffff8802726e16c0
> [  696.928743]  ffff8802726e19a0 ffffc900070f7d08 ffffffff8115f0f3 ffff8802750b6680
> [  696.928768] Call Trace:
> [  696.928782]  [<ffffffff812be1f5>] dump_stack+0x68/0x93
> [  696.928796]  [<ffffffff810a3216>] ___might_sleep+0x166/0x220
> [  696.928809]  [<ffffffff8115f0f3>] __purge_vmap_area_lazy+0x333/0x370
> [  696.928823]  [<ffffffff8115ea68>] ? vunmap_page_range+0x1e8/0x350
> [  696.928837]  [<ffffffff8115f1b3>] free_vmap_area_noflush+0x83/0x90
> [  696.928850]  [<ffffffff81160931>] remove_vm_area+0x71/0xb0
> [  696.928863]  [<ffffffff81160999>] __vunmap+0x29/0xf0
> [  696.928875]  [<ffffffff81160ab9>] vfree+0x29/0x70
> [  696.928888]  [<ffffffff81071746>] put_task_stack+0x76/0x120

From this traceback, it looks like the lock causing the atomic context
was actually acquired in the vfree path itself, and not by the vmapped
stack user (as it says "vmap_area_lock" is held). I am still wondering
why vmap_area_lock was held during the might_sleep(); perhaps you have
not applied all patches from Chris H?

From the patches I saw, vmap_area_lock is not acquired during any of
the might_sleep() calls Chris H added, but I may be missing something.
In any case it looks to me like the atomicity is introduced by the
vfree path itself and not by the caller.

Thanks!

Joel


> [  696.928901]  [<ffffffff8109a943>] finish_task_switch+0x163/0x1e0
> [  696.928914]  [<ffffffff8109a845>] ? finish_task_switch+0x65/0x1e0
> [  696.928928]  [<ffffffff816125f5>] __schedule+0x1f5/0x7c0
> [  696.928940]  [<ffffffff81612c28>] schedule+0x38/0x90
> [  696.928953]  [<ffffffff810787b1>] do_wait+0x1d1/0x200
> [  696.928966]  [<ffffffff810799b1>] SyS_wait4+0x61/0xc0
> [  696.928979]  [<ffffffff81076e50>] ? task_stopped_code+0x50/0x50
> [  696.928992]  [<ffffffff81618e6e>] entry_SYSCALL_64_fastpath+0x1c/0xb1
>
> [This was triggered by earlier patch to remove the serialisation and add
> cond_resched_lock(&vmap_area_lock)]
> -Chris
>
> --
> Chris Wilson, Intel Open Source Technology Centre

* Re: [PATCH 2/6] mm: mark all calls into the vmalloc subsystem as potentially sleeping
  2016-11-08 13:24     ` Joel Fernandes
@ 2016-11-08 14:32       ` Andrey Ryabinin
  0 siblings, 0 replies; 24+ messages in thread
From: Andrey Ryabinin @ 2016-11-08 14:32 UTC (permalink / raw)
  To: Joel Fernandes, Chris Wilson
  Cc: Christoph Hellwig, Andrew Morton, Jisheng Zhang, John Dias,
	open list:MEMORY MANAGEMENT, linux-rt-users, LKML



On 11/08/2016 04:24 PM, Joel Fernandes wrote:
> On Wed, Oct 19, 2016 at 4:15 AM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>> On Tue, Oct 18, 2016 at 08:56:07AM +0200, Christoph Hellwig wrote:
>>> This is how everyone seems to already use them, but let's make that
>>> explicit.
>>
>> Ah, found an exception, vmapped stacks:
>>
>> [  696.928541] BUG: sleeping function called from invalid context at mm/vmalloc.c:615
>> [  696.928576] in_atomic(): 1, irqs_disabled(): 0, pid: 30521, name: bash
>> [  696.928590] 1 lock held by bash/30521:
>> [  696.928600]  #0:  (vmap_area_lock){+.+...}, at: [<ffffffff8115f0cf>] __purge_vmap_area_lazy+0x30f/0x370
>> [  696.928656] CPU: 0 PID: 30521 Comm: bash Tainted: G        W       4.9.0-rc1+ #124
>> [  696.928672] Hardware name:                  /        , BIOS PYBSWCEL.86A.0027.2015.0507.1758 05/07/2015
>> [  696.928690]  ffffc900070f7c70 ffffffff812be1f5 ffff8802750b6680 ffffffff819650a6
>> [  696.928717]  ffffc900070f7c98 ffffffff810a3216 0000000000004001 ffff8802726e16c0
>> [  696.928743]  ffff8802726e19a0 ffffc900070f7d08 ffffffff8115f0f3 ffff8802750b6680
>> [  696.928768] Call Trace:
>> [  696.928782]  [<ffffffff812be1f5>] dump_stack+0x68/0x93
>> [  696.928796]  [<ffffffff810a3216>] ___might_sleep+0x166/0x220
>> [  696.928809]  [<ffffffff8115f0f3>] __purge_vmap_area_lazy+0x333/0x370
>> [  696.928823]  [<ffffffff8115ea68>] ? vunmap_page_range+0x1e8/0x350
>> [  696.928837]  [<ffffffff8115f1b3>] free_vmap_area_noflush+0x83/0x90
>> [  696.928850]  [<ffffffff81160931>] remove_vm_area+0x71/0xb0
>> [  696.928863]  [<ffffffff81160999>] __vunmap+0x29/0xf0
>> [  696.928875]  [<ffffffff81160ab9>] vfree+0x29/0x70
>> [  696.928888]  [<ffffffff81071746>] put_task_stack+0x76/0x120
> 
> From this traceback, it looks like the lock causing the atomic context
> was actually acquired in the vfree path itself, and not by the vmapped
> stack user (as it says "vmap_area_lock" held). I am still wondering
> why vmap_area_lock was held during the might_sleep(), perhaps you may
> not have applied all patches from Chris H?
> 

I don't think that this splat is because we are holding vmap_area_lock.
Look at cond_resched_lock:

#define cond_resched_lock(lock) ({				\
	___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\
	__cond_resched_lock(lock);				\
})

It calls might_sleep() with the spin lock still held.
AFAIU PREEMPT_LOCK_OFFSET is supposed to tell might_sleep() to ignore the
one held spin lock and complain iff something else changed preempt_count.
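
For reference, with CONFIG_PREEMPT_COUNT it is roughly:

	#define PREEMPT_LOCK_OFFSET	PREEMPT_OFFSET

i.e. the preempt_count contribution of exactly one held spinlock, so
___might_sleep() stays quiet as long as nothing beyond that single
lock has raised preempt_count.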
