* [PATCH 1/4] vmalloc: add vm_flags argument to internal __vmalloc_node()
@ 2018-01-23 10:55 Konstantin Khlebnikov
  2018-01-23 10:55 ` [PATCH 2/4] vmalloc: add __vmalloc_area() Konstantin Khlebnikov
                   ` (3 more replies)
  0 siblings, 4 replies; 24+ messages in thread
From: Konstantin Khlebnikov @ 2018-01-23 10:55 UTC (permalink / raw)
  To: Dave Hansen, linux-kernel, Christoph Hellwig, linux-mm,
	Andy Lutomirski, Andrew Morton

This allows setting VM_USERMAP in vmalloc_user() and vmalloc_32_user()
directly at allocation time and avoids the find_vm_area() call.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
---
 mm/vmalloc.c |   54 +++++++++++++++++++++---------------------------------
 1 file changed, 21 insertions(+), 33 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 673942094328..cece3fb33cef 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1662,7 +1662,9 @@ EXPORT_SYMBOL(vmap);
 
 static void *__vmalloc_node(unsigned long size, unsigned long align,
 			    gfp_t gfp_mask, pgprot_t prot,
+			    unsigned long vm_flags,
 			    int node, const void *caller);
+
 static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 				 pgprot_t prot, int node)
 {
@@ -1681,7 +1683,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 	/* Please note that the recursion is strictly bounded. */
 	if (array_size > PAGE_SIZE) {
 		pages = __vmalloc_node(array_size, 1, nested_gfp|highmem_mask,
-				PAGE_KERNEL, node, area->caller);
+				       PAGE_KERNEL, 0, node, area->caller);
 	} else {
 		pages = kmalloc_node(array_size, nested_gfp, node);
 	}
@@ -1752,7 +1754,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 		goto fail;
 
 	area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED |
-				vm_flags, start, end, node, gfp_mask, caller);
+				  vm_flags, start, end, node, gfp_mask, caller);
 	if (!area)
 		goto fail;
 
@@ -1783,6 +1785,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
  *	@align:		desired alignment
  *	@gfp_mask:	flags for the page level allocator
  *	@prot:		protection mask for the allocated pages
+ *	@vm_flags:	additional vm area flags (e.g. %VM_NO_GUARD)
  *	@node:		node to use for allocation or NUMA_NO_NODE
  *	@caller:	caller's return address
  *
@@ -1799,15 +1802,16 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
  */
 static void *__vmalloc_node(unsigned long size, unsigned long align,
 			    gfp_t gfp_mask, pgprot_t prot,
+			    unsigned long vm_flags,
 			    int node, const void *caller)
 {
 	return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
-				gfp_mask, prot, 0, node, caller);
+				gfp_mask, prot, vm_flags, node, caller);
 }
 
 void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
 {
-	return __vmalloc_node(size, 1, gfp_mask, prot, NUMA_NO_NODE,
+	return __vmalloc_node(size, 1, gfp_mask, prot, 0, NUMA_NO_NODE,
 				__builtin_return_address(0));
 }
 EXPORT_SYMBOL(__vmalloc);
@@ -1815,15 +1819,15 @@ EXPORT_SYMBOL(__vmalloc);
 static inline void *__vmalloc_node_flags(unsigned long size,
 					int node, gfp_t flags)
 {
-	return __vmalloc_node(size, 1, flags, PAGE_KERNEL,
-					node, __builtin_return_address(0));
+	return __vmalloc_node(size, 1, flags, PAGE_KERNEL, 0, node,
+			      __builtin_return_address(0));
 }
 
 
 void *__vmalloc_node_flags_caller(unsigned long size, int node, gfp_t flags,
 				  void *caller)
 {
-	return __vmalloc_node(size, 1, flags, PAGE_KERNEL, node, caller);
+	return __vmalloc_node(size, 1, flags, PAGE_KERNEL, 0, node, caller);
 }
 
 /**
@@ -1868,18 +1872,9 @@ EXPORT_SYMBOL(vzalloc);
  */
 void *vmalloc_user(unsigned long size)
 {
-	struct vm_struct *area;
-	void *ret;
-
-	ret = __vmalloc_node(size, SHMLBA,
-			     GFP_KERNEL | __GFP_ZERO,
-			     PAGE_KERNEL, NUMA_NO_NODE,
-			     __builtin_return_address(0));
-	if (ret) {
-		area = find_vm_area(ret);
-		area->flags |= VM_USERMAP;
-	}
-	return ret;
+	return __vmalloc_node(size, SHMLBA, GFP_KERNEL | __GFP_ZERO,
+			      PAGE_KERNEL, VM_USERMAP, NUMA_NO_NODE,
+			      __builtin_return_address(0));
 }
 EXPORT_SYMBOL(vmalloc_user);
 
@@ -1896,8 +1891,8 @@ EXPORT_SYMBOL(vmalloc_user);
  */
 void *vmalloc_node(unsigned long size, int node)
 {
-	return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL,
-					node, __builtin_return_address(0));
+	return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL, 0, node,
+			      __builtin_return_address(0));
 }
 EXPORT_SYMBOL(vmalloc_node);
 
@@ -1938,7 +1933,7 @@ EXPORT_SYMBOL(vzalloc_node);
 
 void *vmalloc_exec(unsigned long size)
 {
-	return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL_EXEC,
+	return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL_EXEC, 0,
 			      NUMA_NO_NODE, __builtin_return_address(0));
 }
 
@@ -1959,7 +1954,7 @@ void *vmalloc_exec(unsigned long size)
  */
 void *vmalloc_32(unsigned long size)
 {
-	return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL,
+	return __vmalloc_node(size, 1, GFP_VMALLOC32, PAGE_KERNEL, 0,
 			      NUMA_NO_NODE, __builtin_return_address(0));
 }
 EXPORT_SYMBOL(vmalloc_32);
@@ -1973,16 +1968,9 @@ EXPORT_SYMBOL(vmalloc_32);
  */
 void *vmalloc_32_user(unsigned long size)
 {
-	struct vm_struct *area;
-	void *ret;
-
-	ret = __vmalloc_node(size, 1, GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
-			     NUMA_NO_NODE, __builtin_return_address(0));
-	if (ret) {
-		area = find_vm_area(ret);
-		area->flags |= VM_USERMAP;
-	}
-	return ret;
+	return __vmalloc_node(size, 1, GFP_VMALLOC32 | __GFP_ZERO,
+			      PAGE_KERNEL, VM_USERMAP, NUMA_NO_NODE,
+			      __builtin_return_address(0));
 }
 EXPORT_SYMBOL(vmalloc_32_user);
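
For context, VM_USERMAP is what later allows such a buffer to be mapped into
userspace; a minimal sketch of a typical consumer (hypothetical driver code,
not part of this series):

/* Hypothetical driver mmap handler, shown only to illustrate why the
 * area needs VM_USERMAP: remap_vmalloc_range() rejects areas that do
 * not carry the flag. */
static void *demo_buf;	/* allocated elsewhere with vmalloc_user(size) */

static int demo_mmap(struct file *file, struct vm_area_struct *vma)
{
	/* Map the whole vmalloc_user() buffer starting at page offset 0. */
	return remap_vmalloc_range(vma, demo_buf, 0);
}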
 


* [PATCH 2/4] vmalloc: add __vmalloc_area()
  2018-01-23 10:55 [PATCH 1/4] vmalloc: add vm_flags argument to internal __vmalloc_node() Konstantin Khlebnikov
@ 2018-01-23 10:55 ` Konstantin Khlebnikov
  2018-01-23 10:55 ` [PATCH 3/4] kernel/fork: switch vmapped stack allocation to __vmalloc_area() Konstantin Khlebnikov
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 24+ messages in thread
From: Konstantin Khlebnikov @ 2018-01-23 10:55 UTC (permalink / raw)
  To: Dave Hansen, linux-kernel, Christoph Hellwig, linux-mm,
	Andy Lutomirski, Andrew Morton

This function is the same as __vmalloc_node_range() but returns a pointer
to the vm_struct rather than the virtual address.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
---
 include/linux/vmalloc.h |    5 +++++
 mm/vmalloc.c            |   39 +++++++++++++++++++++++++++++++++++----
 2 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 1e5d8c392f15..f772346a506e 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -81,6 +81,11 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align,
 			unsigned long start, unsigned long end, gfp_t gfp_mask,
 			pgprot_t prot, unsigned long vm_flags, int node,
 			const void *caller);
+extern struct vm_struct *__vmalloc_area(unsigned long size, unsigned long align,
+			unsigned long start, unsigned long end,
+			gfp_t gfp_mask, pgprot_t prot,
+			unsigned long vm_flags, int node,
+			const void *caller);
 #ifndef CONFIG_MMU
 extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags);
 static inline void *__vmalloc_node_flags_caller(unsigned long size, int node,
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index cece3fb33cef..ad962be74d53 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1725,7 +1725,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 }
 
 /**
- *	__vmalloc_node_range  -  allocate virtually contiguous memory
+ *	__vmalloc_area  -  allocate virtually contiguous memory
  *	@size:		allocation size
  *	@align:		desired alignment
  *	@start:		vm area range start
@@ -1738,9 +1738,11 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
  *
  *	Allocate enough pages to cover @size from the page level
  *	allocator with @gfp_mask flags.  Map them into contiguous
- *	kernel virtual space, using a pagetable protection of @prot.
+ *	kernel virtual space, using a pagetable protection of @prot
+ *
+ *	Returns the area descriptor on success or %NULL on failure.
  */
-void *__vmalloc_node_range(unsigned long size, unsigned long align,
+struct vm_struct *__vmalloc_area(unsigned long size, unsigned long align,
 			unsigned long start, unsigned long end, gfp_t gfp_mask,
 			pgprot_t prot, unsigned long vm_flags, int node,
 			const void *caller)
@@ -1771,7 +1773,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 
 	kmemleak_vmalloc(area, size, gfp_mask);
 
-	return addr;
+	return area;
 
 fail:
 	warn_alloc(gfp_mask, NULL,
@@ -1780,6 +1782,35 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 }
 
 /**
+ *	__vmalloc_node_range  -  allocate virtually contiguous memory
+ *	@size:		allocation size
+ *	@align:		desired alignment
+ *	@start:		vm area range start
+ *	@end:		vm area range end
+ *	@gfp_mask:	flags for the page level allocator
+ *	@prot:		protection mask for the allocated pages
+ *	@vm_flags:	additional vm area flags (e.g. %VM_NO_GUARD)
+ *	@node:		node to use for allocation or NUMA_NO_NODE
+ *	@caller:	caller's return address
+ *
+ *	Allocate enough pages to cover @size from the page level
+ *	allocator with @gfp_mask flags.  Map them into contiguous
+ *	kernel virtual space, using a pagetable protection of @prot.
+ */
+void *__vmalloc_node_range(unsigned long size, unsigned long align,
+			unsigned long start, unsigned long end, gfp_t gfp_mask,
+			pgprot_t prot, unsigned long vm_flags, int node,
+			const void *caller)
+{
+	struct vm_struct *area;
+
+	area = __vmalloc_area(size, align, start, end, gfp_mask,
+			      prot, vm_flags, node, caller);
+
+	return area ? area->addr : NULL;
+}
+
+/**
  *	__vmalloc_node  -  allocate virtually contiguous memory
  *	@size:		allocation size
  *	@align:		desired alignment
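
A minimal sketch of the calling convention the new helper enables
(hypothetical code, not from this series): the caller keeps the vm_struct
instead of looking it up again with find_vm_area().

/* Hypothetical helper, only to show the intended use of __vmalloc_area(). */
static void *demo_alloc(unsigned long size)
{
	struct vm_struct *area;

	area = __vmalloc_area(size, 1, VMALLOC_START, VMALLOC_END,
			      GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE,
			      __builtin_return_address(0));
	if (!area)
		return NULL;

	/* area->addr is the mapping; area->pages are its backing pages. */
	return area->addr;
}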


* [PATCH 3/4] kernel/fork: switch vmapped stack allocation to __vmalloc_area()
  2018-01-23 10:55 [PATCH 1/4] vmalloc: add vm_flags argument to internal __vmalloc_node() Konstantin Khlebnikov
  2018-01-23 10:55 ` [PATCH 2/4] vmalloc: add __vmalloc_area() Konstantin Khlebnikov
@ 2018-01-23 10:55 ` Konstantin Khlebnikov
  2018-01-23 13:57   ` Konstantin Khlebnikov
  2018-01-23 10:55 ` [PATCH 4/4] kernel/fork: add option to use virtually mapped stacks as fallback Konstantin Khlebnikov
  2018-02-21 12:24 ` [PATCH 1/4] vmalloc: add vm_flags argument to internal __vmalloc_node() Matthew Wilcox
  3 siblings, 1 reply; 24+ messages in thread
From: Konstantin Khlebnikov @ 2018-01-23 10:55 UTC (permalink / raw)
  To: Dave Hansen, linux-kernel, Christoph Hellwig, linux-mm,
	Andy Lutomirski, Andrew Morton

This gives us a pointer to the vm_struct without calling find_vm_area().

Also fix the comment about why the task holds a cached vm area: the cache is
used for retrieving the actual stack pages; freeing is done by
vfree_deferred().

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
---
 kernel/fork.c |   37 +++++++++++++++----------------------
 1 file changed, 15 insertions(+), 22 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index 2295fc69717f..457c9151f3c8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -204,39 +204,32 @@ static int free_vm_stack_cache(unsigned int cpu)
 static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
 {
 #ifdef CONFIG_VMAP_STACK
-	void *stack;
+	struct vm_struct *stack;
 	int i;
 
 	for (i = 0; i < NR_CACHED_STACKS; i++) {
-		struct vm_struct *s;
-
-		s = this_cpu_xchg(cached_stacks[i], NULL);
-
-		if (!s)
+		stack = this_cpu_xchg(cached_stacks[i], NULL);
+		if (!stack)
 			continue;
 
 #ifdef CONFIG_DEBUG_KMEMLEAK
 		/* Clear stale pointers from reused stack. */
-		memset(s->addr, 0, THREAD_SIZE);
+		memset(stack->addr, 0, THREAD_SIZE);
 #endif
-		tsk->stack_vm_area = s;
-		return s->addr;
+		tsk->stack_vm_area = stack;
+		return stack->addr;
 	}
 
-	stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN,
-				     VMALLOC_START, VMALLOC_END,
-				     THREADINFO_GFP,
-				     PAGE_KERNEL,
-				     0, node, __builtin_return_address(0));
+	stack = __vmalloc_area(THREAD_SIZE, THREAD_ALIGN,
+			       VMALLOC_START, VMALLOC_END,
+			       THREADINFO_GFP, PAGE_KERNEL,
+			       0, node, __builtin_return_address(0));
+	if (unlikely(!stack))
+		return NULL;
 
-	/*
-	 * We can't call find_vm_area() in interrupt context, and
-	 * free_thread_stack() can be called in interrupt context,
-	 * so cache the vm_struct.
-	 */
-	if (stack)
-		tsk->stack_vm_area = find_vm_area(stack);
-	return stack;
+	/* Cache the vm_struct for stack to page conversions. */
+	tsk->stack_vm_area = stack;
+	return stack->addr;
 #else
 	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
 					     THREAD_SIZE_ORDER);
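
To make the "stack to page conversions" remark concrete, the cached vm_struct
serves consumers like this one (simplified from kernel/fork.c of that era,
shown only for context):

/* Simplified for context: with a vmap'ed stack the backing pages are
 * reached through the cached vm_struct instead of virt_to_page(). */
static void account_kernel_stack(struct task_struct *tsk, int account)
{
	struct vm_struct *vm = task_stack_vm_area(tsk);

	if (vm) {
		int i;

		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++)
			mod_zone_page_state(page_zone(vm->pages[i]),
					    NR_KERNEL_STACK_KB,
					    PAGE_SIZE / 1024 * account);
	}
}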


* [PATCH 4/4] kernel/fork: add option to use virtually mapped stacks as fallback
  2018-01-23 10:55 [PATCH 1/4] vmalloc: add vm_flags argument to internal __vmalloc_node() Konstantin Khlebnikov
  2018-01-23 10:55 ` [PATCH 2/4] vmalloc: add __vmalloc_area() Konstantin Khlebnikov
  2018-01-23 10:55 ` [PATCH 3/4] kernel/fork: switch vmapped stack allocation to __vmalloc_area() Konstantin Khlebnikov
@ 2018-01-23 10:55 ` Konstantin Khlebnikov
  2018-02-21 15:42   ` Use higher-order pages in vmalloc Matthew Wilcox
  2018-02-21 12:24 ` [PATCH 1/4] vmalloc: add vm_flags argument to internal __vmalloc_node() Matthew Wilcox
  3 siblings, 1 reply; 24+ messages in thread
From: Konstantin Khlebnikov @ 2018-01-23 10:55 UTC (permalink / raw)
  To: Dave Hansen, linux-kernel, Christoph Hellwig, linux-mm,
	Andy Lutomirski, Andrew Morton

Virtually mapped stacks have two bonuses: they consume only order-0 pages
and add a guard page at the end. But they are slightly slower if the system
has plenty of free high-order pages.

This patch adds an option to use a virtually mapped stack as a fallback when
the atomic allocation of a traditional high-order page fails.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
---
 arch/Kconfig  |   14 ++++++++++++++
 kernel/fork.c |   11 +++++++++++
 2 files changed, 25 insertions(+)

diff --git a/arch/Kconfig b/arch/Kconfig
index 400b9e1b2f27..c181ab263e7f 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -904,6 +904,20 @@ config VMAP_STACK
 	  the stack to map directly to the KASAN shadow map using a formula
 	  that is incorrect if the stack is in vmalloc space.
 
+config VMAP_STACK_AS_FALLBACK
+	default n
+	bool "Use a virtually-mapped stack as fallback for directly-mapped"
+	depends on VMAP_STACK
+	help
+	  With this option the kernel first tries to allocate a directly-mapped
+	  stack without entering direct memory reclaim, and falls back to a
+	  vmap stack if that fails.
+
+	  Allocating a directly-mapped stack is faster than vmap when the
+	  system has plenty of free memory, and much slower when memory is
+	  used up or fragmented.
+
+	  This option neutralizes the stack overflow protection but achieves
+	  the best performance for the fork() and clone() syscalls.
+
 config ARCH_OPTIONAL_KERNEL_RWX
 	def_bool n
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 457c9151f3c8..cc61a083954d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -207,6 +207,17 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
 	struct vm_struct *stack;
 	int i;
 
+#ifdef CONFIG_VMAP_STACK_AS_FALLBACK
+	struct page *page;
+
+	page = alloc_pages_node(node, THREADINFO_GFP & ~__GFP_DIRECT_RECLAIM,
+				THREAD_SIZE_ORDER);
+	if (page) {
+		tsk->stack_vm_area = NULL;
+		return page_address(page);
+	}
+#endif
+
 	for (i = 0; i < NR_CACHED_STACKS; i++) {
 		stack = this_cpu_xchg(cached_stacks[i], NULL);
 		if (!stack)
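
For completeness, clearing tsk->stack_vm_area in the fallback path works
because the free path branches on it; a rough sketch of free_thread_stack()
from the kernel of that era (paraphrased, shown only for context):

static inline void free_thread_stack(struct task_struct *tsk)
{
#ifdef CONFIG_VMAP_STACK
	if (task_stack_vm_area(tsk)) {
		int i;

		/* Try to park the vmap'ed stack in the per-cpu cache... */
		for (i = 0; i < NR_CACHED_STACKS; i++) {
			if (this_cpu_cmpxchg(cached_stacks[i], NULL,
					     tsk->stack_vm_area) != NULL)
				continue;
			return;
		}

		/* ...otherwise free it via the deferred vfree path. */
		vfree_atomic(tsk->stack);
		return;
	}
#endif
	/* Directly-mapped stack: stack_vm_area is NULL, free the pages. */
	__free_pages(virt_to_page(tsk->stack), THREAD_SIZE_ORDER);
}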


* Re: [PATCH 3/4] kernel/fork: switch vmapped stack allocation to __vmalloc_area()
  2018-01-23 10:55 ` [PATCH 3/4] kernel/fork: switch vmapped stack allocation to __vmalloc_area() Konstantin Khlebnikov
@ 2018-01-23 13:57   ` Konstantin Khlebnikov
  2018-02-21  0:16     ` Andrew Morton
  0 siblings, 1 reply; 24+ messages in thread
From: Konstantin Khlebnikov @ 2018-01-23 13:57 UTC (permalink / raw)
  To: Dave Hansen, linux-kernel, Christoph Hellwig, linux-mm,
	Andy Lutomirski, Andrew Morton

# stress-ng --clone 100 -t 10s --metrics-brief
on a 32-core machine shows a boost from 35000 to 36000 bogo ops.

Patch 4/4 is a kind of RFC.
Actually the per-cpu cache of preallocated stacks works faster than the buddy
allocator, so the performance boost from it shows up only at a completely
insane rate of clones.

On 23.01.2018 13:55, Konstantin Khlebnikov wrote:
> This gives as pointer vm_struct without calling find_vm_area().
> 
> And fix comment about that task holds cache of vm area: this cache used
> for retrieving actual stack pages, freeing is done by vfree_deferred().
> 
> Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
> ---
>   kernel/fork.c |   37 +++++++++++++++----------------------
>   1 file changed, 15 insertions(+), 22 deletions(-)
> 
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 2295fc69717f..457c9151f3c8 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -204,39 +204,32 @@ static int free_vm_stack_cache(unsigned int cpu)
>   static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
>   {
>   #ifdef CONFIG_VMAP_STACK
> -	void *stack;
> +	struct vm_struct *stack;
>   	int i;
>   
>   	for (i = 0; i < NR_CACHED_STACKS; i++) {
> -		struct vm_struct *s;
> -
> -		s = this_cpu_xchg(cached_stacks[i], NULL);
> -
> -		if (!s)
> +		stack = this_cpu_xchg(cached_stacks[i], NULL);
> +		if (!stack)
>   			continue;
>   
>   #ifdef CONFIG_DEBUG_KMEMLEAK
>   		/* Clear stale pointers from reused stack. */
> -		memset(s->addr, 0, THREAD_SIZE);
> +		memset(stack->addr, 0, THREAD_SIZE);
>   #endif
> -		tsk->stack_vm_area = s;
> -		return s->addr;
> +		tsk->stack_vm_area = stack;
> +		return stack->addr;
>   	}
>   
> -	stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN,
> -				     VMALLOC_START, VMALLOC_END,
> -				     THREADINFO_GFP,
> -				     PAGE_KERNEL,
> -				     0, node, __builtin_return_address(0));
> +	stack = __vmalloc_area(THREAD_SIZE, THREAD_ALIGN,
> +			       VMALLOC_START, VMALLOC_END,
> +			       THREADINFO_GFP, PAGE_KERNEL,
> +			       0, node, __builtin_return_address(0));
> +	if (unlikely(!stack))
> +		return NULL;
>   
> -	/*
> -	 * We can't call find_vm_area() in interrupt context, and
> -	 * free_thread_stack() can be called in interrupt context,
> -	 * so cache the vm_struct.
> -	 */
> -	if (stack)
> -		tsk->stack_vm_area = find_vm_area(stack);
> -	return stack;
> +	/* Cache the vm_struct for stack to page conversions. */
> +	tsk->stack_vm_area = stack;
> +	return stack->addr;
>   #else
>   	struct page *page = alloc_pages_node(node, THREADINFO_GFP,
>   					     THREAD_SIZE_ORDER);
> 


* Re: [PATCH 3/4] kernel/fork: switch vmapped stack allocation to __vmalloc_area()
  2018-01-23 13:57   ` Konstantin Khlebnikov
@ 2018-02-21  0:16     ` Andrew Morton
  2018-02-21  7:23       ` Konstantin Khlebnikov
  0 siblings, 1 reply; 24+ messages in thread
From: Andrew Morton @ 2018-02-21  0:16 UTC (permalink / raw)
  To: Konstantin Khlebnikov
  Cc: Dave Hansen, linux-kernel, Christoph Hellwig, linux-mm, Andy Lutomirski

On Tue, 23 Jan 2018 16:57:21 +0300 Konstantin Khlebnikov <khlebnikov@yandex-team.ru> wrote:

> # stress-ng --clone 100 -t 10s --metrics-brief
> at 32-core machine shows boost 35000 -> 36000 bogo ops
> 
> Patch 4/4 is a kind of RFC.
> Actually per-cpu cache of preallocated stacks works faster than buddy allocator thus
> performance boots for it happens only at completely insane rate of clones.
> 

I'm not really sure what to make of this patchset.  Is it useful in any
known real-world use cases?

> +	  This option neutralize stack overflow protection but allows to
> +	  achieve best performance for syscalls fork() and clone().

That sounds problematic, but perhaps acceptable if the fallback only
happens rarely.

Can this code be folded into CONFIG_VMAP_STACk in some cleaner fashion?
We now have options for non-vmapped stacks, vmapped stacks and a mix
of both.

And what about this comment in arch/Kconfig:VMAP_STACK:

          This is presently incompatible with KASAN because KASAN expects
          the stack to map directly to the KASAN shadow map using a formula
          that is incorrect if the stack is in vmalloc space.


So VMAP_STACK_AS_FALLBACK will intermittently break KASAN?


* Re: [PATCH 3/4] kernel/fork: switch vmapped stack allocation to __vmalloc_area()
  2018-02-21  0:16     ` Andrew Morton
@ 2018-02-21  7:23       ` Konstantin Khlebnikov
  2018-02-21 16:35         ` Andy Lutomirski
  0 siblings, 1 reply; 24+ messages in thread
From: Konstantin Khlebnikov @ 2018-02-21  7:23 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Dave Hansen, linux-kernel, Christoph Hellwig, linux-mm, Andy Lutomirski



On 21.02.2018 03:16, Andrew Morton wrote:
> On Tue, 23 Jan 2018 16:57:21 +0300 Konstantin Khlebnikov <khlebnikov@yandex-team.ru> wrote:
> 
>> # stress-ng --clone 100 -t 10s --metrics-brief
>> at 32-core machine shows boost 35000 -> 36000 bogo ops
>>
>> Patch 4/4 is a kind of RFC.
>> Actually per-cpu cache of preallocated stacks works faster than buddy allocator thus
>> performance boots for it happens only at completely insane rate of clones.
>>
> 
> I'm not really sure what to make of this patchset.  Is it useful in any
> known real-world use cases?

Not yet. Feel free to ignore the last patch.

> 
>> +	  This option neutralize stack overflow protection but allows to
>> +	  achieve best performance for syscalls fork() and clone().
> 
> That sounds problematic, but perhaps acceptable if the fallback only
> happens rarely.
> 
> Can this code be folded into CONFIG_VMAP_STACk in some cleaner fashion?
> We now have options for non-vmapped stacks, vmapped stacks and a mix
> of both.
> 
> And what about this comment in arch/Kconfig:VMAP_STACK:
> 
>            This is presently incompatible with KASAN because KASAN expects
>            the stack to map directly to the KASAN shadow map using a formula
>            that is incorrect if the stack is in vmalloc space.
> 
> 
> So VMAP_STACK_AS_FALLBACK will intermittently break KASAN?
> 

All of this (including CONFIG_VMAP_STACK) could be turned into a boot option.
I think this would be the best solution.


* Re: [PATCH 1/4] vmalloc: add vm_flags argument to internal __vmalloc_node()
  2018-01-23 10:55 [PATCH 1/4] vmalloc: add vm_flags argument to internal __vmalloc_node() Konstantin Khlebnikov
                   ` (2 preceding siblings ...)
  2018-01-23 10:55 ` [PATCH 4/4] kernel/fork: add option to use virtually mapped stacks as fallback Konstantin Khlebnikov
@ 2018-02-21 12:24 ` Matthew Wilcox
  2018-02-21 12:39   ` Andrey Ryabinin
  3 siblings, 1 reply; 24+ messages in thread
From: Matthew Wilcox @ 2018-02-21 12:24 UTC (permalink / raw)
  To: Konstantin Khlebnikov
  Cc: Dave Hansen, linux-kernel, Christoph Hellwig, linux-mm,
	Andy Lutomirski, Andrew Morton, Andrey Ryabinin, kasan-dev

On Tue, Jan 23, 2018 at 01:55:22PM +0300, Konstantin Khlebnikov wrote:
> This allows to set VM_USERMAP in vmalloc_user() and vmalloc_32_user()
> directly at allocation and avoid find_vm_area() call.

While reviewing this patch, I came across this infelicity ...

have I understood correctly?

diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index e13d911251e7..9060f80b4a41 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -631,11 +631,10 @@ int kasan_module_alloc(void *addr, size_t size)
 	ret = __vmalloc_node_range(shadow_size, 1, shadow_start,
 			shadow_start + shadow_size,
 			GFP_KERNEL | __GFP_ZERO,
-			PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE,
+			PAGE_KERNEL, VM_NO_GUARD | VM_KASAN, NUMA_NO_NODE,
 			__builtin_return_address(0));
 
 	if (ret) {
-		find_vm_area(addr)->flags |= VM_KASAN;
 		kmemleak_ignore(ret);
 		return 0;
 	}


* Re: [PATCH 1/4] vmalloc: add vm_flags argument to internal __vmalloc_node()
  2018-02-21 12:24 ` [PATCH 1/4] vmalloc: add vm_flags argument to internal __vmalloc_node() Matthew Wilcox
@ 2018-02-21 12:39   ` Andrey Ryabinin
  0 siblings, 0 replies; 24+ messages in thread
From: Andrey Ryabinin @ 2018-02-21 12:39 UTC (permalink / raw)
  To: Matthew Wilcox, Konstantin Khlebnikov
  Cc: Dave Hansen, linux-kernel, Christoph Hellwig, linux-mm,
	Andy Lutomirski, Andrew Morton, kasan-dev



On 02/21/2018 03:24 PM, Matthew Wilcox wrote:
> On Tue, Jan 23, 2018 at 01:55:22PM +0300, Konstantin Khlebnikov wrote:
>> This allows to set VM_USERMAP in vmalloc_user() and vmalloc_32_user()
>> directly at allocation and avoid find_vm_area() call.
> 
> While reviewing this patch, I came across this infelicity ...
> 
> have I understood correctly?
> 
> diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
> index e13d911251e7..9060f80b4a41 100644
> --- a/mm/kasan/kasan.c
> +++ b/mm/kasan/kasan.c
> @@ -631,11 +631,10 @@ int kasan_module_alloc(void *addr, size_t size)
>  	ret = __vmalloc_node_range(shadow_size, 1, shadow_start,
>  			shadow_start + shadow_size,
>  			GFP_KERNEL | __GFP_ZERO,
> -			PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE,
> +			PAGE_KERNEL, VM_NO_GUARD | VM_KASAN, NUMA_NO_NODE,
>  			__builtin_return_address(0));
>  
>  	if (ret) {
> -		find_vm_area(addr)->flags |= VM_KASAN;

addr != ret.
Those are different vm areas.

>  		kmemleak_ignore(ret);
>  		return 0;
>  	}
> 


* Use higher-order pages in vmalloc
  2018-01-23 10:55 ` [PATCH 4/4] kernel/fork: add option to use virtually mapped stacks as fallback Konstantin Khlebnikov
@ 2018-02-21 15:42   ` Matthew Wilcox
  2018-02-21 16:11     ` Andy Lutomirski
  2018-02-21 16:16     ` Dave Hansen
  0 siblings, 2 replies; 24+ messages in thread
From: Matthew Wilcox @ 2018-02-21 15:42 UTC (permalink / raw)
  To: Konstantin Khlebnikov
  Cc: Dave Hansen, linux-kernel, Christoph Hellwig, linux-mm,
	Andy Lutomirski, Andrew Morton, Kirill A. Shutemov

On Tue, Jan 23, 2018 at 01:55:32PM +0300, Konstantin Khlebnikov wrote:
> Virtually mapped stack have two bonuses: it eats order-0 pages and
> adds guard page at the end. But it slightly slower if system have
> plenty free high-order pages.
> 
> This patch adds option to use virtually bapped stack as fallback for
> atomic allocation of traditional high-order page.

This prompted me to write a patch I've been meaning to do for a while,
allocating large pages if they're available to satisfy vmalloc.  I thought
it would save on touching multiple struct pages, but it turns out that
the checking code we currently have in the free_pages path requires you
to have initialised all of the tail pages (maybe we can make that code
conditional ...)

It does save the buddy allocator the trouble of breaking down higher-order
pages into order-0 pages, only to allocate them again immediately.

(Um, I seem to have broken the patch while cleaning it up for submission.
Since it probably won't be accepted anyway, I'm not going to try to debug it.)

diff --git a/kernel/fork.c b/kernel/fork.c
index be8aa5b98666..2bc01071b6ae 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -319,12 +319,12 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
 	if (vm) {
 		int i;
 
-		BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
-
-		for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
-			mod_zone_page_state(page_zone(vm->pages[i]),
+		for (i = 0; i < vm->nr_pages; i++) {
+			struct page *page = vm->pages[i];
+			unsigned int size = PAGE_SIZE << compound_order(page);
+			mod_zone_page_state(page_zone(page),
 					    NR_KERNEL_STACK_KB,
-					    PAGE_SIZE / 1024 * account);
+					    size / 1024 * account);
 		}
 
 		/* All stack pages belong to the same memcg. */
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index b728c98f49cd..4bfc29b21bc1 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -134,6 +134,7 @@ static void vunmap_page_range(unsigned long addr, unsigned long end)
 static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
 		unsigned long end, pgprot_t prot, struct page **pages, int *nr)
 {
+	unsigned int i;
 	pte_t *pte;
 
 	/*
@@ -151,9 +152,13 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
 			return -EBUSY;
 		if (WARN_ON(!page))
 			return -ENOMEM;
-		set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
+		for (i = 0; i < (1UL << compound_order(page)); i++) {
+			set_pte_at(&init_mm, addr, pte++,
+					mk_pte(page + i, prot));
+			addr += PAGE_SIZE;
+		}
 		(*nr)++;
-	} while (pte++, addr += PAGE_SIZE, addr != end);
+	} while (addr != end);
 	return 0;
 }
 
@@ -1530,14 +1535,14 @@ static void __vunmap(const void *addr, int deallocate_pages)
 	debug_check_no_obj_freed(addr, get_vm_area_size(area));
 
 	if (deallocate_pages) {
-		int i;
+		unsigned int i;
 
 		for (i = 0; i < area->nr_pages; i++) {
 			struct page *page = area->pages[i];
 
 			BUG_ON(!page);
 			__ClearPageVmalloc(page);
-			__free_pages(page, 0);
+			__free_pages(page, compound_order(page));
 		}
 
 		kvfree(area->pages);
@@ -1696,11 +1701,20 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 
 	for (i = 0; i < area->nr_pages; i++) {
 		struct page *page;
-
-		if (node == NUMA_NO_NODE)
-			page = alloc_page(alloc_mask);
-		else
-			page = alloc_pages_node(node, alloc_mask, 0);
+		unsigned int j = ilog2(area->nr_pages - i) + 1;
+
+		do {
+			j--;
+			if (node == NUMA_NO_NODE)
+				page = alloc_pages(alloc_mask, j);
+			else
+				page = alloc_pages_node(node, alloc_mask, j);
+		} while (!page && j);
+
+		if (j) {
+			area->nr_pages -= (1UL << j) - 1;
+			prep_compound_page(page, j);
+		}
 
 		if (unlikely(!page)) {
 			/* Successfully allocated i pages, free them in __vunmap() */
@@ -1719,8 +1733,8 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 
 fail:
 	warn_alloc(gfp_mask, NULL,
-			  "vmalloc: allocation failure, allocated %ld of %ld bytes",
-			  (area->nr_pages*PAGE_SIZE), area->size);
+		   "vmalloc: allocation failure, allocated %ld of %ld bytes",
+		   (nr_pages * PAGE_SIZE), get_vm_area_size(area));
 	vfree(area->addr);
 	return NULL;
 }
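
A standalone sketch of the "largest order that still fits" idea, detached
from the vmalloc internals (helper name, signature and the __GFP_NOWARN
choice are assumptions, not taken from the patch above):

/* Sketch: try the largest order covering the remaining page count,
 * stepping down until something is available. @remaining must be >= 1. */
static struct page *alloc_best_order(gfp_t gfp, int node,
				     unsigned long remaining,
				     unsigned int *order)
{
	unsigned int o = min_t(unsigned int, ilog2(remaining), MAX_ORDER - 1);
	struct page *page;

	for (;; o--) {
		page = alloc_pages_node(node, gfp | __GFP_NOWARN, o);
		if (page || !o)
			break;
	}

	*order = o;
	return page;	/* may still be NULL if even order-0 failed */
}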


* Re: Use higher-order pages in vmalloc
  2018-02-21 15:42   ` Use higher-order pages in vmalloc Matthew Wilcox
@ 2018-02-21 16:11     ` Andy Lutomirski
  2018-02-21 16:50       ` Matthew Wilcox
  2018-02-21 16:16     ` Dave Hansen
  1 sibling, 1 reply; 24+ messages in thread
From: Andy Lutomirski @ 2018-02-21 16:11 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Konstantin Khlebnikov, Dave Hansen, LKML, Christoph Hellwig,
	Linux-MM, Andy Lutomirski, Andrew Morton, Kirill A. Shutemov

On Wed, Feb 21, 2018 at 3:42 PM, Matthew Wilcox <willy@infradead.org> wrote:
> On Tue, Jan 23, 2018 at 01:55:32PM +0300, Konstantin Khlebnikov wrote:
>> Virtually mapped stack have two bonuses: it eats order-0 pages and
>> adds guard page at the end. But it slightly slower if system have
>> plenty free high-order pages.
>>
>> This patch adds option to use virtually bapped stack as fallback for
>> atomic allocation of traditional high-order page.
>
> This prompted me to write a patch I've been meaning to do for a while,
> allocating large pages if they're available to satisfy vmalloc.  I thought
> it would save on touching multiple struct pages, but it turns out that
> the checking code we currently have in the free_pages path requires you
> to have initialised all of the tail pages (maybe we can make that code
> conditional ...)
>
> It does save the buddy allocator the trouble of breaking down higher-order
> pages into order-0 pages, only to allocate them again immediately.
>
> (um, i seem to have broken the patch while cleaning it up for submission.
> since it probably won't be accepted anyway, I'm not going to try to debug it)
>
> diff --git a/kernel/fork.c b/kernel/fork.c
> index be8aa5b98666..2bc01071b6ae 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -319,12 +319,12 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
>         if (vm) {
>                 int i;
>
> -               BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
> -
> -               for (i = 0; i < THREAD_SIZE / PAGE_SIZE; i++) {
> -                       mod_zone_page_state(page_zone(vm->pages[i]),
> +               for (i = 0; i < vm->nr_pages; i++) {
> +                       struct page *page = vm->pages[i];
> +                       unsigned int size = PAGE_SIZE << compound_order(page);
> +                       mod_zone_page_state(page_zone(page),
>                                             NR_KERNEL_STACK_KB,
> -                                           PAGE_SIZE / 1024 * account);
> +                                           size / 1024 * account);
>                 }
>
>                 /* All stack pages belong to the same memcg. */
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index b728c98f49cd..4bfc29b21bc1 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -134,6 +134,7 @@ static void vunmap_page_range(unsigned long addr, unsigned long end)
>  static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
>                 unsigned long end, pgprot_t prot, struct page **pages, int *nr)
>  {
> +       unsigned int i;
>         pte_t *pte;
>
>         /*
> @@ -151,9 +152,13 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
>                         return -EBUSY;
>                 if (WARN_ON(!page))
>                         return -ENOMEM;
> -               set_pte_at(&init_mm, addr, pte, mk_pte(page, prot));
> +               for (i = 0; i < (1UL << compound_order(page)); i++) {
> +                       set_pte_at(&init_mm, addr, pte++,
> +                                       mk_pte(page + i, prot));
> +                       addr += PAGE_SIZE;
> +               }
>                 (*nr)++;
> -       } while (pte++, addr += PAGE_SIZE, addr != end);
> +       } while (addr != end);
>         return 0;
>  }
>
> @@ -1530,14 +1535,14 @@ static void __vunmap(const void *addr, int deallocate_pages)
>         debug_check_no_obj_freed(addr, get_vm_area_size(area));
>
>         if (deallocate_pages) {
> -               int i;
> +               unsigned int i;
>
>                 for (i = 0; i < area->nr_pages; i++) {
>                         struct page *page = area->pages[i];
>
>                         BUG_ON(!page);
>                         __ClearPageVmalloc(page);
> -                       __free_pages(page, 0);
> +                       __free_pages(page, compound_order(page));
>                 }
>
>                 kvfree(area->pages);
> @@ -1696,11 +1701,20 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
>
>         for (i = 0; i < area->nr_pages; i++) {
>                 struct page *page;
> -
> -               if (node == NUMA_NO_NODE)
> -                       page = alloc_page(alloc_mask);
> -               else
> -                       page = alloc_pages_node(node, alloc_mask, 0);
> +               unsigned int j = ilog2(area->nr_pages - i) + 1;
> +
> +               do {
> +                       j--;
> +                       if (node == NUMA_NO_NODE)
> +                               page = alloc_pages(alloc_mask, j);
> +                       else
> +                               page = alloc_pages_node(node, alloc_mask, j);
> +               } while (!page && j);
> +
> +               if (j) {
> +                       area->nr_pages -= (1UL << j) - 1;

Is there any code that expects area->nr_pages to be the size of the
area in pages?  I don't know of any such code.

> +                       prep_compound_page(page, j);
> +               }
>
>                 if (unlikely(!page)) {
>                         /* Successfully allocated i pages, free them in __vunmap() */
> @@ -1719,8 +1733,8 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
>
>  fail:
>         warn_alloc(gfp_mask, NULL,
> -                         "vmalloc: allocation failure, allocated %ld of %ld bytes",
> -                         (area->nr_pages*PAGE_SIZE), area->size);
> +                  "vmalloc: allocation failure, allocated %ld of %ld bytes",
> +                  (nr_pages * PAGE_SIZE), get_vm_area_size(area));
>         vfree(area->addr);
>         return NULL;
>  }


* Re: Use higher-order pages in vmalloc
  2018-02-21 15:42   ` Use higher-order pages in vmalloc Matthew Wilcox
  2018-02-21 16:11     ` Andy Lutomirski
@ 2018-02-21 16:16     ` Dave Hansen
  2018-02-21 17:01       ` Matthew Wilcox
  1 sibling, 1 reply; 24+ messages in thread
From: Dave Hansen @ 2018-02-21 16:16 UTC (permalink / raw)
  To: Matthew Wilcox, Konstantin Khlebnikov
  Cc: linux-kernel, Christoph Hellwig, linux-mm, Andy Lutomirski,
	Andrew Morton, Kirill A. Shutemov

On 02/21/2018 07:42 AM, Matthew Wilcox wrote:
> On Tue, Jan 23, 2018 at 01:55:32PM +0300, Konstantin Khlebnikov wrote:
>> Virtually mapped stack have two bonuses: it eats order-0 pages and
>> adds guard page at the end. But it slightly slower if system have
>> plenty free high-order pages.
>>
>> This patch adds option to use virtually bapped stack as fallback for
>> atomic allocation of traditional high-order page.
> This prompted me to write a patch I've been meaning to do for a while,
> allocating large pages if they're available to satisfy vmalloc.  I thought
> it would save on touching multiple struct pages, but it turns out that
> the checking code we currently have in the free_pages path requires you
> to have initialised all of the tail pages (maybe we can make that code
> conditional ...)

What's the concept here?  If we can use high-order pages for vmalloc() at
the moment, we *should* use them?

One of the coolest things about vmalloc() is that it can do large
allocations without consuming large (high-order) pages, so it has very
few side-effects compared to doing a bunch of order-0 allocations.  This
patch seems to propose removing that cool thing.  Even trying the
high-order allocation could kick off a bunch of reclaim and compaction
that was not there previously.

If you could take this and only _opportunistically_ allocate large pages,
it could be a more universal win.  You could try to make sure that no
compaction or reclaim is done for the large allocation.  Or, maybe you
only try it if there are *only* high-order pages in the allocator that
would have been broken down into order-0 *anyway*.

I'm not sure it's worth it, though.  I don't see a lot of folks
complaining about vmalloc()'s speed or TLB impact.
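
A rough sketch of what such an opportunistic attempt could look like (the
exact flag choice is an assumption, not something settled in this thread):

/* Hypothetical: one high-order attempt that never enters direct reclaim
 * or compaction, then a plain order-0 fallback. */
static struct page *opportunistic_alloc(gfp_t gfp_mask, int node,
					unsigned int order)
{
	struct page *page;

	page = alloc_pages_node(node,
			(gfp_mask | __GFP_NOWARN) & ~__GFP_DIRECT_RECLAIM,
			order);
	if (!page)
		page = alloc_pages_node(node, gfp_mask, 0);
	return page;
}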


* Re: [PATCH 3/4] kernel/fork: switch vmapped stack allocation to __vmalloc_area()
  2018-02-21  7:23       ` Konstantin Khlebnikov
@ 2018-02-21 16:35         ` Andy Lutomirski
  0 siblings, 0 replies; 24+ messages in thread
From: Andy Lutomirski @ 2018-02-21 16:35 UTC (permalink / raw)
  To: Konstantin Khlebnikov
  Cc: Andrew Morton, Dave Hansen, LKML, Christoph Hellwig, Linux-MM,
	Andy Lutomirski

On Wed, Feb 21, 2018 at 7:23 AM, Konstantin Khlebnikov
<khlebnikov@yandex-team.ru> wrote:
>
>
> On 21.02.2018 03:16, Andrew Morton wrote:
>>
>> On Tue, 23 Jan 2018 16:57:21 +0300 Konstantin Khlebnikov
>> <khlebnikov@yandex-team.ru> wrote:
>>
>>> # stress-ng --clone 100 -t 10s --metrics-brief
>>> at 32-core machine shows boost 35000 -> 36000 bogo ops
>>>
>>> Patch 4/4 is a kind of RFC.
>>> Actually per-cpu cache of preallocated stacks works faster than buddy
>>> allocator thus
>>> performance boots for it happens only at completely insane rate of
>>> clones.
>>>
>>
>> I'm not really sure what to make of this patchset.  Is it useful in any
>> known real-world use cases?
>
>
> Not yet. Feel free to ignore last patch.
>
>>
>>> +         This option neutralize stack overflow protection but allows to
>>> +         achieve best performance for syscalls fork() and clone().
>>
>>
>> That sounds problematic, but perhaps acceptable if the fallback only
>> happens rarely.
>>
>> Can this code be folded into CONFIG_VMAP_STACk in some cleaner fashion?
>> We now have options for non-vmapped stacks, vmapped stacks and a mix
>> of both.
>>
>> And what about this comment in arch/Kconfig:VMAP_STACK:
>>
>>            This is presently incompatible with KASAN because KASAN expects
>>            the stack to map directly to the KASAN shadow map using a
>> formula
>>            that is incorrect if the stack is in vmalloc space.
>>
>>
>> So VMAP_STACK_AS_FALLBACK will intermittently break KASAN?
>>
>
> All of this (including CONFIG_VMAP_STACK) could be turned into boot option.
> I think this would be a best solution.

Or someone could *fix* KASAN to work with stacks in the vmalloc area.


* Re: Use higher-order pages in vmalloc
  2018-02-21 16:11     ` Andy Lutomirski
@ 2018-02-21 16:50       ` Matthew Wilcox
  0 siblings, 0 replies; 24+ messages in thread
From: Matthew Wilcox @ 2018-02-21 16:50 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Konstantin Khlebnikov, Dave Hansen, LKML, Christoph Hellwig,
	Linux-MM, Andrew Morton, Kirill A. Shutemov

On Wed, Feb 21, 2018 at 04:11:16PM +0000, Andy Lutomirski wrote:
> On Wed, Feb 21, 2018 at 3:42 PM, Matthew Wilcox <willy@infradead.org> wrote:
> > +++ b/kernel/fork.c
> > @@ -319,12 +319,12 @@ static void account_kernel_stack(struct task_struct *tsk, int account)
> >         if (vm) {
> >                 int i;
> >
> > -               BUG_ON(vm->nr_pages != THREAD_SIZE / PAGE_SIZE);
...
> > +               if (j) {
> > +                       area->nr_pages -= (1UL << j) - 1;
> 
> Is there any code that expects area->nr_pages to be the size of the
> area in pages?  I don't know of any such code.

I found one and deleted it ;-)


* Re: Use higher-order pages in vmalloc
  2018-02-21 16:16     ` Dave Hansen
@ 2018-02-21 17:01       ` Matthew Wilcox
  2018-02-22  6:59         ` Michal Hocko
  0 siblings, 1 reply; 24+ messages in thread
From: Matthew Wilcox @ 2018-02-21 17:01 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Konstantin Khlebnikov, linux-kernel, Christoph Hellwig, linux-mm,
	Andy Lutomirski, Andrew Morton, Kirill A. Shutemov

On Wed, Feb 21, 2018 at 08:16:22AM -0800, Dave Hansen wrote:
> On 02/21/2018 07:42 AM, Matthew Wilcox wrote:
> > This prompted me to write a patch I've been meaning to do for a while,
> > allocating large pages if they're available to satisfy vmalloc.  I thought
> > it would save on touching multiple struct pages, but it turns out that
> > the checking code we currently have in the free_pages path requires you
> > to have initialised all of the tail pages (maybe we can make that code
> > conditional ...)
> 
> What the concept here?  If we can use high-order pages for vmalloc() at
> the moment, we *should* use them?

Right.  It helps with fragmentation if we can keep higher-order
allocations together.

> One of the coolest things about vmalloc() is that it can do large
> allocations without consuming large (high-order) pages, so it has very
> few side-effects compared to doing a bunch of order-0 allocations.  This
> patch seems to propose removing that cool thing.  Even trying the
> high-order allocation could kick off a bunch of reclaim and compaction
> that was not there previously.

Yes, that's one of the debatable things.  It'd be nice to have a GFP
flag that stopped after calling get_page_from_freelist() and didn't try
to do compaction or reclaim.

> If you could take this an only _opportunistically_ allocate large pages,
> it could be a more universal win.  You could try to make sure that no
> compaction or reclaim is done for the large allocation.  Or, maybe you
> only try it if there are *only* high-order pages in the allocator that
> would have been broken down into order-0 *anyway*.
> 
> I'm not sure it's worth it, though.  I don't see a lot of folks
> complaining about vmalloc()'s speed or TLB impact.

No, I'm not sure it's worth it either, although Konstantin's mail
suggesting improvements in fork speed were possible by avoiding vmalloc
reminded me that I'd been meaning to give this a try.


* Re: Use higher-order pages in vmalloc
  2018-02-21 17:01       ` Matthew Wilcox
@ 2018-02-22  6:59         ` Michal Hocko
  2018-02-22 12:22           ` Matthew Wilcox
  0 siblings, 1 reply; 24+ messages in thread
From: Michal Hocko @ 2018-02-22  6:59 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Dave Hansen, Konstantin Khlebnikov, linux-kernel,
	Christoph Hellwig, linux-mm, Andy Lutomirski, Andrew Morton,
	Kirill A. Shutemov

On Wed 21-02-18 09:01:29, Matthew Wilcox wrote:
> On Wed, Feb 21, 2018 at 08:16:22AM -0800, Dave Hansen wrote:
> > On 02/21/2018 07:42 AM, Matthew Wilcox wrote:
> > > This prompted me to write a patch I've been meaning to do for a while,
> > > allocating large pages if they're available to satisfy vmalloc.  I thought
> > > it would save on touching multiple struct pages, but it turns out that
> > > the checking code we currently have in the free_pages path requires you
> > > to have initialised all of the tail pages (maybe we can make that code
> > > conditional ...)
> > 
> > What the concept here?  If we can use high-order pages for vmalloc() at
> > the moment, we *should* use them?
> 
> Right.  It helps with fragmentation if we can keep higher-order
> allocations together.

Hmm, wouldn't it help if we made vmalloc pages migrateable instead? That
would help the compaction and get us to a lower fragmentation longterm
without playing tricks in the allocation path.

> > One of the coolest things about vmalloc() is that it can do large
> > allocations without consuming large (high-order) pages, so it has very
> > few side-effects compared to doing a bunch of order-0 allocations.  This
> > patch seems to propose removing that cool thing.  Even trying the
> > high-order allocation could kick off a bunch of reclaim and compaction
> > that was not there previously.
> 
> Yes, that's one of the debatable things.  It'd be nice to have a GFP
> flag that stopped after calling get_page_from_freelist() and didn't try
> to do compaction or reclaim.

GFP_NOWAIT, you mean?

> > If you could take this an only _opportunistically_ allocate large pages,
> > it could be a more universal win.  You could try to make sure that no
> > compaction or reclaim is done for the large allocation.  Or, maybe you
> > only try it if there are *only* high-order pages in the allocator that
> > would have been broken down into order-0 *anyway*.
> > 
> > I'm not sure it's worth it, though.  I don't see a lot of folks
> > complaining about vmalloc()'s speed or TLB impact.
> 
> No, I'm not sure it's worth it either, although Konstantin's mail
> suggesting improvements in fork speed were possible by avoiding vmalloc
> reminded me that I'd been meaning to give this a try.

Maybe we should consider kvmalloc for the kernel stack?
-- 
Michal Hocko
SUSE Labs
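
A rough sketch of what the kvmalloc suggestion would mean for the stack
allocation (hypothetical; as the reply below notes, the guard page is lost
whenever the kmalloc path wins):

/* Hypothetical: kvmalloc() tries a physically contiguous allocation first
 * and falls back to vmalloc; only the vmalloc case has a vm_struct
 * (and a guard page). */
static void *kvmalloc_stack(struct task_struct *tsk, int node)
{
	void *stack = kvmalloc_node(THREAD_SIZE, THREADINFO_GFP, node);

	if (!stack)
		return NULL;
	tsk->stack_vm_area = is_vmalloc_addr(stack) ?
				find_vm_area(stack) : NULL;
	return stack;
}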


* Re: Use higher-order pages in vmalloc
  2018-02-22  6:59         ` Michal Hocko
@ 2018-02-22 12:22           ` Matthew Wilcox
  2018-02-22 13:36             ` Michal Hocko
  0 siblings, 1 reply; 24+ messages in thread
From: Matthew Wilcox @ 2018-02-22 12:22 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Dave Hansen, Konstantin Khlebnikov, linux-kernel,
	Christoph Hellwig, linux-mm, Andy Lutomirski, Andrew Morton,
	Kirill A. Shutemov

On Thu, Feb 22, 2018 at 07:59:43AM +0100, Michal Hocko wrote:
> On Wed 21-02-18 09:01:29, Matthew Wilcox wrote:
> > Right.  It helps with fragmentation if we can keep higher-order
> > allocations together.
> 
> Hmm, wouldn't it help if we made vmalloc pages migrateable instead? That
> would help the compaction and get us to a lower fragmentation longterm
> without playing tricks in the allocation path.

I was wondering about that possibility.  If we want to migrate a page
then we have to shoot down the PTE across all CPUs, copy the data to the
new page, and insert the new PTE.  Copying 4kB doesn't take long; if you
have 12GB/s (current example on Wikipedia: dual-channel memory and one
DDR2-800 module per channel gives a theoretical bandwidth of 12.8GB/s)
then we should be able to copy a page in 666ns).  So there's no problem
holding a spinlock for it.

But we can't handle a fault in vmalloc space today.  It's handled in
arch-specific code, see vmalloc_fault() in arch/x86/mm/fault.c
If we're going to do this, it'll have to be something arches opt into
because I'm not taking on the job of fixing every architecture!

> Maybe we should consider kvmalloc for the kernel stack?

We'd lose the guard page, so it'd have to be something we let the
sysadmin decide to do.


* Re: Use higher-order pages in vmalloc
  2018-02-22 12:22           ` Matthew Wilcox
@ 2018-02-22 13:36             ` Michal Hocko
  2018-02-22 19:01               ` Andy Lutomirski
  0 siblings, 1 reply; 24+ messages in thread
From: Michal Hocko @ 2018-02-22 13:36 UTC (permalink / raw)
  To: Matthew Wilcox
  Cc: Dave Hansen, Konstantin Khlebnikov, linux-kernel,
	Christoph Hellwig, linux-mm, Andy Lutomirski, Andrew Morton,
	Kirill A. Shutemov

On Thu 22-02-18 04:22:54, Matthew Wilcox wrote:
> On Thu, Feb 22, 2018 at 07:59:43AM +0100, Michal Hocko wrote:
> > On Wed 21-02-18 09:01:29, Matthew Wilcox wrote:
> > > Right.  It helps with fragmentation if we can keep higher-order
> > > allocations together.
> > 
> > Hmm, wouldn't it help if we made vmalloc pages migrateable instead? That
> > would help the compaction and get us to a lower fragmentation longterm
> > without playing tricks in the allocation path.
> 
> I was wondering about that possibility.  If we want to migrate a page
> then we have to shoot down the PTE across all CPUs, copy the data to the
> new page, and insert the new PTE.  Copying 4kB doesn't take long; if you
> have 12GB/s (current example on Wikipedia: dual-channel memory and one
> DDR2-800 module per channel gives a theoretical bandwidth of 12.8GB/s)
> then we should be able to copy a page in 666ns).  So there's no problem
> holding a spinlock for it.
> 
> But we can't handle a fault in vmalloc space today.  It's handled in
> arch-specific code, see vmalloc_fault() in arch/x86/mm/fault.c
> If we're going to do this, it'll have to be something arches opt into
> because I'm not taking on the job of fixing every architecture!

yes.

> > Maybe we should consider kvmalloc for the kernel stack?
> 
> We'd lose the guard page, so it'd have to be something we let the
> sysadmin decide to do.

ohh, right, I forgot about the guard page.
-- 
Michal Hocko
SUSE Labs


* Re: Use higher-order pages in vmalloc
  2018-02-22 13:36             ` Michal Hocko
@ 2018-02-22 19:01               ` Andy Lutomirski
  2018-02-22 19:19                 ` Dave Hansen
  2018-02-23 12:13                 ` Michal Hocko
  0 siblings, 2 replies; 24+ messages in thread
From: Andy Lutomirski @ 2018-02-22 19:01 UTC (permalink / raw)
  To: Michal Hocko
  Cc: Matthew Wilcox, Dave Hansen, Konstantin Khlebnikov, LKML,
	Christoph Hellwig, Linux-MM, Andy Lutomirski, Andrew Morton,
	Kirill A. Shutemov

On Thu, Feb 22, 2018 at 1:36 PM, Michal Hocko <mhocko@kernel.org> wrote:
> On Thu 22-02-18 04:22:54, Matthew Wilcox wrote:
>> On Thu, Feb 22, 2018 at 07:59:43AM +0100, Michal Hocko wrote:
>> > On Wed 21-02-18 09:01:29, Matthew Wilcox wrote:
>> > > Right.  It helps with fragmentation if we can keep higher-order
>> > > allocations together.
>> >
>> > Hmm, wouldn't it help if we made vmalloc pages migrateable instead? That
>> > would help the compaction and get us to a lower fragmentation longterm
>> > without playing tricks in the allocation path.
>>
>> I was wondering about that possibility.  If we want to migrate a page
>> then we have to shoot down the PTE across all CPUs, copy the data to the
>> new page, and insert the new PTE.  Copying 4kB doesn't take long; if you
>> have 12GB/s (current example on Wikipedia: dual-channel memory and one
>> DDR2-800 module per channel gives a theoretical bandwidth of 12.8GB/s)
>> then we should be able to copy a page in 666ns).  So there's no problem
>> holding a spinlock for it.
>>
>> But we can't handle a fault in vmalloc space today.  It's handled in
>> arch-specific code, see vmalloc_fault() in arch/x86/mm/fault.c
>> If we're going to do this, it'll have to be something arches opt into
>> because I'm not taking on the job of fixing every architecture!
>
> yes.

On x86, if you shoot down the PTE for the current stack, you're dead.
vmalloc_fault() might not even be called.  Instead we hit
do_double_fault(), and the manual warns extremely strongly against
trying to recover, and, in this case, I agree with the SDM.  If you
actually want this to work, there needs to be a special IPI broadcast
to the task in question (with appropriate synchronization) that calls
magic arch code that does the switcheroo.

Didn't someone (Christoph?) have a patch to teach the page allocator
to give high-order allocations if available and otherwise fall back to
low order?


* Re: Use higher-order pages in vmalloc
  2018-02-22 19:01               ` Andy Lutomirski
@ 2018-02-22 19:19                 ` Dave Hansen
  2018-02-22 19:27                   ` Andy Lutomirski
  2018-02-23 12:13                 ` Michal Hocko
  1 sibling, 1 reply; 24+ messages in thread
From: Dave Hansen @ 2018-02-22 19:19 UTC (permalink / raw)
  To: Andy Lutomirski, Michal Hocko
  Cc: Matthew Wilcox, Konstantin Khlebnikov, LKML, Christoph Hellwig,
	Linux-MM, Andrew Morton, Kirill A. Shutemov

On 02/22/2018 11:01 AM, Andy Lutomirski wrote:
> On x86, if you shoot down the PTE for the current stack, you're dead.

*If* we were to go do this insanity for vmalloc()'d memory, we could
probably limit it to kswapd, and also make sure that kernel threads
don't get vmalloc()'d stacks or that we mark them in a way to say we
never muck with them.


* Re: Use higher-order pages in vmalloc
  2018-02-22 19:19                 ` Dave Hansen
@ 2018-02-22 19:27                   ` Andy Lutomirski
  2018-02-22 19:36                     ` Dave Hansen
  0 siblings, 1 reply; 24+ messages in thread
From: Andy Lutomirski @ 2018-02-22 19:27 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Andy Lutomirski, Michal Hocko, Matthew Wilcox,
	Konstantin Khlebnikov, LKML, Christoph Hellwig, Linux-MM,
	Andrew Morton, Kirill A. Shutemov

On Thu, Feb 22, 2018 at 7:19 PM, Dave Hansen <dave.hansen@intel.com> wrote:
> On 02/22/2018 11:01 AM, Andy Lutomirski wrote:
>> On x86, if you shoot down the PTE for the current stack, you're dead.
>
> *If* we were to go do this insanity for vmalloc()'d memory, we could
> probably limit it to kswapd, and also make sure that kernel threads
> don't get vmalloc()'d stacks or that we mark them in a way to say we
> never muck with them.

How does that help?  We need to make sure that the task whose stack
we're migrating is (a) not running and (b) is not being switched in or
out.  And we have to make sure that there isn't some *other* mm that
has the task's stack in ASID's TLB space.

Maybe we take some lock so the task can't run, then flush the world,
then release the lock.


* Re: Use higher-order pages in vmalloc
  2018-02-22 19:27                   ` Andy Lutomirski
@ 2018-02-22 19:36                     ` Dave Hansen
  0 siblings, 0 replies; 24+ messages in thread
From: Dave Hansen @ 2018-02-22 19:36 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Michal Hocko, Matthew Wilcox, Konstantin Khlebnikov, LKML,
	Christoph Hellwig, Linux-MM, Andrew Morton, Kirill A. Shutemov

On 02/22/2018 11:27 AM, Andy Lutomirski wrote:
> On Thu, Feb 22, 2018 at 7:19 PM, Dave Hansen <dave.hansen@intel.com> wrote:
>> On 02/22/2018 11:01 AM, Andy Lutomirski wrote:
>>> On x86, if you shoot down the PTE for the current stack, you're dead.
>>
>> *If* we were to go do this insanity for vmalloc()'d memory, we could
>> probably limit it to kswapd, and also make sure that kernel threads
>> don't get vmalloc()'d stacks, or that we mark them in a way that says
>> we never muck with them.
> 
> How does that help?  We need to make sure that the task whose stack
> we're migrating is (a) not running and (b) not being switched in or
> out.  And we have to make sure that there isn't some *other* mm that
> has the task's stack in its ASID's TLB space.
> 
> Maybe we take some lock so the task can't run, then flush the world,
> then release the lock.

Oh, I was thinking only of the case where you try to muck with your
*own* stack.  But, I see what you are saying about doing it to another
task on another CPU that is actively using the stack.

I think what you're saying is that we do not want to handle faults that
are caused by %esp being unusable.  Whatever we do, we've got to make
sure that no CPU has %esp pointing into a stack that we are messing with.
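
A hedged sketch of that kind of check; on its own it is racy and would
only make sense under whatever pinning keeps the situation stable:

#include <linux/sched/signal.h>
#include <linux/sched/task_stack.h>

/* Does [start, end) overlap the stack of any task currently on a CPU?
 * Purely illustrative -- without extra locking a task could be
 * scheduled in right after this returns false. */
static bool range_overlaps_running_stack(unsigned long start,
                                         unsigned long end)
{
        struct task_struct *g, *t;
        bool overlap = false;

        rcu_read_lock();
        for_each_process_thread(g, t) {
                unsigned long stack = (unsigned long)task_stack_page(t);

                if (t->on_cpu && stack < end && start < stack + THREAD_SIZE) {
                        overlap = true;
                        break;
                }
        }
        rcu_read_unlock();

        return overlap;
}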

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: Use higher-order pages in vmalloc
  2018-02-22 19:01               ` Andy Lutomirski
  2018-02-22 19:19                 ` Dave Hansen
@ 2018-02-23 12:13                 ` Michal Hocko
  2018-03-01 18:16                   ` Eric Dumazet
  1 sibling, 1 reply; 24+ messages in thread
From: Michal Hocko @ 2018-02-23 12:13 UTC (permalink / raw)
  To: Andy Lutomirski
  Cc: Matthew Wilcox, Dave Hansen, Konstantin Khlebnikov, LKML,
	Christoph Hellwig, Linux-MM, Andrew Morton, Kirill A. Shutemov

On Thu 22-02-18 19:01:35, Andy Lutomirski wrote:
> On Thu, Feb 22, 2018 at 1:36 PM, Michal Hocko <mhocko@kernel.org> wrote:
> > On Thu 22-02-18 04:22:54, Matthew Wilcox wrote:
> >> On Thu, Feb 22, 2018 at 07:59:43AM +0100, Michal Hocko wrote:
> >> > On Wed 21-02-18 09:01:29, Matthew Wilcox wrote:
> >> > > Right.  It helps with fragmentation if we can keep higher-order
> >> > > allocations together.
> >> >
> >> > Hmm, wouldn't it help if we made vmalloc pages migrateable instead? That
> >> > would help the compaction and get us to a lower fragmentation longterm
> >> > without playing tricks in the allocation path.
> >>
> >> I was wondering about that possibility.  If we want to migrate a page
> >> then we have to shoot down the PTE across all CPUs, copy the data to the
> >> new page, and insert the new PTE.  Copying 4kB doesn't take long; if you
> >> have 12GB/s (current example on Wikipedia: dual-channel memory and one
> >> DDR2-800 module per channel gives a theoretical bandwidth of 12.8GB/s)
> >> then we should be able to copy a page in 666ns.  So there's no problem
> >> holding a spinlock for it.
> >>
> >> But we can't handle a fault in vmalloc space today.  It's handled in
> >> arch-specific code, see vmalloc_fault() in arch/x86/mm/fault.c
> >> If we're going to do this, it'll have to be something arches opt into
> >> because I'm not taking on the job of fixing every architecture!
> >
> > yes.
> 
> On x86, if you shoot down the PTE for the current stack, you're dead.
> vmalloc_fault() might not even be called.  Instead we hit
> do_double_fault(), and the manual warns extremely strongly against
> trying to recover, and, in this case, I agree with the SDM.  If you
> actually want this to work, there needs to be a special IPI broadcast
> to the task in question (with appropriate synchronization) that calls
> magic arch code that does the switcheroo.

Why can't we use the pte swap entry trick for vmalloc migration as well?
I haven't explored this path at all, to be honest.
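
For illustration, the user-page trick parks the PTE behind a migration
entry so that a faulting thread can wait for the copy to finish.
Applied to a vmalloc PTE it might look like the sketch below, with the
big caveat that no kernel-address fault path knows how to wait on such
an entry today, so this is the missing piece rather than a solution:

#include <linux/mm.h>
#include <linux/swapops.h>
#include <asm/tlbflush.h>

/* Replace a kernel mapping with a migration entry before copying the
 * page.  Illustrative only: vmalloc_fault() would have to learn to
 * recognise and wait on the entry, which it cannot do today. */
static void park_vmalloc_pte(pte_t *ptep, unsigned long addr,
                             struct page *page)
{
        swp_entry_t entry = make_migration_entry(page, 1 /* writable */);

        set_pte_at(&init_mm, addr, ptep, swp_entry_to_pte(entry));
        flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
}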

> Didn't someone (Christoph?) have a patch to teach the page allocator
> to give high-order allocations if available and otherwise fall back to
> low order?

Do you mean kvmalloc?

-- 
Michal Hocko
SUSE Labs
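
For comparison, kvmalloc() attacks the problem from the other side: it
tries a physically contiguous kmalloc() first and only falls back to
vmalloc() when that fails.  A typical use (struct foo is just a
stand-in type):

#include <linux/mm.h>
#include <linux/types.h>

struct foo {
        u64 key;
        u64 value;
};

/* kvmalloc_array() checks for multiplication overflow, attempts a
 * kmalloc() and falls back to vmalloc(); free the result with
 * kvfree(). */
static struct foo *alloc_foo_table(size_t nr)
{
        return kvmalloc_array(nr, sizeof(struct foo),
                              GFP_KERNEL | __GFP_ZERO);
}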

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: Use higher-order pages in vmalloc
  2018-02-23 12:13                 ` Michal Hocko
@ 2018-03-01 18:16                   ` Eric Dumazet
  0 siblings, 0 replies; 24+ messages in thread
From: Eric Dumazet @ 2018-03-01 18:16 UTC (permalink / raw)
  To: Michal Hocko, Andy Lutomirski
  Cc: Matthew Wilcox, Dave Hansen, Konstantin Khlebnikov, LKML,
	Christoph Hellwig, Linux-MM, Andrew Morton, Kirill A. Shutemov

On Fri, 2018-02-23 at 13:13 +0100, Michal Hocko wrote:
> On Thu 22-02-18 19:01:35, Andy Lutomirski wrote:
> > On Thu, Feb 22, 2018 at 1:36 PM, Michal Hocko <mhocko@kernel.org> wrote:
> > > On Thu 22-02-18 04:22:54, Matthew Wilcox wrote:
> > > > On Thu, Feb 22, 2018 at 07:59:43AM +0100, Michal Hocko wrote:
> > > > > On Wed 21-02-18 09:01:29, Matthew Wilcox wrote:
> > > > > > Right.  It helps with fragmentation if we can keep higher-order
> > > > > > allocations together.
> > > > > 
> > > > > Hmm, wouldn't it help if we made vmalloc pages migrateable instead? That
> > > > > would help the compaction and get us to a lower fragmentation longterm
> > > > > without playing tricks in the allocation path.
> > > > 
> > > > I was wondering about that possibility.  If we want to migrate a page
> > > > then we have to shoot down the PTE across all CPUs, copy the data to the
> > > > new page, and insert the new PTE.  Copying 4kB doesn't take long; if you
> > > > have 12GB/s (current example on Wikipedia: dual-channel memory and one
> > > > DDR2-800 module per channel gives a theoretical bandwidth of 12.8GB/s)
> > > > then we should be able to copy a page in 666ns.  So there's no problem
> > > > holding a spinlock for it.
> > > > 
> > > > But we can't handle a fault in vmalloc space today.  It's handled in
> > > > arch-specific code, see vmalloc_fault() in arch/x86/mm/fault.c
> > > > If we're going to do this, it'll have to be something arches opt into
> > > > because I'm not taking on the job of fixing every architecture!
> > > 
> > > yes.
> > 
> > On x86, if you shoot down the PTE for the current stack, you're dead.
> > vmalloc_fault() might not even be called.  Instead we hit
> > do_double_fault(), and the manual warns extremely strongly against
> > trying to recover, and, in this case, I agree with the SDM.  If you
> > actually want this to work, there needs to be a special IPI broadcast
> > to the task in question (with appropriate synchronization) that calls
> > magic arch code that does the switcheroo.
> 
> Why can't we use the pte swap entry trick for vmalloc migration as well?
> I haven't explored this path at all, to be honest.
> 
> > Didn't someone (Christoph?) have a patch to teach the page allocator
> > to give high-order allocations if available and otherwise fall back to
> > low order?
> 
> Do you mean kvmalloc?


I sent something last year but had not finished the patch series :/

https://marc.info/?l=linux-kernel&m=148233423610544&w=2

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2018-03-01 18:16 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-01-23 10:55 [PATCH 1/4] vmalloc: add vm_flags argument to internal __vmalloc_node() Konstantin Khlebnikov
2018-01-23 10:55 ` [PATCH 2/4] vmalloc: add __vmalloc_area() Konstantin Khlebnikov
2018-01-23 10:55 ` [PATCH 3/4] kernel/fork: switch vmapped stack allocation to __vmalloc_area() Konstantin Khlebnikov
2018-01-23 13:57   ` Konstantin Khlebnikov
2018-02-21  0:16     ` Andrew Morton
2018-02-21  7:23       ` Konstantin Khlebnikov
2018-02-21 16:35         ` Andy Lutomirski
2018-01-23 10:55 ` [PATCH 4/4] kernel/fork: add option to use virtually mapped stacks as fallback Konstantin Khlebnikov
2018-02-21 15:42   ` Use higher-order pages in vmalloc Matthew Wilcox
2018-02-21 16:11     ` Andy Lutomirski
2018-02-21 16:50       ` Matthew Wilcox
2018-02-21 16:16     ` Dave Hansen
2018-02-21 17:01       ` Matthew Wilcox
2018-02-22  6:59         ` Michal Hocko
2018-02-22 12:22           ` Matthew Wilcox
2018-02-22 13:36             ` Michal Hocko
2018-02-22 19:01               ` Andy Lutomirski
2018-02-22 19:19                 ` Dave Hansen
2018-02-22 19:27                   ` Andy Lutomirski
2018-02-22 19:36                     ` Dave Hansen
2018-02-23 12:13                 ` Michal Hocko
2018-03-01 18:16                   ` Eric Dumazet
2018-02-21 12:24 ` [PATCH 1/4] vmalloc: add vm_flags argument to internal __vmalloc_node() Matthew Wilcox
2018-02-21 12:39   ` Andrey Ryabinin
