linux-kernel.vger.kernel.org archive mirror
* [PATCH] kasan, module, vmalloc: rework shadow allocation for modules
@ 2015-02-18 17:44 Andrey Ryabinin
  2015-02-18 23:10 ` Rusty Russell
  0 siblings, 1 reply; 11+ messages in thread
From: Andrey Ryabinin @ 2015-02-18 17:44 UTC (permalink / raw)
  To: Andrew Morton
  Cc: linux-mm, linux-kernel, Andrey Ryabinin, Dmitry Vyukov, Rusty Russell

The current approach to handling shadow memory for modules is broken.

Shadow memory can be freed only after the memory it shadows is no
longer used.
vfree() called from interrupt context can use the memory it is
freeing to store a 'struct llist_node' in it:

void vfree(const void *addr)
{
...
	if (unlikely(in_interrupt())) {
		struct vfree_deferred *p = this_cpu_ptr(&vfree_deferred);
		if (llist_add((struct llist_node *)addr, &p->list))
			schedule_work(&p->wq);

Later this list node is used in free_work(), which actually frees the memory.
Currently, module_memfree() called in interrupt context will free the
shadow before freeing the module's memory, which could provoke a
kernel crash.
So shadow memory should be freed after the module's memory.
However, such a deallocation order could race with kasan_module_alloc()
in module_alloc().
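
To illustrate, a simplified sketch (not the verbatim kernel code) of the
current, broken ordering:

void module_memfree(void *module_region)
{
	/*
	 * In interrupt context vfree() only defers the free: llist_add()
	 * stores a struct llist_node inside module_region itself, and the
	 * real unmapping happens later in free_work().
	 */
	vfree(module_region);

	/*
	 * The shadow backing module_region may be torn down before
	 * free_work() reads the list node stored inside module_region,
	 * so that instrumented access can hit already-unmapped shadow.
	 */
	kasan_module_free(module_region);
}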

To fix this we could move the kasan hooks into the vmalloc code. This
allows us to allocate/free shadow memory at the appropriate time and
in the appropriate order.

These hooks might also be helpful in the future if we decide to track
other vmalloc'ed memory.

Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Rusty Russell <rusty@rustcorp.com.au>
---
 arch/x86/kernel/module.c | 11 +----------
 include/linux/kasan.h    | 26 +++++++++++++++++++-------
 kernel/module.c          |  2 --
 mm/kasan/kasan.c         | 12 +++++++++---
 mm/vmalloc.c             | 10 ++++++++++
 5 files changed, 39 insertions(+), 22 deletions(-)

diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index d1ac80b..00ba926 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -24,7 +24,6 @@
 #include <linux/fs.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
-#include <linux/kasan.h>
 #include <linux/bug.h>
 #include <linux/mm.h>
 #include <linux/gfp.h>
@@ -84,22 +83,14 @@ static unsigned long int get_module_load_offset(void)
 
 void *module_alloc(unsigned long size)
 {
-	void *p;
-
 	if (PAGE_ALIGN(size) > MODULES_LEN)
 		return NULL;
 
-	p = __vmalloc_node_range(size, MODULE_ALIGN,
+	return __vmalloc_node_range(size, 1,
 				    MODULES_VADDR + get_module_load_offset(),
 				    MODULES_END, GFP_KERNEL | __GFP_HIGHMEM,
 				    PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
 				    __builtin_return_address(0));
-	if (p && (kasan_module_alloc(p, size) < 0)) {
-		vfree(p);
-		return NULL;
-	}
-
-	return p;
 }
 
 #ifdef CONFIG_X86_32
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 72ba725..54068a5 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -5,6 +5,7 @@
 
 struct kmem_cache;
 struct page;
+struct vm_struct;
 
 #ifdef CONFIG_KASAN
 
@@ -12,6 +13,7 @@ struct page;
 #define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
 
 #include <asm/kasan.h>
+#include <linux/kernel.h>
 #include <linux/sched.h>
 
 static inline void *kasan_mem_to_shadow(const void *addr)
@@ -49,15 +51,19 @@ void kasan_krealloc(const void *object, size_t new_size);
 void kasan_slab_alloc(struct kmem_cache *s, void *object);
 void kasan_slab_free(struct kmem_cache *s, void *object);
 
-#define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT)
+int kasan_vmalloc(const void *addr, size_t size);
+void kasan_vfree(const void *addr, const struct vm_struct *vm);
 
-int kasan_module_alloc(void *addr, size_t size);
-void kasan_module_free(void *addr);
+static inline unsigned long kasan_vmalloc_align(unsigned long addr,
+						unsigned long align)
+{
+	if (addr >= MODULES_VADDR && addr < MODULES_END)
+		return ALIGN(align, PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT);
+	return align;
+}
 
 #else /* CONFIG_KASAN */
 
-#define MODULE_ALIGN 1
-
 static inline void kasan_unpoison_shadow(const void *address, size_t size) {}
 
 static inline void kasan_enable_current(void) {}
@@ -81,8 +87,14 @@ static inline void kasan_krealloc(const void *object, size_t new_size) {}
 static inline void kasan_slab_alloc(struct kmem_cache *s, void *object) {}
 static inline void kasan_slab_free(struct kmem_cache *s, void *object) {}
 
-static inline int kasan_module_alloc(void *addr, size_t size) { return 0; }
-static inline void kasan_module_free(void *addr) {}
+static inline int kasan_vmalloc(const void *addr, size_t size) { return 0; }
+static inline void kasan_vfree(const void *addr, struct vm_struct *vm) {}
+
+static inline unsigned long kasan_vmalloc_align(unsigned long addr,
+						unsigned long align)
+{
+	return align;
+}
 
 #endif /* CONFIG_KASAN */
 
diff --git a/kernel/module.c b/kernel/module.c
index 8426ad4..82dc1f8 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -56,7 +56,6 @@
 #include <linux/async.h>
 #include <linux/percpu.h>
 #include <linux/kmemleak.h>
-#include <linux/kasan.h>
 #include <linux/jump_label.h>
 #include <linux/pfn.h>
 #include <linux/bsearch.h>
@@ -1814,7 +1813,6 @@ static void unset_module_init_ro_nx(struct module *mod) { }
 void __weak module_memfree(void *module_region)
 {
 	vfree(module_region);
-	kasan_module_free(module_region);
 }
 
 void __weak module_arch_cleanup(struct module *mod)
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 78fee63..7a90c94 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -29,6 +29,7 @@
 #include <linux/stacktrace.h>
 #include <linux/string.h>
 #include <linux/types.h>
+#include <linux/vmalloc.h>
 #include <linux/kasan.h>
 
 #include "kasan.h"
@@ -396,7 +397,7 @@ void kasan_kfree_large(const void *ptr)
 			KASAN_FREE_PAGE);
 }
 
-int kasan_module_alloc(void *addr, size_t size)
+int kasan_vmalloc(const void *addr, size_t size)
 {
 	void *ret;
 	size_t shadow_size;
@@ -406,6 +407,9 @@ int kasan_module_alloc(void *addr, size_t size)
 	shadow_size = round_up(size >> KASAN_SHADOW_SCALE_SHIFT,
 			PAGE_SIZE);
 
+	if (!(addr >= (void *)MODULES_VADDR && addr < (void *)MODULES_END))
+		return 0;
+
 	if (WARN_ON(!PAGE_ALIGNED(shadow_start)))
 		return -EINVAL;
 
@@ -417,9 +421,11 @@ int kasan_module_alloc(void *addr, size_t size)
 	return ret ? 0 : -ENOMEM;
 }
 
-void kasan_module_free(void *addr)
+void kasan_vfree(const void *addr, const struct vm_struct *vm)
 {
-	vfree(kasan_mem_to_shadow(addr));
+	if (addr >= (void *)MODULES_VADDR && addr < (void *)MODULES_END
+		&& !(vm->flags & VM_UNINITIALIZED))
+			vfree(kasan_mem_to_shadow(addr));
 }
 
 static void register_global(struct kasan_global *global)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 35b25e1..a15799e 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -20,6 +20,7 @@
 #include <linux/seq_file.h>
 #include <linux/debugobjects.h>
 #include <linux/kallsyms.h>
+#include <linux/kasan.h>
 #include <linux/list.h>
 #include <linux/rbtree.h>
 #include <linux/radix-tree.h>
@@ -1412,6 +1413,8 @@ struct vm_struct *remove_vm_area(const void *addr)
 	if (va && va->flags & VM_VM_AREA) {
 		struct vm_struct *vm = va->vm;
 
+		kasan_vfree(addr, vm);
+
 		spin_lock(&vmap_area_lock);
 		va->vm = NULL;
 		va->flags &= ~VM_VM_AREA;
@@ -1640,6 +1643,8 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 	if (!size || (size >> PAGE_SHIFT) > totalram_pages)
 		goto fail;
 
+	align = kasan_vmalloc_align(start, align);
+
 	area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED |
 				vm_flags, start, end, node, gfp_mask, caller);
 	if (!area)
@@ -1649,6 +1654,11 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 	if (!addr)
 		return NULL;
 
+	if (kasan_vmalloc(addr, size) < 0) {
+		vfree(addr);
+		return NULL;
+	}
+
 	/*
 	 * In this function, newly allocated vm_struct has VM_UNINITIALIZED
 	 * flag. It means that vm_struct is not fully initialized.
-- 
2.3.0


* Re: [PATCH] kasan, module, vmalloc: rework shadow allocation for modules
  2015-02-18 17:44 [PATCH] kasan, module, vmalloc: rework shadow allocation for modules Andrey Ryabinin
@ 2015-02-18 23:10 ` Rusty Russell
  2015-02-19 13:21   ` Andrey Ryabinin
  0 siblings, 1 reply; 11+ messages in thread
From: Rusty Russell @ 2015-02-18 23:10 UTC (permalink / raw)
  To: Andrey Ryabinin, Andrew Morton
  Cc: linux-mm, linux-kernel, Andrey Ryabinin, Dmitry Vyukov

Andrey Ryabinin <a.ryabinin@samsung.com> writes:
> Current approach in handling shadow memory for modules is broken.
>
> Shadow memory could be freed only after memory shadow corresponds
> it is no longer used.
> vfree() called from interrupt context could use memory its
> freeing to store 'struct llist_node' in it:
>
> void vfree(const void *addr)
> {
> ...
> 	if (unlikely(in_interrupt())) {
> 		struct vfree_deferred *p = this_cpu_ptr(&vfree_deferred);
> 		if (llist_add((struct llist_node *)addr, &p->list))
> 			schedule_work(&p->wq);
>
> Latter this list node used in free_work() which actually frees memory.
> Currently module_memfree() called in interrupt context will free
> shadow before freeing module's memory which could provoke kernel
> crash.
> So shadow memory should be freed after module's memory.
> However, such deallocation order could race with kasan_module_alloc()
> in module_alloc().
>
> To fix this we could move kasan hooks into vmalloc code. This allows
> us to allocate/free shadow memory in appropriate time and order.
>
> This hooks also might be helpful in future if we decide to track
> other vmalloc'ed memory.

This is not portable.  Other archs don't use vmalloc, or don't use
(or define) MODULES_VADDR.  If you really want to hook here, you'd
need a new flag (or maybe use PAGE_KERNEL_EXEC after an audit).

Thus I think modifying the callers is the better choice.

Cheers,
Rusty.

> Signed-off-by: Andrey Ryabinin <a.ryabinin@samsung.com>
> Cc: Dmitry Vyukov <dvyukov@google.com>
> Cc: Rusty Russell <rusty@rustcorp.com.au>
> ---
>  arch/x86/kernel/module.c | 11 +----------
>  include/linux/kasan.h    | 26 +++++++++++++++++++-------
>  kernel/module.c          |  2 --
>  mm/kasan/kasan.c         | 12 +++++++++---
>  mm/vmalloc.c             | 10 ++++++++++
>  5 files changed, 39 insertions(+), 22 deletions(-)
>
> diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
> index d1ac80b..00ba926 100644
> --- a/arch/x86/kernel/module.c
> +++ b/arch/x86/kernel/module.c
> @@ -24,7 +24,6 @@
>  #include <linux/fs.h>
>  #include <linux/string.h>
>  #include <linux/kernel.h>
> -#include <linux/kasan.h>
>  #include <linux/bug.h>
>  #include <linux/mm.h>
>  #include <linux/gfp.h>
> @@ -84,22 +83,14 @@ static unsigned long int get_module_load_offset(void)
>  
>  void *module_alloc(unsigned long size)
>  {
> -	void *p;
> -
>  	if (PAGE_ALIGN(size) > MODULES_LEN)
>  		return NULL;
>  
> -	p = __vmalloc_node_range(size, MODULE_ALIGN,
> +	return __vmalloc_node_range(size, 1,
>  				    MODULES_VADDR + get_module_load_offset(),
>  				    MODULES_END, GFP_KERNEL | __GFP_HIGHMEM,
>  				    PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
>  				    __builtin_return_address(0));
> -	if (p && (kasan_module_alloc(p, size) < 0)) {
> -		vfree(p);
> -		return NULL;
> -	}
> -
> -	return p;
>  }
>  
>  #ifdef CONFIG_X86_32
> diff --git a/include/linux/kasan.h b/include/linux/kasan.h
> index 72ba725..54068a5 100644
> --- a/include/linux/kasan.h
> +++ b/include/linux/kasan.h
> @@ -5,6 +5,7 @@
>  
>  struct kmem_cache;
>  struct page;
> +struct vm_struct;
>  
>  #ifdef CONFIG_KASAN
>  
> @@ -12,6 +13,7 @@ struct page;
>  #define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
>  
>  #include <asm/kasan.h>
> +#include <linux/kernel.h>
>  #include <linux/sched.h>
>  
>  static inline void *kasan_mem_to_shadow(const void *addr)
> @@ -49,15 +51,19 @@ void kasan_krealloc(const void *object, size_t new_size);
>  void kasan_slab_alloc(struct kmem_cache *s, void *object);
>  void kasan_slab_free(struct kmem_cache *s, void *object);
>  
> -#define MODULE_ALIGN (PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT)
> +int kasan_vmalloc(const void *addr, size_t size);
> +void kasan_vfree(const void *addr, const struct vm_struct *vm);
>  
> -int kasan_module_alloc(void *addr, size_t size);
> -void kasan_module_free(void *addr);
> +static inline unsigned long kasan_vmalloc_align(unsigned long addr,
> +						unsigned long align)
> +{
> +	if (addr >= MODULES_VADDR && addr < MODULES_END)
> +		return ALIGN(align, PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT);
> +	return align;
> +}
>  
>  #else /* CONFIG_KASAN */
>  
> -#define MODULE_ALIGN 1
> -
>  static inline void kasan_unpoison_shadow(const void *address, size_t size) {}
>  
>  static inline void kasan_enable_current(void) {}
> @@ -81,8 +87,14 @@ static inline void kasan_krealloc(const void *object, size_t new_size) {}
>  static inline void kasan_slab_alloc(struct kmem_cache *s, void *object) {}
>  static inline void kasan_slab_free(struct kmem_cache *s, void *object) {}
>  
> -static inline int kasan_module_alloc(void *addr, size_t size) { return 0; }
> -static inline void kasan_module_free(void *addr) {}
> +static inline int kasan_vmalloc(const void *addr, size_t size) { return 0; }
> +static inline void kasan_vfree(const void *addr, struct vm_struct *vm) {}
> +
> +static inline unsigned long kasan_vmalloc_align(unsigned long addr,
> +						unsigned long align)
> +{
> +	return align;
> +}
>  
>  #endif /* CONFIG_KASAN */
>  
> diff --git a/kernel/module.c b/kernel/module.c
> index 8426ad4..82dc1f8 100644
> --- a/kernel/module.c
> +++ b/kernel/module.c
> @@ -56,7 +56,6 @@
>  #include <linux/async.h>
>  #include <linux/percpu.h>
>  #include <linux/kmemleak.h>
> -#include <linux/kasan.h>
>  #include <linux/jump_label.h>
>  #include <linux/pfn.h>
>  #include <linux/bsearch.h>
> @@ -1814,7 +1813,6 @@ static void unset_module_init_ro_nx(struct module *mod) { }
>  void __weak module_memfree(void *module_region)
>  {
>  	vfree(module_region);
> -	kasan_module_free(module_region);
>  }
>  
>  void __weak module_arch_cleanup(struct module *mod)
> diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
> index 78fee63..7a90c94 100644
> --- a/mm/kasan/kasan.c
> +++ b/mm/kasan/kasan.c
> @@ -29,6 +29,7 @@
>  #include <linux/stacktrace.h>
>  #include <linux/string.h>
>  #include <linux/types.h>
> +#include <linux/vmalloc.h>
>  #include <linux/kasan.h>
>  
>  #include "kasan.h"
> @@ -396,7 +397,7 @@ void kasan_kfree_large(const void *ptr)
>  			KASAN_FREE_PAGE);
>  }
>  
> -int kasan_module_alloc(void *addr, size_t size)
> +int kasan_vmalloc(const void *addr, size_t size)
>  {
>  	void *ret;
>  	size_t shadow_size;
> @@ -406,6 +407,9 @@ int kasan_module_alloc(void *addr, size_t size)
>  	shadow_size = round_up(size >> KASAN_SHADOW_SCALE_SHIFT,
>  			PAGE_SIZE);
>  
> +	if (!(addr >= (void *)MODULES_VADDR && addr < (void *)MODULES_END))
> +		return 0;
> +
>  	if (WARN_ON(!PAGE_ALIGNED(shadow_start)))
>  		return -EINVAL;
>  
> @@ -417,9 +421,11 @@ int kasan_module_alloc(void *addr, size_t size)
>  	return ret ? 0 : -ENOMEM;
>  }
>  
> -void kasan_module_free(void *addr)
> +void kasan_vfree(const void *addr, const struct vm_struct *vm)
>  {
> -	vfree(kasan_mem_to_shadow(addr));
> +	if (addr >= (void *)MODULES_VADDR && addr < (void *)MODULES_END
> +		&& !(vm->flags & VM_UNINITIALIZED))
> +			vfree(kasan_mem_to_shadow(addr));
>  }
>  
>  static void register_global(struct kasan_global *global)
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 35b25e1..a15799e 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -20,6 +20,7 @@
>  #include <linux/seq_file.h>
>  #include <linux/debugobjects.h>
>  #include <linux/kallsyms.h>
> +#include <linux/kasan.h>
>  #include <linux/list.h>
>  #include <linux/rbtree.h>
>  #include <linux/radix-tree.h>
> @@ -1412,6 +1413,8 @@ struct vm_struct *remove_vm_area(const void *addr)
>  	if (va && va->flags & VM_VM_AREA) {
>  		struct vm_struct *vm = va->vm;
>  
> +		kasan_vfree(addr, vm);
> +
>  		spin_lock(&vmap_area_lock);
>  		va->vm = NULL;
>  		va->flags &= ~VM_VM_AREA;
> @@ -1640,6 +1643,8 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
>  	if (!size || (size >> PAGE_SHIFT) > totalram_pages)
>  		goto fail;
>  
> +	align = kasan_vmalloc_align(start, align);
> +
>  	area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED |
>  				vm_flags, start, end, node, gfp_mask, caller);
>  	if (!area)
> @@ -1649,6 +1654,11 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
>  	if (!addr)
>  		return NULL;
>  
> +	if (kasan_vmalloc(addr, size) < 0) {
> +		vfree(addr);
> +		return NULL;
> +	}
> +
>  	/*
>  	 * In this function, newly allocated vm_struct has VM_UNINITIALIZED
>  	 * flag. It means that vm_struct is not fully initialized.
> -- 
> 2.3.0

* Re: [PATCH] kasan, module, vmalloc: rework shadow allocation for modules
  2015-02-18 23:10 ` Rusty Russell
@ 2015-02-19 13:21   ` Andrey Ryabinin
  2015-02-20  0:15     ` Rusty Russell
  0 siblings, 1 reply; 11+ messages in thread
From: Andrey Ryabinin @ 2015-02-19 13:21 UTC (permalink / raw)
  To: Rusty Russell, Andrew Morton; +Cc: linux-mm, linux-kernel, Dmitry Vyukov

On 02/19/2015 02:10 AM, Rusty Russell wrote:
> Andrey Ryabinin <a.ryabinin@samsung.com> writes:
>> Current approach in handling shadow memory for modules is broken.
>>
>> Shadow memory could be freed only after memory shadow corresponds
>> it is no longer used.
>> vfree() called from interrupt context could use memory its
>> freeing to store 'struct llist_node' in it:
>>
>> void vfree(const void *addr)
>> {
>> ...
>> 	if (unlikely(in_interrupt())) {
>> 		struct vfree_deferred *p = this_cpu_ptr(&vfree_deferred);
>> 		if (llist_add((struct llist_node *)addr, &p->list))
>> 			schedule_work(&p->wq);
>>
>> Latter this list node used in free_work() which actually frees memory.
>> Currently module_memfree() called in interrupt context will free
>> shadow before freeing module's memory which could provoke kernel
>> crash.
>> So shadow memory should be freed after module's memory.
>> However, such deallocation order could race with kasan_module_alloc()
>> in module_alloc().
>>
>> To fix this we could move kasan hooks into vmalloc code. This allows
>> us to allocate/free shadow memory in appropriate time and order.
>>
>> This hooks also might be helpful in future if we decide to track
>> other vmalloc'ed memory.
> 
> This is not portable.  Other archs don't use vmalloc, or don't use
> (or define) MODULES_VADDR.  If you really want to hook here, you'd
> need a new flag (or maybe use PAGE_KERNEL_EXEC after an audit).
> 

Well, instead of the explicit (addr >= MODULES_VADDR && addr < MODULES_END)
check I could hide this in an arch-specific function, 'kasan_need_to_allocate_shadow(const void *addr)',
or make all those functions weak and allow arch code to redefine them.

> Thus I think modifying the callers is the better choice.
> 

I could suggest the following (though I still prefer the 'modify vmalloc' approach):
  * In do_init_module(), instead of call_rcu(&freeinit->rcu, do_free_init);
    use synchronize_rcu() + module_memfree(). Of course this would be under CONFIG_KASAN.

    As you said, there are other module_memfree() users, so what if they decide
    to free memory in atomic context?

  * Another option would be a deferred kasan_module_free(), as in the patch below.
    This is mostly a copy-paste of the deferred vfree(), so I don't like it.

---
 arch/x86/mm/kasan_init_64.c |  1 +
 include/linux/kasan.h       |  1 +
 kernel/module.c             |  6 ++++--
 mm/kasan/kasan.c            | 42 +++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 4860906..66d2dba 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -173,6 +173,7 @@ void __init kasan_init(void)
 #ifdef CONFIG_KASAN_INLINE
 	register_die_notifier(&kasan_die_notifier);
 #endif
+	kasan_modules_init();

 	memcpy(early_level4_pgt, init_level4_pgt, sizeof(early_level4_pgt));
 	load_cr3(early_level4_pgt);
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 72ba725..dba26f3 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -53,6 +53,7 @@ void kasan_slab_free(struct kmem_cache *s, void *object);

 int kasan_module_alloc(void *addr, size_t size);
 void kasan_module_free(void *addr);
+void kasan_modules_init(void);

 #else /* CONFIG_KASAN */

diff --git a/kernel/module.c b/kernel/module.c
index 8426ad4..e3d1a45 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1813,8 +1813,10 @@ static void unset_module_init_ro_nx(struct module *mod) { }

 void __weak module_memfree(void *module_region)
 {
-	vfree(module_region);
-	kasan_module_free(module_region);
+	if (IS_ENABLED(CONFIG_KASAN))
+		kasan_module_free(module_region);
+	else
+		vfree(module_region);
 }

 void __weak module_arch_cleanup(struct module *mod)
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 78fee63..333241e 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -19,6 +19,7 @@
 #include <linux/export.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/llist.h>
 #include <linux/memblock.h>
 #include <linux/memory.h>
 #include <linux/mm.h>
@@ -29,6 +30,7 @@
 #include <linux/stacktrace.h>
 #include <linux/string.h>
 #include <linux/types.h>
+#include <linux/vmalloc.h>
 #include <linux/kasan.h>

 #include "kasan.h"
@@ -417,9 +419,47 @@ int kasan_module_alloc(void *addr, size_t size)
 	return ret ? 0 : -ENOMEM;
 }

+struct vfree_deferred {
+	struct llist_head list;
+	struct work_struct wq;
+};
+static DEFINE_PER_CPU(struct vfree_deferred, vfree_deferred);
+
+static void free_work(struct work_struct *w)
+{
+	struct vfree_deferred *p = container_of(w, struct vfree_deferred, wq);
+	struct llist_node *llnode = llist_del_all(&p->list);
+	while (llnode) {
+		void *p = llnode;
+		llnode = llist_next(llnode);
+		vfree(kasan_mem_to_shadow(p));
+		vfree(p);
+	}
+}
+
 void kasan_module_free(void *addr)
 {
-	vfree(kasan_mem_to_shadow(addr));
+	if (unlikely(in_interrupt())) {
+		struct vfree_deferred *p = this_cpu_ptr(&vfree_deferred);
+		if (llist_add((struct llist_node *)addr, &p->list))
+			schedule_work(&p->wq);
+	} else {
+		vfree(kasan_mem_to_shadow(addr));
+		vfree(addr);
+	}
+}
+
+void __init kasan_modules_init(void)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		struct vfree_deferred *p;
+
+		p = &per_cpu(vfree_deferred, i);
+		init_llist_head(&p->list);
+		INIT_WORK(&p->wq, free_work);
+	}
 }

 static void register_global(struct kasan_global *global)
-- 
2.3.0



* Re: [PATCH] kasan, module, vmalloc: rework shadow allocation for modules
  2015-02-19 13:21   ` Andrey Ryabinin
@ 2015-02-20  0:15     ` Rusty Russell
  2015-02-20  7:47       ` Andrey Ryabinin
  0 siblings, 1 reply; 11+ messages in thread
From: Rusty Russell @ 2015-02-20  0:15 UTC (permalink / raw)
  To: Andrey Ryabinin, Andrew Morton; +Cc: linux-mm, linux-kernel, Dmitry Vyukov

Andrey Ryabinin <a.ryabinin@samsung.com> writes:
> On 02/19/2015 02:10 AM, Rusty Russell wrote:
>> This is not portable.  Other archs don't use vmalloc, or don't use
>> (or define) MODULES_VADDR.  If you really want to hook here, you'd
>> need a new flag (or maybe use PAGE_KERNEL_EXEC after an audit).
>> 
>
> Well, instead of explicit (addr >= MODULES_VADDR && addr < MODULES_END)
> I could hide this into arch-specific function: 'kasan_need_to_allocate_shadow(const void *addr)'
> or make make all those functions weak and allow arch code to redefine them.

That adds another layer of indirection.  And how would the caller of
plain vmalloc() even know what to return?

>> Thus I think modifying the callers is the better choice.
>> 
>
> I could suggest following (though, I still prefer 'modifying vmalloc' approach):
>   * In do_init_module(), instead of call_rcu(&freeinit->rcu, do_free_init);
>     use synchronyze_rcu() + module_memfree(). Of course this will be
>   under CONFIG_KASAN.

But it would be slow, and a disparate code path, which is usually a bad
idea.

>     As you said there other module_memfree() users, so what if they will decide
>     to free memory in atomic context?

Hmm, how about a hybrid:

1) Add kasan_module_alloc(p, size) after module alloc as your original.
2) Hook into vfree(), and ignore it if you can't find the map.

Or is the latter too expensive?
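
For concreteness, a rough sketch of what (2) might look like (find_vm_area()
is real; the flag name here is made up):

	/* Hypothetical check near __vunmap(): free the shadow only if one
	 * was actually allocated for this mapping. */
	struct vm_struct *area = find_vm_area(addr);

	if (area && (area->flags & VM_HAS_KASAN_SHADOW))
		vfree(kasan_mem_to_shadow(addr));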

Cheers,
Rusty.

* Re: [PATCH] kasan, module, vmalloc: rework shadow allocation for modules
  2015-02-20  0:15     ` Rusty Russell
@ 2015-02-20  7:47       ` Andrey Ryabinin
  2015-02-23  8:26         ` Rusty Russell
  0 siblings, 1 reply; 11+ messages in thread
From: Andrey Ryabinin @ 2015-02-20  7:47 UTC (permalink / raw)
  To: Rusty Russell, Andrew Morton; +Cc: linux-mm, linux-kernel, Dmitry Vyukov

On 02/20/2015 03:15 AM, Rusty Russell wrote:
> Andrey Ryabinin <a.ryabinin@samsung.com> writes:
>> On 02/19/2015 02:10 AM, Rusty Russell wrote:
>>> This is not portable.  Other archs don't use vmalloc, or don't use
>>> (or define) MODULES_VADDR.  If you really want to hook here, you'd
>>> need a new flag (or maybe use PAGE_KERNEL_EXEC after an audit).
>>>
>>
>> Well, instead of explicit (addr >= MODULES_VADDR && addr < MODULES_END)
>> I could hide this into arch-specific function: 'kasan_need_to_allocate_shadow(const void *addr)'
>> or make make all those functions weak and allow arch code to redefine them.
> 
> That adds another layer of indirection.  And how would the caller of
> plain vmalloc() even know what to return?
> 

I don't think I understand what you mean here. vmalloc() callers shouldn't know
anything about kasan/shadow.

You were concerned that this patch is not portable, so I suggested hiding the
arch-specific part in arch code. That's it.

>>> Thus I think modifying the callers is the better choice.
>>>
>>
>> I could suggest following (though, I still prefer 'modifying vmalloc' approach):
>>   * In do_init_module(), instead of call_rcu(&freeinit->rcu, do_free_init);
>>     use synchronyze_rcu() + module_memfree(). Of course this will be
>>   under CONFIG_KASAN.
> 
> But it would be slow, and a disparate code path, which is usually a bad
> idea.
> 
>>     As you said there other module_memfree() users, so what if they will decide
>>     to free memory in atomic context?
> 
> Hmm, how about a hybrid:
> 
> 1) Add kasan_module_alloc(p, size) after module alloc as your original.
> 2) Hook into vfree(), and ignore it if you can't find the map.
> 

That should work, but it looks messy IMO.

> Or is the latter too expensive?
> 

I'm not sure whether this will be too expensive or not,
but it's definitely more expensive than a simple (addr >= MODULES_VADDR && addr < MODULES_END) check.



* Re: [PATCH] kasan, module, vmalloc: rework shadow allocation for modules
  2015-02-20  7:47       ` Andrey Ryabinin
@ 2015-02-23  8:26         ` Rusty Russell
  2015-02-24 12:58           ` Andrey Ryabinin
  0 siblings, 1 reply; 11+ messages in thread
From: Rusty Russell @ 2015-02-23  8:26 UTC (permalink / raw)
  To: Andrey Ryabinin, Andrew Morton; +Cc: linux-mm, linux-kernel, Dmitry Vyukov

Andrey Ryabinin <a.ryabinin@samsung.com> writes:
> On 02/20/2015 03:15 AM, Rusty Russell wrote:
>> Andrey Ryabinin <a.ryabinin@samsung.com> writes:
>>> On 02/19/2015 02:10 AM, Rusty Russell wrote:
>>>> This is not portable.  Other archs don't use vmalloc, or don't use
>>>> (or define) MODULES_VADDR.  If you really want to hook here, you'd
>>>> need a new flag (or maybe use PAGE_KERNEL_EXEC after an audit).
>>>>
>>>
>>> Well, instead of explicit (addr >= MODULES_VADDR && addr < MODULES_END)
>>> I could hide this into arch-specific function: 'kasan_need_to_allocate_shadow(const void *addr)'
>>> or make make all those functions weak and allow arch code to redefine them.
>> 
>> That adds another layer of indirection.  And how would the caller of
>> plain vmalloc() even know what to return?
>> 
>
> I think I don't understand what do you mean here. vmalloc() callers shouldn't know
> anything about kasan/shadow.

How else would kasan_need_to_allocate_shadow(const void *addr) work for
architectures which don't have a reserved vmalloc region for modules?

>> Hmm, how about a hybrid:
>> 
>> 1) Add kasan_module_alloc(p, size) after module alloc as your original.
>> 2) Hook into vfree(), and ignore it if you can't find the map.
>> 
>
> That should work, but it looks messy IMO.
>
>> Or is the latter too expensive?
>> 
>
> Not sure whether this will be too expensive or not,
> but definitely more expensive than simple (addr >= MODULES_VADDR && addr < MODULES_END) check.

Sure, if that check were portable.  If you ever wanted kasan on other
vmalloc addresses it wouldn't work either.

I actually think this pattern is the *simplest* solution for auxiliary
data like kasan.

Cheers,
Rusty.

* Re: [PATCH] kasan, module, vmalloc: rework shadow allocation for modules
  2015-02-23  8:26         ` Rusty Russell
@ 2015-02-24 12:58           ` Andrey Ryabinin
  2015-02-25  6:25             ` Rusty Russell
  0 siblings, 1 reply; 11+ messages in thread
From: Andrey Ryabinin @ 2015-02-24 12:58 UTC (permalink / raw)
  To: Rusty Russell, Andrew Morton; +Cc: linux-mm, linux-kernel, Dmitry Vyukov

On 02/23/2015 11:26 AM, Rusty Russell wrote:
> Andrey Ryabinin <a.ryabinin@samsung.com> writes:
>> On 02/20/2015 03:15 AM, Rusty Russell wrote:
>>> Andrey Ryabinin <a.ryabinin@samsung.com> writes:
>>>> On 02/19/2015 02:10 AM, Rusty Russell wrote:
>>>>> This is not portable.  Other archs don't use vmalloc, or don't use
>>>>> (or define) MODULES_VADDR.  If you really want to hook here, you'd
>>>>> need a new flag (or maybe use PAGE_KERNEL_EXEC after an audit).
>>>>>
>>>>
>>>> Well, instead of explicit (addr >= MODULES_VADDR && addr < MODULES_END)
>>>> I could hide this into arch-specific function: 'kasan_need_to_allocate_shadow(const void *addr)'
>>>> or make make all those functions weak and allow arch code to redefine them.
>>>
>>> That adds another layer of indirection.  And how would the caller of
>>> plain vmalloc() even know what to return?
>>>
>>
>> I think I don't understand what do you mean here. vmalloc() callers shouldn't know
>> anything about kasan/shadow.
> 
> How else would kasan_need_to_allocate_shadow(const void *addr) work for
> architectures which don't have a reserved vmalloc region for modules?
> 


I think I need to clarify what I'm doing.

Address sanitizer algorithm in short:
-------------------------------------
Every memory access is transformed by the compiler in the following way:

Before:
	*address = ...;

After:

	if (memory_is_poisoned(address)) {
		report_error(address, access_size);
	}
	*address = ...;

where memory_is_poisoned():
	bool memory_is_poisoned(unsigned long addr)
	{
        	s8 shadow_value = *(s8 *)kasan_mem_to_shadow((void *)addr);
	        if (unlikely(shadow_value)) {
        	        s8 last_accessible_byte = addr & KASAN_SHADOW_MASK;
                	return unlikely(last_accessible_byte >= shadow_value);
	        }
	        return false;
	}
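
and kasan_mem_to_shadow() is essentially the shift-and-offset mapping
from include/linux/kasan.h:

	static inline void *kasan_mem_to_shadow(const void *addr)
	{
		return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
			+ KASAN_SHADOW_OFFSET;
	}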
--------------------------------------

So shadow memory should be present for every accessible address in the kernel,
otherwise reading the shadow value will cause an unhandled page fault.

The shadow for vmalloc addresses (on x86_64) is a read-only mapping of a single zero page.
A zero byte in the shadow means that it's OK to access that address.
Currently we don't catch bugs in vmalloc'ed memory because most such bugs could be caught
in a simpler way with CONFIG_DEBUG_PAGEALLOC.
That's why we don't need a RW shadow for vmalloc: it is just one zero page, mapped read-only
early on boot for the whole [kasan_mem_to_shadow(VMALLOC_START), kasan_mem_to_shadow(VMALLOC_END)] range.
So every access to the vmalloc range is assumed to be valid.

To catch out-of-bounds accesses to global variables we need to fill the shadow
corresponding to a variable's redzone with non-zero (negative) values.
So for the kernel image and modules we need a writable shadow.
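
(For reference, this is roughly what register_global() in mm/kasan/kasan.c
does for each instrumented global:)

	static void register_global(struct kasan_global *global)
	{
		size_t aligned_size = round_up(global->size, KASAN_SHADOW_SCALE_SIZE);

		/* The object itself stays accessible... */
		kasan_unpoison_shadow(global->beg, global->size);

		/* ...while the trailing redzone gets a non-zero shadow value. */
		kasan_poison_shadow(global->beg + aligned_size,
			global->size_with_redzone - aligned_size,
			KASAN_GLOBAL_REDZONE);
	}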

If some arch doesn't have a separate address range for modules and uses the general
vmalloc() area for them, the shadow for vmalloc must be writable, which means that
shadow has to be allocated for every vmalloc() call.

On such an arch, kasan_need_to_allocate_shadow(const void *addr) should return true for every vmalloc address:
bool kasan_need_to_allocate_shadow(const void *addr)
{
	return addr >= VMALLOC_START && addr < VMALLOC_END;
}


All of the above means that the current code is not very portable.
The 'kasan_module_alloc(p, size) after module alloc' approach is not portable
either. It won't work for arches that use [VMALLOC_START, VMALLOC_END] addresses for modules,
because then we need to handle all vmalloc() calls.

I really think that this patch, after the proposed addition of an arch-specific
'kasan_need_to_allocate_shadow(const void *addr)', is the simplest and best way to fix the bug
and make it portable to other arches.
Though I doubt that anyone will ever bother to port kasan to arches
that don't have separate addresses for modules.


>>> Hmm, how about a hybrid:
>>>
>>> 1) Add kasan_module_alloc(p, size) after module alloc as your original.
>>> 2) Hook into vfree(), and ignore it if you can't find the map.
>>>
>>
>> That should work, but it looks messy IMO.
>>
>>> Or is the latter too expensive?
>>>
>>
>> Not sure whether this will be too expensive or not,
>> but definitely more expensive than simple (addr >= MODULES_VADDR && addr < MODULES_END) check.
> 
> Sure, if that check were portable.  If you ever wanted kasan on other
> vmalloc addresses it wouldn't work either.
> 
> I actually think this pattern is the *simplest* solution for auxilliary
> data like kasan.
> 
> Cheers,
> Rusty.
> 


* Re: [PATCH] kasan, module, vmalloc: rework shadow allocation for modules
  2015-02-24 12:58           ` Andrey Ryabinin
@ 2015-02-25  6:25             ` Rusty Russell
  2015-02-25  7:56               ` Andrey Ryabinin
  0 siblings, 1 reply; 11+ messages in thread
From: Rusty Russell @ 2015-02-25  6:25 UTC (permalink / raw)
  To: Andrey Ryabinin, Andrew Morton; +Cc: linux-mm, linux-kernel, Dmitry Vyukov

Andrey Ryabinin <a.ryabinin@samsung.com> writes:
> On 02/23/2015 11:26 AM, Rusty Russell wrote:
>> Andrey Ryabinin <a.ryabinin@samsung.com> writes:
>>> On 02/20/2015 03:15 AM, Rusty Russell wrote:
>>>> Andrey Ryabinin <a.ryabinin@samsung.com> writes:
>>>>> On 02/19/2015 02:10 AM, Rusty Russell wrote:
>>>>>> This is not portable.  Other archs don't use vmalloc, or don't use
>>>>>> (or define) MODULES_VADDR.  If you really want to hook here, you'd
>>>>>> need a new flag (or maybe use PAGE_KERNEL_EXEC after an audit).
>>>>>>
>>>>>
>>>>> Well, instead of explicit (addr >= MODULES_VADDR && addr < MODULES_END)
>>>>> I could hide this into arch-specific function: 'kasan_need_to_allocate_shadow(const void *addr)'
>>>>> or make make all those functions weak and allow arch code to redefine them.
>>>>
>>>> That adds another layer of indirection.  And how would the caller of
>>>> plain vmalloc() even know what to return?
>>>>
>>>
>>> I think I don't understand what do you mean here. vmalloc() callers shouldn't know
>>> anything about kasan/shadow.
>> 
>> How else would kasan_need_to_allocate_shadow(const void *addr) work for
>> architectures which don't have a reserved vmalloc region for modules?
>> 
>
>
> I think I need to clarify what I'm doing.
>
> Address sanitizer algorithm in short:
> -------------------------------------
> Every memory access is transformed by the compiler in the following way:
>
> Before:
> 	*address = ...;
>
> after:
>
> 	if (memory_is_poisoned(address)) {
> 		report_error(address, access_size);
> 	}
> 	*address = ...;
>
> where memory_is_poisoned():
> 	bool memory_is_poisoned(unsigned long addr)
> 	{
>         	s8 shadow_value = *(s8 *)kasan_mem_to_shadow((void *)addr);
> 	        if (unlikely(shadow_value)) {
>         	        s8 last_accessible_byte = addr & KASAN_SHADOW_MASK;
>                 	return unlikely(last_accessible_byte >= shadow_value);
> 	        }
> 	        return false;
> 	}
> --------------------------------------
>
> So shadow memory should be present for every accessible address in kernel
> otherwise it will be unhandled page fault on reading shadow value.
>
> Shadow for vmalloc addresses (on x86_64) is readonly mapping of one zero page.
> Zero byte in shadow means that it's ok to access to that address.
> Currently we don't catch bugs in vmalloc because most of such bugs could be caught
> in more simple way with CONFIG_DEBUG_PAGEALLOC.
> That's why we don't need RW shadow for vmalloc, it just one zero page that readonly
> mapped early on boot for the whole [kasan_mem_to_shadow(VMALLOC_START, kasan_mem_to_shadow(VMALLOC_END)] range
> So every access to vmalloc range assumed to be valid.
>
> To catch out of bounds accesses in global variables we need to fill shadow corresponding
> to variable's redzone with non-zero (negative) values.
> So for kernel image and modules we need a writable shadow.
>
> If some arch don't have separate address range for modules and it uses general vmalloc()
> shadow for vmalloc should be writable, so it means that shadow has to be allocated
> for every vmalloc() call.
>
> In such arch kasan_need_to_allocate_shadow(const void *addr) should return true for every vmalloc address:
> bool kasan_need_to_allocate_shadow(const void *addr)
> {
> 	return addr >= VMALLOC_START && addr < VMALLOC_END;
> }

Thanks for the explanation.

> All above means that current code is not very portable.
> And 'kasan_module_alloc(p, size) after module alloc' approach is not portable
> too. This won't work for arches that use [VMALLOC_START, VMALLOC_END] addresses for modules,
> because now we need to handle all vmalloc() calls.

I'm confused.  That's what you do now, and it hasn't been a problem,
has it?  The problem is on the freeing from interrupt context...

How about:

#define VM_KASAN		0x00000080      /* has shadow kasan map */

Set that in kasan_module_alloc():

        if (ret) {
                struct vm_struct *vma = find_vm_area(addr);

                BUG_ON(!vma);
                /* Set VM_KASAN so vfree() can free up shadow. */
                vma->flags |= VM_KASAN;
        }

And check that in __vunmap():

        if (area->flags & VM_KASAN)
                kasan_module_free(addr);

That is portable, and is actually a fairly small patch on what you
have at the moment.

What am I missing?

Thanks,
Rusty.

* Re: [PATCH] kasan, module, vmalloc: rework shadow allocation for modules
  2015-02-25  6:25             ` Rusty Russell
@ 2015-02-25  7:56               ` Andrey Ryabinin
  2015-02-26  1:30                 ` Rusty Russell
  0 siblings, 1 reply; 11+ messages in thread
From: Andrey Ryabinin @ 2015-02-25  7:56 UTC (permalink / raw)
  To: Rusty Russell, Andrew Morton; +Cc: linux-mm, linux-kernel, Dmitry Vyukov

On 02/25/2015 09:25 AM, Rusty Russell wrote:
> Andrey Ryabinin <a.ryabinin@samsung.com> writes:
>> On 02/23/2015 11:26 AM, Rusty Russell wrote:
>>> Andrey Ryabinin <a.ryabinin@samsung.com> writes:
>>>> On 02/20/2015 03:15 AM, Rusty Russell wrote:
>>>>> Andrey Ryabinin <a.ryabinin@samsung.com> writes:
>>>>>> On 02/19/2015 02:10 AM, Rusty Russell wrote:
>>>>>>> This is not portable.  Other archs don't use vmalloc, or don't use
>>>>>>> (or define) MODULES_VADDR.  If you really want to hook here, you'd
>>>>>>> need a new flag (or maybe use PAGE_KERNEL_EXEC after an audit).
>>>>>>>
>>>>>>
>>>>>> Well, instead of explicit (addr >= MODULES_VADDR && addr < MODULES_END)
>>>>>> I could hide this into arch-specific function: 'kasan_need_to_allocate_shadow(const void *addr)'
>>>>>> or make make all those functions weak and allow arch code to redefine them.
>>>>>
>>>>> That adds another layer of indirection.  And how would the caller of
>>>>> plain vmalloc() even know what to return?
>>>>>
>>>>
>>>> I think I don't understand what do you mean here. vmalloc() callers shouldn't know
>>>> anything about kasan/shadow.
>>>
>>> How else would kasan_need_to_allocate_shadow(const void *addr) work for
>>> architectures which don't have a reserved vmalloc region for modules?
>>>
>>
>>
>> I think I need to clarify what I'm doing.
>>
>> Address sanitizer algorithm in short:
>> -------------------------------------
>> Every memory access is transformed by the compiler in the following way:
>>
>> Before:
>> 	*address = ...;
>>
>> after:
>>
>> 	if (memory_is_poisoned(address)) {
>> 		report_error(address, access_size);
>> 	}
>> 	*address = ...;
>>
>> where memory_is_poisoned():
>> 	bool memory_is_poisoned(unsigned long addr)
>> 	{
>>         	s8 shadow_value = *(s8 *)kasan_mem_to_shadow((void *)addr);
>> 	        if (unlikely(shadow_value)) {
>>         	        s8 last_accessible_byte = addr & KASAN_SHADOW_MASK;
>>                 	return unlikely(last_accessible_byte >= shadow_value);
>> 	        }
>> 	        return false;
>> 	}
>> --------------------------------------
>>
>> So shadow memory should be present for every accessible address in kernel
>> otherwise it will be unhandled page fault on reading shadow value.
>>
>> Shadow for vmalloc addresses (on x86_64) is readonly mapping of one zero page.
>> Zero byte in shadow means that it's ok to access to that address.
>> Currently we don't catch bugs in vmalloc because most of such bugs could be caught
>> in more simple way with CONFIG_DEBUG_PAGEALLOC.
>> That's why we don't need RW shadow for vmalloc, it just one zero page that readonly
>> mapped early on boot for the whole [kasan_mem_to_shadow(VMALLOC_START, kasan_mem_to_shadow(VMALLOC_END)] range
>> So every access to vmalloc range assumed to be valid.
>>
>> To catch out of bounds accesses in global variables we need to fill shadow corresponding
>> to variable's redzone with non-zero (negative) values.
>> So for kernel image and modules we need a writable shadow.
>>
>> If some arch don't have separate address range for modules and it uses general vmalloc()
>> shadow for vmalloc should be writable, so it means that shadow has to be allocated
>> for every vmalloc() call.
>>
>> In such arch kasan_need_to_allocate_shadow(const void *addr) should return true for every vmalloc address:
>> bool kasan_need_to_allocate_shadow(const void *addr)
>> {
>> 	return addr >= VMALLOC_START && addr < VMALLOC_END;
>> }
> 
> Thanks for the explanation.
> 
>> All above means that current code is not very portable.
>> And 'kasan_module_alloc(p, size) after module alloc' approach is not portable
>> too. This won't work for arches that use [VMALLOC_START, VMALLOC_END] addresses for modules,
>> because now we need to handle all vmalloc() calls.
> 
> I'm confused.  That's what you do now, and it hasn't been a problem,
> has it?  The problem is on the freeing from interrupt context...
> 

It's not a problem now. It's only about portability.


> How about:
> 
> #define VM_KASAN		0x00000080      /* has shadow kasan map */
> 
> Set that in kasan_module_alloc():
> 
>         if (ret) {
>                 struct vm_struct *vma = find_vm_area(addr);
> 
>                 BUG_ON(!vma);
>                 /* Set VM_KASAN so vfree() can free up shadow. */
>                 vma->flags |= VM_KASAN;
>         }
> 
> And check that in __vunmap():
> 
>         if (area->flags & VM_KASAN)
>                 kasan_module_free(addr);
> 
> That is portable, and is actually a fairly small patch on what you
> have at the moment.
> 
> What am I missing?
> 

That is not portable.
Architectures that don't have a separate region for modules should allocate shadow
for every vmalloc() call, not only for modules.
For x86_64 it is enough to call kasan_module_alloc() only in module_alloc().
For some other architectures kasan_module_alloc() (kasan_vmalloc()/kasan_alloc_shadow() would be a better name in this case)
should be called for all vmalloc() allocations.

Actually, I'm fine with what you are proposing here. I think the portability issues can be
fixed later, when this becomes a real problem.


> Thanks,
> Rusty.
> 


* Re: [PATCH] kasan, module, vmalloc: rework shadow allocation for modules
  2015-02-25  7:56               ` Andrey Ryabinin
@ 2015-02-26  1:30                 ` Rusty Russell
  2015-02-27 12:20                   ` Andrey Ryabinin
  0 siblings, 1 reply; 11+ messages in thread
From: Rusty Russell @ 2015-02-26  1:30 UTC (permalink / raw)
  To: Andrey Ryabinin, Andrew Morton; +Cc: linux-mm, linux-kernel, Dmitry Vyukov

Andrey Ryabinin <a.ryabinin@samsung.com> writes:
> On 02/25/2015 09:25 AM, Rusty Russell wrote:
>> Andrey Ryabinin <a.ryabinin@samsung.com> writes:
>>> On 02/23/2015 11:26 AM, Rusty Russell wrote:
>>>> Andrey Ryabinin <a.ryabinin@samsung.com> writes:
>>>>> On 02/20/2015 03:15 AM, Rusty Russell wrote:
>>>>>> Andrey Ryabinin <a.ryabinin@samsung.com> writes:
>>>>>>> On 02/19/2015 02:10 AM, Rusty Russell wrote:
>>>>>>>> This is not portable.  Other archs don't use vmalloc, or don't use
>>>>>>>> (or define) MODULES_VADDR.  If you really want to hook here, you'd
>>>>>>>> need a new flag (or maybe use PAGE_KERNEL_EXEC after an audit).
>>>>>>>>
>>>>>>>
>>>>>>> Well, instead of explicit (addr >= MODULES_VADDR && addr < MODULES_END)
>>>>>>> I could hide this into arch-specific function: 'kasan_need_to_allocate_shadow(const void *addr)'
>>>>>>> or make make all those functions weak and allow arch code to redefine them.
>>>>>>
>>>>>> That adds another layer of indirection.  And how would the caller of
>>>>>> plain vmalloc() even know what to return?
>>>>>>
>>>>>
>>>>> I think I don't understand what do you mean here. vmalloc() callers shouldn't know
>>>>> anything about kasan/shadow.
>>>>
>>>> How else would kasan_need_to_allocate_shadow(const void *addr) work for
>>>> architectures which don't have a reserved vmalloc region for modules?
>>>>
>>>
>>>
>>> I think I need to clarify what I'm doing.
>>>
>>> Address sanitizer algorithm in short:
>>> -------------------------------------
>>> Every memory access is transformed by the compiler in the following way:
>>>
>>> Before:
>>> 	*address = ...;
>>>
>>> after:
>>>
>>> 	if (memory_is_poisoned(address)) {
>>> 		report_error(address, access_size);
>>> 	}
>>> 	*address = ...;
>>>
>>> where memory_is_poisoned():
>>> 	bool memory_is_poisoned(unsigned long addr)
>>> 	{
>>>         	s8 shadow_value = *(s8 *)kasan_mem_to_shadow((void *)addr);
>>> 	        if (unlikely(shadow_value)) {
>>>         	        s8 last_accessible_byte = addr & KASAN_SHADOW_MASK;
>>>                 	return unlikely(last_accessible_byte >= shadow_value);
>>> 	        }
>>> 	        return false;
>>> 	}
>>> --------------------------------------
>>>
>>> So shadow memory should be present for every accessible address in kernel
>>> otherwise it will be unhandled page fault on reading shadow value.
>>>
>>> Shadow for vmalloc addresses (on x86_64) is readonly mapping of one zero page.
>>> Zero byte in shadow means that it's ok to access to that address.
>>> Currently we don't catch bugs in vmalloc because most of such bugs could be caught
>>> in more simple way with CONFIG_DEBUG_PAGEALLOC.
>>> That's why we don't need RW shadow for vmalloc, it just one zero page that readonly
>>> mapped early on boot for the whole [kasan_mem_to_shadow(VMALLOC_START, kasan_mem_to_shadow(VMALLOC_END)] range
>>> So every access to vmalloc range assumed to be valid.
>>>
>>> To catch out of bounds accesses in global variables we need to fill shadow corresponding
>>> to variable's redzone with non-zero (negative) values.
>>> So for kernel image and modules we need a writable shadow.
>>>
>>> If some arch don't have separate address range for modules and it uses general vmalloc()
>>> shadow for vmalloc should be writable, so it means that shadow has to be allocated
>>> for every vmalloc() call.
>>>
>>> In such arch kasan_need_to_allocate_shadow(const void *addr) should return true for every vmalloc address:
>>> bool kasan_need_to_allocate_shadow(const void *addr)
>>> {
>>> 	return addr >= VMALLOC_START && addr < VMALLOC_END;
>>> }
>> 
>> Thanks for the explanation.
>> 
>>> All above means that current code is not very portable.
>>> And 'kasan_module_alloc(p, size) after module alloc' approach is not portable
>>> too. This won't work for arches that use [VMALLOC_START, VMALLOC_END] addresses for modules,
>>> because now we need to handle all vmalloc() calls.
>> 
>> I'm confused.  That's what you do now, and it hasn't been a problem,
>> has it?  The problem is on the freeing from interrupt context...
>> 
>
> It's not problem now. It's only about portability.

Your first patch in this conversation says "Current approach in handling
shadow memory for modules is broken."

>> #define VM_KASAN		0x00000080      /* has shadow kasan map */
>> 
>> Set that in kasan_module_alloc():
>> 
>>         if (ret) {
>>                 struct vm_struct *vma = find_vm_area(addr);
>> 
>>                 BUG_ON(!vma);
>>                 /* Set VM_KASAN so vfree() can free up shadow. */
>>                 vma->flags |= VM_KASAN;
>>         }
>> 
>> And check that in __vunmap():
>> 
>>         if (area->flags & VM_KASAN)
>>                 kasan_module_free(addr);
>> 
>> That is portable, and is actually a fairly small patch on what you
>> have at the moment.
>> 
>> What am I missing?
>> 
>
> That is not portable.
> Architectures that don't have separate region for modules should allocate shadow
> for every vmalloc() call, not only for modules.

OK, I didn't appreciate that.  But couldn't you still use the "R/O
shared zero page shadow" for vmalloc, and have kasan_module_alloc()
simply replace the pages with r/w ones (and kasan_module_free()
would have to remove it again)?

> Actually I'm fine with what you are proposing here. I think that portability issues could be fixed
> latter when this will become a real problem.

OK.

Thanks for your patience!
Rusty.

* Re: [PATCH] kasan, module, vmalloc: rework shadow allocation for modules
  2015-02-26  1:30                 ` Rusty Russell
@ 2015-02-27 12:20                   ` Andrey Ryabinin
  0 siblings, 0 replies; 11+ messages in thread
From: Andrey Ryabinin @ 2015-02-27 12:20 UTC (permalink / raw)
  To: Rusty Russell, Andrew Morton; +Cc: linux-mm, linux-kernel, Dmitry Vyukov

On 02/26/2015 04:30 AM, Rusty Russell wrote:
> Andrey Ryabinin <a.ryabinin@samsung.com> writes:
>> On 02/25/2015 09:25 AM, Rusty Russell wrote:
>>> Andrey Ryabinin <a.ryabinin@samsung.com> writes:
>>>> On 02/23/2015 11:26 AM, Rusty Russell wrote:
>>>>> Andrey Ryabinin <a.ryabinin@samsung.com> writes:
>>>>>> On 02/20/2015 03:15 AM, Rusty Russell wrote:
>>>>>>> Andrey Ryabinin <a.ryabinin@samsung.com> writes:
>>>>>>>> On 02/19/2015 02:10 AM, Rusty Russell wrote:
>>>>>>>>> This is not portable.  Other archs don't use vmalloc, or don't use
>>>>>>>>> (or define) MODULES_VADDR.  If you really want to hook here, you'd
>>>>>>>>> need a new flag (or maybe use PAGE_KERNEL_EXEC after an audit).
>>>>>>>>>
>>>>>>>>
>>>>>>>> Well, instead of explicit (addr >= MODULES_VADDR && addr < MODULES_END)
>>>>>>>> I could hide this into arch-specific function: 'kasan_need_to_allocate_shadow(const void *addr)'
>>>>>>>> or make make all those functions weak and allow arch code to redefine them.
>>>>>>>
>>>>>>> That adds another layer of indirection.  And how would the caller of
>>>>>>> plain vmalloc() even know what to return?
>>>>>>>
>>>>>>
>>>>>> I think I don't understand what do you mean here. vmalloc() callers shouldn't know
>>>>>> anything about kasan/shadow.
>>>>>
>>>>> How else would kasan_need_to_allocate_shadow(const void *addr) work for
>>>>> architectures which don't have a reserved vmalloc region for modules?
>>>>>
>>>>
>>>>
>>>> I think I need to clarify what I'm doing.
>>>>
>>>> Address sanitizer algorithm in short:
>>>> -------------------------------------
>>>> Every memory access is transformed by the compiler in the following way:
>>>>
>>>> Before:
>>>> 	*address = ...;
>>>>
>>>> after:
>>>>
>>>> 	if (memory_is_poisoned(address)) {
>>>> 		report_error(address, access_size);
>>>> 	}
>>>> 	*address = ...;
>>>>
>>>> where memory_is_poisoned():
>>>> 	bool memory_is_poisoned(unsigned long addr)
>>>> 	{
>>>>         	s8 shadow_value = *(s8 *)kasan_mem_to_shadow((void *)addr);
>>>> 	        if (unlikely(shadow_value)) {
>>>>         	        s8 last_accessible_byte = addr & KASAN_SHADOW_MASK;
>>>>                 	return unlikely(last_accessible_byte >= shadow_value);
>>>> 	        }
>>>> 	        return false;
>>>> 	}
>>>> --------------------------------------
>>>>
>>>> So shadow memory should be present for every accessible address in kernel
>>>> otherwise it will be unhandled page fault on reading shadow value.
>>>>
>>>> Shadow for vmalloc addresses (on x86_64) is readonly mapping of one zero page.
>>>> Zero byte in shadow means that it's ok to access to that address.
>>>> Currently we don't catch bugs in vmalloc because most of such bugs could be caught
>>>> in more simple way with CONFIG_DEBUG_PAGEALLOC.
>>>> That's why we don't need RW shadow for vmalloc, it just one zero page that readonly
>>>> mapped early on boot for the whole [kasan_mem_to_shadow(VMALLOC_START, kasan_mem_to_shadow(VMALLOC_END)] range
>>>> So every access to vmalloc range assumed to be valid.
>>>>
>>>> To catch out of bounds accesses in global variables we need to fill shadow corresponding
>>>> to variable's redzone with non-zero (negative) values.
>>>> So for kernel image and modules we need a writable shadow.
>>>>
>>>> If some arch don't have separate address range for modules and it uses general vmalloc()
>>>> shadow for vmalloc should be writable, so it means that shadow has to be allocated
>>>> for every vmalloc() call.
>>>>
>>>> In such arch kasan_need_to_allocate_shadow(const void *addr) should return true for every vmalloc address:
>>>> bool kasan_need_to_allocate_shadow(const void *addr)
>>>> {
>>>> 	return addr >= VMALLOC_START && addr < VMALLOC_END;
>>>> }
>>>
>>> Thanks for the explanation.
>>>
>>>> All above means that current code is not very portable.
>>>> And 'kasan_module_alloc(p, size) after module alloc' approach is not portable
>>>> too. This won't work for arches that use [VMALLOC_START, VMALLOC_END] addresses for modules,
>>>> because now we need to handle all vmalloc() calls.
>>>
>>> I'm confused.  That's what you do now, and it hasn't been a problem,
>>> has it?  The problem is on the freeing from interrupt context...
>>>
>>
>> It's not problem now. It's only about portability.
> 
> Your first patch in this conversation says "Current approach in handling
> shadow memory for modules is broken."
> 

Sorry, my last answer was even more confusing.
You are right, the main problem is with freeing from interrupts.

I meant that this:

> This won't work for arches that use [VMALLOC_START, VMALLOC_END] addresses for modules,
> because now we need to handle all vmalloc() calls.

is not a problem for now.



>>> #define VM_KASAN		0x00000080      /* has shadow kasan map */
>>>
>>> Set that in kasan_module_alloc():
>>>
>>>         if (ret) {
>>>                 struct vm_struct *vma = find_vm_area(addr);
>>>
>>>                 BUG_ON(!vma);
>>>                 /* Set VM_KASAN so vfree() can free up shadow. */
>>>                 vma->flags |= VM_KASAN;
>>>         }
>>>
>>> And check that in __vunmap():
>>>
>>>         if (area->flags & VM_KASAN)
>>>                 kasan_module_free(addr);
>>>
>>> That is portable, and is actually a fairly small patch on what you
>>> have at the moment.
>>>
>>> What am I missing?
>>>
>>
>> That is not portable.
>> Architectures that don't have separate region for modules should allocate shadow
>> for every vmalloc() call, not only for modules.
> 
> OK, I didn't appreciate that.  But couldn't you still use the "R/O
> shared zero page shadow" for vmalloc, and have kasan_module_alloc()
> simply replace the pages with r/w ones (and kasan_module_free()
> would have to remove it again).
> 
>> Actually I'm fine with what you are proposing here. I think that portability issues could be fixed
>> latter when this will become a real problem.
> 
> OK.
> 
> Thanks for your patience!
> Rusty.
> 

