All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC] kprobes: Remove MODULES dependency
@ 2020-07-09 23:45 Jarkko Sakkinen
  2020-07-10  9:03 ` Peter Zijlstra
  2020-07-10 10:32 ` Masami Hiramatsu
  0 siblings, 2 replies; 19+ messages in thread
From: Jarkko Sakkinen @ 2020-07-09 23:45 UTC (permalink / raw)
  To: linux-kernel
  Cc: Jarkko Sakkinen, Andi Kleen, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	H. Peter Anvin, Naveen N. Rao, Anil S Keshavamurthy,
	David S. Miller, Masami Hiramatsu, Steven Rostedt, Andrew Morton,
	Peter Zijlstra, Aneesh Kumar K.V, Will Deacon, Kees Cook,
	Arnd Bergmann, Alexandre Ghiti, Masahiro Yamada, Sami Tolvanen,
	Peter Collingbourne, Krzysztof Kozlowski, Frederic Weisbecker,
	Stephen Boyd, Alexei Starovoitov, Mike Rapoport,
	Sean Christopherson, Jiri Olsa

Remove MODULES dependency and migrate from module_alloc to vmalloc().
According to Andi, the history with this dependency is that kprobes
originally required custom LKM's, which does not hold today anymore.

Right now one has to compile LKM support only to enable kprobes.  With
this change applied, it is somewhat easier to create custom test
kernel's with a proper debugging capabilities, thus making Linux more
developer friendly.

Cc: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
---
 arch/Kconfig                   |  1 -
 arch/x86/kernel/kprobes/core.c |  5 +++--
 kernel/kprobes.c               | 22 ++++++++++++++++++++--
 kernel/trace/trace_kprobe.c    | 12 ++++++++++++
 4 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 8cc35dc556c7..bb59cdf335ab 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -61,7 +61,6 @@ config OPROFILE_NMI_TIMER
 
 config KPROBES
 	bool "Kprobes"
-	depends on MODULES
 	depends on HAVE_KPROBES
 	select KALLSYMS
 	help
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index ada39ddbc922..dc7b8d6fd20d 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -42,6 +42,7 @@
 #include <linux/moduleloader.h>
 #include <linux/vmalloc.h>
 #include <linux/pgtable.h>
+#include <linux/vmalloc.h>
 
 #include <asm/text-patching.h>
 #include <asm/cacheflush.h>
@@ -423,7 +424,7 @@ void *alloc_insn_page(void)
 {
 	void *page;
 
-	page = module_alloc(PAGE_SIZE);
+	page = vmalloc(PAGE_SIZE);
 	if (!page)
 		return NULL;
 
@@ -446,7 +447,7 @@ void *alloc_insn_page(void)
 /* Recover page to RW mode before releasing it */
 void free_insn_page(void *page)
 {
-	module_memfree(page);
+	vfree(page);
 }
 
 static int arch_copy_kprobe(struct kprobe *p)
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 4a904cc56d68..02680642ea11 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -35,6 +35,7 @@
 #include <linux/ftrace.h>
 #include <linux/cpu.h>
 #include <linux/jump_label.h>
+#include <linux/vmalloc.h>
 
 #include <asm/sections.h>
 #include <asm/cacheflush.h>
@@ -111,12 +112,12 @@ enum kprobe_slot_state {
 
 void __weak *alloc_insn_page(void)
 {
-	return module_alloc(PAGE_SIZE);
+	return vmalloc(PAGE_SIZE);
 }
 
 void __weak free_insn_page(void *page)
 {
-	module_memfree(page);
+	vfree(page);
 }
 
 struct kprobe_insn_cache kprobe_insn_slots = {
@@ -563,8 +564,11 @@ static void kprobe_optimizer(struct work_struct *work)
 	mutex_lock(&kprobe_mutex);
 	cpus_read_lock();
 	mutex_lock(&text_mutex);
+
+#ifdef CONFIG_MODULES
 	/* Lock modules while optimizing kprobes */
 	mutex_lock(&module_mutex);
+#endif
 
 	/*
 	 * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
@@ -589,7 +593,9 @@ static void kprobe_optimizer(struct work_struct *work)
 	/* Step 4: Free cleaned kprobes after quiesence period */
 	do_free_cleaned_kprobes();
 
+#ifdef CONFIG_MODULES
 	mutex_unlock(&module_mutex);
+#endif
 	mutex_unlock(&text_mutex);
 	cpus_read_unlock();
 
@@ -739,6 +745,7 @@ static int reuse_unused_kprobe(struct kprobe *ap)
 	return 0;
 }
 
+#ifdef CONFIG_MODULE
 /* Remove optimized instructions */
 static void kill_optimized_kprobe(struct kprobe *p)
 {
@@ -764,6 +771,7 @@ static void kill_optimized_kprobe(struct kprobe *p)
 	/* Don't touch the code, because it is already freed. */
 	arch_remove_optimized_kprobe(op);
 }
+#endif
 
 static inline
 void __prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
@@ -1608,6 +1616,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
 			goto out;
 		}
 
+#ifdef CONFIG_MODULE
 		/*
 		 * If the module freed .init.text, we couldn't insert
 		 * kprobes in there.
@@ -1618,6 +1627,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
 			*probed_mod = NULL;
 			ret = -ENOENT;
 		}
+#endif
 	}
 out:
 	preempt_enable();
@@ -2090,6 +2100,7 @@ NOKPROBE_SYMBOL(pre_handler_kretprobe);
 
 #endif /* CONFIG_KRETPROBES */
 
+#ifdef CONFIG_MODULE
 /* Set the kprobe gone and remove its instruction buffer. */
 static void kill_kprobe(struct kprobe *p)
 {
@@ -2114,6 +2125,7 @@ static void kill_kprobe(struct kprobe *p)
 	 */
 	arch_remove_kprobe(p);
 }
+#endif
 
 /* Disable one kprobe */
 int disable_kprobe(struct kprobe *kp)
@@ -2214,6 +2226,7 @@ int kprobe_add_area_blacklist(unsigned long start, unsigned long end)
 	return 0;
 }
 
+#ifdef CONFIG_MODULE
 /* Remove all symbols in given area from kprobe blacklist */
 static void kprobe_remove_area_blacklist(unsigned long start, unsigned long end)
 {
@@ -2231,6 +2244,7 @@ static void kprobe_remove_ksym_blacklist(unsigned long entry)
 {
 	kprobe_remove_area_blacklist(entry, entry + 1);
 }
+#endif
 
 int __init __weak arch_populate_kprobe_blacklist(void)
 {
@@ -2274,6 +2288,7 @@ static int __init populate_kprobe_blacklist(unsigned long *start,
 	return ret ? : arch_populate_kprobe_blacklist();
 }
 
+#ifdef CONFIG_MODULE
 static void add_module_kprobe_blacklist(struct module *mod)
 {
 	unsigned long start, end;
@@ -2375,6 +2390,7 @@ static struct notifier_block kprobe_module_nb = {
 	.notifier_call = kprobes_module_callback,
 	.priority = 0
 };
+#endif /* CONFIG_MODULE */
 
 /* Markers of _kprobe_blacklist section */
 extern unsigned long __start_kprobe_blacklist[];
@@ -2425,8 +2441,10 @@ static int __init init_kprobes(void)
 	err = arch_init_kprobes();
 	if (!err)
 		err = register_die_notifier(&kprobe_exceptions_nb);
+#ifdef CONFIG_MODULE
 	if (!err)
 		err = register_module_notifier(&kprobe_module_nb);
+#endif
 
 	kprobes_initialized = (err == 0);
 
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index aefb6065b508..30969b38fce8 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -103,6 +103,7 @@ static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk)
 	return !!(kprobe_gone(&tk->rp.kp));
 }
 
+#ifdef CONFIG_MODULE
 static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
 						 struct module *mod)
 {
@@ -110,7 +111,9 @@ static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
 	const char *name = trace_kprobe_symbol(tk);
 	return strncmp(mod->name, name, len) == 0 && name[len] == ':';
 }
+#endif
 
+#ifdef CONFIG_MODULE
 static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk)
 {
 	char *p;
@@ -129,6 +132,7 @@ static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk)
 
 	return ret;
 }
+#endif
 
 static bool trace_kprobe_is_busy(struct dyn_event *ev)
 {
@@ -608,10 +612,12 @@ static int append_trace_kprobe(struct trace_kprobe *tk, struct trace_kprobe *to)
 
 	/* Register k*probe */
 	ret = __register_trace_kprobe(tk);
+#ifdef CONFIG_MODULE
 	if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) {
 		pr_warn("This probe might be able to register after target module is loaded. Continue.\n");
 		ret = 0;
 	}
+#endif
 
 	if (ret)
 		trace_probe_unlink(&tk->tp);
@@ -651,10 +657,12 @@ static int register_trace_kprobe(struct trace_kprobe *tk)
 
 	/* Register k*probe */
 	ret = __register_trace_kprobe(tk);
+#ifdef CONFIG_MODULE
 	if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) {
 		pr_warn("This probe might be able to register after target module is loaded. Continue.\n");
 		ret = 0;
 	}
+#endif
 
 	if (ret < 0)
 		unregister_kprobe_event(tk);
@@ -666,6 +674,7 @@ static int register_trace_kprobe(struct trace_kprobe *tk)
 	return ret;
 }
 
+#ifdef CONFIG_MODULE
 /* Module notifier call back, checking event on the module */
 static int trace_kprobe_module_callback(struct notifier_block *nb,
 				       unsigned long val, void *data)
@@ -700,6 +709,7 @@ static struct notifier_block trace_kprobe_module_nb = {
 	.notifier_call = trace_kprobe_module_callback,
 	.priority = 1	/* Invoked after kprobe module callback */
 };
+#endif
 
 /* Convert certain expected symbols into '_' when generating event names */
 static inline void sanitize_event_name(char *name)
@@ -1891,8 +1901,10 @@ static __init int init_kprobe_trace_early(void)
 	if (ret)
 		return ret;
 
+#ifdef CONFIG_MODULE
 	if (register_module_notifier(&trace_kprobe_module_nb))
 		return -EINVAL;
+#endif
 
 	return 0;
 }
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC] kprobes: Remove MODULES dependency
  2020-07-09 23:45 [PATCH RFC] kprobes: Remove MODULES dependency Jarkko Sakkinen
@ 2020-07-10  9:03 ` Peter Zijlstra
  2020-07-10 10:36   ` Jarkko Sakkinen
  2020-07-10 10:32 ` Masami Hiramatsu
  1 sibling, 1 reply; 19+ messages in thread
From: Peter Zijlstra @ 2020-07-10  9:03 UTC (permalink / raw)
  To: Jarkko Sakkinen
  Cc: linux-kernel, Andi Kleen, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	H. Peter Anvin, Naveen N. Rao, Anil S Keshavamurthy,
	David S. Miller, Masami Hiramatsu, Steven Rostedt, Andrew Morton,
	Aneesh Kumar K.V, Will Deacon, Kees Cook, Arnd Bergmann,
	Alexandre Ghiti, Masahiro Yamada, Sami Tolvanen,
	Peter Collingbourne, Krzysztof Kozlowski, Frederic Weisbecker,
	Stephen Boyd, Alexei Starovoitov, Mike Rapoport,
	Sean Christopherson, Jiri Olsa

On Fri, Jul 10, 2020 at 02:45:19AM +0300, Jarkko Sakkinen wrote:
> Remove MODULES dependency and migrate from module_alloc to vmalloc().
> According to Andi, the history with this dependency is that kprobes
> originally required custom LKM's, which does not hold today anymore.
> 
> Right now one has to compile LKM support only to enable kprobes.  With
> this change applied, it is somewhat easier to create custom test
> kernel's with a proper debugging capabilities, thus making Linux more
> developer friendly.
> 
> Cc: Andi Kleen <ak@linux.intel.com>
> Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>

NAK

this patch is horrific, it sprinkles a metric ton of #ifdef and silently
disables a lot of kprobe features (like all the opt stuff).

How about unconditionally providing module_alloc() instead?

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC] kprobes: Remove MODULES dependency
  2020-07-09 23:45 [PATCH RFC] kprobes: Remove MODULES dependency Jarkko Sakkinen
  2020-07-10  9:03 ` Peter Zijlstra
@ 2020-07-10 10:32 ` Masami Hiramatsu
  2020-07-10 11:32   ` Peter Zijlstra
                     ` (2 more replies)
  1 sibling, 3 replies; 19+ messages in thread
From: Masami Hiramatsu @ 2020-07-10 10:32 UTC (permalink / raw)
  To: Jarkko Sakkinen
  Cc: linux-kernel, Andi Kleen, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	H. Peter Anvin, Naveen N. Rao, Anil S Keshavamurthy,
	David S. Miller, Masami Hiramatsu, Steven Rostedt, Andrew Morton,
	Peter Zijlstra, Aneesh Kumar K.V, Will Deacon, Kees Cook,
	Arnd Bergmann, Alexandre Ghiti, Masahiro Yamada, Sami Tolvanen,
	Peter Collingbourne, Krzysztof Kozlowski, Frederic Weisbecker,
	Stephen Boyd, Alexei Starovoitov, Mike Rapoport,
	Sean Christopherson, Jiri Olsa

Hi Jarkko,

On Fri, 10 Jul 2020 02:45:19 +0300
Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> wrote:

> Remove MODULES dependency and migrate from module_alloc to vmalloc().
> According to Andi, the history with this dependency is that kprobes
> originally required custom LKM's, which does not hold today anymore.
> 
> Right now one has to compile LKM support only to enable kprobes.  With
> this change applied, it is somewhat easier to create custom test
> kernel's with a proper debugging capabilities, thus making Linux more
> developer friendly.

Agreed. Now we have several way to access kprobes via ftrace/perf/bpf
without modules. So it's a good time to remove CONFIG_MODULE dependency.

But I also would like to minimize the changes for code readability.
I made some comments below.

> 
> Cc: Andi Kleen <ak@linux.intel.com>
> Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
> ---
>  arch/Kconfig                   |  1 -
>  arch/x86/kernel/kprobes/core.c |  5 +++--
>  kernel/kprobes.c               | 22 ++++++++++++++++++++--
>  kernel/trace/trace_kprobe.c    | 12 ++++++++++++
>  4 files changed, 35 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 8cc35dc556c7..bb59cdf335ab 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -61,7 +61,6 @@ config OPROFILE_NMI_TIMER
>  
>  config KPROBES
>  	bool "Kprobes"
> -	depends on MODULES
>  	depends on HAVE_KPROBES
>  	select KALLSYMS
>  	help
> diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
> index ada39ddbc922..dc7b8d6fd20d 100644
> --- a/arch/x86/kernel/kprobes/core.c
> +++ b/arch/x86/kernel/kprobes/core.c
> @@ -42,6 +42,7 @@
>  #include <linux/moduleloader.h>
>  #include <linux/vmalloc.h>
>  #include <linux/pgtable.h>
> +#include <linux/vmalloc.h>
>  
>  #include <asm/text-patching.h>
>  #include <asm/cacheflush.h>
> @@ -423,7 +424,7 @@ void *alloc_insn_page(void)
>  {
>  	void *page;
>  
> -	page = module_alloc(PAGE_SIZE);
> +	page = vmalloc(PAGE_SIZE);

No, you can not use vmalloc here. The reason why we use module_alloc()
is to allocate the executable memory for trampoline code.
So, you need to use vmalloc_exec() instead.

>  	if (!page)
>  		return NULL;
>  
> @@ -446,7 +447,7 @@ void *alloc_insn_page(void)
>  /* Recover page to RW mode before releasing it */
>  void free_insn_page(void *page)
>  {
> -	module_memfree(page);
> +	vfree(page);
>  }
>  
>  static int arch_copy_kprobe(struct kprobe *p)
> diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> index 4a904cc56d68..02680642ea11 100644
> --- a/kernel/kprobes.c
> +++ b/kernel/kprobes.c
> @@ -35,6 +35,7 @@
>  #include <linux/ftrace.h>
>  #include <linux/cpu.h>
>  #include <linux/jump_label.h>
> +#include <linux/vmalloc.h>
>  
>  #include <asm/sections.h>
>  #include <asm/cacheflush.h>
> @@ -111,12 +112,12 @@ enum kprobe_slot_state {
>  
>  void __weak *alloc_insn_page(void)
>  {
> -	return module_alloc(PAGE_SIZE);
> +	return vmalloc(PAGE_SIZE);

Ditto.

>  }
>  
>  void __weak free_insn_page(void *page)
>  {
> -	module_memfree(page);
> +	vfree(page);
>  }
>  
>  struct kprobe_insn_cache kprobe_insn_slots = {
> @@ -563,8 +564,11 @@ static void kprobe_optimizer(struct work_struct *work)
>  	mutex_lock(&kprobe_mutex);
>  	cpus_read_lock();
>  	mutex_lock(&text_mutex);
> +
> +#ifdef CONFIG_MODULES
>  	/* Lock modules while optimizing kprobes */
>  	mutex_lock(&module_mutex);
> +#endif

Hmm, can you reduce these "#ifdef CONFIG_MODULE"s ?

e.g. 

#ifdef CONFIG_MODULES
static void lock_modules(void)
{
	mutex_lock(&module_mutex);
}
...
#else
#define lock_modules() do { } while (0)
...
#endif

>  
>  	/*
>  	 * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
> @@ -589,7 +593,9 @@ static void kprobe_optimizer(struct work_struct *work)
>  	/* Step 4: Free cleaned kprobes after quiesence period */
>  	do_free_cleaned_kprobes();
>  
> +#ifdef CONFIG_MODULES
>  	mutex_unlock(&module_mutex);
> +#endif
>  	mutex_unlock(&text_mutex);
>  	cpus_read_unlock();
>  
> @@ -739,6 +745,7 @@ static int reuse_unused_kprobe(struct kprobe *ap)
>  	return 0;
>  }
>  

> +#ifdef CONFIG_MODULE
>  /* Remove optimized instructions */
>  static void kill_optimized_kprobe(struct kprobe *p)
>  {
> @@ -764,6 +771,7 @@ static void kill_optimized_kprobe(struct kprobe *p)
>  	/* Don't touch the code, because it is already freed. */
>  	arch_remove_optimized_kprobe(op);
>  }
> +#endif

This optprobe related one is OK to keep in this place. 

>  
>  static inline
>  void __prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
> @@ -1608,6 +1616,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
>  			goto out;
>  		}
>  
> +#ifdef CONFIG_MODULE
>  		/*
>  		 * If the module freed .init.text, we couldn't insert
>  		 * kprobes in there.
> @@ -1618,6 +1627,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
>  			*probed_mod = NULL;
>  			ret = -ENOENT;
>  		}
> +#endif
>  	}
>  out:
>  	preempt_enable();
> @@ -2090,6 +2100,7 @@ NOKPROBE_SYMBOL(pre_handler_kretprobe);
>  
>  #endif /* CONFIG_KRETPROBES */
>  
> +#ifdef CONFIG_MODULE
>  /* Set the kprobe gone and remove its instruction buffer. */
>  static void kill_kprobe(struct kprobe *p)
>  {
> @@ -2114,6 +2125,7 @@ static void kill_kprobe(struct kprobe *p)
>  	 */
>  	arch_remove_kprobe(p);
>  }
> +#endif
>  
>  /* Disable one kprobe */
>  int disable_kprobe(struct kprobe *kp)
> @@ -2214,6 +2226,7 @@ int kprobe_add_area_blacklist(unsigned long start, unsigned long end)
>  	return 0;
>  }
>  
> +#ifdef CONFIG_MODULE
>  /* Remove all symbols in given area from kprobe blacklist */
>  static void kprobe_remove_area_blacklist(unsigned long start, unsigned long end)
>  {
> @@ -2231,6 +2244,7 @@ static void kprobe_remove_ksym_blacklist(unsigned long entry)
>  {
>  	kprobe_remove_area_blacklist(entry, entry + 1);
>  }
> +#endif
>  
>  int __init __weak arch_populate_kprobe_blacklist(void)
>  {
> @@ -2274,6 +2288,7 @@ static int __init populate_kprobe_blacklist(unsigned long *start,
>  	return ret ? : arch_populate_kprobe_blacklist();
>  }
>  
> +#ifdef CONFIG_MODULE
>  static void add_module_kprobe_blacklist(struct module *mod)
>  {
>  	unsigned long start, end;
> @@ -2375,6 +2390,7 @@ static struct notifier_block kprobe_module_nb = {
>  	.notifier_call = kprobes_module_callback,
>  	.priority = 0
>  };
> +#endif /* CONFIG_MODULE */
>  
>  /* Markers of _kprobe_blacklist section */
>  extern unsigned long __start_kprobe_blacklist[];
> @@ -2425,8 +2441,10 @@ static int __init init_kprobes(void)
>  	err = arch_init_kprobes();
>  	if (!err)
>  		err = register_die_notifier(&kprobe_exceptions_nb);
> +#ifdef CONFIG_MODULE
>  	if (!err)
>  		err = register_module_notifier(&kprobe_module_nb);
> +#endif
>  
>  	kprobes_initialized = (err == 0);
>  
> diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
> index aefb6065b508..30969b38fce8 100644
> --- a/kernel/trace/trace_kprobe.c
> +++ b/kernel/trace/trace_kprobe.c
> @@ -103,6 +103,7 @@ static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk)
>  	return !!(kprobe_gone(&tk->rp.kp));
>  }
>  
> +#ifdef CONFIG_MODULE
>  static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
>  						 struct module *mod)
>  {
> @@ -110,7 +111,9 @@ static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
>  	const char *name = trace_kprobe_symbol(tk);
>  	return strncmp(mod->name, name, len) == 0 && name[len] == ':';
>  }
> +#endif
>  
> +#ifdef CONFIG_MODULE
>  static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk)
>  {
>  	char *p;
> @@ -129,6 +132,7 @@ static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk)
>  
>  	return ret;
>  }
> +#endif

Here, too. If you make a dummy function in case of !CONFIG_MODULE,
we don't need to modify caller-side code.

>  
>  static bool trace_kprobe_is_busy(struct dyn_event *ev)
>  {
> @@ -608,10 +612,12 @@ static int append_trace_kprobe(struct trace_kprobe *tk, struct trace_kprobe *to)
>  
>  	/* Register k*probe */
>  	ret = __register_trace_kprobe(tk);
> +#ifdef CONFIG_MODULE
>  	if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) {
>  		pr_warn("This probe might be able to register after target module is loaded. Continue.\n");
>  		ret = 0;
>  	}
> +#endif

So, please make a dummy trace_kprobe_module_exist() and keep this untouched.

>  
>  	if (ret)
>  		trace_probe_unlink(&tk->tp);
> @@ -651,10 +657,12 @@ static int register_trace_kprobe(struct trace_kprobe *tk)
>  
>  	/* Register k*probe */
>  	ret = __register_trace_kprobe(tk);
> +#ifdef CONFIG_MODULE
>  	if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) {
>  		pr_warn("This probe might be able to register after target module is loaded. Continue.\n");
>  		ret = 0;
>  	}
> +#endif

Ditto.

>  
>  	if (ret < 0)
>  		unregister_kprobe_event(tk);
> @@ -666,6 +674,7 @@ static int register_trace_kprobe(struct trace_kprobe *tk)
>  	return ret;
>  }
>  
> +#ifdef CONFIG_MODULE
>  /* Module notifier call back, checking event on the module */
>  static int trace_kprobe_module_callback(struct notifier_block *nb,
>  				       unsigned long val, void *data)
> @@ -700,6 +709,7 @@ static struct notifier_block trace_kprobe_module_nb = {
>  	.notifier_call = trace_kprobe_module_callback,
>  	.priority = 1	/* Invoked after kprobe module callback */
>  };
> +#endif

Unless it makes any build error, please keep it untouched.

>  
>  /* Convert certain expected symbols into '_' when generating event names */
>  static inline void sanitize_event_name(char *name)
> @@ -1891,8 +1901,10 @@ static __init int init_kprobe_trace_early(void)
>  	if (ret)
>  		return ret;
>  
> +#ifdef CONFIG_MODULE
>  	if (register_module_notifier(&trace_kprobe_module_nb))
>  		return -EINVAL;
> +#endif

Ditto.

Thank you,

>  
>  	return 0;
>  }
> -- 
> 2.25.1
> 


-- 
Masami Hiramatsu <mhiramat@kernel.org>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC] kprobes: Remove MODULES dependency
  2020-07-10  9:03 ` Peter Zijlstra
@ 2020-07-10 10:36   ` Jarkko Sakkinen
  2020-07-10 10:49     ` Peter Zijlstra
  0 siblings, 1 reply; 19+ messages in thread
From: Jarkko Sakkinen @ 2020-07-10 10:36 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, Andi Kleen, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	H. Peter Anvin, Naveen N. Rao, Anil S Keshavamurthy,
	David S. Miller, Masami Hiramatsu, Steven Rostedt, Andrew Morton,
	Aneesh Kumar K.V, Will Deacon, Kees Cook, Arnd Bergmann,
	Alexandre Ghiti, Masahiro Yamada, Sami Tolvanen,
	Peter Collingbourne, Krzysztof Kozlowski, Frederic Weisbecker,
	Stephen Boyd, Alexei Starovoitov, Mike Rapoport,
	Sean Christopherson, Jiri Olsa

On Fri, Jul 10, 2020 at 11:03:44AM +0200, Peter Zijlstra wrote:
> On Fri, Jul 10, 2020 at 02:45:19AM +0300, Jarkko Sakkinen wrote:
> > Remove MODULES dependency and migrate from module_alloc to vmalloc().
> > According to Andi, the history with this dependency is that kprobes
> > originally required custom LKM's, which does not hold today anymore.
> > 
> > Right now one has to compile LKM support only to enable kprobes.  With
> > this change applied, it is somewhat easier to create custom test
> > kernel's with a proper debugging capabilities, thus making Linux more
> > developer friendly.
> > 
> > Cc: Andi Kleen <ak@linux.intel.com>
> > Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
> 
> NAK
> 
> this patch is horrific, it sprinkles a metric ton of #ifdef and silently
> disables a lot of kprobe features (like all the opt stuff).

Perfectly nderstandable. I just drafted something quick andy dirty
together for idea's sake (and put RFC tag to state that).

The application where I use this chhange, is when I refactor large patch
set that I'm working on (namely SGX patch set in my case). I just want
squeece all the extra out from the kernel build and still have means for
instrumentation. A static kernel is very convenient for this kind of
purpose, as with EFI stub and statically linked user space you can have
a single test binary.

> How about unconditionally providing module_alloc() instead?

I believe so, yes.

Just so that I know (and learn), what did exactly disable optprobes?
Not too familiar with this part of the kernel - that's why I'm asking.
Does the module_alloc to vmalloc change disable it?

/Jarkko

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC] kprobes: Remove MODULES dependency
  2020-07-10 10:36   ` Jarkko Sakkinen
@ 2020-07-10 10:49     ` Peter Zijlstra
  2020-07-13  5:05       ` Jarkko Sakkinen
  0 siblings, 1 reply; 19+ messages in thread
From: Peter Zijlstra @ 2020-07-10 10:49 UTC (permalink / raw)
  To: Jarkko Sakkinen
  Cc: linux-kernel, Andi Kleen, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	H. Peter Anvin, Naveen N. Rao, Anil S Keshavamurthy,
	David S. Miller, Masami Hiramatsu, Steven Rostedt, Andrew Morton,
	Aneesh Kumar K.V, Will Deacon, Kees Cook, Arnd Bergmann,
	Alexandre Ghiti, Masahiro Yamada, Sami Tolvanen,
	Peter Collingbourne, Krzysztof Kozlowski, Frederic Weisbecker,
	Stephen Boyd, Alexei Starovoitov, Mike Rapoport,
	Sean Christopherson, Jiri Olsa

On Fri, Jul 10, 2020 at 01:36:38PM +0300, Jarkko Sakkinen wrote:
> Just so that I know (and learn), what did exactly disable optprobes?

So regular, old-skool style kprobe is:

  - copy original instruction out
  - replace instruction with breakpoint (int3 on x86)
  - have exception handler return to the copied instruction with
    single-step on
  - have single step exception handler return to the original
    instruction stream

which is 2 exceptions.

optprobes avoid the single-step by not only writing a single
instruction, but additionally placing a JMP instruction behind it such
that it will automagically continue in the original instruction stream.

This brings the requirement that the copied instruction is placed
within the JMP displacement of the regular kernel text (s32 on x86).

module_alloc() ensures the memory provided is within that range.



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC] kprobes: Remove MODULES dependency
  2020-07-10 10:32 ` Masami Hiramatsu
@ 2020-07-10 11:32   ` Peter Zijlstra
  2020-07-10 13:04     ` Christoph Hellwig
                       ` (2 more replies)
  2020-07-10 15:51   ` Kees Cook
  2020-07-13  5:39   ` Jarkko Sakkinen
  2 siblings, 3 replies; 19+ messages in thread
From: Peter Zijlstra @ 2020-07-10 11:32 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: Jarkko Sakkinen, linux-kernel, Andi Kleen, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	H. Peter Anvin, Naveen N. Rao, Anil S Keshavamurthy,
	David S. Miller, Steven Rostedt, Andrew Morton, Aneesh Kumar K.V,
	Will Deacon, Kees Cook, Arnd Bergmann, Alexandre Ghiti,
	Masahiro Yamada, Sami Tolvanen, Peter Collingbourne,
	Krzysztof Kozlowski, Frederic Weisbecker, Stephen Boyd,
	Alexei Starovoitov, Mike Rapoport, Sean Christopherson,
	Jiri Olsa, hch

On Fri, Jul 10, 2020 at 07:32:57PM +0900, Masami Hiramatsu wrote:
> > -	page = module_alloc(PAGE_SIZE);
> > +	page = vmalloc(PAGE_SIZE);
> 
> No, you can not use vmalloc here. The reason why we use module_alloc()
> is to allocate the executable memory for trampoline code.
> So, you need to use vmalloc_exec() instead.

vmalloc_exec() would be broken too, also hch recently got rid of that
thing.

module_alloc() really is the only sane choice here.

We should make module_alloc() unconditionally available, and maybe even
rename it to text_alloc().

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC] kprobes: Remove MODULES dependency
  2020-07-10 11:32   ` Peter Zijlstra
@ 2020-07-10 13:04     ` Christoph Hellwig
  2020-07-13  5:52       ` Jarkko Sakkinen
  2020-07-10 13:18     ` Masami Hiramatsu
  2020-07-13  5:49     ` Jarkko Sakkinen
  2 siblings, 1 reply; 19+ messages in thread
From: Christoph Hellwig @ 2020-07-10 13:04 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Masami Hiramatsu, Jarkko Sakkinen, linux-kernel, Andi Kleen,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	H. Peter Anvin, Naveen N. Rao, Anil S Keshavamurthy,
	David S. Miller, Steven Rostedt, Andrew Morton, Aneesh Kumar K.V,
	Will Deacon, Kees Cook, Arnd Bergmann, Alexandre Ghiti,
	Masahiro Yamada, Sami Tolvanen, Peter Collingbourne,
	Krzysztof Kozlowski, Frederic Weisbecker, Stephen Boyd,
	Alexei Starovoitov, Mike Rapoport, Sean Christopherson,
	Jiri Olsa, hch

On Fri, Jul 10, 2020 at 01:32:38PM +0200, Peter Zijlstra wrote:
> On Fri, Jul 10, 2020 at 07:32:57PM +0900, Masami Hiramatsu wrote:
> > > -	page = module_alloc(PAGE_SIZE);
> > > +	page = vmalloc(PAGE_SIZE);
> > 
> > No, you can not use vmalloc here. The reason why we use module_alloc()
> > is to allocate the executable memory for trampoline code.
> > So, you need to use vmalloc_exec() instead.
> 
> vmalloc_exec() would be broken too, also hch recently got rid of that
> thing.
> 
> module_alloc() really is the only sane choice here.
> 
> We should make module_alloc() unconditionally available, and maybe even
> rename it to text_alloc().

I think unconitionally might be a bit too much, but for
MODULES || KRPOBES or a new symbol select by them makes sense.  As does
the rename.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC] kprobes: Remove MODULES dependency
  2020-07-10 11:32   ` Peter Zijlstra
  2020-07-10 13:04     ` Christoph Hellwig
@ 2020-07-10 13:18     ` Masami Hiramatsu
  2020-07-10 13:22       ` Steven Rostedt
  2020-07-13  5:49     ` Jarkko Sakkinen
  2 siblings, 1 reply; 19+ messages in thread
From: Masami Hiramatsu @ 2020-07-10 13:18 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Jarkko Sakkinen, linux-kernel, Andi Kleen, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	H. Peter Anvin, Naveen N. Rao, Anil S Keshavamurthy,
	David S. Miller, Steven Rostedt, Andrew Morton, Aneesh Kumar K.V,
	Will Deacon, Kees Cook, Arnd Bergmann, Alexandre Ghiti,
	Masahiro Yamada, Sami Tolvanen, Peter Collingbourne,
	Krzysztof Kozlowski, Frederic Weisbecker, Stephen Boyd,
	Alexei Starovoitov, Mike Rapoport, Sean Christopherson,
	Jiri Olsa, hch

On Fri, 10 Jul 2020 13:32:38 +0200
Peter Zijlstra <peterz@infradead.org> wrote:

> On Fri, Jul 10, 2020 at 07:32:57PM +0900, Masami Hiramatsu wrote:
> > > -	page = module_alloc(PAGE_SIZE);
> > > +	page = vmalloc(PAGE_SIZE);
> > 
> > No, you can not use vmalloc here. The reason why we use module_alloc()
> > is to allocate the executable memory for trampoline code.
> > So, you need to use vmalloc_exec() instead.
> 
> vmalloc_exec() would be broken too, also hch recently got rid of that
> thing.
> 
> module_alloc() really is the only sane choice here.
> 
> We should make module_alloc() unconditionally available, and maybe even
> rename it to text_alloc().

Agreed. As far as I know, ftrace and bpf also depends on module_alloc(),
so text_alloc() will help them too.

Thank you,


-- 
Masami Hiramatsu <mhiramat@kernel.org>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC] kprobes: Remove MODULES dependency
  2020-07-10 13:18     ` Masami Hiramatsu
@ 2020-07-10 13:22       ` Steven Rostedt
  2020-07-13  5:55         ` Jarkko Sakkinen
  0 siblings, 1 reply; 19+ messages in thread
From: Steven Rostedt @ 2020-07-10 13:22 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: Peter Zijlstra, Jarkko Sakkinen, linux-kernel, Andi Kleen,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	H. Peter Anvin, Naveen N. Rao, Anil S Keshavamurthy,
	David S. Miller, Andrew Morton, Aneesh Kumar K.V, Will Deacon,
	Kees Cook, Arnd Bergmann, Alexandre Ghiti, Masahiro Yamada,
	Sami Tolvanen, Peter Collingbourne, Krzysztof Kozlowski,
	Frederic Weisbecker, Stephen Boyd, Alexei Starovoitov,
	Mike Rapoport, Sean Christopherson, Jiri Olsa, hch

On Fri, 10 Jul 2020 22:18:02 +0900
Masami Hiramatsu <mhiramat@kernel.org> wrote:

> 
> Agreed. As far as I know, ftrace and bpf also depends on module_alloc(),
> so text_alloc() will help them too.
> 

Yes please.

arch/x86/kernel/ftrace.c:

#ifdef CONFIG_MODULES
#include <linux/moduleloader.h>
/* Module allocation simplifies allocating memory for code */
static inline void *alloc_tramp(unsigned long size)
{
	return module_alloc(size);
}
static inline void tramp_free(void *tramp)
{
	module_memfree(tramp);
}
#else
/* Trampolines can only be created if modules are supported */
static inline void *alloc_tramp(unsigned long size)
{
	return NULL;
}
static inline void tramp_free(void *tramp) { }
#endif

-- Steve

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC] kprobes: Remove MODULES dependency
  2020-07-10 10:32 ` Masami Hiramatsu
  2020-07-10 11:32   ` Peter Zijlstra
@ 2020-07-10 15:51   ` Kees Cook
  2020-07-13  5:56     ` Jarkko Sakkinen
  2020-07-13  5:39   ` Jarkko Sakkinen
  2 siblings, 1 reply; 19+ messages in thread
From: Kees Cook @ 2020-07-10 15:51 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: Jarkko Sakkinen, linux-kernel, Andi Kleen, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	H. Peter Anvin, Naveen N. Rao, Anil S Keshavamurthy,
	David S. Miller, Steven Rostedt, Andrew Morton, Peter Zijlstra,
	Aneesh Kumar K.V, Will Deacon, Arnd Bergmann, Alexandre Ghiti,
	Masahiro Yamada, Sami Tolvanen, Peter Collingbourne,
	Krzysztof Kozlowski, Frederic Weisbecker, Stephen Boyd,
	Alexei Starovoitov, Mike Rapoport, Sean Christopherson,
	Jiri Olsa

On Fri, Jul 10, 2020 at 07:32:57PM +0900, Masami Hiramatsu wrote:
> On Fri, 10 Jul 2020 02:45:19 +0300
> Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> wrote:
> > +#ifdef CONFIG_MODULES
> >  	/* Lock modules while optimizing kprobes */
> >  	mutex_lock(&module_mutex);
> > +#endif
> 
> Hmm, can you reduce these "#ifdef CONFIG_MODULE"s ?
> 
> e.g. 
> 
> #ifdef CONFIG_MODULES
> static void lock_modules(void)
> {
> 	mutex_lock(&module_mutex);
> }
> ...
> #else
> #define lock_modules() do { } while (0)
> ...
> #endif

I prefer using "static inline" for no-op functions just because they
will maintain argument type validation by the compiler regardless of the
CONFIG state (though it doesn't really matter here since it's void).

#else
static inline lock_modules(void) { }
#endif

-- 
Kees Cook

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC] kprobes: Remove MODULES dependency
  2020-07-10 10:49     ` Peter Zijlstra
@ 2020-07-13  5:05       ` Jarkko Sakkinen
  2020-07-13 10:17         ` Peter Zijlstra
  2020-07-14 11:52         ` Masami Hiramatsu
  0 siblings, 2 replies; 19+ messages in thread
From: Jarkko Sakkinen @ 2020-07-13  5:05 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, Andi Kleen, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	H. Peter Anvin, Naveen N. Rao, Anil S Keshavamurthy,
	David S. Miller, Masami Hiramatsu, Steven Rostedt, Andrew Morton,
	Aneesh Kumar K.V, Will Deacon, Kees Cook, Arnd Bergmann,
	Alexandre Ghiti, Masahiro Yamada, Sami Tolvanen,
	Peter Collingbourne, Krzysztof Kozlowski, Frederic Weisbecker,
	Stephen Boyd, Alexei Starovoitov, Mike Rapoport,
	Sean Christopherson, Jiri Olsa

On Fri, Jul 10, 2020 at 12:49:10PM +0200, Peter Zijlstra wrote:
> On Fri, Jul 10, 2020 at 01:36:38PM +0300, Jarkko Sakkinen wrote:
> > Just so that I know (and learn), what did exactly disable optprobes?
> 
> So regular, old-skool style kprobe is:
> 
>   - copy original instruction out
>   - replace instruction with breakpoint (int3 on x86)
>   - have exception handler return to the copied instruction with
>     single-step on
>   - have single step exception handler return to the original
>     instruction stream
> 
> which is 2 exceptions.

Out of pure interest, how does it handle a jump (as the original
opcode), given that it single steps a copy?

> optprobes avoid the single-step by not only writing a single
> instruction, but additionally placing a JMP instruction behind it such
> that it will automagically continue in the original instruction stream.
> 
> This brings the requirement that the copied instruction is placed
> within the JMP displacement of the regular kernel text (s32 on x86).
> 
> module_alloc() ensures the memory provided is within that range.

Right, a relative jump is placed instead of 0xcc to the breakpoint?

/Jarkko

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC] kprobes: Remove MODULES dependency
  2020-07-10 10:32 ` Masami Hiramatsu
  2020-07-10 11:32   ` Peter Zijlstra
  2020-07-10 15:51   ` Kees Cook
@ 2020-07-13  5:39   ` Jarkko Sakkinen
  2 siblings, 0 replies; 19+ messages in thread
From: Jarkko Sakkinen @ 2020-07-13  5:39 UTC (permalink / raw)
  To: Masami Hiramatsu
  Cc: linux-kernel, Andi Kleen, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	H. Peter Anvin, Naveen N. Rao, Anil S Keshavamurthy,
	David S. Miller, Steven Rostedt, Andrew Morton, Peter Zijlstra,
	Aneesh Kumar K.V, Will Deacon, Kees Cook, Arnd Bergmann,
	Alexandre Ghiti, Masahiro Yamada, Sami Tolvanen,
	Peter Collingbourne, Krzysztof Kozlowski, Frederic Weisbecker,
	Stephen Boyd, Alexei Starovoitov, Mike Rapoport,
	Sean Christopherson, Jiri Olsa

On Fri, Jul 10, 2020 at 07:32:57PM +0900, Masami Hiramatsu wrote:
> Hi Jarkko,
> 
> On Fri, 10 Jul 2020 02:45:19 +0300
> Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> wrote:
> 
> > Remove MODULES dependency and migrate from module_alloc to vmalloc().
> > According to Andi, the history with this dependency is that kprobes
> > originally required custom LKM's, which does not hold today anymore.
> > 
> > Right now one has to compile LKM support only to enable kprobes.  With
> > this change applied, it is somewhat easier to create custom test
> > kernel's with a proper debugging capabilities, thus making Linux more
> > developer friendly.
> 
> Agreed. Now we have several way to access kprobes via ftrace/perf/bpf
> without modules. So it's a good time to remove CONFIG_MODULE dependency.
> 
> But I also would like to minimize the changes for code readability.
> I made some comments below.

Thanks, appreciate this.

> > Cc: Andi Kleen <ak@linux.intel.com>
> > Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
> > ---
> >  arch/Kconfig                   |  1 -
> >  arch/x86/kernel/kprobes/core.c |  5 +++--
> >  kernel/kprobes.c               | 22 ++++++++++++++++++++--
> >  kernel/trace/trace_kprobe.c    | 12 ++++++++++++
> >  4 files changed, 35 insertions(+), 5 deletions(-)
> > 
> > diff --git a/arch/Kconfig b/arch/Kconfig
> > index 8cc35dc556c7..bb59cdf335ab 100644
> > --- a/arch/Kconfig
> > +++ b/arch/Kconfig
> > @@ -61,7 +61,6 @@ config OPROFILE_NMI_TIMER
> >  
> >  config KPROBES
> >  	bool "Kprobes"
> > -	depends on MODULES
> >  	depends on HAVE_KPROBES
> >  	select KALLSYMS
> >  	help
> > diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
> > index ada39ddbc922..dc7b8d6fd20d 100644
> > --- a/arch/x86/kernel/kprobes/core.c
> > +++ b/arch/x86/kernel/kprobes/core.c
> > @@ -42,6 +42,7 @@
> >  #include <linux/moduleloader.h>
> >  #include <linux/vmalloc.h>
> >  #include <linux/pgtable.h>
> > +#include <linux/vmalloc.h>
> >  
> >  #include <asm/text-patching.h>
> >  #include <asm/cacheflush.h>
> > @@ -423,7 +424,7 @@ void *alloc_insn_page(void)
> >  {
> >  	void *page;
> >  
> > -	page = module_alloc(PAGE_SIZE);
> > +	page = vmalloc(PAGE_SIZE);
> 
> No, you can not use vmalloc here. The reason why we use module_alloc()
> is to allocate the executable memory for trampoline code.
> So, you need to use vmalloc_exec() instead.

Right, I'm on the wagon with this after reading Peter's explanation.

> 
> >  	if (!page)
> >  		return NULL;
> >  
> > @@ -446,7 +447,7 @@ void *alloc_insn_page(void)
> >  /* Recover page to RW mode before releasing it */
> >  void free_insn_page(void *page)
> >  {
> > -	module_memfree(page);
> > +	vfree(page);
> >  }
> >  
> >  static int arch_copy_kprobe(struct kprobe *p)
> > diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> > index 4a904cc56d68..02680642ea11 100644
> > --- a/kernel/kprobes.c
> > +++ b/kernel/kprobes.c
> > @@ -35,6 +35,7 @@
> >  #include <linux/ftrace.h>
> >  #include <linux/cpu.h>
> >  #include <linux/jump_label.h>
> > +#include <linux/vmalloc.h>
> >  
> >  #include <asm/sections.h>
> >  #include <asm/cacheflush.h>
> > @@ -111,12 +112,12 @@ enum kprobe_slot_state {
> >  
> >  void __weak *alloc_insn_page(void)
> >  {
> > -	return module_alloc(PAGE_SIZE);
> > +	return vmalloc(PAGE_SIZE);
> 
> Ditto.
> 
> >  }
> >  
> >  void __weak free_insn_page(void *page)
> >  {
> > -	module_memfree(page);
> > +	vfree(page);
> >  }
> >  
> >  struct kprobe_insn_cache kprobe_insn_slots = {
> > @@ -563,8 +564,11 @@ static void kprobe_optimizer(struct work_struct *work)
> >  	mutex_lock(&kprobe_mutex);
> >  	cpus_read_lock();
> >  	mutex_lock(&text_mutex);
> > +
> > +#ifdef CONFIG_MODULES
> >  	/* Lock modules while optimizing kprobes */
> >  	mutex_lock(&module_mutex);
> > +#endif
> 
> Hmm, can you reduce these "#ifdef CONFIG_MODULE"s ?
> 
> e.g. 
> 
> #ifdef CONFIG_MODULES
> static void lock_modules(void)
> {
> 	mutex_lock(&module_mutex);
> }
> ...
> #else
> #define lock_modules() do { } while (0)
> ...
> #endif

Yes, I'll use this idea also to address to issues reported below.

> 
> >  
> >  	/*
> >  	 * Step 1: Unoptimize kprobes and collect cleaned (unused and disarmed)
> > @@ -589,7 +593,9 @@ static void kprobe_optimizer(struct work_struct *work)
> >  	/* Step 4: Free cleaned kprobes after quiesence period */
> >  	do_free_cleaned_kprobes();
> >  
> > +#ifdef CONFIG_MODULES
> >  	mutex_unlock(&module_mutex);
> > +#endif
> >  	mutex_unlock(&text_mutex);
> >  	cpus_read_unlock();
> >  
> > @@ -739,6 +745,7 @@ static int reuse_unused_kprobe(struct kprobe *ap)
> >  	return 0;
> >  }
> >  
> 
> > +#ifdef CONFIG_MODULE
> >  /* Remove optimized instructions */
> >  static void kill_optimized_kprobe(struct kprobe *p)
> >  {
> > @@ -764,6 +771,7 @@ static void kill_optimized_kprobe(struct kprobe *p)
> >  	/* Don't touch the code, because it is already freed. */
> >  	arch_remove_optimized_kprobe(op);
> >  }
> > +#endif
> 
> This optprobe related one is OK to keep in this place. 
> 
> >  
> >  static inline
> >  void __prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
> > @@ -1608,6 +1616,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
> >  			goto out;
> >  		}
> >  
> > +#ifdef CONFIG_MODULE
> >  		/*
> >  		 * If the module freed .init.text, we couldn't insert
> >  		 * kprobes in there.
> > @@ -1618,6 +1627,7 @@ static int check_kprobe_address_safe(struct kprobe *p,
> >  			*probed_mod = NULL;
> >  			ret = -ENOENT;
> >  		}
> > +#endif
> >  	}
> >  out:
> >  	preempt_enable();
> > @@ -2090,6 +2100,7 @@ NOKPROBE_SYMBOL(pre_handler_kretprobe);
> >  
> >  #endif /* CONFIG_KRETPROBES */
> >  
> > +#ifdef CONFIG_MODULE
> >  /* Set the kprobe gone and remove its instruction buffer. */
> >  static void kill_kprobe(struct kprobe *p)
> >  {
> > @@ -2114,6 +2125,7 @@ static void kill_kprobe(struct kprobe *p)
> >  	 */
> >  	arch_remove_kprobe(p);
> >  }
> > +#endif
> >  
> >  /* Disable one kprobe */
> >  int disable_kprobe(struct kprobe *kp)
> > @@ -2214,6 +2226,7 @@ int kprobe_add_area_blacklist(unsigned long start, unsigned long end)
> >  	return 0;
> >  }
> >  
> > +#ifdef CONFIG_MODULE
> >  /* Remove all symbols in given area from kprobe blacklist */
> >  static void kprobe_remove_area_blacklist(unsigned long start, unsigned long end)
> >  {
> > @@ -2231,6 +2244,7 @@ static void kprobe_remove_ksym_blacklist(unsigned long entry)
> >  {
> >  	kprobe_remove_area_blacklist(entry, entry + 1);
> >  }
> > +#endif
> >  
> >  int __init __weak arch_populate_kprobe_blacklist(void)
> >  {
> > @@ -2274,6 +2288,7 @@ static int __init populate_kprobe_blacklist(unsigned long *start,
> >  	return ret ? : arch_populate_kprobe_blacklist();
> >  }
> >  
> > +#ifdef CONFIG_MODULE
> >  static void add_module_kprobe_blacklist(struct module *mod)
> >  {
> >  	unsigned long start, end;
> > @@ -2375,6 +2390,7 @@ static struct notifier_block kprobe_module_nb = {
> >  	.notifier_call = kprobes_module_callback,
> >  	.priority = 0
> >  };
> > +#endif /* CONFIG_MODULE */
> >  
> >  /* Markers of _kprobe_blacklist section */
> >  extern unsigned long __start_kprobe_blacklist[];
> > @@ -2425,8 +2441,10 @@ static int __init init_kprobes(void)
> >  	err = arch_init_kprobes();
> >  	if (!err)
> >  		err = register_die_notifier(&kprobe_exceptions_nb);
> > +#ifdef CONFIG_MODULE
> >  	if (!err)
> >  		err = register_module_notifier(&kprobe_module_nb);
> > +#endif
> >  
> >  	kprobes_initialized = (err == 0);
> >  
> > diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
> > index aefb6065b508..30969b38fce8 100644
> > --- a/kernel/trace/trace_kprobe.c
> > +++ b/kernel/trace/trace_kprobe.c
> > @@ -103,6 +103,7 @@ static nokprobe_inline bool trace_kprobe_has_gone(struct trace_kprobe *tk)
> >  	return !!(kprobe_gone(&tk->rp.kp));
> >  }
> >  
> > +#ifdef CONFIG_MODULE
> >  static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
> >  						 struct module *mod)
> >  {
> > @@ -110,7 +111,9 @@ static nokprobe_inline bool trace_kprobe_within_module(struct trace_kprobe *tk,
> >  	const char *name = trace_kprobe_symbol(tk);
> >  	return strncmp(mod->name, name, len) == 0 && name[len] == ':';
> >  }
> > +#endif
> >  
> > +#ifdef CONFIG_MODULE
> >  static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk)
> >  {
> >  	char *p;
> > @@ -129,6 +132,7 @@ static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk)
> >  
> >  	return ret;
> >  }
> > +#endif
> 
> Here, too. If you make a dummy function in case of !CONFIG_MODULE,
> we don't need to modify caller-side code.
> 
> >  
> >  static bool trace_kprobe_is_busy(struct dyn_event *ev)
> >  {
> > @@ -608,10 +612,12 @@ static int append_trace_kprobe(struct trace_kprobe *tk, struct trace_kprobe *to)
> >  
> >  	/* Register k*probe */
> >  	ret = __register_trace_kprobe(tk);
> > +#ifdef CONFIG_MODULE
> >  	if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) {
> >  		pr_warn("This probe might be able to register after target module is loaded. Continue.\n");
> >  		ret = 0;
> >  	}
> > +#endif
> 
> So, please make a dummy trace_kprobe_module_exist() and keep this untouched.
> 
> >  
> >  	if (ret)
> >  		trace_probe_unlink(&tk->tp);
> > @@ -651,10 +657,12 @@ static int register_trace_kprobe(struct trace_kprobe *tk)
> >  
> >  	/* Register k*probe */
> >  	ret = __register_trace_kprobe(tk);
> > +#ifdef CONFIG_MODULE
> >  	if (ret == -ENOENT && !trace_kprobe_module_exist(tk)) {
> >  		pr_warn("This probe might be able to register after target module is loaded. Continue.\n");
> >  		ret = 0;
> >  	}
> > +#endif
> 
> Ditto.
> 
> >  
> >  	if (ret < 0)
> >  		unregister_kprobe_event(tk);
> > @@ -666,6 +674,7 @@ static int register_trace_kprobe(struct trace_kprobe *tk)
> >  	return ret;
> >  }
> >  
> > +#ifdef CONFIG_MODULE
> >  /* Module notifier call back, checking event on the module */
> >  static int trace_kprobe_module_callback(struct notifier_block *nb,
> >  				       unsigned long val, void *data)
> > @@ -700,6 +709,7 @@ static struct notifier_block trace_kprobe_module_nb = {
> >  	.notifier_call = trace_kprobe_module_callback,
> >  	.priority = 1	/* Invoked after kprobe module callback */
> >  };
> > +#endif
> 
> Unless it makes any build error, please keep it untouched.
> 
> >  
> >  /* Convert certain expected symbols into '_' when generating event names */
> >  static inline void sanitize_event_name(char *name)
> > @@ -1891,8 +1901,10 @@ static __init int init_kprobe_trace_early(void)
> >  	if (ret)
> >  		return ret;
> >  
> > +#ifdef CONFIG_MODULE
> >  	if (register_module_notifier(&trace_kprobe_module_nb))
> >  		return -EINVAL;
> > +#endif
> 
> Ditto.
> 
> Thank you,

Thanks a lot for the remarks.

> 
> >  
> >  	return 0;
> >  }
> > -- 
> > 2.25.1
> > 
> 
> 
> -- 
> Masami Hiramatsu <mhiramat@kernel.org>

/Jarkko

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC] kprobes: Remove MODULES dependency
  2020-07-10 11:32   ` Peter Zijlstra
  2020-07-10 13:04     ` Christoph Hellwig
  2020-07-10 13:18     ` Masami Hiramatsu
@ 2020-07-13  5:49     ` Jarkko Sakkinen
  2020-07-14 11:45       ` Masami Hiramatsu
  2 siblings, 1 reply; 19+ messages in thread
From: Jarkko Sakkinen @ 2020-07-13  5:49 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Masami Hiramatsu, linux-kernel, Andi Kleen, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	H. Peter Anvin, Naveen N. Rao, Anil S Keshavamurthy,
	David S. Miller, Steven Rostedt, Andrew Morton, Aneesh Kumar K.V,
	Will Deacon, Kees Cook, Arnd Bergmann, Alexandre Ghiti,
	Masahiro Yamada, Sami Tolvanen, Peter Collingbourne,
	Krzysztof Kozlowski, Frederic Weisbecker, Stephen Boyd,
	Alexei Starovoitov, Mike Rapoport, Sean Christopherson,
	Jiri Olsa, hch

On Fri, Jul 10, 2020 at 01:32:38PM +0200, Peter Zijlstra wrote:
> On Fri, Jul 10, 2020 at 07:32:57PM +0900, Masami Hiramatsu wrote:
> > > -	page = module_alloc(PAGE_SIZE);
> > > +	page = vmalloc(PAGE_SIZE);
> > 
> > No, you can not use vmalloc here. The reason why we use module_alloc()
> > is to allocate the executable memory for trampoline code.
> > So, you need to use vmalloc_exec() instead.
> 
> vmalloc_exec() would be broken too, also hch recently got rid of that
> thing.
> 
> module_alloc() really is the only sane choice here.
> 
> We should make module_alloc() unconditionally available, and maybe even
> rename it to text_alloc().

Thanks for the remarks.

The current module_alloc looks like this:

void * __weak module_alloc(unsigned long size)
{
	return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
			GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS,
			NUMA_NO_NODE, __builtin_return_address(0));
}

What if I create inline functions for text_alloc() and text_memfree() and
convert this function as:

void * __weak module_alloc(unsigned long size)
{
	return text_alloc(size);
}

/Jarkko

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC] kprobes: Remove MODULES dependency
  2020-07-10 13:04     ` Christoph Hellwig
@ 2020-07-13  5:52       ` Jarkko Sakkinen
  0 siblings, 0 replies; 19+ messages in thread
From: Jarkko Sakkinen @ 2020-07-13  5:52 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Peter Zijlstra, Masami Hiramatsu, linux-kernel, Andi Kleen,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	H. Peter Anvin, Naveen N. Rao, Anil S Keshavamurthy,
	David S. Miller, Steven Rostedt, Andrew Morton, Aneesh Kumar K.V,
	Will Deacon, Kees Cook, Arnd Bergmann, Alexandre Ghiti,
	Masahiro Yamada, Sami Tolvanen, Peter Collingbourne,
	Krzysztof Kozlowski, Frederic Weisbecker, Stephen Boyd,
	Alexei Starovoitov, Mike Rapoport, Sean Christopherson,
	Jiri Olsa

On Fri, Jul 10, 2020 at 03:04:29PM +0200, Christoph Hellwig wrote:
> On Fri, Jul 10, 2020 at 01:32:38PM +0200, Peter Zijlstra wrote:
> > On Fri, Jul 10, 2020 at 07:32:57PM +0900, Masami Hiramatsu wrote:
> > > > -	page = module_alloc(PAGE_SIZE);
> > > > +	page = vmalloc(PAGE_SIZE);
> > > 
> > > No, you can not use vmalloc here. The reason why we use module_alloc()
> > > is to allocate the executable memory for trampoline code.
> > > So, you need to use vmalloc_exec() instead.
> > 
> > vmalloc_exec() would be broken too, also hch recently got rid of that
> > thing.
> > 
> > module_alloc() really is the only sane choice here.
> > 
> > We should make module_alloc() unconditionally available, and maybe even
> > rename it to text_alloc().
> 
> I think unconitionally might be a bit too much, but for
> MODULES || KRPOBES or a new symbol select by them makes sense.  As does
> the rename.

Given that they are simple wrappers, would it be too much harmo to have
inline functions for text_alloc() and text_memfree() and use them inside
module_alloc() and alloc_memfree() in order not to initially turn things
over too much?

/Jarkko

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC] kprobes: Remove MODULES dependency
  2020-07-10 13:22       ` Steven Rostedt
@ 2020-07-13  5:55         ` Jarkko Sakkinen
  0 siblings, 0 replies; 19+ messages in thread
From: Jarkko Sakkinen @ 2020-07-13  5:55 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Masami Hiramatsu, Peter Zijlstra, linux-kernel, Andi Kleen,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	H. Peter Anvin, Naveen N. Rao, Anil S Keshavamurthy,
	David S. Miller, Andrew Morton, Aneesh Kumar K.V, Will Deacon,
	Kees Cook, Arnd Bergmann, Alexandre Ghiti, Masahiro Yamada,
	Sami Tolvanen, Peter Collingbourne, Krzysztof Kozlowski,
	Frederic Weisbecker, Stephen Boyd, Alexei Starovoitov,
	Mike Rapoport, Sean Christopherson, Jiri Olsa, hch

On Fri, Jul 10, 2020 at 09:22:43AM -0400, Steven Rostedt wrote:
> On Fri, 10 Jul 2020 22:18:02 +0900
> Masami Hiramatsu <mhiramat@kernel.org> wrote:
> 
> > 
> > Agreed. As far as I know, ftrace and bpf also depends on module_alloc(),
> > so text_alloc() will help them too.
> > 
> 
> Yes please.
> 
> arch/x86/kernel/ftrace.c:
> 
> #ifdef CONFIG_MODULES
> #include <linux/moduleloader.h>
> /* Module allocation simplifies allocating memory for code */
> static inline void *alloc_tramp(unsigned long size)
> {
> 	return module_alloc(size);
> }
> static inline void tramp_free(void *tramp)
> {
> 	module_memfree(tramp);
> }
> #else
> /* Trampolines can only be created if modules are supported */
> static inline void *alloc_tramp(unsigned long size)
> {
> 	return NULL;
> }
> static inline void tramp_free(void *tramp) { }
> #endif
> 
> -- Steve

Thanks, note taken. I'll take this into account in the next version.

/Jarkko

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC] kprobes: Remove MODULES dependency
  2020-07-10 15:51   ` Kees Cook
@ 2020-07-13  5:56     ` Jarkko Sakkinen
  0 siblings, 0 replies; 19+ messages in thread
From: Jarkko Sakkinen @ 2020-07-13  5:56 UTC (permalink / raw)
  To: Kees Cook
  Cc: Masami Hiramatsu, linux-kernel, Andi Kleen, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	H. Peter Anvin, Naveen N. Rao, Anil S Keshavamurthy,
	David S. Miller, Steven Rostedt, Andrew Morton, Peter Zijlstra,
	Aneesh Kumar K.V, Will Deacon, Arnd Bergmann, Alexandre Ghiti,
	Masahiro Yamada, Sami Tolvanen, Peter Collingbourne,
	Krzysztof Kozlowski, Frederic Weisbecker, Stephen Boyd,
	Alexei Starovoitov, Mike Rapoport, Sean Christopherson,
	Jiri Olsa

On Fri, Jul 10, 2020 at 08:51:56AM -0700, Kees Cook wrote:
> On Fri, Jul 10, 2020 at 07:32:57PM +0900, Masami Hiramatsu wrote:
> > On Fri, 10 Jul 2020 02:45:19 +0300
> > Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> wrote:
> > > +#ifdef CONFIG_MODULES
> > >  	/* Lock modules while optimizing kprobes */
> > >  	mutex_lock(&module_mutex);
> > > +#endif
> > 
> > Hmm, can you reduce these "#ifdef CONFIG_MODULE"s ?
> > 
> > e.g. 
> > 
> > #ifdef CONFIG_MODULES
> > static void lock_modules(void)
> > {
> > 	mutex_lock(&module_mutex);
> > }
> > ...
> > #else
> > #define lock_modules() do { } while (0)
> > ...
> > #endif
> 
> I prefer using "static inline" for no-op functions just because they
> will maintain argument type validation by the compiler regardless of the
> CONFIG state (though it doesn't really matter here since it's void).
> 
> #else
> static inline lock_modules(void) { }
> #endif
> 
> -- 
> Kees Cook

Thanks Kees, good remark.

/Jarkko

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC] kprobes: Remove MODULES dependency
  2020-07-13  5:05       ` Jarkko Sakkinen
@ 2020-07-13 10:17         ` Peter Zijlstra
  2020-07-14 11:52         ` Masami Hiramatsu
  1 sibling, 0 replies; 19+ messages in thread
From: Peter Zijlstra @ 2020-07-13 10:17 UTC (permalink / raw)
  To: Jarkko Sakkinen
  Cc: linux-kernel, Andi Kleen, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	H. Peter Anvin, Naveen N. Rao, Anil S Keshavamurthy,
	David S. Miller, Masami Hiramatsu, Steven Rostedt, Andrew Morton,
	Aneesh Kumar K.V, Will Deacon, Kees Cook, Arnd Bergmann,
	Alexandre Ghiti, Masahiro Yamada, Sami Tolvanen,
	Peter Collingbourne, Krzysztof Kozlowski, Frederic Weisbecker,
	Stephen Boyd, Alexei Starovoitov, Mike Rapoport,
	Sean Christopherson, Jiri Olsa

On Mon, Jul 13, 2020 at 08:05:49AM +0300, Jarkko Sakkinen wrote:
> On Fri, Jul 10, 2020 at 12:49:10PM +0200, Peter Zijlstra wrote:
> > On Fri, Jul 10, 2020 at 01:36:38PM +0300, Jarkko Sakkinen wrote:
> > > Just so that I know (and learn), what did exactly disable optprobes?
> > 
> > So regular, old-skool style kprobe is:
> > 
> >   - copy original instruction out
> >   - replace instruction with breakpoint (int3 on x86)
> >   - have exception handler return to the copied instruction with
> >     single-step on
> >   - have single step exception handler return to the original
> >     instruction stream
> > 
> > which is 2 exceptions.
> 
> Out of pure interest, how does it handle a jump (as the original
> opcode), given that it single steps a copy?

Good question, I hadn't ever looked at that detail. Anyway, I dug around
a little and it disallows 'boosting' (replacing single-step with a jmp)
for jump instructions and takes the double exception. It single steps
the original jump into 'thin-air' and does a relative fixup of the
resulting IP in the single-step exception.

For more details also see
arch/x86/kernel/kprobes/core.c:resume_execution().

> > optprobes avoid the single-step by not only writing a single
> > instruction, but additionally placing a JMP instruction behind it such
> > that it will automagically continue in the original instruction stream.
> > 
> > This brings the requirement that the copied instruction is placed
> > within the JMP displacement of the regular kernel text (s32 on x86).
> > 
> > module_alloc() ensures the memory provided is within that range.
> 
> Right, a relative jump is placed instead of 0xcc to the breakpoint?

So there's all sorts of optimizations. The one I was talking about is
apparently called boosting. That one still uses INT3 but avoids (where
possible) the single-step #DB trap by appending a JMP.d32 after it.

There's also optimized kprobes and that avoids all traps by replacing
the original instruction(s) with a JMP.d32 to a trampoline, this
trampoline calls the kprobe handler, after which it runs the original
instruction(s) and then a JMP.d32 back into where we came from.

These fully optimized kprobes have very specific constraints, best to
read the code if you want more details.

Anyway, the common theme here is that all the various optimizations rely
on the out-of-line text being withing the s32 displacement of relative
jumps.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC] kprobes: Remove MODULES dependency
  2020-07-13  5:49     ` Jarkko Sakkinen
@ 2020-07-14 11:45       ` Masami Hiramatsu
  0 siblings, 0 replies; 19+ messages in thread
From: Masami Hiramatsu @ 2020-07-14 11:45 UTC (permalink / raw)
  To: Jarkko Sakkinen
  Cc: Peter Zijlstra, Masami Hiramatsu, linux-kernel, Andi Kleen,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	H. Peter Anvin, Naveen N. Rao, Anil S Keshavamurthy,
	David S. Miller, Steven Rostedt, Andrew Morton, Aneesh Kumar K.V,
	Will Deacon, Kees Cook, Arnd Bergmann, Alexandre Ghiti,
	Masahiro Yamada, Sami Tolvanen, Peter Collingbourne,
	Krzysztof Kozlowski, Frederic Weisbecker, Stephen Boyd,
	Alexei Starovoitov, Mike Rapoport, Sean Christopherson,
	Jiri Olsa, hch

On Mon, 13 Jul 2020 08:49:55 +0300
Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> wrote:

> On Fri, Jul 10, 2020 at 01:32:38PM +0200, Peter Zijlstra wrote:
> > On Fri, Jul 10, 2020 at 07:32:57PM +0900, Masami Hiramatsu wrote:
> > > > -	page = module_alloc(PAGE_SIZE);
> > > > +	page = vmalloc(PAGE_SIZE);
> > > 
> > > No, you can not use vmalloc here. The reason why we use module_alloc()
> > > is to allocate the executable memory for trampoline code.
> > > So, you need to use vmalloc_exec() instead.
> > 
> > vmalloc_exec() would be broken too, also hch recently got rid of that
> > thing.
> > 
> > module_alloc() really is the only sane choice here.
> > 
> > We should make module_alloc() unconditionally available, and maybe even
> > rename it to text_alloc().
> 
> Thanks for the remarks.
> 
> The current module_alloc looks like this:
> 
> void * __weak module_alloc(unsigned long size)
> {
> 	return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
> 			GFP_KERNEL, PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS,
> 			NUMA_NO_NODE, __builtin_return_address(0));
> }
> 
> What if I create inline functions for text_alloc() and text_memfree() and
> convert this function as:
> 
> void * __weak module_alloc(unsigned long size)
> {
> 	return text_alloc(size);
> }

Yeah, that'll be good.

Thank you,


-- 
Masami Hiramatsu <mhiramat@kernel.org>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH RFC] kprobes: Remove MODULES dependency
  2020-07-13  5:05       ` Jarkko Sakkinen
  2020-07-13 10:17         ` Peter Zijlstra
@ 2020-07-14 11:52         ` Masami Hiramatsu
  1 sibling, 0 replies; 19+ messages in thread
From: Masami Hiramatsu @ 2020-07-14 11:52 UTC (permalink / raw)
  To: Jarkko Sakkinen
  Cc: Peter Zijlstra, linux-kernel, Andi Kleen, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov,
	maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
	H. Peter Anvin, Naveen N. Rao, Anil S Keshavamurthy,
	David S. Miller, Masami Hiramatsu, Steven Rostedt, Andrew Morton,
	Aneesh Kumar K.V, Will Deacon, Kees Cook, Arnd Bergmann,
	Alexandre Ghiti, Masahiro Yamada, Sami Tolvanen,
	Peter Collingbourne, Krzysztof Kozlowski, Frederic Weisbecker,
	Stephen Boyd, Alexei Starovoitov, Mike Rapoport,
	Sean Christopherson, Jiri Olsa

On Mon, 13 Jul 2020 08:05:49 +0300
Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com> wrote:

> On Fri, Jul 10, 2020 at 12:49:10PM +0200, Peter Zijlstra wrote:
> > On Fri, Jul 10, 2020 at 01:36:38PM +0300, Jarkko Sakkinen wrote:
> > > Just so that I know (and learn), what did exactly disable optprobes?
> > 
> > So regular, old-skool style kprobe is:
> > 
> >   - copy original instruction out
> >   - replace instruction with breakpoint (int3 on x86)
> >   - have exception handler return to the copied instruction with
> >     single-step on
> >   - have single step exception handler return to the original
> >     instruction stream
> > 
> > which is 2 exceptions.
> 
> Out of pure interest, how does it handle a jump (as the original
> opcode), given that it single steps a copy?

Yes, the jump will be executed with a single-step on the copy buffer
and kprobes (on x86) fixes up the result, this means we modifies
the regs->ip. Also, there are some architectures which emulate the
jump instead of single-stepping.

> 
> > optprobes avoid the single-step by not only writing a single
> > instruction, but additionally placing a JMP instruction behind it such
> > that it will automagically continue in the original instruction stream.
> > 
> > This brings the requirement that the copied instruction is placed
> > within the JMP displacement of the regular kernel text (s32 on x86).
> > 
> > module_alloc() ensures the memory provided is within that range.
> 
> Right, a relative jump is placed instead of 0xcc to the breakpoint?

Yes, a relative (far) jump is used. So the target address (copied buffer)
must be in +-2GB range from the jump.

Thank you,

-- 
Masami Hiramatsu <mhiramat@kernel.org>

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2020-07-14 11:52 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-09 23:45 [PATCH RFC] kprobes: Remove MODULES dependency Jarkko Sakkinen
2020-07-10  9:03 ` Peter Zijlstra
2020-07-10 10:36   ` Jarkko Sakkinen
2020-07-10 10:49     ` Peter Zijlstra
2020-07-13  5:05       ` Jarkko Sakkinen
2020-07-13 10:17         ` Peter Zijlstra
2020-07-14 11:52         ` Masami Hiramatsu
2020-07-10 10:32 ` Masami Hiramatsu
2020-07-10 11:32   ` Peter Zijlstra
2020-07-10 13:04     ` Christoph Hellwig
2020-07-13  5:52       ` Jarkko Sakkinen
2020-07-10 13:18     ` Masami Hiramatsu
2020-07-10 13:22       ` Steven Rostedt
2020-07-13  5:55         ` Jarkko Sakkinen
2020-07-13  5:49     ` Jarkko Sakkinen
2020-07-14 11:45       ` Masami Hiramatsu
2020-07-10 15:51   ` Kees Cook
2020-07-13  5:56     ` Jarkko Sakkinen
2020-07-13  5:39   ` Jarkko Sakkinen

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.