All of lore.kernel.org
 help / color / mirror / Atom feed
* [S390] minor fault path optimization.
@ 2007-03-19 13:02 Martin Schwidefsky
  2007-03-19 13:31 ` Christoph Hellwig
  0 siblings, 1 reply; 6+ messages in thread
From: Martin Schwidefsky @ 2007-03-19 13:02 UTC (permalink / raw)
  To: linux-kernel, linux-s390

From: Martin Schwidefsky <schwidefsky@de.ibm.com>

[S390] minor fault path optimization.

The minor fault path has grown a lot in terms of cycles. In particular
the kprobes hook is very costly. Optimize the path to save a couple of
cycles. If kprobes is enabled more than 300 cycles can be avoided if
kprobe_running() is false.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---

 arch/s390/mm/fault.c |  259 +++++++++++++++++++++++++++------------------------
 1 files changed, 141 insertions(+), 118 deletions(-)

diff -urpN linux-2.6/arch/s390/mm/fault.c linux-2.6-patched/arch/s390/mm/fault.c
--- linux-2.6/arch/s390/mm/fault.c	2007-03-19 13:08:56.000000000 +0100
+++ linux-2.6-patched/arch/s390/mm/fault.c	2007-03-19 13:09:34.000000000 +0100
@@ -63,21 +63,25 @@ int unregister_page_fault_notifier(struc
 	return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
 }
 
-static inline int notify_page_fault(enum die_val val, const char *str,
-			struct pt_regs *regs, long err, int trap, int sig)
+static int __kprobes __notify_page_fault(struct pt_regs *regs, long err)
 {
-	struct die_args args = {
-		.regs = regs,
-		.str = str,
-		.err = err,
-		.trapnr = trap,
-		.signr = sig
-	};
-	return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
+	struct die_args args = { .str = "page fault",
+				 .trapnr = 14,
+				 .signr = SIGSEGV };
+	args.regs = regs;
+	args.err = err;
+	return atomic_notifier_call_chain(&notify_page_fault_chain,
+					  DIE_PAGE_FAULT, &args);
+}
+
+static inline int notify_page_fault(struct pt_regs *regs, long err)
+{
+	if (unlikely(kprobe_running()))
+		return __notify_page_fault(regs, err);
+	return NOTIFY_DONE;
 }
 #else
-static inline int notify_page_fault(enum die_val val, const char *str,
-			struct pt_regs *regs, long err, int trap, int sig)
+static inline int notify_page_fault(struct pt_regs *regs, long err)
 {
 	return NOTIFY_DONE;
 }
@@ -170,6 +174,89 @@ static void do_sigsegv(struct pt_regs *r
 	force_sig_info(SIGSEGV, &si, current);
 }
 
+static void do_no_context(struct pt_regs *regs, unsigned long error_code,
+			  unsigned long address)
+{
+	const struct exception_table_entry *fixup;
+
+	/* Are we prepared to handle this kernel fault?  */
+	fixup = search_exception_tables(regs->psw.addr & __FIXUP_MASK);
+	if (fixup) {
+		regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE;
+		return;
+	}
+
+	/*
+	 * Oops. The kernel tried to access some bad page. We'll have to
+	 * terminate things with extreme prejudice.
+	 */
+	if (check_space(current) == 0)
+		printk(KERN_ALERT "Unable to handle kernel pointer dereference"
+		       " at virtual kernel address %p\n", (void *)address);
+	else
+		printk(KERN_ALERT "Unable to handle kernel paging request"
+		       " at virtual user address %p\n", (void *)address);
+
+	die("Oops", regs, error_code);
+	do_exit(SIGKILL);
+}
+
+static void do_low_address(struct pt_regs *regs, unsigned long error_code)
+{
+	/* Low-address protection hit in kernel mode means
+	   NULL pointer write access in kernel mode.  */
+	if (regs->psw.mask & PSW_MASK_PSTATE) {
+		/* Low-address protection hit in user mode 'cannot happen'. */
+		die ("Low-address protection", regs, error_code);
+		do_exit(SIGKILL);
+	}
+
+	do_no_context(regs, error_code, 0);
+}
+
+/*
+ * We ran out of memory, or some other thing happened to us that made
+ * us unable to handle the page fault gracefully.
+ */
+static int do_out_of_memory(struct pt_regs *regs, unsigned long error_code,
+			    unsigned long address)
+{
+	struct task_struct *tsk = current;
+	struct mm_struct *mm = tsk->mm;
+
+	up_read(&mm->mmap_sem);
+	if (is_init(tsk)) {
+		yield();
+		down_read(&mm->mmap_sem);
+		return 1;
+	}
+	printk("VM: killing process %s\n", tsk->comm);
+	if (regs->psw.mask & PSW_MASK_PSTATE)
+		do_exit(SIGKILL);
+	do_no_context(regs, error_code, address);
+	return 0;
+}
+
+static void do_sigbus(struct pt_regs *regs, unsigned long error_code,
+		      unsigned long address)
+{
+	struct task_struct *tsk = current;
+	struct mm_struct *mm = tsk->mm;
+
+	up_read(&mm->mmap_sem);
+	/*
+	 * Send a sigbus, regardless of whether we were in kernel
+	 * or user mode.
+	 */
+	tsk->thread.prot_addr = address;
+	tsk->thread.trap_no = error_code;
+	force_sig(SIGBUS, tsk);
+
+	/* Kernel mode? Handle exceptions or die */
+	if (!(regs->psw.mask & PSW_MASK_PSTATE))
+		do_no_context(regs, error_code, address);
+}
+
 #ifdef CONFIG_S390_EXEC_PROTECT
 extern long sys_sigreturn(struct pt_regs *regs);
 extern long sys_rt_sigreturn(struct pt_regs *regs);
@@ -253,49 +340,23 @@ out_fault:
  *   3b       Region third trans.  ->  Not present       (nullification)
  */
 static inline void
-do_exception(struct pt_regs *regs, unsigned long error_code, int is_protection)
+do_exception(struct pt_regs *regs, unsigned long error_code, int write)
 {
-        struct task_struct *tsk;
-        struct mm_struct *mm;
-        struct vm_area_struct * vma;
-        unsigned long address;
-	const struct exception_table_entry *fixup;
-	int si_code;
+	struct task_struct *tsk;
+	struct mm_struct *mm;
+	struct vm_area_struct *vma;
+	unsigned long address;
 	int space;
+	int si_code;
 
-        tsk = current;
-        mm = tsk->mm;
-	
-	if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
-					SIGSEGV) == NOTIFY_STOP)
+	if (notify_page_fault(regs, error_code) == NOTIFY_STOP)
 		return;
 
-	/* 
-         * Check for low-address protection.  This needs to be treated
-	 * as a special case because the translation exception code 
-	 * field is not guaranteed to contain valid data in this case.
-	 */
-	if (is_protection && !(S390_lowcore.trans_exc_code & 4)) {
-
-		/* Low-address protection hit in kernel mode means 
-		   NULL pointer write access in kernel mode.  */
- 		if (!(regs->psw.mask & PSW_MASK_PSTATE)) {
-			address = 0;
-			space = 0;
-			goto no_context;
-		}
-
-		/* Low-address protection hit in user mode 'cannot happen'.  */
-		die ("Low-address protection", regs, error_code);
-        	do_exit(SIGKILL);
-	}
+	tsk = current;
+	mm = tsk->mm;
 
-        /* 
-         * get the failing address 
-         * more specific the segment and page table portion of 
-         * the address 
-         */
-        address = S390_lowcore.trans_exc_code & __FAIL_ADDR_MASK;
+	/* get the failing address and the affected space */
+	address = S390_lowcore.trans_exc_code & __FAIL_ADDR_MASK;
 	space = check_space(tsk);
 
 	/*
@@ -313,7 +374,7 @@ do_exception(struct pt_regs *regs, unsig
 	 */
 	local_irq_enable();
 
-        down_read(&mm->mmap_sem);
+	down_read(&mm->mmap_sem);
 
 	si_code = SEGV_MAPERR;
 	vma = find_vma(mm, address);
@@ -330,19 +391,19 @@ do_exception(struct pt_regs *regs, unsig
 			return;
 #endif
 
-        if (vma->vm_start <= address) 
-                goto good_area;
-        if (!(vma->vm_flags & VM_GROWSDOWN))
-                goto bad_area;
-        if (expand_stack(vma, address))
-                goto bad_area;
+	if (vma->vm_start <= address)
+		goto good_area;
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		goto bad_area;
+	if (expand_stack(vma, address))
+		goto bad_area;
 /*
  * Ok, we have a good vm_area for this memory access, so
  * we can handle it..
  */
 good_area:
 	si_code = SEGV_ACCERR;
-	if (!is_protection) {
+	if (!write) {
 		/* page not present, check vm flags */
 		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
 			goto bad_area;
@@ -357,7 +418,7 @@ survive:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
-	switch (handle_mm_fault(mm, vma, address, is_protection)) {
+	switch (handle_mm_fault(mm, vma, address, write)) {
 	case VM_FAULT_MINOR:
 		tsk->min_flt++;
 		break;
@@ -365,9 +426,12 @@ survive:
 		tsk->maj_flt++;
 		break;
 	case VM_FAULT_SIGBUS:
-		goto do_sigbus;
+		do_sigbus(regs, error_code, address);
+		return;
 	case VM_FAULT_OOM:
-		goto out_of_memory;
+		if (do_out_of_memory(regs, error_code, address))
+			goto survive;
+		return;
 	default:
 		BUG();
 	}
@@ -385,75 +449,34 @@ survive:
  * Fix it, but check if it's kernel or user first..
  */
 bad_area:
-        up_read(&mm->mmap_sem);
+	up_read(&mm->mmap_sem);
 
-        /* User mode accesses just cause a SIGSEGV */
-        if (regs->psw.mask & PSW_MASK_PSTATE) {
-                tsk->thread.prot_addr = address;
-                tsk->thread.trap_no = error_code;
+	/* User mode accesses just cause a SIGSEGV */
+	if (regs->psw.mask & PSW_MASK_PSTATE) {
+		tsk->thread.prot_addr = address;
+		tsk->thread.trap_no = error_code;
 		do_sigsegv(regs, error_code, si_code, address);
-                return;
+		return;
 	}
 
 no_context:
-        /* Are we prepared to handle this kernel fault?  */
-	fixup = search_exception_tables(regs->psw.addr & __FIXUP_MASK);
-	if (fixup) {
-		regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE;
-                return;
-        }
-
-/*
- * Oops. The kernel tried to access some bad page. We'll have to
- * terminate things with extreme prejudice.
- */
-	if (space == 0)
-                printk(KERN_ALERT "Unable to handle kernel pointer dereference"
-        	       " at virtual kernel address %p\n", (void *)address);
-        else
-                printk(KERN_ALERT "Unable to handle kernel paging request"
-		       " at virtual user address %p\n", (void *)address);
-
-        die("Oops", regs, error_code);
-        do_exit(SIGKILL);
-
-
-/*
- * We ran out of memory, or some other thing happened to us that made
- * us unable to handle the page fault gracefully.
-*/
-out_of_memory:
-	up_read(&mm->mmap_sem);
-	if (is_init(tsk)) {
-		yield();
-		down_read(&mm->mmap_sem);
-		goto survive;
-	}
-	printk("VM: killing process %s\n", tsk->comm);
-	if (regs->psw.mask & PSW_MASK_PSTATE)
-		do_exit(SIGKILL);
-	goto no_context;
-
-do_sigbus:
-	up_read(&mm->mmap_sem);
-
-	/*
-	 * Send a sigbus, regardless of whether we were in kernel
-	 * or user mode.
-	 */
-        tsk->thread.prot_addr = address;
-        tsk->thread.trap_no = error_code;
-	force_sig(SIGBUS, tsk);
-
-	/* Kernel mode? Handle exceptions or die */
-	if (!(regs->psw.mask & PSW_MASK_PSTATE))
-		goto no_context;
+	do_no_context(regs, error_code, address);
 }
 
 void __kprobes do_protection_exception(struct pt_regs *regs,
 				       unsigned long error_code)
 {
+	/* Protection exception is supressing, decrement psw address. */
 	regs->psw.addr -= (error_code >> 16);
+	/*
+	 * Check for low-address protection.  This needs to be treated
+	 * as a special case because the translation exception code
+	 * field is not guaranteed to contain valid data in this case.
+	 */
+	if (unlikely(!(S390_lowcore.trans_exc_code & 4))) {
+		do_low_address(regs, error_code);
+		return;
+	}
 	do_exception(regs, 4, 1);
 }
 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [S390] minor fault path optimization.
  2007-03-19 13:02 [S390] minor fault path optimization Martin Schwidefsky
@ 2007-03-19 13:31 ` Christoph Hellwig
  2007-03-19 14:00   ` Andi Kleen
  0 siblings, 1 reply; 6+ messages in thread
From: Christoph Hellwig @ 2007-03-19 13:31 UTC (permalink / raw)
  To: Martin Schwidefsky; +Cc: linux-kernel, linux-s390, linux-arch

On Mon, Mar 19, 2007 at 02:02:06PM +0100, Martin Schwidefsky wrote:
> From: Martin Schwidefsky <schwidefsky@de.ibm.com>
> 
> [S390] minor fault path optimization.
> 
> The minor fault path has grown a lot in terms of cycles. In particular
> the kprobes hook is very costly. Optimize the path to save a couple of
> cycles. If kprobes is enabled more than 300 cycles can be avoided if 
> kprobes_running() is false.

Actually there is a lot more fishy here.  This code is duplicated over
every single architecture.  It uses a notifier chain that only ever has
one actual user.  It exports interfaces that are potentially harmful.

Please do the right thing to optimize this instead and rip out the braindead
notifier chain mechanisms and directly call into the kprobes handler if
kprobes are active, in particular the DIE_PAGE_FAULT: case of 
kprobe_exceptions_notify.  That allows you to optimize the path both
if kprobes are active, and even further if they're not.

The same should be done for all other architectures, and the other
(ab-)uses of  the kprobes notifiers.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [S390] minor fault path optimization.
  2007-03-19 13:31 ` Christoph Hellwig
@ 2007-03-19 14:00   ` Andi Kleen
  2007-03-19 14:08       ` Christoph Hellwig
  0 siblings, 1 reply; 6+ messages in thread
From: Andi Kleen @ 2007-03-19 14:00 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Martin Schwidefsky, linux-kernel, linux-s390, linux-arch

On Monday 19 March 2007 14:31, Christoph Hellwig wrote:
> On Mon, Mar 19, 2007 at 02:02:06PM +0100, Martin Schwidefsky wrote:
> > From: Martin Schwidefsky <schwidefsky@de.ibm.com>
> > 
> > [S390] minor fault path optimization.
> > 
> > The minor fault path has grown a lot in terms of cycles. In particular
> > the kprobes hook is very costly. Optimize the path to save a couple of
> > cycles. If kprobes is enabled more than 300 cycles can be avoided if 
> > kprobes_running() is false.
> 
> Actually there is a lot more fishy here.  This code is duplicated over
> every single architecture.  It uses a notifier chain that only ever has
> one actual uaesr.  It exports interfaces that are potential harmful.

Agreed, it should be fixed generically.

I think the right fix would be to make the notifier call an inline
that checks the call chain in the caller. That would be cheap enough,
except for the cache line that might need to be referenced.

For the cache line the short term fix would be to find some other
global cache line that is referenced in the fault path anyways and put
it on the same (e.g. by just putting the variable next to it or
using a shared structure) 

Longer term might be something like Ben LaHaise's patch that can patch
code inline for rarely-read globals.

> Please do the right thing to optimize this instead and rip out the brandead
> notifier chain mechanisms and directly call into the krobes handler if
> kprobes are active, 

No, we should keep the debugger hooks here. Otherwise there will be a zillion
external patches patching code in at these places for the various debuggers,
resulting in a merge nightmare for lots of people.

-Andi

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] powerpc minor pagefault optimization with kprobes enabled
  2007-03-19 14:00   ` Andi Kleen
@ 2007-03-19 14:08       ` Christoph Hellwig
  0 siblings, 0 replies; 6+ messages in thread
From: Christoph Hellwig @ 2007-03-19 14:08 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Martin Schwidefsky, linux-kernel, linux-s390, linux-arch, linuxppc-dev

> Agreed, it should be fixed generically.
> 
> I think the right fix would be to make the notifier call an inline
> that checks the call chain in the caller. That would be cheap enough,
> except for the cache line that might need to be referenced.
> 
> For the cache line the short term fix would be to find some other
> global cache line that is referenced in the fault path anyways and put
> it on the same (e.g. by just putting the variable next to it or
> using a shared structure) 
> 
> Longer term might be something like Ben LaHaise's patch that can patch
> code inline for rarely-read globals.

I've attached a patch below that optimizes this code path for powerpc,
but the scheme applies to all architectures as well.  It just rips out all
the callchain madness, and does as good as it gets in the pagefault
handler:

 - first we check for a user mode pagefault as that check is cheap
 - then we check kprobes_running which unfortunately requires a
   pagefault_disable
 - finally we do a direct function call to kprobe_fault_handler

> 
> > Please do the right thing to optimize this instead and rip out the brandead
> > notifier chain mechanisms and directly call into the krobes handler if
> > kprobes are active, 
> 
> No, we should keep the debugger hooks here. Otherwise there will be a zillion
> external patches patches code in at these places for the various debuggers,
> resulting in a merge nightmare for lots of people.

So get them to get their debuggers merged upstream.  Anyway, we need to
keep the die notifiers because we've actually grown some users for it.
I don't agree on the page fault notifiers, though - this code path
is too important to bloat it for possible existing external parasites.


Signed-off-by: Christoph Hellwig <hch@lst.de>

Index: linux-2.6.20/arch/powerpc/mm/fault.c
===================================================================
--- linux-2.6.20.orig/arch/powerpc/mm/fault.c	2007-03-19 14:50:41.000000000 +0100
+++ linux-2.6.20/arch/powerpc/mm/fault.c	2007-03-19 15:00:00.000000000 +0100
@@ -39,37 +39,26 @@
 #include <asm/kdebug.h>
 #include <asm/siginfo.h>
 
-#ifdef CONFIG_KPROBES
-ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
 
-/* Hook to register for page fault notifications */
-int register_page_fault_notifier(struct notifier_block *nb)
+#ifdef CONFIG_KPROBES
+static inline int notify_page_fault(struct pt_regs *regs)
 {
-	return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
-}
+	int ret = 0;
 
-int unregister_page_fault_notifier(struct notifier_block *nb)
-{
-	return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
-}
+	/* kprobe_running() needs smp_processor_id() */
+	if (!user_mode(regs)) {
+		preempt_disable();
+		if (kprobe_running() && kprobe_fault_handler(regs, 11))
+			ret = 1;
+		preempt_enable();
+	}
 
-static inline int notify_page_fault(enum die_val val, const char *str,
-			struct pt_regs *regs, long err, int trap, int sig)
-{
-	struct die_args args = {
-		.regs = regs,
-		.str = str,
-		.err = err,
-		.trapnr = trap,
-		.signr = sig
-	};
-	return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
+	return ret;
 }
 #else
-static inline int notify_page_fault(enum die_val val, const char *str,
-			struct pt_regs *regs, long err, int trap, int sig)
+static inline int notify_page_fault(struct pt_regs *regs)
 {
-	return NOTIFY_DONE;
+	return 0;
 }
 #endif
 
@@ -175,8 +164,7 @@ int __kprobes do_page_fault(struct pt_re
 	is_write = error_code & ESR_DST;
 #endif /* CONFIG_4xx || CONFIG_BOOKE */
 
-	if (notify_page_fault(DIE_PAGE_FAULT, "page_fault", regs, error_code,
-				11, SIGSEGV) == NOTIFY_STOP)
+	if (notify_page_fault(regs))
 		return 0;
 
 	if (trap == 0x300) {
Index: linux-2.6.20/arch/powerpc/kernel/kprobes.c
===================================================================
--- linux-2.6.20.orig/arch/powerpc/kernel/kprobes.c	2007-03-19 14:53:01.000000000 +0100
+++ linux-2.6.20/arch/powerpc/kernel/kprobes.c	2007-03-19 15:03:41.000000000 +0100
@@ -376,7 +376,7 @@ out:
 	return 1;
 }
 
-static int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 {
 	struct kprobe *cur = kprobe_running();
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -461,14 +461,6 @@ int __kprobes kprobe_exceptions_notify(s
 		if (post_kprobe_handler(args->regs))
 			ret = NOTIFY_STOP;
 		break;
-	case DIE_PAGE_FAULT:
-		/* kprobe_running() needs smp_processor_id() */
-		preempt_disable();
-		if (kprobe_running() &&
-		    kprobe_fault_handler(args->regs, args->trapnr))
-			ret = NOTIFY_STOP;
-		preempt_enable();
-		break;
 	default:
 		break;
 	}
Index: linux-2.6.20/include/asm-powerpc/kdebug.h
===================================================================
--- linux-2.6.20.orig/include/asm-powerpc/kdebug.h	2007-03-19 14:55:13.000000000 +0100
+++ linux-2.6.20/include/asm-powerpc/kdebug.h	2007-03-19 15:03:27.000000000 +0100
@@ -18,8 +18,20 @@ struct die_args {
 
 extern int register_die_notifier(struct notifier_block *);
 extern int unregister_die_notifier(struct notifier_block *);
-extern int register_page_fault_notifier(struct notifier_block *);
-extern int unregister_page_fault_notifier(struct notifier_block *);
+
+/*
+ * These are only here because kprobes.c wants them to implement a
+ * blatant layer violation.  Will hopefully go away soon once all
+ * architectures are updated.
+ */
+static inline int register_page_fault_notifier(struct notifier_block *nb)
+{
+	return 0;
+}
+static inline int unregister_page_fault_notifier(struct notifier_block *nb)
+{
+	return 0;
+}
 extern struct atomic_notifier_head powerpc_die_chain;
 
 /* Grossly misnamed. */
@@ -29,7 +41,6 @@ enum die_val {
 	DIE_DABR_MATCH,
 	DIE_BPT,
 	DIE_SSTEP,
-	DIE_PAGE_FAULT,
 };
 
 static inline int notify_die(enum die_val val,char *str,struct pt_regs *regs,long err,int trap, int sig)
Index: linux-2.6.20/include/asm-powerpc/kprobes.h
===================================================================
--- linux-2.6.20.orig/include/asm-powerpc/kprobes.h	2007-03-19 15:01:14.000000000 +0100
+++ linux-2.6.20/include/asm-powerpc/kprobes.h	2007-03-19 15:01:55.000000000 +0100
@@ -100,5 +100,6 @@ struct kprobe_ctlblk {
 
 extern int kprobe_exceptions_notify(struct notifier_block *self,
 					unsigned long val, void *data);
+extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
 #endif /* __KERNEL__ */
 #endif	/* _ASM_POWERPC_KPROBES_H */

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] powerpc minor pagefault optimization with kprobes enabled
@ 2007-03-19 14:08       ` Christoph Hellwig
  0 siblings, 0 replies; 6+ messages in thread
From: Christoph Hellwig @ 2007-03-19 14:08 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Martin Schwidefsky, linux-s390, linux-kernel, linuxppc-dev, linux-arch

> Agreed, it should be fixed generically.
> 
> I think the right fix would be to make the notifier call an inline
> that checks the call chain in the caller. That would be cheap enough,
> except for the cache line that might need to be referenced.
> 
> For the cache line the short term fix would be to find some other
> global cache line that is referenced in the fault path anyways and put
> it on the same (e.g. by just putting the variable next to it or
> using a shared structure) 
> 
> Longer term might be something like Ben LaHaise's patch that can patch
> code inline for rarely-read globals.

I've attached a patch below that optimizes this code path for powerpc,
but the scheme applies to all architectures as well.  It just rips out all
the callchain madness, and does as good as it gets in the pagefault
handler:

 - first we check for a user mode pagefault as that check is cheap
 - then we check kprobes_running which unfortunately requires a
   pagefault_disable
 - finally we do a direct function call to kprobe_fault_handler

> 
> > Please do the right thing to optimize this instead and rip out the brandead
> > notifier chain mechanisms and directly call into the krobes handler if
> > kprobes are active, 
> 
> No, we should keep the debugger hooks here. Otherwise there will be a zillion
> external patches patches code in at these places for the various debuggers,
> resulting in a merge nightmare for lots of people.

So get them to get their debuggers merged upstream.  Anyway, we need to
keep the die notifiers because we've actually grown some users for it.
I don't agree on the page fault notifiers, though - this code path
is too important to bloat it for possible existing external parasites.


Signed-off-by: Christoph Hellwig <hch@lst.de>

Index: linux-2.6.20/arch/powerpc/mm/fault.c
===================================================================
--- linux-2.6.20.orig/arch/powerpc/mm/fault.c	2007-03-19 14:50:41.000000000 +0100
+++ linux-2.6.20/arch/powerpc/mm/fault.c	2007-03-19 15:00:00.000000000 +0100
@@ -39,37 +39,26 @@
 #include <asm/kdebug.h>
 #include <asm/siginfo.h>
 
-#ifdef CONFIG_KPROBES
-ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
 
-/* Hook to register for page fault notifications */
-int register_page_fault_notifier(struct notifier_block *nb)
+#ifdef CONFIG_KPROBES
+static inline int notify_page_fault(struct pt_regs *regs)
 {
-	return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
-}
+	int ret = 0;
 
-int unregister_page_fault_notifier(struct notifier_block *nb)
-{
-	return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
-}
+	/* kprobe_running() needs smp_processor_id() */
+	if (!user_mode(regs)) {
+		preempt_disable();
+		if (kprobe_running() && kprobe_fault_handler(regs, 11))
+			ret = 1;
+		preempt_enable();
+	}
 
-static inline int notify_page_fault(enum die_val val, const char *str,
-			struct pt_regs *regs, long err, int trap, int sig)
-{
-	struct die_args args = {
-		.regs = regs,
-		.str = str,
-		.err = err,
-		.trapnr = trap,
-		.signr = sig
-	};
-	return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
+	return ret;
 }
 #else
-static inline int notify_page_fault(enum die_val val, const char *str,
-			struct pt_regs *regs, long err, int trap, int sig)
+static inline int notify_page_fault(struct pt_regs *regs)
 {
-	return NOTIFY_DONE;
+	return 0;
 }
 #endif
 
@@ -175,8 +164,7 @@ int __kprobes do_page_fault(struct pt_re
 	is_write = error_code & ESR_DST;
 #endif /* CONFIG_4xx || CONFIG_BOOKE */
 
-	if (notify_page_fault(DIE_PAGE_FAULT, "page_fault", regs, error_code,
-				11, SIGSEGV) == NOTIFY_STOP)
+	if (notify_page_fault(regs))
 		return 0;
 
 	if (trap == 0x300) {
Index: linux-2.6.20/arch/powerpc/kernel/kprobes.c
===================================================================
--- linux-2.6.20.orig/arch/powerpc/kernel/kprobes.c	2007-03-19 14:53:01.000000000 +0100
+++ linux-2.6.20/arch/powerpc/kernel/kprobes.c	2007-03-19 15:03:41.000000000 +0100
@@ -376,7 +376,7 @@ out:
 	return 1;
 }
 
-static int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 {
 	struct kprobe *cur = kprobe_running();
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
@@ -461,14 +461,6 @@ int __kprobes kprobe_exceptions_notify(s
 		if (post_kprobe_handler(args->regs))
 			ret = NOTIFY_STOP;
 		break;
-	case DIE_PAGE_FAULT:
-		/* kprobe_running() needs smp_processor_id() */
-		preempt_disable();
-		if (kprobe_running() &&
-		    kprobe_fault_handler(args->regs, args->trapnr))
-			ret = NOTIFY_STOP;
-		preempt_enable();
-		break;
 	default:
 		break;
 	}
Index: linux-2.6.20/include/asm-powerpc/kdebug.h
===================================================================
--- linux-2.6.20.orig/include/asm-powerpc/kdebug.h	2007-03-19 14:55:13.000000000 +0100
+++ linux-2.6.20/include/asm-powerpc/kdebug.h	2007-03-19 15:03:27.000000000 +0100
@@ -18,8 +18,20 @@ struct die_args {
 
 extern int register_die_notifier(struct notifier_block *);
 extern int unregister_die_notifier(struct notifier_block *);
-extern int register_page_fault_notifier(struct notifier_block *);
-extern int unregister_page_fault_notifier(struct notifier_block *);
+
+/*
+ * These are only here because kprobes.c wants them to implement a
+ * blatant layer violation.  Will hopefully go away soon once all
+ * architectures are updated.
+ */
+static inline int register_page_fault_notifier(struct notifier_block *nb)
+{
+	return 0;
+}
+static inline int unregister_page_fault_notifier(struct notifier_block *nb)
+{
+	return 0;
+}
 extern struct atomic_notifier_head powerpc_die_chain;
 
 /* Grossly misnamed. */
@@ -29,7 +41,6 @@ enum die_val {
 	DIE_DABR_MATCH,
 	DIE_BPT,
 	DIE_SSTEP,
-	DIE_PAGE_FAULT,
 };
 
 static inline int notify_die(enum die_val val,char *str,struct pt_regs *regs,long err,int trap, int sig)
Index: linux-2.6.20/include/asm-powerpc/kprobes.h
===================================================================
--- linux-2.6.20.orig/include/asm-powerpc/kprobes.h	2007-03-19 15:01:14.000000000 +0100
+++ linux-2.6.20/include/asm-powerpc/kprobes.h	2007-03-19 15:01:55.000000000 +0100
@@ -100,5 +100,6 @@ struct kprobe_ctlblk {
 
 extern int kprobe_exceptions_notify(struct notifier_block *self,
 					unsigned long val, void *data);
+extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
 #endif /* __KERNEL__ */
 #endif	/* _ASM_POWERPC_KPROBES_H */

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] powerpc minor pagefault optimization with kprobes enabled
  2007-03-19 14:08       ` Christoph Hellwig
  (?)
@ 2007-03-20  5:12       ` Anton Blanchard
  -1 siblings, 0 replies; 6+ messages in thread
From: Anton Blanchard @ 2007-03-20  5:12 UTC (permalink / raw)
  To: Christoph Hellwig, Andi Kleen, Martin Schwidefsky, linux-kernel,
	linux-s390, linux-arch, linuxppc-dev


> I've attached a patch below the optimizes this code path for powerpc,
> but the scheme applies to all architectures aswell.  It just rips out all
> the callachin madness, and does as good as it gets in the pagefault
> handler:

NAK, patch on the way to get rid of all the debugger() crap by using
this very hook.

Anton

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2007-03-20  5:13 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-03-19 13:02 [S390] minor fault path optimization Martin Schwidefsky
2007-03-19 13:31 ` Christoph Hellwig
2007-03-19 14:00   ` Andi Kleen
2007-03-19 14:08     ` [PATCH] powerpc minor pagefault optimization with kprobes enabled Christoph Hellwig
2007-03-19 14:08       ` Christoph Hellwig
2007-03-20  5:12       ` Anton Blanchard

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.