linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Andi Kleen <ak@muc.de>
To: torvalds@osdl.org
Cc: akpm@osdl.org, linux-kernel@vger.kernel.org, richard.brunner@amd.com
Subject: [PATCH] Athlon Prefetch workaround for 2.6.0test6
Date: Mon, 29 Sep 2003 14:56:29 +0200	[thread overview]
Message-ID: <20030929125629.GA1746@averell> (raw)


Here is a new version of the Athlon/Opteron prefetch issue workaround
for 2.6.0test6.  The issue was hit regularly by the 2.6 kernel
while doing prefetches on NULL terminated hlists.

These CPUs sometimes generate invalid exceptions for prefetch
instructions. The operating system can work around this by checking
whether the faulting instruction is a prefetch and ignoring the fault
if it is.

The code is structured carefully to ensure that the page fault
handler will never recurse more than once. Unmapped EIPs are also
special-cased to give friendlier oopses when the kernel jumps to
unmapped addresses.

It removes the previous dumb in-kernel workaround for this issue and
shrinks the kernel by >10k.

One small behaviour change: a fault on a *_user access that would
previously have raised SIGBUS now only causes an EFAULT; no SIGBUS is sent.

This version addresses all the criticism I received on previous versions.

- Only checks on AMD K7+ CPUs. 
- Computes the linear address for VM86 mode or code segments
with a non-zero base.
- Some cleanup
- No pointer comparisons
- More comments

Please consider applying,

-Andi

diff -u linux-2.6.0test6-work/include/asm-i386/processor.h-PRE linux-2.6.0test6-work/include/asm-i386/processor.h
--- linux-2.6.0test6-work/include/asm-i386/processor.h-PRE	2003-09-11 04:12:39.000000000 +0200
+++ linux-2.6.0test6-work/include/asm-i386/processor.h	2003-09-28 10:52:55.000000000 +0200
@@ -578,8 +589,6 @@
 #define ARCH_HAS_PREFETCH
 extern inline void prefetch(const void *x)
 {
-	if (cpu_data[0].x86_vendor == X86_VENDOR_AMD)
-		return;		/* Some athlons fault if the address is bad */
 	alternative_input(ASM_NOP4,
 			  "prefetchnta (%1)",
 			  X86_FEATURE_XMM,
diff -u linux-2.6.0test6-work/arch/i386/mm/fault.c-PRE linux-2.6.0test6-work/arch/i386/mm/fault.c
--- linux-2.6.0test6-work/arch/i386/mm/fault.c-PRE	2003-05-27 03:00:20.000000000 +0200
+++ linux-2.6.0test6-work/arch/i386/mm/fault.c	2003-09-29 14:48:44.000000000 +0200
@@ -55,6 +55,104 @@
 	console_loglevel = loglevel_save;
 }
 
+/* 
+ * Find a segment base in the LDT/GDT.
+ * Don't need to do any boundary checking because the CPU did that already 
+ * when the instruction was executed 
+ */
+static unsigned long segment_base(unsigned seg) 
+{ 
+	u32 *desc;
+	/* 
+	 * No need to use get/put_cpu here because when we switch CPUs the
+	 * segment base is always switched too.
+	 */
+	if (seg & (1<<2))
+		desc = current->mm->context.ldt;
+	else
+		desc = (u32 *)&cpu_gdt_table[smp_processor_id()];
+	desc = (void *)desc + (seg & ~7); 	
+	return  (desc[0] >> 16) | 
+	       ((desc[1] & 0xFF) << 16) | 
+	        (desc[1] & 0xFF000000);
+}  
+
+/* 
+ * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
+ * Check that here and ignore it.
+ */
+static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
+{ 
+	unsigned char *instr = (unsigned char *)(regs->eip);
+	int scan_more = 1;
+	int prefetch = 0; 
+	int i;
+
+	/* 
+	 * Avoid recursive faults. This catches the kernel jumping to nirvana.
+	 * More complicated races with unmapped EIP are handled elsewhere for 
+	 * user space.
+	 */
+	if (regs->eip == addr)
+		return 0; 
+
+	if (unlikely(regs->eflags & VM_MASK))
+		addr += regs->xcs << 4; 
+	else if (unlikely(regs->xcs != __USER_CS &&regs->xcs != __KERNEL_CS))
+		addr += segment_base(regs->xcs);
+
+	for (i = 0; scan_more && i < 15; i++) { 
+		unsigned char opcode;
+		unsigned char instr_hi;
+		unsigned char instr_lo;
+
+		if (__get_user(opcode, instr))
+			break; 
+
+		instr_hi = opcode & 0xf0; 
+		instr_lo = opcode & 0x0f; 
+		instr++;
+
+		switch (instr_hi) { 
+		case 0x20:
+		case 0x30:
+			/* Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. */
+			scan_more = ((instr_lo & 7) == 0x6);
+			break;
+			
+		case 0x60:
+			/* 0x64 thru 0x67 are valid prefixes in all modes. */
+			scan_more = (instr_lo & 0xC) == 0x4;
+			break;		
+		case 0xF0:
+			/* 0xF0, 0xF2, and 0xF3 are valid prefixes in all modes. */
+			scan_more = !instr_lo || (instr_lo>>1) == 1;
+			break;			
+		case 0x00:
+			/* Prefetch instruction is 0x0F0D or 0x0F18 */
+			scan_more = 0;
+			if (__get_user(opcode, instr)) 
+				break;
+			prefetch = (instr_lo == 0xF) &&
+				(opcode == 0x0D || opcode == 0x18);
+			break;			
+		default:
+			scan_more = 0;
+			break;
+		} 
+	}
+
+	return prefetch;
+}
+
+static inline int is_prefetch(struct pt_regs *regs, unsigned long addr)
+{ 
+	if (likely(boot_cpu_data.x86_vendor != X86_VENDOR_AMD ||
+		   boot_cpu_data.x86 < 6))
+		return 0;
+	return __is_prefetch(regs, addr); 
+} 
+
 asmlinkage void do_invalid_op(struct pt_regs *, unsigned long);
 
 /*
@@ -110,7 +208,7 @@
 	 * atomic region then we must not take the fault..
 	 */
 	if (in_atomic() || !mm)
-		goto no_context;
+		goto bad_area_nosemaphore;
 
 	down_read(&mm->mmap_sem);
 
@@ -198,8 +296,16 @@
 bad_area:
 	up_read(&mm->mmap_sem);
 
+bad_area_nosemaphore:
 	/* User mode accesses just cause a SIGSEGV */
 	if (error_code & 4) {
+		/* 
+		 * Valid to do another page fault here because this one came 
+		 * from user space.
+		 */
+		if (is_prefetch(regs, address))
+			return;
+
 		tsk->thread.cr2 = address;
 		tsk->thread.error_code = error_code;
 		tsk->thread.trap_no = 14;
@@ -232,6 +338,14 @@
 	if (fixup_exception(regs))
 		return;
 
+	/* 
+	 * Valid to do another page fault here, because if this fault
+	 * had been triggered by is_prefetch fixup_exception would have 
+	 * handled it.
+	 */
+ 	if (is_prefetch(regs, address))
+ 		return;
+
 /*
  * Oops. The kernel tried to access some bad page. We'll have to
  * terminate things with extreme prejudice.
@@ -286,10 +400,14 @@
 do_sigbus:
 	up_read(&mm->mmap_sem);
 
-	/*
-	 * Send a sigbus, regardless of whether we were in kernel
-	 * or user mode.
-	 */
+	/* Kernel mode? Handle exceptions or die */
+	if (!(error_code & 4))
+		goto no_context;
+
+	/* User space => ok to do another page fault */
+	if (is_prefetch(regs, address))
+		return;
+
 	tsk->thread.cr2 = address;
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_no = 14;
@@ -298,10 +416,6 @@
 	info.si_code = BUS_ADRERR;
 	info.si_addr = (void *)address;
 	force_sig_info(SIGBUS, &info, tsk);
-
-	/* Kernel mode? Handle exceptions or die */
-	if (!(error_code & 4))
-		goto no_context;
 	return;
 
 vmalloc_fault:


             reply	other threads:[~2003-09-29 12:57 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2003-09-29 12:56 Andi Kleen [this message]
2003-09-29 17:03 ` [PATCH] Athlon Prefetch workaround for 2.6.0test6 Jamie Lokier
2003-09-29 17:49   ` Andi Kleen
2003-09-29 20:08     ` Jamie Lokier
2003-09-30  5:50       ` Andi Kleen
2003-09-30  9:35       ` Gabriel Paubert
2003-09-29 22:13     ` Jamie Lokier
2003-09-30  5:38       ` Andi Kleen
2003-09-30  0:19     ` Jamie Lokier
2003-09-29 21:02 ` bill davidsen
2003-09-30  0:50   ` Nick Piggin
2003-09-30 13:27   ` Dave Jones
2003-09-30 15:36     ` Bill Davidsen
     [not found] <20030929125629.GA1746@averell.suse.lists.linux.kernel>
     [not found] ` <20030929170323.GC21798@mail.jlokier.co.uk.suse.lists.linux.kernel>
     [not found]   ` <20030929174910.GA90905@colin2.muc.de.suse.lists.linux.kernel>
     [not found]     ` <20030929200820.GA23444@mail.jlokier.co.uk.suse.lists.linux.kernel>
     [not found]       ` <20030930093556.GB12970@iram.es.suse.lists.linux.kernel>
2003-09-30  9:50         ` Andi Kleen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20030929125629.GA1746@averell \
    --to=ak@muc.de \
    --cc=akpm@osdl.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=richard.brunner@amd.com \
    --cc=torvalds@osdl.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).