linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [patch] raid-xor-2.4.10-A0
@ 2001-09-14 10:56 Ingo Molnar
  2001-09-14 11:02 ` Ingo Molnar
  0 siblings, 1 reply; 2+ messages in thread
From: Ingo Molnar @ 2001-09-14 10:56 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Neil Brown, linux-raid, linux-kernel

[-- Attachment #1: Type: TEXT/PLAIN, Size: 787 bytes --]


another RAID patch against 2.4.10-pre9:

 - update the SSE XOR routines to get compiled and used on recent kernels.

 - change prefetch code to pollute the cache less, and to prefetch in a
   wider window, to give enough time for prefetches to finish.

people with SSE-capable CPUs (PIII, PIV, newer Athlons) should see
something like this in the bootlog:

 raid5: measuring checksumming speed
    8regs     :  1292.400 MB/sec
    32regs    :   607.600 MB/sec
    pIII_sse  :  1407.200 MB/sec
    pII_mmx   :  1600.800 MB/sec
    p5_mmx    :  1670.000 MB/sec
 raid5: using function: pIII_sse (1407.200 MB/sec)

(if present, the SSE code is still picked exclusively due to its
better cache properties, even if its 'cached performance' is lower than
that of the MMX routines.)

	Ingo

[-- Attachment #2: Type: TEXT/PLAIN, Size: 2542 bytes --]

--- linux/include/asm-i386/xor.h.orig2	Fri Sep 14 12:10:21 2001
+++ linux/include/asm-i386/xor.h	Fri Sep 14 12:32:36 2001
@@ -527,8 +527,6 @@
 #undef FPU_SAVE
 #undef FPU_RESTORE
 
-#if defined(CONFIG_X86_FXSR) || defined(CONFIG_X86_RUNTIME_FXSR)
-
 /*
  * Cache avoiding checksumming functions utilizing KNI instructions
  * Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
@@ -559,19 +557,20 @@
 		: "memory")
 
 #define OFFS(x)		"16*("#x")"
-#define	PF0(x)		"	prefetcht0  "OFFS(x)"(%1)   ;\n"
-#define LD(x,y)		"       movaps   "OFFS(x)"(%1), %%xmm"#y"   ;\n"
-#define ST(x,y)		"       movaps %%xmm"#y",   "OFFS(x)"(%1)   ;\n"
-#define PF1(x)		"	prefetchnta "OFFS(x)"(%2)   ;\n"
-#define PF2(x)		"	prefetchnta "OFFS(x)"(%3)   ;\n"
-#define PF3(x)		"	prefetchnta "OFFS(x)"(%4)   ;\n"
-#define PF4(x)		"	prefetchnta "OFFS(x)"(%5)   ;\n"
-#define PF5(x)		"	prefetchnta "OFFS(x)"(%6)   ;\n"
-#define XO1(x,y)	"       xorps   "OFFS(x)"(%2), %%xmm"#y"   ;\n"
-#define XO2(x,y)	"       xorps   "OFFS(x)"(%3), %%xmm"#y"   ;\n"
-#define XO3(x,y)	"       xorps   "OFFS(x)"(%4), %%xmm"#y"   ;\n"
-#define XO4(x,y)	"       xorps   "OFFS(x)"(%5), %%xmm"#y"   ;\n"
-#define XO5(x,y)	"       xorps   "OFFS(x)"(%6), %%xmm"#y"   ;\n"
+#define PF_OFFS(x)	"256+16*("#x")"
+#define	PF0(x)		"	prefetchnta "PF_OFFS(x)"(%1)		;\n"
+#define LD(x,y)		"       movaps   "OFFS(x)"(%1), %%xmm"#y"	;\n"
+#define ST(x,y)		"       movaps %%xmm"#y",   "OFFS(x)"(%1)	;\n"
+#define PF1(x)		"	prefetchnta "PF_OFFS(x)"(%2)		;\n"
+#define PF2(x)		"	prefetchnta "PF_OFFS(x)"(%3)		;\n"
+#define PF3(x)		"	prefetchnta "PF_OFFS(x)"(%4)		;\n"
+#define PF4(x)		"	prefetchnta "PF_OFFS(x)"(%5)		;\n"
+#define PF5(x)		"	prefetchnta "PF_OFFS(x)"(%6)		;\n"
+#define XO1(x,y)	"       xorps   "OFFS(x)"(%2), %%xmm"#y"	;\n"
+#define XO2(x,y)	"       xorps   "OFFS(x)"(%3), %%xmm"#y"	;\n"
+#define XO3(x,y)	"       xorps   "OFFS(x)"(%4), %%xmm"#y"	;\n"
+#define XO4(x,y)	"       xorps   "OFFS(x)"(%5), %%xmm"#y"	;\n"
+#define XO5(x,y)	"       xorps   "OFFS(x)"(%6), %%xmm"#y"	;\n"
 
 
 static void
@@ -849,15 +848,6 @@
    deals with a load to a line that is being prefetched.  */
 #define XOR_SELECT_TEMPLATE(FASTEST) \
 	(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
-
-#else
-
-/* Don't try any SSE2 when FXSR is not enabled, because OSFXSR will not be set
-   -AK */ 
-#define XOR_SSE2 
-#define XOR_SELECT_TEMPLATE(FASTEST) (FASTEST)
-
-#endif
 
 /* Also try the generic routines.  */
 #include <asm-generic/xor.h>

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [patch] raid-xor-2.4.10-A0
  2001-09-14 10:56 [patch] raid-xor-2.4.10-A0 Ingo Molnar
@ 2001-09-14 11:02 ` Ingo Molnar
  0 siblings, 0 replies; 2+ messages in thread
From: Ingo Molnar @ 2001-09-14 11:02 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Neil Brown, linux-raid, linux-kernel

[-- Attachment #1: Type: TEXT/PLAIN, Size: 89 bytes --]


patch mixup - raid-xor-2.4.10-A1 attached, which does the prefetch
enhancements.

	Ingo

[-- Attachment #2: Type: TEXT/PLAIN, Size: 1829 bytes --]

--- linux/include/asm-i386/xor.h.orig	Mon Nov 13 04:39:51 2000
+++ linux/include/asm-i386/xor.h	Fri Sep 14 12:45:39 2001
@@ -555,19 +555,20 @@
 		: "memory")
 
 #define OFFS(x)		"16*("#x")"
-#define	PF0(x)		"	prefetcht0  "OFFS(x)"(%1)   ;\n"
-#define LD(x,y)		"       movaps   "OFFS(x)"(%1), %%xmm"#y"   ;\n"
-#define ST(x,y)		"       movaps %%xmm"#y",   "OFFS(x)"(%1)   ;\n"
-#define PF1(x)		"	prefetchnta "OFFS(x)"(%2)   ;\n"
-#define PF2(x)		"	prefetchnta "OFFS(x)"(%3)   ;\n"
-#define PF3(x)		"	prefetchnta "OFFS(x)"(%4)   ;\n"
-#define PF4(x)		"	prefetchnta "OFFS(x)"(%5)   ;\n"
-#define PF5(x)		"	prefetchnta "OFFS(x)"(%6)   ;\n"
-#define XO1(x,y)	"       xorps   "OFFS(x)"(%2), %%xmm"#y"   ;\n"
-#define XO2(x,y)	"       xorps   "OFFS(x)"(%3), %%xmm"#y"   ;\n"
-#define XO3(x,y)	"       xorps   "OFFS(x)"(%4), %%xmm"#y"   ;\n"
-#define XO4(x,y)	"       xorps   "OFFS(x)"(%5), %%xmm"#y"   ;\n"
-#define XO5(x,y)	"       xorps   "OFFS(x)"(%6), %%xmm"#y"   ;\n"
+#define PF_OFFS(x)	"256+16*("#x")"
+#define	PF0(x)		"	prefetchnta "PF_OFFS(x)"(%1)		;\n"
+#define LD(x,y)		"       movaps   "OFFS(x)"(%1), %%xmm"#y"	;\n"
+#define ST(x,y)		"       movaps %%xmm"#y",   "OFFS(x)"(%1)	;\n"
+#define PF1(x)		"	prefetchnta "PF_OFFS(x)"(%2)		;\n"
+#define PF2(x)		"	prefetchnta "PF_OFFS(x)"(%3)		;\n"
+#define PF3(x)		"	prefetchnta "PF_OFFS(x)"(%4)		;\n"
+#define PF4(x)		"	prefetchnta "PF_OFFS(x)"(%5)		;\n"
+#define PF5(x)		"	prefetchnta "PF_OFFS(x)"(%6)		;\n"
+#define XO1(x,y)	"       xorps   "OFFS(x)"(%2), %%xmm"#y"	;\n"
+#define XO2(x,y)	"       xorps   "OFFS(x)"(%3), %%xmm"#y"	;\n"
+#define XO3(x,y)	"       xorps   "OFFS(x)"(%4), %%xmm"#y"	;\n"
+#define XO4(x,y)	"       xorps   "OFFS(x)"(%5), %%xmm"#y"	;\n"
+#define XO5(x,y)	"       xorps   "OFFS(x)"(%6), %%xmm"#y"	;\n"
 
 
 static void

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2001-09-15 11:29 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2001-09-14 10:56 [patch] raid-xor-2.4.10-A0 Ingo Molnar
2001-09-14 11:02 ` Ingo Molnar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).