* [patch] raid-xor-2.4.10-A0
@ 2001-09-14 10:56 Ingo Molnar
2001-09-14 11:02 ` Ingo Molnar
0 siblings, 1 reply; 2+ messages in thread
From: Ingo Molnar @ 2001-09-14 10:56 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Neil Brown, linux-raid, linux-kernel
[-- Attachment #1: Type: TEXT/PLAIN, Size: 787 bytes --]
another RAID patch against 2.4.10-pre9:
- update the SSE XOR routines to get compiled and used on recent kernels.
- change prefetch code to pollute the cache less, and to prefetch in a
wider window, to give enough time for prefetches to finish.
people with SSE-capable CPUs (PIII, PIV, newer Athlons) should see
something like this in the bootlog:
raid5: measuring checksumming speed
8regs : 1292.400 MB/sec
32regs : 607.600 MB/sec
pIII_sse : 1407.200 MB/sec
pII_mmx : 1600.800 MB/sec
p5_mmx : 1670.000 MB/sec
raid5: using function: pIII_sse (1407.200 MB/sec)
(if present then the SSE code is still picked up exclusively due to its
better cache-properties, even if its 'cached performance' is lower than
that of the MMX routines.)
Ingo
[-- Attachment #2: Type: TEXT/PLAIN, Size: 2542 bytes --]
--- linux/include/asm-i386/xor.h.orig2 Fri Sep 14 12:10:21 2001
+++ linux/include/asm-i386/xor.h Fri Sep 14 12:32:36 2001
@@ -527,8 +527,6 @@
#undef FPU_SAVE
#undef FPU_RESTORE
-#if defined(CONFIG_X86_FXSR) || defined(CONFIG_X86_RUNTIME_FXSR)
-
/*
* Cache avoiding checksumming functions utilizing KNI instructions
* Copyright (C) 1999 Zach Brown (with obvious credit due Ingo)
@@ -559,19 +557,20 @@
: "memory")
#define OFFS(x) "16*("#x")"
-#define PF0(x) " prefetcht0 "OFFS(x)"(%1) ;\n"
-#define LD(x,y) " movaps "OFFS(x)"(%1), %%xmm"#y" ;\n"
-#define ST(x,y) " movaps %%xmm"#y", "OFFS(x)"(%1) ;\n"
-#define PF1(x) " prefetchnta "OFFS(x)"(%2) ;\n"
-#define PF2(x) " prefetchnta "OFFS(x)"(%3) ;\n"
-#define PF3(x) " prefetchnta "OFFS(x)"(%4) ;\n"
-#define PF4(x) " prefetchnta "OFFS(x)"(%5) ;\n"
-#define PF5(x) " prefetchnta "OFFS(x)"(%6) ;\n"
-#define XO1(x,y) " xorps "OFFS(x)"(%2), %%xmm"#y" ;\n"
-#define XO2(x,y) " xorps "OFFS(x)"(%3), %%xmm"#y" ;\n"
-#define XO3(x,y) " xorps "OFFS(x)"(%4), %%xmm"#y" ;\n"
-#define XO4(x,y) " xorps "OFFS(x)"(%5), %%xmm"#y" ;\n"
-#define XO5(x,y) " xorps "OFFS(x)"(%6), %%xmm"#y" ;\n"
+#define PF_OFFS(x) "256+16*("#x")"
+#define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n"
+#define LD(x,y) " movaps "OFFS(x)"(%1), %%xmm"#y" ;\n"
+#define ST(x,y) " movaps %%xmm"#y", "OFFS(x)"(%1) ;\n"
+#define PF1(x) " prefetchnta "PF_OFFS(x)"(%2) ;\n"
+#define PF2(x) " prefetchnta "PF_OFFS(x)"(%3) ;\n"
+#define PF3(x) " prefetchnta "PF_OFFS(x)"(%4) ;\n"
+#define PF4(x) " prefetchnta "PF_OFFS(x)"(%5) ;\n"
+#define PF5(x) " prefetchnta "PF_OFFS(x)"(%6) ;\n"
+#define XO1(x,y) " xorps "OFFS(x)"(%2), %%xmm"#y" ;\n"
+#define XO2(x,y) " xorps "OFFS(x)"(%3), %%xmm"#y" ;\n"
+#define XO3(x,y) " xorps "OFFS(x)"(%4), %%xmm"#y" ;\n"
+#define XO4(x,y) " xorps "OFFS(x)"(%5), %%xmm"#y" ;\n"
+#define XO5(x,y) " xorps "OFFS(x)"(%6), %%xmm"#y" ;\n"
static void
@@ -849,15 +848,6 @@
deals with a load to a line that is being prefetched. */
#define XOR_SELECT_TEMPLATE(FASTEST) \
(cpu_has_xmm ? &xor_block_pIII_sse : FASTEST)
-
-#else
-
-/* Don't try any SSE2 when FXSR is not enabled, because OSFXSR will not be set
- -AK */
-#define XOR_SSE2
-#define XOR_SELECT_TEMPLATE(FASTEST) (FASTEST)
-
-#endif
/* Also try the generic routines. */
#include <asm-generic/xor.h>
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [patch] raid-xor-2.4.10-A0
2001-09-14 10:56 [patch] raid-xor-2.4.10-A0 Ingo Molnar
@ 2001-09-14 11:02 ` Ingo Molnar
0 siblings, 0 replies; 2+ messages in thread
From: Ingo Molnar @ 2001-09-14 11:02 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Neil Brown, linux-raid, linux-kernel
[-- Attachment #1: Type: TEXT/PLAIN, Size: 89 bytes --]
patch mixup - raid-xor-2.4.10-A1 attached, which does the prefetch
enhancements.
Ingo
[-- Attachment #2: Type: TEXT/PLAIN, Size: 1829 bytes --]
--- linux/include/asm-i386/xor.h.orig Mon Nov 13 04:39:51 2000
+++ linux/include/asm-i386/xor.h Fri Sep 14 12:45:39 2001
@@ -555,19 +555,20 @@
: "memory")
#define OFFS(x) "16*("#x")"
-#define PF0(x) " prefetcht0 "OFFS(x)"(%1) ;\n"
-#define LD(x,y) " movaps "OFFS(x)"(%1), %%xmm"#y" ;\n"
-#define ST(x,y) " movaps %%xmm"#y", "OFFS(x)"(%1) ;\n"
-#define PF1(x) " prefetchnta "OFFS(x)"(%2) ;\n"
-#define PF2(x) " prefetchnta "OFFS(x)"(%3) ;\n"
-#define PF3(x) " prefetchnta "OFFS(x)"(%4) ;\n"
-#define PF4(x) " prefetchnta "OFFS(x)"(%5) ;\n"
-#define PF5(x) " prefetchnta "OFFS(x)"(%6) ;\n"
-#define XO1(x,y) " xorps "OFFS(x)"(%2), %%xmm"#y" ;\n"
-#define XO2(x,y) " xorps "OFFS(x)"(%3), %%xmm"#y" ;\n"
-#define XO3(x,y) " xorps "OFFS(x)"(%4), %%xmm"#y" ;\n"
-#define XO4(x,y) " xorps "OFFS(x)"(%5), %%xmm"#y" ;\n"
-#define XO5(x,y) " xorps "OFFS(x)"(%6), %%xmm"#y" ;\n"
+#define PF_OFFS(x) "256+16*("#x")"
+#define PF0(x) " prefetchnta "PF_OFFS(x)"(%1) ;\n"
+#define LD(x,y) " movaps "OFFS(x)"(%1), %%xmm"#y" ;\n"
+#define ST(x,y) " movaps %%xmm"#y", "OFFS(x)"(%1) ;\n"
+#define PF1(x) " prefetchnta "PF_OFFS(x)"(%2) ;\n"
+#define PF2(x) " prefetchnta "PF_OFFS(x)"(%3) ;\n"
+#define PF3(x) " prefetchnta "PF_OFFS(x)"(%4) ;\n"
+#define PF4(x) " prefetchnta "PF_OFFS(x)"(%5) ;\n"
+#define PF5(x) " prefetchnta "PF_OFFS(x)"(%6) ;\n"
+#define XO1(x,y) " xorps "OFFS(x)"(%2), %%xmm"#y" ;\n"
+#define XO2(x,y) " xorps "OFFS(x)"(%3), %%xmm"#y" ;\n"
+#define XO3(x,y) " xorps "OFFS(x)"(%4), %%xmm"#y" ;\n"
+#define XO4(x,y) " xorps "OFFS(x)"(%5), %%xmm"#y" ;\n"
+#define XO5(x,y) " xorps "OFFS(x)"(%6), %%xmm"#y" ;\n"
static void
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2001-09-15 11:29 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2001-09-14 10:56 [patch] raid-xor-2.4.10-A0 Ingo Molnar
2001-09-14 11:02 ` Ingo Molnar
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).