linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Athlon possible fixes
@ 2001-05-05  7:35 Alan Cox
  2001-05-05 16:26 ` Rogier Wolff
  2001-05-06  2:23 ` Chris Wedgwood
  0 siblings, 2 replies; 18+ messages in thread
From: Alan Cox @ 2001-05-05  7:35 UTC (permalink / raw)
  To: linux-kernel

Assuming Manfred's diagnosis is right something like this might fix it

*note*: Not tested; this is just off the top of my head...


--- arch/i386/lib/mmx.c~	Sun Apr 15 16:49:54 2001
+++ arch/i386/lib/mmx.c	Sat May  5 08:03:17 2001
@@ -57,7 +57,11 @@
 		: : "r" (from) );
 		
 	
-	for(; i>0; i--)
+	/*
+	 *	While we have at least 320 bytes left to copy
+	 */
+	 
+	for(; i>5; i--)
 	{
 		__asm__ __volatile__ (
 		"1:  prefetch 320(%0)\n"
@@ -89,6 +93,31 @@
 		from+=64;
 		to+=64;
 	}
+
+	/*
+	 *	While we have at least 64 bytes left to copy
+	 */
+	 
+	for(; i>0; i--)
+	{
+		__asm__ __volatile__ (
+		"  movq (%0), %%mm0\n"
+		"  movq 8(%0), %%mm1\n"
+		"  movq 16(%0), %%mm2\n"
+		"  movq 24(%0), %%mm3\n"
+		"  movq %%mm0, (%1)\n"
+		"  movq %%mm1, 8(%1)\n"
+		"  movq %%mm2, 16(%1)\n"
+		"  movq %%mm3, 24(%1)\n"
+		"  movq 32(%0), %%mm0\n"
+		"  movq 40(%0), %%mm1\n"
+		"  movq 48(%0), %%mm2\n"
+		"  movq 56(%0), %%mm3\n"
+		"  movq %%mm0, 32(%1)\n"
+		"  movq %%mm1, 40(%1)\n"
+		"  movq %%mm2, 48(%1)\n"
+		"  movq %%mm3, 56(%1)\n"
+		: : "r" (from), "r" (to) : "memory");
 	/*
 	 *	Now do the tail of the block
 	 */
@@ -163,7 +192,11 @@
 		".previous"
 		: : "r" (from) );
 
-	for(i=0; i<4096/64; i++)
+	/*
+	 *	While there is at least 320 bytes to copy
+	 */
+	 
+	for(i=0; i<59; i++)
 	{
 		__asm__ __volatile__ (
 		"1: prefetch 320(%0)\n"
@@ -195,6 +228,35 @@
 		from+=64;
 		to+=64;
 	}
+
+	/*
+	 *	Finish off the page
+	 */
+	 
+	for(; i<64; i++)
+	{
+		__asm__ __volatile__ (
+		"   movq (%0), %%mm0\n"
+		"   movntq %%mm0, (%1)\n"
+		"   movq 8(%0), %%mm1\n"
+		"   movntq %%mm1, 8(%1)\n"
+		"   movq 16(%0), %%mm2\n"
+		"   movntq %%mm2, 16(%1)\n"
+		"   movq 24(%0), %%mm3\n"
+		"   movntq %%mm3, 24(%1)\n"
+		"   movq 32(%0), %%mm4\n"
+		"   movntq %%mm4, 32(%1)\n"
+		"   movq 40(%0), %%mm5\n"
+		"   movntq %%mm5, 40(%1)\n"
+		"   movq 48(%0), %%mm6\n"
+		"   movntq %%mm6, 48(%1)\n"
+		"   movq 56(%0), %%mm7\n"
+		"   movntq %%mm7, 56(%1)\n"
+		: : "r" (from), "r" (to) : "memory");
+		from+=64;
+		to+=64;
+	}
+
 	/* since movntq is weakly-ordered, a "sfence" is needed to become
 	 * ordered again.
 	 */
@@ -270,7 +332,11 @@
 		".previous"
 		: : "r" (from) );
 
-	for(i=0; i<4096/64; i++)
+	/*
+	 *	Copy the page until we have 320 bytes to go
+	 */
+	 
+	for(i=0; i<59; i++)
 	{
 		__asm__ __volatile__ (
 		"1: prefetch 320(%0)\n"
@@ -298,6 +364,34 @@
 		"	.align 4\n"
 		"	.long 1b, 3b\n"
 		".previous"
+		: : "r" (from), "r" (to) : "memory");
+		from+=64;
+		to+=64;
+	}
+
+	/*
+	 *	Copy the tail of the page
+	 */
+	 
+	for(; i<64; i++)
+	{
+		__asm__ __volatile__ (
+		"   movq (%0), %%mm0\n"
+		"   movq 8(%0), %%mm1\n"
+		"   movq 16(%0), %%mm2\n"
+		"   movq 24(%0), %%mm3\n"
+		"   movq %%mm0, (%1)\n"
+		"   movq %%mm1, 8(%1)\n"
+		"   movq %%mm2, 16(%1)\n"
+		"   movq %%mm3, 24(%1)\n"
+		"   movq 32(%0), %%mm0\n"
+		"   movq 40(%0), %%mm1\n"
+		"   movq 48(%0), %%mm2\n"
+		"   movq 56(%0), %%mm3\n"
+		"   movq %%mm0, 32(%1)\n"
+		"   movq %%mm1, 40(%1)\n"
+		"   movq %%mm2, 48(%1)\n"
+		"   movq %%mm3, 56(%1)\n"
 		: : "r" (from), "r" (to) : "memory");
 		from+=64;
 		to+=64;

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Athlon possible fixes
  2001-05-05  7:35 Athlon possible fixes Alan Cox
@ 2001-05-05 16:26 ` Rogier Wolff
  2001-05-05 16:42   ` Kurt Roeckx
  2001-05-05 22:44   ` Seth Goldberg
  2001-05-06  2:23 ` Chris Wedgwood
  1 sibling, 2 replies; 18+ messages in thread
From: Rogier Wolff @ 2001-05-05 16:26 UTC (permalink / raw)
  To: Alan Cox; +Cc: linux-kernel


> +		__asm__ __volatile__ (
> +		"   movq (%0), %%mm0\n"
> +		"   movq 8(%0), %%mm1\n"
> +		"   movq 16(%0), %%mm2\n"
> +		"   movq 24(%0), %%mm3\n"
> +		"   movq %%mm0, (%1)\n"
> +		"   movq %%mm1, 8(%1)\n"
> +		"   movq %%mm2, 16(%1)\n"
> +		"   movq %%mm3, 24(%1)\n"
> +		"   movq 32(%0), %%mm0\n"
> +		"   movq 40(%0), %%mm1\n"
> +		"   movq 48(%0), %%mm2\n"
> +		"   movq 56(%0), %%mm3\n"
> +		"   movq %%mm0, 32(%1)\n"
> +		"   movq %%mm1, 40(%1)\n"
> +		"   movq %%mm2, 48(%1)\n"
> +		"   movq %%mm3, 56(%1)\n"
>  		: : "r" (from), "r" (to) : "memory");
>  		from+=64;
>  		to+=64;

As all this is trying to avoid bus turnarounds (i.e. switching from
reading to writing), wouldn't it be fastest to just trust that the CPU
has at least 4k worth of cache? (and hope for the best that we don't
get interrupted in the meanwhile).

/*
 * Copy one page (PAGE_SIZE bytes) from source to dest.
 *
 * The copy is done in two passes.  The first pass reads one word at every
 * cache_line_size() bytes of the source page; the second pass copies the
 * page word by word.  The intent stated by the author is to pull the whole
 * source page into the cache first so that reads and writes are not
 * interleaved on the bus -- this relies on the CPU having at least one
 * page worth of cache and on not being interrupted in between.
 *
 * dest:   start of the destination page, PAGE_SIZE bytes are written.
 * source: start of the source page, PAGE_SIZE bytes are read.
 *
 * NOTE(review): both pointers are accessed as long, so they are assumed
 * to be suitably aligned for long -- confirm against callers.
 * NOTE(review): cache_line_size() is assumed to return a value of at
 * least sizeof(long); a smaller value would make the stride zero and the
 * pre-touch loop would never terminate -- confirm.
 */
void copy_page (char *dest, char *source)
{
	long *dst = (long *)dest;
	long *src = (long *)source;
	long *end = (long *)(source + PAGE_SIZE);
#if 1
	unsigned long i;

	/*
	 * Touch one word per cache line of the source page.  The read goes
	 * through a volatile lvalue so the compiler must perform the load
	 * even though the value is discarded; this avoids accumulating into
	 * a static variable, which would be a shared write on every call.
	 */
	for (i = 0; i < PAGE_SIZE / sizeof(long); i += cache_line_size() / sizeof(long))
		(void) *(volatile long *)&src[i];
#endif
	/* Second pass: the actual word-by-word copy of the page. */
	while (src < end)
		*dst++ = *src++;
}

So, this is 15 lines of C, and it'd be interesting to benchmark this
against the assembly.

I'm assuming that the "loop variable handling" is not going to
influence the overall performance: that would run at 500 - 1000MHz,
and around 1 clock cycle (1-2ns) per loop. Set this against the stalls
against the memory unit whose output buffer is full, and memory writes
that take on the order of 30 ns per 64bits.

At least, that's what I expect, however, I haven't been optimizing
cycles for quite a while....

			Roger. 

-- 
** R.E.Wolff@BitWizard.nl ** http://www.BitWizard.nl/ ** +31-15-2137555 **
*-- BitWizard writes Linux device drivers for any device you may have! --*
* There are old pilots, and there are bold pilots. 
* There are also old, bald pilots. 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Athlon possible fixes
  2001-05-05 16:26 ` Rogier Wolff
@ 2001-05-05 16:42   ` Kurt Roeckx
  2001-05-05 22:44   ` Seth Goldberg
  1 sibling, 0 replies; 18+ messages in thread
From: Kurt Roeckx @ 2001-05-05 16:42 UTC (permalink / raw)
  To: Rogier Wolff; +Cc: Alan Cox, linux-kernel

On Sat, May 05, 2001 at 06:26:30PM +0200, Rogier Wolff wrote:
> 
> As all this is trying to avoid bus turnarounds (i.e. switching from
> reading to writing), wouldn't it be fastest to just trust that the CPU
> has at least 4k worth of cache? (and hope for the best that we don't
> get interrupted in the meanwhile).
> 
> void copy_page (char *dest, char *source)
> {
> 	long *dst = (long *)dest, 
> 		*src=(long *)source, 
> 		*end= (long *)(source+PAGE_SIZE);
> #if 1
> 	register int  i;
> 	long t=0;
> 	static long tt;
> 
>   	for (i=0;i<PAGE_SIZE/sizeof (long);i += cache_line_size()/sizeof(long))
> 	/* Actually the innards of this loop should be:
> 		(void) from[i];
> 	   however, the compiler will probably optimize that away. */ 
>      		t += src[i];
> 
> 	tt = t;
> #endif
> 	while (src < end)
> 		*dst++ = *src++;
> 
> }
> 
> So, this is 15 lines of C, and it'd be interesting to benchmark this
> against the assembly.
> 
> I'm assuming that the "loop variable handling" is not going to
> influence the overall performance: that would run at 500 - 1000MHz,
> and around 1 clock cycle (1-2ns) per loop. Set this against the stalls
> against the memory unit whose output buffer is full, and memory writes
> that take on the order of 30 ns per 64bits.

Can't you use volatile to prevent the compiler from optimizing
it?


Kurt


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Athlon possible fixes
  2001-05-05 16:26 ` Rogier Wolff
  2001-05-05 16:42   ` Kurt Roeckx
@ 2001-05-05 22:44   ` Seth Goldberg
  2001-05-06 16:44     ` Jussi Laako
  1 sibling, 1 reply; 18+ messages in thread
From: Seth Goldberg @ 2001-05-05 22:44 UTC (permalink / raw)
  To: Rogier Wolff; +Cc: Alan Cox, linux-kernel

> 
> As all this is trying to avoid bus turnarounds (i.e. switching from
> reading to writing), wouldn't it be fastest to just trust that the CPU
> has at least 4k worth of cache? (and hope for the best that we don't
> get interrupted in the meanwhile).
> 
> void copy_page (char *dest, char *source)
> {
>         long *dst = (long *)dest,
>                 *src=(long *)source,
>                 *end= (long *)(source+PAGE_SIZE);
> #if 1
>         register int  i;
>         long t=0;
>         static long tt;
> 
>         for (i=0;i<PAGE_SIZE/sizeof (long);i += cache_line_size()/sizeof(long))
>         /* Actually the innards of this loop should be:
>                 (void) from[i];
>            however, the compiler will probably optimize that away. */
>                 t += src[i];
> 
>         tt = t;
> #endif
>         while (src < end)
>                 *dst++ = *src++;
> 
> }
> 
> So, this is 15 lines of C, and it'd be interesting to benchmark this
> against the assembly.
> 

  Well you asked for it :) :

clear_page by 'normal_clear_page'        took 12196 cycles (318.1 MB/s)
clear_page by 'slow_zero_page'           took 12207 cycles (317.9 MB/s)
clear_page by 'fast_clear_page'          took 29272 cycles (132.6 MB/s)
clear_page by 'faster_clear_page'        took 4831 cycles (803.1 MB/s)
 
copy_page by 'normal_copy_page'  took 12607 cycles (307.8 MB/s)
copy_page by 'slow_copy_page'    took 13617 cycles (285.0 MB/s)
copy_page by 'fast_copy_page'    took 9531 cycles (407.1 MB/s)
copy_page by 'faster_copy'       took 5585 cycles (694.7 MB/s)
copy_page by 'even_faster'       took 5621 cycles (690.3 MB/s)
copy_page by 'even_faster_nopre'         took 5837 cycles (664.8 MB/s)
copy_page by 'c_source'  took 17296 cycles (224.3 MB/s)

 The last one is yours :).  I'd assume this is because the compiler is not
using mmx instructions for this. (The nopre is a routine I added to check
the speed with only a single prefetch instruction.)  When I tried adding
the routine with the single prefetch instruction to mmx.c, recompiled
and rebooted, the system stayed up a lot longer, but it still crashed (I
was in Xwindows and the crash was partially written to the log file)
after around 3 minutes of work in X.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Athlon possible fixes
  2001-05-05  7:35 Athlon possible fixes Alan Cox
  2001-05-05 16:26 ` Rogier Wolff
@ 2001-05-06  2:23 ` Chris Wedgwood
  2001-05-06 12:51   ` Alan Cox
  1 sibling, 1 reply; 18+ messages in thread
From: Chris Wedgwood @ 2001-05-06  2:23 UTC (permalink / raw)
  To: Alan Cox; +Cc: linux-kernel

    Assuming Manfred's diagnosis is right something like this might
    fix it

There really needs to be a hardware fix... this doesn't stop some
application having its own optimised code from breaking on some
hardware (think games and simulation software perhaps).



  --cw

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Athlon possible fixes
  2001-05-06  2:23 ` Chris Wedgwood
@ 2001-05-06 12:51   ` Alan Cox
  2001-05-06 13:00     ` Chris Wedgwood
  2001-05-11  4:02     ` Ralf Baechle
  0 siblings, 2 replies; 18+ messages in thread
From: Alan Cox @ 2001-05-06 12:51 UTC (permalink / raw)
  To: Chris Wedgwood; +Cc: Alan Cox, linux-kernel

> There really needs to be a hardware fix... this doesn't stop some
> application having it's owne optimised code from breaking on some
> hardware (think games and similation software perhaps).

prefetch is virtually addressed. An application would need access to /dev/mem
or similar. So the only folks I think it might actually bite are the Xserver
people.



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Athlon possible fixes
  2001-05-06 12:51   ` Alan Cox
@ 2001-05-06 13:00     ` Chris Wedgwood
  2001-05-11  4:02     ` Ralf Baechle
  1 sibling, 0 replies; 18+ messages in thread
From: Chris Wedgwood @ 2001-05-06 13:00 UTC (permalink / raw)
  To: Alan Cox; +Cc: linux-kernel

On Sun, May 06, 2001 at 01:51:59PM +0100, Alan Cox wrote:

    prefetch is virtually addresses. An application would need access
    to /dev/mem or similar. So the only folks I think it might
    actually bite are the Xserver people.

depends, maybe it depends on what part of the northbridge it
traverses, so it may only affect RAM and not PCI/AGP memory

it should be possible to write a test program that uses /dev/mem to
test for this if someone has a buggy MB (or wants to send me one,
I'll gladly do it -- my MBs work perfectly it seems)


  --cw
    
    
    

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Athlon possible fixes
  2001-05-05 22:44   ` Seth Goldberg
@ 2001-05-06 16:44     ` Jussi Laako
  2001-05-06 17:41       ` Zilvinas Valinskas
  2001-05-06 18:16       ` Christian Bornträger
  0 siblings, 2 replies; 18+ messages in thread
From: Jussi Laako @ 2001-05-06 16:44 UTC (permalink / raw)
  To: Seth Goldberg; +Cc: linux-kernel

Seth Goldberg wrote:
> 
> and rebooted, the system stayed up a lot longer, but it still crashed (I
> was in Xwindows and the crash was partially written to the log file)
> after around 3 minutes of work in X.

Hmm, I'm wondering if this could be same bug that I'm seeing with ASUS
A7V133 & Duron/800 when using IDE autotuning (PDC20265).

Still haven't got any replies suggesting any reason for lockups I'm seeing
(no oopses). Or is the Promise driver just buggy, because system is solid
with noautotune. RAID5 (md) on that server is just little bit sluggish with
~1.7 MB/s transfer rate... I should have stayed with SCSI disks...

Can't use the VIA controller either, because there are four HDDs and one CD
and the system can't boot from CD connected to Promise controller (I
tested).

Is there somewhere some up-to-date list of "don't buy that hardware"?

 - Jussi Laako

-- 
PGP key fingerprint: 161D 6FED 6A92 39E2 EB5B  39DD A4DE 63EB C216 1E4B
Available at PGP keyservers

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Athlon possible fixes
  2001-05-06 16:44     ` Jussi Laako
@ 2001-05-06 17:41       ` Zilvinas Valinskas
  2001-05-06 18:16       ` Christian Bornträger
  1 sibling, 0 replies; 18+ messages in thread
From: Zilvinas Valinskas @ 2001-05-06 17:41 UTC (permalink / raw)
  To: Jussi Laako; +Cc: Seth Goldberg, linux-kernel

On Sun, May 06, 2001 at 07:44:19PM +0300, Jussi Laako wrote:
> Seth Goldberg wrote:
> > 
> > and rebooted, the system stayed up a lot longer, but it still crashed (I
> > was in Xwindows and the crash was partially written to the log file)
> > after around 3 minutes of work in X.
> 
> Hmm, I'm wondering if this could be same bug that I'm seeing with ASUS
> A7V133 & Duron/800 when using IDE autotuning (PDC20265).
> 
> Still haven't got any replies suggesting any reason for lockups I'm seeing
> (no oopses). Or is the Promise driver just buggy, because system is solid
> with noautotune. RAID5 (md) on that server is just little bit sluggish with
> ~1.7 MB/s transfer rate... I should have stayed with SCSI disks...

http://www.viahardware.com/

there you should find (if I'm right) somewhere mentioned that you are likely
to trash your hard drives or experience random lock ups with the KT133a chipset,
especially if you use an off-board ide controller ... As to why it happens, it is
beyond me to explain ...

(maybe this doesn't even apply to your case).
-- 
Zilvinas Valinskas

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Athlon possible fixes
  2001-05-06 16:44     ` Jussi Laako
  2001-05-06 17:41       ` Zilvinas Valinskas
@ 2001-05-06 18:16       ` Christian Bornträger
  2001-05-06 19:23         ` Marek Pętlicki
                           ` (2 more replies)
  1 sibling, 3 replies; 18+ messages in thread
From: Christian Bornträger @ 2001-05-06 18:16 UTC (permalink / raw)
  To: Jussi Laako; +Cc: linux-kernel

> Hmm, I'm wondering if this could be same bug that I'm seeing with ASUS
> A7V133 & Duron/800 when using IDE autotuning (PDC20265).
> Still haven't got any replies suggesting any reason for lockups I'm seeing
> (no oopses). Or is the Promise driver just buggy, because system is solid
> with noautotune. RAID5 (md) on that server is just little bit sluggish
> with

OK. I tried it on my A7V133 system and leaving out autotune (just for
Promise, where I connected the hard discs)  makes the system stable.  I
thought my problem is related to the Athlon compile problem I read in this
group, but now I am not sure.
Can you try and mail me if the Kernel 2.4.3 (without any ac patch) is stable
with your system even if you use autotune? "Downgrade" to this kernel works
fine for me.

greetings
Christian Bornträger



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Athlon possible fixes
  2001-05-06 18:16       ` Christian Bornträger
@ 2001-05-06 19:23         ` Marek Pętlicki
  2001-05-07 18:54         ` Jussi Laako
  2001-05-11 20:09         ` Jussi Laako
  2 siblings, 0 replies; 18+ messages in thread
From: Marek Pętlicki @ 2001-05-06 19:23 UTC (permalink / raw)
  To: Christian Bornträger; +Cc: Jussi Laako, linux-kernel

On Sunday, May, 2001-05-06 at 20:18:36, Christian Bornträger wrote:
> > Hmm, I'm wondering if this could be same bug that I'm seeing with ASUS
> > A7V133 & Duron/800 when using IDE autotuning (PDC20265).
> > Still haven't got any replies suggesting any reason for lockups I'm seeing
> > (no oopses). Or is the Promise driver just buggy, because system is solid
> > with noautotune. RAID5 (md) on that server is just little bit sluggish
> with
> 
> OK. I tried it on my A7V133 system and leaving out autotune (just for
> Promise, where I connected the hard discs)  makes the system stable.  I
> thought my problem is related to the Athlon compile problem I read in this
> group, but now I am not sure.
> Can you try and mail me if the Kernel 2.4.3 (without any ac patch) is stable
> with your system even if you use autotune? "Downgrade" to this kernel works
> fine for me.

I must say that this is the only version working more-or-less OK. The
only bad thing I experience are (very rare) X lockups, but SysRq
magik-key lets me reboot nicely. I put it on binary NVidias though
(XFree 4.0.3, once RH 7.0, but most of the libs hand-upgraded).

With other kernel versions I experience hard lockups - only hard reset then.

No HDD corruption though

 lspci -vv
00:00.0 Host bridge: VIA Technologies, Inc.: Unknown device 0305 (rev 02)
	Subsystem: Elitegroup Computer Systems: Unknown device 0987
	Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B-
	Status: Cap+ 66Mhz- UDF- FastB2B- ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort+ >SERR- <PERR-
	Latency: 0
	Region 0: Memory at d0000000 (32-bit, prefetchable) [size=64M]
	Capabilities: [a0] AGP version 2.0
		Status: RQ=31 SBA+ 64bit- FW- Rate=x1,x2
		Command: RQ=0 SBA- AGP- 64bit- FW- Rate=<none>
	Capabilities: [c0] Power Management version 2
		Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0-,D1-,D2-,D3hot-,D3cold-)
		Status: D0 PME-Enable- DSel=0 DScale=0 PME-

00:01.0 PCI bridge: VIA Technologies, Inc.: Unknown device 8305 (prog-if 00 [Normal decode])
	Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B-
	Status: Cap+ 66Mhz+ UDF- FastB2B- ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort+ >SERR- <PERR+
	Latency: 0
	Bus: primary=00, secondary=01, subordinate=01, sec-latency=0
	Memory behind bridge: d6000000-d7ffffff
	Prefetchable memory behind bridge: d4000000-d5ffffff
	BridgeCtl: Parity- SERR- NoISA+ VGA+ MAbort- >Reset- FastB2B-
	Capabilities: [80] Power Management version 2
		Flags: PMEClk- DSI- D1+ D2- AuxCurrent=0mA PME(D0-,D1-,D2-,D3hot-,D3cold-)
		Status: D0 PME-Enable- DSel=0 DScale=0 PME-

00:07.0 ISA bridge: VIA Technologies, Inc. VT82C686 [Apollo Super] (rev 40)
	Subsystem: VIA Technologies, Inc. VT82C686/A PCI to ISA Bridge
	Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping+ SERR- FastB2B-
	Status: Cap+ 66Mhz- UDF- FastB2B- ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR-
	Latency: 0
	Capabilities: [c0] Power Management version 2
		Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0-,D1-,D2-,D3hot-,D3cold-)
		Status: D0 PME-Enable- DSel=0 DScale=0 PME-

00:07.1 IDE interface: VIA Technologies, Inc. VT82C586 IDE [Apollo] (rev 06) (prog-if 8a [Master SecP PriP])
	Subsystem: VIA Technologies, Inc. VT82C586 IDE [Apollo]
	Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B-
	Status: Cap+ 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR-
	Latency: 32
	Region 4: I/O ports at d000 [size=16]
	Capabilities: [c0] Power Management version 2
		Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0-,D1-,D2-,D3hot-,D3cold-)
		Status: D0 PME-Enable- DSel=0 DScale=0 PME-

00:07.2 USB Controller: VIA Technologies, Inc. VT82C586B USB (rev 16) (prog-if 00 [UHCI])
	Subsystem: Unknown device 0925:1234
	Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B-
	Status: Cap+ 66Mhz- UDF- FastB2B- ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR-
	Latency: 32, cache line size 08
	Interrupt: pin D routed to IRQ 5
	Region 4: I/O ports at d400 [size=32]
	Capabilities: [80] Power Management version 2
		Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0-,D1-,D2-,D3hot-,D3cold-)
		Status: D0 PME-Enable- DSel=0 DScale=0 PME-

00:07.4 Host bridge: VIA Technologies, Inc. VT82C686 [Apollo Super ACPI] (rev 40)
	Subsystem: Elitegroup Computer Systems: Unknown device 0987
	Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B-
	Status: Cap+ 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR-
	Interrupt: pin ? routed to IRQ 9
	Capabilities: [68] Power Management version 2
		Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0-,D1-,D2-,D3hot-,D3cold-)
		Status: D0 PME-Enable- DSel=0 DScale=0 PME-

00:07.5 Multimedia audio controller: VIA Technologies, Inc. VT82C686 [Apollo Super AC97/Audio] (rev 50)
	Subsystem: VIA Technologies, Inc.: Unknown device 4511
	Control: I/O+ Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B-
	Status: Cap+ 66Mhz- UDF- FastB2B- ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR-
	Interrupt: pin C routed to IRQ 11
	Region 0: I/O ports at dc00 [size=256]
	Region 1: I/O ports at e000 [size=4]
	Region 2: I/O ports at e400 [size=4]
	Capabilities: [c0] Power Management version 2
		Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0-,D1-,D2-,D3hot-,D3cold-)
		Status: D0 PME-Enable- DSel=0 DScale=0 PME-

00:0b.0 Ethernet controller: Realtek Semiconductor Co., Ltd. RTL-8139 (rev 10)
	Subsystem: Realtek Semiconductor Co., Ltd. RT8139
	Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B-
	Status: Cap+ 66Mhz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR-
	Latency: 32 (8000ns min, 16000ns max)
	Interrupt: pin A routed to IRQ 11
	Region 0: I/O ports at ec00 [size=256]
	Region 1: Memory at d8000000 (32-bit, non-prefetchable) [size=256]
	Capabilities: [50] Power Management version 2
		Flags: PMEClk- DSI- D1+ D2+ AuxCurrent=0mA PME(D0-,D1+,D2+,D3hot+,D3cold-)
		Status: D0 PME-Enable+ DSel=0 DScale=0 PME-

01:00.0 VGA compatible controller: nVidia Corporation Vanta [NV6] (rev 15) (prog-if 00 [VGA])
	Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B-
	Status: Cap+ 66Mhz+ UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- <TAbort- <MAbort- >SERR- <PERR-
	Latency: 248 (1250ns min, 250ns max)
	Interrupt: pin A routed to IRQ 10
	Region 0: Memory at d6000000 (32-bit, non-prefetchable) [size=16M]
	Region 1: Memory at d4000000 (32-bit, prefetchable) [size=32M]
	Expansion ROM at <unassigned> [disabled] [size=64K]
	Capabilities: [60] Power Management version 1
		Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0-,D1-,D2-,D3hot-,D3cold-)
		Status: D0 PME-Enable- DSel=0 DScale=0 PME-
	Capabilities: [44] AGP version 2.0
		Status: RQ=31 SBA- 64bit- FW- Rate=x1,x2
		Command: RQ=0 SBA- AGP- 64bit- FW- Rate=<none>


 cat /proc/cpuinfo 
processor	: 0
vendor_id	: AuthenticAMD
cpu family	: 6
model		: 4
model name	: AMD Athlon(tm) Processor
stepping	: 2
cpu MHz		: 851.509
cache size	: 256 KB
fdiv_bug	: no
hlt_bug		: no
f00f_bug	: no
coma_bug	: no
fpu		: yes
fpu_exception	: yes
cpuid level	: 1
wp		: yes
flags		: fpu vme de pse tsc msr pae mce cx8 sep mtrr pge mca cmov pat pse36 mmx fxsr syscall mmxext 3dnowext 3dnow
bogomips	: 1697.38

 hdparm -i /dev/hda

/dev/hda:

 Model=ST52520A, FwRev=840304, SerialNo=PD771012
 Config={ HardSect NotMFM HdSw>15uSec Fixed DTR>10Mbs }
 RawCHS=4970/16/63, TrkSize=36540, SectSize=580, ECCbytes=4
 BuffType=DualPortCache, BuffSize=112kB, MaxMultSect=16, MultSect=off
 CurCHS=4970/16/63, CurSects=1902116940, LBA=yes, LBAsects=5010016
 IORDY=yes, tPIO={min:240,w/IORDY:120}, tDMA={min:120,rec:120}
 PIO modes: pio0 pio1 pio2 pio3 pio4 
 DMA modes: mdma0 mdma1 *mdma2

 hdparm -i /dev/hdb

/dev/hdb:

 Model=CREATIVE CD5233E, FwRev=C2.02, SerialNo=MT1199 A Firmware
 Config={ Fixed Removeable DTR<=5Mbs DTR>10Mbs nonMagnetic }
 RawCHS=0/0/0, TrkSize=0, SectSize=0, ECCbytes=0
 BuffType=unknown, BuffSize=0kB, MaxMultSect=0
 (maybe): CurCHS=0/0/0, CurSects=0, LBA=yes, LBAsects=0
 IORDY=yes, tPIO={min:227,w/IORDY:120}, tDMA={min:120,rec:150}
 PIO modes: pio0 pio1 pio2 pio3 pio4 
 DMA modes: sdma0 sdma1 sdma2 mdma0 mdma1 *mdma2

(CD-ROM - does it make any sense to use hdparm on it?)

 hdparm -i /dev/hdc

/dev/hdc:

 Model=IBM-DTLA-307030, FwRev=TX4OA50C, SerialNo=YKDYKMC0534
 Config={ HardSect NotMFM HdSw>15uSec Fixed DTR>10Mbs }
 RawCHS=16383/16/63, TrkSize=0, SectSize=0, ECCbytes=40
 BuffType=DualPortCache, BuffSize=1916kB, MaxMultSect=16, MultSect=off
 CurCHS=16383/16/63, CurSects=-66060037, LBA=yes, LBAsects=60036480
 IORDY=on/off, tPIO={min:240,w/IORDY:120}, tDMA={min:120,rec:120}
 PIO modes: pio0 pio1 pio2 pio3 pio4 
 DMA modes: mdma0 mdma1 mdma2 udma0 udma1 udma2 udma3 udma4 *udma5

-- 
Marek Pętlicki <marpet@buy.pl>


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Athlon possible fixes
  2001-05-06 18:16       ` Christian Bornträger
  2001-05-06 19:23         ` Marek Pętlicki
@ 2001-05-07 18:54         ` Jussi Laako
  2001-05-11 20:09         ` Jussi Laako
  2 siblings, 0 replies; 18+ messages in thread
From: Jussi Laako @ 2001-05-07 18:54 UTC (permalink / raw)
  To: Christian Bornträger; +Cc: linux-kernel

Christian Bornträger wrote:
> 
> Can you try and mail me if the Kernel 2.4.3 (without any ac patch) is 
> stable with your system even if you use autotune? "Downgrade" to this 
> kernel works fine for me.

At least RedHat's 2.4.2-2 doesn't seem to lockup. I'll compile and try 2.4.3
tomorrow.

 - Jussi Laako

-- 
PGP key fingerprint: 161D 6FED 6A92 39E2 EB5B  39DD A4DE 63EB C216 1E4B
Available at PGP keyservers

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Athlon possible fixes
  2001-05-06 12:51   ` Alan Cox
  2001-05-06 13:00     ` Chris Wedgwood
@ 2001-05-11  4:02     ` Ralf Baechle
  1 sibling, 0 replies; 18+ messages in thread
From: Ralf Baechle @ 2001-05-11  4:02 UTC (permalink / raw)
  To: Alan Cox; +Cc: Chris Wedgwood, linux-kernel

On Sun, May 06, 2001 at 01:51:59PM +0100, Alan Cox wrote:

> > There really needs to be a hardware fix... this doesn't stop some
> > application having it's owne optimised code from breaking on some
> > hardware (think games and similation software perhaps).
> 
> prefetch is virtually addresses. An application would need access to /dev/mem
> or similar. So the only folks I think it might actually bite are the Xserver
> people.

Prefetch bugs in hardware have bitten Linux/68k as early as '94; a GVP SCSI
HBA on the Amiga may touch areas beyond the last valid RAM address when
doing DMA to the last page.  Being a burned child from that time, Linux/MIPS
didn't use the last RAM page just to be on the safe side.

  Ralf

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Athlon possible fixes
  2001-05-06 18:16       ` Christian Bornträger
  2001-05-06 19:23         ` Marek Pętlicki
  2001-05-07 18:54         ` Jussi Laako
@ 2001-05-11 20:09         ` Jussi Laako
  2001-05-11 20:22           ` Alan Cox
  2 siblings, 1 reply; 18+ messages in thread
From: Jussi Laako @ 2001-05-11 20:09 UTC (permalink / raw)
  To: Christian Bornträger; +Cc: linux-kernel

Christian Bornträger wrote:
> 
> Can you try and mail me if the Kernel 2.4.3 (without any ac patch) is 
> stable with your system even if you use autotune? "Downgrade" to this 
> kernel works fine for me.

Ahmm, 2.4.3 doesn't work. Gives some IDE DMA timeouts on boot. Kernel was
compiled with Pentium-MMX processor setting, but I don't know if that's
enough to disable the Athlon code parts (autodetected at runtime?).

So only working kernel (without noautotune) on that A7V133 machine is
RedHat's 2.4.2-2 shipped with RedHat 7.1... But that's not good either
because the system has large reiserfs volume and 2.4.2-2 has some reiserfs
bugs.

I really start hating IDE/ATA stuff again.

 - Jussi Laako

-- 
PGP key fingerprint: 161D 6FED 6A92 39E2 EB5B  39DD A4DE 63EB C216 1E4B
Available at PGP keyservers

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Athlon possible fixes
  2001-05-11 20:09         ` Jussi Laako
@ 2001-05-11 20:22           ` Alan Cox
  2001-05-12  9:51             ` Jussi Laako
  0 siblings, 1 reply; 18+ messages in thread
From: Alan Cox @ 2001-05-11 20:22 UTC (permalink / raw)
  To: Jussi Laako; +Cc: Christian Bornträger, linux-kernel

> Ahmm, 2.4.3 doesn't work. Gives some IDE DMA timeouts on boot. Kernel was
> compiled with Pentium-MMX processor setting, but I don't know if that's
> enough to disable the Athlon code parts (autodetected at runtime?).

That sounds totally unrelated to any Athlon optimisations

> So only working kernel (without noautotune) on that A7V133 machine is
> RedHat's 2.4.2-2 shipped with RedHat 7.1... But that's not good either
> because the system has large reiserfs volume and 2.4.2-2 has some reiserfs

I wish I knew why the Red Hat one worked 8)

Alan


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Athlon possible fixes
  2001-05-11 20:22           ` Alan Cox
@ 2001-05-12  9:51             ` Jussi Laako
  0 siblings, 0 replies; 18+ messages in thread
From: Jussi Laako @ 2001-05-12  9:51 UTC (permalink / raw)
  To: Alan Cox; +Cc: Christian Bornträger, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 452 bytes --]

Alan Cox wrote:
> 
> > So only working kernel (without noautotune) on that A7V133 machine is
> > RedHat's 2.4.2-2 shipped with RedHat 7.1... But that's not good either
> > because the system has large reiserfs volume and 2.4.2-2 has some
> I wish I knew why the Red Hat one worked 8)

Here's my kernel config if anyone has any idea what's wrong...

-- 
PGP key fingerprint: 161D 6FED 6A92 39E2 EB5B  39DD A4DE 63EB C216 1E4B
Available at PGP keyservers

[-- Attachment #2: kernel2.cfg --]
[-- Type: application/x-ns-proxy-autoconfig, Size: 16439 bytes --]

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Athlon possible fixes
  2001-05-12 18:31 Ishikawa
@ 2001-05-12 23:02 ` Alan Cox
  0 siblings, 0 replies; 18+ messages in thread
From: Alan Cox @ 2001-05-12 23:02 UTC (permalink / raw)
  To: Ishikawa; +Cc: linux-kernel

> I was a little skeptical to think that the X11 server code
> has such a bug for SVGA 16bits color server today,
> and yet was still wondering if

Corner cases could exist. If you can replicate it the X folks will be most
interested I suspect.
> 
> But can the same problem manifest on AMD 751 chipset?
> That would explain this mysterious X11 server
> crash beatifully :-)

The X server doesn't use prefetch instructions...


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: Athlon possible fixes
@ 2001-05-12 18:31 Ishikawa
  2001-05-12 23:02 ` Alan Cox
  0 siblings, 1 reply; 18+ messages in thread
From: Ishikawa @ 2001-05-12 18:31 UTC (permalink / raw)
  To: linux-kernel

>On Sun, May 06, 2001 at 01:51:59PM +0100, Alan Cox wrote:
>
>> > There really needs to be a hardware fix... this doesn't stop some
>> > application having it's owne optimised code from breaking on some
>> > hardware (think games and similation software perhaps).
>>
>> prefetch is virtually addresses. An application would need access to
/dev/mem
>> or similar. So the only folks I think it might actually bite are the
Xserver
>> people.
>
>Prefetch bugs in hardware have biten Linux/68k as early as '94; a GVP
SCSI
>HBA on the Amiga may touch areas beyond the last valid RAM address when

>doing DMA to the last page. Being a burned child from that time
Linux/MIPS
>didn't use the last RAM page just to be on the safe side.
>
>  Ralf

I use a Duron 750 MHz and have experienced a strange X11 server error.
(The motherboard is a Gigabyte 7IXE4 and uses the AMD 751 and 756 chipsets.)

If I follow a certain steps accessing a web page using
netscape, the X11 server crashes reliably.
(The server is for ATI rage 128. Xfree86 3.3.6.)

After recompiling the X11 server with the debug flag to the C compiler,
I figured out that the X11 server crashes in a bitblt copy against
its backing up store. (I forgot the proper X11 terminology, but
this is where the image data is saved for quick re-display, etc.
You can build an image in a memory buffer and then simply copy it
onto screen memory, etc.)

I was a little skeptical to think that the X11 server code
has such a bug for SVGA 16bits color server today,
and yet was still wondering if
the code might want to access non-allocated area due
to some optimized accessing pattern or something.

(A long time ago, I had a similar bug on a dedicated bitblt
instruction for a workstation: the bitblt instruction could
access outside the boundary of the malloc-ed area
since it tries to access as many words as possible if there is a chance
to use long word access. In doing so, the CPU could
step outside the sbrk() limit and a VM access error condition was generated.
Since this access violation occurred inside the CPU firmware and was
not visible to the outside, it was very hard to track. Eventually, I figured
out the problem, and always allocated enough trailing area for bitmap
storage just in case the CPU tried to access a few words outside the limit.)

Now, back to the Duron/Athlon problem:
I wasn't following this Athlon bug discussion in depth, thinking
it has something to do with the VIA chipset alone.

But can the same problem manifest on the AMD 751 chipset?
That would explain this mysterious X11 server
crash beautifully :-)

Happy Hacking,
ci




^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2001-05-12 23:05 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2001-05-05  7:35 Athlon possible fixes Alan Cox
2001-05-05 16:26 ` Rogier Wolff
2001-05-05 16:42   ` Kurt Roeckx
2001-05-05 22:44   ` Seth Goldberg
2001-05-06 16:44     ` Jussi Laako
2001-05-06 17:41       ` Zilvinas Valinskas
2001-05-06 18:16       ` Christian Bornträger
2001-05-06 19:23         ` Marek Pętlicki
2001-05-07 18:54         ` Jussi Laako
2001-05-11 20:09         ` Jussi Laako
2001-05-11 20:22           ` Alan Cox
2001-05-12  9:51             ` Jussi Laako
2001-05-06  2:23 ` Chris Wedgwood
2001-05-06 12:51   ` Alan Cox
2001-05-06 13:00     ` Chris Wedgwood
2001-05-11  4:02     ` Ralf Baechle
2001-05-12 18:31 Ishikawa
2001-05-12 23:02 ` Alan Cox

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).