[PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
@ 2009-09-24  0:45 Rex Feany
  2009-09-24  6:44 ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 58+ messages in thread
From: Rex Feany @ 2009-09-24  0:45 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev

After upgrading to the latest kernel on my mpc875 userspace started
running incredibly slow (hours to get to a shell, even!).
I tracked it down to commit 8d30c14cab30d405a05f2aaceda1e9ad57800f36,
that patch removed a work-around for the 8xx. Adding it
back makes my problem go away.

Signed-off-by: Rex Feany <rfeany@mrv.com>

diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 627767d..d8e6725 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -30,6 +30,8 @@
 #include <asm/tlbflush.h>
 #include <asm/tlb.h>
 
+#include "mmu_decl.h"
+
 static DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
 static unsigned long pte_freelist_forced_free;
 
@@ -119,7 +121,7 @@ void pte_free_finish(void)
 /*
  * Handle i/d cache flushing, called from set_pte_at() or ptep_set_access_flags()
  */
-static pte_t do_dcache_icache_coherency(pte_t pte)
+static pte_t do_dcache_icache_coherency(pte_t pte, unsigned long addr)
 {
 	unsigned long pfn = pte_pfn(pte);
 	struct page *page;
@@ -128,6 +130,17 @@ static pte_t do_dcache_icache_coherency(pte_t pte)
 		return pte;
 	page = pfn_to_page(pfn);
 
+#ifdef CONFIG_8xx
+       /* On 8xx, cache control instructions (particularly
+        * "dcbst" from flush_dcache_icache) fault as write
+        * operation if there is an unpopulated TLB entry
+        * for the address in question. To workaround that,
+        * we invalidate the TLB here, thus avoiding dcbst
+        * misbehaviour.
+        */
+       _tlbil_va(addr, 0 /* 8xx doesn't care about PID */);
+#endif
+
 	if (!PageReserved(page) && !test_bit(PG_arch_1, &page->flags)) {
 		pr_devel("do_dcache_icache_coherency... flushing\n");
 		flush_dcache_icache_page(page);
@@ -198,7 +211,7 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte
 	 */
 	pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
 	if (pte_need_exec_flush(pte, 1))
-		pte = do_dcache_icache_coherency(pte);
+		pte = do_dcache_icache_coherency(pte, addr);
 
 	/* Perform the setting of the PTE */
 	__set_pte_at(mm, addr, ptep, pte, 0);
@@ -216,7 +229,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
 {
 	int changed;
 	if (!dirty && pte_need_exec_flush(entry, 0))
-		entry = do_dcache_icache_coherency(entry);
+		entry = do_dcache_icache_coherency(entry, address);
 	changed = !pte_same(*(ptep), entry);
 	if (changed) {
 		if (!(vma->vm_flags & VM_HUGETLB))

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-24  0:45 [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite Rex Feany
@ 2009-09-24  6:44 ` Benjamin Herrenschmidt
  2009-09-24 23:33   ` Rex Feany
  0 siblings, 1 reply; 58+ messages in thread
From: Benjamin Herrenschmidt @ 2009-09-24  6:44 UTC (permalink / raw)
  To: Rex Feany; +Cc: linuxppc-dev

On Wed, 2009-09-23 at 17:45 -0700, Rex Feany wrote:
> After upgrading to the latest kernel on my mpc875 userspace started
> running incredibly slow (hours to get to a shell, even!).
> I tracked it down to commit 8d30c14cab30d405a05f2aaceda1e9ad57800f36,
> that patch removed a work-around for the 8xx. Adding it
> back makes my problem go away.
> 
> Signed-off-by: Rex Feany <rfeany@mrv.com>

Note that while your patch applies to earlier kernels such as 2.6.31, it
doesn't apply with current Linus tree due to changes in that file. I've
updated the powerpc "merge" branch of my tree with a hand-modified
version, please test and let me know. That should also contain the
_PAGE_SPECIAL fix.
 
You can get my tree at:

git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git

(To get the "merge" branch, just add "merge" after the clone if you
are cloning it, or just create a local branch and manually pull
into it)

Note that i just pushed out so it may take a little while for
the kernel.org mirrors to get it, the commit ID is: 

ae48e383e3299c2f87723d21df2db97927774b1e

Cheers,
Ben. 

> diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
> index 627767d..d8e6725 100644
> --- a/arch/powerpc/mm/pgtable.c
> +++ b/arch/powerpc/mm/pgtable.c
> @@ -30,6 +30,8 @@
>  #include <asm/tlbflush.h>
>  #include <asm/tlb.h>
>  
> +#include "mmu_decl.h"
> +
>  static DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
>  static unsigned long pte_freelist_forced_free;
>  
> @@ -119,7 +121,7 @@ void pte_free_finish(void)
>  /*
>   * Handle i/d cache flushing, called from set_pte_at() or ptep_set_access_flags()
>   */
> -static pte_t do_dcache_icache_coherency(pte_t pte)
> +static pte_t do_dcache_icache_coherency(pte_t pte, unsigned long addr)
>  {
>  	unsigned long pfn = pte_pfn(pte);
>  	struct page *page;
> @@ -128,6 +130,17 @@ static pte_t do_dcache_icache_coherency(pte_t pte)
>  		return pte;
>  	page = pfn_to_page(pfn);
>  
> +#ifdef CONFIG_8xx
> +       /* On 8xx, cache control instructions (particularly
> +        * "dcbst" from flush_dcache_icache) fault as write
> +        * operation if there is an unpopulated TLB entry
> +        * for the address in question. To workaround that,
> +        * we invalidate the TLB here, thus avoiding dcbst
> +        * misbehaviour.
> +        */
> +       _tlbil_va(addr, 0 /* 8xx doesn't care about PID */);
> +#endif
> +
>  	if (!PageReserved(page) && !test_bit(PG_arch_1, &page->flags)) {
>  		pr_devel("do_dcache_icache_coherency... flushing\n");
>  		flush_dcache_icache_page(page);
> @@ -198,7 +211,7 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte
>  	 */
>  	pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
>  	if (pte_need_exec_flush(pte, 1))
> -		pte = do_dcache_icache_coherency(pte);
> +		pte = do_dcache_icache_coherency(pte, addr);
>  
>  	/* Perform the setting of the PTE */
>  	__set_pte_at(mm, addr, ptep, pte, 0);
> @@ -216,7 +229,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
>  {
>  	int changed;
>  	if (!dirty && pte_need_exec_flush(entry, 0))
> -		entry = do_dcache_icache_coherency(entry);
> +		entry = do_dcache_icache_coherency(entry, address);
>  	changed = !pte_same(*(ptep), entry);
>  	if (changed) {
>  		if (!(vma->vm_flags & VM_HUGETLB))

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-24  6:44 ` Benjamin Herrenschmidt
@ 2009-09-24 23:33   ` Rex Feany
  2009-09-24 23:52     ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 58+ messages in thread
From: Rex Feany @ 2009-09-24 23:33 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev

Thus spake Benjamin Herrenschmidt (benh@kernel.crashing.org):

> You can get my tree at:
> 
> git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git
> 
> (To get the "merge" branch, just add "merge" after the clone if you
> are cloning it, or just create a local branch and manually pull
> into it)

Your tree hangs on boot, similar to what I saw without the 8xx
work-around patch -- it is hard to tell if it is the same though. :(

> Note that i just pushed out so it may take a little while for
> the kernel.org mirrors to get it, the commit ID is: 
> 
> ae48e383e3299c2f87723d21df2db97927774b1e

Maybe I fail git 101 -- I can't seem to find that commit ID in the repo
above, but I do see the patches you commited to fix PAGE_SPECIAL and the
8xx work-around.

thanks!
/rex.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-24 23:33   ` Rex Feany
@ 2009-09-24 23:52     ` Benjamin Herrenschmidt
  2009-09-25  1:35       ` Rex Feany
  0 siblings, 1 reply; 58+ messages in thread
From: Benjamin Herrenschmidt @ 2009-09-24 23:52 UTC (permalink / raw)
  To: Rex Feany; +Cc: linuxppc-dev

On Thu, 2009-09-24 at 16:33 -0700, Rex Feany wrote:
> Thus spake Benjamin Herrenschmidt (benh@kernel.crashing.org):
> 
> > You can get my tree at:
> > 
> > git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git
> > 
> > (To get the "merge" branch, just add "merge" after the clone if you
> > are cloning it, or just create a local branch and manually pull
> > into it)
> 
> Your tree hangs on boot, similar to what I saw without the 8xx
> work-around patch -- it is hard to tell if it is the same though. :(

There's no backtrace ? Where does it hang ? Also which workaround
patch ? The missing tlbil_va() or the _PAGE_SPECIAL problem ?

> > Note that i just pushed out so it may take a little while for
> > the kernel.org mirrors to get it, the commit ID is: 
> > 
> > ae48e383e3299c2f87723d21df2db97927774b1e
> 
> Maybe I fail git 101 -- I can't seem to find that commit ID in the repo
> above, but I do see the patches you commited to fix PAGE_SPECIAL and the
> 8xx work-around.

Right. I must have screwed something with the ID.

Ben.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-24 23:52     ` Benjamin Herrenschmidt
@ 2009-09-25  1:35       ` Rex Feany
  2009-09-25  1:51         ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 58+ messages in thread
From: Rex Feany @ 2009-09-25  1:35 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev

Thus spake Benjamin Herrenschmidt (benh@kernel.crashing.org):

> > Your tree hangs on boot, similar to what I saw without the 8xx
> > work-around patch -- it is hard to tell if it is the same though. :(
> 
> There's no backtrace ? Where does it hang ? Also which workaround
> patch ? The missing tlbil_va() or the _PAGE_SPECIAL problem ?
> 

Ben, I'm sorry, my last email was basically useless. I was refering to the
missing tlbil_va().  The system doesn't crash, but it does seem to hang right
after "Freeing unused kernel memory: 100k init" using your tree.

If I move the tlbil_va() outside of the test for PG_arch_1 :

diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 5304093..d927269 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -176,7 +176,7 @@ static pte_t set_pte_filter(pte_t pte, unsigned long addr)
                struct page *pg = maybe_pte_to_page(pte);
                if (!pg)
                        return pte;
-               if (!test_bit(PG_arch_1, &pg->flags)) {
+
 #ifdef CONFIG_8xx
                        /* On 8xx, cache control instructions (particularly
                         * "dcbst" from flush_dcache_icache) fault as write
@@ -188,6 +188,8 @@ static pte_t set_pte_filter(pte_t pte, unsigned long addr)
                        /* 8xx doesn't care about PID, size or ind args */
                        _tlbil_va(addr, 0, 0, 0);
 #endif /* CONFIG_8xx */
+
+               if (!test_bit(PG_arch_1, &pg->flags)) {
                        flush_dcache_icache_page(pg);
                        set_bit(PG_arch_1, &pg->flags);
                }

Then I can boot and get to a shell, but userspace is slow. 8 seconds to mount
/proc (vs. less then a second using my old kernel)! Maybe this is an
unrelated issue?  I'm pretty clueless about the details, I'm sorry.
PG_arch_1 is used to prevent a cache flush unless it is actually needed?
Then why would changing the location of the tlbil_va() make a difference?

take care!
/rex.

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-25  1:35       ` Rex Feany
@ 2009-09-25  1:51         ` Benjamin Herrenschmidt
  2009-09-25  3:03           ` Benjamin Herrenschmidt
  2009-09-29  1:21           ` Rex Feany
  0 siblings, 2 replies; 58+ messages in thread
From: Benjamin Herrenschmidt @ 2009-09-25  1:51 UTC (permalink / raw)
  To: Rex Feany; +Cc: linuxppc-dev

On Thu, 2009-09-24 at 18:35 -0700, Rex Feany wrote:
> 
> Then I can boot and get to a shell, but userspace is slow. 8 seconds
> to mount
> /proc (vs. less then a second using my old kernel)! Maybe this is an
> unrelated issue?  I'm pretty clueless about the details, I'm sorry.
> PG_arch_1 is used to prevent a cache flush unless it is actually
> needed?
> Then why would changing the location of the tlbil_va() make a
> difference?

I think there's more finishyness to 8xx than we thought. IE. That
tlbil_va might have more reasons to be there than what the comment
seems to advertize. Can you try to move it even higher up ? IE.
Unconditionally at the beginning of set_pte_filter ?

Also, if that doesn't help, can you try putting one in
set_access_flags_filter() just below ?

(Beware that there's two different versions of both functions, only the
first one is compiled/used on 8xx).

It's going to be hard for me to get that "right" since I don't really
know what's going on with the core here, but I suppose if we get it
moving along with extra tlb invalidations, that should be "good enough"
until somebody who really knows what's going on comes up with possibly
a better fix.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-25  1:51         ` Benjamin Herrenschmidt
@ 2009-09-25  3:03           ` Benjamin Herrenschmidt
  2009-09-25  8:31             ` Joakim Tjernlund
  2009-09-25 21:18             ` Rex Feany
  2009-09-29  1:21           ` Rex Feany
  1 sibling, 2 replies; 58+ messages in thread
From: Benjamin Herrenschmidt @ 2009-09-25  3:03 UTC (permalink / raw)
  To: Rex Feany; +Cc: linuxppc-dev


> I think there's more finishyness to 8xx than we thought. IE. That
> tlbil_va might have more reasons to be there than what the comment
> seems to advertize. Can you try to move it even higher up ? IE.
> Unconditionally at the beginning of set_pte_filter ?
> 
> Also, if that doesn't help, can you try putting one in
> set_access_flags_filter() just below ?

Ok, I got a refresher on the whole concept of "unpopulated TLB entries"
on 8xx, and that's damn scary. I think what mislead me initially is that
the comment around the workaround is simply not properly describing the
extent of the problem :-)

So I'm not going to make the 8xx TLB miss code sane, that's beyond what
I'm prepare to do with it, but I suspect that this should fix it (on top
of upstream). Let me know if that's enough or if we also need to put
one of these in ptep_set_access_flags().

Please let me know if that works for you.

Cheers,
Ben.

diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 5304093..7a8e676 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -170,6 +170,16 @@ struct page * maybe_pte_to_page(pte_t pte)
 
 static pte_t set_pte_filter(pte_t pte, unsigned long addr)
 {
+#ifdef CONFIG_8xx
+	/* 8xx has a weird concept of "unpopulated" entries. When we take
+	 * a TLB miss for a non-valid PTE, we insert such an entry which
+	 * causes a page fault the next time around. This entry must now
+	 * be kicked out or we'll just fault again
+	 */
+	/* 8xx doesn't care about PID, size or ind args */
+	_tlbil_va(addr, 0, 0, 0);
+#endif /* CONFIG_8xx */
+
 	pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
 	if (pte_looks_normal(pte) && !(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) ||
 				       cpu_has_feature(CPU_FTR_NOEXECUTE))) {
@@ -177,17 +187,6 @@ static pte_t set_pte_filter(pte_t pte, unsigned long addr)
 		if (!pg)
 			return pte;
 		if (!test_bit(PG_arch_1, &pg->flags)) {
-#ifdef CONFIG_8xx
-			/* On 8xx, cache control instructions (particularly
-			 * "dcbst" from flush_dcache_icache) fault as write
-			 * operation if there is an unpopulated TLB entry
-			 * for the address in question. To workaround that,
-			 * we invalidate the TLB here, thus avoiding dcbst
-			 * misbehaviour.
-			 */
-			/* 8xx doesn't care about PID, size or ind args */
-			_tlbil_va(addr, 0, 0, 0);
-#endif /* CONFIG_8xx */
 			flush_dcache_icache_page(pg);
 			set_bit(PG_arch_1, &pg->flags);
 		}

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-25  3:03           ` Benjamin Herrenschmidt
@ 2009-09-25  8:31             ` Joakim Tjernlund
  2009-09-25  9:47               ` Benjamin Herrenschmidt
  2009-09-25 21:18             ` Rex Feany
  1 sibling, 1 reply; 58+ messages in thread
From: Joakim Tjernlund @ 2009-09-25  8:31 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, Rex Feany

>
>
> > I think there's more finishyness to 8xx than we thought. IE. That
> > tlbil_va might have more reasons to be there than what the comment
> > seems to advertize. Can you try to move it even higher up ? IE.
> > Unconditionally at the beginning of set_pte_filter ?
> >
> > Also, if that doesn't help, can you try putting one in
> > set_access_flags_filter() just below ?
>
> Ok, I got a refresher on the whole concept of "unpopulated TLB entries"
> on 8xx, and that's damn scary. I think what mislead me initially is that
> the comment around the workaround is simply not properly describing the
> extent of the problem :-)
>
> So I'm not going to make the 8xx TLB miss code sane, that's beyond what
> I'm prepare to do with it, but I suspect that this should fix it (on top
> of upstream). Let me know if that's enough or if we also need to put
> one of these in ptep_set_access_flags().
>
> Please let me know if that works for you.
>
> Cheers,
> Ben.
>
> diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
> index 5304093..7a8e676 100644
> --- a/arch/powerpc/mm/pgtable.c
> +++ b/arch/powerpc/mm/pgtable.c
> @@ -170,6 +170,16 @@ struct page * maybe_pte_to_page(pte_t pte)
>
>  static pte_t set_pte_filter(pte_t pte, unsigned long addr)
>  {
> +#ifdef CONFIG_8xx
> +   /* 8xx has a weird concept of "unpopulated" entries. When we take
> +    * a TLB miss for a non-valid PTE, we insert such an entry which
> +    * causes a page fault the next time around. This entry must now
> +    * be kicked out or we'll just fault again
> +    */
> +   /* 8xx doesn't care about PID, size or ind args */
> +   _tlbil_va(addr, 0, 0, 0);
> +#endif /* CONFIG_8xx */
> +

The main problem with 8xx it does not update the DAR register in
the TLB Miss/Fault handlers for cache instructions :( It on old bug
that was found only some years ago.

I think the old comment is correct though, as I recall it was Marcelo
that found the problem and added the workaround.

   Jocke

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-25  8:31             ` Joakim Tjernlund
@ 2009-09-25  9:47               ` Benjamin Herrenschmidt
  2009-09-25 10:21                 ` Joakim Tjernlund
  0 siblings, 1 reply; 58+ messages in thread
From: Benjamin Herrenschmidt @ 2009-09-25  9:47 UTC (permalink / raw)
  To: Joakim Tjernlund; +Cc: linuxppc-dev, Rex Feany

On Fri, 2009-09-25 at 10:31 +0200, Joakim Tjernlund wrote:
> 
> The main problem with 8xx it does not update the DAR register in
> the TLB Miss/Fault handlers for cache instructions :( It on old bug
> that was found only some years ago.
> 
> I think the old comment is correct though, as I recall it was Marcelo
> that found the problem and added the workaround.

But the TLB needs flushing on more than just the cache instructions,
no ?

IE. We take a TLB miss, there's no valid PTE, we put one of those
"unpopulated" entries in and get into the page fault, at which point we
do a set_pte, we -still- need to do an invalidation to get rid of the
unpopulated entry so it gets a new TLB miss no ? Without that, it's just
going to fault over and over again...

In any case, I think flushing unconditionally the target address isn't
going to hurt since we are just changing its PTE anyways.

As for the DAR problem, I'm not sure whether we really need a workaround
since I haven't seem much people complaining about it so far :-)

Can you educate me more on the problem ? Can it be fixed without
bloating those handlers to oblivion ?

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-25  9:47               ` Benjamin Herrenschmidt
@ 2009-09-25 10:21                 ` Joakim Tjernlund
  0 siblings, 0 replies; 58+ messages in thread
From: Joakim Tjernlund @ 2009-09-25 10:21 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, Rex Feany

[-- Attachment #1: Type: text/plain, Size: 2112 bytes --]

Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 25/09/2009 11:47:34:
>
> On Fri, 2009-09-25 at 10:31 +0200, Joakim Tjernlund wrote:
> >
> > The main problem with 8xx it does not update the DAR register in
> > the TLB Miss/Fault handlers for cache instructions :( It on old bug
> > that was found only some years ago.
> >
> > I think the old comment is correct though, as I recall it was Marcelo
> > that found the problem and added the workaround.
>
> But the TLB needs flushing on more than just the cache instructions,
> no ?
>
> IE. We take a TLB miss, there's no valid PTE, we put one of those
> "unpopulated" entries in and get into the page fault, at which point we
> do a set_pte, we -still- need to do an invalidation to get rid of the
> unpopulated entry so it gets a new TLB miss no ? Without that, it's just
> going to fault over and over again...

I don't know enough about 8xx in 2.6 as we still use 2.4 for 8xx to
say for sure.

>
> In any case, I think flushing unconditionally the target address isn't
> going to hurt since we are just changing its PTE anyways.
>
> As for the DAR problem, I'm not sure whether we really need a workaround
> since I haven't seem much people complaining about it so far :-)

I did some years ago on 2.4 but no one cared enough :(
The drawbacks of not handling this problem is that you will have
to very carful to use cache instructions and user space must
be especially compiled to omit using them in optimizations.

>
> Can you educate me more on the problem ? Can it be fixed without
> bloating those handlers to oblivion ?

Yes, I fixed it for myself but the fix was never accepted. Currently
only TLB Error depends on DAR so what I did was to tag DAR with an impossible
value and test for that value in the TLB Error handler. If it matched I
branched to a subroutine the did instruction decoding in assembler to
get at registers used and calculate DAR, then return to the TLB error
handler. In hindsight it would have been better to do this work in
handle_page_fault.

I am attaching my old head_8xx.S for 2.4

 Jocke
(See attached file: head_8xx.S)

[-- Attachment #2: head_8xx.S --]
[-- Type: application/octet-stream, Size: 33768 bytes --]

/*
 *  arch/ppc/kernel/except_8xx.S
 *
 *  PowerPC version 
 *    Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *  Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP
 *    Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu>
 *  Low-level exception handlers and MMU support
 *  rewritten by Paul Mackerras.
 *    Copyright (C) 1996 Paul Mackerras.
 *  MPC8xx modifications by Dan Malek
 *    Copyright (C) 1997 Dan Malek (dmalek@jlc.net).
 *
 *  This file contains low-level support and setup for PowerPC 8xx
 *  embedded processors, including trap and interrupt dispatch.
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; either version
 *  2 of the License, or (at your option) any later version.
 *
 */

#include <linux/config.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
#include <asm/cache.h>
#include <asm/pgtable.h>
#include <asm/cputable.h>
#include <asm/ppc_asm.h>
#include "ppc_defs.h"

#ifdef CONFIG_8xx_DCBxFIXED
/* These macros are used to tag DAR with a known value so that the
 * DataTLBError can recognize a buggy dcbx instruction and workaround
 * the problem.
 */
	#define TAG_VAL 0x00f0	/*  -1 may also be used */
	#define TAG_DAR_R20 	\
		li	r20, TAG_VAL;\
		mtspr	DAR, r20;
#else
	#define TAG_DAR_R20
#endif
/* Macro to make the code more readable. */
#ifdef CONFIG_8xx_CPU6
  #define DO_8xx_CPU6(val, reg) \
	li	reg, val; \
	stw	reg, 12(r0); \
	lwz	reg, 12(r0);
#else
  #define DO_8xx_CPU6(val, reg)
#endif
	.text
	.globl	_stext
_stext:

/*
 * _start is defined this way because the XCOFF loader in the OpenFirmware
 * on the powermac expects the entry point to be a procedure descriptor.
 */
	.text
	.globl	_start
_start:

/* MPC8xx
 * This port was done on an MBX board with an 860.  Right now I only
 * support an ELF compressed (zImage) boot from EPPC-Bug because the
 * code there loads up some registers before calling us:
 *   r3: ptr to board info data
 *   r4: initrd_start or if no initrd then 0
 *   r5: initrd_end - unused if r4 is 0
 *   r6: Start of command line string
 *   r7: End of command line string
 *
 * I decided to use conditional compilation instead of checking PVR and
 * adding more processor specific branches around code I don't need.
 * Since this is an embedded processor, I also appreciate any memory
 * savings I can get.
 *
 * The MPC8xx does not have any BATs, but it supports large page sizes.
 * We first initialize the MMU to support 8M byte pages, then load one
 * entry into each of the instruction and data TLBs to map the first
 * 8M 1:1.  I also mapped an additional I/O space 1:1 so we can get to
 * the "internal" processor registers before MMU_init is called.
 *
 * The TLB code currently contains a major hack.  Since I use the condition
 * code register, I have to save and restore it.  I am out of registers, so
 * I just store it in memory location 0 (the TLB handlers are not reentrant).
 * To avoid making any decisions, I need to use the "segment" valid bit
 * in the first level table, but that would require many changes to the
 * Linux page directory/table functions that I don't want to do right now.
 *
 * I used to use SPRG2 for a temporary register in the TLB handler, but it
 * has since been put to other uses.  I now use a hack to save a register
 * and the CCR at memory location 0.....Someday I'll fix this.....
 *	-- Dan
 */

	.globl	__start
__start:
	/* To accomodate some SMP systems that overwrite the first few
	 * locations before cpu 0 starts, the bootloader starts us at 0xc.
	 */
	nop
	nop
	nop
	mr	r31,r3			/* save parameters */
	mr	r30,r4
	mr	r29,r5
	mr	r28,r6
	mr	r27,r7
	li	r24,0			/* cpu # */

	/* We have to turn on the MMU right away so we get cache modes
	 * set correctly.
	 */
	bl	initial_mmu

/* We now have the lower 8 Meg mapped into TLB entries, and the caches
 * ready to work.
 */

turn_on_mmu:
	mfmsr	r0
	ori	r0,r0,MSR_DR|MSR_IR
	mtspr	SRR1,r0
	lis	r0,start_here@h
	ori	r0,r0,start_here@l
	mtspr	SRR0,r0
	SYNC
	rfi				/* enables MMU */

/*
 * Exception entry code.  This code runs with address translation
 * turned off, i.e. using physical addresses.
 * We assume sprg3 has the physical address of the current
 * task's thread_struct.
 */
#define EXCEPTION_PROLOG	\
	mtspr	SPRG0,r20;	\
	mtspr	SPRG1,r21;	\
	mfcr	r20;		\
	mfspr	r21,SPRG2;		/* exception stack to use from */ \
	cmpwi	0,r21,0;		/* user mode or RTAS */ \
	bne	1f;		\
	tophys(r21,r1);			/* use tophys(kernel sp) otherwise */ \
	subi	r21,r21,INT_FRAME_SIZE;	/* alloc exc. frame */\
1:	stw	r20,_CCR(r21);		/* save registers */ \
	stw	r22,GPR22(r21);	\
	stw	r23,GPR23(r21);	\
	mfspr	r20,SPRG0;	\
	stw	r20,GPR20(r21);	\
	mfspr	r22,SPRG1;	\
	stw	r22,GPR21(r21);	\
	mflr	r20;		\
	stw	r20,_LINK(r21);	\
	mfctr	r22;		\
	stw	r22,_CTR(r21);	\
	mfspr	r20,XER;	\
	stw	r20,_XER(r21);	\
	mfspr	r22,SRR0;	\
	mfspr	r23,SRR1;	\
	stw	r0,GPR0(r21);	\
	stw	r1,GPR1(r21);	\
	stw	r2,GPR2(r21);	\
	stw	r1,0(r21);	\
	tovirt(r1,r21);			/* set new kernel sp */	\
	SAVE_4GPRS(3, r21);	\
	SAVE_GPR(7, r21);
/*
 * Note: code which follows this uses cr0.eq (set if from kernel),
 * r21, r22 (SRR0), and r23 (SRR1).
 */

/*
 * Exception vectors.
 */

#define FINISH_EXCEPTION(func)			\
	bl	transfer_to_handler;		\
	.long	func;				\
	.long	ret_from_except

#define STD_EXCEPTION(n, label, hdlr)		\
	. = n;					\
label:						\
	EXCEPTION_PROLOG;			\
	TAG_DAR_R20;				\
	addi	r3,r1,STACK_FRAME_OVERHEAD;	\
	li	r20,MSR_KERNEL;			\
	FINISH_EXCEPTION(hdlr)

/* System reset */
	STD_EXCEPTION(0x100, Reset, UnknownException)

/* Machine check */
	STD_EXCEPTION(0x200, MachineCheck, MachineCheckException)

/* Data access exception.
 * This is "never generated" by the MPC8xx.  We jump to it for other
 * translation errors.
 */
	. = 0x300
DataAccess:
	EXCEPTION_PROLOG
	mfspr	r20,DSISR
	stw	r20,_DSISR(r21)
	mr	r5,r20
	mfspr	r4,DAR
	stw	r4,_DAR(r21)
	TAG_DAR_R20
	addi	r3,r1,STACK_FRAME_OVERHEAD
	li	r20,MSR_KERNEL
	rlwimi	r20,r23,0,16,16		/* copy EE bit from saved MSR */
	FINISH_EXCEPTION(do_page_fault)

/* Instruction access exception.
 * This is "never generated" by the MPC8xx.  We jump to it for other
 * translation errors.
 */
	. = 0x400
InstructionAccess:
	EXCEPTION_PROLOG
	addi	r3,r1,STACK_FRAME_OVERHEAD
	mr	r4,r22
	mr	r5,r23
	li	r20,MSR_KERNEL
	rlwimi	r20,r23,0,16,16		/* copy EE bit from saved MSR */
	FINISH_EXCEPTION(do_page_fault)

/* External interrupt */
	. = 0x500;
HardwareInterrupt:
	EXCEPTION_PROLOG;
	addi	r3,r1,STACK_FRAME_OVERHEAD
	li	r20,MSR_KERNEL
	li	r4,0
	bl	transfer_to_handler
	.globl	do_IRQ_intercept
do_IRQ_intercept:
	.long	do_IRQ;
	.long	ret_from_intercept

/* Alignment exception */
	. = 0x600
Alignment:
	EXCEPTION_PROLOG
	mfspr	r4,DAR
	stw	r4,_DAR(r21)
	TAG_DAR_R20
	mfspr	r5,DSISR
	stw	r5,_DSISR(r21)
	addi	r3,r1,STACK_FRAME_OVERHEAD
	li	r20,MSR_KERNEL
	rlwimi	r20,r23,0,16,16		/* copy EE bit from saved MSR */
	FINISH_EXCEPTION(AlignmentException)

/* Program check exception */
	. = 0x700
ProgramCheck:
	EXCEPTION_PROLOG
	addi	r3,r1,STACK_FRAME_OVERHEAD
	li	r20,MSR_KERNEL
	rlwimi	r20,r23,0,16,16		/* copy EE bit from saved MSR */
	FINISH_EXCEPTION(ProgramCheckException)

/* No FPU on MPC8xx.  This exception is not supposed to happen.
*/
	STD_EXCEPTION(0x800, FPUnavailable, UnknownException)

	. = 0x900
Decrementer:
	EXCEPTION_PROLOG
	addi	r3,r1,STACK_FRAME_OVERHEAD
	li	r20,MSR_KERNEL
	bl	transfer_to_handler
	.globl	timer_interrupt_intercept
timer_interrupt_intercept:
	.long	timer_interrupt
	.long	ret_from_intercept

	STD_EXCEPTION(0xa00, Trap_0a, UnknownException)
	STD_EXCEPTION(0xb00, Trap_0b, UnknownException)

/* System call */
	. = 0xc00
SystemCall:
	EXCEPTION_PROLOG
	stw	r3,ORIG_GPR3(r21)
	li	r20,MSR_KERNEL
	rlwimi	r20,r23,0,16,16		/* copy EE bit from saved MSR */
	FINISH_EXCEPTION(DoSyscall)

/* Single step - not used on 601 */
	STD_EXCEPTION(0xd00, SingleStep, SingleStepException)

	STD_EXCEPTION(0xe00, Trap_0e, UnknownException)
	STD_EXCEPTION(0xf00, Trap_0f, UnknownException)

/* On the MPC8xx, this is a software emulation interrupt.  It occurs
 * for all unimplemented and illegal instructions.
 */
	STD_EXCEPTION(0x1000, SoftEmu, SoftwareEmulation)

	. = 0x1100
/*
 * For the MPC8xx, this is a software tablewalk to load the instruction
 * TLB.  It is modelled after the example in the Motorola manual.  The task
 * switch loads the M_TWB register with the pointer to the first level table.
 * If we discover there is no second level table (the value is zero), the
 * plan was to load that into the TLB, which causes another fault into the
 * TLB Error interrupt where we can handle such problems.  However, that did
 * not work, so if we discover there is no second level table, we restore
 * registers and branch to the error exception.  We have to use the MD_xxx
 * registers for the tablewalk because the equivalent MI_xxx registers
 * only perform the attribute functions.
 */
InstructionTLBMiss:
#ifdef CONFIG_8xx_CPU6
	stw	r3, 8(r0)
	li	r3, 0x3f80
	stw	r3, 12(r0)
	lwz	r3, 12(r0)
#endif
	mtspr	M_TW, r20	/* Save a couple of working registers */
#if !CONFIG_PIN_TLB || CONFIG_MODULES
	mfcr	r20
	stw	r20, 0(r0)
#endif
	stw	r21, 4(r0)
	mfspr	r20, SRR0	/* Get effective address of fault */
#ifdef CONFIG_8xx_CPU6
	li	r3, 0x3780
	stw	r3, 12(r0)
	lwz	r3, 12(r0)
#endif
	mtspr	MD_EPN, r20	/* Have to use MD_EPN for walk, MI_EPN can't */
	mfspr	r20, M_TWB	/* Get level 1 table entry address */

#if !CONFIG_PIN_TLB || CONFIG_MODULES
	/* If we are faulting a kernel address, we have to use the
	 * kernel page tables.
	 */
	andi.	r21, r20, 0x0800	/* Address >= 0x80000000 */
	beq	3f
	lis	r21, swapper_pg_dir@h
	ori	r21, r21, swapper_pg_dir@l
	rlwimi	r20, r21, 0, 2, 19
3:
	lwz	r21, 0(r20)	/* Get the level 1 entry */
	rlwinm.	r20, r21,0,0,19	/* Extract page descriptor page address */
	tophys(r21,r21)
	ori	r21,r21,1		/* Set valid bit */
	beq	2f		/* If zero, don't try to find a pte */
#else
	lwz	r21, 0(r20)	/* Get the level 1 entry */
	mfcr	r20
	cmplwi	cr0,r21,0x0fff	/* Test page descriptor page address */
	tophys(r21,r21)
	ori	r21,r21,1		/* Set valid bit */
	bng-	2f		/* If zero, don't try to find a pte */
	mtcr	r20
#endif

	/* We have a pte table, so load the MI_TWC with the attributes
	 * for this "segment."
	 */
#ifdef CONFIG_8xx_CPU6
	li	r3, 0x2b80
	stw	r3, 12(r0)
	lwz	r3, 12(r0)
#endif
	mtspr	MI_TWC, r21	/* Set segment attributes */
#ifdef CONFIG_8xx_CPU6
	li	r3, 0x3b80
	stw	r3, 12(r0)
	lwz	r3, 12(r0)
#endif
	mtspr	MD_TWC, r21	/* Load pte table base address */
	mfspr	r21, MD_TWC	/* ....and get the pte address */
	lwz	r20, 0(r21)	/* Get the pte */

	ori	r20, r20, _PAGE_ACCESSED
	stw	r20, 0(r21)

	/* The Linux PTE won't go exactly into the MMU TLB.
	 * Software indicator bits 21, 22 and 28 must be clear.
	 * Software indicator bits 24, 25, 26, and 27 must be
	 * set.  All other Linux PTE bits control the behavior
	 * of the MMU.
	 */
	li	r21, 0x00f0
	rlwimi	r20, r21, 0, 24, 28	/* Set 24-27, clear 28 */

#ifdef CONFIG_8xx_CPU6
	li	r3, 0x2d80
	stw	r3, 12(r0)
	lwz	r3, 12(r0)
#endif
	mtspr	MI_RPN, r20	/* Update TLB entry */

	mfspr	r20, M_TW	/* Restore registers */
#if !CONFIG_PIN_TLB || CONFIG_MODULES
	lwz	r21, 0(r0)
	mtcr	r21
#endif
	lwz	r21, 4(r0)
#ifdef CONFIG_8xx_CPU6
	lwz	r3, 8(r0)
#endif
	rfi

2:	/* Restore registers */
#if !CONFIG_PIN_TLB || CONFIG_MODULES
	lwz	r21, 0(r0)
	mtcr	r21
#else
	mtcr	r20
#endif
	mfspr	r20, M_TW
	lwz	r21, 4(r0)
#ifdef CONFIG_8xx_CPU6
	lwz	r3, 8(r0)
#endif
	b	InstructionAccess

	. = 0x1200
DataStoreTLBMiss:
#ifdef CONFIG_8xx_CPU6
	stw	r3, 8(r0)
	li	r3, 0x3f80
	stw	r3, 12(r0)
	lwz	r3, 12(r0)
#endif
	mtspr	M_TW, r20	/* Save a couple of working registers */
	mfcr	r20
	stw	r20, 0(r0)
	stw	r21, 4(r0)
	mfspr	r20, M_TWB	/* Get level 1 table entry address */

	/* If we are faulting a kernel address, we have to use the
	 * kernel page tables.
	 */
	andi.	r21, r20, 0x0800
	beq+	3f
	lis	r21, swapper_pg_dir@h
	ori	r21, r21, swapper_pg_dir@l
	rlwimi r20, r21, 0, 2, 19
3:
	lwz	r21, 0(r20)	/* Get the level 1 entry */
	rlwinm.	r20, r21,0,0,19	/* Extract page descriptor page address */

//	beq	4f		/* If zero, don't try to find a pte */
	/* We have a pte table, so load fetch the pte from the table.
	 */
	tophys(r21, r21)
	ori	r21, r21, 1	/* Set valid bit in physical L2 page */
//	beq-	4f		/* If zero, don't try to find a pte */
	beq-	2f		/* If zero, don't try to find a pte */

#ifdef CONFIG_8xx_CPU6
	li	r3, 0x3b80
	stw	r3, 12(r0)
	lwz	r3, 12(r0)
#endif
	mtspr	MD_TWC, r21	/* Load pte table base address */
	mfspr	r20, MD_TWC	/* ....and get the pte address */
	lwz	r20, 0(r20)	/* Get the pte */

	/* Insert the Guarded flag into the TWC from the Linux PTE.
	 * It is bit 27 of both the Linux PTE and the TWC (at least
	 * I got that right :-).  It will be better when we can put
	 * this into the Linux pgd/pmd and load it in the operation
	 * above.
	 */
	rlwimi	r21, r20, 0, 27, 27
#ifdef CONFIG_8xx_CPU6
	li	r3, 0x3b80
	stw	r3, 12(r0)
	lwz	r3, 12(r0)
#endif
	mtspr	MD_TWC, r21

//	mfspr	r21, MD_TWC	/* get the pte address again */
	ori	r20, r20, _PAGE_ACCESSED
999:	mfspr	r21, MD_TWC	/* get the pte address again */
	stw	r20, 0(r21)

	/* The Linux PTE won't go exactly into the MMU TLB.
	 * Software indicator bits 21, 22 and 28 must be clear.
	 * Software indicator bits 24, 25, 26, and 27 must be
	 * set.  All other Linux PTE bits control the behavior
	 * of the MMU.
	 */
4:	li	r21, 0x00f0
	rlwimi	r20, r21, 0, 24, 28	/* Set 24-27, clear 28 */

#ifdef CONFIG_8xx_CPU6
	li	r3, 0x3d80
	stw	r3, 12(r0)
	lwz	r3, 12(r0)
#endif
	mtspr	MD_RPN, r20	/* Update TLB entry */

#ifdef CONFIG_8xx_DCBxFIXED
#if TAG_VAL == 0x00f0 /* Save 1 instr. by reusing the val loaded in r21 above */
	mtspr	DAR, r21
#else
	TAG_DAR_R20
#endif
#endif
	mfspr	r20, M_TW	/* Restore registers */
	lwz	r21, 0(r0)
	mtcr	r21
	lwz	r21, 4(r0)
#ifdef CONFIG_8xx_CPU6
	lwz	r3, 8(r0)
#endif
	rfi

2:
#ifdef CONFIG_8xx_DCBxFIXED
	/* Copy 20 msb from MD_EPN to DAR since the dcxx instructions fails
	 * to update DAR when they cause a DTLB Miss.
	 */
	mfspr	r21, MD_EPN
	mfspr	r20, DAR
	rlwimi	r20, r21, 0, 0, 19
	mtspr	DAR, r20
#endif
	mfspr	r20, M_TW	/* Restore registers */
	lwz	r21, 0(r0)
	mtcr	r21
	lwz	r21, 4(r0)
#ifdef CONFIG_8xx_CPU6
	lwz	r3, 8(r0)
#endif
	b	DataAccess

/* This is an instruction TLB error on the MPC8xx.  This could be due
 * to many reasons, such as executing guarded memory or illegal instruction
 * addresses.  There is nothing to do but handle a big time error fault.
 */
	. = 0x1300
InstructionTLBError:
	b	InstructionAccess

/* This is the data TLB error on the MPC8xx.  This could be due to
 * many reasons, including a dirty update to a pte.  We can catch that
 * one here, but anything else is an error.  First, we track down the
 * Linux pte.  If it is valid, write access is allowed, but the
 * page dirty bit is not set, we will set it and reload the TLB.  For
 * any other case, we bail out to a higher level function that can
 * handle it.
 */
	. = 0x1400
DataTLBError:
#ifdef CONFIG_8xx_CPU6
	stw	r3, 8(r0)
	li	r3, 0x3f80
	stw	r3, 12(r0)
	lwz	r3, 12(r0)
#endif
	mtspr	M_TW, r20	/* Save a couple of working registers */
	mfcr	r20
	stw	r20, 0(r0)
	stw	r21, 4(r0)

	mfspr	r20, DAR
#ifdef  CONFIG_8xx_DCBxFIXED
	/* If DAR contains TAG_VAL implies a buggy dcbx instruction
	 * that did not set DAR.
	 */
	cmpwi	cr0, r20, TAG_VAL
	beq-	100f	/* Branch if TAG_VAL to dcbx workaround procedure */
101:	/* return from dcbx instruction bug workaround, r20 holds value of DAR */	
	/* First, make sure this was a store operation.
	*/
#endif
	mfspr	r21, DSISR
	andis.	r21, r21, 0x0200	/* If set, indicates store op */
//	beq	2f

	/* The EA of a data TLB miss is automatically stored in the MD_EPN 
	 * register.  The EA of a data TLB error is automatically stored in 
	 * the DAR, but not the MD_EPN register.  We must copy the 20 most 
	 * significant bits of the EA from the DAR to MD_EPN before we 
	 * start walking the page tables.  We also need to copy the CASID 
	 * value from the M_CASID register.
	 * Addendum:  The EA of a data TLB error is _supposed_ to be stored 
	 * in DAR, but it seems that this doesn't happen in some cases, such 
	 * as when the error is due to a dcbi instruction to a page with a 
	 * TLB that doesn't have the changed bit set.  In such cases, there 
	 * does not appear to be any way  to recover the EA of the error 
	 * since it is neither in DAR nor MD_EPN.  As a workaround, the 
	 * _PAGE_HWWRITE bit is set for all kernel data pages when the PTEs 
	 * are initialized in mapin_ram().  This will avoid the problem, 
	 * assuming we only use the dcbi instruction on kernel addresses.
	 */
	/* DAR is in r20 already */
	rlwinm	r21, r20, 0, 0, 19
	ori	r21, r21, MD_EVALID
	beq-	2f
	mfspr	r20, M_CASID
	rlwimi	r21, r20, 0, 28, 31
#ifdef CONFIG_8xx_CPU6
	li	r3, 0x3780
	stw	r3, 12(r0)
	lwz	r3, 12(r0)
#endif
	mtspr	MD_EPN, r21

	mfspr	r20, M_TWB	/* Get level 1 table entry address */

	/* If we are faulting a kernel address, we have to use the
	 * kernel page tables.
	 */
	andi.	r21, r20, 0x0800
	beq+	3f
	lis	r21, swapper_pg_dir@h
	ori	r21, r21, swapper_pg_dir@l
	rlwimi	r20, r21, 0, 2, 19
3:
	lwz	r21, 0(r20)	/* Get the level 1 entry */
	rlwinm.	r20, r21,0,0,19	/* Extract page descriptor page address */
//	beq	2f		/* If zero, bail */

	/* We have a pte table, so fetch the pte from the table.
	 */
	tophys(r21, r21)
	ori	r21, r21, 1		/* Set valid bit in physical L2 page */
	beq-	2f		/* If zero, bail */
#ifdef CONFIG_8xx_CPU6
	li	r3, 0x3b80
	stw	r3, 12(r0)
	lwz	r3, 12(r0)
#endif
	mtspr	MD_TWC, r21		/* Load pte table base address */
	mfspr	r21, MD_TWC		/* ....and get the pte address */
	lwz	r20, 0(r21)		/* Get the pte */

	andi.	r21, r20, _PAGE_RW	/* Is it writeable? */
//	beq	2f			/* Bail out if not */

	/* Update 'changed', among others.
	*/
	ori	r20, r20, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE
	beq-	2f			/* Bail out if not */
	b	999b
	mfspr	r21, MD_TWC		/* Get pte address again */
	stw	r20, 0(r21)		/* and update pte in table */

	/* The Linux PTE won't go exactly into the MMU TLB.
	 * Software indicator bits 21, 22 and 28 must be clear.
	 * Software indicator bits 24, 25, 26, and 27 must be
	 * set.  All other Linux PTE bits control the behavior
	 * of the MMU.
	 */
	li	r21, 0x00f0
	rlwimi	r20, r21, 0, 24, 28	/* Set 24-27, clear 28 */

#ifdef CONFIG_8xx_CPU6
	li	r3, 0x3d80
	stw	r3, 12(r0)
	lwz	r3, 12(r0)
#endif
	mtspr	MD_RPN, r20	/* Update TLB entry */

#ifdef CONFIG_8xx_DCBxFIXED
#if TAG_VAL == 0x00f0 /* Save 1 instr. by reusing the val loaded in r21 above */
	mtspr	DAR, r21
#else
	TAG_DAR_R20
#endif
#endif
	mfspr	r20, M_TW	/* Restore registers */
	lwz	r21, 0(r0)
	mtcr	r21
	lwz	r21, 4(r0)
#ifdef CONFIG_8xx_CPU6
	lwz	r3, 8(r0)
#endif
	rfi
2:
	mfspr	r20, M_TW	/* Restore registers */
	lwz	r21, 0(r0)
	mtcr	r21
	lwz	r21, 4(r0)
#ifdef CONFIG_8xx_CPU6
	lwz	r3, 8(r0)
#endif
	b	DataAccess

	STD_EXCEPTION(0x1500, Trap_15, UnknownException)
	STD_EXCEPTION(0x1600, Trap_16, UnknownException)
	STD_EXCEPTION(0x1700, Trap_17, TAUException)
	STD_EXCEPTION(0x1800, Trap_18, UnknownException)
	STD_EXCEPTION(0x1900, Trap_19, UnknownException)
	STD_EXCEPTION(0x1a00, Trap_1a, UnknownException)
	STD_EXCEPTION(0x1b00, Trap_1b, UnknownException)

/* On the MPC8xx, these next four traps are used for development
 * support of breakpoints and such.  Someday I will get around to
 * using them.
 */
	STD_EXCEPTION(0x1c00, Trap_1c, UnknownException)
	STD_EXCEPTION(0x1d00, Trap_1d, UnknownException)
	STD_EXCEPTION(0x1e00, Trap_1e, UnknownException)
	STD_EXCEPTION(0x1f00, Trap_1f, UnknownException)

	. = 0x2000

#ifdef CONFIG_8xx_DCBxFIXED
/* This is the workaround procedure to calculate the data EA for buggy dcbx,dcbi instructions
 * by decoding the registers used by the dcbx instruction and adding them.
 * DAR is set to the calculated address and r20 also holds the EA on exit.
 */
//#define INSTR_CHECK /* define to verify if it is a dcbx instr. Should not be needed. */
//#define NO_SELF_MODIFYING_CODE /* define if you don't want to use self modifying code */
//#define DEBUG_DCBX_INSTRUCTIONS /* for debugging only. Needs INSTR_CHECK defined as well. */
//#define KERNEL_SPACE_ONLY /* define if user space do NOT contain dcbx instructions. */

#ifndef KERNEL_SPACE_ONLY
	nop	/* A few nops to make the modified_instr: space below cache line aligned */
	nop
139:	/* fetch instruction from userspace memory */
	DO_8xx_CPU6(0x3780, r3)
	mtspr	MD_EPN, r20
	mfspr	r21, M_TWB	/* Get level 1 table entry address */
	lwz	r21, 0(r21)	/* Get the level 1 entry */
	tophys  (r21, r21)
	DO_8xx_CPU6(0x3b80, r3)
	mtspr	MD_TWC, r21	/* Load pte table base address */
	mfspr	r21, MD_TWC	/* ....and get the pte address */
	lwz	r21, 0(r21)	/* Get the pte */
	/* concat physical page address(r21) and page offset(r20) */
	rlwimi	r21, r20, 0, 20, 31
	b	140f
#endif
100:	/* Entry point for dcbx workaround. */
	/* fetch instruction from memory. */
	mfspr	r20,SRR0
#ifndef KERNEL_SPACE_ONLY
	andis.	r21, r20, 0x8000
	tophys  (r21, r20)
	beq-	139b		/* Branch if user space address */
#else
	tophys  (r21, r20)
#endif
140:	lwz	r21,0(r21)
#ifdef INSTR_CHECK
/* Check if it really is a dcbx instruction. This is not needed as far as I can tell */
/* dcbt and dcbtst does not generate DTLB Misses/Errors, no need to include them here */
	rlwinm	r20, r21, 0, 21, 30
	cmpwi	cr0, r20, 2028	/* Is dcbz? */
	beq+	142f
	cmpwi	cr0, r20, 940	/* Is dcbi? */
	beq+	142f
	cmpwi	cr0, r20, 108	/* Is dcbst? */
	beq+	142f
	cmpwi	cr0, r20, 172	/* Is dcbf? */
	beq+	142f
	cmpwi	cr0, r20, 1964	/* Is icbi? */
	beq+	142f
#ifdef DEBUG_DCBX_INSTRUCTIONS
141:	b 141b /* Stop here if no dcbx instruction */
#endif
	mfspr	r20, DAR	/* r20 must hold DAR at exit */
	b 101b			/* None of the above, go back to normal TLB processing */
142:	/* continue, it was a dcbx instruction. */
#endif
#ifdef CONFIG_8xx_CPU6
	lwz	r3, 8(r0)		/* restore r3 from memory */
#endif
#ifndef NO_SELF_MODIFYING_CODE
	andis.	r20,r21,0x1f	/* test if reg RA is r0 */
	li	r20,modified_instr@l
	dcbtst	r0,r20		/* touch for store */
	rlwinm	r21,r21,0,0,20	/* Zero lower 10 bits */
	oris	r21,r21,640	/* Transform instr. to a "add r20,RA,RB" */
	ori	r21,r21,532
	stw	r21,0(r20)	/* store add/and instruction */
	dcbf	0,r20		/* flush new instr. to memory. */
	icbi	0,r20		/* invalidate instr. cache line */
	lwz	r21, 4(r0)	/* restore r21 from memory */
	mfspr	r20, M_TW	/* restore r20 from M_TW */
	isync			/* Wait until new instr is loaded from memory */
modified_instr:
	.space	4		/* this is where the add/and instr. is stored */
#ifdef DEBUG_DCBX_INSTRUCTIONS
	/* fill with some garbage */ 
	li	r21,0xffff
	stw	r21,0(r21)
#endif
	bne+	143f
	subf	r20,r0,r20		/* r20=r20-r0, only if reg RA is r0 */
143:	mtdar	r20			/* store faulting EA in DAR */
	b	101b			/* Go back to normal TLB handling */
#else
	mfctr	r20
	mtdar	r20			/* save ctr reg in DAR */
	rlwinm	r20, r21, 24, 24, 28	/* offset into jump table for reg RB */
	addi	r20, r20, 150f@l	/* add start of table */
	mtctr	r20			/* load ctr with jump address */
	xor	r20, r20, r20		/* sum starts at zero */
	bctr				/* jump into table */
150:
	add	r20, r20, r0
	b	151f
	add	r20, r20, r1
	b	151f
	add	r20, r20, r2
	b	151f
	add	r20, r20, r3
	b	151f
	add	r20, r20, r4
	b	151f
	add	r20, r20, r5
	b	151f
	add	r20, r20, r6
	b	151f
	add	r20, r20, r7
	b	151f
	add	r20, r20, r8
	b	151f
	add	r20, r20, r9
	b	151f
	add	r20, r20, r10
	b	151f
	add	r20, r20, r11
	b	151f
	add	r20, r20, r12
	b	151f
	add	r20, r20, r13
	b	151f
	add	r20, r20, r14
	b	151f
	add	r20, r20, r15
	b	151f
	add	r20, r20, r16
	b	151f
	add	r20, r20, r17
	b	151f
	add	r20, r20, r18
	b	151f
	add	r20, r20, r19
	b	151f
	mtctr	r21	/* reg 20 needs special handling */
	b	154f
	mtctr	r21	/* reg 21 needs special handling */
	b	153f
	add	r20, r20, r22
	b	151f
	add	r20, r20, r23
	b	151f
	add	r20, r20, r24
	b	151f
	add	r20, r20, r25
	b	151f
	add	r20, r20, r25
	b	151f
	add	r20, r20, r27
	b	151f
	add	r20, r20, r28
	b	151f
	add	r20, r20, r29
	b	151f
	add	r20, r20, r30
	b	151f
	add	r20, r20, r31
151:
	rlwinm. r21,r21,19,24,28	/* offset into jump table for reg RA */
	beq	152f			/* if reg RA is zero, don't add it */ 
	addi	r21, r21, 150b@l	/* add start of table */
	mtctr	r21			/* load ctr with jump address */
	rlwinm	r21,r21,0,16,10		/* make sure we don't execute this more than once */
	bctr				/* jump into table */
152:
	mfdar	r21
	mtctr	r21			/* restore ctr reg from DAR */
	mtdar	r20			/* save fault EA to DAR */
	b	101b			/* Go back to normal TLB handling */

	/* special handling for r20,r21 since these are modified already */
153:	lwz	r21, 4(r0)	/* load r21 from memory */
	b	155f
154:	mfspr	r21, M_TW	/* load r20 from M_TW */
155:	add	r20, r20, r21	/* add it */
	mfctr	r21		/* restore r21 */
	b	151b
#endif
#endif
/*
 * This code finishes saving the registers to the exception frame
 * and jumps to the appropriate handler for the exception, turning
 * on address translation.
 */
	.globl	transfer_to_handler
transfer_to_handler:
	stw	r22,_NIP(r21)
	lis	r22,MSR_POW@h
	andc	r23,r23,r22
	stw	r23,_MSR(r21)
	SAVE_4GPRS(8, r21)
	SAVE_8GPRS(12, r21)
	SAVE_8GPRS(24, r21)
	andi.	r23,r23,MSR_PR
	mfspr	r23,SPRG3		/* if from user, fix up THREAD.regs */
	beq	2f
	addi	r24,r1,STACK_FRAME_OVERHEAD
	stw	r24,PT_REGS(r23)
2:	addi	r2,r23,-THREAD		/* set r2 to current */
	tovirt(r2,r2)
	mflr	r23
	andi.	r24,r23,0x3f00		/* get vector offset */
	stw	r24,TRAP(r21)
	li	r22,0
	stw	r22,RESULT(r21)
	mtspr	SPRG2,r22		/* r1 is now kernel sp */
	addi	r24,r2,TASK_STRUCT_SIZE	/* check for kernel stack overflow */
	cmplw	0,r1,r2
	cmplw	1,r1,r24
	crand	1,1,4
	bgt-	stack_ovf		/* if r2 < r1 < r2+TASK_STRUCT_SIZE */
	lwz	r24,0(r23)		/* virtual address of handler */
	lwz	r23,4(r23)		/* where to go when done */
	mtspr	SRR0,r24
	mtspr	SRR1,r20
	mtlr	r23
	SYNC
	rfi				/* jump to handler, enable MMU */

/*
 * On kernel stack overflow, load up an initial stack pointer
 * and call StackOverflow(regs), which should not return.
 */
stack_ovf:
	addi	r3,r1,STACK_FRAME_OVERHEAD
	lis	r1,init_task_union@ha
	addi	r1,r1,init_task_union@l
	addi	r1,r1,TASK_UNION_SIZE-STACK_FRAME_OVERHEAD
	lis	r24,StackOverflow@ha
	addi	r24,r24,StackOverflow@l
	li	r20,MSR_KERNEL
	mtspr	SRR0,r24
	mtspr	SRR1,r20
	SYNC
	rfi

	.globl	giveup_fpu
giveup_fpu:
	blr

/* Maybe someday.......
*/
_GLOBAL(__setup_cpu_8xx)
	blr

/*
 * This is where the main kernel code starts.
 */
start_here:

	/* ptr to current */
	lis	r2,init_task_union@h
	ori	r2,r2,init_task_union@l

	/* ptr to phys current thread */
	tophys(r4,r2)
	addi	r4,r4,THREAD	/* init task's THREAD */
	mtspr	SPRG3,r4
	li	r3,0
	mtspr	SPRG2,r3	/* 0 => r1 has kernel sp */

	/* stack */
	addi	r1,r2,TASK_UNION_SIZE
	li	r0,0
	stwu	r0,-STACK_FRAME_OVERHEAD(r1)

	bl	early_init	/* We have to do this with MMU on */

/*
 * Decide what sort of machine this is and initialize the MMU.
 */
	mr	r3,r31
	mr	r4,r30
	mr	r5,r29
	mr	r6,r28
	mr	r7,r27
	bl	machine_init
	bl	MMU_init

/*
 * Go back to running unmapped so we can load up new values
 * and change to using our exception vectors.
 * On the 8xx, all we have to do is invalidate the TLB to clear
 * the old 8M byte TLB mappings and load the page table base register.
 */
	/* The right way to do this would be to track it down through
	 * init's THREAD like the context switch code does, but this is
	 * easier......until someone changes init's static structures.
	 */
	lis	r6, swapper_pg_dir@h
	ori	r6, r6, swapper_pg_dir@l
	tophys(r6,r6)
#ifdef CONFIG_8xx_CPU6
	lis	r4, cpu6_errata_word@h
	ori	r4, r4, cpu6_errata_word@l
	li	r3, 0x3980
	stw	r3, 12(r4)
	lwz	r3, 12(r4)
#endif
	mtspr	M_TWB, r6
	lis	r4,2f@h
	ori	r4,r4,2f@l
	tophys(r4,r4)
	li	r3,MSR_KERNEL & ~(MSR_IR|MSR_DR)
	mtspr	SRR0,r4
	mtspr	SRR1,r3
	rfi
/* Load up the kernel context */
2:
	SYNC			/* Force all PTE updates to finish */
	tlbia			/* Clear all TLB entries */
	sync			/* wait for tlbia/tlbie to finish */
	TLBSYNC			/* ... on all CPUs */

#ifdef CONFIG_BDI_SWITCH
	/* Add helper information for the Abatron bdiGDB debugger.
	 * We do this here because we know the mmu is disabled, and
	 * will be enabled for real in just a few instructions.
	 */
	tovirt(r6,r6)
	lis	r5, abatron_pteptrs@h
	ori	r5, r5, abatron_pteptrs@l
	stw	r5, 0xf0(r0)	/* Must match your Abatron config file */
	tophys(r5,r5)
	stw	r6, 0(r5)
#endif

/* Now turn on the MMU for real! */
	li	r4,MSR_KERNEL
	lis	r3,start_kernel@h
	ori	r3,r3,start_kernel@l
	mtspr	SRR0,r3
	mtspr	SRR1,r4
	rfi			/* enable MMU and jump to start_kernel */

/* Set up the initial MMU state so we can do the first level of
 * kernel initialization.  This maps the first 8 MBytes of memory 1:1
 * virtual to physical.  Also, set the cache mode since that is defined
 * by TLB entries and perform any additional mapping (like of the IMMR).
 * If configured to pin some TLBs, we pin the first 8 Mbytes of kernel,
 * 24 Mbytes of data, and the 8M IMMR space.  Anything not covered by
 * these mappings is mapped by page tables.
 */
initial_mmu:
	tlbia			/* Invalidate all TLB entries */
#ifdef CONFIG_PIN_TLB
	lis	r8, MI_RSV4I@h
	ori	r8, r8, 0x1c00
#else
	li	r8, 0
#endif
	mtspr	MI_CTR, r8	/* Set instruction MMU control */

#ifdef CONFIG_PIN_TLB
	lis	r10, (MD_RSV4I | MD_RESETVAL)@h
	ori	r10, r10, 0x1c00
	mr	r8, r10
#else
	lis	r10, MD_RESETVAL@h
#endif
#ifndef CONFIG_8xx_COPYBACK
	oris	r10, r10, MD_WTDEF@h
#endif
	mtspr	MD_CTR, r10	/* Set data TLB control */

	/* Now map the lower 8 Meg into the TLBs.  For this quick hack,
	 * we can load the instruction and data TLB registers with the
	 * same values.
	 */
	lis	r8, KERNELBASE@h	/* Create vaddr for TLB */
	ori	r8, r8, MI_EVALID	/* Mark it valid */
	mtspr	MI_EPN, r8
	mtspr	MD_EPN, r8
	li	r8, MI_PS8MEG		/* Set 8M byte page */
	ori	r8, r8, MI_SVALID	/* Make it valid */
	mtspr	MI_TWC, r8
	mtspr	MD_TWC, r8
	li	r8, MI_BOOTINIT		/* Create RPN for address 0 */
	mtspr	MI_RPN, r8		/* Store TLB entry */
	mtspr	MD_RPN, r8
	lis	r8, MI_Kp@h		/* Set the protection mode */
	mtspr	MI_AP, r8
	mtspr	MD_AP, r8

	/* Map another 8 MByte at the IMMR to get the processor
	 * internal registers (among other things).
	 */
#ifdef CONFIG_PIN_TLB
	addi	r10, r10, 0x0100
	mtspr	MD_CTR, r10
#endif
	mfspr	r9, 638			/* Get current IMMR */
	andis.	r9, r9, 0xff80		/* Get 8Mbyte boundary */

	mr	r8, r9			/* Create vaddr for TLB */
	ori	r8, r8, MD_EVALID	/* Mark it valid */
	mtspr	MD_EPN, r8
	li	r8, MD_PS8MEG		/* Set 8M byte page */
	ori	r8, r8, MD_SVALID	/* Make it valid */
	mtspr	MD_TWC, r8
	mr	r8, r9			/* Create paddr for TLB */
	ori	r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */
	mtspr	MD_RPN, r8

#ifdef CONFIG_PIN_TLB
	/* Map two more 8M kernel data pages.
	*/
	addi	r10, r10, 0x0100
	mtspr	MD_CTR, r10

	lis	r8, KERNELBASE@h	/* Create vaddr for TLB */
	addis	r8, r8, 0x0080		/* Add 8M */
	ori	r8, r8, MI_EVALID	/* Mark it valid */
	mtspr	MD_EPN, r8
	li	r9, MI_PS8MEG		/* Set 8M byte page */
	ori	r9, r9, MI_SVALID	/* Make it valid */
	mtspr	MD_TWC, r9
	li	r11, MI_BOOTINIT	/* Create RPN for address 0 */
	addis	r11, r11, 0x0080	/* Add 8M */
	mtspr	MD_RPN, r8

	addis	r8, r8, 0x0080		/* Add 8M */
	mtspr	MD_EPN, r8
	mtspr	MD_TWC, r9
	addis	r11, r11, 0x0080	/* Add 8M */
	mtspr	MD_RPN, r8
#endif

	/* Since the cache is enabled according to the information we
	 * just loaded into the TLB, invalidate and enable the caches here.
	 * We should probably check/set other modes....later.
	 */
	lis	r8, IDC_INVALL@h
	mtspr	IC_CST, r8
	mtspr	DC_CST, r8
	lis	r8, IDC_ENABLE@h
	mtspr	IC_CST, r8
#ifdef CONFIG_8xx_COPYBACK
	mtspr	DC_CST, r8
#else
	/* For a debug option, I left this here to easily enable
	 * the write through cache mode
	 */
	lis	r8, DC_SFWT@h
	mtspr	DC_CST, r8
	lis	r8, IDC_ENABLE@h
	mtspr	DC_CST, r8
#endif
	blr


/*
 * Set up to use a given MMU context.
 * r3 is context number, r4 is PGD pointer.
 *
 * We place the physical address of the new task page directory loaded
 * into the MMU base register, and set the ASID compare register with
 * the new "context."
 */
_GLOBAL(set_context)

#ifdef CONFIG_BDI_SWITCH
	/* Context switch the PTE pointer for the Abatron BDI2000.
	 * The PGDIR is passed as second argument.
	 */
	lis	r5, KERNELBASE@h
	lwz	r5, 0xf0(r5)
	stw	r4, 0x4(r5)
#endif

#ifdef CONFIG_8xx_CPU6
	lis	r6, cpu6_errata_word@h
	ori	r6, r6, cpu6_errata_word@l
	tophys	(r4, r4)
	li	r7, 0x3980
	stw	r7, 12(r6)
	lwz	r7, 12(r6)
        mtspr   M_TWB, r4               /* Update MMU base address */
	li	r7, 0x3380
	stw	r7, 12(r6)
	lwz	r7, 12(r6)
        mtspr   M_CASID, r3             /* Update context */
#else
        mtspr   M_CASID,r3		/* Update context */
	tophys	(r4, r4)
	mtspr	M_TWB, r4		/* and pgd */
#endif
	SYNC
	blr

#ifdef CONFIG_8xx_CPU6
/* It's here because it is unique to the 8xx.
 * It is important we get called with interrupts disabled.  I used to
 * do that, but it appears that all code that calls this already had
 * interrupt disabled.
 */
	.globl	set_dec_cpu6
set_dec_cpu6:
	lis	r7, cpu6_errata_word@h
	ori	r7, r7, cpu6_errata_word@l
	li	r4, 0x2c00
	stw	r4, 8(r7)
	lwz	r4, 8(r7)
        mtspr   22, r3		/* Update Decrementer */
	SYNC
	blr
#endif

/*
 * We put a few things here that have to be page-aligned.
 * This stuff goes at the beginning of the data segment,
 * which is page-aligned.
 */
	.data
	.globl	sdata
sdata:
	.globl	empty_zero_page
empty_zero_page:
	.space	4096

	.globl	swapper_pg_dir
swapper_pg_dir:
	.space	4096

/*
 * This space gets a copy of optional info passed to us by the bootstrap
 * Used to pass parameters into the kernel like root=/dev/sda1, etc.
 */
	.globl	cmd_line
cmd_line:
	.space	512

#ifdef CONFIG_BDI_SWITCH
/* Room for two PTE table poiners, usually the kernel and current user
 * pointer to their respective root page table (pgdir).
 */
abatron_pteptrs:
	.space	8
#endif

#ifdef CONFIG_8xx_CPU6
	.globl	cpu6_errata_word
cpu6_errata_word:
	.space	16
#endif

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-25  3:03           ` Benjamin Herrenschmidt
  2009-09-25  8:31             ` Joakim Tjernlund
@ 2009-09-25 21:18             ` Rex Feany
  2009-09-27 13:22               ` Joakim Tjernlund
  1 sibling, 1 reply; 58+ messages in thread
From: Rex Feany @ 2009-09-25 21:18 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev

Thus spake Benjamin Herrenschmidt (benh@kernel.crashing.org):

> 
> > I think there's more finishyness to 8xx than we thought. IE. That
> > tlbil_va might have more reasons to be there than what the comment
> > seems to advertize. Can you try to move it even higher up ? IE.
> > Unconditionally at the beginning of set_pte_filter ?
> > 
> > Also, if that doesn't help, can you try putting one in
> > set_access_flags_filter() just below ?
> 
> Ok, I got a refresher on the whole concept of "unpopulated TLB entries"
> on 8xx, and that's damn scary. I think what mislead me initially is that
> the comment around the workaround is simply not properly describing the
> extent of the problem :-)

Oh boy, that sounds bad. Where is a good place to read about this?

> So I'm not going to make the 8xx TLB miss code sane, that's beyond what
> I'm prepare to do with it, but I suspect that this should fix it (on top
> of upstream). Let me know if that's enough or if we also need to put
> one of these in ptep_set_access_flags().
> 
> Please let me know if that works for you.

Putting the tlbil_va() in the top of set_pte_filter() doesn't work - it
hangs on boot before it even prints any messages to the console.

However, adding tlbil_va() to ptep_set_access_flags() as you suggested
makes everything happy. I need to test it some more, but it looks good
so far. Below is what I am testing now.

thanks!
/rex.


diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 5304093..aef552a 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -176,18 +176,19 @@ static pte_t set_pte_filter(pte_t pte, unsigned long addr)
 		struct page *pg = maybe_pte_to_page(pte);
 		if (!pg)
 			return pte;
-		if (!test_bit(PG_arch_1, &pg->flags)) {
 #ifdef CONFIG_8xx
-			/* On 8xx, cache control instructions (particularly
-			 * "dcbst" from flush_dcache_icache) fault as write
-			 * operation if there is an unpopulated TLB entry
-			 * for the address in question. To workaround that,
-			 * we invalidate the TLB here, thus avoiding dcbst
-			 * misbehaviour.
-			 */
-			/* 8xx doesn't care about PID, size or ind args */
-			_tlbil_va(addr, 0, 0, 0);
+		/* On 8xx, cache control instructions (particularly
+		 * "dcbst" from flush_dcache_icache) fault as write
+		 * operation if there is an unpopulated TLB entry
+		 * for the address in question. To workaround that,
+		 * we invalidate the TLB here, thus avoiding dcbst
+		 * misbehaviour.
+		 */
+		/* 8xx doesn't care about PID, size or ind args */
+		_tlbil_va(addr, 0, 0, 0);
 #endif /* CONFIG_8xx */
+
+		if (!test_bit(PG_arch_1, &pg->flags)) {
 			flush_dcache_icache_page(pg);
 			set_bit(PG_arch_1, &pg->flags);
 		}
@@ -308,6 +309,12 @@ int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
 	int changed;
 	entry = set_access_flags_filter(entry, vma, dirty);
 	changed = !pte_same(*(ptep), entry);
+
+#ifdef CONFIG_8xx
+	/* 8xx doesn't care about PID, size or ind args */
+	_tlbil_va(address, 0, 0, 0);
+#endif /* CONFIG_8xx */
+
 	if (changed) {
 		if (!(vma->vm_flags & VM_HUGETLB))
 			assert_pte_locked(vma->vm_mm, address);

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-25 21:18             ` Rex Feany
@ 2009-09-27 13:22               ` Joakim Tjernlund
  2009-09-28  3:21                 ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 58+ messages in thread
From: Joakim Tjernlund @ 2009-09-27 13:22 UTC (permalink / raw)
  To: Rex Feany; +Cc: linuxppc-dev

>
> Thus spake Benjamin Herrenschmidt (benh@kernel.crashing.org):
>
> >
> > > I think there's more finishyness to 8xx than we thought. IE. That
> > > tlbil_va might have more reasons to be there than what the comment
> > > seems to advertize. Can you try to move it even higher up ? IE.
> > > Unconditionally at the beginning of set_pte_filter ?
> > >
> > > Also, if that doesn't help, can you try putting one in
> > > set_access_flags_filter() just below ?
> >
> > Ok, I got a refresher on the whole concept of "unpopulated TLB entries"
> > on 8xx, and that's damn scary. I think what mislead me initially is that
> > the comment around the workaround is simply not properly describing the
> > extent of the problem :-)
>
> Oh boy, that sounds bad. Where is a good place to read about this?
>
> > So I'm not going to make the 8xx TLB miss code sane, that's beyond what
> > I'm prepare to do with it, but I suspect that this should fix it (on top
> > of upstream). Let me know if that's enough or if we also need to put
> > one of these in ptep_set_access_flags().
> >
> > Please let me know if that works for you.
>
> Putting the tlbil_va() in the top of set_pte_filter() doesn't work - it
> hangs on boot before it even prints any messages to the console.
>
> However, adding tlbil_va() to ptep_set_access_flags() as you suggested
> makes everything happy. I need to test it some more, but it looks good
> so far. Below is what I am testing now.

8xx, is getting very hacky and I suspect that the only long term fix is
add code to trap the cache instructions in TLB error/miss and fixup the
exception in page fault handler. This will also have the added benefit on being able
to use the cache instructions in both kernel and user space like any other
ppc arch.

   Jocke

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-27 13:22               ` Joakim Tjernlund
@ 2009-09-28  3:21                 ` Benjamin Herrenschmidt
  2009-09-28  7:22                   ` Joakim Tjernlund
  0 siblings, 1 reply; 58+ messages in thread
From: Benjamin Herrenschmidt @ 2009-09-28  3:21 UTC (permalink / raw)
  To: Joakim Tjernlund; +Cc: linuxppc-dev, Rex Feany

On Sun, 2009-09-27 at 15:22 +0200, Joakim Tjernlund wrote:
> > However, adding tlbil_va() to ptep_set_access_flags() as you suggested
> > makes everything happy. I need to test it some more, but it looks good
> > so far. Below is what I am testing now.
> 
> 8xx, is getting very hacky and I suspect that the only long term fix is
> add code to trap the cache instructions in TLB error/miss and fixup the
> exception in page fault handler. This will also have the added benefit on being able
> to use the cache instructions in both kernel and user space like any other
> ppc arch.

First I'd like to understand exactly what's happening today, since
it makes little sense :-) I suppose I'll have to get myself some
8xx doco and understand how the bloody MMU works.

Then, I saw your old patch and it's -very- invasive. If we can get away
with a one liner just adding tlbil_va in the right place, I think I'm
happy to stick with it until somebody comes up with a real good reason
to do more :-) 8xx is on life support and has been around for long
enough without people feeling the need overall to work around that
problem so I'm tempted to keep the status-quo here.

Cheers,
Ben

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-28  3:21                 ` Benjamin Herrenschmidt
@ 2009-09-28  7:22                   ` Joakim Tjernlund
  2009-09-28  7:34                     ` Benjamin Herrenschmidt
  2009-09-28 10:02                     ` Joakim Tjernlund
  0 siblings, 2 replies; 58+ messages in thread
From: Joakim Tjernlund @ 2009-09-28  7:22 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, Rex Feany

Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 28/09/2009 05:21:00:
>
> On Sun, 2009-09-27 at 15:22 +0200, Joakim Tjernlund wrote:
> > > However, adding tlbil_va() to ptep_set_access_flags() as you suggested
> > > makes everything happy. I need to test it some more, but it looks good
> > > so far. Below is what I am testing now.
> >
> > 8xx, is getting very hacky and I suspect that the only long term fix is
> > add code to trap the cache instructions in TLB error/miss and fixup the
> > exception in page fault handler. This will also have the added benefit on being able
> > to use the cache instructions in both kernel and user space like any other
> > ppc arch.
>
> First I'd like to understand exactly what's happening today, since
> it makes little sense :-) I suppose I'll have to get myself some
> 8xx doco and understand how the bloody MMU works.

hmm, I recall that the reason for the currect "force a TLB error"
procedure was to fix a problem Montavista had, possibly Tom Rini.
He fixed the problem differently at first but later it
was changed to what is in use today.

If you have access to the old linuxppc-embedded you will find a lot
of info in a thread with subject "[PATCH 2.6.14] mm: 8xx MM fix for"
Also I made a ref that I cannot access anymore:
http://ozlabs.org/pipermail/linuxppc-embedded/2005-January/016382.html

>
> Then, I saw your old patch and it's -very- invasive. If we can get away
> with a one liner just adding tlbil_va in the right place, I think I'm

Yes, my patch is invasive but that is because I did it in asm. If you do
it in the page fault handler, it will be much easier.
In principle you would only have to tag DAR and test for
the tag value in asm then branch to the page fault handler. Once there
it is easy to decode the insns.

> happy to stick with it until somebody comes up with a real good reason
> to do more :-) 8xx is on life support and has been around for long

yeah, I figured that too but Freescale seems to be cranking out new variants
still.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-28  7:22                   ` Joakim Tjernlund
@ 2009-09-28  7:34                     ` Benjamin Herrenschmidt
  2009-09-28  7:39                       ` Joakim Tjernlund
  2009-09-28 10:02                     ` Joakim Tjernlund
  1 sibling, 1 reply; 58+ messages in thread
From: Benjamin Herrenschmidt @ 2009-09-28  7:34 UTC (permalink / raw)
  To: Joakim Tjernlund; +Cc: linuxppc-dev, Rex Feany

On Mon, 2009-09-28 at 09:22 +0200, Joakim Tjernlund wrote:
> 
> > happy to stick with it until somebody comes up with a real good
> reason
> > to do more :-) 8xx is on life support and has been around for long
> 
> yeah, I figured that too but Freescale seems to be cranking out new
> variants still.

Ugh ? And still not fixing the major core bugs ? Grmbl...

Ben.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-28  7:34                     ` Benjamin Herrenschmidt
@ 2009-09-28  7:39                       ` Joakim Tjernlund
  0 siblings, 0 replies; 58+ messages in thread
From: Joakim Tjernlund @ 2009-09-28  7:39 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, Rex Feany

Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 28/09/2009 09:34:46:
>
> On Mon, 2009-09-28 at 09:22 +0200, Joakim Tjernlund wrote:
> >
> > > happy to stick with it until somebody comes up with a real good
> > reason
> > > to do more :-) 8xx is on life support and has been around for long
> >
> > yeah, I figured that too but Freescale seems to be cranking out new
> > variants still.
>
> Ugh ? And still not fixing the major core bugs ? Grmbl...

yes, I even had them chase down the problem so they know exactly where
it is :( I don't think it is in any errata though, but I haven't looked
for a few years.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-28  7:22                   ` Joakim Tjernlund
  2009-09-28  7:34                     ` Benjamin Herrenschmidt
@ 2009-09-28 10:02                     ` Joakim Tjernlund
  1 sibling, 0 replies; 58+ messages in thread
From: Joakim Tjernlund @ 2009-09-28 10:02 UTC (permalink / raw)
  Cc: Rex Feany, linuxppc-dev

>
> Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 28/09/2009 05:21:00:
> >
> > On Sun, 2009-09-27 at 15:22 +0200, Joakim Tjernlund wrote:
> > > > However, adding tlbil_va() to ptep_set_access_flags() as you suggested
> > > > makes everything happy. I need to test it some more, but it looks good
> > > > so far. Below is what I am testing now.
> > >
> > > 8xx, is getting very hacky and I suspect that the only long term fix is
> > > add code to trap the cache instructions in TLB error/miss and fixup the
> > > exception in page fault handler. This will also have the added benefit on being able
> > > to use the cache instructions in both kernel and user space like any other
> > > ppc arch.
> >
> > First I'd like to understand exactly what's happening today, since
> > it makes little sense :-) I suppose I'll have to get myself some
> > 8xx doco and understand how the bloody MMU works.
>
> hmm, I recall that the reason for the currect "force a TLB error"
> procedure was to fix a problem Montavista had, possibly Tom Rini.
> He fixed the problem differently at first but later it
> was changed to what is in use today.
>
> If you have access to the old linuxppc-embedded you will find a lot
> of info in a thread with subject "[PATCH 2.6.14] mm: 8xx MM fix for"
> Also I made a ref that I cannot access anymore:
> http://ozlabs.org/pipermail/linuxppc-embedded/2005-January/016382.html

Found a link to the original problem
 http://www.mail-archive.com/linuxppc-embedded@ozlabs.org/msg03376.html
seems like I had something to do with the solution that is haunting
8xx now :(
Basically 8xx should revert back to the fix suggested in the above link.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-25  1:51         ` Benjamin Herrenschmidt
  2009-09-25  3:03           ` Benjamin Herrenschmidt
@ 2009-09-29  1:21           ` Rex Feany
  2009-09-29  6:26             ` Joakim Tjernlund
  2009-09-29  7:07             ` Benjamin Herrenschmidt
  1 sibling, 2 replies; 58+ messages in thread
From: Rex Feany @ 2009-09-29  1:21 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev

Thus spake Benjamin Herrenschmidt (benh@kernel.crashing.org):

> On Thu, 2009-09-24 at 18:35 -0700, Rex Feany wrote:
> > 
> > Then I can boot and get to a shell, but userspace is slow. 8 seconds
> > to mount
> > /proc (vs. less then a second using my old kernel)! Maybe this is an
> > unrelated issue?  I'm pretty clueless about the details, I'm sorry.
> > PG_arch_1 is used to prevent a cache flush unless it is actually
> > needed?
> > Then why would changing the location of the tlbil_va() make a
> > difference?
> 
> I think there's more finishyness to 8xx than we thought. IE. That
> tlbil_va might have more reasons to be there than what the comment
> seems to advertize. Can you try to move it even higher up ? IE.
> Unconditionally at the beginning of set_pte_filter ?
> 
> Also, if that doesn't help, can you try putting one in
> set_access_flags_filter() just below ?
> 
> (Beware that there's two different versions of both functions, only the
> first one is compiled/used on 8xx).
> 
> It's going to be hard for me to get that "right" since I don't really
> know what's going on with the core here, but I suppose if we get it
> moving along with extra tlb invalidations, that should be "good enough"
> until somebody who really knows what's going on comes up with possibly
> a better fix.

I've tried sticking tlbil_va() in those places, nothing seems to help.
In some cases userspace is slow, in other cases userspace is faster and
unstable: sometimes commands hang, sometimes I am able to ctrl-c and
and kill it, sometimes I get other strange crashes or falures (so far no
kernel oopses though).

take care!
/rex.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-29  1:21           ` Rex Feany
@ 2009-09-29  6:26             ` Joakim Tjernlund
  2009-09-29  7:07               ` Benjamin Herrenschmidt
  2009-09-29  7:07             ` Benjamin Herrenschmidt
  1 sibling, 1 reply; 58+ messages in thread
From: Joakim Tjernlund @ 2009-09-29  6:26 UTC (permalink / raw)
  To: Rex Feany; +Cc: linuxppc-dev

>
> Thus spake Benjamin Herrenschmidt (benh@kernel.crashing.org):
>
> > On Thu, 2009-09-24 at 18:35 -0700, Rex Feany wrote:
> > >
> > > Then I can boot and get to a shell, but userspace is slow. 8 seconds
> > > to mount
> > > /proc (vs. less then a second using my old kernel)! Maybe this is an
> > > unrelated issue?  I'm pretty clueless about the details, I'm sorry.
> > > PG_arch_1 is used to prevent a cache flush unless it is actually
> > > needed?
> > > Then why would changing the location of the tlbil_va() make a
> > > difference?
> >
> > I think there's more finishyness to 8xx than we thought. IE. That
> > tlbil_va might have more reasons to be there than what the comment
> > seems to advertize. Can you try to move it even higher up ? IE.
> > Unconditionally at the beginning of set_pte_filter ?
> >
> > Also, if that doesn't help, can you try putting one in
> > set_access_flags_filter() just below ?
> >
> > (Beware that there's two different versions of both functions, only the
> > first one is compiled/used on 8xx).
> >
> > It's going to be hard for me to get that "right" since I don't really
> > know what's going on with the core here, but I suppose if we get it
> > moving along with extra tlb invalidations, that should be "good enough"
> > until somebody who really knows what's going on comes up with possibly
> > a better fix.
>
> I've tried sticking tlbil_va() in those places, nothing seems to help.
> In some cases userspace is slow, in other cases userspace is faster and
> unstable: sometimes commands hang, sometimes I am able to ctrl-c and
> and kill it, sometimes I get other strange crashes or falures (so far no
> kernel oopses though).

This is exactly what you get when the "cache insn does not update DAR" bug hits
you.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-29  6:26             ` Joakim Tjernlund
@ 2009-09-29  7:07               ` Benjamin Herrenschmidt
  2009-09-29  8:13                 ` Joakim Tjernlund
  0 siblings, 1 reply; 58+ messages in thread
From: Benjamin Herrenschmidt @ 2009-09-29  7:07 UTC (permalink / raw)
  To: Joakim Tjernlund; +Cc: linuxppc-dev, Rex Feany

On Tue, 2009-09-29 at 08:26 +0200, Joakim Tjernlund wrote:
> > I've tried sticking tlbil_va() in those places, nothing seems to
> help.
> > In some cases userspace is slow, in other cases userspace is faster
> and
> > unstable: sometimes commands hang, sometimes I am able to ctrl-c and
> > and kill it, sometimes I get other strange crashes or falures (so
> far no
> > kernel oopses though).
> 
> This is exactly what you get when the "cache insn does not update DAR"
> bug hits
> you.

But then why was it working fine before ? Or it wasn't ?

Ben.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-29  7:07               ` Benjamin Herrenschmidt
@ 2009-09-29  8:13                 ` Joakim Tjernlund
  2009-09-29  8:16                   ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 58+ messages in thread
From: Joakim Tjernlund @ 2009-09-29  8:13 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, Rex Feany

Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 29/09/2009 09:07:37:
>
> On Tue, 2009-09-29 at 08:26 +0200, Joakim Tjernlund wrote:
> > > I've tried sticking tlbil_va() in those places, nothing seems to
> > help.
> > > In some cases userspace is slow, in other cases userspace is faster
> > and
> > > unstable: sometimes commands hang, sometimes I am able to ctrl-c and
> > > and kill it, sometimes I get other strange crashes or falures (so
> > far no
> > > kernel oopses though).
> >
> > This is exactly what you get when the "cache insn does not update DAR"
> > bug hits
> > you.
>
> But then why was it working fine before ? Or it wasn't ?

hmm, yes. You do get this and mysterious SEGV if you hit the but so does
other bugs too so this is probably due to missing invalidation.

I suspect that something like below will fix the problem and
is the "correct" fix(untested, not even compiled):

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 52ff8c5..f579a11 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -428,7 +428,7 @@ DataStoreTLBMiss:
 	 * set.  All other Linux PTE bits control the behavior
 	 * of the MMU.
 	 */
-2:	li	r11, 0x00f0
+	li	r11, 0x00f0
 	rlwimi	r10, r11, 0, 24, 28	/* Set 24-27, clear 28 */
 	DO_8xx_CPU6(0x3d80, r3)
 	mtspr	SPRN_MD_RPN, r10	/* Update TLB entry */
@@ -441,7 +441,23 @@ DataStoreTLBMiss:
 	lwz	r3, 8(r0)
 #endif
 	rfi
+2:
+	/* Copy 20 msb from MD_EPN to DAR since the dcxx instructions fails
+	 * to update DAR when they cause a DTLB Miss.
+	 */
+	mfspr	r11, SPRN_MD_EPN
+	mfspr	r10, SPRN_DAR
+	rlwimi	r10, r11, 0, 0, 19
+	mtspr	SPRN_DAR, r10

+	mfspr	r10, SPRN_M_TW	/* Restore registers */
+	lwz	r11, 0(r0)
+	mtcr	r11
+	lwz	r11, 4(r0)
+#ifdef CONFIG_8xx_CPU6
+	lwz	r3, 8(r0)
+#endif
+	b	DataAccess
 /* This is an instruction TLB error on the MPC8xx.  This could be due
  * to many reasons, such as executing guarded memory or illegal instruction
  * addresses.  There is nothing to do but handle a big time error fault.

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-29  8:13                 ` Joakim Tjernlund
@ 2009-09-29  8:16                   ` Benjamin Herrenschmidt
  2009-09-29  8:24                     ` Joakim Tjernlund
  2009-09-29 11:56                     ` Joakim Tjernlund
  0 siblings, 2 replies; 58+ messages in thread
From: Benjamin Herrenschmidt @ 2009-09-29  8:16 UTC (permalink / raw)
  To: Joakim Tjernlund; +Cc: linuxppc-dev, Rex Feany


> hmm, yes. You do get this and mysterious SEGV if you hit the but so does
> other bugs too so this is probably due to missing invalidation.
> 
> I suspect that something like below will fix the problem and
> is the "correct" fix(untested, not even compiled):

Ok but do we also still have to worry about the "unpopulated" TLB
entries and invalidate them somehow when populating ?

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-29  8:16                   ` Benjamin Herrenschmidt
@ 2009-09-29  8:24                     ` Joakim Tjernlund
  2009-09-29 11:56                     ` Joakim Tjernlund
  1 sibling, 0 replies; 58+ messages in thread
From: Joakim Tjernlund @ 2009-09-29  8:24 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, Rex Feany

Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 29/09/2009 10:16:38:
>
>
> > hmm, yes. You do get this and mysterious SEGV if you hit the but so does
> > other bugs too so this is probably due to missing invalidation.
> >
> > I suspect that something like below will fix the problem and
> > is the "correct" fix(untested, not even compiled):
>
> Ok but do we also still have to worry about the "unpopulated" TLB
> entries and invalidate them somehow when populating ?

No, this patch is to not populate the MMU with invalid entries at
all. Hopefully you can remove those invalidate 8xx TLBs hacks
you currently have.

     Jocke

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-29  8:16                   ` Benjamin Herrenschmidt
  2009-09-29  8:24                     ` Joakim Tjernlund
@ 2009-09-29 11:56                     ` Joakim Tjernlund
  2009-09-29 21:03                       ` Rex Feany
  1 sibling, 1 reply; 58+ messages in thread
From: Joakim Tjernlund @ 2009-09-29 11:56 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, Rex Feany

Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 29/09/2009 10:16:38:
>
>
> > hmm, yes. You do get this and mysterious SEGV if you hit the but so does
> > other bugs too so this is probably due to missing invalidation.
> >
> > I suspect that something like below will fix the problem and
> > is the "correct" fix(untested, not even compiled):
>
> Ok but do we also still have to worry about the "unpopulated" TLB
> entries and invalidate them somehow when populating ?

Since I am probably the only one that knows about DAR problem I figured
I should take a stab at it. This is not tested, but I hope Rex and the list
can do that. Once this works as it should, we can remove all special handling
for 8xx in copy_tofrom_user() and friends.
No sign-off yet, want some confirmation first.

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 4dd38f1..691ebd3 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -774,7 +774,14 @@ restore:
 	lwz	r11,_CTR(r1)
 	mtspr	SPRN_XER,r10
 	mtctr	r11
-
+#ifdef CONFIG_8xx
+	/* Tag DAR with a well know value.
+	 * This needs to match head_8xx.S and
+	 * do_page_fault()
+	 */
+	li	r10, 0xf0
+	mtspr	SPRN_DAR, r10
+#endif
 	PPC405_ERR77(0,r1)
 BEGIN_FTR_SECTION
 	lwarx	r11,0,r1
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 52ff8c5..418ea96 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -39,6 +39,15 @@
 #else
 #define DO_8xx_CPU6(val, reg)
 #endif
+
+/* DAR needs to be tagged with a known value so that the
+ * DataTLB Miss/Error and do_page_fault() can recognize a
+ * buggy dcbx instruction and workaround the problem.
+ * dcbf, dcbi, dcbst, dcbz instructions do not update DAR
+ * when trapping into a Data TLB Miss/Error. See
+ * DataStoreTLBMiss and DataTLBError for details
+ */
+
 	__HEAD
 _ENTRY(_stext);
 _ENTRY(_start);
@@ -428,7 +437,8 @@ DataStoreTLBMiss:
 	 * set.  All other Linux PTE bits control the behavior
 	 * of the MMU.
 	 */
-2:	li	r11, 0x00f0
+	li	r11, 0x00f0
+	mtspr	SPRN_DAR, r11		/* Tag DAR */
 	rlwimi	r10, r11, 0, 24, 28	/* Set 24-27, clear 28 */
 	DO_8xx_CPU6(0x3d80, r3)
 	mtspr	SPRN_MD_RPN, r10	/* Update TLB entry */
@@ -441,7 +451,15 @@ DataStoreTLBMiss:
 	lwz	r3, 8(r0)
 #endif
 	rfi
-
+2:
+	mfspr	r10, SPRN_M_TW	/* Restore registers */
+	lwz	r11, 0(r0)
+	mtcr	r11
+	lwz	r11, 4(r0)
+#ifdef CONFIG_8xx_CPU6
+	lwz	r3, 8(r0)
+#endif
+	b	DataAccess
 /* This is an instruction TLB error on the MPC8xx.  This could be due
  * to many reasons, such as executing guarded memory or illegal instruction
  * addresses.  There is nothing to do but handle a big time error fault.
@@ -492,6 +510,8 @@ DataTLBError:
 	 * assuming we only use the dcbi instruction on kernel addresses.
 	 */
 	mfspr	r10, SPRN_DAR
+	cmpwi	cr0, r10, 0xf0		/* check it DAR holds a tag */
+	beq-	2f
 	rlwinm	r11, r10, 0, 0, 19
 	ori	r11, r11, MD_EVALID
 	mfspr	r10, SPRN_M_CASID
@@ -547,6 +567,7 @@ DataTLBError:
 	 * of the MMU.
 	 */
 	li	r11, 0x00f0
+	mtspr	SPRN_DAR, r11		/* Tag DAR */
 	rlwimi	r10, r11, 0, 24, 28	/* Set 24-27, clear 28 */
 	DO_8xx_CPU6(0x3d80, r3)
 	mtspr	SPRN_MD_RPN, r10	/* Update TLB entry */
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 7699394..be779b2 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -125,6 +125,32 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
 	int trap = TRAP(regs);
  	int is_exec = trap == 0x400;

+#if defined(CONFIG_8xx)
+/*
+ Workarund DTLB Miss/Error, as these do not update DAR
+ for dcbf, dcbi, dcbst, dcbz instructions
+ This relies on every exception tagging DAR with 0xf0
+ before returning (rfi)
+ DAR as passed as address to this function.
+ */
+#define RA(inst)        (((inst) & 0x001F0000) >> 16)
+#define RB(inst)        (((inst) & 0x0000F800) >> 11)
+	{
+		unsigned long ra, rb, dar, insns;
+
+		if (trap == 0x300 && address == 0xf0) {
+			insns = *((unsigned long *)regs->nip);
+			/* Really check if it is an dcbf, dcbi, dcbst, dcbz insns ? */
+			ra  = RA(insns); /* Reg Ra */
+			rb  = RB(insns); /* Reg Rb */
+			dar = regs->gpr[rb];
+			if (ra)
+				dar += regs->gpr[ra];
+			/* regs->dar = dar; perhaps */
+			address = dar;
+		}
+	}
+#endif
 #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
 	/*
 	 * Fortunately the bit assignments in SRR1 for an instruction

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-29 11:56                     ` Joakim Tjernlund
@ 2009-09-29 21:03                       ` Rex Feany
  2009-09-30  7:59                         ` Joakim Tjernlund
  0 siblings, 1 reply; 58+ messages in thread
From: Rex Feany @ 2009-09-29 21:03 UTC (permalink / raw)
  To: Joakim Tjernlund; +Cc: linuxppc-dev

Thus spake Joakim Tjernlund (joakim.tjernlund@transmode.se):

> Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 29/09/2009 10:16:38:
> >
> >
> > > hmm, yes. You do get this and mysterious SEGV if you hit the but so does
> > > other bugs too so this is probably due to missing invalidation.
> > >
> > > I suspect that something like below will fix the problem and
> > > is the "correct" fix(untested, not even compiled):
> >
> > Ok but do we also still have to worry about the "unpopulated" TLB
> > entries and invalidate them somehow when populating ?
> 
> Since I am probably the only one that knows about DAR problem I figured
> I should take a stab at it. This is not tested, but I hope Rex and the list
> can do that. Once this works as it should, we can remove all special handling
> for 8xx in copy_tofrom_user() and friends.
> No sign-off yet, want some confirmation first.

It doesn't make a difference. I applied it to the top of the tree,
it got to userspace but it is stuck. Using break I am able to dump the
registers, I'm not sure if this is useful. Oh, well, I finally got to a shell
but it is unusably slow. Is there some test code that would be better to run?
I tried looking through the mailing list archives but I couldn't find anything.

thanks!
/rex.

SysRq : Show Regs
NIP: c00588d0 LR: c000e3c4 CTR: 00001fde
REGS: c3459d90 TRAP: 0501   Not tainted  (2.6.32-rc2-00013-g2d222d9-dirty)
MSR: 00009032 <EE,ME,IR,DR>  CR: 44008422  XER: 20006a02
TASK = c3438050[13] 'rc.sysinit' THREAD: c3458000
GPR00: c000e3c4 c3459e40 c3438050 c21a2a60 c345bdf4 0fd81032 00000000 3001e920 
GPR08: 00000000 00000005 c21a2a60 c0210000 03438260 
NIP [c00588d0] handle_mm_fault+0x10/0xacc
LR [c000e3c4] do_page_fault+0x2f0/0x474
Call Trace:
[c3459e40] [c0059328] handle_mm_fault+0xa68/0xacc (unreliable)
[c3459e90] [c000e418] do_page_fault+0x344/0x474
[c3459f40] [c000d520] handle_page_fault+0xc/0x80
Instruction dump:
4bff15f1 39600000 80010014 7d635b78 7c0803a6 bbc10008 38210010 4e800020 
7c0802a6 9421ffb0 3d60c021 be810020 <90010054> 38000000 90020000 396b4bec 

SysRq : Show Regs
NIP: c000e0e0 LR: c000d520 CTR: 00001fde
REGS: c21adde0 TRAP: 0501   Not tainted  (2.6.32-rc2-00013-g2d222d9-dirty)
MSR: 00009032 <EE,ME,IR,DR>  CR: 48008424  XER: 00006a02
TASK = c3438460[18] 'sh' THREAD: c21ac000
GPR00: c000d520 c21ade90 c3438460 c21adf50 0fd76cb8 c0000000 00000004 0fee8c44 
GPR08: 00009f6c c000d788 00009032 c000d514 03438670 
NIP [c000e0e0] do_page_fault+0xc/0x474
LR [c000d520] handle_page_fault+0xc/0x80
Call Trace:
[c21ade90] [c000e418] do_page_fault+0x344/0x474 (unreliable)
[c21adf40] [c000d520] handle_page_fault+0xc/0x80
Instruction dump:
3863f604 7fe4fb78 7fc5f378 4bffcd39 80010014 bbc10008 7c0803a6 38210010 
4e800020 7c0802a6 9421ff50 bf010090 <900100b4> 7c7e1b78 800300a0 7c9d2378 

SysRq : Show Regs
NIP: c005930c LR: c000e3c4 CTR: 00001fde
REGS: c21add90 TRAP: 0501   Not tainted  (2.6.32-rc2-00013-g2d222d9-dirty)
MSR: 00009032 <EE,ME,IR,DR>  CR: 24002422  XER: 00006a02
TASK = c3438460[18] 'sh' THREAD: c21ac000
GPR00: c0236000 c21ade40 c3438460 c21a2a60 c3444df4 c3456000 00000000 feff0000 
GPR08: c21ac000 03456000 c0220000 00000001 03438670 
NIP [c005930c] handle_mm_fault+0xa4c/0xacc
LR [c000e3c4] do_page_fault+0x2f0/0x474
Call Trace:
[c21ade40] [c0059328] handle_mm_fault+0xa68/0xacc (unreliable)
[c21ade90] [c000e3c4] do_page_fault+0x2f0/0x474
[c21adf40] [c000d520] handle_page_fault+0xc/0x80
Instruction dump:
4802192d 48000094 2f9f0000 3ba00001 419e0088 7fe3fb78 4bff46a1 48000078 
7fe3fb78 4bff4695 48000070 63390020 <633f0080> 7f45d378 7f63db78 7f04c378 

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-29 21:03                       ` Rex Feany
@ 2009-09-30  7:59                         ` Joakim Tjernlund
  2009-09-30  8:19                           ` Joakim Tjernlund
  0 siblings, 1 reply; 58+ messages in thread
From: Joakim Tjernlund @ 2009-09-30  7:59 UTC (permalink / raw)
  To: Rex Feany; +Cc: linuxppc-dev

Rex Feany <RFeany@mrv.com> wrote on 29/09/2009 23:03:31:
>
> Thus spake Joakim Tjernlund (joakim.tjernlund@transmode.se):
>
> > Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 29/09/2009 10:16:38:
> > >
> > >
> > > > hmm, yes. You do get this and mysterious SEGV if you hit the but so does
> > > > other bugs too so this is probably due to missing invalidation.
> > > >
> > > > I suspect that something like below will fix the problem and
> > > > is the "correct" fix(untested, not even compiled):
> > >
> > > Ok but do we also still have to worry about the "unpopulated" TLB
> > > entries and invalidate them somehow when populating ?
> >
> > Since I am probably the only one that knows about DAR problem I figured
> > I should take a stab at it. This is not tested, but I hope Rex and the list
> > can do that. Once this works as it should, we can remove all special handling
> > for 8xx in copy_tofrom_user() and friends.
> > No sign-off yet, want some confirmation first.
>
> It doesn't make a difference. I applied it to the top of the tree,
> it got to userspace but it is stuck. Using break I am able to dump the
> registers, I'm not sure if this is useful. Oh, well, I finally got to a shell
> but it is unusably slow. Is there some test code that would be better to run?
> I tried looking through the mailing list archives but I couldn't find anything.
>
> thanks!
> /rex.

Ok, I have made some minor tweaks and added debug code in
do_page_fault(). Would be great if you could try on both
.31 and top of tree.

 Jocke

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 4dd38f1..b3f6687 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -709,6 +709,14 @@ ret_from_except:
 	SYNC			/* Some chip revs have problems here... */
 	MTMSRD(r10)		/* disable interrupts */

+#ifdef CONFIG_8xx
+	/* Tag DAR with a well know value.
+	 * This needs to match head_8xx.S and
+	 * do_page_fault()
+	 */
+	li	r3, 0x00f0
+	mtspr	SPRN_DAR, r3
+#endif
 	lwz	r3,_MSR(r1)	/* Returning to user mode? */
 	andi.	r0,r3,MSR_PR
 	beq	resume_kernel
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 52ff8c5..f9e2363 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -39,6 +39,15 @@
 #else
 #define DO_8xx_CPU6(val, reg)
 #endif
+
+/* DAR needs to be tagged with a known value so that the
+ * DataTLB Miss/Error and do_page_fault() can recognize a
+ * buggy dcbx instruction and workaround the problem.
+ * dcbf, dcbi, dcbst, dcbz instructions do not update DAR
+ * when trapping into a Data TLB Miss/Error. See
+ * DataStoreTLBMiss and DataTLBError for details
+ */
+
 	__HEAD
 _ENTRY(_stext);
 _ENTRY(_start);
@@ -352,7 +361,7 @@ InstructionTLBMiss:
 	 * set.  All other Linux PTE bits control the behavior
 	 * of the MMU.
 	 */
-2:	li	r11, 0x00f0
+	li	r11, 0x00f0
 	rlwimi	r10, r11, 0, 24, 28	/* Set 24-27, clear 28 */
 	DO_8xx_CPU6(0x2d80, r3)
 	mtspr	SPRN_MI_RPN, r10	/* Update TLB entry */
@@ -365,6 +374,19 @@ InstructionTLBMiss:
 	lwz	r3, 8(r0)
 #endif
 	rfi
+2:
+	mfspr	r10, SPRN_SRR1
+	rlwinm	r10,r10,0,5,3 /* clear bit 4(0x08000000) */
+	mtspr	SPRN_SRR1, r10
+
+	mfspr	r10, SPRN_M_TW	/* Restore registers */
+	lwz	r11, 0(r0)
+	mtcr	r11
+	lwz	r11, 4(r0)
+#ifdef CONFIG_8xx_CPU6
+	lwz	r3, 8(r0)
+#endif
+	b	InstructionAccess

 	. = 0x1200
 DataStoreTLBMiss:
@@ -428,10 +450,11 @@ DataStoreTLBMiss:
 	 * set.  All other Linux PTE bits control the behavior
 	 * of the MMU.
 	 */
-2:	li	r11, 0x00f0
+	li	r11, 0x00f0
 	rlwimi	r10, r11, 0, 24, 28	/* Set 24-27, clear 28 */
 	DO_8xx_CPU6(0x3d80, r3)
 	mtspr	SPRN_MD_RPN, r10	/* Update TLB entry */
+	mtspr	SPRN_DAR, r11		/* Tag DAR */

 	mfspr	r10, SPRN_M_TW	/* Restore registers */
 	lwz	r11, 0(r0)
@@ -441,7 +464,15 @@ DataStoreTLBMiss:
 	lwz	r3, 8(r0)
 #endif
 	rfi
-
+2:
+	mfspr	r10, SPRN_M_TW	/* Restore registers */
+	lwz	r11, 0(r0)
+	mtcr	r11
+	lwz	r11, 4(r0)
+#ifdef CONFIG_8xx_CPU6
+	lwz	r3, 8(r0)
+#endif
+	b	DataAccess
 /* This is an instruction TLB error on the MPC8xx.  This could be due
  * to many reasons, such as executing guarded memory or illegal instruction
  * addresses.  There is nothing to do but handle a big time error fault.
@@ -492,6 +523,8 @@ DataTLBError:
 	 * assuming we only use the dcbi instruction on kernel addresses.
 	 */
 	mfspr	r10, SPRN_DAR
+	cmpwi	cr0, r10, 0x00f0	/* check if DAR holds a tag */
+	beq-	2f
 	rlwinm	r11, r10, 0, 0, 19
 	ori	r11, r11, MD_EVALID
 	mfspr	r10, SPRN_M_CASID
@@ -550,6 +583,7 @@ DataTLBError:
 	rlwimi	r10, r11, 0, 24, 28	/* Set 24-27, clear 28 */
 	DO_8xx_CPU6(0x3d80, r3)
 	mtspr	SPRN_MD_RPN, r10	/* Update TLB entry */
+	mtspr	SPRN_DAR, r11		/* Tag DAR */

 	mfspr	r10, SPRN_M_TW	/* Restore registers */
 	lwz	r11, 0(r0)
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 7699394..4fefd01 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -125,6 +125,61 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
 	int trap = TRAP(regs);
  	int is_exec = trap == 0x400;

+#if 1 /* defined(CONFIG_8xx)*/
+/*
+ Work around DTLB Miss/Error, as these do not update
+ DAR for dcbf, dcbi, dcbst, dcbz instructions
+ This relies on every exception tagging DAR with 0xf0
+ before returning (rfi)
+ DAR is passed as 'address' to this function.
+ */
+	{
+		unsigned long ra, rb, dar, insn;
+		char *istr = NULL;
+
+		insn = *((unsigned long *)regs->nip);
+		if ((insn >> (31-5)) == 31) {
+			if ((insn >> 1) == 1014) /* dcbz ? */
+				istr = "dcbz";
+			if ((insn >> 1) == 86) /* dcbf ? */
+				istr = "dcbf";
+			if ((insn >> 1) == 470) /* dcbi ? */
+				istr = "dcbi";
+			if ((insn >> 1) == 54) /* dcbst ? */
+				istr = "dcbst";
+		}
+		if (istr) {
+			ra = (insn >> (31-15)) & 0x1f; /* Reg RA */
+			rb = (insn >> (31-20)) & 0x1f; /* Reg RB */
+			printk(KERN_CRIT "%s r%ld,r%ld found at NIP:%lx\n",
+			       istr, regs->nip, ra, rb);
+			printk(KERN_CRIT "DAR is:%lx (should be 0xf0)\n", regs->dar);
+
+		}
+		if (regs->dar == 0xf0 && !istr)
+			printk(KERN_CRIT "DAR is 0xf0 but insn at NIP is: %lx is"
+			       "not a cache insn:%lx!\n", regs->nip, insn);
+		if (trap == 0x300 && address != regs->dar)
+			printk(KERN_CRIT "DAR:%lx != address:%lx!\n", address, regs->dar);
+
+		if (istr || (trap == 0x300 && address == 0xf0)) {
+			insn = *((unsigned long *)regs->nip);
+			/* Check if it is a
+			 * dcbf, dcbi, dcbst, dcbz insn ?
+			if ((insn >> (31-5)) != 31)
+				break; // Not a dcbx instruction
+			 */
+
+			ra = (insn >> (31-15)) & 0x1f; /* Reg RA */
+			rb = (insn >> (31-20)) & 0x1f; /* Reg RB */
+			dar = regs->gpr[rb];
+			if (ra)
+				dar += regs->gpr[ra];
+			regs->dar = dar;
+			address = dar;
+		}
+	}
+#endif
 #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
 	/*
 	 * Fortunately the bit assignments in SRR1 for an instruction

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-30  7:59                         ` Joakim Tjernlund
@ 2009-09-30  8:19                           ` Joakim Tjernlund
  2009-09-30  9:00                             ` Rex Feany
  0 siblings, 1 reply; 58+ messages in thread
From: Joakim Tjernlund @ 2009-09-30  8:19 UTC (permalink / raw)
  Cc: linuxppc-dev, Rex Feany

>
> Rex Feany <RFeany@mrv.com> wrote on 29/09/2009 23:03:31:
> >
> > Thus spake Joakim Tjernlund (joakim.tjernlund@transmode.se):
> >
> > > Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 29/09/2009 10:16:38:
> > > >
> > > >
> > > > > hmm, yes. You do get this and mysterious SEGV if you hit the but so does
> > > > > other bugs too so this is probably due to missing invalidation.
> > > > >
> > > > > I suspect that something like below will fix the problem and
> > > > > is the "correct" fix(untested, not even compiled):
> > > >
> > > > Ok but do we also still have to worry about the "unpopulated" TLB
> > > > entries and invalidate them somehow when populating ?
> > >
> > > Since I am probably the only one that knows about DAR problem I figured
> > > I should take a stab at it. This is not tested, but I hope Rex and the list
> > > can do that. Once this works as it should, we can remove all special handling
> > > for 8xx in copy_tofrom_user() and friends.
> > > No sign-off yet, want some confirmation first.
> >
> > It doesn't make a difference. I applied it to the top of the tree,
> > it got to userspace but it is stuck. Using break I am able to dump the
> > registers, I'm not sure if this is useful. Oh, well, I finally got to a shell
> > but it is unusably slow. Is there some test code that would be better to run?
> > I tried looking through the mailing list archives but I couldn't find anything.
> >
> > thanks!
> > /rex.
>
> Ok, I have made some minor tweaks and added debug code in
> do_page_fault(). Would be great if you could try on both
> .31 and top of tree.
>
>  Jocke

OOPS, found a bug. Use this one instead:

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 4dd38f1..b3f6687 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -709,6 +709,14 @@ ret_from_except:
 	SYNC			/* Some chip revs have problems here... */
 	MTMSRD(r10)		/* disable interrupts */

+#ifdef CONFIG_8xx
+	/* Tag DAR with a well know value.
+	 * This needs to match head_8xx.S and
+	 * do_page_fault()
+	 */
+	li	r3, 0x00f0
+	mtspr	SPRN_DAR, r3
+#endif
 	lwz	r3,_MSR(r1)	/* Returning to user mode? */
 	andi.	r0,r3,MSR_PR
 	beq	resume_kernel
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 52ff8c5..f9e2363 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -39,6 +39,15 @@
 #else
 #define DO_8xx_CPU6(val, reg)
 #endif
+
+/* DAR needs to be tagged with a known value so that the
+ * DataTLB Miss/Error and do_page_fault() can recognize a
+ * buggy dcbx instruction and workaround the problem.
+ * dcbf, dcbi, dcbst, dcbz instructions do not update DAR
+ * when trapping into a Data TLB Miss/Error. See
+ * DataStoreTLBMiss and DataTLBError for details
+ */
+
 	__HEAD
 _ENTRY(_stext);
 _ENTRY(_start);
@@ -352,7 +361,7 @@ InstructionTLBMiss:
 	 * set.  All other Linux PTE bits control the behavior
 	 * of the MMU.
 	 */
-2:	li	r11, 0x00f0
+	li	r11, 0x00f0
 	rlwimi	r10, r11, 0, 24, 28	/* Set 24-27, clear 28 */
 	DO_8xx_CPU6(0x2d80, r3)
 	mtspr	SPRN_MI_RPN, r10	/* Update TLB entry */
@@ -365,6 +374,19 @@ InstructionTLBMiss:
 	lwz	r3, 8(r0)
 #endif
 	rfi
+2:
+	mfspr	r10, SPRN_SRR1
+	rlwinm	r10,r10,0,5,3 /* clear bit 4(0x08000000) */
+	mtspr	SPRN_SRR1, r10
+
+	mfspr	r10, SPRN_M_TW	/* Restore registers */
+	lwz	r11, 0(r0)
+	mtcr	r11
+	lwz	r11, 4(r0)
+#ifdef CONFIG_8xx_CPU6
+	lwz	r3, 8(r0)
+#endif
+	b	InstructionAccess

 	. = 0x1200
 DataStoreTLBMiss:
@@ -428,10 +450,11 @@ DataStoreTLBMiss:
 	 * set.  All other Linux PTE bits control the behavior
 	 * of the MMU.
 	 */
-2:	li	r11, 0x00f0
+	li	r11, 0x00f0
 	rlwimi	r10, r11, 0, 24, 28	/* Set 24-27, clear 28 */
 	DO_8xx_CPU6(0x3d80, r3)
 	mtspr	SPRN_MD_RPN, r10	/* Update TLB entry */
+	mtspr	SPRN_DAR, r11		/* Tag DAR */

 	mfspr	r10, SPRN_M_TW	/* Restore registers */
 	lwz	r11, 0(r0)
@@ -441,7 +464,15 @@ DataStoreTLBMiss:
 	lwz	r3, 8(r0)
 #endif
 	rfi
-
+2:
+	mfspr	r10, SPRN_M_TW	/* Restore registers */
+	lwz	r11, 0(r0)
+	mtcr	r11
+	lwz	r11, 4(r0)
+#ifdef CONFIG_8xx_CPU6
+	lwz	r3, 8(r0)
+#endif
+	b	DataAccess
 /* This is an instruction TLB error on the MPC8xx.  This could be due
  * to many reasons, such as executing guarded memory or illegal instruction
  * addresses.  There is nothing to do but handle a big time error fault.
@@ -492,6 +523,8 @@ DataTLBError:
 	 * assuming we only use the dcbi instruction on kernel addresses.
 	 */
 	mfspr	r10, SPRN_DAR
+	cmpwi	cr0, r10, 0x00f0	/* check if DAR holds a tag */
+	beq-	2f
 	rlwinm	r11, r10, 0, 0, 19
 	ori	r11, r11, MD_EVALID
 	mfspr	r10, SPRN_M_CASID
@@ -550,6 +583,7 @@ DataTLBError:
 	rlwimi	r10, r11, 0, 24, 28	/* Set 24-27, clear 28 */
 	DO_8xx_CPU6(0x3d80, r3)
 	mtspr	SPRN_MD_RPN, r10	/* Update TLB entry */
+	mtspr	SPRN_DAR, r11		/* Tag DAR */

 	mfspr	r10, SPRN_M_TW	/* Restore registers */
 	lwz	r11, 0(r0)
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 7699394..789b16b 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -125,6 +125,61 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
 	int trap = TRAP(regs);
  	int is_exec = trap == 0x400;

+#if 1 /* defined(CONFIG_8xx)*/
+/*
+ Work around DTLB Miss/Error, as these do not update
+ DAR for dcbf, dcbi, dcbst, dcbz instructions
+ This relies on every exception tagging DAR with 0xf0
+ before returning (rfi)
+ DAR is passed as 'address' to this function.
+ */
+	{
+		unsigned long ra, rb, dar, insn;
+		char *istr = NULL;
+
+		insn = *((unsigned long *)regs->nip);
+		if (((insn >> (31-5)) & 0x3f) == 31) {
+			if (((insn >> 1) & 0x3ff) == 1014) /* dcbz ? 0x3f6 */
+				istr = "dcbz";
+			if (((insn >> 1) & 0x3ff) == 86) /* dcbf ? 0x56 */
+				istr = "dcbf";
+			if (((insn >> 1) & 0x3ff) == 470) /* dcbi ? 0x1d6 */
+				istr = "dcbi";
+			if (((insn >> 1) & 0x3ff) == 54) /* dcbst ? 0x36 */
+				istr = "dcbst";
+		}
+		if (istr) {
+			ra = (insn >> (31-15)) & 0x1f; /* Reg RA */
+			rb = (insn >> (31-20)) & 0x1f; /* Reg RB */
+			printk(KERN_CRIT "%s r%ld,r%ld found at NIP:%lx\n",
+			       istr, regs->nip, ra, rb);
+			printk(KERN_CRIT "DAR is:%lx (should be 0xf0)\n", regs->dar);
+
+		}
+		if (regs->dar == 0x00f0 && !istr)
+			printk(KERN_CRIT "DAR is 0x00f0 but insn at NIP is: %lx is"
+			       "not a cache insn:%lx!\n", regs->nip, insn);
+		if (trap == 0x300 && address != regs->dar)
+			printk(KERN_CRIT "DAR:%lx != address:%lx!\n", address, regs->dar);
+
+		if (istr || (trap == 0x300 && address == 0x00f0)) {
+			insn = *((unsigned long *)regs->nip);
+			/* Check if it is a
+			 * dcbf, dcbi, dcbst, dcbz insn ?
+			if ((insn >> (31-5)) != 31)
+				break; // Not a dcbx instruction
+			 */
+
+			ra = (insn >> (31-15)) & 0x1f; /* Reg RA */
+			rb = (insn >> (31-20)) & 0x1f; /* Reg RB */
+			dar = regs->gpr[rb];
+			if (ra)
+				dar += regs->gpr[ra];
+			regs->dar = dar;
+			address = dar;
+		}
+	}
+#endif
 #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
 	/*
 	 * Fortunately the bit assignments in SRR1 for an instruction

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-30  8:19                           ` Joakim Tjernlund
@ 2009-09-30  9:00                             ` Rex Feany
  2009-09-30  9:58                               ` Joakim Tjernlund
  2009-09-30 11:18                               ` Joakim Tjernlund
  0 siblings, 2 replies; 58+ messages in thread
From: Rex Feany @ 2009-09-30  9:00 UTC (permalink / raw)
  To: Joakim Tjernlund; +Cc: linuxppc-dev

Thus spake Joakim Tjernlund (joakim.tjernlund@transmode.se):

> > Ok, I have made some minor tweaks and added debug code in
> > do_page_fault(). Would be great if you could try on both
> > .31 and top of tree.
> >
> >  Jocke
> 
> OOPS, found a bug. Use this one instead:

.31 - no change, it worked before your patch and it works after.
None of your debugging printks show up. I tried removing the tlbil_va()
from do_dcache_icache_coherency() and userspace goes back to be being
slow/non functional.

top of tree - userspace is slow and unstable, random segfaults,
and none of your debugging printks show up :(

take care!
/rex

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-30  9:00                             ` Rex Feany
@ 2009-09-30  9:58                               ` Joakim Tjernlund
  2009-09-30 11:18                               ` Joakim Tjernlund
  1 sibling, 0 replies; 58+ messages in thread
From: Joakim Tjernlund @ 2009-09-30  9:58 UTC (permalink / raw)
  To: Rex Feany; +Cc: linuxppc-dev

Rex Feany <RFeany@mrv.com> wrote on 30/09/2009 11:00:02:
>
> Thus spake Joakim Tjernlund (joakim.tjernlund@transmode.se):
>
> > > Ok, I have made some minor tweaks and added debug code in
> > > do_page_fault(). Would be great if you could try on both
> > > .31 and top of tree.
> > >
> > >  Jocke
> >
> > OOPS, found a bug. Use this one instead:
>
> .31 - no change, it worked before your patch and it works after.
> None of your debugging printks show up. I tried removing the tlbil_va()
> from do_dcache_icache_coherency() and userspace goes back to be being
> slow/non functional.
>
> top of tree - userspace is slow and unstable, random segfaults,
> and none of your debugging printks show up :(

OK, something strange is going on. I am starting to suspect that there
is some other problem here.

If my patch is any good you should be able to use dcbx insn in copy_tofrom_user().
You could try changing all CONFIG_8xx to CONFIG_8xx_deleted arch/powerpc/lib/copy_32.S
and see if that works and if you see any debug prints.

 Jocke

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-30  9:00                             ` Rex Feany
  2009-09-30  9:58                               ` Joakim Tjernlund
@ 2009-09-30 11:18                               ` Joakim Tjernlund
  2009-09-30 17:23                                 ` Joakim Tjernlund
  1 sibling, 1 reply; 58+ messages in thread
From: Joakim Tjernlund @ 2009-09-30 11:18 UTC (permalink / raw)
  To: Rex Feany; +Cc: linuxppc-dev

Rex Feany <RFeany@mrv.com> wrote on 30/09/2009 11:00:02:
>
> Thus spake Joakim Tjernlund (joakim.tjernlund@transmode.se):
>
> > > Ok, I have made some minor tweaks and added debug code in
> > > do_page_fault(). Would be great if you could try on both
> > > .31 and top of tree.
> > >
> > >  Jocke
> >
> > OOPS, found a bug. Use this one instead:
>
> .31 - no change, it worked before your patch and it works after.
> None of your debugging printks show up. I tried removing the tlbil_va()
> from do_dcache_icache_coherency() and userspace goes back to be being
> slow/non functional.

Had a look at linus tree and there is something I don't understand.
Your fix, e0908085fc2391c85b85fb814ae1df377c8e0dcb, fixes a problem
that was introduced by 8d30c14cab30d405a05f2aaceda1e9ad57800f36 but
8d30c14cab30d405a05f2aaceda1e9ad57800f36 was included in .31 and .31
works and top of tree does not, how can that be?

To me it seems more likely that some mm change introduced between .31 and
top of tree is the culprit.
My patch addresses the problem described in the comment:
/* On 8xx, cache control instructions (particularly
 * "dcbst" from flush_dcache_icache) fault as write
 * operation if there is an unpopulated TLB entry
 * for the address in question. To workaround that,
 * we invalidate the TLB here, thus avoiding dcbst
 * misbehaviour.
 */
Now you are using this old fix to paper over some other bug or so I think.

 Jocke

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-30 11:18                               ` Joakim Tjernlund
@ 2009-09-30 17:23                                 ` Joakim Tjernlund
  2009-09-30 22:35                                   ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 58+ messages in thread
From: Joakim Tjernlund @ 2009-09-30 17:23 UTC (permalink / raw)
  Cc: linuxppc-dev, Rex Feany

>
> Rex Feany <RFeany@mrv.com> wrote on 30/09/2009 11:00:02:
> >
> > Thus spake Joakim Tjernlund (joakim.tjernlund@transmode.se):
> >
> > > > Ok, I have made some minor tweaks and added debug code in
> > > > do_page_fault(). Would be great if you could try on both
> > > > .31 and top of tree.
> > > >
> > > >  Jocke
> > >
> > > OOPS, found a bug. Use this one instead:
> >
> > .31 - no change, it worked before your patch and it works after.
> > None of your debugging printks show up. I tried removing the tlbil_va()
> > from do_dcache_icache_coherency() and userspace goes back to be being
> > slow/non functional.
>
> Had a look at linus tree and there is something I don't understand.
> Your fix, e0908085fc2391c85b85fb814ae1df377c8e0dcb, fixes a problem
> that was introduced by 8d30c14cab30d405a05f2aaceda1e9ad57800f36 but
> 8d30c14cab30d405a05f2aaceda1e9ad57800f36 was included in .31 and .31
> works and top of tree does not, how can that be?
>
> To me it seems more likely that some mm change introduced between .31 and
> top of tree is the culprit.
> My patch addresses the problem described in the comment:
> /* On 8xx, cache control instructions (particularly
>  * "dcbst" from flush_dcache_icache) fault as write
>  * operation if there is an unpopulated TLB entry
>  * for the address in question. To workaround that,
>  * we invalidate the TLB here, thus avoiding dcbst
>  * misbehaviour.
>  */
> Now you are using this old fix to paper over some other bug or so I think.

There is something fishy with the TLB status, looking into the mpc862 manual I
don't see how it can work reliably. Need to dwell some more.
Anyhow, I have incorporated some of my findings into a new patch,
perhaps this will be the golden one?

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 4dd38f1..b3f6687 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -709,6 +709,14 @@ ret_from_except:
 	SYNC			/* Some chip revs have problems here... */
 	MTMSRD(r10)		/* disable interrupts */

+#ifdef CONFIG_8xx
+	/* Tag DAR with a well know value.
+	 * This needs to match head_8xx.S and
+	 * do_page_fault()
+	 */
+	li	r3, 0x00f0
+	mtspr	SPRN_DAR, r3
+#endif
 	lwz	r3,_MSR(r1)	/* Returning to user mode? */
 	andi.	r0,r3,MSR_PR
 	beq	resume_kernel
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 52ff8c5..de1fd58 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -39,6 +39,15 @@
 #else
 #define DO_8xx_CPU6(val, reg)
 #endif
+
+/* DAR needs to be tagged with a known value so that the
+ * DataTLB Miss/Error and do_page_fault() can recognize a
+ * buggy dcbx instruction and workaround the problem.
+ * dcbf, dcbi, dcbst, dcbz instructions do not update DAR
+ * when trapping into a Data TLB Miss/Error. See
+ * DataStoreTLBMiss and DataTLBError for details
+ */
+
 	__HEAD
 _ENTRY(_stext);
 _ENTRY(_start);
@@ -222,6 +231,7 @@ DataAccess:
 	stw	r10,_DSISR(r11)
 	mr	r5,r10
 	mfspr	r4,SPRN_DAR
+	stw	r4,_DAR(r11)
 	EXC_XFER_EE_LITE(0x300, handle_page_fault)

 /* Instruction access exception.
@@ -352,7 +362,7 @@ InstructionTLBMiss:
 	 * set.  All other Linux PTE bits control the behavior
 	 * of the MMU.
 	 */
-2:	li	r11, 0x00f0
+	li	r11, 0x00f0
 	rlwimi	r10, r11, 0, 24, 28	/* Set 24-27, clear 28 */
 	DO_8xx_CPU6(0x2d80, r3)
 	mtspr	SPRN_MI_RPN, r10	/* Update TLB entry */
@@ -365,6 +375,19 @@ InstructionTLBMiss:
 	lwz	r3, 8(r0)
 #endif
 	rfi
+2:
+	mfspr	r10, SPRN_SRR1
+	rlwinm	r10,r10,0,5,3 /* clear bit 4(0x08000000) */
+	mtspr	SPRN_SRR1, r10
+
+	mfspr	r10, SPRN_M_TW	/* Restore registers */
+	lwz	r11, 0(r0)
+	mtcr	r11
+	lwz	r11, 4(r0)
+#ifdef CONFIG_8xx_CPU6
+	lwz	r3, 8(r0)
+#endif
+	b	InstructionAccess

 	. = 0x1200
 DataStoreTLBMiss:
@@ -428,10 +451,11 @@ DataStoreTLBMiss:
 	 * set.  All other Linux PTE bits control the behavior
 	 * of the MMU.
 	 */
-2:	li	r11, 0x00f0
+	li	r11, 0x00f0
 	rlwimi	r10, r11, 0, 24, 28	/* Set 24-27, clear 28 */
 	DO_8xx_CPU6(0x3d80, r3)
 	mtspr	SPRN_MD_RPN, r10	/* Update TLB entry */
+	mtspr	SPRN_DAR, r11		/* Tag DAR */

 	mfspr	r10, SPRN_M_TW	/* Restore registers */
 	lwz	r11, 0(r0)
@@ -441,7 +465,18 @@ DataStoreTLBMiss:
 	lwz	r3, 8(r0)
 #endif
 	rfi
+2:
+	li	r10, 0
+	mtspr	SPRN_DSISR, r10 /* Clear all bits */

+	mfspr	r10, SPRN_M_TW	/* Restore registers */
+	lwz	r11, 0(r0)
+	mtcr	r11
+	lwz	r11, 4(r0)
+#ifdef CONFIG_8xx_CPU6
+	lwz	r3, 8(r0)
+#endif
+	b	DataAccess
 /* This is an instruction TLB error on the MPC8xx.  This could be due
  * to many reasons, such as executing guarded memory or illegal instruction
  * addresses.  There is nothing to do but handle a big time error fault.
@@ -492,6 +527,8 @@ DataTLBError:
 	 * assuming we only use the dcbi instruction on kernel addresses.
 	 */
 	mfspr	r10, SPRN_DAR
+	cmpwi	cr0, r10, 0x00f0	/* check if DAR holds a tag */
+	beq-	2f
 	rlwinm	r11, r10, 0, 0, 19
 	ori	r11, r11, MD_EVALID
 	mfspr	r10, SPRN_M_CASID
@@ -550,6 +587,7 @@ DataTLBError:
 	rlwimi	r10, r11, 0, 24, 28	/* Set 24-27, clear 28 */
 	DO_8xx_CPU6(0x3d80, r3)
 	mtspr	SPRN_MD_RPN, r10	/* Update TLB entry */
+	mtspr	SPRN_DAR, r11		/* Tag DAR */

 	mfspr	r10, SPRN_M_TW	/* Restore registers */
 	lwz	r11, 0(r0)

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-30 17:23                                 ` Joakim Tjernlund
@ 2009-09-30 22:35                                   ` Benjamin Herrenschmidt
  2009-10-01  7:05                                     ` Joakim Tjernlund
                                                       ` (2 more replies)
  0 siblings, 3 replies; 58+ messages in thread
From: Benjamin Herrenschmidt @ 2009-09-30 22:35 UTC (permalink / raw)
  To: Joakim Tjernlund; +Cc: linuxppc-dev, Rex Feany

> > Had a look at linus tree and there is something I don't understand.
> > Your fix, e0908085fc2391c85b85fb814ae1df377c8e0dcb, fixes a problem
> > that was introduced by 8d30c14cab30d405a05f2aaceda1e9ad57800f36 but
> > 8d30c14cab30d405a05f2aaceda1e9ad57800f36 was included in .31 and .31
> > works and top of tree does not, how can that be?
> >
> > To me it seems more likely that some mm change introduced between .31 and
> > top of tree is the culprit.
> > My patch addresses the problem described in the comment:
> > /* On 8xx, cache control instructions (particularly
> >  * "dcbst" from flush_dcache_icache) fault as write
> >  * operation if there is an unpopulated TLB entry
> >  * for the address in question. To workaround that,
> >  * we invalidate the TLB here, thus avoiding dcbst
> >  * misbehaviour.
> >  */
> > Now you are using this old fix to paper over some other bug or so I think.
> 
> There is something fishy with the TLB status, looking into the mpc862 manual I
> don't see how it can work reliably. Need to dwell some more.
> Anyhow, I have incorporated some of my findings into a new patch,
> perhaps this will be the golden one?

Well, I still wonder about what whole unpopulated entry business.

>From what I can see, the TLB miss code will check _PAGE_PRESENT, and
when not set, it will -still- insert something into the TLB (unlike
all other CPU types that go straight to data access faults from there).

So we end up populating with those unpopulated entries that will then
cause us to take a DSI (or ISI) instead of a TLB miss the next time
around ... and so we need to remove them once we do that, no ? IE. Once
we have actually put a valid PTE in.

At least that's my understanding and why I believe we should always have
tlbil_va() in set_pte() and ptep_set_access_flags(), which boils down
in putting it into the 2 "filter" functions in the new code.

Well.. actually, the ptep_set_access_flags() will already do a
flush_tlb_page_nohash() when the PTE is changed. So I suppose all we
really need here would be in set_pte_filter(), to do the same if we are
writing a PTE that has _PAGE_PRESENT, at least on 8xx.

But just unconditionally doing a tlbil_va() in both the filter functions
shouldn't harm and also fix the problem, though Rex seems to indicate
that is not the case.

Now, we -might- have something else broken on 8xx, hard to tell. You may
want to try to bisect, adding back the removed tlbil_va() as you go
backward, between .31 and upstream... 

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-30 22:35                                   ` Benjamin Herrenschmidt
@ 2009-10-01  7:05                                     ` Joakim Tjernlund
  2009-10-02 13:06                                     ` Joakim Tjernlund
  2009-10-02 21:49                                     ` Scott Wood
  2 siblings, 0 replies; 58+ messages in thread
From: Joakim Tjernlund @ 2009-10-01  7:05 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, Rex Feany

Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 01/10/2009 00:35:59:
>
>
> > > Had a look at linus tree and there is something I don't understand.
> > > Your fix, e0908085fc2391c85b85fb814ae1df377c8e0dcb, fixes a problem
> > > that was introduced by 8d30c14cab30d405a05f2aaceda1e9ad57800f36 but
> > > 8d30c14cab30d405a05f2aaceda1e9ad57800f36 was included in .31 and .31
> > > works and top of tree does not, how can that be?
> > >
> > > To me it seems more likely that some mm change introduced between .31 and
> > > top of tree is the culprit.
> > > My patch addresses the problem described in the comment:
> > > /* On 8xx, cache control instructions (particularly
> > >  * "dcbst" from flush_dcache_icache) fault as write
> > >  * operation if there is an unpopulated TLB entry
> > >  * for the address in question. To workaround that,
> > >  * we invalidate the TLB here, thus avoiding dcbst
> > >  * misbehaviour.
> > >  */
> > > Now you are using this old fix to paper over some other bug or so I think.
> >
> > There is something fishy with the TLB status, looking into the mpc862 manual I
> > don't see how it can work reliably. Need to dwell some more.
> > Anyhow, I have incorporated some of my findings into a new patch,
> > perhaps this will be the golden one?
>
> Well, I still wonder about what whole unpopulated entry business.

Yes, is a odd compared to the other ppc arch. I know why it is there
and I also know that there is alternative way to work around the problem
it is supposed to fix. I went back to the old way in my patch but it didn't
help. although I don't see why there is such a benefit to branch to DataAccess
directly, is it so expensive to to a DTLB error and then go to DataAccess?

>
> >From what I can see, the TLB miss code will check _PAGE_PRESENT, and

No, it will branch to before that happens.

> when not set, it will -still- insert something into the TLB (unlike
> all other CPU types that go straight to data access faults from there).

Yes, a non valid pte so that you will trap to DTLB Error.
The other arch also muck around to get load/store bit etc. and
accroding to the 8xx user manual there are no such info available, not even DAR:

Table 6-14. Register Settings after a Data TLB Miss Exception
Register  Setting
SRR0      Set to the EA of the instruction that caused the exception.
SRR1      1–4     0
          10–15 0
          Others Loaded from MSR[16-31]. SRR1[30] is cleared only by loading a zero from MSR[RI].
MSR       IP      No change
          ME      No change
          LE      Copied from the ILE setting of the interrupted process
          Other 0

However from the past we know that DAR is avaliable (but not for dcbX insn though)

For a TLB Error there are:
Table 6-16. Register Settings after a Data TLB Error Exception
Register Setting
SRR0     Set to the EA of the instruction that caused the exception.
SRR1     1–4     0
         10–15 0
         Others—Loaded from MSR[16-31]. SRR1[30] is cleared only by loading a zero from MSR[RI].
MSR      IP      No change
         ME      No change
         LE      Copied from the ILE setting of the interrupted process
         Other   0
DSISR    0       0
         1       Set if the translation of an attempted access is not found in the translation tables. Otherwise, cleared
         2–3     0
         4       Set if the memory access is not permitted by the protection mechanism; otherwise cleared
         5       0
         6       1 for a store operation; 0 for a load operation.
         7–31    0
DAR      Set to the EA of the data access that caused the exception.

For completeness here are ITLB Miss/Error too:
Table 6-13. Register Settings after an Instruction TLB Miss Exception
Register  Setting
SRR0      Set to the EA of the instruction that caused the exception.
SRR1      0–3      0
          4       1
          10      1
          11–15   0
          Others  Loaded from MSR[16-31]. SRR1[30] is cleared only by loading a zero from MSR[RI].
MSR       IP      No change
          ME      No change
          LE      Copied from the ILE setting of the interrupted process
          Other 0

Table 6-15. Register Settings after an Instruction TLB Error Exception
Register Setting
SRR0     Set to the EA of the instruction that caused the exception.
SRR1     Note: Only one of bits 1, 3, and 4 will be set.
         1       1 if the translation of an attempted access is not in the translation tables. Otherwise 0
         2       0
         3       1 if the fetch access was to guarded memory when MSR[IR] = 1. Otherwise 0
         4       1 if the access is not permitted by the protection mechanism; otherwise 0.
         11–15 0
         Others Loaded from MSR[16-31]. SRR1[30] is cleared only by loading a zero from MSR[RI].
MSR      IP      No change
         ME      No change
         LE      Copied from the ILE setting of the interrupted process
         Other   0

>
> So we end up populating with those unpopulated entries that will then
> cause us to take a DSI (or ISI) instead of a TLB miss the next time
> around ... and so we need to remove them once we do that, no ? IE. Once
> we have actually put a valid PTE in.
>
> At least that's my understanding and why I believe we should always have
> tlbil_va() in set_pte() and ptep_set_access_flags(), which boils down
> in putting it into the 2 "filter" functions in the new code.
>
> Well.. actually, the ptep_set_access_flags() will already do a
> flush_tlb_page_nohash() when the PTE is changed. So I suppose all we
> really need here would be in set_pte_filter(), to do the same if we are
> writing a PTE that has _PAGE_PRESENT, at least on 8xx.

I am pondering another idea. I suspect that this bit is set for non-valid ptes:
1       Set if the translation of an attempted access is not found in the translation tables. Otherwise, cleared
So perhaps we can test this bit in do_page_fault() and then do the
tlbil_va() directly?

>
> But just unconditionally doing a tlbil_va() in both the filter functions
> shouldn't harm and also fix the problem, though Rex seems to indicate
> that is not the case.
>
> Now, we -might- have something else broken on 8xx, hard to tell. You may
> want to try to bisect, adding back the removed tlbil_va() as you go
> backward, between .31 and upstream...
>
> Cheers,
> Ben.
>
>
>
>

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-30 22:35                                   ` Benjamin Herrenschmidt
  2009-10-01  7:05                                     ` Joakim Tjernlund
@ 2009-10-02 13:06                                     ` Joakim Tjernlund
  2009-10-02 18:10                                       ` Joakim Tjernlund
  2009-10-02 21:49                                     ` Scott Wood
  2 siblings, 1 reply; 58+ messages in thread
From: Joakim Tjernlund @ 2009-10-02 13:06 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, Rex Feany

Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 01/10/2009 00:35:59:
>
>
> > > Had a look at linus tree and there is something I don't understand.
> > > Your fix, e0908085fc2391c85b85fb814ae1df377c8e0dcb, fixes a problem
> > > that was introduced by 8d30c14cab30d405a05f2aaceda1e9ad57800f36 but
> > > 8d30c14cab30d405a05f2aaceda1e9ad57800f36 was included in .31 and .31
> > > works and top of tree does not, how can that be?
> > >
> > > To me it seems more likely that some mm change introduced between .31 and
> > > top of tree is the culprit.
> > > My patch addresses the problem described in the comment:
> > > /* On 8xx, cache control instructions (particularly
> > >  * "dcbst" from flush_dcache_icache) fault as write
> > >  * operation if there is an unpopulated TLB entry
> > >  * for the address in question. To workaround that,
> > >  * we invalidate the TLB here, thus avoiding dcbst
> > >  * misbehaviour.
> > >  */
> > > Now you are using this old fix to paper over some other bug or so I think.
> >
> > There is something fishy with the TLB status, looking into the mpc862 manual I
> > don't see how it can work reliably. Need to dwell some more.
> > Anyhow, I have incorporated some of my findings into a new patch,
> > perhaps this will be the golden one?
>
> Well, I still wonder about what whole unpopulated entry business.
>
> >From what I can see, the TLB miss code will check _PAGE_PRESENT, and
> when not set, it will -still- insert something into the TLB (unlike
> all other CPU types that go straight to data access faults from there).
>
> So we end up populating with those unpopulated entries that will then
> cause us to take a DSI (or ISI) instead of a TLB miss the next time
> around ... and so we need to remove them once we do that, no ? IE. Once
> we have actually put a valid PTE in.
>
> At least that's my understanding and why I believe we should always have
> tlbil_va() in set_pte() and ptep_set_access_flags(), which boils down
> in putting it into the 2 "filter" functions in the new code.
>
> Well.. actually, the ptep_set_access_flags() will already do a
> flush_tlb_page_nohash() when the PTE is changed. So I suppose all we
> really need here would be in set_pte_filter(), to do the same if we are
> writing a PTE that has _PAGE_PRESENT, at least on 8xx.
>
> But just unconditionally doing a tlbil_va() in both the filter functions
> shouldn't harm and also fix the problem, though Rex seems to indicate
> that is not the case.
>
> Now, we -might- have something else broken on 8xx, hard to tell. You may
> want to try to bisect, adding back the removed tlbil_va() as you go
> backward, between .31 and upstream...

Found something odd, perhaps Rex can test?
I tested this on my old 2.4 and it worked well.

 Jocke

>From dd0f213d51437bb48ff42c1c0e690f243266c133 Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
Date: Fri, 2 Oct 2009 14:59:21 +0200
Subject: [PATCH] powerpc, 8xx: DTLB Error must check for more errors.

DataTLBError do only check for store op's. I think this
is too narrow. Protection and and no translation errors
needs to be checked too.
---
 arch/powerpc/kernel/head_8xx.S |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 52ff8c5..ef8a5ce 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -472,7 +472,7 @@ DataTLBError:
 	/* First, make sure this was a store operation.
 	*/
 	mfspr	r10, SPRN_DSISR
-	andis.	r11, r10, 0x0200	/* If set, indicates store op */
+	andis.	r11, r10, 0x4a00 /* If set, store, protection or no trans. */
 	beq	2f

 	/* The EA of a data TLB miss is automatically stored in the MD_EPN
--
1.6.4.4

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-10-02 13:06                                     ` Joakim Tjernlund
@ 2009-10-02 18:10                                       ` Joakim Tjernlund
  0 siblings, 0 replies; 58+ messages in thread
From: Joakim Tjernlund @ 2009-10-02 18:10 UTC (permalink / raw)
  Cc: linuxppc-dev

>
> Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 01/10/2009 00:35:59:
> >
> >
> > > > Had a look at linus tree and there is something I don't understand.
> > > > Your fix, e0908085fc2391c85b85fb814ae1df377c8e0dcb, fixes a problem
> > > > that was introduced by 8d30c14cab30d405a05f2aaceda1e9ad57800f36 but
> > > > 8d30c14cab30d405a05f2aaceda1e9ad57800f36 was included in .31 and .31
> > > > works and top of tree does not, how can that be?
> > > >
> > > > To me it seems more likely that some mm change introduced between .31 and
> > > > top of tree is the culprit.
> > > > My patch addresses the problem described in the comment:
> > > > /* On 8xx, cache control instructions (particularly
> > > >  * "dcbst" from flush_dcache_icache) fault as write
> > > >  * operation if there is an unpopulated TLB entry
> > > >  * for the address in question. To workaround that,
> > > >  * we invalidate the TLB here, thus avoiding dcbst
> > > >  * misbehaviour.
> > > >  */
> > > > Now you are using this old fix to paper over some other bug or so I think.
> > >
> > > There is something fishy with the TLB status, looking into the mpc862 manual I
> > > don't see how it can work reliably. Need to dwell some more.
> > > Anyhow, I have incorporated some of my findings into a new patch,
> > > perhaps this will be the golden one?
> >
> > Well, I still wonder about what whole unpopulated entry business.
> >
> > >From what I can see, the TLB miss code will check _PAGE_PRESENT, and
> > when not set, it will -still- insert something into the TLB (unlike
> > all other CPU types that go straight to data access faults from there).
> >
> > So we end up populating with those unpopulated entries that will then
> > cause us to take a DSI (or ISI) instead of a TLB miss the next time
> > around ... and so we need to remove them once we do that, no ? IE. Once
> > we have actually put a valid PTE in.
> >
> > At least that's my understanding and why I believe we should always have
> > tlbil_va() in set_pte() and ptep_set_access_flags(), which boils down
> > in putting it into the 2 "filter" functions in the new code.
> >
> > Well.. actually, the ptep_set_access_flags() will already do a
> > flush_tlb_page_nohash() when the PTE is changed. So I suppose all we
> > really need here would be in set_pte_filter(), to do the same if we are
> > writing a PTE that has _PAGE_PRESENT, at least on 8xx.
> >
> > But just unconditionally doing a tlbil_va() in both the filter functions
> > shouldn't harm and also fix the problem, though Rex seems to indicate
> > that is not the case.
> >
> > Now, we -might- have something else broken on 8xx, hard to tell. You may
> > want to try to bisect, adding back the removed tlbil_va() as you go
> > backward, between .31 and upstream...
>
> Found something odd, perhaps Rex can test?
> I tested this on my old 2.4 and it worked well.

That was not quite correct, sorry.
Try this instead:

>From c5f1a70561730b8a443f7081fbd9c5b023147043 Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
Date: Fri, 2 Oct 2009 14:59:21 +0200
Subject: [PATCH] powerpc, 8xx: DTLB Error must check for more errors.

DataTLBError currently does:
 if ((err & 0x02000000) == 0)
    DSI();
This won't handle a store with no valid translation.
Change this to
 if ((err & 0x48000000) != 0)
    DSI();
that is, branch to DSI if either !permission or
!translation.
---
 arch/powerpc/kernel/head_8xx.S |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 52ff8c5..118bb05 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -472,8 +472,8 @@ DataTLBError:
 	/* First, make sure this was a store operation.
 	*/
 	mfspr	r10, SPRN_DSISR
-	andis.	r11, r10, 0x0200	/* If set, indicates store op */
-	beq	2f
+	andis.	r11, r10, 0x4800 /* no translation, no permission. */
+	bne	2f	/* branch if either is set */

 	/* The EA of a data TLB miss is automatically stored in the MD_EPN
 	 * register.  The EA of a data TLB error is automatically stored in
--
1.6.4.4

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-30 22:35                                   ` Benjamin Herrenschmidt
  2009-10-01  7:05                                     ` Joakim Tjernlund
  2009-10-02 13:06                                     ` Joakim Tjernlund
@ 2009-10-02 21:49                                     ` Scott Wood
  2009-10-02 22:04                                       ` Benjamin Herrenschmidt
                                                         ` (2 more replies)
  2 siblings, 3 replies; 58+ messages in thread
From: Scott Wood @ 2009-10-02 21:49 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, Rex Feany

On Thu, Oct 01, 2009 at 08:35:59AM +1000, Benjamin Herrenschmidt wrote:
> >From what I can see, the TLB miss code will check _PAGE_PRESENT, and
> when not set, it will -still- insert something into the TLB (unlike
> all other CPU types that go straight to data access faults from there).
> 
> So we end up populating with those unpopulated entries that will then
> cause us to take a DSI (or ISI) instead of a TLB miss the next time
> around ... and so we need to remove them once we do that, no ? IE. Once
> we have actually put a valid PTE in.
> 
> At least that's my understanding and why I believe we should always have
> tlbil_va() in set_pte() and ptep_set_access_flags(), which boils down
> in putting it into the 2 "filter" functions in the new code.
> 
> Well.. actually, the ptep_set_access_flags() will already do a
> flush_tlb_page_nohash() when the PTE is changed. So I suppose all we
> really need here would be in set_pte_filter(), to do the same if we are
> writing a PTE that has _PAGE_PRESENT, at least on 8xx.
> 
> But just unconditionally doing a tlbil_va() in both the filter functions
> shouldn't harm and also fix the problem, though Rex seems to indicate
> that is not the case.

Adding a tlbil_va to do_page_fault makes the problem go away for me (on
top of your "merge" branch) -- none of the other changes in this thread
do (assuming I didn't miss any).  FWIW, when it gets stuck on a fault,
DSISR is 0xc0000000, and handle_mm_fault returns zero.

-Scott

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-10-02 21:49                                     ` Scott Wood
@ 2009-10-02 22:04                                       ` Benjamin Herrenschmidt
  2009-10-05 19:28                                         ` Scott Wood
  2009-10-03  8:05                                       ` Joakim Tjernlund
  2009-10-04 20:10                                       ` Joakim Tjernlund
  2 siblings, 1 reply; 58+ messages in thread
From: Benjamin Herrenschmidt @ 2009-10-02 22:04 UTC (permalink / raw)
  To: Scott Wood; +Cc: linuxppc-dev, Rex Feany

On Fri, 2009-10-02 at 16:49 -0500, Scott Wood wrote:
> Adding a tlbil_va to do_page_fault makes the problem go away for me (on
> top of your "merge" branch) -- none of the other changes in this thread
> do (assuming I didn't miss any).  FWIW, when it gets stuck on a fault,
> DSISR is 0xc0000000, and handle_mm_fault returns zero. 

But in that case, it should hit ptep_set_access_flags() (via
handle_mm_fault) and from there call tlbil_va (provided we add a call to
it in the relevant filter function which I proposed initially), no ?

Ben.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-10-02 22:04                                       ` Benjamin Herrenschmidt
@ 2009-10-05 19:28                                         ` Scott Wood
  2009-10-05 20:29                                           ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 58+ messages in thread
From: Scott Wood @ 2009-10-05 19:28 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, Rex Feany

On Sat, Oct 03, 2009 at 08:04:33AM +1000, Benjamin Herrenschmidt wrote:
> On Fri, 2009-10-02 at 16:49 -0500, Scott Wood wrote:
> > Adding a tlbil_va to do_page_fault makes the problem go away for me (on
> > top of your "merge" branch) -- none of the other changes in this thread
> > do (assuming I didn't miss any).  FWIW, when it gets stuck on a fault,
> > DSISR is 0xc0000000, and handle_mm_fault returns zero. 
> 
> But in that case, it should hit ptep_set_access_flags() (via
> handle_mm_fault) and from there call tlbil_va (provided we add a call to
> it in the relevant filter function which I proposed initially), no ?

Yes, it hits ptep_set_access_flags() and adding _tlbil_va there helps (I
didn't put it in the filter function, because that doesn't take address as a
parameter).  I'd misread your suggestion as referring to the various changes
to set_pte_filter() that were posted.

As for unconditionally invalidating in set_pte_filter(), that doesn't boot
for me unless I check for kernel addresses -- I guess we populate page
tables that overlap the pinned large page region? 

-Scott

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-10-05 19:28                                         ` Scott Wood
@ 2009-10-05 20:29                                           ` Benjamin Herrenschmidt
  2009-10-05 21:04                                             ` Scott Wood
  0 siblings, 1 reply; 58+ messages in thread
From: Benjamin Herrenschmidt @ 2009-10-05 20:29 UTC (permalink / raw)
  To: Scott Wood; +Cc: linuxppc-dev, Rex Feany

On Mon, 2009-10-05 at 14:28 -0500, Scott Wood wrote:
> Yes, it hits ptep_set_access_flags() and adding _tlbil_va there helps (I
> didn't put it in the filter function, because that doesn't take address as a
> parameter).  I'd misread your suggestion as referring to the various changes
> to set_pte_filter() that were posted.
> 
> As for unconditionally invalidating in set_pte_filter(), that doesn't boot
> for me unless I check for kernel addresses -- I guess we populate page
> tables that overlap the pinned large page region? 

Good point. I think we do on 8xx. Does doing it in set_pte_filter()
(with the kernel check) makes any difference ? faster ? slower ? no
visible change ?

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-10-05 20:29                                           ` Benjamin Herrenschmidt
@ 2009-10-05 21:04                                             ` Scott Wood
  0 siblings, 0 replies; 58+ messages in thread
From: Scott Wood @ 2009-10-05 21:04 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, Rex Feany

Benjamin Herrenschmidt wrote:
> On Mon, 2009-10-05 at 14:28 -0500, Scott Wood wrote:
>> Yes, it hits ptep_set_access_flags() and adding _tlbil_va there helps (I
>> didn't put it in the filter function, because that doesn't take address as a
>> parameter).  I'd misread your suggestion as referring to the various changes
>> to set_pte_filter() that were posted.
>>
>> As for unconditionally invalidating in set_pte_filter(), that doesn't boot
>> for me unless I check for kernel addresses -- I guess we populate page
>> tables that overlap the pinned large page region? 
> 
> Good point. I think we do on 8xx. Does doing it in set_pte_filter()
> (with the kernel check) makes any difference ? faster ? slower ? no
> visible change ?

ptep_set_access_flags() is enough to make the worst of it go away.  I'm 
having another intermittent problem that seems to happen more often 
without the change in set_pte_filter(), but I've yet to narrow down 
what's actually going on there.

-Scott

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-10-02 21:49                                     ` Scott Wood
  2009-10-02 22:04                                       ` Benjamin Herrenschmidt
@ 2009-10-03  8:05                                       ` Joakim Tjernlund
  2009-10-03  8:31                                         ` Benjamin Herrenschmidt
  2009-10-05 18:24                                         ` Scott Wood
  2009-10-04 20:10                                       ` Joakim Tjernlund
  2 siblings, 2 replies; 58+ messages in thread
From: Joakim Tjernlund @ 2009-10-03  8:05 UTC (permalink / raw)
  To: Scott Wood; +Cc: Rex Feany, linuxppc-dev

Scott Wood <scottwood@freescale.com> wrote on 02/10/2009 23:49:49:
>
> On Thu, Oct 01, 2009 at 08:35:59AM +1000, Benjamin Herrenschmidt wrote:
> > >From what I can see, the TLB miss code will check _PAGE_PRESENT, and
> > when not set, it will -still- insert something into the TLB (unlike
> > all other CPU types that go straight to data access faults from there).
> >
> > So we end up populating with those unpopulated entries that will then
> > cause us to take a DSI (or ISI) instead of a TLB miss the next time
> > around ... and so we need to remove them once we do that, no ? IE. Once
> > we have actually put a valid PTE in.
> >
> > At least that's my understanding and why I believe we should always have
> > tlbil_va() in set_pte() and ptep_set_access_flags(), which boils down
> > in putting it into the 2 "filter" functions in the new code.
> >
> > Well.. actually, the ptep_set_access_flags() will already do a
> > flush_tlb_page_nohash() when the PTE is changed. So I suppose all we
> > really need here would be in set_pte_filter(), to do the same if we are
> > writing a PTE that has _PAGE_PRESENT, at least on 8xx.
> >
> > But just unconditionally doing a tlbil_va() in both the filter functions
> > shouldn't harm and also fix the problem, though Rex seems to indicate
> > that is not the case.
>
> Adding a tlbil_va to do_page_fault makes the problem go away for me (on
> top of your "merge" branch) -- none of the other changes in this thread
> do (assuming I didn't miss any).  FWIW, when it gets stuck on a fault,
> DSISR is 0xc0000000, and handle_mm_fault returns zero.

OK, that is a no translation error for a load (assuming trap is 0x400)
Do you know what insn this is? I am adding a patch last in this mail
for catching dcbX insn in do_page_fault()

You need the patch I posted yesterday too:

>From c5f1a70561730b8a443f7081fbd9c5b023147043 Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
Date: Fri, 2 Oct 2009 14:59:21 +0200
Subject: [PATCH] powerpc, 8xx: DTLB Error must check for more errors.

DataTLBError currently does:
 if ((err & 0x02000000) == 0)
    DSI();
This won't handle a store with no valid translation.
Change this to
 if ((err & 0x48000000) != 0)
    DSI();
that is, branch to DSI if either !permission or
!translation.
---
 arch/powerpc/kernel/head_8xx.S |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 52ff8c5..118bb05 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -472,8 +472,8 @@ DataTLBError:
 	/* First, make sure this was a store operation.
 	*/
 	mfspr	r10, SPRN_DSISR
-	andis.	r11, r10, 0x0200	/* If set, indicates store op */
-	beq	2f
+	andis.	r11, r10, 0x4800 /* no translation, no permission. */
+	bne	2f	/* branch if either is set */

 	/* The EA of a data TLB miss is automatically stored in the MD_EPN
 	 * register.  The EA of a data TLB error is automatically stored in
--
1.6.4.4


Cannot shake the feeling that it this snip of code that causes it
	lwz	r11, 0(r10)	/* Get the level 1 entry */
	rlwinm.	r10, r11,0,0,19	/* Extract page descriptor page address */
	beq	2f		/* If zero, don't try to find a pte */
Perhaps we can do something better? I still feel that we need to
force a TLB Error as the TLBMiss does not set DSISR so we have no way of
knowing if it is an load or store.

 Jocke

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 7699394..c33c6de 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -139,6 +139,88 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
 #else
 	is_write = error_code & ESR_DST;
 #endif /* CONFIG_4xx || CONFIG_BOOKE */
+#if 1 /* defined(CONFIG_8xx)*/
+#define DEBUG_DCBX
+/*
+ Work around DTLB Miss/Error, as these do not update
+ DAR for dcbf, dcbi, dcbst, dcbz and icbi instructions
+ This relies on every exception tagging DAR with 0xf0
+ before returning (rfi)
+ DAR is passed as 'address' to this function.
+ */
+	{
+		unsigned long ra, rb, dar, insn;
+#ifdef DEBUG_DCBX
+		const char *istr = NULL;
+
+		insn = *((unsigned long *)regs->nip);
+		if (((insn >> (31-5)) & 0x3f) == 31) {
+			if (((insn >> 1) & 0x3ff) == 1014) /* dcbz ? 0x3f6 */
+				istr = "dcbz";
+			if (((insn >> 1) & 0x3ff) == 86) /* dcbf ? 0x56 */
+				istr = "dcbf";
+			if (((insn >> 1) & 0x3ff) == 470) /* dcbi ? 0x1d6 */
+				istr = "dcbi";
+			if (((insn >> 1) & 0x3ff) == 54) /* dcbst ? 0x36 */
+				istr = "dcbst";
+			if (((insn >> 1) & 0x3ff) == 982) /* icbi ? 0x3d6 */
+				istr = "icbi";
+			if (istr) {
+				ra = (insn >> (31-15)) & 0x1f; /* Reg RA */
+				rb = (insn >> (31-20)) & 0x1f; /* Reg RB */
+				dar = regs->gpr[rb];
+				if (ra)
+					dar += regs->gpr[ra];
+				if (dar != address && address != 0x00f0 && trap == 0x300)
+					printk(KERN_CRIT "%s: address:%lx, dar:%lx!\n", istr, address, dar);
+				if (!strcmp(istr, "dcbst") && is_write) {
+					printk(KERN_CRIT "dcbst R%ld,R%ld = %lx as a store, fixing!\n",
+					       ra, rb, dar);
+					is_write = 0;
+				}
+
+				if (trap == 0x300 && address != dar) {
+					__asm__ ("mtdar %0" : : "r" (dar));
+					return 0;
+				}
+			}
+		}
+#endif
+		if (address == 0x00f0 && trap == 0x300) {
+			pte_t *ptep;
+
+			/* This is from a dcbX or icbi insn gone bad, these
+			 * insn do not set DAR so we have to do it here instead */
+			insn = *((unsigned long *)regs->nip);
+
+			ra = (insn >> (31-15)) & 0x1f; /* Reg RA */
+			rb = (insn >> (31-20)) & 0x1f; /* Reg RB */
+			dar = regs->gpr[rb];
+			if (ra)
+				dar += regs->gpr[ra];
+			/* Set DAR to correct address for the DTLB Miss/Error handler
+			 * to redo the TLB exception. This time with correct address */
+			__asm__ ("mtdar %0" : : "r" (dar));
+#ifdef DEBUG_DCBX
+			printk(KERN_CRIT "trap:%x address:%lx, dar:%lx,err:%lx %s\n",
+			       trap, address, dar, error_code, istr);
+#endif
+			address = dar;
+#if 1
+			if (is_write && get_pteptr(mm, dar, &ptep, NULL)) {
+				pte_t my_pte = *ptep;
+
+				if (pte_present(my_pte) && pte_write(my_pte)) {
+					pte_val(my_pte) |= _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE;
+					set_pte_at(mm, dar, ptep, my_pte);
+				}
+			}
+#else
+			return 0;
+#endif
+		}
+	}
+#endif

 	if (notify_page_fault(regs))
 		return 0;

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-10-03  8:05                                       ` Joakim Tjernlund
@ 2009-10-03  8:31                                         ` Benjamin Herrenschmidt
  2009-10-03  9:24                                           ` Joakim Tjernlund
  2009-10-05 18:24                                         ` Scott Wood
  1 sibling, 1 reply; 58+ messages in thread
From: Benjamin Herrenschmidt @ 2009-10-03  8:31 UTC (permalink / raw)
  To: Joakim Tjernlund; +Cc: Scott Wood, linuxppc-dev, Rex Feany

On Sat, 2009-10-03 at 10:05 +0200, Joakim Tjernlund wrote:
> Cannot shake the feeling that it this snip of code that causes it
>         lwz     r11, 0(r10)     /* Get the level 1 entry */
>         rlwinm. r10, r11,0,0,19 /* Extract page descriptor page
> address */
>         beq     2f              /* If zero, don't try to find a pte */
> Perhaps we can do something better? I still feel that we need to
> force a TLB Error as the TLBMiss does not set DSISR so we have no way
> of
> knowing if it is an load or store.

Can't we manufacture a DSISR and branch to the right function ?

Ben.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-10-03  8:31                                         ` Benjamin Herrenschmidt
@ 2009-10-03  9:24                                           ` Joakim Tjernlund
  2009-10-03 10:57                                             ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 58+ messages in thread
From: Joakim Tjernlund @ 2009-10-03  9:24 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Scott Wood, linuxppc-dev, Rex Feany

Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 03/10/2009 10:31:18:
>
> On Sat, 2009-10-03 at 10:05 +0200, Joakim Tjernlund wrote:
> > Cannot shake the feeling that it this snip of code that causes it
> >         lwz     r11, 0(r10)     /* Get the level 1 entry */
> >         rlwinm. r10, r11,0,0,19 /* Extract page descriptor page
> > address */
> >         beq     2f              /* If zero, don't try to find a pte */
> > Perhaps we can do something better? I still feel that we need to
> > force a TLB Error as the TLBMiss does not set DSISR so we have no way
> > of
> > knowing if it is an load or store.
>
> Can't we manufacture a DSISR and branch to the right function ?

Not if we want know if it is a load or store. There is no info to manufacture
a DSISR from. The best we can do here is try getting the RPN physical page
number correct. Perhaps something like this will do:
	/* Copy 20 msb from MD_EPN to r20 to get the correct page
	 * number. Do not rely on DAR since the dcxx instructions fails
	 * to update DAR when they cause a DTLB Miss
	 */
	mfspr	r21, MD_EPN
	li	r20, 0x0
	rlwimi	r20, r21, 0, 0, 19
Then go back and set the RPN accordingly.

The 8xx is different as as it will force a TLB error every time
it needs to deal with a page fault.
I suspect adding
if (!ret)
   _tlbil_va(address);
in do_page_fault() will do the trick too.

So yes, there is a missing _tlbil_va() missing for 8xx somewhere
but there is something more too.
Maybe your new filter functions and my
 powerpc, 8xx: DTLB Error must check for more errors.
will do the trick?

 Jocke

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-10-03  9:24                                           ` Joakim Tjernlund
@ 2009-10-03 10:57                                             ` Benjamin Herrenschmidt
  2009-10-03 11:47                                               ` Joakim Tjernlund
  2009-10-04  8:35                                               ` Joakim Tjernlund
  0 siblings, 2 replies; 58+ messages in thread
From: Benjamin Herrenschmidt @ 2009-10-03 10:57 UTC (permalink / raw)
  To: Joakim Tjernlund; +Cc: Scott Wood, linuxppc-dev, Rex Feany

On Sat, 2009-10-03 at 11:24 +0200, Joakim Tjernlund wrote:
> 
> So yes, there is a missing _tlbil_va() missing for 8xx somewhere
> but there is something more too.
> Maybe your new filter functions and my
>  powerpc, 8xx: DTLB Error must check for more errors.
> will do the trick?

Well, if we can't tell between a load and a store on a TLB miss, then
we should probably let it create an unpopulated entry in all cases,
so that we do take a proper DSI/ISI the second time around, which
would then tell us where we come from...

Ben.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-10-03 10:57                                             ` Benjamin Herrenschmidt
@ 2009-10-03 11:47                                               ` Joakim Tjernlund
  2009-10-04  8:35                                               ` Joakim Tjernlund
  1 sibling, 0 replies; 58+ messages in thread
From: Joakim Tjernlund @ 2009-10-03 11:47 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Scott Wood, linuxppc-dev, Rex Feany



Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 03/10/2009 12:57:28:

> On Sat, 2009-10-03 at 11:24 +0200, Joakim Tjernlund wrote:
> >
> > So yes, there is a missing _tlbil_va() missing for 8xx somewhere
> > but there is something more too.
> > Maybe your new filter functions and my
> >  powerpc, 8xx: DTLB Error must check for more errors.
> > will do the trick?
>
> Well, if we can't tell between a load and a store on a TLB miss, then
> we should probably let it create an unpopulated entry in all cases,
> so that we do take a proper DSI/ISI the second time around, which
> would then tell us where we come from...

Not quite sure what you mean here?
Always branch to DSI at TLB Miss and create a unpopulated
entry? That does not feel right.

The current method is better. The only odd thing is
the null pmd entry and what to load into RPN. I am not convinced
that it is a problem but I would like to see a generic impl. of
a null pmd and use that instead. A 8xx impl. of that
looks like this(tested on 2.4, works fine)

	lwz	r11, 0(r10)	/* Get the level 1 entry */
	rlwinm.	r10, r11,0,0,19	/* Extract page descriptor page address */
	bne+	4f		/* If zero, use a null pmd */
	lis	r11, null_pmd_dir@h
	ori	r11, r11, null_pmd_dir@l
4:	tophys(r11,r11)

...
	.data
	.globl	null_pmd_dir
null_pmd_dir:
	.space	4096

If the pmd_none() and friends are updated to test for a null_pmd_dir
we can loose the test above and save 4 insn :)

Anyhow did you post a patch(can't find one) about your suggested
filter functions for 8xx? I sure would like to see one :)

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-10-03 10:57                                             ` Benjamin Herrenschmidt
  2009-10-03 11:47                                               ` Joakim Tjernlund
@ 2009-10-04  8:35                                               ` Joakim Tjernlund
  2009-10-04 20:26                                                 ` Benjamin Herrenschmidt
  1 sibling, 1 reply; 58+ messages in thread
From: Joakim Tjernlund @ 2009-10-04  8:35 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Scott Wood, linuxppc-dev, Rex Feany

Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 03/10/2009 12:57:28:
>
> On Sat, 2009-10-03 at 11:24 +0200, Joakim Tjernlund wrote:
> >
> > So yes, there is a missing _tlbil_va() missing for 8xx somewhere
> > but there is something more too.
> > Maybe your new filter functions and my
> >  powerpc, 8xx: DTLB Error must check for more errors.
> > will do the trick?
>
> Well, if we can't tell between a load and a store on a TLB miss, then
> we should probably let it create an unpopulated entry in all cases,
> so that we do take a proper DSI/ISI the second time around, which
> would then tell us where we come from...

While looking closer on 8xx TLB handling I noticed we were taking
an extra TLB Error when making a page dirty. Tracked it down to:
static inline pte_t pte_mkdirty(pte_t pte) {
	pte_val(pte) |= _PAGE_DIRTY; return pte; }
pte_mkdirty does not set HWWRITE thus forcing a new
TLB error to clear it. Adding HWWRITE to pte_mkdirty fixes the
problem.

Looking at (especially pte_mkclean):
static inline pte_t pte_wrprotect(pte_t pte) {
	pte_val(pte) &= ~(_PAGE_RW | _PAGE_HWWRITE); return pte; }
static inline pte_t pte_mkclean(pte_t pte) {
	pte_val(pte) &= ~(_PAGE_DIRTY | _PAGE_HWWRITE); return pte; }

it looks like a mistake not to include HWWRITE in pte_mkdirty(),
what do you think?

 Jocke

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-10-04  8:35                                               ` Joakim Tjernlund
@ 2009-10-04 20:26                                                 ` Benjamin Herrenschmidt
  2009-10-04 20:38                                                   ` Joakim Tjernlund
  0 siblings, 1 reply; 58+ messages in thread
From: Benjamin Herrenschmidt @ 2009-10-04 20:26 UTC (permalink / raw)
  To: Joakim Tjernlund; +Cc: Scott Wood, linuxppc-dev, Rex Feany

On Sun, 2009-10-04 at 10:35 +0200, Joakim Tjernlund wrote:
> Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 03/10/2009 12:57:28:
> >
> > On Sat, 2009-10-03 at 11:24 +0200, Joakim Tjernlund wrote:
> > >
> > > So yes, there is a missing _tlbil_va() missing for 8xx somewhere
> > > but there is something more too.
> > > Maybe your new filter functions and my
> > >  powerpc, 8xx: DTLB Error must check for more errors.
> > > will do the trick?
> >
> > Well, if we can't tell between a load and a store on a TLB miss, then
> > we should probably let it create an unpopulated entry in all cases,
> > so that we do take a proper DSI/ISI the second time around, which
> > would then tell us where we come from...
> 
> While looking closer on 8xx TLB handling I noticed we were taking
> an extra TLB Error when making a page dirty. Tracked it down to:
> static inline pte_t pte_mkdirty(pte_t pte) {
> 	pte_val(pte) |= _PAGE_DIRTY; return pte; }
> pte_mkdirty does not set HWWRITE thus forcing a new
> TLB error to clear it. Adding HWWRITE to pte_mkdirty fixes the
> problem.

We should just get rid of HWWRITE like I did for 44x and BookE.

> Looking at (especially pte_mkclean):
> static inline pte_t pte_wrprotect(pte_t pte) {
> 	pte_val(pte) &= ~(_PAGE_RW | _PAGE_HWWRITE); return pte; }
> static inline pte_t pte_mkclean(pte_t pte) {
> 	pte_val(pte) &= ~(_PAGE_DIRTY | _PAGE_HWWRITE); return pte; }
> 
> it looks like a mistake not to include HWWRITE in pte_mkdirty(),
> what do you think?

Maybe yes. No big deal right now, we have more important problems
to fix no ? :-)

Ben.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-10-04 20:26                                                 ` Benjamin Herrenschmidt
@ 2009-10-04 20:38                                                   ` Joakim Tjernlund
  0 siblings, 0 replies; 58+ messages in thread
From: Joakim Tjernlund @ 2009-10-04 20:38 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Scott Wood, linuxppc-dev, Rex Feany

Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 04/10/2009 22:26:42:
> On Sun, 2009-10-04 at 10:35 +0200, Joakim Tjernlund wrote:
> > Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 03/10/2009 12:57:28:
> > >
> > > On Sat, 2009-10-03 at 11:24 +0200, Joakim Tjernlund wrote:
> > > >
> > > > So yes, there is a missing _tlbil_va() missing for 8xx somewhere
> > > > but there is something more too.
> > > > Maybe your new filter functions and my
> > > >  powerpc, 8xx: DTLB Error must check for more errors.
> > > > will do the trick?
> > >
> > > Well, if we can't tell between a load and a store on a TLB miss, then
> > > we should probably let it create an unpopulated entry in all cases,
> > > so that we do take a proper DSI/ISI the second time around, which
> > > would then tell us where we come from...
> >
> > While looking closer on 8xx TLB handling I noticed we were taking
> > an extra TLB Error when making a page dirty. Tracked it down to:
> > static inline pte_t pte_mkdirty(pte_t pte) {
> >    pte_val(pte) |= _PAGE_DIRTY; return pte; }
> > pte_mkdirty does not set HWWRITE thus forcing a new
> > TLB error to clear it. Adding HWWRITE to pte_mkdirty fixes the
> > problem.
>
> We should just get rid of HWWRITE like I did for 44x and BookE.

I am trying :)

>
> > Looking at (especially pte_mkclean):
> > static inline pte_t pte_wrprotect(pte_t pte) {
> >    pte_val(pte) &= ~(_PAGE_RW | _PAGE_HWWRITE); return pte; }
> > static inline pte_t pte_mkclean(pte_t pte) {
> >    pte_val(pte) &= ~(_PAGE_DIRTY | _PAGE_HWWRITE); return pte; }
> >
> > it looks like a mistake not to include HWWRITE in pte_mkdirty(),
> > what do you think?
>
> Maybe yes. No big deal right now, we have more important problems
> to fix no ? :-)

The missing invalidate you guys need to work out. I have no clue
where to put it. If we are really lucky, getting rid of HWWRITE
might help :)

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-10-03  8:05                                       ` Joakim Tjernlund
  2009-10-03  8:31                                         ` Benjamin Herrenschmidt
@ 2009-10-05 18:24                                         ` Scott Wood
  2009-10-05 18:50                                           ` Joakim Tjernlund
  1 sibling, 1 reply; 58+ messages in thread
From: Scott Wood @ 2009-10-05 18:24 UTC (permalink / raw)
  To: Joakim Tjernlund; +Cc: Rex Feany, linuxppc-dev

On Sat, Oct 03, 2009 at 10:05:46AM +0200, Joakim Tjernlund wrote:
> Scott Wood <scottwood@freescale.com> wrote on 02/10/2009 23:49:49:
> > Adding a tlbil_va to do_page_fault makes the problem go away for me (on
> > top of your "merge" branch) -- none of the other changes in this thread
> > do (assuming I didn't miss any).  FWIW, when it gets stuck on a fault,
> > DSISR is 0xc0000000, and handle_mm_fault returns zero.
> 
> OK, that is a no translation error for a load (assuming trap is 0x400)

Yes, 0x400.

> Do you know what insn this is? 

Various lbz and lwz.

-Scott

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-10-05 18:24                                         ` Scott Wood
@ 2009-10-05 18:50                                           ` Joakim Tjernlund
  0 siblings, 0 replies; 58+ messages in thread
From: Joakim Tjernlund @ 2009-10-05 18:50 UTC (permalink / raw)
  To: Scott Wood; +Cc: Rex Feany, linuxppc-dev

Scott Wood <scottwood@freescale.com> wrote on 05/10/2009 20:24:29:
>
> On Sat, Oct 03, 2009 at 10:05:46AM +0200, Joakim Tjernlund wrote:
> > Scott Wood <scottwood@freescale.com> wrote on 02/10/2009 23:49:49:
> > > Adding a tlbil_va to do_page_fault makes the problem go away for me (on
> > > top of your "merge" branch) -- none of the other changes in this thread
> > > do (assuming I didn't miss any).  FWIW, when it gets stuck on a fault,
> > > DSISR is 0xc0000000, and handle_mm_fault returns zero.
> >
> > OK, that is a no translation error for a load (assuming trap is 0x400)
>
> Yes, 0x400.
>
> > Do you know what insn this is?
>
> Various lbz and lwz.

OK, this rules out any dcbX problem. Perhaps you can try any of Bens
ideas? Not sure what they were but hopefully you and Ben do :)

Preferably after you have tested my new patches :)

 Jocke

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-10-02 21:49                                     ` Scott Wood
  2009-10-02 22:04                                       ` Benjamin Herrenschmidt
  2009-10-03  8:05                                       ` Joakim Tjernlund
@ 2009-10-04 20:10                                       ` Joakim Tjernlund
  2009-10-04 20:28                                         ` Benjamin Herrenschmidt
  2 siblings, 1 reply; 58+ messages in thread
From: Joakim Tjernlund @ 2009-10-04 20:10 UTC (permalink / raw)
  To: Scott Wood, Rex Feany; +Cc: linuxppc-dev

Scott Wood <scottwood@freescale.com> wrote on 02/10/2009 23:49:49:
>
> On Thu, Oct 01, 2009 at 08:35:59AM +1000, Benjamin Herrenschmidt wrote:
> > >From what I can see, the TLB miss code will check _PAGE_PRESENT, and
> > when not set, it will -still- insert something into the TLB (unlike
> > all other CPU types that go straight to data access faults from there).
> >
> > So we end up populating with those unpopulated entries that will then
> > cause us to take a DSI (or ISI) instead of a TLB miss the next time
> > around ... and so we need to remove them once we do that, no ? IE. Once
> > we have actually put a valid PTE in.
> >
> > At least that's my understanding and why I believe we should always have
> > tlbil_va() in set_pte() and ptep_set_access_flags(), which boils down
> > in putting it into the 2 "filter" functions in the new code.
> >
> > Well.. actually, the ptep_set_access_flags() will already do a
> > flush_tlb_page_nohash() when the PTE is changed. So I suppose all we
> > really need here would be in set_pte_filter(), to do the same if we are
> > writing a PTE that has _PAGE_PRESENT, at least on 8xx.
> >
> > But just unconditionally doing a tlbil_va() in both the filter functions
> > shouldn't harm and also fix the problem, though Rex seems to indicate
> > that is not the case.
>
> Adding a tlbil_va to do_page_fault makes the problem go away for me (on
> top of your "merge" branch) -- none of the other changes in this thread
> do (assuming I didn't miss any).  FWIW, when it gets stuck on a fault,
> DSISR is 0xc0000000, and handle_mm_fault returns zero.

Scott and Rex

I have managed to update the TLB code to make proper use of dirty and accessed states.
Advantages are:
 - I/D TLB Miss never needs to write to the linux pte, saving a few cycles
 - Accessed is only set by I/D TLB Error, should be a plus when SWAP is used.
 - _PAGE_DIRTY is mapped to 0x100, the changed bit, and is set directly
    and there will be no extra DTLB Error to actually set the changed bit
    when a page has been made dirty.
- Proper RO/RW mapping of user space.

Cons:
 - 4 more insn in TLB Miss handlers, but the since the linux pte isn't
   written it should still be a win.

However, I did this on my 2.4 tree but I can port it to 2.6 if you guys
can test it for me.

 Jocke

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-10-04 20:10                                       ` Joakim Tjernlund
@ 2009-10-04 20:28                                         ` Benjamin Herrenschmidt
  2009-10-04 20:45                                           ` Joakim Tjernlund
                                                             ` (2 more replies)
  0 siblings, 3 replies; 58+ messages in thread
From: Benjamin Herrenschmidt @ 2009-10-04 20:28 UTC (permalink / raw)
  To: Joakim Tjernlund; +Cc: Scott Wood, linuxppc-dev, Rex Feany


> I have managed to update the TLB code to make proper use of dirty and accessed states.
> Advantages are:
>  - I/D TLB Miss never needs to write to the linux pte, saving a few cycles

That's good, that leaves us with only 40x to fix now. Also we can remove
atomic updates of PTEs for all non-hash. It's pointless on those CPUs
anyway.

>  - Accessed is only set by I/D TLB Error, should be a plus when SWAP is used.

No need for that neither.

ISI/DSI shouldn't touch the PTE. They should just fall back to C code
which takes care of it all.l

>  - _PAGE_DIRTY is mapped to 0x100, the changed bit, and is set directly
>     and there will be no extra DTLB Error to actually set the changed bit
>     when a page has been made dirty.
> - Proper RO/RW mapping of user space.
> 
> Cons:
>  - 4 more insn in TLB Miss handlers, but the since the linux pte isn't
>    written it should still be a win.
>
> However, I did this on my 2.4 tree but I can port it to 2.6 if you guys
> can test it for me.

Why don't you use and test 2.6 ? :-)

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-10-04 20:28                                         ` Benjamin Herrenschmidt
@ 2009-10-04 20:45                                           ` Joakim Tjernlund
  2009-10-05  7:28                                           ` Joakim Tjernlund
  2009-10-05 19:16                                           ` Joakim Tjernlund
  2 siblings, 0 replies; 58+ messages in thread
From: Joakim Tjernlund @ 2009-10-04 20:45 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Scott Wood, linuxppc-dev, Rex Feany

Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 04/10/2009 22:28:38:
>
> > I have managed to update the TLB code to make proper use of dirty and accessed states.
> > Advantages are:
> >  - I/D TLB Miss never needs to write to the linux pte, saving a few cycles
>
> That's good, that leaves us with only 40x to fix now. Also we can remove
> atomic updates of PTEs for all non-hash. It's pointless on those CPUs
> anyway.
>
> >  - Accessed is only set by I/D TLB Error, should be a plus when SWAP is used.
>
> No need for that neither.

Since 8xx lacks HW support for ACCESSED, the only way is map
the page NoAccess and take a TLB Error on first access that sets
access bit (or bails to do_page_fault)

>
> ISI/DSI shouldn't touch the PTE. They should just fall back to C code
> which takes care of it all.l

Yes, that is what I do now(i.e I only read the pte). ISI and DSI is the
TLB Miss handlers on 8xx.

>
> >  - _PAGE_DIRTY is mapped to 0x100, the changed bit, and is set directly
> >     and there will be no extra DTLB Error to actually set the changed bit
> >     when a page has been made dirty.
> > - Proper RO/RW mapping of user space.
> >
> > Cons:
> >  - 4 more insn in TLB Miss handlers, but the since the linux pte isn't
> >    written it should still be a win.
> >
> > However, I did this on my 2.4 tree but I can port it to 2.6 if you guys
> > can test it for me.
>
> Why don't you use and test 2.6 ? :-)

Because porting my 8xx board to 2.6 isn't going to be easy so
I havn't yet. One day I might when we can't get away with 2.4 on our
old boards.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-10-04 20:28                                         ` Benjamin Herrenschmidt
  2009-10-04 20:45                                           ` Joakim Tjernlund
@ 2009-10-05  7:28                                           ` Joakim Tjernlund
  2009-10-05 19:16                                           ` Joakim Tjernlund
  2 siblings, 0 replies; 58+ messages in thread
From: Joakim Tjernlund @ 2009-10-05  7:28 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Scott Wood, linuxppc-dev, Rex Feany

Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 04/10/2009 22:28:38:
>
>
> > I have managed to update the TLB code to make proper use of dirty and accessed states.
> > Advantages are:
> >  - I/D TLB Miss never needs to write to the linux pte, saving a few cycles
>
> That's good, that leaves us with only 40x to fix now. Also we can remove
> atomic updates of PTEs for all non-hash. It's pointless on those CPUs
> anyway.
>
> >  - Accessed is only set by I/D TLB Error, should be a plus when SWAP is used.
>
> No need for that neither.
>
> ISI/DSI shouldn't touch the PTE. They should just fall back to C code
> which takes care of it all.l
>
> >  - _PAGE_DIRTY is mapped to 0x100, the changed bit, and is set directly
> >     and there will be no extra DTLB Error to actually set the changed bit
> >     when a page has been made dirty.
> > - Proper RO/RW mapping of user space.
> >
> > Cons:
> >  - 4 more insn in TLB Miss handlers, but the since the linux pte isn't
> >    written it should still be a win.
> >
> > However, I did this on my 2.4 tree but I can port it to 2.6 if you guys
> > can test it for me.

So it was easy to update the patch for 2.6, this is on top
of "powerpc, 8xx: DTLB Error must check for more errors."
You probably need the extra tlbil_va(), but let us know if you
can get away without it.

Scott and Rex, please give this a spin. Comments welcome too :)

     Jocke

>From 7880d402cc05dd6e27d8804218ee4c80a879403e Mon Sep 17 00:00:00 2001
From: Joakim Tjernlund <Joakim.Tjernlund@transmode.se>
Date: Mon, 5 Oct 2009 09:08:31 +0200
Subject: [PATCH] 8xx: get rid of _PAGE_HWWRITE dependency in MMU.

Update the TLB asm to make proper use of _PAGE_DIRY and _PAGE_ACCESSED.
Pros:
 - I/D TLB Miss never needs to write to the linux pte.
 - _PAGE_ACCESSED is only set on I/D TLB Error fixing accounting
 - _PAGE_DIRTY is mapped to 0x100, the changed bit, and is set directly
    when a page has been made dirty.
 - Proper RO/RW mapping of user space.
Cons:
 - 4 more instructions in I/D TLB Miss, but the since the linux pte is
   not written anymore, it should still be a win.
---
 arch/powerpc/include/asm/pte-8xx.h |    9 +-
 arch/powerpc/kernel/head_8xx.S     |  163 ++++++++++++++++++++++++++----------
 2 files changed, 122 insertions(+), 50 deletions(-)

diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/pte-8xx.h
index 8c6e312..af541a2 100644
--- a/arch/powerpc/include/asm/pte-8xx.h
+++ b/arch/powerpc/include/asm/pte-8xx.h
@@ -32,22 +32,21 @@
 #define _PAGE_FILE	0x0002	/* when !present: nonlinear file mapping */
 #define _PAGE_NO_CACHE	0x0002	/* I: cache inhibit */
 #define _PAGE_SHARED	0x0004	/* No ASID (context) compare */
+#define _PAGE_DIRTY	0x0100	/* C: page changed */

 /* These five software bits must be masked out when the entry is loaded
  * into the TLB.
  */
 #define _PAGE_EXEC	0x0008	/* software: i-cache coherency required */
 #define _PAGE_GUARDED	0x0010	/* software: guarded access */
-#define _PAGE_DIRTY	0x0020	/* software: page changed */
-#define _PAGE_RW	0x0040	/* software: user write access allowed */
-#define _PAGE_ACCESSED	0x0080	/* software: page referenced */
+#define _PAGE_USER	0x0020	/* software: User space access */

 /* Setting any bits in the nibble with the follow two controls will
  * require a TLB exception handler change.  It is assumed unused bits
  * are always zero.
  */
-#define _PAGE_HWWRITE	0x0100	/* h/w write enable: never set in Linux PTE */
-#define _PAGE_USER	0x0800	/* One of the PP bits, the other is USER&~RW */
+#define _PAGE_RW	0x0400	/* lsb PP bits */
+#define _PAGE_ACCESSED	0x0800	/* msb PP bits */

 #define _PMD_PRESENT	0x0001
 #define _PMD_BAD	0x0ff0
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 118bb05..e111d2f 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -333,21 +333,15 @@ InstructionTLBMiss:
 	mfspr	r11, SPRN_MD_TWC	/* ....and get the pte address */
 	lwz	r10, 0(r11)	/* Get the pte */

-#ifdef CONFIG_SWAP
-	/* do not set the _PAGE_ACCESSED bit of a non-present page */
-	andi.	r11, r10, _PAGE_PRESENT
-	beq	4f
-	ori	r10, r10, _PAGE_ACCESSED
-	mfspr	r11, SPRN_MD_TWC	/* get the pte address again */
-	stw	r10, 0(r11)
-4:
-#else
-	ori	r10, r10, _PAGE_ACCESSED
-	stw	r10, 0(r11)
-#endif
-
+	andi.	r11, r10, _PAGE_USER | _PAGE_ACCESSED
+	cmpwi	cr0, r11, _PAGE_USER | _PAGE_ACCESSED
+	beq-	cr0, 5f	/* branch if access allowed */
+	rlwinm	r10, r10, 0, 22, 19 /* r20 &= ~(_PAGE_ACCESSED | _PAGE_RW) */
+	b	6f
+5:	xori	r10, r10, _PAGE_RW /* invert RW bit */
+6:
 	/* The Linux PTE won't go exactly into the MMU TLB.
-	 * Software indicator bits 21, 22 and 28 must be clear.
+	 * Software indicator bit 28 must be clear.
 	 * Software indicator bits 24, 25, 26, and 27 must be
 	 * set.  All other Linux PTE bits control the behavior
 	 * of the MMU.
@@ -409,21 +403,15 @@ DataStoreTLBMiss:
 	DO_8xx_CPU6(0x3b80, r3)
 	mtspr	SPRN_MD_TWC, r11

-#ifdef CONFIG_SWAP
-	/* do not set the _PAGE_ACCESSED bit of a non-present page */
-	andi.	r11, r10, _PAGE_PRESENT
-	beq	4f
-	ori	r10, r10, _PAGE_ACCESSED
-4:
-	/* and update pte in table */
-#else
-	ori	r10, r10, _PAGE_ACCESSED
-#endif
-	mfspr	r11, SPRN_MD_TWC	/* get the pte address again */
-	stw	r10, 0(r11)
-
+	andi.	r11, r10, _PAGE_USER | _PAGE_ACCESSED
+	cmpwi	cr0, r11, _PAGE_USER | _PAGE_ACCESSED
+	beq-	cr0, 5f	/* branch if access allowed */
+	rlwinm	r10, r10, 0, 22, 19 /* r20 &= ~(_PAGE_ACCESSED | _PAGE_RW) */
+	b	6f
+5:	xori	r10, r10, _PAGE_RW /* invert RW bit */
+6:
 	/* The Linux PTE won't go exactly into the MMU TLB.
-	 * Software indicator bits 21, 22 and 28 must be clear.
+	 * Software indicator bit 28 must be clear.
 	 * Software indicator bits 24, 25, 26, and 27 must be
 	 * set.  All other Linux PTE bits control the behavior
 	 * of the MMU.
@@ -448,6 +436,91 @@ DataStoreTLBMiss:
  */
 	. = 0x1300
 InstructionTLBError:
+#ifdef CONFIG_8xx_CPU6
+	stw	r3, 8(r0)
+#endif
+	DO_8xx_CPU6(0x3f80, r3)
+	mtspr	SPRN_M_TW, r10	/* Save a couple of working registers */
+	mfcr	r10
+	stw	r10, 0(r0)
+	stw	r11, 4(r0)
+
+	mfspr	r11, SRR1
+	andis.	r11, r11, 0x5000	/* no translation, guarded */
+	bne	2f
+
+	mfspr	r10, SPRN_SRR0	/* Get effective address of fault */
+#ifdef CONFIG_8xx_CPU15
+	addi	r11, r10, 0x1000
+	tlbie	r11
+	addi	r11, r10, -0x1000
+	tlbie	r11
+#endif
+	DO_8xx_CPU6(0x3780, r3)
+	mtspr	SPRN_MD_EPN, r10	/* Have to use MD_EPN for walk, MI_EPN can't */
+	mfspr	r10, SPRN_M_TWB	/* Get level 1 table entry address */
+
+	/* If we are faulting a kernel address, we have to use the
+	 * kernel page tables.
+	 */
+	andi.	r11, r10, 0x0800	/* Address >= 0x80000000 */
+	beq	3f
+	lis	r11, swapper_pg_dir@h
+	ori	r11, r11, swapper_pg_dir@l
+	rlwimi	r10, r11, 0, 2, 19
+3:
+	lwz	r11, 0(r10)	/* Get the level 1 entry */
+	rlwinm.	r10, r11,0,0,19	/* Extract page descriptor page address */
+	beq	2f		/* If zero, don't try to find a pte */
+
+	/* We have a pte table, so load the MI_TWC with the attributes
+	 * for this "segment."
+	 */
+	ori	r11,r11,1		/* Set valid bit */
+	DO_8xx_CPU6(0x2b80, r3)
+	mtspr	SPRN_MI_TWC, r11	/* Set segment attributes */
+	DO_8xx_CPU6(0x3b80, r3)
+	mtspr	SPRN_MD_TWC, r11	/* Load pte table base address */
+
+	mfspr	r11, SRR1
+	andi.	r11, r11, 0x4000 /* MSR[PR] */
+	mfspr	r11, MD_TWC	/* ....and get the pte address */
+	lwz	r10, 0(r11)	/* Get the pte */
+	beq	5f /* Kernel access always OK */
+	andi.	r11,r10, _PAGE_USER
+	beq	2f
+5:	ori	r10, r10, _PAGE_ACCESSED
+	mfspr	r21, MD_TWC	/* ....and get the pte address */
+	stw	r10, 0(r11)
+	xori	r10, r10, _PAGE_RW /* invert RW bit */
+
+	/* The Linux PTE won't go exactly into the MMU TLB.
+	 * Software indicator bit 28 must be clear.
+	 * Software indicator bits 24, 25, 26, and 27 must be
+	 * set.  All other Linux PTE bits control the behavior
+	 * of the MMU.
+	 */
+	li	r11, 0x00f0
+	rlwimi	r10, r11, 0, 24, 28	/* Set 24-27, clear 28 */
+	DO_8xx_CPU6(0x2d80, r3)
+	mtspr	SPRN_MI_RPN, r10	/* Update TLB entry */
+
+	mfspr	r10, SPRN_M_TW	/* Restore registers */
+	lwz	r11, 0(r0)
+	mtcr	r11
+	lwz	r11, 4(r0)
+#ifdef CONFIG_8xx_CPU6
+	lwz	r3, 8(r0)
+#endif
+	rfi
+
+2:	mfspr	r10, SPRN_M_TW	/* Restore registers */
+	lwz	r11, 0(r0)
+	mtcr	r11
+	lwz	r11, 4(r0)
+#ifdef CONFIG_8xx_CPU6
+	lwz	r3, 8(r0)
+#endif
 	b	InstructionAccess

 /* This is the data TLB error on the MPC8xx.  This could be due to
@@ -472,8 +545,8 @@ DataTLBError:
 	/* First, make sure this was a store operation.
 	*/
 	mfspr	r10, SPRN_DSISR
-	andis.	r11, r10, 0x4800 /* no translation, no permission. */
-	bne	2f	/* branch if either is set */
+	andis.	r11, r10, 0x4000 /* no translation */
+	bne	2f	/* branch if set */

 	/* The EA of a data TLB miss is automatically stored in the MD_EPN
 	 * register.  The EA of a data TLB error is automatically stored in
@@ -522,26 +595,26 @@ DataTLBError:
 	mfspr	r11, SPRN_MD_TWC		/* ....and get the pte address */
 	lwz	r10, 0(r11)		/* Get the pte */

-	andi.	r11, r10, _PAGE_RW	/* Is it writeable? */
-	beq	2f			/* Bail out if not */
+	mfspr	r11, SRR1
+	andi.	r11, r11, 0x4000 /* MSR[PR] */
+	beq	5f /* Kernel access always OK */
+	andi.	r11,r10, _PAGE_USER
+	beq	2f
+5:	mfspr	r11, DSISR
+	andis.	r11, r11, 0x0200	/* store */
+	beq	6f
+	andi.	r11, r10, _PAGE_RW	/* writeable? */
+	beq	2f /* branch if not */
+	ori	r10, r10, _PAGE_DIRTY | _PAGE_HWWRITE
+6:	ori	r10, r10, _PAGE_ACCESSED

-	/* Update 'changed', among others.
-	*/
-#ifdef CONFIG_SWAP
-	ori	r10, r10, _PAGE_DIRTY|_PAGE_HWWRITE
-	/* do not set the _PAGE_ACCESSED bit of a non-present page */
-	andi.	r11, r10, _PAGE_PRESENT
-	beq	4f
-	ori	r10, r10, _PAGE_ACCESSED
-4:
-#else
-	ori	r10, r10, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE
-#endif
 	mfspr	r11, SPRN_MD_TWC		/* Get pte address again */
 	stw	r10, 0(r11)		/* and update pte in table */

+	xori	r10, r10, _PAGE_RW	/* Invert RW bit */
+
 	/* The Linux PTE won't go exactly into the MMU TLB.
-	 * Software indicator bits 21, 22 and 28 must be clear.
+	 * Software indicator bit 28 must be clear.
 	 * Software indicator bits 24, 25, 26, and 27 must be
 	 * set.  All other Linux PTE bits control the behavior
 	 * of the MMU.
--
1.6.4.4

^ permalink raw reply related	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-10-04 20:28                                         ` Benjamin Herrenschmidt
  2009-10-04 20:45                                           ` Joakim Tjernlund
  2009-10-05  7:28                                           ` Joakim Tjernlund
@ 2009-10-05 19:16                                           ` Joakim Tjernlund
  2009-10-05 20:28                                             ` Benjamin Herrenschmidt
  2 siblings, 1 reply; 58+ messages in thread
From: Joakim Tjernlund @ 2009-10-05 19:16 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Scott Wood, linuxppc-dev, Rex Feany

Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote on 04/10/2009 22:28:38:
>
>
> > I have managed to update the TLB code to make proper use of dirty and accessed states.
> > Advantages are:
> >  - I/D TLB Miss never needs to write to the linux pte, saving a few cycles
>
> That's good, that leaves us with only 40x to fix now. Also we can remove
> atomic updates of PTEs for all non-hash. It's pointless on those CPUs
> anyway.
>
> >  - Accessed is only set by I/D TLB Error, should be a plus when SWAP is used.
>
> No need for that neither.
>
> ISI/DSI shouldn't touch the PTE. They should just fall back to C code
> which takes care of it all.l

Ben, for my understanding: It seems to that the TLB Miss routines in
head_32.S are less than optimal as it too touches the pte every time
it hits. Would it not be better to test if ACCESSED and friends are already set
and skip storing the same pte over and over again?

 Jocke

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-10-05 19:16                                           ` Joakim Tjernlund
@ 2009-10-05 20:28                                             ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 58+ messages in thread
From: Benjamin Herrenschmidt @ 2009-10-05 20:28 UTC (permalink / raw)
  To: Joakim Tjernlund; +Cc: Scott Wood, linuxppc-dev, Rex Feany

On Mon, 2009-10-05 at 21:16 +0200, Joakim Tjernlund wrote:
> Ben, for my understanding: It seems to that the TLB Miss routines in
> head_32.S are less than optimal as it too touches the pte every time
> it hits. Would it not be better to test if ACCESSED and friends are
> already set
> and skip storing the same pte over and over again?

I wouldn't think it's a big deal, but then, the 32-bit hash code
has to also update _PAGE_HASHPTE etc... overall I wouldn't touch
it for now.

However, 8xx should instead look at what I do in recent versions
of head_44x.S or what Kumar did in head_fsl_booke.S

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-29  1:21           ` Rex Feany
  2009-09-29  6:26             ` Joakim Tjernlund
@ 2009-09-29  7:07             ` Benjamin Herrenschmidt
  2009-09-29 21:09               ` Rex Feany
  1 sibling, 1 reply; 58+ messages in thread
From: Benjamin Herrenschmidt @ 2009-09-29  7:07 UTC (permalink / raw)
  To: Rex Feany; +Cc: linuxppc-dev

On Mon, 2009-09-28 at 18:21 -0700, Rex Feany wrote:
> > It's going to be hard for me to get that "right" since I don't really
> > know what's going on with the core here, but I suppose if we get it
> > moving along with extra tlb invalidations, that should be "good enough"
> > until somebody who really knows what's going on comes up with possibly
> > a better fix.
> 
> I've tried sticking tlbil_va() in those places, nothing seems to help.
> In some cases userspace is slow, in other cases userspace is faster and
> unstable: sometimes commands hang, sometimes I am able to ctrl-c and
> and kill it, sometimes I get other strange crashes or falures (so far no
> kernel oopses though).

And you are positive that with 2.6.31 and your other patch, it works
both fast and stable ? This is strange... the code should be mostly
identical. I'll have a second look and see if I can get you a patch that
reproduce -exactly- the behaviour of 2.6.31 plus your patch.

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 58+ messages in thread

* Re: [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite
  2009-09-29  7:07             ` Benjamin Herrenschmidt
@ 2009-09-29 21:09               ` Rex Feany
  0 siblings, 0 replies; 58+ messages in thread
From: Rex Feany @ 2009-09-29 21:09 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev

Thus spake Benjamin Herrenschmidt (benh@kernel.crashing.org):

> On Mon, 2009-09-28 at 18:21 -0700, Rex Feany wrote:
> > > It's going to be hard for me to get that "right" since I don't really
> > > know what's going on with the core here, but I suppose if we get it
> > > moving along with extra tlb invalidations, that should be "good enough"
> > > until somebody who really knows what's going on comes up with possibly
> > > a better fix.
> > 
> > I've tried sticking tlbil_va() in those places, nothing seems to help.
> > In some cases userspace is slow, in other cases userspace is faster and
> > unstable: sometimes commands hang, sometimes I am able to ctrl-c and
> > and kill it, sometimes I get other strange crashes or falures (so far no
> > kernel oopses though).
> 
> And you are positive that with 2.6.31 and your other patch, it works
> both fast and stable ? This is strange... the code should be mostly
> identical. I'll have a second look and see if I can get you a patch that
> reproduce -exactly- the behaviour of 2.6.31 plus your patch.

The difference is night and day - with 2.6.31 I can boot into single
user mode, I can run our custom software (I left it running over night
with very simple test script - no crashes).

With the top of the tree I can sometimes boot into a shell, and if it
isn't crashing or hanging on boot it runs very slow (10+ seconds to do
anything, if I am lucky).

thanks!
/rex

^ permalink raw reply	[flat|nested] 58+ messages in thread

end of thread, other threads:[~2009-10-05 21:04 UTC | newest]

Thread overview: 58+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-09-24  0:45 [PATCH] powerpc/8xx: fix regression introduced by cache coherency rewrite Rex Feany
2009-09-24  6:44 ` Benjamin Herrenschmidt
2009-09-24 23:33   ` Rex Feany
2009-09-24 23:52     ` Benjamin Herrenschmidt
2009-09-25  1:35       ` Rex Feany
2009-09-25  1:51         ` Benjamin Herrenschmidt
2009-09-25  3:03           ` Benjamin Herrenschmidt
2009-09-25  8:31             ` Joakim Tjernlund
2009-09-25  9:47               ` Benjamin Herrenschmidt
2009-09-25 10:21                 ` Joakim Tjernlund
2009-09-25 21:18             ` Rex Feany
2009-09-27 13:22               ` Joakim Tjernlund
2009-09-28  3:21                 ` Benjamin Herrenschmidt
2009-09-28  7:22                   ` Joakim Tjernlund
2009-09-28  7:34                     ` Benjamin Herrenschmidt
2009-09-28  7:39                       ` Joakim Tjernlund
2009-09-28 10:02                     ` Joakim Tjernlund
2009-09-29  1:21           ` Rex Feany
2009-09-29  6:26             ` Joakim Tjernlund
2009-09-29  7:07               ` Benjamin Herrenschmidt
2009-09-29  8:13                 ` Joakim Tjernlund
2009-09-29  8:16                   ` Benjamin Herrenschmidt
2009-09-29  8:24                     ` Joakim Tjernlund
2009-09-29 11:56                     ` Joakim Tjernlund
2009-09-29 21:03                       ` Rex Feany
2009-09-30  7:59                         ` Joakim Tjernlund
2009-09-30  8:19                           ` Joakim Tjernlund
2009-09-30  9:00                             ` Rex Feany
2009-09-30  9:58                               ` Joakim Tjernlund
2009-09-30 11:18                               ` Joakim Tjernlund
2009-09-30 17:23                                 ` Joakim Tjernlund
2009-09-30 22:35                                   ` Benjamin Herrenschmidt
2009-10-01  7:05                                     ` Joakim Tjernlund
2009-10-02 13:06                                     ` Joakim Tjernlund
2009-10-02 18:10                                       ` Joakim Tjernlund
2009-10-02 21:49                                     ` Scott Wood
2009-10-02 22:04                                       ` Benjamin Herrenschmidt
2009-10-05 19:28                                         ` Scott Wood
2009-10-05 20:29                                           ` Benjamin Herrenschmidt
2009-10-05 21:04                                             ` Scott Wood
2009-10-03  8:05                                       ` Joakim Tjernlund
2009-10-03  8:31                                         ` Benjamin Herrenschmidt
2009-10-03  9:24                                           ` Joakim Tjernlund
2009-10-03 10:57                                             ` Benjamin Herrenschmidt
2009-10-03 11:47                                               ` Joakim Tjernlund
2009-10-04  8:35                                               ` Joakim Tjernlund
2009-10-04 20:26                                                 ` Benjamin Herrenschmidt
2009-10-04 20:38                                                   ` Joakim Tjernlund
2009-10-05 18:24                                         ` Scott Wood
2009-10-05 18:50                                           ` Joakim Tjernlund
2009-10-04 20:10                                       ` Joakim Tjernlund
2009-10-04 20:28                                         ` Benjamin Herrenschmidt
2009-10-04 20:45                                           ` Joakim Tjernlund
2009-10-05  7:28                                           ` Joakim Tjernlund
2009-10-05 19:16                                           ` Joakim Tjernlund
2009-10-05 20:28                                             ` Benjamin Herrenschmidt
2009-09-29  7:07             ` Benjamin Herrenschmidt
2009-09-29 21:09               ` Rex Feany

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.