All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] MIPS: Don't branch to eret in TLB refill.
@ 2009-05-12 22:45 David Daney
  2009-05-13  0:23 ` David VomLehn
  0 siblings, 1 reply; 10+ messages in thread
From: David Daney @ 2009-05-12 22:45 UTC (permalink / raw)
  To: linux-mips, ralf; +Cc: David Daney

If the TLB refill handler is too bit and needs to be split, there is
no need to branch around the split if the branch target would be an
eret.  Since the eret returns from the handler, control flow never
passes it.  A branch to an eret is equivalent to the eret itself.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
---
 arch/mips/mm/tlbex.c |   50 +++++++++++++++++++++++++++++++++-----------------
 1 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
index 4dc4f3e..ffa7104 100644
--- a/arch/mips/mm/tlbex.c
+++ b/arch/mips/mm/tlbex.c
@@ -656,6 +656,7 @@ static void __cpuinit build_r4000_tlb_refill_handler(void)
 	struct uasm_reloc *r = relocs;
 	u32 *f;
 	unsigned int final_len;
+	int split_on_eret;
 
 	memset(tlb_handler, 0, sizeof(tlb_handler));
 	memset(labels, 0, sizeof(labels));
@@ -684,6 +685,13 @@ static void __cpuinit build_r4000_tlb_refill_handler(void)
 	build_update_entries(&p, K0, K1);
 	build_tlb_write_entry(&p, &l, &r, tlb_random);
 	uasm_l_leave(&l, p);
+
+	/*
+	 * Check to see if the eret will be the last instruction
+	 * before the split.  If it is, there is no need to branch
+	 * around the split, as we are returning.
+	 */
+	split_on_eret = (p - tlb_handler == 31);
 	uasm_i_eret(&p); /* return from trap */
 
 #ifdef CONFIG_64BIT
@@ -723,28 +731,36 @@ static void __cpuinit build_r4000_tlb_refill_handler(void)
 		uasm_copy_handler(relocs, labels, tlb_handler, p, f);
 		final_len = p - tlb_handler;
 	} else {
-		u32 *split = tlb_handler + 30;
-
-		/*
-		 * Find the split point.
-		 */
-		if (uasm_insn_has_bdelay(relocs, split - 1))
-			split--;
+		u32 *split;
+		if (split_on_eret) {
+			split = tlb_handler + 32;
+		} else {
+			split = tlb_handler + 30;
+
+			/*
+			 * Find the split point.
+			 */
+			if (uasm_insn_has_bdelay(relocs, split - 1))
+				split--;
+		}
 
 		/* Copy first part of the handler. */
 		uasm_copy_handler(relocs, labels, tlb_handler, split, f);
 		f += split - tlb_handler;
 
-		/* Insert branch. */
-		uasm_l_split(&l, final_handler);
-		uasm_il_b(&f, &r, label_split);
-		if (uasm_insn_has_bdelay(relocs, split))
-			uasm_i_nop(&f);
-		else {
-			uasm_copy_handler(relocs, labels, split, split + 1, f);
-			uasm_move_labels(labels, f, f + 1, -1);
-			f++;
-			split++;
+		if (!split_on_eret) {
+			/* Insert branch. */
+			uasm_l_split(&l, final_handler);
+			uasm_il_b(&f, &r, label_split);
+			if (uasm_insn_has_bdelay(relocs, split))
+				uasm_i_nop(&f);
+			else {
+				uasm_copy_handler(relocs, labels,
+						  split, split + 1, f);
+				uasm_move_labels(labels, f, f + 1, -1);
+				f++;
+				split++;
+			}
 		}
 
 		/* Copy the rest of the handler. */
-- 
1.6.0.6

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] MIPS: Don't branch to eret in TLB refill.
  2009-05-12 22:45 [PATCH] MIPS: Don't branch to eret in TLB refill David Daney
@ 2009-05-13  0:23 ` David VomLehn
  2009-05-13  1:12   ` David Daney
  0 siblings, 1 reply; 10+ messages in thread
From: David VomLehn @ 2009-05-13  0:23 UTC (permalink / raw)
  To: David Daney; +Cc: linux-mips, ralf

On Tue, May 12, 2009 at 03:45:16PM -0700, David Daney wrote:
> If the TLB refill handler is too bit and needs to be split, there is
> no need to branch around the split if the branch target would be an
> eret.  Since the eret returns from the handler, control flow never
> passes it.  A branch to an eret is equivalent to the eret itself.
> 
> Signed-off-by: David Daney <ddaney@caviumnetworks.com>
> ---
>  arch/mips/mm/tlbex.c |   50 +++++++++++++++++++++++++++++++++-----------------
>  1 files changed, 33 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
> index 4dc4f3e..ffa7104 100644
> --- a/arch/mips/mm/tlbex.c
> +++ b/arch/mips/mm/tlbex.c
> @@ -723,28 +731,36 @@ static void __cpuinit build_r4000_tlb_refill_handler(void)
>  		uasm_copy_handler(relocs, labels, tlb_handler, p, f);
>  		final_len = p - tlb_handler;
>  	} else {
> -		u32 *split = tlb_handler + 30;
> -
> -		/*
> -		 * Find the split point.
> -		 */
> -		if (uasm_insn_has_bdelay(relocs, split - 1))
> -			split--;
> +		u32 *split;
> +		if (split_on_eret) {
> +			split = tlb_handler + 32;
> +		} else {
> +			split = tlb_handler + 30;

It would be a shame to pass up an opportunity to eliminate some of the
pile of magic constants we have in the MIPS tree. Given that the MIPS
documentation describes the size of the TLB Refill handler as 0x80 bytes,
we could add something like:

/* Maximum # of instructions in exception vector for TLB Refill handler */
#define MIPS64_TLB_REFILL_OPS	(0x80 / sizeof(union mips_instruction))

and could change the last few lines of the code above to, for example:

		if (split_on_eret) {
			split = tlb_handler + MIPS64_TLB_REFILL_OPS;
		} else {
			split = tlb_handler + MIPS64_TLB_REFILL_OPS - 2;

(you'd need to include asm/inst.h to get union mips_instruction defined)

> +
> +			/*
> +			 * Find the split point.
> +			 */
> +			if (uasm_insn_has_bdelay(relocs, split - 1))
> +				split--;
> +		}

The code itself makes sense. Does this case actually happen much, or was
this just an itch?

Reviewed-by: David VomLehn <dvomlehn@cisco.com>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] MIPS: Don't branch to eret in TLB refill.
  2009-05-13  0:23 ` David VomLehn
@ 2009-05-13  1:12   ` David Daney
  2009-05-13  2:01     ` Paul Gortmaker
  2009-05-16  7:28     ` Maciej W. Rozycki
  0 siblings, 2 replies; 10+ messages in thread
From: David Daney @ 2009-05-13  1:12 UTC (permalink / raw)
  To: David VomLehn; +Cc: linux-mips, ralf

David VomLehn wrote:
> On Tue, May 12, 2009 at 03:45:16PM -0700, David Daney wrote:
>> If the TLB refill handler is too bit and needs to be split, there is
>> no need to branch around the split if the branch target would be an
>> eret.  Since the eret returns from the handler, control flow never
>> passes it.  A branch to an eret is equivalent to the eret itself.
>>
>> Signed-off-by: David Daney <ddaney@caviumnetworks.com>
>> ---
>>  arch/mips/mm/tlbex.c |   50 +++++++++++++++++++++++++++++++++-----------------
>>  1 files changed, 33 insertions(+), 17 deletions(-)
>>
>> diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c
>> index 4dc4f3e..ffa7104 100644
>> --- a/arch/mips/mm/tlbex.c
>> +++ b/arch/mips/mm/tlbex.c
>> @@ -723,28 +731,36 @@ static void __cpuinit build_r4000_tlb_refill_handler(void)
>>  		uasm_copy_handler(relocs, labels, tlb_handler, p, f);
>>  		final_len = p - tlb_handler;
>>  	} else {
>> -		u32 *split = tlb_handler + 30;
>> -
>> -		/*
>> -		 * Find the split point.
>> -		 */
>> -		if (uasm_insn_has_bdelay(relocs, split - 1))
>> -			split--;
>> +		u32 *split;
>> +		if (split_on_eret) {
>> +			split = tlb_handler + 32;
>> +		} else {
>> +			split = tlb_handler + 30;
> 
> It would be a shame to pass up an opportunity to eliminate some of the
> pile of magic constants we have in the MIPS tree. Given that the MIPS
> documentation describes the size of the TLB Refill handler as 0x80 bytes,
> we could add something like:
> 

That would be a different patch according to the one patch per problem rule.

> /* Maximum # of instructions in exception vector for TLB Refill handler */
> #define MIPS64_TLB_REFILL_OPS	(0x80 / sizeof(union mips_instruction))
> 
> and could change the last few lines of the code above to, for example:
> 
> 		if (split_on_eret) {
> 			split = tlb_handler + MIPS64_TLB_REFILL_OPS;
> 		} else {
> 			split = tlb_handler + MIPS64_TLB_REFILL_OPS - 2;
> 
> (you'd need to include asm/inst.h to get union mips_instruction defined)

It is certainly possible to do something like that, but it isn't clear 
to me that it makes it any easier to understand.

> 
>> +
>> +			/*
>> +			 * Find the split point.
>> +			 */
>> +			if (uasm_insn_has_bdelay(relocs, split - 1))
>> +				split--;
>> +		}
> 
> The code itself makes sense. Does this case actually happen much, or was
> this just an itch?
> 

For my CPU it was happening 100% of the time when I add the soon to be 
submitted hugeTLBfs support patch.  Although I have not measured it, 
this code is so hot that keeping the normal case fitting on a single 
cache line should be a big win.

David Daney

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] MIPS: Don't branch to eret in TLB refill.
  2009-05-13  1:12   ` David Daney
@ 2009-05-13  2:01     ` Paul Gortmaker
  2009-05-16  7:28     ` Maciej W. Rozycki
  1 sibling, 0 replies; 10+ messages in thread
From: Paul Gortmaker @ 2009-05-13  2:01 UTC (permalink / raw)
  To: David Daney; +Cc: David VomLehn, linux-mips, ralf

On Tue, May 12, 2009 at 9:12 PM, David Daney <ddaney@caviumnetworks.com> wrote:
> David VomLehn wrote:
>>
>> On Tue, May 12, 2009 at 03:45:16PM -0700, David Daney wrote:
>>>
>>> If the TLB refill handler is too bit and needs to be split, there is

minor nit - s/bit/big/

>>> no need to branch around the split if the branch target would be an
>>> eret.  Since the eret returns from the handler, control flow never
>>> passes it.  A branch to an eret is equivalent to the eret itself.
>>>
>>> Signed-off-by: David Daney <ddaney@caviumnetworks.com>

...

>>> +               u32 *split;
>>> +               if (split_on_eret) {
>>> +                       split = tlb_handler + 32;
>>> +               } else {
>>> +                       split = tlb_handler + 30;
>>
>> It would be a shame to pass up an opportunity to eliminate some of the
>> pile of magic constants we have in the MIPS tree. Given that the MIPS
>> documentation describes the size of the TLB Refill handler as 0x80 bytes,
>> we could add something like:
>>
>
> That would be a different patch according to the one patch per problem rule.

I don't see that as a showstopper; just replace the 30 with DVL's
definition in one separate patch that does nothing more, and then do
the conditional "-2" part in a separate patch.  Sure, it may seem like
extra cycles for nothing at this point in time, but it pays off in the
long run for folks doing a bisect on changes etc. and it improves the
readability of changesets (by having cleanups separated from technical
changes).  If you can spare the cycles, I know I would at least
appreciate the effort (and so would anyone else who ends up doing a
back or forward port.)

>
>> /* Maximum # of instructions in exception vector for TLB Refill handler */
>> #define MIPS64_TLB_REFILL_OPS   (0x80 / sizeof(union mips_instruction))
>>
>> and could change the last few lines of the code above to, for example:
>>
>>                if (split_on_eret) {
>>                        split = tlb_handler + MIPS64_TLB_REFILL_OPS;
>>                } else {
>>                        split = tlb_handler + MIPS64_TLB_REFILL_OPS - 2;
>>
>> (you'd need to include asm/inst.h to get union mips_instruction defined)
>
> It is certainly possible to do something like that, but it isn't clear to me
> that it makes it any easier to understand.

Well, for someone like me who doesn't know the low-level arch details,
I'd agree with DVL that it is easier to google "mips tlb refill" than
it is to google "32".  So it is probably a worthwhile change IMHO.

Paul.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] MIPS: Don't branch to eret in TLB refill.
  2009-05-13  1:12   ` David Daney
  2009-05-13  2:01     ` Paul Gortmaker
@ 2009-05-16  7:28     ` Maciej W. Rozycki
  2009-05-18 16:25       ` David Daney
  1 sibling, 1 reply; 10+ messages in thread
From: Maciej W. Rozycki @ 2009-05-16  7:28 UTC (permalink / raw)
  To: David Daney; +Cc: David VomLehn, linux-mips, ralf

On Tue, 12 May 2009, David Daney wrote:

> > > +			/*
> > > +			 * Find the split point.
> > > +			 */
> > > +			if (uasm_insn_has_bdelay(relocs, split - 1))
> > > +				split--;
> > > +		}
> > 
> > The code itself makes sense. Does this case actually happen much, or was
> > this just an itch?
> > 
> 
> For my CPU it was happening 100% of the time when I add the soon to be
> submitted hugeTLBfs support patch.  Although I have not measured it, this code
> is so hot that keeping the normal case fitting on a single cache line should
> be a big win.

 Rather than this hack, I'd suggest microoptimising the code by shuffling 
it such that unless the handler fits in 128 bytes entirely (I'm not sure 
if that ever happens for XTLB refill) the part built by 
build_get_pgd_vmalloc64() is placed in the TLB handler slot, saving an 
unnecessary unconditional branch there.  This way the problem of an 
unconditional branch to ERET will solve automagically as a side-effect.  
Unless the vmalloc part does not fit in 128 bytes, that is, in which case 
it would have to overflow back to the XTLB slot.  It should be pretty 
straightforward to code. ;)

  Maciej

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] MIPS: Don't branch to eret in TLB refill.
  2009-05-16  7:28     ` Maciej W. Rozycki
@ 2009-05-18 16:25       ` David Daney
  2009-05-18 17:48         ` Maciej W. Rozycki
  0 siblings, 1 reply; 10+ messages in thread
From: David Daney @ 2009-05-18 16:25 UTC (permalink / raw)
  To: Maciej W. Rozycki, ralf; +Cc: David VomLehn, linux-mips

Maciej W. Rozycki wrote:
> On Tue, 12 May 2009, David Daney wrote:
> 
>>>> +			/*
>>>> +			 * Find the split point.
>>>> +			 */
>>>> +			if (uasm_insn_has_bdelay(relocs, split - 1))
>>>> +				split--;
>>>> +		}
>>> The code itself makes sense. Does this case actually happen much, or was
>>> this just an itch?
>>>
>> For my CPU it was happening 100% of the time when I add the soon to be
>> submitted hugeTLBfs support patch.  Although I have not measured it, this code
>> is so hot that keeping the normal case fitting on a single cache line should
>> be a big win.
> 
>  Rather than this hack,

I don't really know what to say about that comment.

* We are synthesizing optimized TLB refill handlers, even small 
improvements yield big gains in system performance.

* The optimization you suggest below, although a good one, is somewhat 
different and would make a good follow on patch.

* I am trying to make forward progress and not have The perfect be the 
enemy of the good.

> I'd suggest microoptimising the code by shuffling 
> it such that unless the handler fits in 128 bytes entirely (I'm not sure 
> if that ever happens for XTLB refill) the part built by 
> build_get_pgd_vmalloc64() is placed in the TLB handler slot, saving an 
> unnecessary unconditional branch there.  This way the problem of an 
> unconditional branch to ERET will solve automagically as a side-effect.  
> Unless the vmalloc part does not fit in 128 bytes, that is, in which case 
> it would have to overflow back to the XTLB slot.  It should be pretty 
> straightforward to code. ;)
> 
>   Maciej
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] MIPS: Don't branch to eret in TLB refill.
  2009-05-18 16:25       ` David Daney
@ 2009-05-18 17:48         ` Maciej W. Rozycki
  2009-05-18 19:32           ` [PATCH] MIPS: Fold the TLB refill at the vmalloc path if possible Maciej W. Rozycki
  0 siblings, 1 reply; 10+ messages in thread
From: Maciej W. Rozycki @ 2009-05-18 17:48 UTC (permalink / raw)
  To: David Daney; +Cc: ralf, David VomLehn, linux-mips

On Mon, 18 May 2009, David Daney wrote:

> I don't really know what to say about that comment.
> 
> * We are synthesizing optimized TLB refill handlers, even small improvements
> yield big gains in system performance.
> 
> * The optimization you suggest below, although a good one, is somewhat
> different and would make a good follow on patch.
> 
> * I am trying to make forward progress and not have The perfect be the enemy
> of the good.

 What I suggested obsoletes your patch and requires it to be reverted 
because the folding point search algorithm would change.  It yields 
optimisation you (and everybody else, even if they do not realise it) are 
after not only for your corner case of ERET being exactly at offset of 
0x7c from the beginning of the XTLB handler slot, but for any systems for 
which the size of the XTLB slot is not enough to hold the whole handler, 
which, according to my knowledge, currently means all.  So why to go 
through the two-stage process at all and fix a corner case rather than the 
whole problem in the first place?

 This is my point of view; others may disagree of course.

  Maciej

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH] MIPS: Fold the TLB refill at the vmalloc path if possible
  2009-05-18 17:48         ` Maciej W. Rozycki
@ 2009-05-18 19:32           ` Maciej W. Rozycki
  2009-05-18 23:26             ` David Daney
  0 siblings, 1 reply; 10+ messages in thread
From: Maciej W. Rozycki @ 2009-05-18 19:32 UTC (permalink / raw)
  To: David Daney, Ralf Baechle; +Cc: David VomLehn, linux-mips

 Try to fold the 64-bit TLB refill handler opportunistically at the 
beginning of the vmalloc path so as to avoid splitting execution flow in 
half and wasting cycles for a branch required at that point then.  Resort 
to doing the split if either of the newly created parts would not fit into 
its designated slot.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
---
David,

 So I took the opportunity and spent a few minutes cooking up thich 
change.  It works correctly with my SWARM filling the XTLB handler slot 
with the fast path up to the offset of 0x6c inclusive, where the final 
ERET is placed and the TLB handler slot with the vmalloc path, up to 0x14 
inclusive.  With the M3 workaround enabled it correctly resorts to 
inserting a branch with the ERET landing at 0x10 into the TLB slot.

 It's meant to work for your case -- please try it. :)  Unfortunately I 
lack the resources to check with a more modern kernel, but I hope it 
applies cleanly; if that turns out to be a problem, then I can look into 
it later.  I haven't updated the space exhaustion check for the corner 
case of both parts being exactly 32 instructions with the ERET at 0x7c 
into the XTLB slot; I'm leaving it as an exercise for the first one to 
actually hit such a scenario. ;)

 Further improvement might include getting rid of #ifdef MODULE_START 
directives scattered throughout the file.

 Ralf, please consider.

  Maciej

patch-mips-2.6.27-rc8-20081004-mips-tlbex-fold-0
Index: linux-mips-2.6.27-rc8-20081004-swarm-eb/arch/mips/mm/tlbex.c
===================================================================
--- linux-mips-2.6.27-rc8-20081004-swarm-eb.orig/arch/mips/mm/tlbex.c
+++ linux-mips-2.6.27-rc8-20081004-swarm-eb/arch/mips/mm/tlbex.c
@@ -6,7 +6,7 @@
  * Synthesize TLB refill handlers at runtime.
  *
  * Copyright (C) 2004, 2005, 2006, 2008  Thiemo Seufer
- * Copyright (C) 2005, 2007, 2008  Maciej W. Rozycki
+ * Copyright (C) 2005, 2007, 2008, 2009  Maciej W. Rozycki
  * Copyright (C) 2006  Ralf Baechle (ralf@linux-mips.org)
  *
  * ... and the days got worse and worse and now you see
@@ -19,6 +19,7 @@
  * (Condolences to Napoleon XIV)
  */
 
+#include <linux/bug.h>
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/string.h>
@@ -738,28 +739,52 @@ static void __cpuinit build_r4000_tlb_re
 		uasm_copy_handler(relocs, labels, tlb_handler, p, f);
 		final_len = p - tlb_handler;
 	} else {
-		u32 *split = tlb_handler + 30;
+#ifdef MODULE_START
+		const enum label_id ls = label_module_alloc;
+#else
+		const enum label_id ls = label_vmalloc;
+#endif
+		const int ie = sizeof(labels) / sizeof(labels[0]);
+		u32 *split;
+		int ov = 0;
+		int i;
 
 		/*
 		 * Find the split point.
 		 */
-		if (uasm_insn_has_bdelay(relocs, split - 1))
-			split--;
+		for (i = 0; i < ie && labels[i].lab != ls; i++);
+		BUG_ON(i == ie);
+		split = labels[i].addr;
+
+		/*
+		 * See if we have overflown one way or the other.
+		 */
+		if (split > tlb_handler + 32 || split < p - 32)
+			ov = 1;
+
+		if (ov) {
+			split = tlb_handler + 30;
+			if (uasm_insn_has_bdelay(relocs, split - 1))
+				split--;
+		}
 
 		/* Copy first part of the handler. */
 		uasm_copy_handler(relocs, labels, tlb_handler, split, f);
 		f += split - tlb_handler;
 
-		/* Insert branch. */
-		uasm_l_split(&l, final_handler);
-		uasm_il_b(&f, &r, label_split);
-		if (uasm_insn_has_bdelay(relocs, split))
-			uasm_i_nop(&f);
-		else {
-			uasm_copy_handler(relocs, labels, split, split + 1, f);
-			uasm_move_labels(labels, f, f + 1, -1);
-			f++;
-			split++;
+		if (ov) {
+			/* Insert branch. */
+			uasm_l_split(&l, final_handler);
+			uasm_il_b(&f, &r, label_split);
+			if (uasm_insn_has_bdelay(relocs, split))
+				uasm_i_nop(&f);
+			else {
+				uasm_copy_handler(relocs, labels,
+						  split, split + 1, f);
+				uasm_move_labels(labels, f, f + 1, -1);
+				f++;
+				split++;
+			}
 		}
 
 		/* Copy the rest of the handler. */

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] MIPS: Fold the TLB refill at the vmalloc path if possible
  2009-05-18 19:32           ` [PATCH] MIPS: Fold the TLB refill at the vmalloc path if possible Maciej W. Rozycki
@ 2009-05-18 23:26             ` David Daney
  2009-05-18 23:33               ` Maciej W. Rozycki
  0 siblings, 1 reply; 10+ messages in thread
From: David Daney @ 2009-05-18 23:26 UTC (permalink / raw)
  To: Maciej W. Rozycki; +Cc: Ralf Baechle, David VomLehn, linux-mips

Maciej W. Rozycki wrote:
>  Try to fold the 64-bit TLB refill handler opportunistically at the 
> beginning of the vmalloc path so as to avoid splitting execution flow in 
> half and wasting cycles for a branch required at that point then.  Resort 
> to doing the split if either of the newly created parts would not fit into 
> its designated slot.
> 

Unless you have an objection, I will test this, forward ported to the 
HEAD and modified to work with my patch that gets rid of all the magic 
numbers.  And then send the result to Ralf.

David Daney



> Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
> ---
> David,
> 
>  So I took the opportunity and spent a few minutes cooking up thich 
> change.  It works correctly with my SWARM filling the XTLB handler slot 
> with the fast path up to the offset of 0x6c inclusive, where the final 
> ERET is placed and the TLB handler slot with the vmalloc path, up to 0x14 
> inclusive.  With the M3 workaround enabled it correctly resorts to 
> inserting a branch with the ERET landing at 0x10 into the TLB slot.
> 
>  It's meant to work for your case -- please try it. :)  Unfortunately I 
> lack the resources to check with a more modern kernel, but I hope it 
> applies cleanly; if that turns out to be a problem, then I can look into 
> it later.  I haven't updated the space exhaustion check for the corner 
> case of both parts being exactly 32 instructions with the ERET at 0x7c 
> into the XTLB slot; I'm leaving it as an exercise for the first one to 
> actually hit such a scenario. ;)
> 
>  Further improvement might include getting rid of #ifdef MODULE_START 
> directives scattered throughout the file.
> 
>  Ralf, please consider.
> 
>   Maciej
> 
> patch-mips-2.6.27-rc8-20081004-mips-tlbex-fold-0
> Index: linux-mips-2.6.27-rc8-20081004-swarm-eb/arch/mips/mm/tlbex.c
> ===================================================================
> --- linux-mips-2.6.27-rc8-20081004-swarm-eb.orig/arch/mips/mm/tlbex.c
> +++ linux-mips-2.6.27-rc8-20081004-swarm-eb/arch/mips/mm/tlbex.c
> @@ -6,7 +6,7 @@
>   * Synthesize TLB refill handlers at runtime.
>   *
>   * Copyright (C) 2004, 2005, 2006, 2008  Thiemo Seufer
> - * Copyright (C) 2005, 2007, 2008  Maciej W. Rozycki
> + * Copyright (C) 2005, 2007, 2008, 2009  Maciej W. Rozycki
>   * Copyright (C) 2006  Ralf Baechle (ralf@linux-mips.org)
>   *
>   * ... and the days got worse and worse and now you see
> @@ -19,6 +19,7 @@
>   * (Condolences to Napoleon XIV)
>   */
>  
> +#include <linux/bug.h>
>  #include <linux/kernel.h>
>  #include <linux/types.h>
>  #include <linux/string.h>
> @@ -738,28 +739,52 @@ static void __cpuinit build_r4000_tlb_re
>  		uasm_copy_handler(relocs, labels, tlb_handler, p, f);
>  		final_len = p - tlb_handler;
>  	} else {
> -		u32 *split = tlb_handler + 30;
> +#ifdef MODULE_START
> +		const enum label_id ls = label_module_alloc;
> +#else
> +		const enum label_id ls = label_vmalloc;
> +#endif
> +		const int ie = sizeof(labels) / sizeof(labels[0]);
> +		u32 *split;
> +		int ov = 0;
> +		int i;
>  
>  		/*
>  		 * Find the split point.
>  		 */
> -		if (uasm_insn_has_bdelay(relocs, split - 1))
> -			split--;
> +		for (i = 0; i < ie && labels[i].lab != ls; i++);
> +		BUG_ON(i == ie);
> +		split = labels[i].addr;
> +
> +		/*
> +		 * See if we have overflown one way or the other.
> +		 */
> +		if (split > tlb_handler + 32 || split < p - 32)
> +			ov = 1;
> +
> +		if (ov) {
> +			split = tlb_handler + 30;
> +			if (uasm_insn_has_bdelay(relocs, split - 1))
> +				split--;
> +		}
>  
>  		/* Copy first part of the handler. */
>  		uasm_copy_handler(relocs, labels, tlb_handler, split, f);
>  		f += split - tlb_handler;
>  
> -		/* Insert branch. */
> -		uasm_l_split(&l, final_handler);
> -		uasm_il_b(&f, &r, label_split);
> -		if (uasm_insn_has_bdelay(relocs, split))
> -			uasm_i_nop(&f);
> -		else {
> -			uasm_copy_handler(relocs, labels, split, split + 1, f);
> -			uasm_move_labels(labels, f, f + 1, -1);
> -			f++;
> -			split++;
> +		if (ov) {
> +			/* Insert branch. */
> +			uasm_l_split(&l, final_handler);
> +			uasm_il_b(&f, &r, label_split);
> +			if (uasm_insn_has_bdelay(relocs, split))
> +				uasm_i_nop(&f);
> +			else {
> +				uasm_copy_handler(relocs, labels,
> +						  split, split + 1, f);
> +				uasm_move_labels(labels, f, f + 1, -1);
> +				f++;
> +				split++;
> +			}
>  		}
>  
>  		/* Copy the rest of the handler. */
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] MIPS: Fold the TLB refill at the vmalloc path if possible
  2009-05-18 23:26             ` David Daney
@ 2009-05-18 23:33               ` Maciej W. Rozycki
  0 siblings, 0 replies; 10+ messages in thread
From: Maciej W. Rozycki @ 2009-05-18 23:33 UTC (permalink / raw)
  To: David Daney; +Cc: Ralf Baechle, David VomLehn, linux-mips

On Mon, 18 May 2009, David Daney wrote:

> Unless you have an objection, I will test this, forward ported to the HEAD and
> modified to work with my patch that gets rid of all the magic numbers.  And
> then send the result to Ralf.

 Excellent -- thanks a lot!  And yes, getting rid of the magic numbers is 
a jolly good idea -- they work well until you discover you need to adjust 
them all at a time for some reason.

  Maciej

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2009-05-18 23:33 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-05-12 22:45 [PATCH] MIPS: Don't branch to eret in TLB refill David Daney
2009-05-13  0:23 ` David VomLehn
2009-05-13  1:12   ` David Daney
2009-05-13  2:01     ` Paul Gortmaker
2009-05-16  7:28     ` Maciej W. Rozycki
2009-05-18 16:25       ` David Daney
2009-05-18 17:48         ` Maciej W. Rozycki
2009-05-18 19:32           ` [PATCH] MIPS: Fold the TLB refill at the vmalloc path if possible Maciej W. Rozycki
2009-05-18 23:26             ` David Daney
2009-05-18 23:33               ` Maciej W. Rozycki

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.