linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [patch] alpha: fix alignment problem in csum_ipv6_magic()
@ 2007-06-16 21:20 Ivan Kokshaysky
  2007-06-21 23:35 ` Andrew Morton
  0 siblings, 1 reply; 4+ messages in thread
From: Ivan Kokshaysky @ 2007-06-16 21:20 UTC (permalink / raw)
  To: Andrew Morton, Richard Henderson; +Cc: linux-kernel

Hopefully this fixes http://bugzilla.kernel.org/show_bug.cgi?id=8635

The struct in6_addr passed to csum_ipv6_magic() is 4 byte aligned,
so we can't use the regular 64-bit loads.
Since handling 4-byte-aligned and 1-byte-aligned 64-bit data costs
roughly the same, this code can cope with any src/dst [mis]alignment.

Signed-off-by: Ivan Kokshaysky <ink@jurassic.park.msu.ru>

Ivan.

--- 2.6.22-rc4/arch/alpha/lib/ev6-csum_ipv6_magic.S	Sun Feb  4 21:44:54 2007
+++ linux/arch/alpha/lib/ev6-csum_ipv6_magic.S	Sun Jun 17 00:41:53 2007
@@ -46,6 +46,10 @@
  * add the 3 low ushorts together, generating a uint
  * a final add of the 2 lower ushorts
  * truncating the result.
+ *
+ * Misalignment handling added by Ivan Kokshaysky <ink@jurassic.park.msu.ru>
+ * The cost is 16 instructions (~8 cycles), including two extra loads which
+ * may cause additional delay in rare cases (load-load replay traps).
  */
 
 	.globl csum_ipv6_magic
@@ -55,25 +59,45 @@
 csum_ipv6_magic:
 	.prologue 0
 
-	ldq	$0,0($16)	# L : Latency: 3
+	ldq_u	$0,0($16)	# L : Latency: 3
 	inslh	$18,7,$4	# U : 0000000000AABBCC
-	ldq	$1,8($16)	# L : Latency: 3
+	ldq_u	$1,8($16)	# L : Latency: 3
 	sll	$19,8,$7	# U : U L U L : 0x00000000 00aabb00
 
+	and	$16,7,$6	# E : src misalignment
+	ldq_u	$5,15($16)	# L : Latency: 3
 	zapnot	$20,15,$20	# U : zero extend incoming csum
-	ldq	$2,0($17)	# L : Latency: 3
-	sll	$19,24,$19	# U : U L L U : 0x000000aa bb000000
-	inswl	$18,3,$18	# U : 000000CCDD000000
+	ldq_u	$2,0($17)	# L : U L U L : Latency: 3
+
+	extql	$0,$6,$0	# U :
+	extqh	$1,$6,$22	# U :
+	ldq_u	$3,8($17)	# L : Latency: 3
+	sll	$19,24,$19	# U : U U L U : 0x000000aa bb000000
+
+	cmoveq	$6,$31,$22	# E : src aligned?
+	ldq_u	$23,15($17)	# L : Latency: 3
+	or	$18,$4,$18	# E : 000000CCDDAABBCC
+	extql	$1,$6,$1	# U : U L L U :
 
-	ldq	$3,8($17)	# L : Latency: 3
-	bis	$18,$4,$18	# E : 000000CCDDAABBCC
+	or	$0,$22,$0	# E : 1st src word complete
+	extqh	$5,$6,$5	# U :
 	addl	$19,$7,$19	# E : <sign bits>bbaabb00
-	nop			# E : U L U L
+	and	$17,7,$6	# E : L U L U : dst misalignment
 
+	inswl	$18,3,$18	# U : 000000CCDD000000
+	or	$1,$5,$1	# E : 2nd src word complete
+	extql	$2,$6,$2	# U :
+	extqh	$3,$6,$22	# U : U L U U :
+
+	cmoveq	$6,$31,$22	# E : dst aligned?
+	extql	$3,$6,$3	# U :
 	addq	$20,$0,$20	# E : begin summing the words
+	extqh	$23,$6,$23	# U : L U L U :
+
 	srl	$18,16,$4	# U : 0000000000CCDDAA
+	or	$2,$22,$2	# E : 1st dst word complete
 	zap	$19,0x3,$19	# U : <sign bits>bbaa0000
-	nop			# E : L U U L
+	or	$3,$23,$3	# E : U L U L : 2nd dst word complete
 
 	cmpult	$20,$0,$0	# E :
 	addq	$20,$1,$20	# E :
--- 2.6.22-rc4/arch/alpha/lib/csum_ipv6_magic.S	Sun Feb  4 21:44:54 2007
+++ linux/arch/alpha/lib/csum_ipv6_magic.S	Sun Jun 17 00:29:28 2007
@@ -7,6 +7,9 @@
  *                                __u32 len,
  *                                unsigned short proto,
  *                                unsigned int csum);
+ *
+ * Misalignment handling (which costs 16 instructions / 8 cycles) 
+ * added by Ivan Kokshaysky <ink@jurassic.park.msu.ru>
  */
 
 	.globl csum_ipv6_magic
@@ -16,37 +19,57 @@
 csum_ipv6_magic:
 	.prologue 0
 
-	ldq	$0,0($16)	# e0    : load src & dst addr words
+	ldq_u	$0,0($16)	# e0    : load src & dst addr words
 	zapnot	$20,15,$20	# .. e1 : zero extend incoming csum
 	extqh	$18,1,$4	# e0    : byte swap len & proto while we wait
-	ldq	$1,8($16)	# .. e1 :
+	ldq_u	$21,7($16)	# .. e1 : handle misalignment
 
 	extbl	$18,1,$5	# e0	:
-	ldq	$2,0($17)	# .. e1 :
+	ldq_u	$1,8($16)	# .. e1 :
 	extbl	$18,2,$6	# e0 	:
-	ldq	$3,8($17)	# .. e1 :
+	ldq_u	$22,15($16)	# .. e1 :
 
 	extbl	$18,3,$18	# e0	:
+	ldq_u	$2,0($17)	# .. e1 :
 	sra	$4,32,$4	# e0	:
+	ldq_u	$23,7($17)	# .. e1 :
+
+	extql	$0,$16,$0	# e0	:
+	ldq_u	$3,8($17)	# .. e1 :
+	extqh	$21,$16,$21	# e0	:
+	ldq_u	$24,15($17)	# .. e1 :
+
 	sll	$5,16,$5	# e0	:
+	or	$0,$21,$0	# .. e1 : 1st src word complete
+	extql	$1,$16,$1	# e0	:
 	addq	$20,$0,$20	# .. e1 : begin summing the words
 
-	sll	$6,8,$6		# e0	:
+	extqh	$22,$16,$22	# e0	:
 	cmpult	$20,$0,$0	# .. e1 :
-	extwh	$19,7,$7	# e0    :
-	or	$4,$18,$18	# .. e1 :
+	sll	$6,8,$6		# e0	:
+	or	$1,$22,$1	# .. e1 : 2nd src word complete
 
-	extbl	$19,1,$19	# e0    :
+	extql	$2,$17,$2	# e0	:
+	or	$4,$18,$18	# .. e1 :
+	extqh	$23,$17,$23	# e0	:
 	or	$5,$6,$5	# .. e1 :
-	or	$18,$5,$18	# e0    : len complete
-	or	$19,$7,$19	# .. e1 :
 
-	sll	$19,48,$19	# e0    :
+	extql	$3,$17,$3	# e0	:
+	or	$2,$23,$2	# .. e1 : 1st dst word complete
+	extqh	$24,$17,$24	# e0	:
+	or	$18,$5,$18	# .. e1 : len complete
+
+	extwh	$19,7,$7	# e0    :
+	or	$3,$24,$3	# .. e1 : 2nd dst word complete
+	extbl	$19,1,$19	# e0    :
 	addq	$20,$1,$20	# .. e1 :
-	sra	$19,32,$19	# e0    : proto complete
+
+	or	$19,$7,$19	# e0    :
 	cmpult	$20,$1,$1	# .. e1 :
+	sll	$19,48,$19	# e0    :
+	nop			# .. e1 :
 
-	nop			# e0    :
+	sra	$19,32,$19	# e0    : proto complete
 	addq	$20,$2,$20	# .. e1 :
 	cmpult	$20,$2,$2	# e0    :
 	addq	$20,$3,$20	# .. e1 :
@@ -84,7 +107,7 @@ csum_ipv6_magic:
 	extwl	$0,2,$1		# e0    : fold 17-bit value
 	zapnot	$0,3,$0		# .. e1 :
 	addq	$0,$1,$0	# e0    :
-	not	$0,$0		# e1    : and complement.
+	not	$0,$0		# .. e1 : and complement.
 
 	zapnot	$0,3,$0		# e0    :
 	ret			# .. e1 :

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [patch] alpha: fix alignment problem in csum_ipv6_magic()
  2007-06-16 21:20 [patch] alpha: fix alignment problem in csum_ipv6_magic() Ivan Kokshaysky
@ 2007-06-21 23:35 ` Andrew Morton
  2007-06-22 11:26   ` Ivan Kokshaysky
  0 siblings, 1 reply; 4+ messages in thread
From: Andrew Morton @ 2007-06-21 23:35 UTC (permalink / raw)
  To: Ivan Kokshaysky; +Cc: rth, linux-kernel, netdev, Dustin Marquess

> On Sun, 17 Jun 2007 01:20:20 +0400 Ivan Kokshaysky <ink@jurassic.park.msu.ru> wrote:
> Hopefully this fixes http://bugzilla.kernel.org/show_bug.cgi?id=8635
> 
> The struct in6_addr passed to csum_ipv6_magic() is 4 byte aligned,
> so we can't use the regular 64-bit loads.
> Since the cost of handling of 4 byte and 1 byte aligned 64-bit data is
> roughly the same, this code can cope with any src/dst [mis]alignment.
> 
> Signed-off-by: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
> 
> Ivan.
> 
> --- 2.6.22-rc4/arch/alpha/lib/ev6-csum_ipv6_magic.S	Sun Feb  4 21:44:54 2007
> +++ linux/arch/alpha/lib/ev6-csum_ipv6_magic.S	Sun Jun 17 00:41:53 2007
> @@ -46,6 +46,10 @@
>   * add the 3 low ushorts together, generating a uint
>   * a final add of the 2 lower ushorts
>   * truncating the result.
> + *
> + * Misalignment handling added by Ivan Kokshaysky <ink@jurassic.park.msu.ru>
> + * The cost is 16 instructions (~8 cycles), including two extra loads which
> + * may cause additional delay in rare cases (load-load replay traps).
>   */
>  
>  	.globl csum_ipv6_magic
> @@ -55,25 +59,45 @@
>  csum_ipv6_magic:
>  	.prologue 0
>  
> -	ldq	$0,0($16)	# L : Latency: 3
> +	ldq_u	$0,0($16)	# L : Latency: 3
>  	inslh	$18,7,$4	# U : 0000000000AABBCC
> -	ldq	$1,8($16)	# L : Latency: 3
> +	ldq_u	$1,8($16)	# L : Latency: 3
>  	sll	$19,8,$7	# U : U L U L : 0x00000000 00aabb00
>  
> +	and	$16,7,$6	# E : src misalignment
> +	ldq_u	$5,15($16)	# L : Latency: 3
>  	zapnot	$20,15,$20	# U : zero extend incoming csum
> -	ldq	$2,0($17)	# L : Latency: 3
> -	sll	$19,24,$19	# U : U L L U : 0x000000aa bb000000
> -	inswl	$18,3,$18	# U : 000000CCDD000000
> +	ldq_u	$2,0($17)	# L : U L U L : Latency: 3
> +
> +	extql	$0,$6,$0	# U :
> +	extqh	$1,$6,$22	# U :
> +	ldq_u	$3,8($17)	# L : Latency: 3
> +	sll	$19,24,$19	# U : U U L U : 0x000000aa bb000000
> +
> +	cmoveq	$6,$31,$22	# E : src aligned?
> +	ldq_u	$23,15($17)	# L : Latency: 3
> +	or	$18,$4,$18	# E : 000000CCDDAABBCC
> +	extql	$1,$6,$1	# U : U L L U :
>  
> -	ldq	$3,8($17)	# L : Latency: 3
> -	bis	$18,$4,$18	# E : 000000CCDDAABBCC
> +	or	$0,$22,$0	# E : 1st src word complete
> +	extqh	$5,$6,$5	# U :
>  	addl	$19,$7,$19	# E : <sign bits>bbaabb00
> -	nop			# E : U L U L
> +	and	$17,7,$6	# E : L U L U : dst misalignment
>  
> +	inswl	$18,3,$18	# U : 000000CCDD000000
> +	or	$1,$5,$1	# E : 2nd src word complete
> +	extql	$2,$6,$2	# U :
> +	extqh	$3,$6,$22	# U : U L U U :
> +
> +	cmoveq	$6,$31,$22	# E : dst aligned?
> +	extql	$3,$6,$3	# U :
>  	addq	$20,$0,$20	# E : begin summing the words
> +	extqh	$23,$6,$23	# U : L U L U :
> +
>  	srl	$18,16,$4	# U : 0000000000CCDDAA
> +	or	$2,$22,$2	# E : 1st dst word complete
>  	zap	$19,0x3,$19	# U : <sign bits>bbaa0000
> -	nop			# E : L U U L
> +	or	$3,$23,$3	# E : U L U L : 2nd dst word complete
>  
>  	cmpult	$20,$0,$0	# E :
>  	addq	$20,$1,$20	# E :
> --- 2.6.22-rc4/arch/alpha/lib/csum_ipv6_magic.S	Sun Feb  4 21:44:54 2007
> +++ linux/arch/alpha/lib/csum_ipv6_magic.S	Sun Jun 17 00:29:28 2007
> @@ -7,6 +7,9 @@
>   *                                __u32 len,
>   *                                unsigned short proto,
>   *                                unsigned int csum);
> + *
> + * Misalignment handling (which costs 16 instructions / 8 cycles) 
> + * added by Ivan Kokshaysky <ink@jurassic.park.msu.ru>
>   */
>  
>  	.globl csum_ipv6_magic
> @@ -16,37 +19,57 @@
>  csum_ipv6_magic:
>  	.prologue 0
>  
> -	ldq	$0,0($16)	# e0    : load src & dst addr words
> +	ldq_u	$0,0($16)	# e0    : load src & dst addr words
>  	zapnot	$20,15,$20	# .. e1 : zero extend incoming csum
>  	extqh	$18,1,$4	# e0    : byte swap len & proto while we wait
> -	ldq	$1,8($16)	# .. e1 :
> +	ldq_u	$21,7($16)	# .. e1 : handle misalignment
>  
>  	extbl	$18,1,$5	# e0	:
> -	ldq	$2,0($17)	# .. e1 :
> +	ldq_u	$1,8($16)	# .. e1 :
>  	extbl	$18,2,$6	# e0 	:
> -	ldq	$3,8($17)	# .. e1 :
> +	ldq_u	$22,15($16)	# .. e1 :
>  
>  	extbl	$18,3,$18	# e0	:
> +	ldq_u	$2,0($17)	# .. e1 :
>  	sra	$4,32,$4	# e0	:
> +	ldq_u	$23,7($17)	# .. e1 :
> +
> +	extql	$0,$16,$0	# e0	:
> +	ldq_u	$3,8($17)	# .. e1 :
> +	extqh	$21,$16,$21	# e0	:
> +	ldq_u	$24,15($17)	# .. e1 :
> +
>  	sll	$5,16,$5	# e0	:
> +	or	$0,$21,$0	# .. e1 : 1st src word complete
> +	extql	$1,$16,$1	# e0	:
>  	addq	$20,$0,$20	# .. e1 : begin summing the words
>  
> -	sll	$6,8,$6		# e0	:
> +	extqh	$22,$16,$22	# e0	:
>  	cmpult	$20,$0,$0	# .. e1 :
> -	extwh	$19,7,$7	# e0    :
> -	or	$4,$18,$18	# .. e1 :
> +	sll	$6,8,$6		# e0	:
> +	or	$1,$22,$1	# .. e1 : 2nd src word complete
>  
> -	extbl	$19,1,$19	# e0    :
> +	extql	$2,$17,$2	# e0	:
> +	or	$4,$18,$18	# .. e1 :
> +	extqh	$23,$17,$23	# e0	:
>  	or	$5,$6,$5	# .. e1 :
> -	or	$18,$5,$18	# e0    : len complete
> -	or	$19,$7,$19	# .. e1 :
>  
> -	sll	$19,48,$19	# e0    :
> +	extql	$3,$17,$3	# e0	:
> +	or	$2,$23,$2	# .. e1 : 1st dst word complete
> +	extqh	$24,$17,$24	# e0	:
> +	or	$18,$5,$18	# .. e1 : len complete
> +
> +	extwh	$19,7,$7	# e0    :
> +	or	$3,$24,$3	# .. e1 : 2nd dst word complete
> +	extbl	$19,1,$19	# e0    :
>  	addq	$20,$1,$20	# .. e1 :
> -	sra	$19,32,$19	# e0    : proto complete
> +
> +	or	$19,$7,$19	# e0    :
>  	cmpult	$20,$1,$1	# .. e1 :
> +	sll	$19,48,$19	# e0    :
> +	nop			# .. e0 :
>  
> -	nop			# e0    :
> +	sra	$19,32,$19	# e0    : proto complete
>  	addq	$20,$2,$20	# .. e1 :
>  	cmpult	$20,$2,$2	# e0    :
>  	addq	$20,$3,$20	# .. e1 :
> @@ -84,7 +107,7 @@ csum_ipv6_magic:
>  	extwl	$0,2,$1		# e0    : fold 17-bit value
>  	zapnot	$0,3,$0		# .. e1 :
>  	addq	$0,$1,$0	# e0    :
> -	not	$0,$0		# e1    : and complement.
> +	not	$0,$0		# .. e1 : and complement.
>  
>  	zapnot	$0,3,$0		# e0    :
>  	ret			# .. e1 :

In http://bugzilla.kernel.org/show_bug.cgi?id=8659, Dustin is reporting
that this patch broke tcp-on-ipv6.



^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [patch] alpha: fix alignment problem in csum_ipv6_magic()
  2007-06-21 23:35 ` Andrew Morton
@ 2007-06-22 11:26   ` Ivan Kokshaysky
  2007-06-22 16:27     ` Dustin Marquess
  0 siblings, 1 reply; 4+ messages in thread
From: Ivan Kokshaysky @ 2007-06-22 11:26 UTC (permalink / raw)
  To: Andrew Morton; +Cc: rth, linux-kernel, netdev, Dustin Marquess

On Thu, Jun 21, 2007 at 04:35:01PM -0700, Andrew Morton wrote:
> In http://bugzilla.kernel.org/show_bug.cgi?id=8659, Dustin is reporting
> that this patch broke tcp-on-ipv6.

Oops. Two instructions operating on the 'len' arg ($18) got swapped...
This should fix the ev6 version; the ev5 one seems to be ok.

Signed-off-by: Ivan Kokshaysky <ink@jurassic.park.msu.ru>

Ivan.

--- 2.6.22-rc4-mm2/arch/alpha/lib/ev6-csum_ipv6_magic.S	Fri Jun 22 15:02:23 2007
+++ linux/arch/alpha/lib/ev6-csum_ipv6_magic.S	Fri Jun 22 15:05:38 2007
@@ -76,18 +76,18 @@ csum_ipv6_magic:
 
 	cmoveq	$6,$31,$22	# E : src aligned?
 	ldq_u	$23,15($17)	# L : Latency: 3
-	or	$18,$4,$18	# E : 000000CCDDAABBCC
-	extql	$1,$6,$1	# U : U L L U :
+	inswl	$18,3,$18	# U : 000000CCDD000000
+	addl	$19,$7,$19	# E : U L U L : <sign bits>bbaabb00
 
 	or	$0,$22,$0	# E : 1st src word complete
-	extqh	$5,$6,$5	# U :
-	addl	$19,$7,$19	# E : <sign bits>bbaabb00
-	and	$17,7,$6	# E : L U L U : dst misalignment
+	extql	$1,$6,$1	# U :
+	or	$18,$4,$18	# E : 000000CCDDAABBCC
+	extqh	$5,$6,$5	# U : L U L U
 
-	inswl	$18,3,$18	# U : 000000CCDD000000
-	or	$1,$5,$1	# E : 2nd src word complete
+	and	$17,7,$6	# E : dst misalignment
 	extql	$2,$6,$2	# U :
-	extqh	$3,$6,$22	# U : U L U U :
+	or	$1,$5,$1	# E : 2nd src word complete
+	extqh	$3,$6,$22	# U : L U L U :
 
 	cmoveq	$6,$31,$22	# E : dst aligned?
 	extql	$3,$6,$3	# U :

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [patch] alpha: fix alignment problem in csum_ipv6_magic()
  2007-06-22 11:26   ` Ivan Kokshaysky
@ 2007-06-22 16:27     ` Dustin Marquess
  0 siblings, 0 replies; 4+ messages in thread
From: Dustin Marquess @ 2007-06-22 16:27 UTC (permalink / raw)
  To: Ivan Kokshaysky; +Cc: Andrew Morton, rth, linux-kernel, netdev

Ivan Kokshaysky wrote:
> On Thu, Jun 21, 2007 at 04:35:01PM -0700, Andrew Morton wrote:
>> In http://bugzilla.kernel.org/show_bug.cgi?id=8659, Dustin is reporting
>> that this patch broke tcp-on-ipv6.
> 
> Oops. Two instructions operating on the 'len' arg ($18) got swapped...
> This should fix ev6 version, ev5 one seems to be ok.
> 
> Signed-off-by: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
> 
> Ivan.
> 
> --- 2.6.22-rc4-mm2/arch/alpha/lib/ev6-csum_ipv6_magic.S	Fri Jun 22 15:02:23 2007
> +++ linux/arch/alpha/lib/ev6-csum_ipv6_magic.S	Fri Jun 22 15:05:38 2007
> @@ -76,18 +76,18 @@ csum_ipv6_magic:
>  
>  	cmoveq	$6,$31,$22	# E : src aligned?
>  	ldq_u	$23,15($17)	# L : Latency: 3
> -	or	$18,$4,$18	# E : 000000CCDDAABBCC
> -	extql	$1,$6,$1	# U : U L L U :
> +	inswl	$18,3,$18	# U : 000000CCDD000000
> +	addl	$19,$7,$19	# E : U L U L : <sign bits>bbaabb00
>  
>  	or	$0,$22,$0	# E : 1st src word complete
> -	extqh	$5,$6,$5	# U :
> -	addl	$19,$7,$19	# E : <sign bits>bbaabb00
> -	and	$17,7,$6	# E : L U L U : dst misalignment
> +	extql	$1,$6,$1	# U :
> +	or	$18,$4,$18	# E : 000000CCDDAABBCC
> +	extqh	$5,$6,$5	# U : L U L U
>  
> -	inswl	$18,3,$18	# U : 000000CCDD000000
> -	or	$1,$5,$1	# E : 2nd src word complete
> +	and	$17,7,$6	# E : dst misalignment
>  	extql	$2,$6,$2	# U :
> -	extqh	$3,$6,$22	# U : U L U U :
> +	or	$1,$5,$1	# E : 2nd src word complete
> +	extqh	$3,$6,$22	# U : L U L U :
>  
>  	cmoveq	$6,$31,$22	# E : dst aligned?
>  	extql	$3,$6,$3	# U :
> 


Awesome! Works like a champ! Thank you guys so very much! You rock!

-Dustin

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2007-06-22 17:10 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-06-16 21:20 [patch] alpha: fix alignment problem in csum_ipv6_magic() Ivan Kokshaysky
2007-06-21 23:35 ` Andrew Morton
2007-06-22 11:26   ` Ivan Kokshaysky
2007-06-22 16:27     ` Dustin Marquess

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).