* [PATCH 1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold
@ 2016-11-03  5:10 Paul Mackerras
  2016-11-03  5:15 ` [PATCH 2/2] powerpc/64: Use optimized checksum routines on little-endian Paul Mackerras
                   ` (3 more replies)
  0 siblings, 4 replies; 7+ messages in thread
From: Paul Mackerras @ 2016-11-03  5:10 UTC
  To: linuxppc-dev

These functions compute an IP checksum by computing a 64-bit sum and
folding it to 32 bits (the "nofold" in their names refers to folding
down to 16 bits).  However, doing (u32) (s + (s >> 32)) is not
sufficient to fold a 64-bit sum to 32 bits correctly.  The addition
can produce a carry out from bit 31, which needs to be added in to
the sum to produce the correct result.
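
For example, if s = 0x1ffffffff, then (u32) (s + (s >> 32)) =
(u32) 0x200000000 = 0, whereas the correct 32-bit fold is 1.
Adding the carry back in can itself produce a further carry out
of bit 31, which is why the fix below reduces twice.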

To fix this, we copy the from64to32() function from lib/checksum.c
and use that.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/include/asm/checksum.h | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h
index ee655ed..c16c6f8 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -53,19 +53,27 @@ static inline __sum16 csum_fold(__wsum sum)
 	return (__force __sum16)(~((__force u32)sum + tmp) >> 16);
 }
 
+static inline u32 from64to32(u64 x)
+{
+	/* add up 32-bit and 32-bit for 32+c bit */
+	x = (x & 0xffffffff) + (x >> 32);
+	/* add up carry.. */
+	x = (x & 0xffffffff) + (x >> 32);
+	return (u32)x;
+}
+
 static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
                                      unsigned short len,
                                      unsigned short proto,
                                      __wsum sum)
 {
 #ifdef __powerpc64__
-	unsigned long s = (__force u32)sum;
+	u64 s = (__force u32)sum;
 
 	s += (__force u32)saddr;
 	s += (__force u32)daddr;
 	s += proto + len;
-	s += (s >> 32);
-	return (__force __wsum) s;
+	return (__force __wsum) from64to32(s);
 #else
     __asm__("\n\
 	addc %0,%0,%1 \n\
@@ -127,8 +135,7 @@ static inline __wsum ip_fast_csum_nofold(const void *iph, unsigned int ihl)
 
 	for (i = 0; i < ihl - 1; i++, ptr++)
 		s += *ptr;
-	s += (s >> 32);
-	return (__force __wsum)s;
+	return (__force __wsum)from64to32(s);
 #else
 	__wsum sum, tmp;
 
-- 
2.10.1


* [PATCH 2/2] powerpc/64: Use optimized checksum routines on little-endian
  2016-11-03  5:10 [PATCH 1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold Paul Mackerras
@ 2016-11-03  5:15 ` Paul Mackerras
  2017-01-27  0:40   ` [2/2] " Michael Ellerman
  2016-11-08  7:23 ` [PATCH 1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold Michael Ellerman
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 7+ messages in thread
From: Paul Mackerras @ 2016-11-03  5:15 UTC
  To: linuxppc-dev

Currently we have optimized hand-coded assembly checksum routines
for big-endian 64-bit systems, but for little-endian we use the
generic C routines.  This modifies the optimized routines to work
for little-endian.  With this, we no longer need to enable
CONFIG_GENERIC_CSUM.  This also fixes a couple of comments
in checksum_64.S so they accurately reflect what the associated
instruction does.
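
(On little-endian, saddr and daddr are already network-byte-order
quantities, but proto and len are native values, which is why the
pseudo-header sum below adds (proto + len) << 8.  The 16-bit ones'
complement sum is arithmetic modulo 0xffff, and since 0x10000 == 1
(mod 0xffff), shifting left by 8 and then folding is equivalent to
byte-swapping a 16-bit value: 0xabcd << 8 = 0xabcd00, and
0xab + 0xcd00 = 0xcdab.  Similarly, rldicl r6,r3,64-1,64-2 rotates
r3 right by one bit and keeps the low two bits of the result,
giving (r3 >> 1) & 0x3 rather than (r3 & 0x3) >> 1, hence the
comment fixes.)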

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/Kconfig                |  2 +-
 arch/powerpc/include/asm/checksum.h |  4 ++++
 arch/powerpc/lib/Makefile           |  2 --
 arch/powerpc/lib/checksum_64.S      | 12 ++++++++++--
 4 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 65fba4c..514e6dd 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -164,7 +164,7 @@ config PPC
 	select HAVE_KERNEL_GZIP
 
 config GENERIC_CSUM
-	def_bool CPU_LITTLE_ENDIAN
+	def_bool n
 
 config EARLY_PRINTK
 	bool
diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h
index c16c6f8..5f8297b 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -72,7 +72,11 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
 
 	s += (__force u32)saddr;
 	s += (__force u32)daddr;
+#ifdef __BIG_ENDIAN__
 	s += proto + len;
+#else
+	s += (proto + len) << 8;
+#endif
 	return (__force __wsum) from64to32(s);
 #else
     __asm__("\n\
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 309361e8..0e649d7 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -21,9 +21,7 @@ obj64-y	+= copypage_64.o copyuser_64.o usercopy_64.o mem_64.o hweight_64.o \
 obj64-$(CONFIG_SMP)	+= locks.o
 obj64-$(CONFIG_ALTIVEC)	+= vmx-helper.o
 
-ifeq ($(CONFIG_GENERIC_CSUM),)
 obj-y			+= checksum_$(BITS).o checksum_wrappers.o
-endif
 
 obj-$(CONFIG_PPC_EMULATE_SSTEP)	+= sstep.o ldstfp.o
 
diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S
index fd91766..4dd2761 100644
--- a/arch/powerpc/lib/checksum_64.S
+++ b/arch/powerpc/lib/checksum_64.S
@@ -36,7 +36,7 @@ _GLOBAL(__csum_partial)
 	 * work to calculate the correct checksum, we ignore that case
 	 * and take the potential slowdown of unaligned loads.
 	 */
-	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 & 0x3) >> 1 */
+	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 >> 1) & 0x3 */
 	beq	.Lcsum_aligned
 
 	li	r7,4
@@ -168,8 +168,12 @@ _GLOBAL(__csum_partial)
 	beq	.Lcsum_finish
 
 	lbz	r6,0(r3)
+#ifdef __BIG_ENDIAN__
 	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
 	adde	r0,r0,r9
+#else
+	adde	r0,r0,r6
+#endif
 
 .Lcsum_finish:
 	addze	r0,r0			/* add in final carry */
@@ -236,7 +240,7 @@ _GLOBAL(csum_partial_copy_generic)
 	 * If the source and destination are relatively unaligned we only
 	 * align the source. This keeps things simple.
 	 */
-	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 & 0x3) >> 1 */
+	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 >> 1) & 0x3 */
 	beq	.Lcopy_aligned
 
 	li	r9,4
@@ -398,8 +402,12 @@ dstnr;	sth	r6,0(r4)
 	beq	.Lcopy_finish
 
 srcnr;	lbz	r6,0(r3)
+#ifdef __BIG_ENDIAN__
 	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
 	adde	r0,r0,r9
+#else
+	adde	r0,r0,r6
+#endif
 dstnr;	stb	r6,0(r4)
 
 .Lcopy_finish:
-- 
2.10.1


* Re: [PATCH 1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold
  2016-11-03  5:10 [PATCH 1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold Paul Mackerras
  2016-11-03  5:15 ` [PATCH 2/2] powerpc/64: Use optimized checksum routines on little-endian Paul Mackerras
@ 2016-11-08  7:23 ` Michael Ellerman
  2016-11-08 22:05   ` Paul Mackerras
  2016-12-02  9:44 ` Michael Ellerman
  2017-01-27  0:40 ` [1/2] " Michael Ellerman
  3 siblings, 1 reply; 7+ messages in thread
From: Michael Ellerman @ 2016-11-08  7:23 UTC
  To: Paul Mackerras, linuxppc-dev

Paul Mackerras <paulus@ozlabs.org> writes:

> These functions compute an IP checksum by computing a 64-bit sum and
> folding it to 32 bits (the "nofold" in their names refers to folding
> down to 16 bits).  However, doing (u32) (s + (s >> 32)) is not
> sufficient to fold a 64-bit sum to 32 bits correctly.  The addition
> can produce a carry out from bit 31, which needs to be added in to
> the sum to produce the correct result.
>
> To fix this, we copy the from64to32() function from lib/checksum.c
> and use that.

This seems to have been broken since ~forever. Do we just not hit that
case very often, or do we just incorrectly report checksum failures?

Should it go to stable?

cheers


* Re: [PATCH 1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold
  2016-11-08  7:23 ` [PATCH 1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold Michael Ellerman
@ 2016-11-08 22:05   ` Paul Mackerras
  0 siblings, 0 replies; 7+ messages in thread
From: Paul Mackerras @ 2016-11-08 22:05 UTC
  To: Michael Ellerman; +Cc: linuxppc-dev

On Tue, Nov 08, 2016 at 06:23:30PM +1100, Michael Ellerman wrote:
> Paul Mackerras <paulus@ozlabs.org> writes:
> 
> > These functions compute an IP checksum by computing a 64-bit sum and
> > folding it to 32 bits (the "nofold" in their names refers to folding
> > down to 16 bits).  However, doing (u32) (s + (s >> 32)) is not
> > sufficient to fold a 64-bit sum to 32 bits correctly.  The addition
> > can produce a carry out from bit 31, which needs to be added in to
> > the sum to produce the correct result.
> >
> > To fix this, we copy the from64to32() function from lib/checksum.c
> > and use that.
> 
> This seems to have been broken since ~forever. Do we just not hit that
> case very often, or do we just incorrectly report checksum failures?

I think there would be about a 1 in a billion chance of hitting it by
chance, though you could probably construct a test case that would hit
it every time.  If you did hit it in real life it would result in a
packet being dropped and presumably retransmitted, and I expect that
the IP header of the retransmitted packet would be sufficiently
different (i.e. different id field or something) that it wouldn't hit
the bug a second time.
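
A quick user-space sketch (a hypothetical test harness, not part
of the patch) that demonstrates the bad fold on a crafted sum:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t s = 0xffffffffffffffffULL;	/* crafted 64-bit sum */
	/* old folding: loses the carry out of bit 31 */
	uint32_t bad = (uint32_t)(s + (s >> 32));
	/* from64to32(): the second reduction adds the carry back */
	uint64_t x = (s & 0xffffffff) + (s >> 32);
	x = (x & 0xffffffff) + (x >> 32);

	printf("bad=%x fixed=%x\n", bad, (uint32_t)x);
	/* prints: bad=fffffffe fixed=ffffffff */
	return 0;
}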

> Should it go to stable?

Probably... though nobody has actually noticed a problem in real life
and pinned it down to this.

Paul.


* Re: [PATCH 1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold
  2016-11-03  5:10 [PATCH 1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold Paul Mackerras
  2016-11-03  5:15 ` [PATCH 2/2] powerpc/64: Use optimized checksum routines on little-endian Paul Mackerras
  2016-11-08  7:23 ` [PATCH 1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold Michael Ellerman
@ 2016-12-02  9:44 ` Michael Ellerman
  2017-01-27  0:40 ` [1/2] " Michael Ellerman
  3 siblings, 0 replies; 7+ messages in thread
From: Michael Ellerman @ 2016-12-02  9:44 UTC
  To: Paul Mackerras, linuxppc-dev

Paul Mackerras <paulus@ozlabs.org> writes:

> These functions compute an IP checksum by computing a 64-bit sum and
> folding it to 32 bits (the "nofold" in their names refers to folding
> down to 16 bits).  However, doing (u32) (s + (s >> 32)) is not
> sufficient to fold a 64-bit sum to 32 bits correctly.  The addition
> can produce a carry out from bit 31, which needs to be added in to
> the sum to produce the correct result.
>
> To fix this, we copy the from64to32() function from lib/checksum.c
> and use that.

This collided with:

f9d4286b9516 ("arch/powerpc: Update parameters for csum_tcpudp_magic & csum_tcpudp_nofold")

Mind rebasing?

cheers


* Re: [1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold
  2016-11-03  5:10 [PATCH 1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold Paul Mackerras
                   ` (2 preceding siblings ...)
  2016-12-02  9:44 ` Michael Ellerman
@ 2017-01-27  0:40 ` Michael Ellerman
  3 siblings, 0 replies; 7+ messages in thread
From: Michael Ellerman @ 2017-01-27  0:40 UTC
  To: Paul Mackerras, linuxppc-dev

On Thu, 2016-11-03 at 05:10:55 UTC, Paul Mackerras wrote:
> These functions compute an IP checksum by computing a 64-bit sum and
> folding it to 32 bits (the "nofold" in their names refers to folding
> down to 16 bits).  However, doing (u32) (s + (s >> 32)) is not
> sufficient to fold a 64-bit sum to 32 bits correctly.  The addition
> can produce a carry out from bit 31, which needs to be added in to
> the sum to produce the correct result.
> 
> To fix this, we copy the from64to32() function from lib/checksum.c
> and use that.
> 
> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/b492f7e4e07a28e706db26cf4943bb

cheers


* Re: [2/2] powerpc/64: Use optimized checksum routines on little-endian
  2016-11-03  5:15 ` [PATCH 2/2] powerpc/64: Use optimized checksum routines on little-endian Paul Mackerras
@ 2017-01-27  0:40   ` Michael Ellerman
  0 siblings, 0 replies; 7+ messages in thread
From: Michael Ellerman @ 2017-01-27  0:40 UTC
  To: Paul Mackerras, linuxppc-dev

On Thu, 2016-11-03 at 05:15:42 UTC, Paul Mackerras wrote:
> Currently we have optimized hand-coded assembly checksum routines
> for big-endian 64-bit systems, but for little-endian we use the
> generic C routines.  This modifies the optimized routines to work
> for little-endian.  With this, we no longer need to enable
> CONFIG_GENERIC_CSUM.  This also fixes a couple of comments
> in checksum_64.S so they accurately reflect what the associated
> instruction does.
> 
> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>

Applied to powerpc next, thanks.

https://git.kernel.org/powerpc/c/d4fde568a34a93897dfb9ae64cfe9d

cheers
