* [PATCH 1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold
@ 2016-11-03 5:10 Paul Mackerras
2016-11-03 5:15 ` [PATCH 2/2] powerpc/64: Use optimized checksum routines on little-endian Paul Mackerras
From: Paul Mackerras @ 2016-11-03 5:10 UTC (permalink / raw)
To: linuxppc-dev
These functions compute an IP checksum by computing a 64-bit sum and
folding it to 32 bits (the "nofold" in their names refers to folding
down to 16 bits). However, doing (u32) (s + (s >> 32)) is not
sufficient to fold a 64-bit sum to 32 bits correctly. The addition
can produce a carry out from bit 31, which needs to be added in to
the sum to produce the correct result.
To fix this, we copy the from64to32() function from lib/checksum.c
and use that.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
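(Illustration only, not part of the patch: a standalone C sketch of the
broken fold versus from64to32(), using 0x00000001ffffffffULL as an
example 64-bit sum whose fold carries out of bit 31.)

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Same folding helper as the one copied from lib/checksum.c. */
static uint32_t from64to32(uint64_t x)
{
	/* add up 32-bit and 32-bit for 32+c bit */
	x = (x & 0xffffffff) + (x >> 32);
	/* add up carry.. */
	x = (x & 0xffffffff) + (x >> 32);
	return (uint32_t)x;
}

int main(void)
{
	uint64_t s = 0x00000001ffffffffULL;

	/* Old fold: 0xffffffff + 1 carries out of bit 31 and the carry is
	 * truncated away, giving 0 -- wrong. */
	printf("broken fold: 0x%" PRIx32 "\n", (uint32_t)(s + (s >> 32)));

	/* from64to32() adds the carry back in, giving 1. */
	printf("from64to32:  0x%" PRIx32 "\n", from64to32(s));
	return 0;
}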
arch/powerpc/include/asm/checksum.h | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h
index ee655ed..c16c6f8 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -53,19 +53,27 @@ static inline __sum16 csum_fold(__wsum sum)
return (__force __sum16)(~((__force u32)sum + tmp) >> 16);
}
+static inline u32 from64to32(u64 x)
+{
+ /* add up 32-bit and 32-bit for 32+c bit */
+ x = (x & 0xffffffff) + (x >> 32);
+ /* add up carry.. */
+ x = (x & 0xffffffff) + (x >> 32);
+ return (u32)x;
+}
+
static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
unsigned short len,
unsigned short proto,
__wsum sum)
{
#ifdef __powerpc64__
- unsigned long s = (__force u32)sum;
+ u64 s = (__force u32)sum;
s += (__force u32)saddr;
s += (__force u32)daddr;
s += proto + len;
- s += (s >> 32);
- return (__force __wsum) s;
+ return (__force __wsum) from64to32(s);
#else
__asm__("\n\
addc %0,%0,%1 \n\
@@ -127,8 +135,7 @@ static inline __wsum ip_fast_csum_nofold(const void *iph, unsigned int ihl)
for (i = 0; i < ihl - 1; i++, ptr++)
s += *ptr;
- s += (s >> 32);
- return (__force __wsum)s;
+ return (__force __wsum)from64to32(s);
#else
__wsum sum, tmp;
--
2.10.1
* [PATCH 2/2] powerpc/64: Use optimized checksum routines on little-endian
2016-11-03 5:10 [PATCH 1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold Paul Mackerras
@ 2016-11-03 5:15 ` Paul Mackerras
2017-01-27 0:40 ` [2/2] " Michael Ellerman
2016-11-08 7:23 ` [PATCH 1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold Michael Ellerman
From: Paul Mackerras @ 2016-11-03 5:15 UTC (permalink / raw)
To: linuxppc-dev
Currently we have optimized hand-coded assembly checksum routines
for big-endian 64-bit systems, but for little-endian we use the
generic C routines. This modifies the optimized routines to work
for little-endian. With this, we no longer need to enable
CONFIG_GENERIC_CSUM. This also fixes a couple of comments
in checksum_64.S so they accurately reflect what the associated
instruction does.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
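(Illustration only, not part of the patch: a userspace C sketch, with
made-up addresses, of why the little-endian pseudo-header sum in the
checksum.h hunk below adds (proto + len) << 8. The 32-bit words of the
pseudo-header are loaded byte-swapped on little-endian, so proto and
len -- native-endian integers rather than packet bytes -- are shifted
to line up with that byte order; the folded 16-bit result then comes
out as the byte swap of the big-endian result, i.e. the same two bytes
on the wire.)

#include <stdint.h>
#include <stdio.h>

/* Fold a 64-bit accumulator down to a 16-bit one's-complement sum. */
static uint16_t fold16(uint64_t s)
{
	s = (s & 0xffffffff) + (s >> 32);
	s = (s & 0xffffffff) + (s >> 32);
	s = (s & 0xffff) + (s >> 16);
	s = (s & 0xffff) + (s >> 16);
	return (uint16_t)s;
}

static uint32_t bswap32(uint32_t x)
{
	return (x >> 24) | ((x >> 8) & 0xff00) |
	       ((x << 8) & 0xff0000) | (x << 24);
}

int main(void)
{
	/* Hypothetical pseudo-header fields, as big-endian register values. */
	uint32_t saddr = 0xc0a80001;		/* 192.168.0.1 */
	uint32_t daddr = 0xc0a800fe;		/* 192.168.0.254 */
	uint16_t len   = 0x1234;
	uint8_t  proto = 6;			/* TCP */

	/* What a big-endian CPU accumulates. */
	uint64_t s_be = (uint64_t)saddr + daddr + proto + len;

	/* What a little-endian CPU accumulates: the same bytes loaded
	 * swapped, with proto and len shifted up by 8 so that (after
	 * folding) they contribute at the swapped byte positions. */
	uint64_t s_le = (uint64_t)bswap32(saddr) + bswap32(daddr) +
			(((uint32_t)proto + len) << 8);

	/* Prints 0x948a and 0x8a94: byte swaps of each other. */
	printf("BE fold 0x%04x, LE fold 0x%04x\n",
	       (unsigned)fold16(s_be), (unsigned)fold16(s_le));
	return 0;
}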
arch/powerpc/Kconfig | 2 +-
arch/powerpc/include/asm/checksum.h | 4 ++++
arch/powerpc/lib/Makefile | 2 --
arch/powerpc/lib/checksum_64.S | 12 ++++++++++--
4 files changed, 15 insertions(+), 5 deletions(-)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 65fba4c..514e6dd 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -164,7 +164,7 @@ config PPC
select HAVE_KERNEL_GZIP
config GENERIC_CSUM
- def_bool CPU_LITTLE_ENDIAN
+ def_bool n
config EARLY_PRINTK
bool
diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h
index c16c6f8..5f8297b 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -72,7 +72,11 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
s += (__force u32)saddr;
s += (__force u32)daddr;
+#ifdef __BIG_ENDIAN
s += proto + len;
+#else
+ s += (proto + len) << 8;
+#endif
return (__force __wsum) from64to32(s);
#else
__asm__("\n\
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 309361e8..0e649d7 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -21,9 +21,7 @@ obj64-y += copypage_64.o copyuser_64.o usercopy_64.o mem_64.o hweight_64.o \
obj64-$(CONFIG_SMP) += locks.o
obj64-$(CONFIG_ALTIVEC) += vmx-helper.o
-ifeq ($(CONFIG_GENERIC_CSUM),)
obj-y += checksum_$(BITS).o checksum_wrappers.o
-endif
obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o
diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S
index fd91766..4dd2761 100644
--- a/arch/powerpc/lib/checksum_64.S
+++ b/arch/powerpc/lib/checksum_64.S
@@ -36,7 +36,7 @@ _GLOBAL(__csum_partial)
* work to calculate the correct checksum, we ignore that case
* and take the potential slowdown of unaligned loads.
*/
- rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */
+ rldicl. r6,r3,64-1,64-2 /* r6 = (r3 >> 1) & 0x3 */
beq .Lcsum_aligned
li r7,4
@@ -168,8 +168,12 @@ _GLOBAL(__csum_partial)
beq .Lcsum_finish
lbz r6,0(r3)
+#ifdef __BIG_ENDIAN
sldi r9,r6,8 /* Pad the byte out to 16 bits */
adde r0,r0,r9
+#else
+ adde r0,r0,r6
+#endif
.Lcsum_finish:
addze r0,r0 /* add in final carry */
@@ -236,7 +240,7 @@ _GLOBAL(csum_partial_copy_generic)
* If the source and destination are relatively unaligned we only
* align the source. This keeps things simple.
*/
- rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */
+ rldicl. r6,r3,64-1,64-2 /* r6 = (r3 >> 1) & 0x3 */
beq .Lcopy_aligned
li r9,4
@@ -398,8 +402,12 @@ dstnr; sth r6,0(r4)
beq .Lcopy_finish
srcnr; lbz r6,0(r3)
+#ifdef __BIG_ENDIAN
sldi r9,r6,8 /* Pad the byte out to 16 bits */
adde r0,r0,r9
+#else
+ adde r0,r0,r6
+#endif
dstnr; stb r6,0(r4)
.Lcopy_finish:
--
2.10.1
* Re: [PATCH 1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold
2016-11-03 5:10 [PATCH 1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold Paul Mackerras
2016-11-03 5:15 ` [PATCH 2/2] powerpc/64: Use optimized checksum routines on little-endian Paul Mackerras
@ 2016-11-08 7:23 ` Michael Ellerman
2016-11-08 22:05 ` Paul Mackerras
2016-12-02 9:44 ` Michael Ellerman
2017-01-27 0:40 ` [1/2] " Michael Ellerman
From: Michael Ellerman @ 2016-11-08 7:23 UTC (permalink / raw)
To: Paul Mackerras, linuxppc-dev
Paul Mackerras <paulus@ozlabs.org> writes:
> These functions compute an IP checksum by computing a 64-bit sum and
> folding it to 32 bits (the "nofold" in their names refers to folding
> down to 16 bits). However, doing (u32) (s + (s >> 32)) is not
> sufficient to fold a 64-bit sum to 32 bits correctly. The addition
> can produce a carry out from bit 31, which needs to be added in to
> the sum to produce the correct result.
>
> To fix this, we copy the from64to32() function from lib/checksum.c
> and use that.
This seems to have been broken since ~forever. Do we just not hit that
case very often, or do we just incorrectly report checksum failures?
Should it go to stable?
cheers
* Re: [PATCH 1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold
2016-11-08 7:23 ` [PATCH 1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold Michael Ellerman
@ 2016-11-08 22:05 ` Paul Mackerras
From: Paul Mackerras @ 2016-11-08 22:05 UTC (permalink / raw)
To: Michael Ellerman; +Cc: linuxppc-dev
On Tue, Nov 08, 2016 at 06:23:30PM +1100, Michael Ellerman wrote:
> Paul Mackerras <paulus@ozlabs.org> writes:
>
> > These functions compute an IP checksum by computing a 64-bit sum and
> > folding it to 32 bits (the "nofold" in their names refers to folding
> > down to 16 bits). However, doing (u32) (s + (s >> 32)) is not
> > sufficient to fold a 64-bit sum to 32 bits correctly. The addition
> > can produce a carry out from bit 31, which needs to be added in to
> > the sum to produce the correct result.
> >
> > To fix this, we copy the from64to32() function from lib/checksum.c
> > and use that.
>
> This seems to have been broken since ~forever. Do we just not hit that
> case very often, or do we just incorrectly report checksum failures?
I think there would be about a 1 in a billion chance of hitting it by
chance, though you could probably construct a test case that would hit
it every time. If you did hit it in real life it would result in a
packet being dropped and presumably retransmitted, and I expect that
the IP header of the retransmitted packet would be sufficiently
different (i.e. different id field or something) that it wouldn't hit
the bug a second time.
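(For the record, a userspace sketch of such a constructed case -- a
model of the folding step only, not a real packet test: five 32-bit
"header" words, as for ihl = 5, chosen so the old fold loses a carry
out of bit 31.)

#include <stdint.h>
#include <stdio.h>

static uint32_t from64to32(uint64_t x)
{
	x = (x & 0xffffffff) + (x >> 32);
	x = (x & 0xffffffff) + (x >> 32);
	return (uint32_t)x;
}

int main(void)
{
	uint32_t words[5] = { 0xffffffff, 0xffffffff, 0xffffffff,
			      0xffffffff, 0x00000003 };
	uint64_t s = 0;
	int i;

	for (i = 0; i < 5; i++)
		s += words[i];

	/* s == 0x3ffffffff: the old fold gives (u32)(0xffffffff + 3) == 2,
	 * dropping the carry; from64to32() gives the correct 3. */
	printf("old fold: %u, from64to32: %u\n",
	       (unsigned)(uint32_t)(s + (s >> 32)), (unsigned)from64to32(s));
	return 0;
}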
> Should it go to stable?
Probably... though nobody has actually noticed a problem in real life
and pinned it down to this.
Paul.
* Re: [PATCH 1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold
2016-11-03 5:10 [PATCH 1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold Paul Mackerras
2016-11-03 5:15 ` [PATCH 2/2] powerpc/64: Use optimized checksum routines on little-endian Paul Mackerras
2016-11-08 7:23 ` [PATCH 1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold Michael Ellerman
@ 2016-12-02 9:44 ` Michael Ellerman
2017-01-27 0:40 ` [1/2] " Michael Ellerman
From: Michael Ellerman @ 2016-12-02 9:44 UTC (permalink / raw)
To: Paul Mackerras, linuxppc-dev
Paul Mackerras <paulus@ozlabs.org> writes:
> These functions compute an IP checksum by computing a 64-bit sum and
> folding it to 32 bits (the "nofold" in their names refers to folding
> down to 16 bits). However, doing (u32) (s + (s >> 32)) is not
> sufficient to fold a 64-bit sum to 32 bits correctly. The addition
> can produce a carry out from bit 31, which needs to be added in to
> the sum to produce the correct result.
>
> To fix this, we copy the from64to32() function from lib/checksum.c
> and use that.
This collided with:
f9d4286b9516 ("arch/powerpc: Update parameters for csum_tcpudp_magic & csum_tcpudp_nofold")
Mind rebasing?
cheers
* Re: [1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold
2016-11-03 5:10 [PATCH 1/2] powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold Paul Mackerras
2016-12-02 9:44 ` Michael Ellerman
@ 2017-01-27 0:40 ` Michael Ellerman
From: Michael Ellerman @ 2017-01-27 0:40 UTC (permalink / raw)
To: Paul Mackerras, linuxppc-dev
On Thu, 2016-11-03 at 05:10:55 UTC, Paul Mackerras wrote:
> These functions compute an IP checksum by computing a 64-bit sum and
> folding it to 32 bits (the "nofold" in their names refers to folding
> down to 16 bits). However, doing (u32) (s + (s >> 32)) is not
> sufficient to fold a 64-bit sum to 32 bits correctly. The addition
> can produce a carry out from bit 31, which needs to be added in to
> the sum to produce the correct result.
>
> To fix this, we copy the from64to32() function from lib/checksum.c
> and use that.
>
> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Applied to powerpc next, thanks.
https://git.kernel.org/powerpc/c/b492f7e4e07a28e706db26cf4943bb
cheers
* Re: [2/2] powerpc/64: Use optimized checksum routines on little-endian
2016-11-03 5:15 ` [PATCH 2/2] powerpc/64: Use optimized checksum routines on little-endian Paul Mackerras
@ 2017-01-27 0:40 ` Michael Ellerman
From: Michael Ellerman @ 2017-01-27 0:40 UTC (permalink / raw)
To: Paul Mackerras, linuxppc-dev
On Thu, 2016-11-03 at 05:15:42 UTC, Paul Mackerras wrote:
> Currently we have optimized hand-coded assembly checksum routines
> for big-endian 64-bit systems, but for little-endian we use the
> generic C routines. This modifies the optimized routines to work
> for little-endian. With this, we no longer need to enable
> CONFIG_GENERIC_CSUM. This also fixes a couple of comments
> in checksum_64.S so they accurately reflect what the associated
> instruction does.
>
> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Applied to powerpc next, thanks.
https://git.kernel.org/powerpc/c/d4fde568a34a93897dfb9ae64cfe9d
cheers