From: "Guan Xuetao" <guanxuetao@mprc.pku.edu.cn> To: <linux-arch@vger.kernel.org>, <linux-kernel@vger.kernel.org> Subject: [PATCHv1 7/8] unicore32 additional architecture files: low-level lib: checksum Date: Mon, 3 Jan 2011 19:54:03 +0800 [thread overview] Message-ID: <013101cbab3c$eb0a7670$c11f6350$@mprc.pku.edu.cn> (raw) From: Guan Xuetao <guanxuetao@mprc.pku.edu.cn> Patch 7 implements low-level checksum libraries. Signed-off-by: Guan Xuetao <guanxuetao@mprc.pku.edu.cn> --- arch/unicore32/include/asm/checksum.h | 142 +++++++++++ arch/unicore32/lib/csumipv6.S | 36 +++ arch/unicore32/lib/csumpartial.S | 126 ++++++++++ arch/unicore32/lib/csumpartialcopy.S | 61 +++++ arch/unicore32/lib/csumpartialcopygeneric.S | 335 +++++++++++++++++++++++++++ arch/unicore32/lib/csumpartialcopyuser.S | 92 ++++++++ 6 files changed, 792 insertions(+), 0 deletions(-) diff --git a/arch/unicore32/include/asm/checksum.h b/arch/unicore32/include/asm/checksum.h new file mode 100644 index 0000000..59a97d8 --- /dev/null +++ b/arch/unicore32/include/asm/checksum.h @@ -0,0 +1,142 @@ +/* + * linux/arch/unicore32/include/asm/checksum.h + * + * Code specific to PKUnity SoC and UniCore ISA + * + * Copyright (C) 2001-2010 GUAN Xue-tao + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * IP checksum routines + */ +#ifndef __UNICORE_CHECKSUM_H__ +#define __UNICORE_CHECKSUM_H__ + +#include <linux/in6.h> + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +__wsum csum_partial(const void *buff, int len, __wsum sum); + +/* + * the same as csum_partial, but copies from src while it + * checksums, and handles user-space pointer exceptions correctly, when needed. + * + * here even more important to align src and dst on a 32-bit (or even + * better 64-bit) boundary + */ + +__wsum +csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum); + +__wsum +csum_partial_copy_from_user(const void __user *src, void *dst, + int len, __wsum sum, int *err_ptr); + +/* + * Fold a partial checksum without adding pseudo headers + */ +static inline __sum16 csum_fold(__wsum sum) +{ + __asm__( + "add %0, %1, %1 <> #16 @ csum_fold" + : "=r" (sum) + : "r" (sum) + : "cc"); + return (__force __sum16)(~(__force u32)sum >> 16); +} + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. 
+ */ +static inline __sum16 +ip_fast_csum(const void *iph, unsigned int ihl) +{ + unsigned int tmp1; + __wsum sum; + + __asm__ __volatile__( + "ldw.w %0, [%1]+, #4 @ ip_fast_csum" + "ldw.w %3, [%1]+, #4" + "sub %2, %2, #5" + "add.a %0, %0, %3" + "ldw.w %3, [%1]+, #4" + "addc.a %0, %0, %3" + "ldw.w %3, [%1]+, #4" +"1: addc.a %0, %0, %3" + "ldw.w %3, [%1]+, #4" + "cmpand.a %2, #15 @ do this carefully" + "beq 2f" + "sub %2, %2, #1 @ without destroying" + "bne 1b @ the carry flag" +"2: addc.a %0, %0, %3" + "addc %0, %0, #0" + : "=r" (sum), "=r" (iph), "=r" (ihl), "=r" (tmp1) + : "1" (iph), "2" (ihl) + : "cc", "memory"); + return csum_fold(sum); +} + +static inline __wsum +csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len, + unsigned short proto, __wsum sum) +{ + __asm__( + "add.a %0, %1, %2 @ csum_tcpudp_nofold" + "addc.a %0, %0, %3" + "addc.a %0, %0, %4 << #8" + "addc.a %0, %0, %5" + "addc %0, %0, #0" + : "=&r"(sum) + : "r" (sum), "r" (daddr), "r" (saddr), "r" (len), "Ir" (htons(proto)) + : "cc"); + return sum; +} +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +static inline __sum16 +csum_tcpudp_magic(__be32 saddr, __be32 daddr, unsigned short len, + unsigned short proto, __wsum sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); +} + + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ +static inline __sum16 +ip_compute_csum(const void *buff, int len) +{ + return csum_fold(csum_partial(buff, len, 0)); +} + +#define _HAVE_ARCH_IPV6_CSUM +extern __wsum +__csum_ipv6_magic(const struct in6_addr *saddr, const struct in6_addr *daddr, + __be32 len, __be32 proto, __wsum sum); + +static inline __sum16 +csum_ipv6_magic(const struct in6_addr *saddr, const struct in6_addr *daddr, + __u32 len, unsigned short proto, __wsum sum) +{ + return csum_fold(__csum_ipv6_magic(saddr, daddr, htonl(len), + htonl(proto), sum)); +} +#endif diff --git a/arch/unicore32/lib/csumipv6.S b/arch/unicore32/lib/csumipv6.S new file mode 100644 index 0000000..47fad61 --- /dev/null +++ b/arch/unicore32/lib/csumipv6.S @@ -0,0 +1,36 @@ +/* + * linux/arch/unicore32/lib/csumipv6.S + * + * Code specific to PKUnity SoC and UniCore ISA + * + * Copyright (C) 2001-2010 GUAN Xue-tao + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + +ENTRY(__csum_ipv6_magic) + stw.w lr, [sp+], #-4 + add.a ip, r2, r3 + + ldm (r1 - r4), [r1]+ + addc.a ip, ip, r1 + addc.a ip, ip, r2 + addc.a ip, ip, r3 + addc.a ip, ip, r4 + ldm (r0 - r3), [r0]+ + addc.a r0, ip, r0 + addc.a r0, r0, r1 + addc.a r0, r0, r2 + ldw r2, [sp+], #4 + addc.a r0, r0, r3 + addc.a r0, r0, r2 + addc.a r0, r0, #0 + ldm.w (pc), [sp]+ +ENDPROC(__csum_ipv6_magic) + diff --git a/arch/unicore32/lib/csumpartial.S b/arch/unicore32/lib/csumpartial.S new file mode 100644 index 0000000..23e36c5 --- /dev/null +++ b/arch/unicore32/lib/csumpartial.S @@ -0,0 +1,126 @@ +/* + * linux/arch/unicore32/lib/csumpartial.S + * + * Code specific to PKUnity SoC and UniCore ISA + * + * Copyright (C) 2001-2010 GUAN Xue-tao + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + +/* + * Function: __u32 csum_partial(const char *src, int len, __u32 sum) + * Params : r0 = buffer, r1 = len, r2 = checksum + * Returns : r0 = new checksum + */ + +buf .req r0 +len .req r1 +sum .req r2 +td0 .req r3 +td1 .req r4 +td2 .req r5 +td3 .req r6 + +.Lzero: mov r0, sum + add sp, sp, #4 + ldw.w pc, [sp]+, #4 + + /* + * Handle 0 to 7 bytes, with any alignment of source and + * destination pointers. Note that when we get here, C = 0 + */ +.Lless8: cxor.a len, #0 @ check for zero count + beq .Lzero + + /* we must have at least one byte. */ + cand.a buf, #1 @ odd address? + beq .Lless4 + mov sum, sum <> #8 + ldb.w td0, [buf]+, #1 + sub len, len, #1 + addc.a sum, sum, td0 put_byte_1 + +.Lless4: cand.a len, #6 + beq .Lless8_byte + + /* we are now half-word aligned */ + +.Lless8_wordlp: + ldh.w td0, [buf]+, #2 + sub len, len, #2 + addc.a sum, sum, td0 + cand.a len, #6 + bne .Lless8_wordlp + +.Lless8_byte: cand.a len, #1 @ odd number of bytes + beq .Ldone + ldb.w td0, [buf]+, #1 @ include last byte + addc.a sum, sum, td0 put_byte_0 @ update checksum + +.Ldone: addc r0, sum, #0 @ collect up the last carry + ldw.w td0, [sp]+, #4 + cand.a td0, #1 @ check buffer alignment + cmovne r0, r0 <> #8 @ rotate checksum by 8 bits + ldw.w pc, [sp]+, #4 @ return + +.Lnot_aligned: cand.a buf, #1 @ odd address + beq 201f + ldb.w td0, [buf]+, #1 @ make even + sub len, len, #1 + addc.a sum, sum, td0 put_byte_1 @ update checksum + 201: + cand.a buf, #2 @ 32-bit aligned? + beq 201f + ldh.w td0, [buf]+, #2 @ make 32-bit aligned + sub len, len, #2 + addc.a sum, sum, td0 @ update checksum + 201: + mov pc, lr + +ENTRY(csum_partial) + stm.w (lr), [sp-] + stm.w (buf), [sp-] + csub.a len, #8 @ Ensure that we have at least + bub .Lless8 @ 8 bytes to copy. + + cand.a buf, #1 + cmovne sum, sum <> #8 + + add.a sum, sum, #0 @ C = 0 + cand.a buf, #3 @ Test destination alignment + bne.l .Lnot_aligned @ align destination, return here + +1: andn.a ip, len, #31 + beq 3f + +2: ldm.w (td0, td1, td2, td3), [buf]+ + addc.a sum, sum, td0 + addc.a sum, sum, td1 + addc.a sum, sum, td2 + addc.a sum, sum, td3 + ldm.w (td0, td1, td2, td3), [buf]+ + addc.a sum, sum, td0 + addc.a sum, sum, td1 + addc.a sum, sum, td2 + addc.a sum, sum, td3 + sub ip, ip, #32 + cxor.a ip, #0 + bne 2b + +3: cand.a len, #0x1c @ should not change C + beq .Lless4 + +4: ldw.w td0, [buf]+, #4 + sub len, len, #4 + addc.a sum, sum, td0 + cand.a len, #0x1c + bne 4b + b .Lless4 +ENDPROC(csum_partial) diff --git a/arch/unicore32/lib/csumpartialcopy.S b/arch/unicore32/lib/csumpartialcopy.S new file mode 100644 index 0000000..e4fa5c2 --- /dev/null +++ b/arch/unicore32/lib/csumpartialcopy.S @@ -0,0 +1,61 @@ +/* + * linux/arch/unicore32/lib/csumpartialcopy.S + * + * Code specific to PKUnity SoC and UniCore ISA + * + * Copyright (C) 2001-2010 GUAN Xue-tao + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <linux/linkage.h> +#include <asm/assembler.h> + + .text + +/* + * Function: __u32 csum_partial_copy_nocheck + * (const char *src, char *dst, int len, __u32 sum) + * Params : r0 = src, r1 = dst, r2 = len, r3 = checksum + * Returns : r0 = new checksum + */ + + .macro save_regs + mov ip, sp + stm.w (fp, ip, lr, pc), [sp-] + stm.w (r1), [sp-] + sub fp, ip, #4 + .endm + + .macro load_regs + ldm.w (r1), [sp]+ + ldm (fp, sp, pc), [sp]+ + .endm + + .macro load1b, reg1 + ldb.w \reg1, [r0]+, #1 + .endm + + .macro load2b, reg1, reg2 + ldb.w \reg1, [r0]+, #1 + ldb.w \reg2, [r0]+, #1 + .endm + + .macro load1l, reg1 + ldw.w \reg1, [r0]+, #4 + .endm + + .macro load2l, reg1, reg2 + ldw.w \reg1, [r0]+, #4 + ldw.w \reg2, [r0]+, #4 + .endm + + .macro load4l, reg1, reg2, reg3, reg4 + ldm.w (\reg1, \reg2, \reg3, \reg4), [r0]+ + .endm + +#define FN_ENTRY ENTRY(csum_partial_copy_nocheck) +#define FN_EXIT ENDPROC(csum_partial_copy_nocheck) + +#include "csumpartialcopygeneric.S" diff --git a/arch/unicore32/lib/csumpartialcopygeneric.S b/arch/unicore32/lib/csumpartialcopygeneric.S new file mode 100644 index 0000000..d5a4a3d --- /dev/null +++ b/arch/unicore32/lib/csumpartialcopygeneric.S @@ -0,0 +1,335 @@ +/* + * linux/arch/unicore32/lib/csumpartialcopygeneric.S + * + * Code specific to PKUnity SoC and UniCore ISA + * + * Copyright (C) 2001-2010 GUAN Xue-tao + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * unsigned int + * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, ) + * r0 = src, r1 = dst, r2 = len, r3 = sum + * Returns : r0 = checksum + * + * Note that 'tst' and 'teq' preserve the carry flag. + */ + +src .req r0 +dst .req r1 +len .req r2 +sum .req r3 + +.Lzero: mov r0, sum + load_regs + + /* + * Align an unaligned destination pointer. We know that + * we have >= 8 bytes here, so we don't need to check + * the length. Note that the source pointer hasn't been + * aligned yet. + */ +.Ldst_unaligned: + cand.a dst, #1 + beq .Ldst_16bit + + load1b ip + sub len, len, #1 + addc.a sum, sum, ip put_byte_1 @ update checksum + stb.w ip, [dst]+, #1 + cand.a dst, #2 + cmoveq pc, lr @ dst is now 32bit aligned + +.Ldst_16bit: load2b r8, ip + sub len, len, #2 + addc.a sum, sum, r8 put_byte_0 + stb.w r8, [dst]+, #1 + addc.a sum, sum, ip put_byte_1 + stb.w ip, [dst]+, #1 + mov pc, lr @ dst is now 32bit aligned + + /* + * Handle 0 to 7 bytes, with any alignment of source and + * destination pointers. Note that when we get here, C = 0 + */ +.Lless8: cxor.a len, #0 @ check for zero count + beq .Lzero + + /* we must have at least one byte. */ + cand.a dst, #1 @ dst 16-bit aligned + beq .Lless8_aligned + + /* Align dst */ + load1b ip + sub len, len, #1 + addc.a sum, sum, ip put_byte_1 @ update checksum + stb.w ip, [dst]+, #1 + cand.a len, #6 + beq .Lless8_byteonly + +1: load2b r8, ip + sub len, len, #2 + addc.a sum, sum, r8 put_byte_0 + stb.w r8, [dst]+, #1 + addc.a sum, sum, ip put_byte_1 + stb.w ip, [dst]+, #1 +.Lless8_aligned: + cand.a len, #6 + bne 1b +.Lless8_byteonly: + cand.a len, #1 + beq .Ldone + load1b r8 + addc.a sum, sum, r8 put_byte_0 @ update checksum + stb.w r8, [dst]+, #1 + b .Ldone + +FN_ENTRY + save_regs + + csub.a len, #8 @ Ensure that we have at least + bub .Lless8 @ 8 bytes to copy. 
+ + add.a sum, sum, #0 @ C = 0 + cand.a dst, #3 @ Test destination alignment + bne.l .Ldst_unaligned @ align destination, return here + + /* + * Ok, the dst pointer is now 32bit aligned, and we know + * that we must have more than 4 bytes to copy. Note + * that C contains the carry from the dst alignment above. + */ + + cand.a src, #3 @ Test source alignment + bne .Lsrc_not_aligned + + /* Routine for src & dst aligned */ + + andn.a ip, len, #15 + beq 2f + +1: load4l r4, r5, r6, r7 + stm.w (r4, r5, r6, r7), [dst]+ + addc.a sum, sum, r4 + addc.a sum, sum, r5 + addc.a sum, sum, r6 + addc.a sum, sum, r7 + sub ip, ip, #16 + cxor.a ip, #0 + bne 1b + +2: and.a ip, len, #12 + beq 4f + cand.a ip, #8 + beq 3f + load2l r4, r5 + stm.w (r4, r5), [dst]+ + addc.a sum, sum, r4 + addc.a sum, sum, r5 + cand.a ip, #4 + beq 4f + +3: load1l r4 + stw.w r4, [dst]+, #4 + addc.a sum, sum, r4 + +4: and.a len, len, #3 + beq .Ldone + load1l r4 + cand.a len, #2 + mov r5, r4 get_byte_0 + beq .Lexit + addc.a sum, sum, r4 push #16 + stb.w r5, [dst]+, #1 + mov r5, r4 get_byte_1 + stb.w r5, [dst]+, #1 + mov r5, r4 get_byte_2 +.Lexit: cand.a len, #1 + beq .Ldone + stb.w r5, [dst]+, #1 + and r5, r5, #255 + addc.a sum, sum, r5 put_byte_0 + + /* + * If the dst pointer was not 16-bit aligned, we + * need to rotate the checksum here to get around + * the inefficient byte manipulations in the + * architecture independent code. + */ +.Ldone: addc r0, sum, #0 + ldw sum, [sp+], #0 @ dst + cand.a sum, #1 + cmovne r0, r0 <> #8 + load_regs + +.Lsrc_not_aligned: + addc sum, sum, #0 @ include C from dst alignment + and ip, src, #3 + andn src, src, #3 + load1l r5 + csub.a ip, #2 + beq .Lsrc2_aligned + bua .Lsrc3_aligned + mov r4, r5 pull #8 @ C = 0 + andn.a ip, len, #15 + beq 2f +1: load4l r5, r6, r7, r8 + or r4, r4, r5 push #24 + mov r5, r5 pull #8 + or r5, r5, r6 push #24 + mov r6, r6 pull #8 + or r6, r6, r7 push #24 + mov r7, r7 pull #8 + or r7, r7, r8 push #24 + stm.w (r4, r5, r6, r7), [dst]+ + addc.a sum, sum, r4 + addc.a sum, sum, r5 + addc.a sum, sum, r6 + addc.a sum, sum, r7 + mov r4, r8 pull #8 + sub ip, ip, #16 + cxor.a ip, #0 + bne 1b +2: and.a ip, len, #12 + beq 4f + cand.a ip, #8 + beq 3f + load2l r5, r6 + or r4, r4, r5 push #24 + mov r5, r5 pull #8 + or r5, r5, r6 push #24 + stm.w (r4, r5), [dst]+ + addc.a sum, sum, r4 + addc.a sum, sum, r5 + mov r4, r6 pull #8 + cand.a ip, #4 + beq 4f +3: load1l r5 + or r4, r4, r5 push #24 + stw.w r4, [dst]+, #4 + addc.a sum, sum, r4 + mov r4, r5 pull #8 +4: and.a len, len, #3 + beq .Ldone + mov r5, r4 get_byte_0 + cand.a len, #2 + beq .Lexit + addc.a sum, sum, r4 push #16 + stb.w r5, [dst]+, #1 + mov r5, r4 get_byte_1 + stb.w r5, [dst]+, #1 + mov r5, r4 get_byte_2 + b .Lexit + +.Lsrc2_aligned: mov r4, r5 pull #16 + add.a sum, sum, #0 + andn.a ip, len, #15 + beq 2f +1: load4l r5, r6, r7, r8 + or r4, r4, r5 push #16 + mov r5, r5 pull #16 + or r5, r5, r6 push #16 + mov r6, r6 pull #16 + or r6, r6, r7 push #16 + mov r7, r7 pull #16 + or r7, r7, r8 push #16 + stm.w (r4, r5, r6, r7), [dst]+ + addc.a sum, sum, r4 + addc.a sum, sum, r5 + addc.a sum, sum, r6 + addc.a sum, sum, r7 + mov r4, r8 pull #16 + sub ip, ip, #16 + cxor.a ip, #0 + bne 1b +2: and.a ip, len, #12 + beq 4f + cand.a ip, #8 + beq 3f + load2l r5, r6 + or r4, r4, r5 push #16 + mov r5, r5 pull #16 + or r5, r5, r6 push #16 + stm.w (r4, r5), [dst]+ + addc.a sum, sum, r4 + addc.a sum, sum, r5 + mov r4, r6 pull #16 + cand.a ip, #4 + beq 4f +3: load1l r5 + or r4, r4, r5 push #16 + stw.w r4, [dst]+, #4 + addc.a sum, sum, r4 + mov r4, r5 
pull #16 +4: and.a len, len, #3 + beq .Ldone + mov r5, r4 get_byte_0 + cand.a len, #2 + beq .Lexit + addc.a sum, sum, r4 + stb.w r5, [dst]+, #1 + mov r5, r4 get_byte_1 + stb.w r5, [dst]+, #1 + cand.a len, #1 + beq .Ldone + load1b r5 + b .Lexit + +.Lsrc3_aligned: mov r4, r5 pull #24 + add.a sum, sum, #0 + andn.a ip, len, #15 + beq 2f +1: load4l r5, r6, r7, r8 + or r4, r4, r5 push #8 + mov r5, r5 pull #24 + or r5, r5, r6 push #8 + mov r6, r6 pull #24 + or r6, r6, r7 push #8 + mov r7, r7 pull #24 + or r7, r7, r8 push #8 + stm.w (r4, r5, r6, r7), [dst]+ + addc.a sum, sum, r4 + addc.a sum, sum, r5 + addc.a sum, sum, r6 + addc.a sum, sum, r7 + mov r4, r8 pull #24 + sub ip, ip, #16 + cxor.a ip, #0 + bne 1b +2: and.a ip, len, #12 + beq 4f + cand.a ip, #8 + beq 3f + load2l r5, r6 + or r4, r4, r5 push #8 + mov r5, r5 pull #24 + or r5, r5, r6 push #8 + stm.w (r4, r5), [dst]+ + addc.a sum, sum, r4 + addc.a sum, sum, r5 + mov r4, r6 pull #24 + cand.a ip, #4 + beq 4f +3: load1l r5 + or r4, r4, r5 push #8 + stw.w r4, [dst]+, #4 + addc.a sum, sum, r4 + mov r4, r5 pull #24 +4: and.a len, len, #3 + beq .Ldone + mov r5, r4 get_byte_0 + cand.a len, #2 + beq .Lexit + stb.w r5, [dst]+, #1 + addc.a sum, sum, r4 + load1l r4 + mov r5, r4 get_byte_0 + stb.w r5, [dst]+, #1 + addc.a sum, sum, r4 push #24 + mov r5, r4 get_byte_1 + b .Lexit +FN_EXIT diff --git a/arch/unicore32/lib/csumpartialcopyuser.S b/arch/unicore32/lib/csumpartialcopyuser.S new file mode 100644 index 0000000..23a292f --- /dev/null +++ b/arch/unicore32/lib/csumpartialcopyuser.S @@ -0,0 +1,92 @@ +/* + * linux/arch/unicore32/lib/csumpartialcopyuser.S + * + * Code specific to PKUnity SoC and UniCore ISA + * + * Copyright (C) 2001-2010 GUAN Xue-tao + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 27/03/03 Ian Molton Clean up CONFIG_CPU + * + */ +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/errno.h> +#include <generated/asm-offsets.h> + + .text + + .macro save_regs + mov ip, sp + stm.w (fp, ip, lr, pc), [sp-] + stm.w (r1 - r2), [sp-] + sub fp, ip, #4 + .endm + + .macro load_regs + ldm.w (r1 - r2), [sp]+ + ldm (fp, sp, pc), [sp]+ + .endm + + .macro load1b, reg1 + ldrusr \reg1, r0, 1 + .endm + + .macro load2b, reg1, reg2 + ldrusr \reg1, r0, 1 + ldrusr \reg2, r0, 1 + .endm + + .macro load1l, reg1 + ldrusr \reg1, r0, 4 + .endm + + .macro load2l, reg1, reg2 + ldrusr \reg1, r0, 4 + ldrusr \reg2, r0, 4 + .endm + + .macro load4l, reg1, reg2, reg3, reg4 + ldrusr \reg1, r0, 4 + ldrusr \reg2, r0, 4 + ldrusr \reg3, r0, 4 + ldrusr \reg4, r0, 4 + .endm + +/* + * unsigned int + * csum_partial_copy_from_user + * (const char *src, char *dst, int len, int sum, int *err_ptr) + * r0 = src, r1 = dst, r2 = len, r3 = sum, [sp] = *err_ptr + * Returns : r0 = checksum, [[sp, #0], #0] = 0 or -EFAULT + */ + +#define FN_ENTRY ENTRY(csum_partial_copy_from_user) +#define FN_EXIT ENDPROC(csum_partial_copy_from_user) + +#include "csumpartialcopygeneric.S" + +/* + * FIXME: minor buglet here + * We don't return the checksum for the data present in the buffer. To do + * so properly, we would have to add in whatever registers were loaded before + * the fault, which, with the current asm above is not predictable. 
+ */ + .pushsection .fixup,"ax" + .align 4 +9001: mov r4, #-EFAULT + ldw r5, [sp+], #8*4 @ *err_ptr + stw r4, [r5] + ldm (r1, r2), [sp]+ @ retrieve dst, len + add r2, r2, r1 + mov r0, #0 @ zero the buffer +9002: cxor.a r2, r1 + beq 201f + stb.w r0, [r1]+, #1 + b 9002b +201: + load_regs + .popsection
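For readers following the asm, the computation all of these routines share is the RFC 1071 Internet (one's-complement) checksum. Below is a minimal portable C sketch of what csum_partial() and csum_fold() compute; it is an illustration, not the UniCore32 implementation, and the ref_* helper names are hypothetical. The asm above sums native-endian 32-bit words with carry for speed, while this model sums big-endian 16-bit words for clarity; byte-order handling is glossed over here.

	#include <stdint.h>
	#include <stddef.h>

	/* Fold a 32-bit partial sum to 16 bits and complement it,
	 * as csum_fold() does with its add/rotate/carry sequence. */
	static uint16_t ref_csum_fold(uint32_t sum)
	{
		sum = (sum & 0xffff) + (sum >> 16);	/* fold high half in */
		sum = (sum & 0xffff) + (sum >> 16);	/* absorb the carry */
		return (uint16_t)~sum;
	}

	/* One's-complement sum over a buffer, the job csum_partial()
	 * does with word loads and addc in the asm. */
	static uint32_t ref_csum_partial(const void *buff, size_t len,
					 uint32_t sum)
	{
		const uint8_t *p = buff;

		while (len > 1) {			/* 16-bit words */
			sum += ((uint32_t)p[0] << 8) | p[1];
			p += 2;
			len -= 2;
		}
		if (len)		/* odd trailing byte, zero-padded */
			sum += (uint32_t)p[0] << 8;
		return sum;
	}

ip_fast_csum() is the same computation restricted to the IP header: a header verifies exactly when ip_fast_csum() returns 0, i.e. when the 16-bit one's-complement sum over all ihl 32-bit words, checksum field included, comes to 0xffff, which is why the routine can return csum_fold(sum) directly.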