From: Al Viro <viro@ZenIV.linux.org.uk>
To: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org
Subject: [PATCH 17/18] amd64: switch csum_partial_copy_generic() to new calling conventions
Date: Tue, 21 Jul 2020 21:25:48 +0100 [thread overview]
Message-ID: <20200721202549.4150745-17-viro@ZenIV.linux.org.uk> (raw)
In-Reply-To: <20200721202549.4150745-1-viro@ZenIV.linux.org.uk>
From: Al Viro <viro@zeniv.linux.org.uk>
... and fold handling of misaligned case into it.
Implementation note: we stash the "will we need to rol8 the sum in the end"
flag into the MSB of %rcx (the lower 32 bits are used for length); the rest
is pretty straightforward.
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
arch/x86/include/asm/checksum_64.h | 5 +-
arch/x86/lib/csum-copy_64.S | 140 ++++++++++++++++++++++---------------
arch/x86/lib/csum-wrappers_64.c | 72 +++----------------
3 files changed, 94 insertions(+), 123 deletions(-)
diff --git a/arch/x86/include/asm/checksum_64.h b/arch/x86/include/asm/checksum_64.h
index 9af3aed54c6b..407beebadaf4 100644
--- a/arch/x86/include/asm/checksum_64.h
+++ b/arch/x86/include/asm/checksum_64.h
@@ -130,10 +130,7 @@ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
extern __wsum csum_partial(const void *buff, int len, __wsum sum);
/* Do not call this directly. Use the wrappers below */
-extern __visible __wsum csum_partial_copy_generic(const void *src, const void *dst,
- int len, __wsum sum,
- int *src_err_ptr, int *dst_err_ptr);
-
+extern __visible __wsum csum_partial_copy_generic(const void *src, void *dst, int len);
extern __wsum csum_and_copy_from_user(const void __user *src, void *dst, int len);
extern __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len);
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
index 3394a8ff7fd0..1fbd8ee9642d 100644
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -18,9 +18,6 @@
* rdi source
* rsi destination
* edx len (32bit)
- * ecx sum (32bit)
- * r8 src_err_ptr (int)
- * r9 dst_err_ptr (int)
*
* Output
* eax 64bit sum. undefined in case of exception.
@@ -31,44 +28,32 @@
.macro source
10:
- _ASM_EXTABLE_UA(10b, .Lbad_source)
+ _ASM_EXTABLE_UA(10b, .Lfault)
.endm
.macro dest
20:
- _ASM_EXTABLE_UA(20b, .Lbad_dest)
+ _ASM_EXTABLE_UA(20b, .Lfault)
.endm
- /*
- * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
- * potentially unmapped kernel address.
- */
- .macro ignore L=.Lignore
-30:
- _ASM_EXTABLE(30b, \L)
- .endm
-
-
SYM_FUNC_START(csum_partial_copy_generic)
- cmpl $3*64, %edx
- jle .Lignore
-
-.Lignore:
- subq $7*8, %rsp
- movq %rbx, 2*8(%rsp)
- movq %r12, 3*8(%rsp)
- movq %r14, 4*8(%rsp)
- movq %r13, 5*8(%rsp)
- movq %r15, 6*8(%rsp)
+ subq $5*8, %rsp
+ movq %rbx, 0*8(%rsp)
+ movq %r12, 1*8(%rsp)
+ movq %r14, 2*8(%rsp)
+ movq %r13, 3*8(%rsp)
+ movq %r15, 4*8(%rsp)
- movq %r8, (%rsp)
- movq %r9, 1*8(%rsp)
-
- movl %ecx, %eax
+ movl $-1, %eax
+ xorl %r9d, %r9d
movl %edx, %ecx
+ cmpl $8, %ecx
+ jb .Lshort
- xorl %r9d, %r9d
- movq %rcx, %r12
+ testb $7, %sil
+ jne .Lunaligned
+.Laligned:
+ movl %ecx, %r12d
shrq $6, %r12
jz .Lhandle_tail /* < 64 */
@@ -99,7 +84,12 @@ SYM_FUNC_START(csum_partial_copy_generic)
source
movq 56(%rdi), %r13
- ignore 2f
+30:
+ /*
+ * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
+ * potentially unmapped kernel address.
+ */
+ _ASM_EXTABLE(30b, 2f)
prefetcht0 5*64(%rdi)
2:
adcq %rbx, %rax
@@ -131,8 +121,6 @@ SYM_FUNC_START(csum_partial_copy_generic)
dest
movq %r13, 56(%rsi)
-3:
-
leaq 64(%rdi), %rdi
leaq 64(%rsi), %rsi
@@ -142,8 +130,8 @@ SYM_FUNC_START(csum_partial_copy_generic)
/* do last up to 56 bytes */
.Lhandle_tail:
- /* ecx: count */
- movl %ecx, %r10d
+ /* ecx: count, rcx.63: the end result needs to be rol8 */
+ movq %rcx, %r10
andl $63, %ecx
shrl $3, %ecx
jz .Lfold
@@ -172,6 +160,7 @@ SYM_FUNC_START(csum_partial_copy_generic)
.Lhandle_7:
movl %r10d, %ecx
andl $7, %ecx
+.L1: /* .Lshort rejoins the common path here */
shrl $1, %ecx
jz .Lhandle_1
movl $2, %edx
@@ -203,26 +192,65 @@ SYM_FUNC_START(csum_partial_copy_generic)
adcl %r9d, %eax /* carry */
.Lende:
- movq 2*8(%rsp), %rbx
- movq 3*8(%rsp), %r12
- movq 4*8(%rsp), %r14
- movq 5*8(%rsp), %r13
- movq 6*8(%rsp), %r15
- addq $7*8, %rsp
+ testq %r10, %r10
+ js .Lwas_odd
+.Lout:
+ movq 0*8(%rsp), %rbx
+ movq 1*8(%rsp), %r12
+ movq 2*8(%rsp), %r14
+ movq 3*8(%rsp), %r13
+ movq 4*8(%rsp), %r15
+ addq $5*8, %rsp
ret
+.Lshort:
+ movl %ecx, %r10d
+ jmp .L1
+.Lunaligned:
+ xorl %ebx, %ebx
+ testb $1, %sil
+ jne .Lodd
+1: testb $2, %sil
+ je 2f
+ source
+ movw (%rdi), %bx
+ dest
+ movw %bx, (%rsi)
+ leaq 2(%rdi), %rdi
+ subq $2, %rcx
+ leaq 2(%rsi), %rsi
+ addq %rbx, %rax
+2: testb $4, %sil
+ je .Laligned
+ source
+ movl (%rdi), %ebx
+ dest
+ movl %ebx, (%rsi)
+ leaq 4(%rdi), %rdi
+ subq $4, %rcx
+ leaq 4(%rsi), %rsi
+ addq %rbx, %rax
+ jmp .Laligned
+
+.Lodd:
+ source
+ movb (%rdi), %bl
+ dest
+ movb %bl, (%rsi)
+ leaq 1(%rdi), %rdi
+ leaq 1(%rsi), %rsi
+ /* decrement, set MSB */
+ leaq -1(%rcx, %rcx), %rcx
+ rorq $1, %rcx
+ shll $8, %ebx
+ addq %rbx, %rax
+ jmp 1b
+
+.Lwas_odd:
+ roll $8, %eax
+ jmp .Lout
- /* Exception handlers. Very simple, zeroing is done in the wrappers */
-.Lbad_source:
- movq (%rsp), %rax
- testq %rax, %rax
- jz .Lende
- movl $-EFAULT, (%rax)
- jmp .Lende
-
-.Lbad_dest:
- movq 8(%rsp), %rax
- testq %rax, %rax
- jz .Lende
- movl $-EFAULT, (%rax)
- jmp .Lende
+ /* Exception: just return 0 */
+.Lfault:
+ xorl %eax, %eax
+ jmp .Lout
SYM_FUNC_END(csum_partial_copy_generic)
diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c
index ae2fb87e2274..189344924a2b 100644
--- a/arch/x86/lib/csum-wrappers_64.c
+++ b/arch/x86/lib/csum-wrappers_64.c
@@ -21,49 +21,16 @@
* src and dst are best aligned to 64bits.
*/
__wsum
-csum_and_copy_from_user(const void __user *src, void *dst,
- int len)
+csum_and_copy_from_user(const void __user *src, void *dst, int len)
{
- int err = 0;
- __wsum isum = ~0U;
+ __wsum sum;
might_sleep();
-
if (!user_access_begin(src, len))
return 0;
-
- /*
- * Why 6, not 7? To handle odd addresses aligned we
- * would need to do considerable complications to fix the
- * checksum which is defined as an 16bit accumulator. The
- * fix alignment code is primarily for performance
- * compatibility with 32bit and that will handle odd
- * addresses slowly too.
- */
- if (unlikely((unsigned long)src & 6)) {
- while (((unsigned long)src & 6) && len >= 2) {
- __u16 val16;
-
- unsafe_get_user(val16, (const __u16 __user *)src, out);
-
- *(__u16 *)dst = val16;
- isum = (__force __wsum)add32_with_carry(
- (__force unsigned)isum, val16);
- src += 2;
- dst += 2;
- len -= 2;
- }
- }
- isum = csum_partial_copy_generic((__force const void *)src,
- dst, len, isum, &err, NULL);
- user_access_end();
- if (unlikely(err))
- isum = 0;
- return isum;
-
-out:
+ sum = csum_partial_copy_generic((__force const void *)src, dst, len);
user_access_end();
- return 0;
+ return sum;
}
EXPORT_SYMBOL(csum_and_copy_from_user);
@@ -79,37 +46,16 @@ EXPORT_SYMBOL(csum_and_copy_from_user);
* src and dst are best aligned to 64bits.
*/
__wsum
-csum_and_copy_to_user(const void *src, void __user *dst,
- int len)
+csum_and_copy_to_user(const void *src, void __user *dst, int len)
{
- __wsum ret, isum = ~0U;
- int err = 0;
+ __wsum sum;
might_sleep();
-
if (!user_access_begin(dst, len))
return 0;
-
- if (unlikely((unsigned long)dst & 6)) {
- while (((unsigned long)dst & 6) && len >= 2) {
- __u16 val16 = *(__u16 *)src;
-
- isum = (__force __wsum)add32_with_carry(
- (__force unsigned)isum, val16);
- unsafe_put_user(val16, (__u16 __user *)dst, out);
- src += 2;
- dst += 2;
- len -= 2;
- }
- }
-
- ret = csum_partial_copy_generic(src, (void __force *)dst,
- len, isum, NULL, &err);
- user_access_end();
- return err ? 0 : ret;
-out:
+ sum = csum_partial_copy_generic(src, (void __force *)dst, len);
user_access_end();
- return 0;
+ return sum;
}
EXPORT_SYMBOL(csum_and_copy_to_user);
@@ -125,7 +71,7 @@ EXPORT_SYMBOL(csum_and_copy_to_user);
__wsum
csum_partial_copy_nocheck(const void *src, void *dst, int len)
{
- return csum_partial_copy_generic(src, dst, len, 0, NULL, NULL);
+ return csum_partial_copy_generic(src, dst, len);
}
EXPORT_SYMBOL(csum_partial_copy_nocheck);
--
2.11.0
next prev parent reply other threads:[~2020-07-21 20:25 UTC|newest]
Thread overview: 102+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-07-21 20:24 [RFC][CFT][PATCHSET] saner calling conventions for csum-and-copy primitives Al Viro
2020-07-21 20:25 ` [PATCH 01/18] skb_copy_and_csum_bits(): don't bother with the last argument Al Viro
2020-07-21 20:25 ` [PATCH 02/18] icmp_push_reply(): reorder adding the checksum up Al Viro
2020-07-21 20:25 ` [PATCH 03/18] csum_partial_copy_nocheck(): drop the last argument Al Viro
2020-07-21 20:25 ` Al Viro
2020-07-21 20:25 ` [PATCH 04/18] csum_and_copy_..._user(): pass 0xffffffff instead of 0 as initial sum Al Viro
2020-07-21 20:25 ` Al Viro
2020-07-21 20:55 ` Linus Torvalds
2020-07-21 20:58 ` Linus Torvalds
2020-07-21 21:11 ` Al Viro
2020-07-21 21:16 ` Linus Torvalds
2020-07-21 21:16 ` Linus Torvalds
2020-07-25 17:54 ` Al Viro
2020-07-22 9:45 ` David Laight
2020-07-22 9:27 ` David Laight
2020-07-22 14:42 ` Al Viro
2020-07-22 15:22 ` David Laight
2020-07-22 15:54 ` Al Viro
2020-07-22 16:17 ` David Laight
2020-07-22 17:39 ` Al Viro
2020-07-23 8:29 ` David Laight
2020-07-23 13:54 ` David Laight
2020-07-23 14:30 ` David Laight
2020-07-23 14:53 ` Al Viro
2020-07-23 15:19 ` David Laight
2020-07-23 15:21 ` Al Viro
2020-07-23 15:36 ` David Laight
2020-07-21 20:25 ` [PATCH 05/18] saner calling conventions for csum_and_copy_..._user() Al Viro
2020-07-21 20:25 ` Al Viro
2020-07-21 20:25 ` [PATCH 06/18] alpha: propagate the calling convention changes down to csum_partial_copy.c helpers Al Viro
2020-07-21 20:25 ` Al Viro
2020-07-21 20:25 ` [PATCH 07/18] arm: propagate the calling convention changes down to csum_partial_copy_from_user() Al Viro
2020-07-21 20:25 ` [PATCH 08/18] m68k: get rid of zeroing destination on error in csum_and_copy_from_user() Al Viro
2020-07-21 20:25 ` Al Viro
2020-07-21 20:25 ` [PATCH 09/18] sh: propage the calling conventions change down to csum_partial_copy_generic() Al Viro
2020-07-21 20:25 ` [PATCH 10/18] i386: propagate " Al Viro
2020-07-21 20:25 ` Al Viro
2020-07-21 20:25 ` [PATCH 11/18] sparc32: propagate the calling conventions change down to __csum_partial_copy_sparc_generic() Al Viro
2020-07-21 20:25 ` Al Viro
2020-07-22 1:20 ` David Miller
2020-07-21 20:25 ` [PATCH 12/18] mips: csum_and_copy_{to,from}_user() are never called under KERNEL_DS Al Viro
2020-07-21 20:25 ` [PATCH 13/18] mips: __csum_partial_copy_kernel() has no users left Al Viro
2020-07-21 20:25 ` Al Viro
2020-07-21 20:25 ` [PATCH 14/18] mips: propagate the calling convention change down into __csum_partial_copy_..._user() Al Viro
2020-07-21 20:25 ` Al Viro
2020-07-21 20:25 ` [PATCH 15/18] xtensa: propagate the calling conventions change down into csum_partial_copy_generic() Al Viro
2020-07-22 8:56 ` Max Filippov
2020-07-21 20:25 ` [PATCH 16/18] sparc64: propagate the calling convention changes down to __csum_partial_copy_...() Al Viro
2020-07-21 20:25 ` Al Viro
2020-07-22 1:21 ` David Miller
2020-07-21 20:25 ` Al Viro [this message]
2020-07-21 20:25 ` [PATCH 17/18] amd64: switch csum_partial_copy_generic() to new calling conventions Al Viro
2020-07-21 20:25 ` [PATCH 18/18] ppc: propagate the calling conventions change down to csum_partial_copy_generic() Al Viro
2020-07-21 20:25 ` Al Viro
2020-07-24 1:25 ` [RFC][CFT][PATCHSET v2] saner calling conventions for csum-and-copy primitives Al Viro
2020-07-24 1:25 ` [PATCH v2 01/20] xtensa: fix access check in csum_and_copy_from_user Al Viro
2020-07-24 1:25 ` [PATCH v2 02/20] skb_copy_and_csum_bits(): don't bother with the last argument Al Viro
2020-07-24 1:25 ` [PATCH v2 03/20] icmp_push_reply(): reorder adding the checksum up Al Viro
2020-07-24 1:25 ` Al Viro
2020-07-24 1:25 ` [PATCH v2 04/20] unify generic instances of csum_partial_copy_nocheck() Al Viro
2020-07-24 6:41 ` Christoph Hellwig
2020-07-24 12:19 ` Al Viro
2020-07-24 12:23 ` Christoph Hellwig
2020-07-24 12:30 ` Al Viro
2020-07-26 7:11 ` Christoph Hellwig
2020-07-26 7:11 ` Christoph Hellwig
2020-07-27 3:58 ` Al Viro
2020-07-24 1:25 ` [PATCH v2 05/20] csum_partial_copy_nocheck(): drop the last argument Al Viro
2020-07-24 1:25 ` Al Viro
2020-07-24 1:25 ` [PATCH v2 06/20] csum_and_copy_..._user(): pass 0xffffffff instead of 0 as initial sum Al Viro
2020-07-24 1:25 ` [PATCH v2 07/20] saner calling conventions for csum_and_copy_..._user() Al Viro
2020-07-24 1:25 ` Al Viro
2020-07-24 1:25 ` [PATCH v2 08/20] alpha: propagate the calling convention changes down to csum_partial_copy.c helpers Al Viro
2020-07-24 1:25 ` Al Viro
2020-07-24 1:25 ` [PATCH v2 09/20] arm: propagate the calling convention changes down to csum_partial_copy_from_user() Al Viro
2020-07-24 1:25 ` [PATCH v2 10/20] m68k: get rid of zeroing destination on error in csum_and_copy_from_user() Al Viro
2020-07-24 1:25 ` [PATCH v2 11/20] sh: propage the calling conventions change down to csum_partial_copy_generic() Al Viro
2020-07-24 1:25 ` Al Viro
2020-07-24 1:25 ` [PATCH v2 12/20] i386: propagate " Al Viro
2020-07-24 1:25 ` [PATCH v2 13/20] sparc32: propagate the calling conventions change down to __csum_partial_copy_sparc_generic() Al Viro
2020-07-24 1:25 ` Al Viro
2020-07-24 1:25 ` [PATCH v2 14/20] mips: csum_and_copy_{to,from}_user() are never called under KERNEL_DS Al Viro
2020-07-24 1:25 ` [PATCH v2 15/20] mips: __csum_partial_copy_kernel() has no users left Al Viro
2020-07-24 1:25 ` Al Viro
2020-07-24 1:25 ` [PATCH v2 16/20] mips: propagate the calling convention change down into __csum_partial_copy_..._user() Al Viro
2020-07-24 1:25 ` Al Viro
2020-07-24 1:25 ` [PATCH v2 17/20] xtensa: propagate the calling conventions change down into csum_partial_copy_generic() Al Viro
2020-07-24 1:25 ` Al Viro
2020-07-24 1:25 ` [PATCH v2 18/20] sparc64: propagate the calling convention changes down to __csum_partial_copy_...() Al Viro
2020-07-24 1:25 ` Al Viro
2020-07-24 1:25 ` [PATCH v2 19/20] amd64: switch csum_partial_copy_generic() to new calling conventions Al Viro
2020-07-24 1:25 ` Al Viro
2020-07-24 1:25 ` [PATCH v2 20/20] ppc: propagate the calling conventions change down to csum_partial_copy_generic() Al Viro
2020-07-24 1:25 ` Al Viro
2020-10-14 22:26 ` Jason A. Donenfeld
2020-10-14 22:51 ` Linus Torvalds
2020-10-14 22:53 ` Linus Torvalds
2020-10-14 22:54 ` Jason A. Donenfeld
2020-10-14 22:53 ` Jason A. Donenfeld
2020-10-14 23:12 ` Al Viro
2020-10-14 23:02 ` [PATCH] powerpc32: don't adjust unmoved stack pointer in csum_partial_copy_generic() epilogue Jason A. Donenfeld
2020-10-14 23:05 ` Linus Torvalds
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200721202549.4150745-17-viro@ZenIV.linux.org.uk \
--to=viro@zeniv.linux.org.uk \
--cc=linux-arch@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).