linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Eric Dumazet <edumazet@google.com>
To: Al Viro <viro@zeniv.linux.org.uk>
Cc: Eric Dumazet <eric.dumazet@gmail.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	linux-kernel <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH] x86/uaccess: small optimization in unsafe_copy_to_user()
Date: Fri, 16 Apr 2021 22:11:26 +0200	[thread overview]
Message-ID: <CANn89i+mWh3=36R8Y8Fra0wQY4p82EPDNgZ=O5P7+d8meGxsiA@mail.gmail.com> (raw)
In-Reply-To: <YHnpBm36PcIINhWi@zeniv-ca.linux.org.uk>

On Fri, Apr 16, 2021 at 9:44 PM Al Viro <viro@zeniv.linux.org.uk> wrote:
>
> On Fri, Apr 16, 2021 at 12:24:13PM -0700, Eric Dumazet wrote:
> > From: Eric Dumazet <edumazet@google.com>
> >
> > We have to loop only to copy u64 values.
> > After this first loop, we copy at most one u32, one u16 and one byte.
>
> Does it actually yield better code?
>

Yes, my patch gives better code on an actual kernel use case

(net-next tree, look at put_cmsg())

 5ca: 48 89 0f              mov    %rcx,(%rdi)
 5cd: 89 77 08              mov    %esi,0x8(%rdi)
 5d0: 89 57 0c              mov    %edx,0xc(%rdi)
 5d3: 48 83 c7 10          add    $0x10,%rdi
 5d7: 48 83 c1 f0          add    $0xfffffffffffffff0,%rcx
 5db: 48 83 f9 07          cmp    $0x7,%rcx
 5df: 76 40                jbe    621 <put_cmsg+0x111>
 5e1: 66 66 66 66 66 66 2e data16 data16 data16 data16 data16 nopw %cs:0x0(%rax,%rax,1)
 5e8: 0f 1f 84 00 00 00 00
 5ef: 00
 5f0: 49 8b 10              mov    (%r8),%rdx
 5f3: 48 89 17              mov    %rdx,(%rdi)
 5f6: 48 83 c7 08          add    $0x8,%rdi
 5fa: 49 83 c0 08          add    $0x8,%r8
 5fe: 48 83 c1 f8          add    $0xfffffffffffffff8,%rcx
 602: 48 83 f9 07          cmp    $0x7,%rcx
 606: 77 e8                ja     5f0 <put_cmsg+0xe0>
 608: eb 17                jmp    621 <put_cmsg+0x111>
 60a: 66 0f 1f 44 00 00    nopw   0x0(%rax,%rax,1)
 610: 41 8b 10              mov    (%r8),%edx
 613: 89 17                mov    %edx,(%rdi)
 615: 48 83 c7 04          add    $0x4,%rdi
 619: 49 83 c0 04          add    $0x4,%r8
 61d: 48 83 c1 fc          add    $0xfffffffffffffffc,%rcx
 621: 48 83 f9 03          cmp    $0x3,%rcx
 625: 77 e9                ja     610 <put_cmsg+0x100>
 627: eb 1a                jmp    643 <put_cmsg+0x133>
 629: 0f 1f 80 00 00 00 00 nopl   0x0(%rax)
 630: 41 0f b7 10          movzwl (%r8),%edx
 634: 66 89 17              mov    %dx,(%rdi)
 637: 48 83 c7 02          add    $0x2,%rdi
 63b: 49 83 c0 02          add    $0x2,%r8
 63f: 48 83 c1 fe          add    $0xfffffffffffffffe,%rcx
 643: 48 83 f9 01          cmp    $0x1,%rcx
 647: 77 e7                ja     630 <put_cmsg+0x120>
 649: eb 15                jmp    660 <put_cmsg+0x150>
 64b: 0f 1f 44 00 00        nopl   0x0(%rax,%rax,1)
 650: 41 0f b6 08          movzbl (%r8),%ecx
 654: 88 0f                mov    %cl,(%rdi)
 656: 48 83 c7 01          add    $0x1,%rdi
 65a: 49 83 c0 01          add    $0x1,%r8
 65e: 31 c9                xor    %ecx,%ecx
 660: 48 85 c9              test   %rcx,%rcx
 663: 75 eb                jne    650 <put_cmsg+0x140>


> FWIW, this
> void bar(unsigned);
> void foo(unsigned n)
> {
>         while (n >= 8) {
>                 bar(n);
>                 n -= 8;
>         }
>         while (n >= 4) {
>                 bar(n);
>                 n -= 4;
>         }
>         while (n >= 2) {
>                 bar(n);
>                 n -= 2;
>         }
>         while (n >= 1) {
>                 bar(n);
>                 n -= 1;
>         }
> }
>
> will compile (with -O2) to
>         pushq   %rbp
>         pushq   %rbx
>         movl    %edi, %ebx
>         subq    $8, %rsp
>         cmpl    $7, %edi
>         jbe     .L2
>         movl    %edi, %ebp
> .L3:
>         movl    %ebp, %edi
>         subl    $8, %ebp
>         call    bar@PLT
>         cmpl    $7, %ebp
>         ja      .L3
>         andl    $7, %ebx
> .L2:
>         cmpl    $3, %ebx
>         jbe     .L4
>         movl    %ebx, %edi
>         andl    $3, %ebx
>         call    bar@PLT
> .L4:
>         cmpl    $1, %ebx
>         jbe     .L5
>         movl    %ebx, %edi
>         andl    $1, %ebx
>         call    bar@PLT
> .L5:
>         testl   %ebx, %ebx
>         je      .L1
>         addq    $8, %rsp
>         movl    $1, %edi
>         popq    %rbx
>         popq    %rbp
>         jmp     bar@PLT
> .L1:
>         addq    $8, %rsp
>         popq    %rbx
>         popq    %rbp
>         ret
>
> i.e. loop + if + if + if...

  reply	other threads:[~2021-04-16 20:11 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-16 19:24 [PATCH] x86/uaccess: small optimization in unsafe_copy_to_user() Eric Dumazet
2021-04-16 19:44 ` Al Viro
2021-04-16 20:11   ` Eric Dumazet [this message]
2021-04-16 20:57     ` Eric Dumazet
2021-04-17 13:59   ` David Laight
2021-04-17 16:03 ` Linus Torvalds
2021-04-17 16:08   ` Linus Torvalds
2021-04-17 16:27     ` Linus Torvalds
2021-04-17 18:09       ` Al Viro
2021-04-17 20:30         ` Al Viro
2021-04-17 20:35           ` Al Viro
2021-04-17 22:11             ` Linus Torvalds
2021-04-18  0:50               ` Al Viro
2021-04-17 19:44   ` Eric Dumazet
2021-04-17 19:51     ` Linus Torvalds

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CANn89i+mWh3=36R8Y8Fra0wQY4p82EPDNgZ=O5P7+d8meGxsiA@mail.gmail.com' \
    --to=edumazet@google.com \
    --cc=eric.dumazet@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).