All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH rcu 0/6] nolibc updates for v5.17
@ 2021-12-02  0:33 Paul E. McKenney
  2021-12-02  0:33 ` [PATCH rcu 1/6] tools/nolibc: x86-64: Fix startup code bug Paul E. McKenney
                   ` (5 more replies)
  0 siblings, 6 replies; 7+ messages in thread
From: Paul E. McKenney @ 2021-12-02  0:33 UTC (permalink / raw)
  To: rcu
  Cc: linux-kernel, kernel-team, mingo, jiangshanlai, akpm,
	mathieu.desnoyers, josh, tglx, peterz, rostedt, dhowells,
	edumazet, fweisbec, oleg, joel

Hello!

This series provides nolibc updates, perhaps most notably the addition
of the gettid() system call:

1.	x86-64: Fix startup code bug, courtesy of Ammar Faizi.

2.	i386: fix initial stack alignment, courtesy of Willy Tarreau.

3.	fix incorrect truncation of exit code, courtesy of Willy Tarreau.

4.	x86: Remove `r8`, `r9` and `r10` from the clobber list, courtesy
	of Ammar Faizi.

5.	x86-64: Use `mov $60,%eax` instead of `mov $60,%rax`, courtesy
	of Ammar Faizi.

6.	Implement gettid(), courtesy of Mark Brown.

						Thanx, Paul

------------------------------------------------------------------------

 b/tools/include/nolibc/nolibc.h |   10 ++++-
 tools/include/nolibc/nolibc.h   |   76 +++++++++++++++++++++++++++-------------
 2 files changed, 60 insertions(+), 26 deletions(-)

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH rcu 1/6] tools/nolibc: x86-64: Fix startup code bug
  2021-12-02  0:33 [PATCH rcu 0/6] nolibc updates for v5.17 Paul E. McKenney
@ 2021-12-02  0:33 ` Paul E. McKenney
  2021-12-02  0:33 ` [PATCH rcu 2/6] tools/nolibc: i386: fix initial stack alignment Paul E. McKenney
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Paul E. McKenney @ 2021-12-02  0:33 UTC (permalink / raw)
  To: rcu
  Cc: linux-kernel, kernel-team, mingo, jiangshanlai, akpm,
	mathieu.desnoyers, josh, tglx, peterz, rostedt, dhowells,
	edumazet, fweisbec, oleg, joel, Ammar Faizi, Bedirhan KURT,
	Louvian Lyndal, Peter Cordes, stable, Willy Tarreau,
	Paul E . McKenney

From: Ammar Faizi <ammar.faizi@students.amikom.ac.id>

Before this patch, the `_start` function looks like this:
```
0000000000001170 <_start>:
    1170:	pop    %rdi
    1171:	mov    %rsp,%rsi
    1174:	lea    0x8(%rsi,%rdi,8),%rdx
    1179:	and    $0xfffffffffffffff0,%rsp
    117d:	sub    $0x8,%rsp
    1181:	call   1000 <main>
    1186:	movzbq %al,%rdi
    118a:	mov    $0x3c,%rax
    1191:	syscall
    1193:	hlt
    1194:	data16 cs nopw 0x0(%rax,%rax,1)
    119f:	nop
```
Note the "and" to %rsp with $-16, it makes the %rsp be 16-byte aligned,
but then there is a "sub" with $0x8 which makes the %rsp no longer
16-byte aligned, then it calls main. That's the bug!

What actually the x86-64 System V ABI mandates is that right before the
"call", the %rsp must be 16-byte aligned, not after the "call". So the
"sub" with $0x8 here breaks the alignment. Remove it.

An example where this rule matters is when the callee needs to align
its stack at 16-byte for aligned move instruction, like `movdqa` and
`movaps`. If the callee can't align its stack properly, it will result
in segmentation fault.

x86-64 System V ABI also mandates the deepest stack frame should be
zero. Just to be safe, let's zero the %rbp on startup as the content
of %rbp may be unspecified when the program starts. Now it looks like
this:
```
0000000000001170 <_start>:
    1170:	pop    %rdi
    1171:	mov    %rsp,%rsi
    1174:	lea    0x8(%rsi,%rdi,8),%rdx
    1179:	xor    %ebp,%ebp                # zero the %rbp
    117b:	and    $0xfffffffffffffff0,%rsp # align the %rsp
    117f:	call   1000 <main>
    1184:	movzbq %al,%rdi
    1188:	mov    $0x3c,%rax
    118f:	syscall
    1191:	hlt
    1192:	data16 cs nopw 0x0(%rax,%rax,1)
    119d:	nopl   (%rax)
```

Cc: Bedirhan KURT <windowz414@gnuweeb.org>
Cc: Louvian Lyndal <louvianlyndal@gmail.com>
Reported-by: Peter Cordes <peter@cordes.ca>
Signed-off-by: Ammar Faizi <ammar.faizi@students.amikom.ac.id>
[wt: I did this on purpose due to a misunderstanding of the spec, other
     archs will thus have to be rechecked, particularly i386]
Cc: stable@vger.kernel.org
Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/include/nolibc/nolibc.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index 3430667b0d241..96b6d56acb572 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -399,14 +399,20 @@ struct stat {
 })
 
 /* startup code */
+/*
+ * x86-64 System V ABI mandates:
+ * 1) %rsp must be 16-byte aligned right before the function call.
+ * 2) The deepest stack frame should be zero (the %rbp).
+ *
+ */
 asm(".section .text\n"
     ".global _start\n"
     "_start:\n"
     "pop %rdi\n"                // argc   (first arg, %rdi)
     "mov %rsp, %rsi\n"          // argv[] (second arg, %rsi)
     "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx)
-    "and $-16, %rsp\n"          // x86 ABI : esp must be 16-byte aligned when
-    "sub $8, %rsp\n"            // entering the callee
+    "xor %ebp, %ebp\n"          // zero the stack frame
+    "and $-16, %rsp\n"          // x86 ABI : esp must be 16-byte aligned before call
     "call main\n"               // main() returns the status code, we'll exit with it.
     "movzb %al, %rdi\n"         // retrieve exit code from 8 lower bits
     "mov $60, %rax\n"           // NR_exit == 60
-- 
2.31.1.189.g2e36527f23


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH rcu 2/6] tools/nolibc: i386: fix initial stack alignment
  2021-12-02  0:33 [PATCH rcu 0/6] nolibc updates for v5.17 Paul E. McKenney
  2021-12-02  0:33 ` [PATCH rcu 1/6] tools/nolibc: x86-64: Fix startup code bug Paul E. McKenney
@ 2021-12-02  0:33 ` Paul E. McKenney
  2021-12-02  0:33 ` [PATCH rcu 3/6] tools/nolibc: fix incorrect truncation of exit code Paul E. McKenney
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Paul E. McKenney @ 2021-12-02  0:33 UTC (permalink / raw)
  To: rcu
  Cc: linux-kernel, kernel-team, mingo, jiangshanlai, akpm,
	mathieu.desnoyers, josh, tglx, peterz, rostedt, dhowells,
	edumazet, fweisbec, oleg, joel, Willy Tarreau, Ammar Faizi,
	stable, Paul E . McKenney

From: Willy Tarreau <w@1wt.eu>

After re-checking in the spec and comparing stack offsets with glibc,
The last pushed argument must be 16-byte aligned (i.e. aligned before the
call) so that in the callee esp+4 is multiple of 16, so the principle is
the 32-bit equivalent to what Ammar fixed for x86_64. It's possible that
32-bit code using SSE2 or MMX could have been affected. In addition the
frame pointer ought to be zero at the deepest level.

Link: https://gitlab.com/x86-psABIs/i386-ABI/-/wikis/Intel386-psABI
Cc: Ammar Faizi <ammar.faizi@students.amikom.ac.id>
Cc: stable@vger.kernel.org
Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/include/nolibc/nolibc.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index 96b6d56acb572..7f300dc379e70 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -583,13 +583,21 @@ struct sys_stat_struct {
 })
 
 /* startup code */
+/*
+ * i386 System V ABI mandates:
+ * 1) last pushed argument must be 16-byte aligned.
+ * 2) The deepest stack frame should be set to zero
+ *
+ */
 asm(".section .text\n"
     ".global _start\n"
     "_start:\n"
     "pop %eax\n"                // argc   (first arg, %eax)
     "mov %esp, %ebx\n"          // argv[] (second arg, %ebx)
     "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx)
-    "and $-16, %esp\n"          // x86 ABI : esp must be 16-byte aligned when
+    "xor %ebp, %ebp\n"          // zero the stack frame
+    "and $-16, %esp\n"          // x86 ABI : esp must be 16-byte aligned before
+    "sub $4, %esp\n"            // the call instruction (args are aligned)
     "push %ecx\n"               // push all registers on the stack so that we
     "push %ebx\n"               // support both regparm and plain stack modes
     "push %eax\n"
-- 
2.31.1.189.g2e36527f23


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH rcu 3/6] tools/nolibc: fix incorrect truncation of exit code
  2021-12-02  0:33 [PATCH rcu 0/6] nolibc updates for v5.17 Paul E. McKenney
  2021-12-02  0:33 ` [PATCH rcu 1/6] tools/nolibc: x86-64: Fix startup code bug Paul E. McKenney
  2021-12-02  0:33 ` [PATCH rcu 2/6] tools/nolibc: i386: fix initial stack alignment Paul E. McKenney
@ 2021-12-02  0:33 ` Paul E. McKenney
  2021-12-02  0:33 ` [PATCH rcu 4/6] tools/nolibc: x86: Remove `r8`, `r9` and `r10` from the clobber list Paul E. McKenney
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Paul E. McKenney @ 2021-12-02  0:33 UTC (permalink / raw)
  To: rcu
  Cc: linux-kernel, kernel-team, mingo, jiangshanlai, akpm,
	mathieu.desnoyers, josh, tglx, peterz, rostedt, dhowells,
	edumazet, fweisbec, oleg, joel, Willy Tarreau, Ammar Faizi,
	stable, Paul E . McKenney

From: Willy Tarreau <w@1wt.eu>

Ammar Faizi reported that our exit code handling is wrong. We truncate
it to the lowest 8 bits but the syscall itself is expected to take a
regular 32-bit signed integer, not an unsigned char. It's the kernel
that later truncates it to the lowest 8 bits. The difference is visible
in strace, where the program below used to show exit(255) instead of
exit(-1):

  int main(void)
  {
        return -1;
  }

This patch applies the fix to all archs. x86_64, i386, arm64, armv7 and
mips were all tested and confirmed to work fine now. Risc-v was not
tested but the change is trivial and exactly the same as for other archs.

Reported-by: Ammar Faizi <ammar.faizi@students.amikom.ac.id>
Cc: stable@vger.kernel.org
Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/include/nolibc/nolibc.h | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index 7f300dc379e70..3e2c6f2ed587f 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -414,7 +414,7 @@ asm(".section .text\n"
     "xor %ebp, %ebp\n"          // zero the stack frame
     "and $-16, %rsp\n"          // x86 ABI : esp must be 16-byte aligned before call
     "call main\n"               // main() returns the status code, we'll exit with it.
-    "movzb %al, %rdi\n"         // retrieve exit code from 8 lower bits
+    "mov %eax, %edi\n"          // retrieve exit code (32 bit)
     "mov $60, %rax\n"           // NR_exit == 60
     "syscall\n"                 // really exit
     "hlt\n"                     // ensure it does not return
@@ -602,9 +602,9 @@ asm(".section .text\n"
     "push %ebx\n"               // support both regparm and plain stack modes
     "push %eax\n"
     "call main\n"               // main() returns the status code in %eax
-    "movzbl %al, %ebx\n"        // retrieve exit code from lower 8 bits
-    "movl   $1, %eax\n"         // NR_exit == 1
-    "int    $0x80\n"            // exit now
+    "mov %eax, %ebx\n"          // retrieve exit code (32-bit int)
+    "movl $1, %eax\n"           // NR_exit == 1
+    "int $0x80\n"               // exit now
     "hlt\n"                     // ensure it does not
     "");
 
@@ -788,7 +788,6 @@ asm(".section .text\n"
     "and %r3, %r1, $-8\n"         // AAPCS : sp must be 8-byte aligned in the
     "mov %sp, %r3\n"              //         callee, an bl doesn't push (lr=pc)
     "bl main\n"                   // main() returns the status code, we'll exit with it.
-    "and %r0, %r0, $0xff\n"       // limit exit code to 8 bits
     "movs r7, $1\n"               // NR_exit == 1
     "svc $0x00\n"
     "");
@@ -985,7 +984,6 @@ asm(".section .text\n"
     "add x2, x2, x1\n"            //           + argv
     "and sp, x1, -16\n"           // sp must be 16-byte aligned in the callee
     "bl main\n"                   // main() returns the status code, we'll exit with it.
-    "and x0, x0, 0xff\n"          // limit exit code to 8 bits
     "mov x8, 93\n"                // NR_exit == 93
     "svc #0\n"
     "");
@@ -1190,7 +1188,7 @@ asm(".section .text\n"
     "addiu $sp,$sp,-16\n"         // the callee expects to save a0..a3 there!
     "jal main\n"                  // main() returns the status code, we'll exit with it.
     "nop\n"                       // delayed slot
-    "and $a0, $v0, 0xff\n"        // limit exit code to 8 bits
+    "move $a0, $v0\n"             // retrieve 32-bit exit code from v0
     "li $v0, 4001\n"              // NR_exit == 4001
     "syscall\n"
     ".end __start\n"
@@ -1388,7 +1386,6 @@ asm(".section .text\n"
     "add   a2,a2,a1\n"           //             + argv
     "andi  sp,a1,-16\n"          // sp must be 16-byte aligned
     "call  main\n"               // main() returns the status code, we'll exit with it.
-    "andi  a0, a0, 0xff\n"       // limit exit code to 8 bits
     "li a7, 93\n"                // NR_exit == 93
     "ecall\n"
     "");
-- 
2.31.1.189.g2e36527f23


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH rcu 4/6] tools/nolibc: x86: Remove `r8`, `r9` and `r10` from the clobber list
  2021-12-02  0:33 [PATCH rcu 0/6] nolibc updates for v5.17 Paul E. McKenney
                   ` (2 preceding siblings ...)
  2021-12-02  0:33 ` [PATCH rcu 3/6] tools/nolibc: fix incorrect truncation of exit code Paul E. McKenney
@ 2021-12-02  0:33 ` Paul E. McKenney
  2021-12-02  0:33 ` [PATCH rcu 5/6] tools/nolibc: x86-64: Use `mov $60,%eax` instead of `mov $60,%rax` Paul E. McKenney
  2021-12-02  0:33 ` [PATCH rcu 6/6] tools/nolibc: Implement gettid() Paul E. McKenney
  5 siblings, 0 replies; 7+ messages in thread
From: Paul E. McKenney @ 2021-12-02  0:33 UTC (permalink / raw)
  To: rcu
  Cc: linux-kernel, kernel-team, mingo, jiangshanlai, akpm,
	mathieu.desnoyers, josh, tglx, peterz, rostedt, dhowells,
	edumazet, fweisbec, oleg, joel, Ammar Faizi, Andy Lutomirski,
	Ingo Molnar, Borislav Petkov, x86, H. Peter Anvin, David Laight,
	Willy Tarreau, Paul E . McKenney

From: Ammar Faizi <ammar.faizi@students.amikom.ac.id>

Linux x86-64 syscall only clobbers rax, rcx and r11 (and "memory").

  - rax for the return value.
  - rcx to save the return address.
  - r11 to save the rflags.

Other registers are preserved.

Having r8, r9 and r10 in the syscall clobber list is harmless, but this
results in a missed-optimization.

As the syscall doesn't clobber r8-r10, GCC should be allowed to reuse
their value after the syscall returns to userspace. But since they are
in the clobber list, GCC will always miss this opportunity.

Remove them from the x86-64 syscall clobber list to help GCC generate
better code and fix the comment.

See also the x86-64 ABI, section A.2 AMD64 Linux Kernel Conventions,
A.2.1 Calling Conventions [1].

Extra note:
Some people may think it does not really give a benefit to remove r8,
r9 and r10 from the syscall clobber list because the impression of
syscall is a C function call, and function call always clobbers those 3.

However, that is not the case for nolibc.h, because we have a potential
to inline the "syscall" instruction (which its opcode is "0f 05") to the
user functions.

All syscalls in the nolibc.h are written as a static function with inline
ASM and are likely always inline if we use optimization flag, so this is
a profit not to have r8, r9 and r10 in the clobber list.

Here is the example where this matters.

Consider the following C code:
```
  #include "tools/include/nolibc/nolibc.h"
  #define read_abc(a, b, c) __asm__ volatile("nop"::"r"(a),"r"(b),"r"(c))

  int main(void)
  {
  	int a = 0xaa;
  	int b = 0xbb;
  	int c = 0xcc;

  	read_abc(a, b, c);
  	write(1, "test\n", 5);
  	read_abc(a, b, c);

  	return 0;
  }
```

Compile with:
    gcc -Os test.c -o test -nostdlib

With r8, r9, r10 in the clobber list, GCC generates this:

0000000000001000 <main>:
    1000:	f3 0f 1e fa          	endbr64
    1004:	41 54                	push   %r12
    1006:	41 bc cc 00 00 00    	mov    $0xcc,%r12d
    100c:	55                   	push   %rbp
    100d:	bd bb 00 00 00       	mov    $0xbb,%ebp
    1012:	53                   	push   %rbx
    1013:	bb aa 00 00 00       	mov    $0xaa,%ebx
    1018:	90                   	nop
    1019:	b8 01 00 00 00       	mov    $0x1,%eax
    101e:	bf 01 00 00 00       	mov    $0x1,%edi
    1023:	ba 05 00 00 00       	mov    $0x5,%edx
    1028:	48 8d 35 d1 0f 00 00 	lea    0xfd1(%rip),%rsi
    102f:	0f 05                	syscall
    1031:	90                   	nop
    1032:	31 c0                	xor    %eax,%eax
    1034:	5b                   	pop    %rbx
    1035:	5d                   	pop    %rbp
    1036:	41 5c                	pop    %r12
    1038:	c3                   	ret

GCC thinks that syscall will clobber r8, r9, r10. So it spills 0xaa,
0xbb and 0xcc to callee saved registers (r12, rbp and rbx). This is
clearly extra memory access and extra stack size for preserving them.

But syscall does not actually clobber them, so this is a missed
optimization.

Now without r8, r9, r10 in the clobber list, GCC generates better code:

0000000000001000 <main>:
    1000:	f3 0f 1e fa          	endbr64
    1004:	41 b8 aa 00 00 00    	mov    $0xaa,%r8d
    100a:	41 b9 bb 00 00 00    	mov    $0xbb,%r9d
    1010:	41 ba cc 00 00 00    	mov    $0xcc,%r10d
    1016:	90                   	nop
    1017:	b8 01 00 00 00       	mov    $0x1,%eax
    101c:	bf 01 00 00 00       	mov    $0x1,%edi
    1021:	ba 05 00 00 00       	mov    $0x5,%edx
    1026:	48 8d 35 d3 0f 00 00 	lea    0xfd3(%rip),%rsi
    102d:	0f 05                	syscall
    102f:	90                   	nop
    1030:	31 c0                	xor    %eax,%eax
    1032:	c3                   	ret

Cc: Andy Lutomirski <luto@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: x86@kernel.org
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: David Laight <David.Laight@ACULAB.COM>
Acked-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Ammar Faizi <ammar.faizi@students.amikom.ac.id>
Link: https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/x86-64-psABI [1]
Link: https://lore.kernel.org/lkml/20211011040344.437264-1-ammar.faizi@students.amikom.ac.id/
Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/include/nolibc/nolibc.h | 33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index 3e2c6f2ed587f..f9afe89ec6f26 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -265,12 +265,17 @@ struct stat {
  *   - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively
  *   - the system call is performed by calling the syscall instruction
  *   - syscall return comes in rax
- *   - rcx and r8..r11 may be clobbered, others are preserved.
+ *   - rcx and r11 are clobbered, others are preserved.
  *   - the arguments are cast to long and assigned into the target registers
  *     which are then simply passed as registers to the asm code, so that we
  *     don't have to experience issues with register constraints.
  *   - the syscall number is always specified last in order to allow to force
  *     some registers before (gcc refuses a %-register at the last position).
+ *   - see also x86-64 ABI section A.2 AMD64 Linux Kernel Conventions, A.2.1
+ *     Calling Conventions.
+ *
+ * Link x86-64 ABI: https://gitlab.com/x86-psABIs/x86-64-ABI/-/wikis/x86-64-psABI
+ *
  */
 
 #define my_syscall0(num)                                                      \
@@ -280,9 +285,9 @@ struct stat {
 									      \
 	asm volatile (                                                        \
 		"syscall\n"                                                   \
-		: "=a" (_ret)                                                 \
+		: "=a"(_ret)                                                  \
 		: "0"(_num)                                                   \
-		: "rcx", "r8", "r9", "r10", "r11", "memory", "cc"             \
+		: "rcx", "r11", "memory", "cc"                                \
 	);                                                                    \
 	_ret;                                                                 \
 })
@@ -295,10 +300,10 @@ struct stat {
 									      \
 	asm volatile (                                                        \
 		"syscall\n"                                                   \
-		: "=a" (_ret)                                                 \
+		: "=a"(_ret)                                                  \
 		: "r"(_arg1),                                                 \
 		  "0"(_num)                                                   \
-		: "rcx", "r8", "r9", "r10", "r11", "memory", "cc"             \
+		: "rcx", "r11", "memory", "cc"                                \
 	);                                                                    \
 	_ret;                                                                 \
 })
@@ -312,10 +317,10 @@ struct stat {
 									      \
 	asm volatile (                                                        \
 		"syscall\n"                                                   \
-		: "=a" (_ret)                                                 \
+		: "=a"(_ret)                                                  \
 		: "r"(_arg1), "r"(_arg2),                                     \
 		  "0"(_num)                                                   \
-		: "rcx", "r8", "r9", "r10", "r11", "memory", "cc"             \
+		: "rcx", "r11", "memory", "cc"                                \
 	);                                                                    \
 	_ret;                                                                 \
 })
@@ -330,10 +335,10 @@ struct stat {
 									      \
 	asm volatile (                                                        \
 		"syscall\n"                                                   \
-		: "=a" (_ret)                                                 \
+		: "=a"(_ret)                                                  \
 		: "r"(_arg1), "r"(_arg2), "r"(_arg3),                         \
 		  "0"(_num)                                                   \
-		: "rcx", "r8", "r9", "r10", "r11", "memory", "cc"             \
+		: "rcx", "r11", "memory", "cc"                                \
 	);                                                                    \
 	_ret;                                                                 \
 })
@@ -349,10 +354,10 @@ struct stat {
 									      \
 	asm volatile (                                                        \
 		"syscall\n"                                                   \
-		: "=a" (_ret), "=r"(_arg4)                                    \
+		: "=a"(_ret)                                                  \
 		: "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4),             \
 		  "0"(_num)                                                   \
-		: "rcx", "r8", "r9", "r11", "memory", "cc"                    \
+		: "rcx", "r11", "memory", "cc"                                \
 	);                                                                    \
 	_ret;                                                                 \
 })
@@ -369,10 +374,10 @@ struct stat {
 									      \
 	asm volatile (                                                        \
 		"syscall\n"                                                   \
-		: "=a" (_ret), "=r"(_arg4), "=r"(_arg5)                       \
+		: "=a"(_ret)                                                  \
 		: "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
 		  "0"(_num)                                                   \
-		: "rcx", "r9", "r11", "memory", "cc"                          \
+		: "rcx", "r11", "memory", "cc"                                \
 	);                                                                    \
 	_ret;                                                                 \
 })
@@ -390,7 +395,7 @@ struct stat {
 									      \
 	asm volatile (                                                        \
 		"syscall\n"                                                   \
-		: "=a" (_ret), "=r"(_arg4), "=r"(_arg5)                       \
+		: "=a"(_ret)                                                  \
 		: "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \
 		  "r"(_arg6), "0"(_num)                                       \
 		: "rcx", "r11", "memory", "cc"                                \
-- 
2.31.1.189.g2e36527f23


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH rcu 5/6] tools/nolibc: x86-64: Use `mov $60,%eax` instead of `mov $60,%rax`
  2021-12-02  0:33 [PATCH rcu 0/6] nolibc updates for v5.17 Paul E. McKenney
                   ` (3 preceding siblings ...)
  2021-12-02  0:33 ` [PATCH rcu 4/6] tools/nolibc: x86: Remove `r8`, `r9` and `r10` from the clobber list Paul E. McKenney
@ 2021-12-02  0:33 ` Paul E. McKenney
  2021-12-02  0:33 ` [PATCH rcu 6/6] tools/nolibc: Implement gettid() Paul E. McKenney
  5 siblings, 0 replies; 7+ messages in thread
From: Paul E. McKenney @ 2021-12-02  0:33 UTC (permalink / raw)
  To: rcu
  Cc: linux-kernel, kernel-team, mingo, jiangshanlai, akpm,
	mathieu.desnoyers, josh, tglx, peterz, rostedt, dhowells,
	edumazet, fweisbec, oleg, joel, Ammar Faizi, Willy Tarreau,
	Paul E . McKenney

From: Ammar Faizi <ammar.faizi@students.amikom.ac.id>

Note that mov to 32-bit register will zero extend to 64-bit register.
Thus `mov $60,%eax` has the same effect with `mov $60,%rax`. Use the
shorter opcode to achieve the same thing.
```
  b8 3c 00 00 00       	mov    $60,%eax (5 bytes) [1]
  48 c7 c0 3c 00 00 00 	mov    $60,%rax (7 bytes) [2]
```
Currently, we use [2]. Change it to [1] for shorter code.

Signed-off-by: Ammar Faizi <ammar.faizi@students.amikom.ac.id>
Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/include/nolibc/nolibc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index f9afe89ec6f26..4988866af0b58 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -420,7 +420,7 @@ asm(".section .text\n"
     "and $-16, %rsp\n"          // x86 ABI : esp must be 16-byte aligned before call
     "call main\n"               // main() returns the status code, we'll exit with it.
     "mov %eax, %edi\n"          // retrieve exit code (32 bit)
-    "mov $60, %rax\n"           // NR_exit == 60
+    "mov $60, %eax\n"           // NR_exit == 60
     "syscall\n"                 // really exit
     "hlt\n"                     // ensure it does not return
     "");
-- 
2.31.1.189.g2e36527f23


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH rcu 6/6] tools/nolibc: Implement gettid()
  2021-12-02  0:33 [PATCH rcu 0/6] nolibc updates for v5.17 Paul E. McKenney
                   ` (4 preceding siblings ...)
  2021-12-02  0:33 ` [PATCH rcu 5/6] tools/nolibc: x86-64: Use `mov $60,%eax` instead of `mov $60,%rax` Paul E. McKenney
@ 2021-12-02  0:33 ` Paul E. McKenney
  5 siblings, 0 replies; 7+ messages in thread
From: Paul E. McKenney @ 2021-12-02  0:33 UTC (permalink / raw)
  To: rcu
  Cc: linux-kernel, kernel-team, mingo, jiangshanlai, akpm,
	mathieu.desnoyers, josh, tglx, peterz, rostedt, dhowells,
	edumazet, fweisbec, oleg, joel, Mark Brown, Willy Tarreau,
	Paul E . McKenney

From: Mark Brown <broonie@kernel.org>

Allow test programs to determine their thread ID.

Signed-off-by: Mark Brown <broonie@kernel.org>
Cc: Willy Tarreau <w@1wt.eu>
Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 tools/include/nolibc/nolibc.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index 4988866af0b58..c1c285fe494aa 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -1571,6 +1571,12 @@ pid_t sys_getpid(void)
 	return my_syscall0(__NR_getpid);
 }
 
+static __attribute__((unused))
+pid_t sys_gettid(void)
+{
+	return my_syscall0(__NR_gettid);
+}
+
 static __attribute__((unused))
 int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
 {
@@ -2029,6 +2035,18 @@ pid_t getpid(void)
 	return ret;
 }
 
+static __attribute__((unused))
+pid_t gettid(void)
+{
+	pid_t ret = sys_gettid();
+
+	if (ret < 0) {
+		SET_ERRNO(-ret);
+		ret = -1;
+	}
+	return ret;
+}
+
 static __attribute__((unused))
 int gettimeofday(struct timeval *tv, struct timezone *tz)
 {
-- 
2.31.1.189.g2e36527f23


^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2021-12-02  0:34 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-02  0:33 [PATCH rcu 0/6] nolibc updates for v5.17 Paul E. McKenney
2021-12-02  0:33 ` [PATCH rcu 1/6] tools/nolibc: x86-64: Fix startup code bug Paul E. McKenney
2021-12-02  0:33 ` [PATCH rcu 2/6] tools/nolibc: i386: fix initial stack alignment Paul E. McKenney
2021-12-02  0:33 ` [PATCH rcu 3/6] tools/nolibc: fix incorrect truncation of exit code Paul E. McKenney
2021-12-02  0:33 ` [PATCH rcu 4/6] tools/nolibc: x86: Remove `r8`, `r9` and `r10` from the clobber list Paul E. McKenney
2021-12-02  0:33 ` [PATCH rcu 5/6] tools/nolibc: x86-64: Use `mov $60,%eax` instead of `mov $60,%rax` Paul E. McKenney
2021-12-02  0:33 ` [PATCH rcu 6/6] tools/nolibc: Implement gettid() Paul E. McKenney

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.