From: Mark Rutland <mark.rutland@arm.com>
To: linux-arm-kernel@lists.infradead.org, linux-kernel@vger.kernel.org
Cc: linux-arch@vger.kernel.org, gcc@gcc.gnu.org,
catalin.marinas@arm.com, will@kernel.org, marcan@marcan.st,
maz@kernel.org, szabolcs.nagy@arm.com, f.fainelli@gmail.com,
opendmb@gmail.com, Andrew Pinski <pinskia@gmail.com>,
Ard Biesheuvel <ardb@kernel.org>,
Peter Zijlstra <peterz@infradead.org>,
x86@kernel.org, andrew.cooper3@citrix.com,
Jeremy Linton <jeremy.linton@arm.com>
Subject: GCC 12 miscompilation of volatile asm (was: Re: [PATCH] arm64/io: Remind compiler that there is a memory side effect)
Date: Tue, 5 Apr 2022 13:51:30 +0100 [thread overview]
Message-ID: <Ykw7UnlTnx63z/Ca@FVFF77S0Q05N> (raw)
In-Reply-To: <Ykc0xrLv391/jdJj@FVFF77S0Q05N>
Hi all,
[adding kernel folk who work on asm stuff]
As a heads-up, GCC 12 (not yet released) appears to erroneously optimize away
calls to functions with volatile asm. Szabolcs has raised an issue on the GCC
bugzilla:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105160
... which is a P1 release blocker, and is currently being investigated.
Jeremy originally reported this as an issue with {readl,writel}_relaxed(), but
the underlying problem doesn't have anything to do with those specifically.
I'm dumping a bunch of info here largely for posterity / archival, and to find
out who (from the kernel side) is willing and able to test proposed compiler
fixes, once those are available.
I'm happy to do so for aarch64; Peter, I assume you'd be happy to look at the
x86 side?
This is a generic issue, and
I wrote test cases for aarch64 and x86_64. Those are inline later in this mail,
and currently you can see them on compiler explorer:
aarch64: https://godbolt.org/z/vMczqjYvs
x86_64: https://godbolt.org/z/cveff9hq5
My aarch64 test case is:
| #define sysreg_read(regname) \
| ({ \
| unsigned long __sr_val; \
| asm volatile( \
| "mrs %0, " #regname "\n" \
| : "=r" (__sr_val)); \
| \
| __sr_val; \
| })
|
| #define sysreg_write(regname, __sw_val) \
| do { \
| asm volatile( \
| "msr " #regname ", %0\n" \
| : \
| : "r" (__sw_val)); \
| } while (0)
|
| #define isb() \
| do { \
| asm volatile( \
| "isb" \
| : \
| : \
| : "memory"); \
| } while (0)
|
| static unsigned long sctlr_read(void)
| {
| return sysreg_read(sctlr_el1);
| }
|
| static void sctlr_write(unsigned long val)
| {
| sysreg_write(sctlr_el1, val);
| }
|
| static void sctlr_rmw(void)
| {
| unsigned long val;
|
| val = sctlr_read();
| val |= 1UL << 7;
| sctlr_write(val);
| }
|
| void sctlr_read_multiple(void)
| {
| sctlr_read();
| sctlr_read();
| sctlr_read();
| sctlr_read();
| }
|
| void sctlr_write_multiple(void)
| {
| sctlr_write(0);
| sctlr_write(0);
| sctlr_write(0);
| sctlr_write(0);
| sctlr_write(0);
| }
|
| void sctlr_rmw_multiple(void)
| {
| sctlr_rmw();
| sctlr_rmw();
| sctlr_rmw();
| sctlr_rmw();
| }
|
| void function(void)
| {
| sctlr_read_multiple();
| sctlr_write_multiple();
| sctlr_rmw_multiple();
|
| isb();
| }
Per compiler explorer (https://godbolt.org/z/vMczqjYvs) GCC trunk currently
compiles this as:
| sctlr_rmw:
| mrs x0, sctlr_el1
| orr x0, x0, 128
| msr sctlr_el1, x0
| ret
| sctlr_read_multiple:
| mrs x0, sctlr_el1
| mrs x0, sctlr_el1
| mrs x0, sctlr_el1
| mrs x0, sctlr_el1
| ret
| sctlr_write_multiple:
| mov x0, 0
| msr sctlr_el1, x0
| msr sctlr_el1, x0
| msr sctlr_el1, x0
| msr sctlr_el1, x0
| msr sctlr_el1, x0
| ret
| sctlr_rmw_multiple:
| ret
| function:
| isb
| ret
Whereas GCC 11.2 compiles this as:
| sctlr_rmw:
| mrs x0, sctlr_el1
| orr x0, x0, 128
| msr sctlr_el1, x0
| ret
| sctlr_read_multiple:
| mrs x0, sctlr_el1
| mrs x0, sctlr_el1
| mrs x0, sctlr_el1
| mrs x0, sctlr_el1
| ret
| sctlr_write_multiple:
| mov x0, 0
| msr sctlr_el1, x0
| msr sctlr_el1, x0
| msr sctlr_el1, x0
| msr sctlr_el1, x0
| msr sctlr_el1, x0
| ret
| sctlr_rmw_multiple:
| stp x29, x30, [sp, -16]!
| mov x29, sp
| bl sctlr_rmw
| bl sctlr_rmw
| bl sctlr_rmw
| bl sctlr_rmw
| ldp x29, x30, [sp], 16
| ret
| function:
| stp x29, x30, [sp, -16]!
| mov x29, sp
| bl sctlr_read_multiple
| bl sctlr_write_multiple
| bl sctlr_rmw_multiple
| isb
| ldp x29, x30, [sp], 16
| ret
My x86_64 test case is:
| unsigned long rdmsr(unsigned long reg)
| {
| unsigned int lo, hi;
|
| asm volatile(
| "rdmsr"
| : "=d" (hi), "=a" (lo)
| : "c" (reg)
| );
|
| return ((unsigned long)hi << 32) | lo;
| }
|
| void wrmsr(unsigned long reg, unsigned long val)
| {
| unsigned int lo = val;
| unsigned int hi = val >> 32;
|
| asm volatile(
| "wrmsr"
| :
| : "d" (hi), "a" (lo), "c" (reg)
| );
| }
|
| void msr_rmw_set_bits(unsigned long reg, unsigned long bits)
| {
| unsigned long val;
|
| val = rdmsr(reg);
| val |= bits;
| wrmsr(reg, val);
| }
|
| void func_with_msr_side_effects(unsigned long reg)
| {
| msr_rmw_set_bits(reg, 1UL << 0);
| msr_rmw_set_bits(reg, 1UL << 1);
| msr_rmw_set_bits(reg, 1UL << 2);
| msr_rmw_set_bits(reg, 1UL << 3);
| }
Per compiler explorer (https://godbolt.org/z/cveff9hq5) GCC trunk currently
compiles this as:
| msr_rmw_set_bits:
| mov rcx, rdi
| rdmsr
| sal rdx, 32
| mov eax, eax
| or rax, rsi
| or rax, rdx
| mov rdx, rax
| shr rdx, 32
| wrmsr
| ret
| func_with_msr_side_effects:
| ret
While GCC 11.2 compiles that as:
| msr_rmw_set_bits:
| mov rcx, rdi
| rdmsr
| sal rdx, 32
| mov eax, eax
| or rax, rsi
| or rax, rdx
| mov rdx, rax
| shr rdx, 32
| wrmsr
| ret
| func_with_msr_side_effects:
| push rbp
| push rbx
| mov rbx, rdi
| mov rbp, rsi
| call msr_rmw_set_bits
| mov rsi, rbp
| mov rdi, rbx
| call msr_rmw_set_bits
| mov rsi, rbp
| mov rdi, rbx
| call msr_rmw_set_bits
| mov rsi, rbp
| mov rdi, rbx
| call msr_rmw_set_bits
| pop rbx
| pop rbp
| ret
Thanks,
Mark.
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
next prev parent reply other threads:[~2022-04-05 12:53 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-04-01 16:44 [PATCH] arm64/io: Remind compiler that there is a memory side effect Jeremy Linton
2022-04-01 17:22 ` Mark Rutland
2022-04-03 7:36 ` Andrew Pinski
2022-04-03 7:47 ` Ard Biesheuvel
2022-04-03 7:47 ` Ard Biesheuvel
2022-04-04 9:14 ` Will Deacon
2022-04-03 17:40 ` Doug Berger
2022-04-05 12:51 ` Mark Rutland [this message]
2022-04-05 13:04 ` GCC 12 miscompilation of volatile asm (was: Re: [PATCH] arm64/io: Remind compiler that there is a memory side effect) Mark Rutland
2022-04-05 13:20 ` Andrew Cooper
2022-04-05 14:05 ` Peter Zijlstra
2022-04-11 10:22 ` Mark Rutland
2022-04-11 10:31 ` Mark Rutland
2022-04-11 19:02 ` Jeremy Linton
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=Ykw7UnlTnx63z/Ca@FVFF77S0Q05N \
--to=mark.rutland@arm.com \
--cc=andrew.cooper3@citrix.com \
--cc=ardb@kernel.org \
--cc=catalin.marinas@arm.com \
--cc=f.fainelli@gmail.com \
--cc=gcc@gcc.gnu.org \
--cc=jeremy.linton@arm.com \
--cc=linux-arch@vger.kernel.org \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=marcan@marcan.st \
--cc=maz@kernel.org \
--cc=opendmb@gmail.com \
--cc=peterz@infradead.org \
--cc=pinskia@gmail.com \
--cc=szabolcs.nagy@arm.com \
--cc=will@kernel.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).