linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
From: Mark Rutland <mark.rutland@arm.com>
To: linux-arm-kernel@lists.infradead.org, linux-kernel@vger.kernel.org
Cc: linux-arch@vger.kernel.org, gcc@gcc.gnu.org,
	catalin.marinas@arm.com, will@kernel.org, marcan@marcan.st,
	maz@kernel.org, szabolcs.nagy@arm.com, f.fainelli@gmail.com,
	opendmb@gmail.com, Andrew Pinski <pinskia@gmail.com>,
	Ard Biesheuvel <ardb@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	x86@kernel.org, andrew.cooper3@citrix.com,
	Jeremy Linton <jeremy.linton@arm.com>
Subject: GCC 12 miscompilation of volatile asm (was: Re: [PATCH] arm64/io: Remind compiler that there is a memory side effect)
Date: Tue, 5 Apr 2022 13:51:30 +0100	[thread overview]
Message-ID: <Ykw7UnlTnx63z/Ca@FVFF77S0Q05N> (raw)
In-Reply-To: <Ykc0xrLv391/jdJj@FVFF77S0Q05N>

Hi all,

[adding kernel folk who work on asm stuff]

As a heads-up, GCC 12 (not yet released) appears to erroneously optimize away
calls to functions with volatile asm. Szabolcs has raised an issue on the GCC
bugzilla:  

  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105160

... which is a P1 release blocker, and is currently being investigated.

Jemery originally reported this as an issue with {readl,writel}_relaxed(), but
the underlying problem doesn't have anything to do with those specifically.

I'm dumping a bunch of info here largely for posterity / archival, and to find
out who (from the kernel side) is willing and able to test proposed compiler
fixes, once those are available.

I'm happy to do so for aarch64; Peter, I assume you'd be happy to look at the
x86 side?

This is a generic issue, and 

I wrote test cases for aarch64 and x86_64. Those are inline later in this mail,
and currently you can see them on compiler explorer:

  aarch64: https://godbolt.org/z/vMczqjYvs

  x86_64: https://godbolt.org/z/cveff9hq5



My aarch64 test case is:

| #define sysreg_read(regname)		\
| ({					\
| 	unsigned long __sr_val;		\
| 	asm volatile(			\
| 	"mrs %0, " #regname "\n"	\
| 	: "=r" (__sr_val));		\
| 					\
| 	__sr_val;			\
| })
| 
| #define sysreg_write(regname, __sw_val)	\
| do {					\
| 	asm volatile(			\
| 	"msr " #regname ", %0\n"	\
| 	:				\
| 	: "r" (__sw_val));		\
| } while (0)
| 
| #define isb()				\
| do {					\
| 	asm volatile(			\
| 	"isb"				\
| 	:				\
| 	:				\
| 	: "memory");			\
| } while (0)
| 
| static unsigned long sctlr_read(void)
| {
| 	return sysreg_read(sctlr_el1);
| }
| 
| static void sctlr_write(unsigned long val)
| {
| 	sysreg_write(sctlr_el1, val);
| }
| 
| static void sctlr_rmw(void)
| {
| 	unsigned long val;
| 
| 	val = sctlr_read();
| 	val |= 1UL << 7;
| 	sctlr_write(val);
| }
| 
| void sctlr_read_multiple(void)
| {
| 	sctlr_read();
| 	sctlr_read();
| 	sctlr_read();
| 	sctlr_read();
| }
| 
| void sctlr_write_multiple(void)
| {
| 	sctlr_write(0);
| 	sctlr_write(0);
| 	sctlr_write(0);
| 	sctlr_write(0);
| 	sctlr_write(0);
| }
| 
| void sctlr_rmw_multiple(void)
| {
| 	sctlr_rmw();
| 	sctlr_rmw();
| 	sctlr_rmw();
| 	sctlr_rmw();
| }
| 
| void function(void)
| {
| 	sctlr_read_multiple();
| 	sctlr_write_multiple();
| 	sctlr_rmw_multiple();
| 
| 	isb();
| }

Per compiler explorer (https://godbolt.org/z/vMczqjYvs) GCC trunk currently
compiles this as:

| sctlr_rmw:
|         mrs x0, sctlr_el1
|         orr     x0, x0, 128
|         msr sctlr_el1, x0
|         ret
| sctlr_read_multiple:
|         mrs x0, sctlr_el1
|         mrs x0, sctlr_el1
|         mrs x0, sctlr_el1
|         mrs x0, sctlr_el1
|         ret
| sctlr_write_multiple:
|         mov     x0, 0
|         msr sctlr_el1, x0
|         msr sctlr_el1, x0
|         msr sctlr_el1, x0
|         msr sctlr_el1, x0
|         msr sctlr_el1, x0
|         ret
| sctlr_rmw_multiple:
|         ret
| function:
|         isb
|         ret

Whereas GCC 11.2 compiles this as:

| sctlr_rmw:
|         mrs x0, sctlr_el1
|         orr     x0, x0, 128
|         msr sctlr_el1, x0
|         ret
| sctlr_read_multiple:
|         mrs x0, sctlr_el1
|         mrs x0, sctlr_el1
|         mrs x0, sctlr_el1
|         mrs x0, sctlr_el1
|         ret
| sctlr_write_multiple:
|         mov     x0, 0
|         msr sctlr_el1, x0
|         msr sctlr_el1, x0
|         msr sctlr_el1, x0
|         msr sctlr_el1, x0
|         msr sctlr_el1, x0
|         ret
| sctlr_rmw_multiple:
|         stp     x29, x30, [sp, -16]!
|         mov     x29, sp
|         bl      sctlr_rmw
|         bl      sctlr_rmw
|         bl      sctlr_rmw
|         bl      sctlr_rmw
|         ldp     x29, x30, [sp], 16
|         ret
| function:
|         stp     x29, x30, [sp, -16]!
|         mov     x29, sp
|         bl      sctlr_read_multiple
|         bl      sctlr_write_multiple
|         bl      sctlr_rmw_multiple
|         isb
|         ldp     x29, x30, [sp], 16
|         ret



My x86_64 test case is:

| unsigned long rdmsr(unsigned long reg)
| {
|     unsigned int lo, hi;
| 
|     asm volatile(
|     "rdmsr"
|     : "=d" (hi), "=a" (lo)
|     : "c" (reg)
|     );
| 
|     return ((unsigned long)hi << 32) | lo;
| }
| 
| void wrmsr(unsigned long reg, unsigned long val)
| {
|     unsigned int lo = val;
|     unsigned int hi = val >> 32;
| 
|     asm volatile(
|     "wrmsr"
|     :
|     : "d" (hi), "a" (lo), "c" (reg)
|     );
| }
| 
| void msr_rmw_set_bits(unsigned long reg, unsigned long bits)
| {
|     unsigned long val;
| 
|     val = rdmsr(reg);
|     val |= bits;
|     wrmsr(reg, val);
| }
| 
| void func_with_msr_side_effects(unsigned long reg)
| {
|     msr_rmw_set_bits(reg, 1UL << 0);
|     msr_rmw_set_bits(reg, 1UL << 1);
|     msr_rmw_set_bits(reg, 1UL << 2);
|     msr_rmw_set_bits(reg, 1UL << 3);
| }

Per compiler explorer (https://godbolt.org/z/cveff9hq5) GCC trunk currently
compiles this as:

| msr_rmw_set_bits:
|         mov     rcx, rdi
|         rdmsr
|         sal     rdx, 32
|         mov     eax, eax
|         or      rax, rsi
|         or      rax, rdx
|         mov     rdx, rax
|         shr     rdx, 32
|         wrmsr
|         ret
| func_with_msr_side_effects:
|         ret

While GCC 11.2 compiles that as:

| msr_rmw_set_bits:
|         mov     rcx, rdi
|         rdmsr
|         sal     rdx, 32
|         mov     eax, eax
|         or      rax, rsi
|         or      rax, rdx
|         mov     rdx, rax
|         shr     rdx, 32
|         wrmsr
|         ret
| func_with_msr_side_effects:
|         push    rbp
|         push    rbx
|         mov     rbx, rdi
|         mov     rbp, rsi
|         call    msr_rmw_set_bits
|         mov     rsi, rbp
|         mov     rdi, rbx
|         call    msr_rmw_set_bits
|         mov     rsi, rbp
|         mov     rdi, rbx
|         call    msr_rmw_set_bits
|         mov     rsi, rbp
|         mov     rdi, rbx
|         call    msr_rmw_set_bits
|         pop     rbx
|         pop     rbp
|         ret

Thanks,
Mark.

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

  parent reply	other threads:[~2022-04-05 12:53 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-04-01 16:44 [PATCH] arm64/io: Remind compiler that there is a memory side effect Jeremy Linton
2022-04-01 17:22 ` Mark Rutland
2022-04-03  7:36   ` Andrew Pinski
2022-04-03  7:47     ` Ard Biesheuvel
2022-04-03  7:47       ` Ard Biesheuvel
2022-04-04  9:14         ` Will Deacon
2022-04-03 17:40     ` Doug Berger
2022-04-05 12:51   ` Mark Rutland [this message]
2022-04-05 13:04     ` GCC 12 miscompilation of volatile asm (was: Re: [PATCH] arm64/io: Remind compiler that there is a memory side effect) Mark Rutland
2022-04-05 13:20       ` Andrew Cooper
2022-04-05 14:05     ` Peter Zijlstra
2022-04-11 10:22       ` Mark Rutland
2022-04-11 10:31     ` Mark Rutland
2022-04-11 19:02       ` Jeremy Linton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Ykw7UnlTnx63z/Ca@FVFF77S0Q05N \
    --to=mark.rutland@arm.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=ardb@kernel.org \
    --cc=catalin.marinas@arm.com \
    --cc=f.fainelli@gmail.com \
    --cc=gcc@gcc.gnu.org \
    --cc=jeremy.linton@arm.com \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=marcan@marcan.st \
    --cc=maz@kernel.org \
    --cc=opendmb@gmail.com \
    --cc=peterz@infradead.org \
    --cc=pinskia@gmail.com \
    --cc=szabolcs.nagy@arm.com \
    --cc=will@kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).