[PATCH v3 1/3] asm-generic/atomic: Use __always_inline for pure wrappers
From: Marco Elver @ 2019-11-26 14:04 UTC
  To: elver
  Cc: mark.rutland, paulmck, linux-kernel, will, peterz, boqun.feng,
	arnd, dvyukov, linux-arch, kasan-dev, Randy Dunlap

Prefer __always_inline for atomic wrappers. When building for size
(CC_OPTIMIZE_FOR_SIZE), some compilers appear to be less inclined to
inline even relatively small static inline functions that are assumed
to be inlinable, such as the atomic ops. This can cause problems, for
example in UACCESS regions.
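
As a hypothetical illustration (the function and variable names below
are made up for this example, not taken from the patch): if the wrapper
stays out of line, the call lands inside the
user_access_begin()/user_access_end() window and objtool can report a
"call to ... with UACCESS enabled" warning:

  static int put_counter(int __user *uptr, const atomic_t *counter)
  {
  	if (!user_access_begin(uptr, sizeof(*uptr)))
  		return -EFAULT;
  	/* If atomic_read() is emitted as a real call, objtool flags it. */
  	unsafe_put_user(atomic_read(counter), uptr, efault);
  	user_access_end();
  	return 0;
  efault:
  	user_access_end();
  	return -EFAULT;
  }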

By using __always_inline, we let the real implementation, and not the
wrapper, determine the final inlining preference.
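
Concretely, each generated wrapper changes from the first shape below
to the second (atomic_read() from the diff is used as the example);
together with __atomic_check_read(), which this patch also marks
__always_inline, the wrapper itself no longer adds an inlining decision:

  static inline int
  atomic_read(const atomic_t *v)
  {
  	__atomic_check_read(v, sizeof(*v));
  	return arch_atomic_read(v);
  }

  static __always_inline int
  atomic_read(const atomic_t *v)
  {
  	__atomic_check_read(v, sizeof(*v));
  	return arch_atomic_read(v);
  }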

For x86 tinyconfig we observe:
- vmlinux baseline:   1316204 bytes
- vmlinux with patch: 1315988 bytes (-216 bytes)

This came up when addressing UACCESS warnings with CC_OPTIMIZE_FOR_SIZE
in the KCSAN runtime:
http://lkml.kernel.org/r/58708908-84a0-0a81-a836-ad97e33dbb62@infradead.org

Reported-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Marco Elver <elver@google.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
---
v3:
* Reorder includes alphabetically (except <asm/..>).

v2: http://lkml.kernel.org/r/20191126114121.85552-1-elver@google.com
* Add missing '#include <linux/compiler.h>'
* Add size diff to commit message.

v1: http://lkml.kernel.org/r/20191122154221.247680-1-elver@google.com
---
 include/asm-generic/atomic-instrumented.h | 335 +++++++++++-----------
 include/asm-generic/atomic-long.h         | 331 ++++++++++-----------
 scripts/atomic/gen-atomic-instrumented.sh |   7 +-
 scripts/atomic/gen-atomic-long.sh         |   3 +-
 4 files changed, 340 insertions(+), 336 deletions(-)

diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
index 3dc0f38544f6..63869ded73ac 100644
--- a/include/asm-generic/atomic-instrumented.h
+++ b/include/asm-generic/atomic-instrumented.h
@@ -18,22 +18,23 @@
 #define _ASM_GENERIC_ATOMIC_INSTRUMENTED_H
 
 #include <linux/build_bug.h>
+#include <linux/compiler.h>
 #include <linux/kasan-checks.h>
 #include <linux/kcsan-checks.h>
 
-static inline void __atomic_check_read(const volatile void *v, size_t size)
+static __always_inline void __atomic_check_read(const volatile void *v, size_t size)
 {
 	kasan_check_read(v, size);
 	kcsan_check_atomic_read(v, size);
 }
 
-static inline void __atomic_check_write(const volatile void *v, size_t size)
+static __always_inline void __atomic_check_write(const volatile void *v, size_t size)
 {
 	kasan_check_write(v, size);
 	kcsan_check_atomic_write(v, size);
 }
 
-static inline int
+static __always_inline int
 atomic_read(const atomic_t *v)
 {
 	__atomic_check_read(v, sizeof(*v));
@@ -42,7 +43,7 @@ atomic_read(const atomic_t *v)
 #define atomic_read atomic_read
 
 #if defined(arch_atomic_read_acquire)
-static inline int
+static __always_inline int
 atomic_read_acquire(const atomic_t *v)
 {
 	__atomic_check_read(v, sizeof(*v));
@@ -51,7 +52,7 @@ atomic_read_acquire(const atomic_t *v)
 #define atomic_read_acquire atomic_read_acquire
 #endif
 
-static inline void
+static __always_inline void
 atomic_set(atomic_t *v, int i)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -60,7 +61,7 @@ atomic_set(atomic_t *v, int i)
 #define atomic_set atomic_set
 
 #if defined(arch_atomic_set_release)
-static inline void
+static __always_inline void
 atomic_set_release(atomic_t *v, int i)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -69,7 +70,7 @@ atomic_set_release(atomic_t *v, int i)
 #define atomic_set_release atomic_set_release
 #endif
 
-static inline void
+static __always_inline void
 atomic_add(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -78,7 +79,7 @@ atomic_add(int i, atomic_t *v)
 #define atomic_add atomic_add
 
 #if !defined(arch_atomic_add_return_relaxed) || defined(arch_atomic_add_return)
-static inline int
+static __always_inline int
 atomic_add_return(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -88,7 +89,7 @@ atomic_add_return(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_add_return_acquire)
-static inline int
+static __always_inline int
 atomic_add_return_acquire(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -98,7 +99,7 @@ atomic_add_return_acquire(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_add_return_release)
-static inline int
+static __always_inline int
 atomic_add_return_release(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -108,7 +109,7 @@ atomic_add_return_release(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_add_return_relaxed)
-static inline int
+static __always_inline int
 atomic_add_return_relaxed(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -118,7 +119,7 @@ atomic_add_return_relaxed(int i, atomic_t *v)
 #endif
 
 #if !defined(arch_atomic_fetch_add_relaxed) || defined(arch_atomic_fetch_add)
-static inline int
+static __always_inline int
 atomic_fetch_add(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -128,7 +129,7 @@ atomic_fetch_add(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_fetch_add_acquire)
-static inline int
+static __always_inline int
 atomic_fetch_add_acquire(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -138,7 +139,7 @@ atomic_fetch_add_acquire(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_fetch_add_release)
-static inline int
+static __always_inline int
 atomic_fetch_add_release(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -148,7 +149,7 @@ atomic_fetch_add_release(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_fetch_add_relaxed)
-static inline int
+static __always_inline int
 atomic_fetch_add_relaxed(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -157,7 +158,7 @@ atomic_fetch_add_relaxed(int i, atomic_t *v)
 #define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
 #endif
 
-static inline void
+static __always_inline void
 atomic_sub(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -166,7 +167,7 @@ atomic_sub(int i, atomic_t *v)
 #define atomic_sub atomic_sub
 
 #if !defined(arch_atomic_sub_return_relaxed) || defined(arch_atomic_sub_return)
-static inline int
+static __always_inline int
 atomic_sub_return(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -176,7 +177,7 @@ atomic_sub_return(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_sub_return_acquire)
-static inline int
+static __always_inline int
 atomic_sub_return_acquire(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -186,7 +187,7 @@ atomic_sub_return_acquire(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_sub_return_release)
-static inline int
+static __always_inline int
 atomic_sub_return_release(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -196,7 +197,7 @@ atomic_sub_return_release(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_sub_return_relaxed)
-static inline int
+static __always_inline int
 atomic_sub_return_relaxed(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -206,7 +207,7 @@ atomic_sub_return_relaxed(int i, atomic_t *v)
 #endif
 
 #if !defined(arch_atomic_fetch_sub_relaxed) || defined(arch_atomic_fetch_sub)
-static inline int
+static __always_inline int
 atomic_fetch_sub(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -216,7 +217,7 @@ atomic_fetch_sub(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_fetch_sub_acquire)
-static inline int
+static __always_inline int
 atomic_fetch_sub_acquire(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -226,7 +227,7 @@ atomic_fetch_sub_acquire(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_fetch_sub_release)
-static inline int
+static __always_inline int
 atomic_fetch_sub_release(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -236,7 +237,7 @@ atomic_fetch_sub_release(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_fetch_sub_relaxed)
-static inline int
+static __always_inline int
 atomic_fetch_sub_relaxed(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -246,7 +247,7 @@ atomic_fetch_sub_relaxed(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_inc)
-static inline void
+static __always_inline void
 atomic_inc(atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -256,7 +257,7 @@ atomic_inc(atomic_t *v)
 #endif
 
 #if defined(arch_atomic_inc_return)
-static inline int
+static __always_inline int
 atomic_inc_return(atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -266,7 +267,7 @@ atomic_inc_return(atomic_t *v)
 #endif
 
 #if defined(arch_atomic_inc_return_acquire)
-static inline int
+static __always_inline int
 atomic_inc_return_acquire(atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -276,7 +277,7 @@ atomic_inc_return_acquire(atomic_t *v)
 #endif
 
 #if defined(arch_atomic_inc_return_release)
-static inline int
+static __always_inline int
 atomic_inc_return_release(atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -286,7 +287,7 @@ atomic_inc_return_release(atomic_t *v)
 #endif
 
 #if defined(arch_atomic_inc_return_relaxed)
-static inline int
+static __always_inline int
 atomic_inc_return_relaxed(atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -296,7 +297,7 @@ atomic_inc_return_relaxed(atomic_t *v)
 #endif
 
 #if defined(arch_atomic_fetch_inc)
-static inline int
+static __always_inline int
 atomic_fetch_inc(atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -306,7 +307,7 @@ atomic_fetch_inc(atomic_t *v)
 #endif
 
 #if defined(arch_atomic_fetch_inc_acquire)
-static inline int
+static __always_inline int
 atomic_fetch_inc_acquire(atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -316,7 +317,7 @@ atomic_fetch_inc_acquire(atomic_t *v)
 #endif
 
 #if defined(arch_atomic_fetch_inc_release)
-static inline int
+static __always_inline int
 atomic_fetch_inc_release(atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -326,7 +327,7 @@ atomic_fetch_inc_release(atomic_t *v)
 #endif
 
 #if defined(arch_atomic_fetch_inc_relaxed)
-static inline int
+static __always_inline int
 atomic_fetch_inc_relaxed(atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -336,7 +337,7 @@ atomic_fetch_inc_relaxed(atomic_t *v)
 #endif
 
 #if defined(arch_atomic_dec)
-static inline void
+static __always_inline void
 atomic_dec(atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -346,7 +347,7 @@ atomic_dec(atomic_t *v)
 #endif
 
 #if defined(arch_atomic_dec_return)
-static inline int
+static __always_inline int
 atomic_dec_return(atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -356,7 +357,7 @@ atomic_dec_return(atomic_t *v)
 #endif
 
 #if defined(arch_atomic_dec_return_acquire)
-static inline int
+static __always_inline int
 atomic_dec_return_acquire(atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -366,7 +367,7 @@ atomic_dec_return_acquire(atomic_t *v)
 #endif
 
 #if defined(arch_atomic_dec_return_release)
-static inline int
+static __always_inline int
 atomic_dec_return_release(atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -376,7 +377,7 @@ atomic_dec_return_release(atomic_t *v)
 #endif
 
 #if defined(arch_atomic_dec_return_relaxed)
-static inline int
+static __always_inline int
 atomic_dec_return_relaxed(atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -386,7 +387,7 @@ atomic_dec_return_relaxed(atomic_t *v)
 #endif
 
 #if defined(arch_atomic_fetch_dec)
-static inline int
+static __always_inline int
 atomic_fetch_dec(atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -396,7 +397,7 @@ atomic_fetch_dec(atomic_t *v)
 #endif
 
 #if defined(arch_atomic_fetch_dec_acquire)
-static inline int
+static __always_inline int
 atomic_fetch_dec_acquire(atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -406,7 +407,7 @@ atomic_fetch_dec_acquire(atomic_t *v)
 #endif
 
 #if defined(arch_atomic_fetch_dec_release)
-static inline int
+static __always_inline int
 atomic_fetch_dec_release(atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -416,7 +417,7 @@ atomic_fetch_dec_release(atomic_t *v)
 #endif
 
 #if defined(arch_atomic_fetch_dec_relaxed)
-static inline int
+static __always_inline int
 atomic_fetch_dec_relaxed(atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -425,7 +426,7 @@ atomic_fetch_dec_relaxed(atomic_t *v)
 #define atomic_fetch_dec_relaxed atomic_fetch_dec_relaxed
 #endif
 
-static inline void
+static __always_inline void
 atomic_and(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -434,7 +435,7 @@ atomic_and(int i, atomic_t *v)
 #define atomic_and atomic_and
 
 #if !defined(arch_atomic_fetch_and_relaxed) || defined(arch_atomic_fetch_and)
-static inline int
+static __always_inline int
 atomic_fetch_and(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -444,7 +445,7 @@ atomic_fetch_and(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_fetch_and_acquire)
-static inline int
+static __always_inline int
 atomic_fetch_and_acquire(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -454,7 +455,7 @@ atomic_fetch_and_acquire(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_fetch_and_release)
-static inline int
+static __always_inline int
 atomic_fetch_and_release(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -464,7 +465,7 @@ atomic_fetch_and_release(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_fetch_and_relaxed)
-static inline int
+static __always_inline int
 atomic_fetch_and_relaxed(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -474,7 +475,7 @@ atomic_fetch_and_relaxed(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_andnot)
-static inline void
+static __always_inline void
 atomic_andnot(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -484,7 +485,7 @@ atomic_andnot(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_fetch_andnot)
-static inline int
+static __always_inline int
 atomic_fetch_andnot(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -494,7 +495,7 @@ atomic_fetch_andnot(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_fetch_andnot_acquire)
-static inline int
+static __always_inline int
 atomic_fetch_andnot_acquire(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -504,7 +505,7 @@ atomic_fetch_andnot_acquire(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_fetch_andnot_release)
-static inline int
+static __always_inline int
 atomic_fetch_andnot_release(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -514,7 +515,7 @@ atomic_fetch_andnot_release(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_fetch_andnot_relaxed)
-static inline int
+static __always_inline int
 atomic_fetch_andnot_relaxed(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -523,7 +524,7 @@ atomic_fetch_andnot_relaxed(int i, atomic_t *v)
 #define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed
 #endif
 
-static inline void
+static __always_inline void
 atomic_or(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -532,7 +533,7 @@ atomic_or(int i, atomic_t *v)
 #define atomic_or atomic_or
 
 #if !defined(arch_atomic_fetch_or_relaxed) || defined(arch_atomic_fetch_or)
-static inline int
+static __always_inline int
 atomic_fetch_or(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -542,7 +543,7 @@ atomic_fetch_or(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_fetch_or_acquire)
-static inline int
+static __always_inline int
 atomic_fetch_or_acquire(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -552,7 +553,7 @@ atomic_fetch_or_acquire(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_fetch_or_release)
-static inline int
+static __always_inline int
 atomic_fetch_or_release(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -562,7 +563,7 @@ atomic_fetch_or_release(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_fetch_or_relaxed)
-static inline int
+static __always_inline int
 atomic_fetch_or_relaxed(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -571,7 +572,7 @@ atomic_fetch_or_relaxed(int i, atomic_t *v)
 #define atomic_fetch_or_relaxed atomic_fetch_or_relaxed
 #endif
 
-static inline void
+static __always_inline void
 atomic_xor(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -580,7 +581,7 @@ atomic_xor(int i, atomic_t *v)
 #define atomic_xor atomic_xor
 
 #if !defined(arch_atomic_fetch_xor_relaxed) || defined(arch_atomic_fetch_xor)
-static inline int
+static __always_inline int
 atomic_fetch_xor(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -590,7 +591,7 @@ atomic_fetch_xor(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_fetch_xor_acquire)
-static inline int
+static __always_inline int
 atomic_fetch_xor_acquire(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -600,7 +601,7 @@ atomic_fetch_xor_acquire(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_fetch_xor_release)
-static inline int
+static __always_inline int
 atomic_fetch_xor_release(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -610,7 +611,7 @@ atomic_fetch_xor_release(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_fetch_xor_relaxed)
-static inline int
+static __always_inline int
 atomic_fetch_xor_relaxed(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -620,7 +621,7 @@ atomic_fetch_xor_relaxed(int i, atomic_t *v)
 #endif
 
 #if !defined(arch_atomic_xchg_relaxed) || defined(arch_atomic_xchg)
-static inline int
+static __always_inline int
 atomic_xchg(atomic_t *v, int i)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -630,7 +631,7 @@ atomic_xchg(atomic_t *v, int i)
 #endif
 
 #if defined(arch_atomic_xchg_acquire)
-static inline int
+static __always_inline int
 atomic_xchg_acquire(atomic_t *v, int i)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -640,7 +641,7 @@ atomic_xchg_acquire(atomic_t *v, int i)
 #endif
 
 #if defined(arch_atomic_xchg_release)
-static inline int
+static __always_inline int
 atomic_xchg_release(atomic_t *v, int i)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -650,7 +651,7 @@ atomic_xchg_release(atomic_t *v, int i)
 #endif
 
 #if defined(arch_atomic_xchg_relaxed)
-static inline int
+static __always_inline int
 atomic_xchg_relaxed(atomic_t *v, int i)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -660,7 +661,7 @@ atomic_xchg_relaxed(atomic_t *v, int i)
 #endif
 
 #if !defined(arch_atomic_cmpxchg_relaxed) || defined(arch_atomic_cmpxchg)
-static inline int
+static __always_inline int
 atomic_cmpxchg(atomic_t *v, int old, int new)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -670,7 +671,7 @@ atomic_cmpxchg(atomic_t *v, int old, int new)
 #endif
 
 #if defined(arch_atomic_cmpxchg_acquire)
-static inline int
+static __always_inline int
 atomic_cmpxchg_acquire(atomic_t *v, int old, int new)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -680,7 +681,7 @@ atomic_cmpxchg_acquire(atomic_t *v, int old, int new)
 #endif
 
 #if defined(arch_atomic_cmpxchg_release)
-static inline int
+static __always_inline int
 atomic_cmpxchg_release(atomic_t *v, int old, int new)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -690,7 +691,7 @@ atomic_cmpxchg_release(atomic_t *v, int old, int new)
 #endif
 
 #if defined(arch_atomic_cmpxchg_relaxed)
-static inline int
+static __always_inline int
 atomic_cmpxchg_relaxed(atomic_t *v, int old, int new)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -700,7 +701,7 @@ atomic_cmpxchg_relaxed(atomic_t *v, int old, int new)
 #endif
 
 #if defined(arch_atomic_try_cmpxchg)
-static inline bool
+static __always_inline bool
 atomic_try_cmpxchg(atomic_t *v, int *old, int new)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -711,7 +712,7 @@ atomic_try_cmpxchg(atomic_t *v, int *old, int new)
 #endif
 
 #if defined(arch_atomic_try_cmpxchg_acquire)
-static inline bool
+static __always_inline bool
 atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -722,7 +723,7 @@ atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
 #endif
 
 #if defined(arch_atomic_try_cmpxchg_release)
-static inline bool
+static __always_inline bool
 atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -733,7 +734,7 @@ atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
 #endif
 
 #if defined(arch_atomic_try_cmpxchg_relaxed)
-static inline bool
+static __always_inline bool
 atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -744,7 +745,7 @@ atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new)
 #endif
 
 #if defined(arch_atomic_sub_and_test)
-static inline bool
+static __always_inline bool
 atomic_sub_and_test(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -754,7 +755,7 @@ atomic_sub_and_test(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_dec_and_test)
-static inline bool
+static __always_inline bool
 atomic_dec_and_test(atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -764,7 +765,7 @@ atomic_dec_and_test(atomic_t *v)
 #endif
 
 #if defined(arch_atomic_inc_and_test)
-static inline bool
+static __always_inline bool
 atomic_inc_and_test(atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -774,7 +775,7 @@ atomic_inc_and_test(atomic_t *v)
 #endif
 
 #if defined(arch_atomic_add_negative)
-static inline bool
+static __always_inline bool
 atomic_add_negative(int i, atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -784,7 +785,7 @@ atomic_add_negative(int i, atomic_t *v)
 #endif
 
 #if defined(arch_atomic_fetch_add_unless)
-static inline int
+static __always_inline int
 atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -794,7 +795,7 @@ atomic_fetch_add_unless(atomic_t *v, int a, int u)
 #endif
 
 #if defined(arch_atomic_add_unless)
-static inline bool
+static __always_inline bool
 atomic_add_unless(atomic_t *v, int a, int u)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -804,7 +805,7 @@ atomic_add_unless(atomic_t *v, int a, int u)
 #endif
 
 #if defined(arch_atomic_inc_not_zero)
-static inline bool
+static __always_inline bool
 atomic_inc_not_zero(atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -814,7 +815,7 @@ atomic_inc_not_zero(atomic_t *v)
 #endif
 
 #if defined(arch_atomic_inc_unless_negative)
-static inline bool
+static __always_inline bool
 atomic_inc_unless_negative(atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -824,7 +825,7 @@ atomic_inc_unless_negative(atomic_t *v)
 #endif
 
 #if defined(arch_atomic_dec_unless_positive)
-static inline bool
+static __always_inline bool
 atomic_dec_unless_positive(atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -834,7 +835,7 @@ atomic_dec_unless_positive(atomic_t *v)
 #endif
 
 #if defined(arch_atomic_dec_if_positive)
-static inline int
+static __always_inline int
 atomic_dec_if_positive(atomic_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -843,7 +844,7 @@ atomic_dec_if_positive(atomic_t *v)
 #define atomic_dec_if_positive atomic_dec_if_positive
 #endif
 
-static inline s64
+static __always_inline s64
 atomic64_read(const atomic64_t *v)
 {
 	__atomic_check_read(v, sizeof(*v));
@@ -852,7 +853,7 @@ atomic64_read(const atomic64_t *v)
 #define atomic64_read atomic64_read
 
 #if defined(arch_atomic64_read_acquire)
-static inline s64
+static __always_inline s64
 atomic64_read_acquire(const atomic64_t *v)
 {
 	__atomic_check_read(v, sizeof(*v));
@@ -861,7 +862,7 @@ atomic64_read_acquire(const atomic64_t *v)
 #define atomic64_read_acquire atomic64_read_acquire
 #endif
 
-static inline void
+static __always_inline void
 atomic64_set(atomic64_t *v, s64 i)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -870,7 +871,7 @@ atomic64_set(atomic64_t *v, s64 i)
 #define atomic64_set atomic64_set
 
 #if defined(arch_atomic64_set_release)
-static inline void
+static __always_inline void
 atomic64_set_release(atomic64_t *v, s64 i)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -879,7 +880,7 @@ atomic64_set_release(atomic64_t *v, s64 i)
 #define atomic64_set_release atomic64_set_release
 #endif
 
-static inline void
+static __always_inline void
 atomic64_add(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -888,7 +889,7 @@ atomic64_add(s64 i, atomic64_t *v)
 #define atomic64_add atomic64_add
 
 #if !defined(arch_atomic64_add_return_relaxed) || defined(arch_atomic64_add_return)
-static inline s64
+static __always_inline s64
 atomic64_add_return(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -898,7 +899,7 @@ atomic64_add_return(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_add_return_acquire)
-static inline s64
+static __always_inline s64
 atomic64_add_return_acquire(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -908,7 +909,7 @@ atomic64_add_return_acquire(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_add_return_release)
-static inline s64
+static __always_inline s64
 atomic64_add_return_release(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -918,7 +919,7 @@ atomic64_add_return_release(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_add_return_relaxed)
-static inline s64
+static __always_inline s64
 atomic64_add_return_relaxed(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -928,7 +929,7 @@ atomic64_add_return_relaxed(s64 i, atomic64_t *v)
 #endif
 
 #if !defined(arch_atomic64_fetch_add_relaxed) || defined(arch_atomic64_fetch_add)
-static inline s64
+static __always_inline s64
 atomic64_fetch_add(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -938,7 +939,7 @@ atomic64_fetch_add(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_fetch_add_acquire)
-static inline s64
+static __always_inline s64
 atomic64_fetch_add_acquire(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -948,7 +949,7 @@ atomic64_fetch_add_acquire(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_fetch_add_release)
-static inline s64
+static __always_inline s64
 atomic64_fetch_add_release(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -958,7 +959,7 @@ atomic64_fetch_add_release(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_fetch_add_relaxed)
-static inline s64
+static __always_inline s64
 atomic64_fetch_add_relaxed(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -967,7 +968,7 @@ atomic64_fetch_add_relaxed(s64 i, atomic64_t *v)
 #define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
 #endif
 
-static inline void
+static __always_inline void
 atomic64_sub(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -976,7 +977,7 @@ atomic64_sub(s64 i, atomic64_t *v)
 #define atomic64_sub atomic64_sub
 
 #if !defined(arch_atomic64_sub_return_relaxed) || defined(arch_atomic64_sub_return)
-static inline s64
+static __always_inline s64
 atomic64_sub_return(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -986,7 +987,7 @@ atomic64_sub_return(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_sub_return_acquire)
-static inline s64
+static __always_inline s64
 atomic64_sub_return_acquire(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -996,7 +997,7 @@ atomic64_sub_return_acquire(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_sub_return_release)
-static inline s64
+static __always_inline s64
 atomic64_sub_return_release(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1006,7 +1007,7 @@ atomic64_sub_return_release(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_sub_return_relaxed)
-static inline s64
+static __always_inline s64
 atomic64_sub_return_relaxed(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1016,7 +1017,7 @@ atomic64_sub_return_relaxed(s64 i, atomic64_t *v)
 #endif
 
 #if !defined(arch_atomic64_fetch_sub_relaxed) || defined(arch_atomic64_fetch_sub)
-static inline s64
+static __always_inline s64
 atomic64_fetch_sub(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1026,7 +1027,7 @@ atomic64_fetch_sub(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_fetch_sub_acquire)
-static inline s64
+static __always_inline s64
 atomic64_fetch_sub_acquire(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1036,7 +1037,7 @@ atomic64_fetch_sub_acquire(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_fetch_sub_release)
-static inline s64
+static __always_inline s64
 atomic64_fetch_sub_release(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1046,7 +1047,7 @@ atomic64_fetch_sub_release(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_fetch_sub_relaxed)
-static inline s64
+static __always_inline s64
 atomic64_fetch_sub_relaxed(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1056,7 +1057,7 @@ atomic64_fetch_sub_relaxed(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_inc)
-static inline void
+static __always_inline void
 atomic64_inc(atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1066,7 +1067,7 @@ atomic64_inc(atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_inc_return)
-static inline s64
+static __always_inline s64
 atomic64_inc_return(atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1076,7 +1077,7 @@ atomic64_inc_return(atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_inc_return_acquire)
-static inline s64
+static __always_inline s64
 atomic64_inc_return_acquire(atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1086,7 +1087,7 @@ atomic64_inc_return_acquire(atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_inc_return_release)
-static inline s64
+static __always_inline s64
 atomic64_inc_return_release(atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1096,7 +1097,7 @@ atomic64_inc_return_release(atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_inc_return_relaxed)
-static inline s64
+static __always_inline s64
 atomic64_inc_return_relaxed(atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1106,7 +1107,7 @@ atomic64_inc_return_relaxed(atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_fetch_inc)
-static inline s64
+static __always_inline s64
 atomic64_fetch_inc(atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1116,7 +1117,7 @@ atomic64_fetch_inc(atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_fetch_inc_acquire)
-static inline s64
+static __always_inline s64
 atomic64_fetch_inc_acquire(atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1126,7 +1127,7 @@ atomic64_fetch_inc_acquire(atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_fetch_inc_release)
-static inline s64
+static __always_inline s64
 atomic64_fetch_inc_release(atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1136,7 +1137,7 @@ atomic64_fetch_inc_release(atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_fetch_inc_relaxed)
-static inline s64
+static __always_inline s64
 atomic64_fetch_inc_relaxed(atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1146,7 +1147,7 @@ atomic64_fetch_inc_relaxed(atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_dec)
-static inline void
+static __always_inline void
 atomic64_dec(atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1156,7 +1157,7 @@ atomic64_dec(atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_dec_return)
-static inline s64
+static __always_inline s64
 atomic64_dec_return(atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1166,7 +1167,7 @@ atomic64_dec_return(atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_dec_return_acquire)
-static inline s64
+static __always_inline s64
 atomic64_dec_return_acquire(atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1176,7 +1177,7 @@ atomic64_dec_return_acquire(atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_dec_return_release)
-static inline s64
+static __always_inline s64
 atomic64_dec_return_release(atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1186,7 +1187,7 @@ atomic64_dec_return_release(atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_dec_return_relaxed)
-static inline s64
+static __always_inline s64
 atomic64_dec_return_relaxed(atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1196,7 +1197,7 @@ atomic64_dec_return_relaxed(atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_fetch_dec)
-static inline s64
+static __always_inline s64
 atomic64_fetch_dec(atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1206,7 +1207,7 @@ atomic64_fetch_dec(atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_fetch_dec_acquire)
-static inline s64
+static __always_inline s64
 atomic64_fetch_dec_acquire(atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1216,7 +1217,7 @@ atomic64_fetch_dec_acquire(atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_fetch_dec_release)
-static inline s64
+static __always_inline s64
 atomic64_fetch_dec_release(atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1226,7 +1227,7 @@ atomic64_fetch_dec_release(atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_fetch_dec_relaxed)
-static inline s64
+static __always_inline s64
 atomic64_fetch_dec_relaxed(atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1235,7 +1236,7 @@ atomic64_fetch_dec_relaxed(atomic64_t *v)
 #define atomic64_fetch_dec_relaxed atomic64_fetch_dec_relaxed
 #endif
 
-static inline void
+static __always_inline void
 atomic64_and(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1244,7 +1245,7 @@ atomic64_and(s64 i, atomic64_t *v)
 #define atomic64_and atomic64_and
 
 #if !defined(arch_atomic64_fetch_and_relaxed) || defined(arch_atomic64_fetch_and)
-static inline s64
+static __always_inline s64
 atomic64_fetch_and(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1254,7 +1255,7 @@ atomic64_fetch_and(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_fetch_and_acquire)
-static inline s64
+static __always_inline s64
 atomic64_fetch_and_acquire(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1264,7 +1265,7 @@ atomic64_fetch_and_acquire(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_fetch_and_release)
-static inline s64
+static __always_inline s64
 atomic64_fetch_and_release(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1274,7 +1275,7 @@ atomic64_fetch_and_release(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_fetch_and_relaxed)
-static inline s64
+static __always_inline s64
 atomic64_fetch_and_relaxed(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1284,7 +1285,7 @@ atomic64_fetch_and_relaxed(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_andnot)
-static inline void
+static __always_inline void
 atomic64_andnot(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1294,7 +1295,7 @@ atomic64_andnot(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_fetch_andnot)
-static inline s64
+static __always_inline s64
 atomic64_fetch_andnot(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1304,7 +1305,7 @@ atomic64_fetch_andnot(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_fetch_andnot_acquire)
-static inline s64
+static __always_inline s64
 atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1314,7 +1315,7 @@ atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_fetch_andnot_release)
-static inline s64
+static __always_inline s64
 atomic64_fetch_andnot_release(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1324,7 +1325,7 @@ atomic64_fetch_andnot_release(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_fetch_andnot_relaxed)
-static inline s64
+static __always_inline s64
 atomic64_fetch_andnot_relaxed(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1333,7 +1334,7 @@ atomic64_fetch_andnot_relaxed(s64 i, atomic64_t *v)
 #define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed
 #endif
 
-static inline void
+static __always_inline void
 atomic64_or(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1342,7 +1343,7 @@ atomic64_or(s64 i, atomic64_t *v)
 #define atomic64_or atomic64_or
 
 #if !defined(arch_atomic64_fetch_or_relaxed) || defined(arch_atomic64_fetch_or)
-static inline s64
+static __always_inline s64
 atomic64_fetch_or(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1352,7 +1353,7 @@ atomic64_fetch_or(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_fetch_or_acquire)
-static inline s64
+static __always_inline s64
 atomic64_fetch_or_acquire(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1362,7 +1363,7 @@ atomic64_fetch_or_acquire(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_fetch_or_release)
-static inline s64
+static __always_inline s64
 atomic64_fetch_or_release(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1372,7 +1373,7 @@ atomic64_fetch_or_release(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_fetch_or_relaxed)
-static inline s64
+static __always_inline s64
 atomic64_fetch_or_relaxed(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1381,7 +1382,7 @@ atomic64_fetch_or_relaxed(s64 i, atomic64_t *v)
 #define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed
 #endif
 
-static inline void
+static __always_inline void
 atomic64_xor(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1390,7 +1391,7 @@ atomic64_xor(s64 i, atomic64_t *v)
 #define atomic64_xor atomic64_xor
 
 #if !defined(arch_atomic64_fetch_xor_relaxed) || defined(arch_atomic64_fetch_xor)
-static inline s64
+static __always_inline s64
 atomic64_fetch_xor(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1400,7 +1401,7 @@ atomic64_fetch_xor(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_fetch_xor_acquire)
-static inline s64
+static __always_inline s64
 atomic64_fetch_xor_acquire(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1410,7 +1411,7 @@ atomic64_fetch_xor_acquire(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_fetch_xor_release)
-static inline s64
+static __always_inline s64
 atomic64_fetch_xor_release(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1420,7 +1421,7 @@ atomic64_fetch_xor_release(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_fetch_xor_relaxed)
-static inline s64
+static __always_inline s64
 atomic64_fetch_xor_relaxed(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1430,7 +1431,7 @@ atomic64_fetch_xor_relaxed(s64 i, atomic64_t *v)
 #endif
 
 #if !defined(arch_atomic64_xchg_relaxed) || defined(arch_atomic64_xchg)
-static inline s64
+static __always_inline s64
 atomic64_xchg(atomic64_t *v, s64 i)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1440,7 +1441,7 @@ atomic64_xchg(atomic64_t *v, s64 i)
 #endif
 
 #if defined(arch_atomic64_xchg_acquire)
-static inline s64
+static __always_inline s64
 atomic64_xchg_acquire(atomic64_t *v, s64 i)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1450,7 +1451,7 @@ atomic64_xchg_acquire(atomic64_t *v, s64 i)
 #endif
 
 #if defined(arch_atomic64_xchg_release)
-static inline s64
+static __always_inline s64
 atomic64_xchg_release(atomic64_t *v, s64 i)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1460,7 +1461,7 @@ atomic64_xchg_release(atomic64_t *v, s64 i)
 #endif
 
 #if defined(arch_atomic64_xchg_relaxed)
-static inline s64
+static __always_inline s64
 atomic64_xchg_relaxed(atomic64_t *v, s64 i)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1470,7 +1471,7 @@ atomic64_xchg_relaxed(atomic64_t *v, s64 i)
 #endif
 
 #if !defined(arch_atomic64_cmpxchg_relaxed) || defined(arch_atomic64_cmpxchg)
-static inline s64
+static __always_inline s64
 atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1480,7 +1481,7 @@ atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
 #endif
 
 #if defined(arch_atomic64_cmpxchg_acquire)
-static inline s64
+static __always_inline s64
 atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1490,7 +1491,7 @@ atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new)
 #endif
 
 #if defined(arch_atomic64_cmpxchg_release)
-static inline s64
+static __always_inline s64
 atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1500,7 +1501,7 @@ atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new)
 #endif
 
 #if defined(arch_atomic64_cmpxchg_relaxed)
-static inline s64
+static __always_inline s64
 atomic64_cmpxchg_relaxed(atomic64_t *v, s64 old, s64 new)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1510,7 +1511,7 @@ atomic64_cmpxchg_relaxed(atomic64_t *v, s64 old, s64 new)
 #endif
 
 #if defined(arch_atomic64_try_cmpxchg)
-static inline bool
+static __always_inline bool
 atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1521,7 +1522,7 @@ atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
 #endif
 
 #if defined(arch_atomic64_try_cmpxchg_acquire)
-static inline bool
+static __always_inline bool
 atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1532,7 +1533,7 @@ atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
 #endif
 
 #if defined(arch_atomic64_try_cmpxchg_release)
-static inline bool
+static __always_inline bool
 atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1543,7 +1544,7 @@ atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
 #endif
 
 #if defined(arch_atomic64_try_cmpxchg_relaxed)
-static inline bool
+static __always_inline bool
 atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1554,7 +1555,7 @@ atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new)
 #endif
 
 #if defined(arch_atomic64_sub_and_test)
-static inline bool
+static __always_inline bool
 atomic64_sub_and_test(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1564,7 +1565,7 @@ atomic64_sub_and_test(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_dec_and_test)
-static inline bool
+static __always_inline bool
 atomic64_dec_and_test(atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1574,7 +1575,7 @@ atomic64_dec_and_test(atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_inc_and_test)
-static inline bool
+static __always_inline bool
 atomic64_inc_and_test(atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1584,7 +1585,7 @@ atomic64_inc_and_test(atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_add_negative)
-static inline bool
+static __always_inline bool
 atomic64_add_negative(s64 i, atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1594,7 +1595,7 @@ atomic64_add_negative(s64 i, atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_fetch_add_unless)
-static inline s64
+static __always_inline s64
 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1604,7 +1605,7 @@ atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 #endif
 
 #if defined(arch_atomic64_add_unless)
-static inline bool
+static __always_inline bool
 atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1614,7 +1615,7 @@ atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
 #endif
 
 #if defined(arch_atomic64_inc_not_zero)
-static inline bool
+static __always_inline bool
 atomic64_inc_not_zero(atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1624,7 +1625,7 @@ atomic64_inc_not_zero(atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_inc_unless_negative)
-static inline bool
+static __always_inline bool
 atomic64_inc_unless_negative(atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1634,7 +1635,7 @@ atomic64_inc_unless_negative(atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_dec_unless_positive)
-static inline bool
+static __always_inline bool
 atomic64_dec_unless_positive(atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1644,7 +1645,7 @@ atomic64_dec_unless_positive(atomic64_t *v)
 #endif
 
 #if defined(arch_atomic64_dec_if_positive)
-static inline s64
+static __always_inline s64
 atomic64_dec_if_positive(atomic64_t *v)
 {
 	__atomic_check_write(v, sizeof(*v));
@@ -1798,4 +1799,4 @@ atomic64_dec_if_positive(atomic64_t *v)
 })
 
 #endif /* _ASM_GENERIC_ATOMIC_INSTRUMENTED_H */
-// beea41c2a0f2c69e4958ed71bf26f59740fa4b12
+// 7b7e2af0e75c8ecb6f02298a7075f503f30d244c
diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h
index 881c7e27af28..073cf40f431b 100644
--- a/include/asm-generic/atomic-long.h
+++ b/include/asm-generic/atomic-long.h
@@ -6,6 +6,7 @@
 #ifndef _ASM_GENERIC_ATOMIC_LONG_H
 #define _ASM_GENERIC_ATOMIC_LONG_H
 
+#include <linux/compiler.h>
 #include <asm/types.h>
 
 #ifdef CONFIG_64BIT
@@ -22,493 +23,493 @@ typedef atomic_t atomic_long_t;
 
 #ifdef CONFIG_64BIT
 
-static inline long
+static __always_inline long
 atomic_long_read(const atomic_long_t *v)
 {
 	return atomic64_read(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_read_acquire(const atomic_long_t *v)
 {
 	return atomic64_read_acquire(v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_set(atomic_long_t *v, long i)
 {
 	atomic64_set(v, i);
 }
 
-static inline void
+static __always_inline void
 atomic_long_set_release(atomic_long_t *v, long i)
 {
 	atomic64_set_release(v, i);
 }
 
-static inline void
+static __always_inline void
 atomic_long_add(long i, atomic_long_t *v)
 {
 	atomic64_add(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_add_return(long i, atomic_long_t *v)
 {
 	return atomic64_add_return(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_add_return_acquire(long i, atomic_long_t *v)
 {
 	return atomic64_add_return_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_add_return_release(long i, atomic_long_t *v)
 {
 	return atomic64_add_return_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_add_return_relaxed(long i, atomic_long_t *v)
 {
 	return atomic64_add_return_relaxed(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_add(long i, atomic_long_t *v)
 {
 	return atomic64_fetch_add(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_add_acquire(long i, atomic_long_t *v)
 {
 	return atomic64_fetch_add_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_add_release(long i, atomic_long_t *v)
 {
 	return atomic64_fetch_add_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_add_relaxed(long i, atomic_long_t *v)
 {
 	return atomic64_fetch_add_relaxed(i, v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_sub(long i, atomic_long_t *v)
 {
 	atomic64_sub(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_sub_return(long i, atomic_long_t *v)
 {
 	return atomic64_sub_return(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_sub_return_acquire(long i, atomic_long_t *v)
 {
 	return atomic64_sub_return_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_sub_return_release(long i, atomic_long_t *v)
 {
 	return atomic64_sub_return_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_sub_return_relaxed(long i, atomic_long_t *v)
 {
 	return atomic64_sub_return_relaxed(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_sub(long i, atomic_long_t *v)
 {
 	return atomic64_fetch_sub(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_sub_acquire(long i, atomic_long_t *v)
 {
 	return atomic64_fetch_sub_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_sub_release(long i, atomic_long_t *v)
 {
 	return atomic64_fetch_sub_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_sub_relaxed(long i, atomic_long_t *v)
 {
 	return atomic64_fetch_sub_relaxed(i, v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_inc(atomic_long_t *v)
 {
 	atomic64_inc(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_inc_return(atomic_long_t *v)
 {
 	return atomic64_inc_return(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_inc_return_acquire(atomic_long_t *v)
 {
 	return atomic64_inc_return_acquire(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_inc_return_release(atomic_long_t *v)
 {
 	return atomic64_inc_return_release(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_inc_return_relaxed(atomic_long_t *v)
 {
 	return atomic64_inc_return_relaxed(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_inc(atomic_long_t *v)
 {
 	return atomic64_fetch_inc(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_inc_acquire(atomic_long_t *v)
 {
 	return atomic64_fetch_inc_acquire(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_inc_release(atomic_long_t *v)
 {
 	return atomic64_fetch_inc_release(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_inc_relaxed(atomic_long_t *v)
 {
 	return atomic64_fetch_inc_relaxed(v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_dec(atomic_long_t *v)
 {
 	atomic64_dec(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_dec_return(atomic_long_t *v)
 {
 	return atomic64_dec_return(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_dec_return_acquire(atomic_long_t *v)
 {
 	return atomic64_dec_return_acquire(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_dec_return_release(atomic_long_t *v)
 {
 	return atomic64_dec_return_release(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_dec_return_relaxed(atomic_long_t *v)
 {
 	return atomic64_dec_return_relaxed(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_dec(atomic_long_t *v)
 {
 	return atomic64_fetch_dec(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_dec_acquire(atomic_long_t *v)
 {
 	return atomic64_fetch_dec_acquire(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_dec_release(atomic_long_t *v)
 {
 	return atomic64_fetch_dec_release(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_dec_relaxed(atomic_long_t *v)
 {
 	return atomic64_fetch_dec_relaxed(v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_and(long i, atomic_long_t *v)
 {
 	atomic64_and(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_and(long i, atomic_long_t *v)
 {
 	return atomic64_fetch_and(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_and_acquire(long i, atomic_long_t *v)
 {
 	return atomic64_fetch_and_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_and_release(long i, atomic_long_t *v)
 {
 	return atomic64_fetch_and_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_and_relaxed(long i, atomic_long_t *v)
 {
 	return atomic64_fetch_and_relaxed(i, v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_andnot(long i, atomic_long_t *v)
 {
 	atomic64_andnot(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_andnot(long i, atomic_long_t *v)
 {
 	return atomic64_fetch_andnot(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_andnot_acquire(long i, atomic_long_t *v)
 {
 	return atomic64_fetch_andnot_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_andnot_release(long i, atomic_long_t *v)
 {
 	return atomic64_fetch_andnot_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_andnot_relaxed(long i, atomic_long_t *v)
 {
 	return atomic64_fetch_andnot_relaxed(i, v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_or(long i, atomic_long_t *v)
 {
 	atomic64_or(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_or(long i, atomic_long_t *v)
 {
 	return atomic64_fetch_or(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_or_acquire(long i, atomic_long_t *v)
 {
 	return atomic64_fetch_or_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_or_release(long i, atomic_long_t *v)
 {
 	return atomic64_fetch_or_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_or_relaxed(long i, atomic_long_t *v)
 {
 	return atomic64_fetch_or_relaxed(i, v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_xor(long i, atomic_long_t *v)
 {
 	atomic64_xor(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_xor(long i, atomic_long_t *v)
 {
 	return atomic64_fetch_xor(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_xor_acquire(long i, atomic_long_t *v)
 {
 	return atomic64_fetch_xor_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_xor_release(long i, atomic_long_t *v)
 {
 	return atomic64_fetch_xor_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_xor_relaxed(long i, atomic_long_t *v)
 {
 	return atomic64_fetch_xor_relaxed(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_xchg(atomic_long_t *v, long i)
 {
 	return atomic64_xchg(v, i);
 }
 
-static inline long
+static __always_inline long
 atomic_long_xchg_acquire(atomic_long_t *v, long i)
 {
 	return atomic64_xchg_acquire(v, i);
 }
 
-static inline long
+static __always_inline long
 atomic_long_xchg_release(atomic_long_t *v, long i)
 {
 	return atomic64_xchg_release(v, i);
 }
 
-static inline long
+static __always_inline long
 atomic_long_xchg_relaxed(atomic_long_t *v, long i)
 {
 	return atomic64_xchg_relaxed(v, i);
 }
 
-static inline long
+static __always_inline long
 atomic_long_cmpxchg(atomic_long_t *v, long old, long new)
 {
 	return atomic64_cmpxchg(v, old, new);
 }
 
-static inline long
+static __always_inline long
 atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new)
 {
 	return atomic64_cmpxchg_acquire(v, old, new);
 }
 
-static inline long
+static __always_inline long
 atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new)
 {
 	return atomic64_cmpxchg_release(v, old, new);
 }
 
-static inline long
+static __always_inline long
 atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new)
 {
 	return atomic64_cmpxchg_relaxed(v, old, new);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new)
 {
 	return atomic64_try_cmpxchg(v, (s64 *)old, new);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new)
 {
 	return atomic64_try_cmpxchg_acquire(v, (s64 *)old, new);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new)
 {
 	return atomic64_try_cmpxchg_release(v, (s64 *)old, new);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_try_cmpxchg_relaxed(atomic_long_t *v, long *old, long new)
 {
 	return atomic64_try_cmpxchg_relaxed(v, (s64 *)old, new);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_sub_and_test(long i, atomic_long_t *v)
 {
 	return atomic64_sub_and_test(i, v);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_dec_and_test(atomic_long_t *v)
 {
 	return atomic64_dec_and_test(v);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_inc_and_test(atomic_long_t *v)
 {
 	return atomic64_inc_and_test(v);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_add_negative(long i, atomic_long_t *v)
 {
 	return atomic64_add_negative(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u)
 {
 	return atomic64_fetch_add_unless(v, a, u);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_add_unless(atomic_long_t *v, long a, long u)
 {
 	return atomic64_add_unless(v, a, u);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_inc_not_zero(atomic_long_t *v)
 {
 	return atomic64_inc_not_zero(v);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_inc_unless_negative(atomic_long_t *v)
 {
 	return atomic64_inc_unless_negative(v);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_dec_unless_positive(atomic_long_t *v)
 {
 	return atomic64_dec_unless_positive(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_dec_if_positive(atomic_long_t *v)
 {
 	return atomic64_dec_if_positive(v);
@@ -516,493 +517,493 @@ atomic_long_dec_if_positive(atomic_long_t *v)
 
 #else /* CONFIG_64BIT */
 
-static inline long
+static __always_inline long
 atomic_long_read(const atomic_long_t *v)
 {
 	return atomic_read(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_read_acquire(const atomic_long_t *v)
 {
 	return atomic_read_acquire(v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_set(atomic_long_t *v, long i)
 {
 	atomic_set(v, i);
 }
 
-static inline void
+static __always_inline void
 atomic_long_set_release(atomic_long_t *v, long i)
 {
 	atomic_set_release(v, i);
 }
 
-static inline void
+static __always_inline void
 atomic_long_add(long i, atomic_long_t *v)
 {
 	atomic_add(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_add_return(long i, atomic_long_t *v)
 {
 	return atomic_add_return(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_add_return_acquire(long i, atomic_long_t *v)
 {
 	return atomic_add_return_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_add_return_release(long i, atomic_long_t *v)
 {
 	return atomic_add_return_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_add_return_relaxed(long i, atomic_long_t *v)
 {
 	return atomic_add_return_relaxed(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_add(long i, atomic_long_t *v)
 {
 	return atomic_fetch_add(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_add_acquire(long i, atomic_long_t *v)
 {
 	return atomic_fetch_add_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_add_release(long i, atomic_long_t *v)
 {
 	return atomic_fetch_add_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_add_relaxed(long i, atomic_long_t *v)
 {
 	return atomic_fetch_add_relaxed(i, v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_sub(long i, atomic_long_t *v)
 {
 	atomic_sub(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_sub_return(long i, atomic_long_t *v)
 {
 	return atomic_sub_return(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_sub_return_acquire(long i, atomic_long_t *v)
 {
 	return atomic_sub_return_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_sub_return_release(long i, atomic_long_t *v)
 {
 	return atomic_sub_return_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_sub_return_relaxed(long i, atomic_long_t *v)
 {
 	return atomic_sub_return_relaxed(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_sub(long i, atomic_long_t *v)
 {
 	return atomic_fetch_sub(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_sub_acquire(long i, atomic_long_t *v)
 {
 	return atomic_fetch_sub_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_sub_release(long i, atomic_long_t *v)
 {
 	return atomic_fetch_sub_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_sub_relaxed(long i, atomic_long_t *v)
 {
 	return atomic_fetch_sub_relaxed(i, v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_inc(atomic_long_t *v)
 {
 	atomic_inc(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_inc_return(atomic_long_t *v)
 {
 	return atomic_inc_return(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_inc_return_acquire(atomic_long_t *v)
 {
 	return atomic_inc_return_acquire(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_inc_return_release(atomic_long_t *v)
 {
 	return atomic_inc_return_release(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_inc_return_relaxed(atomic_long_t *v)
 {
 	return atomic_inc_return_relaxed(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_inc(atomic_long_t *v)
 {
 	return atomic_fetch_inc(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_inc_acquire(atomic_long_t *v)
 {
 	return atomic_fetch_inc_acquire(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_inc_release(atomic_long_t *v)
 {
 	return atomic_fetch_inc_release(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_inc_relaxed(atomic_long_t *v)
 {
 	return atomic_fetch_inc_relaxed(v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_dec(atomic_long_t *v)
 {
 	atomic_dec(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_dec_return(atomic_long_t *v)
 {
 	return atomic_dec_return(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_dec_return_acquire(atomic_long_t *v)
 {
 	return atomic_dec_return_acquire(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_dec_return_release(atomic_long_t *v)
 {
 	return atomic_dec_return_release(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_dec_return_relaxed(atomic_long_t *v)
 {
 	return atomic_dec_return_relaxed(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_dec(atomic_long_t *v)
 {
 	return atomic_fetch_dec(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_dec_acquire(atomic_long_t *v)
 {
 	return atomic_fetch_dec_acquire(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_dec_release(atomic_long_t *v)
 {
 	return atomic_fetch_dec_release(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_dec_relaxed(atomic_long_t *v)
 {
 	return atomic_fetch_dec_relaxed(v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_and(long i, atomic_long_t *v)
 {
 	atomic_and(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_and(long i, atomic_long_t *v)
 {
 	return atomic_fetch_and(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_and_acquire(long i, atomic_long_t *v)
 {
 	return atomic_fetch_and_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_and_release(long i, atomic_long_t *v)
 {
 	return atomic_fetch_and_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_and_relaxed(long i, atomic_long_t *v)
 {
 	return atomic_fetch_and_relaxed(i, v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_andnot(long i, atomic_long_t *v)
 {
 	atomic_andnot(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_andnot(long i, atomic_long_t *v)
 {
 	return atomic_fetch_andnot(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_andnot_acquire(long i, atomic_long_t *v)
 {
 	return atomic_fetch_andnot_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_andnot_release(long i, atomic_long_t *v)
 {
 	return atomic_fetch_andnot_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_andnot_relaxed(long i, atomic_long_t *v)
 {
 	return atomic_fetch_andnot_relaxed(i, v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_or(long i, atomic_long_t *v)
 {
 	atomic_or(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_or(long i, atomic_long_t *v)
 {
 	return atomic_fetch_or(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_or_acquire(long i, atomic_long_t *v)
 {
 	return atomic_fetch_or_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_or_release(long i, atomic_long_t *v)
 {
 	return atomic_fetch_or_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_or_relaxed(long i, atomic_long_t *v)
 {
 	return atomic_fetch_or_relaxed(i, v);
 }
 
-static inline void
+static __always_inline void
 atomic_long_xor(long i, atomic_long_t *v)
 {
 	atomic_xor(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_xor(long i, atomic_long_t *v)
 {
 	return atomic_fetch_xor(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_xor_acquire(long i, atomic_long_t *v)
 {
 	return atomic_fetch_xor_acquire(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_xor_release(long i, atomic_long_t *v)
 {
 	return atomic_fetch_xor_release(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_xor_relaxed(long i, atomic_long_t *v)
 {
 	return atomic_fetch_xor_relaxed(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_xchg(atomic_long_t *v, long i)
 {
 	return atomic_xchg(v, i);
 }
 
-static inline long
+static __always_inline long
 atomic_long_xchg_acquire(atomic_long_t *v, long i)
 {
 	return atomic_xchg_acquire(v, i);
 }
 
-static inline long
+static __always_inline long
 atomic_long_xchg_release(atomic_long_t *v, long i)
 {
 	return atomic_xchg_release(v, i);
 }
 
-static inline long
+static __always_inline long
 atomic_long_xchg_relaxed(atomic_long_t *v, long i)
 {
 	return atomic_xchg_relaxed(v, i);
 }
 
-static inline long
+static __always_inline long
 atomic_long_cmpxchg(atomic_long_t *v, long old, long new)
 {
 	return atomic_cmpxchg(v, old, new);
 }
 
-static inline long
+static __always_inline long
 atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new)
 {
 	return atomic_cmpxchg_acquire(v, old, new);
 }
 
-static inline long
+static __always_inline long
 atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new)
 {
 	return atomic_cmpxchg_release(v, old, new);
 }
 
-static inline long
+static __always_inline long
 atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new)
 {
 	return atomic_cmpxchg_relaxed(v, old, new);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new)
 {
 	return atomic_try_cmpxchg(v, (int *)old, new);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new)
 {
 	return atomic_try_cmpxchg_acquire(v, (int *)old, new);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new)
 {
 	return atomic_try_cmpxchg_release(v, (int *)old, new);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_try_cmpxchg_relaxed(atomic_long_t *v, long *old, long new)
 {
 	return atomic_try_cmpxchg_relaxed(v, (int *)old, new);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_sub_and_test(long i, atomic_long_t *v)
 {
 	return atomic_sub_and_test(i, v);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_dec_and_test(atomic_long_t *v)
 {
 	return atomic_dec_and_test(v);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_inc_and_test(atomic_long_t *v)
 {
 	return atomic_inc_and_test(v);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_add_negative(long i, atomic_long_t *v)
 {
 	return atomic_add_negative(i, v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u)
 {
 	return atomic_fetch_add_unless(v, a, u);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_add_unless(atomic_long_t *v, long a, long u)
 {
 	return atomic_add_unless(v, a, u);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_inc_not_zero(atomic_long_t *v)
 {
 	return atomic_inc_not_zero(v);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_inc_unless_negative(atomic_long_t *v)
 {
 	return atomic_inc_unless_negative(v);
 }
 
-static inline bool
+static __always_inline bool
 atomic_long_dec_unless_positive(atomic_long_t *v)
 {
 	return atomic_dec_unless_positive(v);
 }
 
-static inline long
+static __always_inline long
 atomic_long_dec_if_positive(atomic_long_t *v)
 {
 	return atomic_dec_if_positive(v);
@@ -1010,4 +1011,4 @@ atomic_long_dec_if_positive(atomic_long_t *v)
 
 #endif /* CONFIG_64BIT */
 #endif /* _ASM_GENERIC_ATOMIC_LONG_H */
-// 77558968132ce4f911ad53f6f52ce423006f6268
+// a624200981f552b2c6be4f32fe44da8289f30d87
diff --git a/scripts/atomic/gen-atomic-instrumented.sh b/scripts/atomic/gen-atomic-instrumented.sh
index 8b8b2a6f8d68..fb4222548b22 100755
--- a/scripts/atomic/gen-atomic-instrumented.sh
+++ b/scripts/atomic/gen-atomic-instrumented.sh
@@ -84,7 +84,7 @@ gen_proto_order_variant()
 	[ ! -z "${guard}" ] && printf "#if ${guard}\n"
 
 cat <<EOF
-static inline ${ret}
+static __always_inline ${ret}
 ${atomicname}(${params})
 {
 ${checks}
@@ -147,16 +147,17 @@ cat << EOF
 #define _ASM_GENERIC_ATOMIC_INSTRUMENTED_H
 
 #include <linux/build_bug.h>
+#include <linux/compiler.h>
 #include <linux/kasan-checks.h>
 #include <linux/kcsan-checks.h>
 
-static inline void __atomic_check_read(const volatile void *v, size_t size)
+static __always_inline void __atomic_check_read(const volatile void *v, size_t size)
 {
 	kasan_check_read(v, size);
 	kcsan_check_atomic_read(v, size);
 }
 
-static inline void __atomic_check_write(const volatile void *v, size_t size)
+static __always_inline void __atomic_check_write(const volatile void *v, size_t size)
 {
 	kasan_check_write(v, size);
 	kcsan_check_atomic_write(v, size);
diff --git a/scripts/atomic/gen-atomic-long.sh b/scripts/atomic/gen-atomic-long.sh
index c240a7231b2e..e318d3f92e53 100755
--- a/scripts/atomic/gen-atomic-long.sh
+++ b/scripts/atomic/gen-atomic-long.sh
@@ -46,7 +46,7 @@ gen_proto_order_variant()
 	local retstmt="$(gen_ret_stmt "${meta}")"
 
 cat <<EOF
-static inline ${ret}
+static __always_inline ${ret}
 atomic_long_${name}(${params})
 {
 	${retstmt}${atomic}_${name}(${argscast});
@@ -64,6 +64,7 @@ cat << EOF
 #ifndef _ASM_GENERIC_ATOMIC_LONG_H
 #define _ASM_GENERIC_ATOMIC_LONG_H
 
+#include <linux/compiler.h>
 #include <asm/types.h>
 
 #ifdef CONFIG_64BIT
-- 
2.24.0.432.g9d3f5f5b63-goog


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v3 2/3] asm-generic/atomic: Use __always_inline for fallback wrappers
  2019-11-26 14:04 [PATCH v3 1/3] asm-generic/atomic: Use __always_inline for pure wrappers Marco Elver
@ 2019-11-26 14:04 ` Marco Elver
  2019-11-26 14:04 ` [PATCH v3 3/3] kcsan: Prefer __always_inline for fast-path Marco Elver
  2019-12-02 21:36 ` [PATCH v3 1/3] asm-generic/atomic: Use __always_inline for pure wrappers Paul E. McKenney
  2 siblings, 0 replies; 9+ messages in thread
From: Marco Elver @ 2019-11-26 14:04 UTC (permalink / raw)
  To: elver
  Cc: mark.rutland, paulmck, linux-kernel, will, peterz, boqun.feng,
	arnd, dvyukov, linux-arch, kasan-dev

Use __always_inline for atomic fallback wrappers. When building for size
(CC_OPTIMIZE_FOR_SIZE), some compilers appear to be less inclined to
inline even relatively small static inline functions that are assumed to
be inlinable, such as atomic ops. This can cause problems, for example in
UACCESS regions.

While the fallback wrappers aren't pure wrappers, they are trivial
nonetheless, and the function they wrap should determine the final
inlining policy.

For x86 tinyconfig we observe:
- vmlinux baseline: 1315988
- vmlinux with patch: 1315928 (-60 bytes)
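
A minimal sketch (illustrative only, mirroring the generated
atomic-fallback.h hunk further below): a fallback wrapper is a one-line
call into another atomic op, so with __always_inline it is the wrapped
operation, not the trivial wrapper, that determines what the compiler
finally emits under CC_OPTIMIZE_FOR_SIZE:

  #ifndef atomic_inc_return
  static __always_inline int
  atomic_inc_return(atomic_t *v)
  {
  	/* trivial wrapper: the real work happens in atomic_add_return() */
  	return atomic_add_return(1, v);
  }
  #define atomic_inc_return atomic_inc_return
  #endif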

Suggested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Marco Elver <elver@google.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
---
v2:
* Add patch to series.
---
 include/linux/atomic-fallback.h              | 340 ++++++++++---------
 scripts/atomic/fallbacks/acquire             |   2 +-
 scripts/atomic/fallbacks/add_negative        |   2 +-
 scripts/atomic/fallbacks/add_unless          |   2 +-
 scripts/atomic/fallbacks/andnot              |   2 +-
 scripts/atomic/fallbacks/dec                 |   2 +-
 scripts/atomic/fallbacks/dec_and_test        |   2 +-
 scripts/atomic/fallbacks/dec_if_positive     |   2 +-
 scripts/atomic/fallbacks/dec_unless_positive |   2 +-
 scripts/atomic/fallbacks/fence               |   2 +-
 scripts/atomic/fallbacks/fetch_add_unless    |   2 +-
 scripts/atomic/fallbacks/inc                 |   2 +-
 scripts/atomic/fallbacks/inc_and_test        |   2 +-
 scripts/atomic/fallbacks/inc_not_zero        |   2 +-
 scripts/atomic/fallbacks/inc_unless_negative |   2 +-
 scripts/atomic/fallbacks/read_acquire        |   2 +-
 scripts/atomic/fallbacks/release             |   2 +-
 scripts/atomic/fallbacks/set_release         |   2 +-
 scripts/atomic/fallbacks/sub_and_test        |   2 +-
 scripts/atomic/fallbacks/try_cmpxchg         |   2 +-
 scripts/atomic/gen-atomic-fallback.sh        |   2 +
 21 files changed, 192 insertions(+), 188 deletions(-)

diff --git a/include/linux/atomic-fallback.h b/include/linux/atomic-fallback.h
index a7d240e465c0..656b5489b673 100644
--- a/include/linux/atomic-fallback.h
+++ b/include/linux/atomic-fallback.h
@@ -6,6 +6,8 @@
 #ifndef _LINUX_ATOMIC_FALLBACK_H
 #define _LINUX_ATOMIC_FALLBACK_H
 
+#include <linux/compiler.h>
+
 #ifndef xchg_relaxed
 #define xchg_relaxed		xchg
 #define xchg_acquire		xchg
@@ -76,7 +78,7 @@
 #endif /* cmpxchg64_relaxed */
 
 #ifndef atomic_read_acquire
-static inline int
+static __always_inline int
 atomic_read_acquire(const atomic_t *v)
 {
 	return smp_load_acquire(&(v)->counter);
@@ -85,7 +87,7 @@ atomic_read_acquire(const atomic_t *v)
 #endif
 
 #ifndef atomic_set_release
-static inline void
+static __always_inline void
 atomic_set_release(atomic_t *v, int i)
 {
 	smp_store_release(&(v)->counter, i);
@@ -100,7 +102,7 @@ atomic_set_release(atomic_t *v, int i)
 #else /* atomic_add_return_relaxed */
 
 #ifndef atomic_add_return_acquire
-static inline int
+static __always_inline int
 atomic_add_return_acquire(int i, atomic_t *v)
 {
 	int ret = atomic_add_return_relaxed(i, v);
@@ -111,7 +113,7 @@ atomic_add_return_acquire(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_add_return_release
-static inline int
+static __always_inline int
 atomic_add_return_release(int i, atomic_t *v)
 {
 	__atomic_release_fence();
@@ -121,7 +123,7 @@ atomic_add_return_release(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_add_return
-static inline int
+static __always_inline int
 atomic_add_return(int i, atomic_t *v)
 {
 	int ret;
@@ -142,7 +144,7 @@ atomic_add_return(int i, atomic_t *v)
 #else /* atomic_fetch_add_relaxed */
 
 #ifndef atomic_fetch_add_acquire
-static inline int
+static __always_inline int
 atomic_fetch_add_acquire(int i, atomic_t *v)
 {
 	int ret = atomic_fetch_add_relaxed(i, v);
@@ -153,7 +155,7 @@ atomic_fetch_add_acquire(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_add_release
-static inline int
+static __always_inline int
 atomic_fetch_add_release(int i, atomic_t *v)
 {
 	__atomic_release_fence();
@@ -163,7 +165,7 @@ atomic_fetch_add_release(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_add
-static inline int
+static __always_inline int
 atomic_fetch_add(int i, atomic_t *v)
 {
 	int ret;
@@ -184,7 +186,7 @@ atomic_fetch_add(int i, atomic_t *v)
 #else /* atomic_sub_return_relaxed */
 
 #ifndef atomic_sub_return_acquire
-static inline int
+static __always_inline int
 atomic_sub_return_acquire(int i, atomic_t *v)
 {
 	int ret = atomic_sub_return_relaxed(i, v);
@@ -195,7 +197,7 @@ atomic_sub_return_acquire(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_sub_return_release
-static inline int
+static __always_inline int
 atomic_sub_return_release(int i, atomic_t *v)
 {
 	__atomic_release_fence();
@@ -205,7 +207,7 @@ atomic_sub_return_release(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_sub_return
-static inline int
+static __always_inline int
 atomic_sub_return(int i, atomic_t *v)
 {
 	int ret;
@@ -226,7 +228,7 @@ atomic_sub_return(int i, atomic_t *v)
 #else /* atomic_fetch_sub_relaxed */
 
 #ifndef atomic_fetch_sub_acquire
-static inline int
+static __always_inline int
 atomic_fetch_sub_acquire(int i, atomic_t *v)
 {
 	int ret = atomic_fetch_sub_relaxed(i, v);
@@ -237,7 +239,7 @@ atomic_fetch_sub_acquire(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_sub_release
-static inline int
+static __always_inline int
 atomic_fetch_sub_release(int i, atomic_t *v)
 {
 	__atomic_release_fence();
@@ -247,7 +249,7 @@ atomic_fetch_sub_release(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_sub
-static inline int
+static __always_inline int
 atomic_fetch_sub(int i, atomic_t *v)
 {
 	int ret;
@@ -262,7 +264,7 @@ atomic_fetch_sub(int i, atomic_t *v)
 #endif /* atomic_fetch_sub_relaxed */
 
 #ifndef atomic_inc
-static inline void
+static __always_inline void
 atomic_inc(atomic_t *v)
 {
 	atomic_add(1, v);
@@ -278,7 +280,7 @@ atomic_inc(atomic_t *v)
 #endif /* atomic_inc_return */
 
 #ifndef atomic_inc_return
-static inline int
+static __always_inline int
 atomic_inc_return(atomic_t *v)
 {
 	return atomic_add_return(1, v);
@@ -287,7 +289,7 @@ atomic_inc_return(atomic_t *v)
 #endif
 
 #ifndef atomic_inc_return_acquire
-static inline int
+static __always_inline int
 atomic_inc_return_acquire(atomic_t *v)
 {
 	return atomic_add_return_acquire(1, v);
@@ -296,7 +298,7 @@ atomic_inc_return_acquire(atomic_t *v)
 #endif
 
 #ifndef atomic_inc_return_release
-static inline int
+static __always_inline int
 atomic_inc_return_release(atomic_t *v)
 {
 	return atomic_add_return_release(1, v);
@@ -305,7 +307,7 @@ atomic_inc_return_release(atomic_t *v)
 #endif
 
 #ifndef atomic_inc_return_relaxed
-static inline int
+static __always_inline int
 atomic_inc_return_relaxed(atomic_t *v)
 {
 	return atomic_add_return_relaxed(1, v);
@@ -316,7 +318,7 @@ atomic_inc_return_relaxed(atomic_t *v)
 #else /* atomic_inc_return_relaxed */
 
 #ifndef atomic_inc_return_acquire
-static inline int
+static __always_inline int
 atomic_inc_return_acquire(atomic_t *v)
 {
 	int ret = atomic_inc_return_relaxed(v);
@@ -327,7 +329,7 @@ atomic_inc_return_acquire(atomic_t *v)
 #endif
 
 #ifndef atomic_inc_return_release
-static inline int
+static __always_inline int
 atomic_inc_return_release(atomic_t *v)
 {
 	__atomic_release_fence();
@@ -337,7 +339,7 @@ atomic_inc_return_release(atomic_t *v)
 #endif
 
 #ifndef atomic_inc_return
-static inline int
+static __always_inline int
 atomic_inc_return(atomic_t *v)
 {
 	int ret;
@@ -359,7 +361,7 @@ atomic_inc_return(atomic_t *v)
 #endif /* atomic_fetch_inc */
 
 #ifndef atomic_fetch_inc
-static inline int
+static __always_inline int
 atomic_fetch_inc(atomic_t *v)
 {
 	return atomic_fetch_add(1, v);
@@ -368,7 +370,7 @@ atomic_fetch_inc(atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_inc_acquire
-static inline int
+static __always_inline int
 atomic_fetch_inc_acquire(atomic_t *v)
 {
 	return atomic_fetch_add_acquire(1, v);
@@ -377,7 +379,7 @@ atomic_fetch_inc_acquire(atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_inc_release
-static inline int
+static __always_inline int
 atomic_fetch_inc_release(atomic_t *v)
 {
 	return atomic_fetch_add_release(1, v);
@@ -386,7 +388,7 @@ atomic_fetch_inc_release(atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_inc_relaxed
-static inline int
+static __always_inline int
 atomic_fetch_inc_relaxed(atomic_t *v)
 {
 	return atomic_fetch_add_relaxed(1, v);
@@ -397,7 +399,7 @@ atomic_fetch_inc_relaxed(atomic_t *v)
 #else /* atomic_fetch_inc_relaxed */
 
 #ifndef atomic_fetch_inc_acquire
-static inline int
+static __always_inline int
 atomic_fetch_inc_acquire(atomic_t *v)
 {
 	int ret = atomic_fetch_inc_relaxed(v);
@@ -408,7 +410,7 @@ atomic_fetch_inc_acquire(atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_inc_release
-static inline int
+static __always_inline int
 atomic_fetch_inc_release(atomic_t *v)
 {
 	__atomic_release_fence();
@@ -418,7 +420,7 @@ atomic_fetch_inc_release(atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_inc
-static inline int
+static __always_inline int
 atomic_fetch_inc(atomic_t *v)
 {
 	int ret;
@@ -433,7 +435,7 @@ atomic_fetch_inc(atomic_t *v)
 #endif /* atomic_fetch_inc_relaxed */
 
 #ifndef atomic_dec
-static inline void
+static __always_inline void
 atomic_dec(atomic_t *v)
 {
 	atomic_sub(1, v);
@@ -449,7 +451,7 @@ atomic_dec(atomic_t *v)
 #endif /* atomic_dec_return */
 
 #ifndef atomic_dec_return
-static inline int
+static __always_inline int
 atomic_dec_return(atomic_t *v)
 {
 	return atomic_sub_return(1, v);
@@ -458,7 +460,7 @@ atomic_dec_return(atomic_t *v)
 #endif
 
 #ifndef atomic_dec_return_acquire
-static inline int
+static __always_inline int
 atomic_dec_return_acquire(atomic_t *v)
 {
 	return atomic_sub_return_acquire(1, v);
@@ -467,7 +469,7 @@ atomic_dec_return_acquire(atomic_t *v)
 #endif
 
 #ifndef atomic_dec_return_release
-static inline int
+static __always_inline int
 atomic_dec_return_release(atomic_t *v)
 {
 	return atomic_sub_return_release(1, v);
@@ -476,7 +478,7 @@ atomic_dec_return_release(atomic_t *v)
 #endif
 
 #ifndef atomic_dec_return_relaxed
-static inline int
+static __always_inline int
 atomic_dec_return_relaxed(atomic_t *v)
 {
 	return atomic_sub_return_relaxed(1, v);
@@ -487,7 +489,7 @@ atomic_dec_return_relaxed(atomic_t *v)
 #else /* atomic_dec_return_relaxed */
 
 #ifndef atomic_dec_return_acquire
-static inline int
+static __always_inline int
 atomic_dec_return_acquire(atomic_t *v)
 {
 	int ret = atomic_dec_return_relaxed(v);
@@ -498,7 +500,7 @@ atomic_dec_return_acquire(atomic_t *v)
 #endif
 
 #ifndef atomic_dec_return_release
-static inline int
+static __always_inline int
 atomic_dec_return_release(atomic_t *v)
 {
 	__atomic_release_fence();
@@ -508,7 +510,7 @@ atomic_dec_return_release(atomic_t *v)
 #endif
 
 #ifndef atomic_dec_return
-static inline int
+static __always_inline int
 atomic_dec_return(atomic_t *v)
 {
 	int ret;
@@ -530,7 +532,7 @@ atomic_dec_return(atomic_t *v)
 #endif /* atomic_fetch_dec */
 
 #ifndef atomic_fetch_dec
-static inline int
+static __always_inline int
 atomic_fetch_dec(atomic_t *v)
 {
 	return atomic_fetch_sub(1, v);
@@ -539,7 +541,7 @@ atomic_fetch_dec(atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_dec_acquire
-static inline int
+static __always_inline int
 atomic_fetch_dec_acquire(atomic_t *v)
 {
 	return atomic_fetch_sub_acquire(1, v);
@@ -548,7 +550,7 @@ atomic_fetch_dec_acquire(atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_dec_release
-static inline int
+static __always_inline int
 atomic_fetch_dec_release(atomic_t *v)
 {
 	return atomic_fetch_sub_release(1, v);
@@ -557,7 +559,7 @@ atomic_fetch_dec_release(atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_dec_relaxed
-static inline int
+static __always_inline int
 atomic_fetch_dec_relaxed(atomic_t *v)
 {
 	return atomic_fetch_sub_relaxed(1, v);
@@ -568,7 +570,7 @@ atomic_fetch_dec_relaxed(atomic_t *v)
 #else /* atomic_fetch_dec_relaxed */
 
 #ifndef atomic_fetch_dec_acquire
-static inline int
+static __always_inline int
 atomic_fetch_dec_acquire(atomic_t *v)
 {
 	int ret = atomic_fetch_dec_relaxed(v);
@@ -579,7 +581,7 @@ atomic_fetch_dec_acquire(atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_dec_release
-static inline int
+static __always_inline int
 atomic_fetch_dec_release(atomic_t *v)
 {
 	__atomic_release_fence();
@@ -589,7 +591,7 @@ atomic_fetch_dec_release(atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_dec
-static inline int
+static __always_inline int
 atomic_fetch_dec(atomic_t *v)
 {
 	int ret;
@@ -610,7 +612,7 @@ atomic_fetch_dec(atomic_t *v)
 #else /* atomic_fetch_and_relaxed */
 
 #ifndef atomic_fetch_and_acquire
-static inline int
+static __always_inline int
 atomic_fetch_and_acquire(int i, atomic_t *v)
 {
 	int ret = atomic_fetch_and_relaxed(i, v);
@@ -621,7 +623,7 @@ atomic_fetch_and_acquire(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_and_release
-static inline int
+static __always_inline int
 atomic_fetch_and_release(int i, atomic_t *v)
 {
 	__atomic_release_fence();
@@ -631,7 +633,7 @@ atomic_fetch_and_release(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_and
-static inline int
+static __always_inline int
 atomic_fetch_and(int i, atomic_t *v)
 {
 	int ret;
@@ -646,7 +648,7 @@ atomic_fetch_and(int i, atomic_t *v)
 #endif /* atomic_fetch_and_relaxed */
 
 #ifndef atomic_andnot
-static inline void
+static __always_inline void
 atomic_andnot(int i, atomic_t *v)
 {
 	atomic_and(~i, v);
@@ -662,7 +664,7 @@ atomic_andnot(int i, atomic_t *v)
 #endif /* atomic_fetch_andnot */
 
 #ifndef atomic_fetch_andnot
-static inline int
+static __always_inline int
 atomic_fetch_andnot(int i, atomic_t *v)
 {
 	return atomic_fetch_and(~i, v);
@@ -671,7 +673,7 @@ atomic_fetch_andnot(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_andnot_acquire
-static inline int
+static __always_inline int
 atomic_fetch_andnot_acquire(int i, atomic_t *v)
 {
 	return atomic_fetch_and_acquire(~i, v);
@@ -680,7 +682,7 @@ atomic_fetch_andnot_acquire(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_andnot_release
-static inline int
+static __always_inline int
 atomic_fetch_andnot_release(int i, atomic_t *v)
 {
 	return atomic_fetch_and_release(~i, v);
@@ -689,7 +691,7 @@ atomic_fetch_andnot_release(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_andnot_relaxed
-static inline int
+static __always_inline int
 atomic_fetch_andnot_relaxed(int i, atomic_t *v)
 {
 	return atomic_fetch_and_relaxed(~i, v);
@@ -700,7 +702,7 @@ atomic_fetch_andnot_relaxed(int i, atomic_t *v)
 #else /* atomic_fetch_andnot_relaxed */
 
 #ifndef atomic_fetch_andnot_acquire
-static inline int
+static __always_inline int
 atomic_fetch_andnot_acquire(int i, atomic_t *v)
 {
 	int ret = atomic_fetch_andnot_relaxed(i, v);
@@ -711,7 +713,7 @@ atomic_fetch_andnot_acquire(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_andnot_release
-static inline int
+static __always_inline int
 atomic_fetch_andnot_release(int i, atomic_t *v)
 {
 	__atomic_release_fence();
@@ -721,7 +723,7 @@ atomic_fetch_andnot_release(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_andnot
-static inline int
+static __always_inline int
 atomic_fetch_andnot(int i, atomic_t *v)
 {
 	int ret;
@@ -742,7 +744,7 @@ atomic_fetch_andnot(int i, atomic_t *v)
 #else /* atomic_fetch_or_relaxed */
 
 #ifndef atomic_fetch_or_acquire
-static inline int
+static __always_inline int
 atomic_fetch_or_acquire(int i, atomic_t *v)
 {
 	int ret = atomic_fetch_or_relaxed(i, v);
@@ -753,7 +755,7 @@ atomic_fetch_or_acquire(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_or_release
-static inline int
+static __always_inline int
 atomic_fetch_or_release(int i, atomic_t *v)
 {
 	__atomic_release_fence();
@@ -763,7 +765,7 @@ atomic_fetch_or_release(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_or
-static inline int
+static __always_inline int
 atomic_fetch_or(int i, atomic_t *v)
 {
 	int ret;
@@ -784,7 +786,7 @@ atomic_fetch_or(int i, atomic_t *v)
 #else /* atomic_fetch_xor_relaxed */
 
 #ifndef atomic_fetch_xor_acquire
-static inline int
+static __always_inline int
 atomic_fetch_xor_acquire(int i, atomic_t *v)
 {
 	int ret = atomic_fetch_xor_relaxed(i, v);
@@ -795,7 +797,7 @@ atomic_fetch_xor_acquire(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_xor_release
-static inline int
+static __always_inline int
 atomic_fetch_xor_release(int i, atomic_t *v)
 {
 	__atomic_release_fence();
@@ -805,7 +807,7 @@ atomic_fetch_xor_release(int i, atomic_t *v)
 #endif
 
 #ifndef atomic_fetch_xor
-static inline int
+static __always_inline int
 atomic_fetch_xor(int i, atomic_t *v)
 {
 	int ret;
@@ -826,7 +828,7 @@ atomic_fetch_xor(int i, atomic_t *v)
 #else /* atomic_xchg_relaxed */
 
 #ifndef atomic_xchg_acquire
-static inline int
+static __always_inline int
 atomic_xchg_acquire(atomic_t *v, int i)
 {
 	int ret = atomic_xchg_relaxed(v, i);
@@ -837,7 +839,7 @@ atomic_xchg_acquire(atomic_t *v, int i)
 #endif
 
 #ifndef atomic_xchg_release
-static inline int
+static __always_inline int
 atomic_xchg_release(atomic_t *v, int i)
 {
 	__atomic_release_fence();
@@ -847,7 +849,7 @@ atomic_xchg_release(atomic_t *v, int i)
 #endif
 
 #ifndef atomic_xchg
-static inline int
+static __always_inline int
 atomic_xchg(atomic_t *v, int i)
 {
 	int ret;
@@ -868,7 +870,7 @@ atomic_xchg(atomic_t *v, int i)
 #else /* atomic_cmpxchg_relaxed */
 
 #ifndef atomic_cmpxchg_acquire
-static inline int
+static __always_inline int
 atomic_cmpxchg_acquire(atomic_t *v, int old, int new)
 {
 	int ret = atomic_cmpxchg_relaxed(v, old, new);
@@ -879,7 +881,7 @@ atomic_cmpxchg_acquire(atomic_t *v, int old, int new)
 #endif
 
 #ifndef atomic_cmpxchg_release
-static inline int
+static __always_inline int
 atomic_cmpxchg_release(atomic_t *v, int old, int new)
 {
 	__atomic_release_fence();
@@ -889,7 +891,7 @@ atomic_cmpxchg_release(atomic_t *v, int old, int new)
 #endif
 
 #ifndef atomic_cmpxchg
-static inline int
+static __always_inline int
 atomic_cmpxchg(atomic_t *v, int old, int new)
 {
 	int ret;
@@ -911,7 +913,7 @@ atomic_cmpxchg(atomic_t *v, int old, int new)
 #endif /* atomic_try_cmpxchg */
 
 #ifndef atomic_try_cmpxchg
-static inline bool
+static __always_inline bool
 atomic_try_cmpxchg(atomic_t *v, int *old, int new)
 {
 	int r, o = *old;
@@ -924,7 +926,7 @@ atomic_try_cmpxchg(atomic_t *v, int *old, int new)
 #endif
 
 #ifndef atomic_try_cmpxchg_acquire
-static inline bool
+static __always_inline bool
 atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
 {
 	int r, o = *old;
@@ -937,7 +939,7 @@ atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
 #endif
 
 #ifndef atomic_try_cmpxchg_release
-static inline bool
+static __always_inline bool
 atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
 {
 	int r, o = *old;
@@ -950,7 +952,7 @@ atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
 #endif
 
 #ifndef atomic_try_cmpxchg_relaxed
-static inline bool
+static __always_inline bool
 atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new)
 {
 	int r, o = *old;
@@ -965,7 +967,7 @@ atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new)
 #else /* atomic_try_cmpxchg_relaxed */
 
 #ifndef atomic_try_cmpxchg_acquire
-static inline bool
+static __always_inline bool
 atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
 {
 	bool ret = atomic_try_cmpxchg_relaxed(v, old, new);
@@ -976,7 +978,7 @@ atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
 #endif
 
 #ifndef atomic_try_cmpxchg_release
-static inline bool
+static __always_inline bool
 atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
 {
 	__atomic_release_fence();
@@ -986,7 +988,7 @@ atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
 #endif
 
 #ifndef atomic_try_cmpxchg
-static inline bool
+static __always_inline bool
 atomic_try_cmpxchg(atomic_t *v, int *old, int new)
 {
 	bool ret;
@@ -1010,7 +1012,7 @@ atomic_try_cmpxchg(atomic_t *v, int *old, int new)
  * true if the result is zero, or false for all
  * other cases.
  */
-static inline bool
+static __always_inline bool
 atomic_sub_and_test(int i, atomic_t *v)
 {
 	return atomic_sub_return(i, v) == 0;
@@ -1027,7 +1029,7 @@ atomic_sub_and_test(int i, atomic_t *v)
  * returns true if the result is 0, or false for all other
  * cases.
  */
-static inline bool
+static __always_inline bool
 atomic_dec_and_test(atomic_t *v)
 {
 	return atomic_dec_return(v) == 0;
@@ -1044,7 +1046,7 @@ atomic_dec_and_test(atomic_t *v)
  * and returns true if the result is zero, or false for all
  * other cases.
  */
-static inline bool
+static __always_inline bool
 atomic_inc_and_test(atomic_t *v)
 {
 	return atomic_inc_return(v) == 0;
@@ -1062,7 +1064,7 @@ atomic_inc_and_test(atomic_t *v)
  * if the result is negative, or false when
  * result is greater than or equal to zero.
  */
-static inline bool
+static __always_inline bool
 atomic_add_negative(int i, atomic_t *v)
 {
 	return atomic_add_return(i, v) < 0;
@@ -1080,7 +1082,7 @@ atomic_add_negative(int i, atomic_t *v)
  * Atomically adds @a to @v, so long as @v was not already @u.
  * Returns original value of @v
  */
-static inline int
+static __always_inline int
 atomic_fetch_add_unless(atomic_t *v, int a, int u)
 {
 	int c = atomic_read(v);
@@ -1105,7 +1107,7 @@ atomic_fetch_add_unless(atomic_t *v, int a, int u)
  * Atomically adds @a to @v, if @v was not already @u.
  * Returns true if the addition was done.
  */
-static inline bool
+static __always_inline bool
 atomic_add_unless(atomic_t *v, int a, int u)
 {
 	return atomic_fetch_add_unless(v, a, u) != u;
@@ -1121,7 +1123,7 @@ atomic_add_unless(atomic_t *v, int a, int u)
  * Atomically increments @v by 1, if @v is non-zero.
  * Returns true if the increment was done.
  */
-static inline bool
+static __always_inline bool
 atomic_inc_not_zero(atomic_t *v)
 {
 	return atomic_add_unless(v, 1, 0);
@@ -1130,7 +1132,7 @@ atomic_inc_not_zero(atomic_t *v)
 #endif
 
 #ifndef atomic_inc_unless_negative
-static inline bool
+static __always_inline bool
 atomic_inc_unless_negative(atomic_t *v)
 {
 	int c = atomic_read(v);
@@ -1146,7 +1148,7 @@ atomic_inc_unless_negative(atomic_t *v)
 #endif
 
 #ifndef atomic_dec_unless_positive
-static inline bool
+static __always_inline bool
 atomic_dec_unless_positive(atomic_t *v)
 {
 	int c = atomic_read(v);
@@ -1162,7 +1164,7 @@ atomic_dec_unless_positive(atomic_t *v)
 #endif
 
 #ifndef atomic_dec_if_positive
-static inline int
+static __always_inline int
 atomic_dec_if_positive(atomic_t *v)
 {
 	int dec, c = atomic_read(v);
@@ -1186,7 +1188,7 @@ atomic_dec_if_positive(atomic_t *v)
 #endif
 
 #ifndef atomic64_read_acquire
-static inline s64
+static __always_inline s64
 atomic64_read_acquire(const atomic64_t *v)
 {
 	return smp_load_acquire(&(v)->counter);
@@ -1195,7 +1197,7 @@ atomic64_read_acquire(const atomic64_t *v)
 #endif
 
 #ifndef atomic64_set_release
-static inline void
+static __always_inline void
 atomic64_set_release(atomic64_t *v, s64 i)
 {
 	smp_store_release(&(v)->counter, i);
@@ -1210,7 +1212,7 @@ atomic64_set_release(atomic64_t *v, s64 i)
 #else /* atomic64_add_return_relaxed */
 
 #ifndef atomic64_add_return_acquire
-static inline s64
+static __always_inline s64
 atomic64_add_return_acquire(s64 i, atomic64_t *v)
 {
 	s64 ret = atomic64_add_return_relaxed(i, v);
@@ -1221,7 +1223,7 @@ atomic64_add_return_acquire(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_add_return_release
-static inline s64
+static __always_inline s64
 atomic64_add_return_release(s64 i, atomic64_t *v)
 {
 	__atomic_release_fence();
@@ -1231,7 +1233,7 @@ atomic64_add_return_release(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_add_return
-static inline s64
+static __always_inline s64
 atomic64_add_return(s64 i, atomic64_t *v)
 {
 	s64 ret;
@@ -1252,7 +1254,7 @@ atomic64_add_return(s64 i, atomic64_t *v)
 #else /* atomic64_fetch_add_relaxed */
 
 #ifndef atomic64_fetch_add_acquire
-static inline s64
+static __always_inline s64
 atomic64_fetch_add_acquire(s64 i, atomic64_t *v)
 {
 	s64 ret = atomic64_fetch_add_relaxed(i, v);
@@ -1263,7 +1265,7 @@ atomic64_fetch_add_acquire(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_add_release
-static inline s64
+static __always_inline s64
 atomic64_fetch_add_release(s64 i, atomic64_t *v)
 {
 	__atomic_release_fence();
@@ -1273,7 +1275,7 @@ atomic64_fetch_add_release(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_add
-static inline s64
+static __always_inline s64
 atomic64_fetch_add(s64 i, atomic64_t *v)
 {
 	s64 ret;
@@ -1294,7 +1296,7 @@ atomic64_fetch_add(s64 i, atomic64_t *v)
 #else /* atomic64_sub_return_relaxed */
 
 #ifndef atomic64_sub_return_acquire
-static inline s64
+static __always_inline s64
 atomic64_sub_return_acquire(s64 i, atomic64_t *v)
 {
 	s64 ret = atomic64_sub_return_relaxed(i, v);
@@ -1305,7 +1307,7 @@ atomic64_sub_return_acquire(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_sub_return_release
-static inline s64
+static __always_inline s64
 atomic64_sub_return_release(s64 i, atomic64_t *v)
 {
 	__atomic_release_fence();
@@ -1315,7 +1317,7 @@ atomic64_sub_return_release(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_sub_return
-static inline s64
+static __always_inline s64
 atomic64_sub_return(s64 i, atomic64_t *v)
 {
 	s64 ret;
@@ -1336,7 +1338,7 @@ atomic64_sub_return(s64 i, atomic64_t *v)
 #else /* atomic64_fetch_sub_relaxed */
 
 #ifndef atomic64_fetch_sub_acquire
-static inline s64
+static __always_inline s64
 atomic64_fetch_sub_acquire(s64 i, atomic64_t *v)
 {
 	s64 ret = atomic64_fetch_sub_relaxed(i, v);
@@ -1347,7 +1349,7 @@ atomic64_fetch_sub_acquire(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_sub_release
-static inline s64
+static __always_inline s64
 atomic64_fetch_sub_release(s64 i, atomic64_t *v)
 {
 	__atomic_release_fence();
@@ -1357,7 +1359,7 @@ atomic64_fetch_sub_release(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_sub
-static inline s64
+static __always_inline s64
 atomic64_fetch_sub(s64 i, atomic64_t *v)
 {
 	s64 ret;
@@ -1372,7 +1374,7 @@ atomic64_fetch_sub(s64 i, atomic64_t *v)
 #endif /* atomic64_fetch_sub_relaxed */
 
 #ifndef atomic64_inc
-static inline void
+static __always_inline void
 atomic64_inc(atomic64_t *v)
 {
 	atomic64_add(1, v);
@@ -1388,7 +1390,7 @@ atomic64_inc(atomic64_t *v)
 #endif /* atomic64_inc_return */
 
 #ifndef atomic64_inc_return
-static inline s64
+static __always_inline s64
 atomic64_inc_return(atomic64_t *v)
 {
 	return atomic64_add_return(1, v);
@@ -1397,7 +1399,7 @@ atomic64_inc_return(atomic64_t *v)
 #endif
 
 #ifndef atomic64_inc_return_acquire
-static inline s64
+static __always_inline s64
 atomic64_inc_return_acquire(atomic64_t *v)
 {
 	return atomic64_add_return_acquire(1, v);
@@ -1406,7 +1408,7 @@ atomic64_inc_return_acquire(atomic64_t *v)
 #endif
 
 #ifndef atomic64_inc_return_release
-static inline s64
+static __always_inline s64
 atomic64_inc_return_release(atomic64_t *v)
 {
 	return atomic64_add_return_release(1, v);
@@ -1415,7 +1417,7 @@ atomic64_inc_return_release(atomic64_t *v)
 #endif
 
 #ifndef atomic64_inc_return_relaxed
-static inline s64
+static __always_inline s64
 atomic64_inc_return_relaxed(atomic64_t *v)
 {
 	return atomic64_add_return_relaxed(1, v);
@@ -1426,7 +1428,7 @@ atomic64_inc_return_relaxed(atomic64_t *v)
 #else /* atomic64_inc_return_relaxed */
 
 #ifndef atomic64_inc_return_acquire
-static inline s64
+static __always_inline s64
 atomic64_inc_return_acquire(atomic64_t *v)
 {
 	s64 ret = atomic64_inc_return_relaxed(v);
@@ -1437,7 +1439,7 @@ atomic64_inc_return_acquire(atomic64_t *v)
 #endif
 
 #ifndef atomic64_inc_return_release
-static inline s64
+static __always_inline s64
 atomic64_inc_return_release(atomic64_t *v)
 {
 	__atomic_release_fence();
@@ -1447,7 +1449,7 @@ atomic64_inc_return_release(atomic64_t *v)
 #endif
 
 #ifndef atomic64_inc_return
-static inline s64
+static __always_inline s64
 atomic64_inc_return(atomic64_t *v)
 {
 	s64 ret;
@@ -1469,7 +1471,7 @@ atomic64_inc_return(atomic64_t *v)
 #endif /* atomic64_fetch_inc */
 
 #ifndef atomic64_fetch_inc
-static inline s64
+static __always_inline s64
 atomic64_fetch_inc(atomic64_t *v)
 {
 	return atomic64_fetch_add(1, v);
@@ -1478,7 +1480,7 @@ atomic64_fetch_inc(atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_inc_acquire
-static inline s64
+static __always_inline s64
 atomic64_fetch_inc_acquire(atomic64_t *v)
 {
 	return atomic64_fetch_add_acquire(1, v);
@@ -1487,7 +1489,7 @@ atomic64_fetch_inc_acquire(atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_inc_release
-static inline s64
+static __always_inline s64
 atomic64_fetch_inc_release(atomic64_t *v)
 {
 	return atomic64_fetch_add_release(1, v);
@@ -1496,7 +1498,7 @@ atomic64_fetch_inc_release(atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_inc_relaxed
-static inline s64
+static __always_inline s64
 atomic64_fetch_inc_relaxed(atomic64_t *v)
 {
 	return atomic64_fetch_add_relaxed(1, v);
@@ -1507,7 +1509,7 @@ atomic64_fetch_inc_relaxed(atomic64_t *v)
 #else /* atomic64_fetch_inc_relaxed */
 
 #ifndef atomic64_fetch_inc_acquire
-static inline s64
+static __always_inline s64
 atomic64_fetch_inc_acquire(atomic64_t *v)
 {
 	s64 ret = atomic64_fetch_inc_relaxed(v);
@@ -1518,7 +1520,7 @@ atomic64_fetch_inc_acquire(atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_inc_release
-static inline s64
+static __always_inline s64
 atomic64_fetch_inc_release(atomic64_t *v)
 {
 	__atomic_release_fence();
@@ -1528,7 +1530,7 @@ atomic64_fetch_inc_release(atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_inc
-static inline s64
+static __always_inline s64
 atomic64_fetch_inc(atomic64_t *v)
 {
 	s64 ret;
@@ -1543,7 +1545,7 @@ atomic64_fetch_inc(atomic64_t *v)
 #endif /* atomic64_fetch_inc_relaxed */
 
 #ifndef atomic64_dec
-static inline void
+static __always_inline void
 atomic64_dec(atomic64_t *v)
 {
 	atomic64_sub(1, v);
@@ -1559,7 +1561,7 @@ atomic64_dec(atomic64_t *v)
 #endif /* atomic64_dec_return */
 
 #ifndef atomic64_dec_return
-static inline s64
+static __always_inline s64
 atomic64_dec_return(atomic64_t *v)
 {
 	return atomic64_sub_return(1, v);
@@ -1568,7 +1570,7 @@ atomic64_dec_return(atomic64_t *v)
 #endif
 
 #ifndef atomic64_dec_return_acquire
-static inline s64
+static __always_inline s64
 atomic64_dec_return_acquire(atomic64_t *v)
 {
 	return atomic64_sub_return_acquire(1, v);
@@ -1577,7 +1579,7 @@ atomic64_dec_return_acquire(atomic64_t *v)
 #endif
 
 #ifndef atomic64_dec_return_release
-static inline s64
+static __always_inline s64
 atomic64_dec_return_release(atomic64_t *v)
 {
 	return atomic64_sub_return_release(1, v);
@@ -1586,7 +1588,7 @@ atomic64_dec_return_release(atomic64_t *v)
 #endif
 
 #ifndef atomic64_dec_return_relaxed
-static inline s64
+static __always_inline s64
 atomic64_dec_return_relaxed(atomic64_t *v)
 {
 	return atomic64_sub_return_relaxed(1, v);
@@ -1597,7 +1599,7 @@ atomic64_dec_return_relaxed(atomic64_t *v)
 #else /* atomic64_dec_return_relaxed */
 
 #ifndef atomic64_dec_return_acquire
-static inline s64
+static __always_inline s64
 atomic64_dec_return_acquire(atomic64_t *v)
 {
 	s64 ret = atomic64_dec_return_relaxed(v);
@@ -1608,7 +1610,7 @@ atomic64_dec_return_acquire(atomic64_t *v)
 #endif
 
 #ifndef atomic64_dec_return_release
-static inline s64
+static __always_inline s64
 atomic64_dec_return_release(atomic64_t *v)
 {
 	__atomic_release_fence();
@@ -1618,7 +1620,7 @@ atomic64_dec_return_release(atomic64_t *v)
 #endif
 
 #ifndef atomic64_dec_return
-static inline s64
+static __always_inline s64
 atomic64_dec_return(atomic64_t *v)
 {
 	s64 ret;
@@ -1640,7 +1642,7 @@ atomic64_dec_return(atomic64_t *v)
 #endif /* atomic64_fetch_dec */
 
 #ifndef atomic64_fetch_dec
-static inline s64
+static __always_inline s64
 atomic64_fetch_dec(atomic64_t *v)
 {
 	return atomic64_fetch_sub(1, v);
@@ -1649,7 +1651,7 @@ atomic64_fetch_dec(atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_dec_acquire
-static inline s64
+static __always_inline s64
 atomic64_fetch_dec_acquire(atomic64_t *v)
 {
 	return atomic64_fetch_sub_acquire(1, v);
@@ -1658,7 +1660,7 @@ atomic64_fetch_dec_acquire(atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_dec_release
-static inline s64
+static __always_inline s64
 atomic64_fetch_dec_release(atomic64_t *v)
 {
 	return atomic64_fetch_sub_release(1, v);
@@ -1667,7 +1669,7 @@ atomic64_fetch_dec_release(atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_dec_relaxed
-static inline s64
+static __always_inline s64
 atomic64_fetch_dec_relaxed(atomic64_t *v)
 {
 	return atomic64_fetch_sub_relaxed(1, v);
@@ -1678,7 +1680,7 @@ atomic64_fetch_dec_relaxed(atomic64_t *v)
 #else /* atomic64_fetch_dec_relaxed */
 
 #ifndef atomic64_fetch_dec_acquire
-static inline s64
+static __always_inline s64
 atomic64_fetch_dec_acquire(atomic64_t *v)
 {
 	s64 ret = atomic64_fetch_dec_relaxed(v);
@@ -1689,7 +1691,7 @@ atomic64_fetch_dec_acquire(atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_dec_release
-static inline s64
+static __always_inline s64
 atomic64_fetch_dec_release(atomic64_t *v)
 {
 	__atomic_release_fence();
@@ -1699,7 +1701,7 @@ atomic64_fetch_dec_release(atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_dec
-static inline s64
+static __always_inline s64
 atomic64_fetch_dec(atomic64_t *v)
 {
 	s64 ret;
@@ -1720,7 +1722,7 @@ atomic64_fetch_dec(atomic64_t *v)
 #else /* atomic64_fetch_and_relaxed */
 
 #ifndef atomic64_fetch_and_acquire
-static inline s64
+static __always_inline s64
 atomic64_fetch_and_acquire(s64 i, atomic64_t *v)
 {
 	s64 ret = atomic64_fetch_and_relaxed(i, v);
@@ -1731,7 +1733,7 @@ atomic64_fetch_and_acquire(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_and_release
-static inline s64
+static __always_inline s64
 atomic64_fetch_and_release(s64 i, atomic64_t *v)
 {
 	__atomic_release_fence();
@@ -1741,7 +1743,7 @@ atomic64_fetch_and_release(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_and
-static inline s64
+static __always_inline s64
 atomic64_fetch_and(s64 i, atomic64_t *v)
 {
 	s64 ret;
@@ -1756,7 +1758,7 @@ atomic64_fetch_and(s64 i, atomic64_t *v)
 #endif /* atomic64_fetch_and_relaxed */
 
 #ifndef atomic64_andnot
-static inline void
+static __always_inline void
 atomic64_andnot(s64 i, atomic64_t *v)
 {
 	atomic64_and(~i, v);
@@ -1772,7 +1774,7 @@ atomic64_andnot(s64 i, atomic64_t *v)
 #endif /* atomic64_fetch_andnot */
 
 #ifndef atomic64_fetch_andnot
-static inline s64
+static __always_inline s64
 atomic64_fetch_andnot(s64 i, atomic64_t *v)
 {
 	return atomic64_fetch_and(~i, v);
@@ -1781,7 +1783,7 @@ atomic64_fetch_andnot(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_andnot_acquire
-static inline s64
+static __always_inline s64
 atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v)
 {
 	return atomic64_fetch_and_acquire(~i, v);
@@ -1790,7 +1792,7 @@ atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_andnot_release
-static inline s64
+static __always_inline s64
 atomic64_fetch_andnot_release(s64 i, atomic64_t *v)
 {
 	return atomic64_fetch_and_release(~i, v);
@@ -1799,7 +1801,7 @@ atomic64_fetch_andnot_release(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_andnot_relaxed
-static inline s64
+static __always_inline s64
 atomic64_fetch_andnot_relaxed(s64 i, atomic64_t *v)
 {
 	return atomic64_fetch_and_relaxed(~i, v);
@@ -1810,7 +1812,7 @@ atomic64_fetch_andnot_relaxed(s64 i, atomic64_t *v)
 #else /* atomic64_fetch_andnot_relaxed */
 
 #ifndef atomic64_fetch_andnot_acquire
-static inline s64
+static __always_inline s64
 atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v)
 {
 	s64 ret = atomic64_fetch_andnot_relaxed(i, v);
@@ -1821,7 +1823,7 @@ atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_andnot_release
-static inline s64
+static __always_inline s64
 atomic64_fetch_andnot_release(s64 i, atomic64_t *v)
 {
 	__atomic_release_fence();
@@ -1831,7 +1833,7 @@ atomic64_fetch_andnot_release(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_andnot
-static inline s64
+static __always_inline s64
 atomic64_fetch_andnot(s64 i, atomic64_t *v)
 {
 	s64 ret;
@@ -1852,7 +1854,7 @@ atomic64_fetch_andnot(s64 i, atomic64_t *v)
 #else /* atomic64_fetch_or_relaxed */
 
 #ifndef atomic64_fetch_or_acquire
-static inline s64
+static __always_inline s64
 atomic64_fetch_or_acquire(s64 i, atomic64_t *v)
 {
 	s64 ret = atomic64_fetch_or_relaxed(i, v);
@@ -1863,7 +1865,7 @@ atomic64_fetch_or_acquire(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_or_release
-static inline s64
+static __always_inline s64
 atomic64_fetch_or_release(s64 i, atomic64_t *v)
 {
 	__atomic_release_fence();
@@ -1873,7 +1875,7 @@ atomic64_fetch_or_release(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_or
-static inline s64
+static __always_inline s64
 atomic64_fetch_or(s64 i, atomic64_t *v)
 {
 	s64 ret;
@@ -1894,7 +1896,7 @@ atomic64_fetch_or(s64 i, atomic64_t *v)
 #else /* atomic64_fetch_xor_relaxed */
 
 #ifndef atomic64_fetch_xor_acquire
-static inline s64
+static __always_inline s64
 atomic64_fetch_xor_acquire(s64 i, atomic64_t *v)
 {
 	s64 ret = atomic64_fetch_xor_relaxed(i, v);
@@ -1905,7 +1907,7 @@ atomic64_fetch_xor_acquire(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_xor_release
-static inline s64
+static __always_inline s64
 atomic64_fetch_xor_release(s64 i, atomic64_t *v)
 {
 	__atomic_release_fence();
@@ -1915,7 +1917,7 @@ atomic64_fetch_xor_release(s64 i, atomic64_t *v)
 #endif
 
 #ifndef atomic64_fetch_xor
-static inline s64
+static __always_inline s64
 atomic64_fetch_xor(s64 i, atomic64_t *v)
 {
 	s64 ret;
@@ -1936,7 +1938,7 @@ atomic64_fetch_xor(s64 i, atomic64_t *v)
 #else /* atomic64_xchg_relaxed */
 
 #ifndef atomic64_xchg_acquire
-static inline s64
+static __always_inline s64
 atomic64_xchg_acquire(atomic64_t *v, s64 i)
 {
 	s64 ret = atomic64_xchg_relaxed(v, i);
@@ -1947,7 +1949,7 @@ atomic64_xchg_acquire(atomic64_t *v, s64 i)
 #endif
 
 #ifndef atomic64_xchg_release
-static inline s64
+static __always_inline s64
 atomic64_xchg_release(atomic64_t *v, s64 i)
 {
 	__atomic_release_fence();
@@ -1957,7 +1959,7 @@ atomic64_xchg_release(atomic64_t *v, s64 i)
 #endif
 
 #ifndef atomic64_xchg
-static inline s64
+static __always_inline s64
 atomic64_xchg(atomic64_t *v, s64 i)
 {
 	s64 ret;
@@ -1978,7 +1980,7 @@ atomic64_xchg(atomic64_t *v, s64 i)
 #else /* atomic64_cmpxchg_relaxed */
 
 #ifndef atomic64_cmpxchg_acquire
-static inline s64
+static __always_inline s64
 atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new)
 {
 	s64 ret = atomic64_cmpxchg_relaxed(v, old, new);
@@ -1989,7 +1991,7 @@ atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new)
 #endif
 
 #ifndef atomic64_cmpxchg_release
-static inline s64
+static __always_inline s64
 atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new)
 {
 	__atomic_release_fence();
@@ -1999,7 +2001,7 @@ atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new)
 #endif
 
 #ifndef atomic64_cmpxchg
-static inline s64
+static __always_inline s64
 atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
 {
 	s64 ret;
@@ -2021,7 +2023,7 @@ atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
 #endif /* atomic64_try_cmpxchg */
 
 #ifndef atomic64_try_cmpxchg
-static inline bool
+static __always_inline bool
 atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
 {
 	s64 r, o = *old;
@@ -2034,7 +2036,7 @@ atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
 #endif
 
 #ifndef atomic64_try_cmpxchg_acquire
-static inline bool
+static __always_inline bool
 atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
 {
 	s64 r, o = *old;
@@ -2047,7 +2049,7 @@ atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
 #endif
 
 #ifndef atomic64_try_cmpxchg_release
-static inline bool
+static __always_inline bool
 atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
 {
 	s64 r, o = *old;
@@ -2060,7 +2062,7 @@ atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
 #endif
 
 #ifndef atomic64_try_cmpxchg_relaxed
-static inline bool
+static __always_inline bool
 atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new)
 {
 	s64 r, o = *old;
@@ -2075,7 +2077,7 @@ atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new)
 #else /* atomic64_try_cmpxchg_relaxed */
 
 #ifndef atomic64_try_cmpxchg_acquire
-static inline bool
+static __always_inline bool
 atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
 {
 	bool ret = atomic64_try_cmpxchg_relaxed(v, old, new);
@@ -2086,7 +2088,7 @@ atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
 #endif
 
 #ifndef atomic64_try_cmpxchg_release
-static inline bool
+static __always_inline bool
 atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
 {
 	__atomic_release_fence();
@@ -2096,7 +2098,7 @@ atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
 #endif
 
 #ifndef atomic64_try_cmpxchg
-static inline bool
+static __always_inline bool
 atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
 {
 	bool ret;
@@ -2120,7 +2122,7 @@ atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
  * true if the result is zero, or false for all
  * other cases.
  */
-static inline bool
+static __always_inline bool
 atomic64_sub_and_test(s64 i, atomic64_t *v)
 {
 	return atomic64_sub_return(i, v) == 0;
@@ -2137,7 +2139,7 @@ atomic64_sub_and_test(s64 i, atomic64_t *v)
  * returns true if the result is 0, or false for all other
  * cases.
  */
-static inline bool
+static __always_inline bool
 atomic64_dec_and_test(atomic64_t *v)
 {
 	return atomic64_dec_return(v) == 0;
@@ -2154,7 +2156,7 @@ atomic64_dec_and_test(atomic64_t *v)
  * and returns true if the result is zero, or false for all
  * other cases.
  */
-static inline bool
+static __always_inline bool
 atomic64_inc_and_test(atomic64_t *v)
 {
 	return atomic64_inc_return(v) == 0;
@@ -2172,7 +2174,7 @@ atomic64_inc_and_test(atomic64_t *v)
  * if the result is negative, or false when
  * result is greater than or equal to zero.
  */
-static inline bool
+static __always_inline bool
 atomic64_add_negative(s64 i, atomic64_t *v)
 {
 	return atomic64_add_return(i, v) < 0;
@@ -2190,7 +2192,7 @@ atomic64_add_negative(s64 i, atomic64_t *v)
  * Atomically adds @a to @v, so long as @v was not already @u.
  * Returns original value of @v
  */
-static inline s64
+static __always_inline s64
 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
 {
 	s64 c = atomic64_read(v);
@@ -2215,7 +2217,7 @@ atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
  * Atomically adds @a to @v, if @v was not already @u.
  * Returns true if the addition was done.
  */
-static inline bool
+static __always_inline bool
 atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
 {
 	return atomic64_fetch_add_unless(v, a, u) != u;
@@ -2231,7 +2233,7 @@ atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
  * Atomically increments @v by 1, if @v is non-zero.
  * Returns true if the increment was done.
  */
-static inline bool
+static __always_inline bool
 atomic64_inc_not_zero(atomic64_t *v)
 {
 	return atomic64_add_unless(v, 1, 0);
@@ -2240,7 +2242,7 @@ atomic64_inc_not_zero(atomic64_t *v)
 #endif
 
 #ifndef atomic64_inc_unless_negative
-static inline bool
+static __always_inline bool
 atomic64_inc_unless_negative(atomic64_t *v)
 {
 	s64 c = atomic64_read(v);
@@ -2256,7 +2258,7 @@ atomic64_inc_unless_negative(atomic64_t *v)
 #endif
 
 #ifndef atomic64_dec_unless_positive
-static inline bool
+static __always_inline bool
 atomic64_dec_unless_positive(atomic64_t *v)
 {
 	s64 c = atomic64_read(v);
@@ -2272,7 +2274,7 @@ atomic64_dec_unless_positive(atomic64_t *v)
 #endif
 
 #ifndef atomic64_dec_if_positive
-static inline s64
+static __always_inline s64
 atomic64_dec_if_positive(atomic64_t *v)
 {
 	s64 dec, c = atomic64_read(v);
@@ -2292,4 +2294,4 @@ atomic64_dec_if_positive(atomic64_t *v)
 #define atomic64_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c))
 
 #endif /* _LINUX_ATOMIC_FALLBACK_H */
-// 25de4a2804d70f57e994fe3b419148658bb5378a
+// baaf45f4c24ed88ceae58baca39d7fd80bb8101b
diff --git a/scripts/atomic/fallbacks/acquire b/scripts/atomic/fallbacks/acquire
index e38871e64db6..ea489acc285e 100755
--- a/scripts/atomic/fallbacks/acquire
+++ b/scripts/atomic/fallbacks/acquire
@@ -1,5 +1,5 @@
 cat <<EOF
-static inline ${ret}
+static __always_inline ${ret}
 ${atomic}_${pfx}${name}${sfx}_acquire(${params})
 {
 	${ret} ret = ${atomic}_${pfx}${name}${sfx}_relaxed(${args});
diff --git a/scripts/atomic/fallbacks/add_negative b/scripts/atomic/fallbacks/add_negative
index e6f4815637de..03cc2e07fac5 100755
--- a/scripts/atomic/fallbacks/add_negative
+++ b/scripts/atomic/fallbacks/add_negative
@@ -8,7 +8,7 @@ cat <<EOF
  * if the result is negative, or false when
  * result is greater than or equal to zero.
  */
-static inline bool
+static __always_inline bool
 ${atomic}_add_negative(${int} i, ${atomic}_t *v)
 {
 	return ${atomic}_add_return(i, v) < 0;
diff --git a/scripts/atomic/fallbacks/add_unless b/scripts/atomic/fallbacks/add_unless
index 792533885fbf..daf87a04c850 100755
--- a/scripts/atomic/fallbacks/add_unless
+++ b/scripts/atomic/fallbacks/add_unless
@@ -8,7 +8,7 @@ cat << EOF
  * Atomically adds @a to @v, if @v was not already @u.
  * Returns true if the addition was done.
  */
-static inline bool
+static __always_inline bool
 ${atomic}_add_unless(${atomic}_t *v, ${int} a, ${int} u)
 {
 	return ${atomic}_fetch_add_unless(v, a, u) != u;
diff --git a/scripts/atomic/fallbacks/andnot b/scripts/atomic/fallbacks/andnot
index 9f3a3216b5e3..14efce01225a 100755
--- a/scripts/atomic/fallbacks/andnot
+++ b/scripts/atomic/fallbacks/andnot
@@ -1,5 +1,5 @@
 cat <<EOF
-static inline ${ret}
+static __always_inline ${ret}
 ${atomic}_${pfx}andnot${sfx}${order}(${int} i, ${atomic}_t *v)
 {
 	${retstmt}${atomic}_${pfx}and${sfx}${order}(~i, v);
diff --git a/scripts/atomic/fallbacks/dec b/scripts/atomic/fallbacks/dec
index 10bbc82be31d..118282f3a5a3 100755
--- a/scripts/atomic/fallbacks/dec
+++ b/scripts/atomic/fallbacks/dec
@@ -1,5 +1,5 @@
 cat <<EOF
-static inline ${ret}
+static __always_inline ${ret}
 ${atomic}_${pfx}dec${sfx}${order}(${atomic}_t *v)
 {
 	${retstmt}${atomic}_${pfx}sub${sfx}${order}(1, v);
diff --git a/scripts/atomic/fallbacks/dec_and_test b/scripts/atomic/fallbacks/dec_and_test
index 0ce7103b3df2..f8967a891117 100755
--- a/scripts/atomic/fallbacks/dec_and_test
+++ b/scripts/atomic/fallbacks/dec_and_test
@@ -7,7 +7,7 @@ cat <<EOF
  * returns true if the result is 0, or false for all other
  * cases.
  */
-static inline bool
+static __always_inline bool
 ${atomic}_dec_and_test(${atomic}_t *v)
 {
 	return ${atomic}_dec_return(v) == 0;
diff --git a/scripts/atomic/fallbacks/dec_if_positive b/scripts/atomic/fallbacks/dec_if_positive
index c52eacec43c8..cfb380bd2da6 100755
--- a/scripts/atomic/fallbacks/dec_if_positive
+++ b/scripts/atomic/fallbacks/dec_if_positive
@@ -1,5 +1,5 @@
 cat <<EOF
-static inline ${ret}
+static __always_inline ${ret}
 ${atomic}_dec_if_positive(${atomic}_t *v)
 {
 	${int} dec, c = ${atomic}_read(v);
diff --git a/scripts/atomic/fallbacks/dec_unless_positive b/scripts/atomic/fallbacks/dec_unless_positive
index 8a2578f14268..69cb7aa01f9c 100755
--- a/scripts/atomic/fallbacks/dec_unless_positive
+++ b/scripts/atomic/fallbacks/dec_unless_positive
@@ -1,5 +1,5 @@
 cat <<EOF
-static inline bool
+static __always_inline bool
 ${atomic}_dec_unless_positive(${atomic}_t *v)
 {
 	${int} c = ${atomic}_read(v);
diff --git a/scripts/atomic/fallbacks/fence b/scripts/atomic/fallbacks/fence
index 82f68fa6931a..92a3a4691bab 100755
--- a/scripts/atomic/fallbacks/fence
+++ b/scripts/atomic/fallbacks/fence
@@ -1,5 +1,5 @@
 cat <<EOF
-static inline ${ret}
+static __always_inline ${ret}
 ${atomic}_${pfx}${name}${sfx}(${params})
 {
 	${ret} ret;
diff --git a/scripts/atomic/fallbacks/fetch_add_unless b/scripts/atomic/fallbacks/fetch_add_unless
index d2c091db7eae..fffbc0d16fdf 100755
--- a/scripts/atomic/fallbacks/fetch_add_unless
+++ b/scripts/atomic/fallbacks/fetch_add_unless
@@ -8,7 +8,7 @@ cat << EOF
  * Atomically adds @a to @v, so long as @v was not already @u.
  * Returns original value of @v
  */
-static inline ${int}
+static __always_inline ${int}
 ${atomic}_fetch_add_unless(${atomic}_t *v, ${int} a, ${int} u)
 {
 	${int} c = ${atomic}_read(v);
diff --git a/scripts/atomic/fallbacks/inc b/scripts/atomic/fallbacks/inc
index f866b3ad2353..10751cd62829 100755
--- a/scripts/atomic/fallbacks/inc
+++ b/scripts/atomic/fallbacks/inc
@@ -1,5 +1,5 @@
 cat <<EOF
-static inline ${ret}
+static __always_inline ${ret}
 ${atomic}_${pfx}inc${sfx}${order}(${atomic}_t *v)
 {
 	${retstmt}${atomic}_${pfx}add${sfx}${order}(1, v);
diff --git a/scripts/atomic/fallbacks/inc_and_test b/scripts/atomic/fallbacks/inc_and_test
index 4e2068869f7e..4acea9c93604 100755
--- a/scripts/atomic/fallbacks/inc_and_test
+++ b/scripts/atomic/fallbacks/inc_and_test
@@ -7,7 +7,7 @@ cat <<EOF
  * and returns true if the result is zero, or false for all
  * other cases.
  */
-static inline bool
+static __always_inline bool
 ${atomic}_inc_and_test(${atomic}_t *v)
 {
 	return ${atomic}_inc_return(v) == 0;
diff --git a/scripts/atomic/fallbacks/inc_not_zero b/scripts/atomic/fallbacks/inc_not_zero
index a7c45c8d107c..d9f7b97aab42 100755
--- a/scripts/atomic/fallbacks/inc_not_zero
+++ b/scripts/atomic/fallbacks/inc_not_zero
@@ -6,7 +6,7 @@ cat <<EOF
  * Atomically increments @v by 1, if @v is non-zero.
  * Returns true if the increment was done.
  */
-static inline bool
+static __always_inline bool
 ${atomic}_inc_not_zero(${atomic}_t *v)
 {
 	return ${atomic}_add_unless(v, 1, 0);
diff --git a/scripts/atomic/fallbacks/inc_unless_negative b/scripts/atomic/fallbacks/inc_unless_negative
index 0c266e71dbd4..177a7cb51eda 100755
--- a/scripts/atomic/fallbacks/inc_unless_negative
+++ b/scripts/atomic/fallbacks/inc_unless_negative
@@ -1,5 +1,5 @@
 cat <<EOF
-static inline bool
+static __always_inline bool
 ${atomic}_inc_unless_negative(${atomic}_t *v)
 {
 	${int} c = ${atomic}_read(v);
diff --git a/scripts/atomic/fallbacks/read_acquire b/scripts/atomic/fallbacks/read_acquire
index 75863b5203f7..12fa83cb3a6d 100755
--- a/scripts/atomic/fallbacks/read_acquire
+++ b/scripts/atomic/fallbacks/read_acquire
@@ -1,5 +1,5 @@
 cat <<EOF
-static inline ${ret}
+static __always_inline ${ret}
 ${atomic}_read_acquire(const ${atomic}_t *v)
 {
 	return smp_load_acquire(&(v)->counter);
diff --git a/scripts/atomic/fallbacks/release b/scripts/atomic/fallbacks/release
index 3f628a3802d9..730d2a6d3e07 100755
--- a/scripts/atomic/fallbacks/release
+++ b/scripts/atomic/fallbacks/release
@@ -1,5 +1,5 @@
 cat <<EOF
-static inline ${ret}
+static __always_inline ${ret}
 ${atomic}_${pfx}${name}${sfx}_release(${params})
 {
 	__atomic_release_fence();
diff --git a/scripts/atomic/fallbacks/set_release b/scripts/atomic/fallbacks/set_release
index 45bb5e0cfc08..e5d72c717434 100755
--- a/scripts/atomic/fallbacks/set_release
+++ b/scripts/atomic/fallbacks/set_release
@@ -1,5 +1,5 @@
 cat <<EOF
-static inline void
+static __always_inline void
 ${atomic}_set_release(${atomic}_t *v, ${int} i)
 {
 	smp_store_release(&(v)->counter, i);
diff --git a/scripts/atomic/fallbacks/sub_and_test b/scripts/atomic/fallbacks/sub_and_test
index 289ef17a2d7a..6cfe4ed49746 100755
--- a/scripts/atomic/fallbacks/sub_and_test
+++ b/scripts/atomic/fallbacks/sub_and_test
@@ -8,7 +8,7 @@ cat <<EOF
  * true if the result is zero, or false for all
  * other cases.
  */
-static inline bool
+static __always_inline bool
 ${atomic}_sub_and_test(${int} i, ${atomic}_t *v)
 {
 	return ${atomic}_sub_return(i, v) == 0;
diff --git a/scripts/atomic/fallbacks/try_cmpxchg b/scripts/atomic/fallbacks/try_cmpxchg
index 4ed85e2f5378..c7a26213b978 100755
--- a/scripts/atomic/fallbacks/try_cmpxchg
+++ b/scripts/atomic/fallbacks/try_cmpxchg
@@ -1,5 +1,5 @@
 cat <<EOF
-static inline bool
+static __always_inline bool
 ${atomic}_try_cmpxchg${order}(${atomic}_t *v, ${int} *old, ${int} new)
 {
 	${int} r, o = *old;
diff --git a/scripts/atomic/gen-atomic-fallback.sh b/scripts/atomic/gen-atomic-fallback.sh
index 1bd7c1707633..b6c6f5d306a7 100755
--- a/scripts/atomic/gen-atomic-fallback.sh
+++ b/scripts/atomic/gen-atomic-fallback.sh
@@ -149,6 +149,8 @@ cat << EOF
 #ifndef _LINUX_ATOMIC_FALLBACK_H
 #define _LINUX_ATOMIC_FALLBACK_H
 
+#include <linux/compiler.h>
+
 EOF
 
 for xchg in "xchg" "cmpxchg" "cmpxchg64"; do
-- 
2.24.0.432.g9d3f5f5b63-goog



* [PATCH v3 3/3] kcsan: Prefer __always_inline for fast-path
  2019-11-26 14:04 [PATCH v3 1/3] asm-generic/atomic: Use __always_inline for pure wrappers Marco Elver
  2019-11-26 14:04 ` [PATCH v3 2/3] asm-generic/atomic: Use __always_inline for fallback wrappers Marco Elver
@ 2019-11-26 14:04 ` Marco Elver
  2019-12-03  5:30   ` Randy Dunlap
  2019-12-02 21:36 ` [PATCH v3 1/3] asm-generic/atomic: Use __always_inline for pure wrappers Paul E. McKenney
  2 siblings, 1 reply; 9+ messages in thread
From: Marco Elver @ 2019-11-26 14:04 UTC (permalink / raw)
  To: elver
  Cc: mark.rutland, paulmck, linux-kernel, will, peterz, boqun.feng,
	arnd, dvyukov, linux-arch, kasan-dev, Randy Dunlap

Prefer __always_inline for fast-path functions that are called outside
of user_access_save(), to avoid generating UACCESS warnings when
optimizing for size (CC_OPTIMIZE_FOR_SIZE). This also avoids surprises
with future compiler versions whose inlining heuristics change even
when optimizing for performance.
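
As a minimal, hypothetical sketch of the failure mode (the helper and
caller below are illustrative, not code from this series): any function
reachable while UACCESS is enabled must either be inlined or be on
objtool's whitelist, so a "static inline" helper that the compiler
chooses to emit out of line under -Os becomes a warned-about call,
whereas __always_inline removes the call entirely:

#include <linux/compiler.h>
#include <linux/types.h>
#include <linux/uaccess.h>

/* Hypothetical fast-path predicate, not taken from the kernel tree. */
static __always_inline bool addr_is_marked(const volatile void *ptr,
                                           const void *marker)
{
        return ptr == marker;
}

static bool check_in_uaccess_region(const volatile void *ptr,
                                    const void *marker)
{
        unsigned long flags = user_access_save();
        bool marked;

        /*
         * With plain "static inline", CC_OPTIMIZE_FOR_SIZE may turn this
         * into a real call while UACCESS is enabled, which objtool warns
         * about; __always_inline guarantees the compare is emitted inline.
         */
        marked = addr_is_marked(ptr, marker);

        user_access_restore(flags);
        return marked;
}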

Report: http://lkml.kernel.org/r/58708908-84a0-0a81-a836-ad97e33dbb62@infradead.org
Reported-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Marco Elver <elver@google.com>
---
Rebased on: locking/kcsan branch of tip tree.
---
 kernel/kcsan/atomic.h   |  2 +-
 kernel/kcsan/core.c     | 16 +++++++---------
 kernel/kcsan/encoding.h | 14 +++++++-------
 3 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/kernel/kcsan/atomic.h b/kernel/kcsan/atomic.h
index 576e03ddd6a3..a9c193053491 100644
--- a/kernel/kcsan/atomic.h
+++ b/kernel/kcsan/atomic.h
@@ -18,7 +18,7 @@
  * than cast to volatile. Eventually, we hope to be able to remove this
  * function.
  */
-static inline bool kcsan_is_atomic(const volatile void *ptr)
+static __always_inline bool kcsan_is_atomic(const volatile void *ptr)
 {
 	/* only jiffies for now */
 	return ptr == &jiffies;
diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
index 3314fc29e236..c616fec639cd 100644
--- a/kernel/kcsan/core.c
+++ b/kernel/kcsan/core.c
@@ -78,10 +78,8 @@ static atomic_long_t watchpoints[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1];
  */
 static DEFINE_PER_CPU(long, kcsan_skip);
 
-static inline atomic_long_t *find_watchpoint(unsigned long addr,
-					     size_t size,
-					     bool expect_write,
-					     long *encoded_watchpoint)
+static __always_inline atomic_long_t *
+find_watchpoint(unsigned long addr, size_t size, bool expect_write, long *encoded_watchpoint)
 {
 	const int slot = watchpoint_slot(addr);
 	const unsigned long addr_masked = addr & WATCHPOINT_ADDR_MASK;
@@ -146,7 +144,7 @@ insert_watchpoint(unsigned long addr, size_t size, bool is_write)
  *	2. the thread that set up the watchpoint already removed it;
  *	3. the watchpoint was removed and then re-used.
  */
-static inline bool
+static __always_inline bool
 try_consume_watchpoint(atomic_long_t *watchpoint, long encoded_watchpoint)
 {
 	return atomic_long_try_cmpxchg_relaxed(watchpoint, &encoded_watchpoint, CONSUMED_WATCHPOINT);
@@ -160,7 +158,7 @@ static inline bool remove_watchpoint(atomic_long_t *watchpoint)
 	return atomic_long_xchg_relaxed(watchpoint, INVALID_WATCHPOINT) != CONSUMED_WATCHPOINT;
 }
 
-static inline struct kcsan_ctx *get_ctx(void)
+static __always_inline struct kcsan_ctx *get_ctx(void)
 {
 	/*
 	 * In interrupts, use raw_cpu_ptr to avoid unnecessary checks, that would
@@ -169,7 +167,7 @@ static inline struct kcsan_ctx *get_ctx(void)
 	return in_task() ? &current->kcsan_ctx : raw_cpu_ptr(&kcsan_cpu_ctx);
 }
 
-static inline bool is_atomic(const volatile void *ptr)
+static __always_inline bool is_atomic(const volatile void *ptr)
 {
 	struct kcsan_ctx *ctx = get_ctx();
 
@@ -193,7 +191,7 @@ static inline bool is_atomic(const volatile void *ptr)
 	return kcsan_is_atomic(ptr);
 }
 
-static inline bool should_watch(const volatile void *ptr, int type)
+static __always_inline bool should_watch(const volatile void *ptr, int type)
 {
 	/*
 	 * Never set up watchpoints when memory operations are atomic.
@@ -226,7 +224,7 @@ static inline void reset_kcsan_skip(void)
 	this_cpu_write(kcsan_skip, skip_count);
 }
 
-static inline bool kcsan_is_enabled(void)
+static __always_inline bool kcsan_is_enabled(void)
 {
 	return READ_ONCE(kcsan_enabled) && get_ctx()->disable_count == 0;
 }
diff --git a/kernel/kcsan/encoding.h b/kernel/kcsan/encoding.h
index b63890e86449..f03562aaf2eb 100644
--- a/kernel/kcsan/encoding.h
+++ b/kernel/kcsan/encoding.h
@@ -59,10 +59,10 @@ encode_watchpoint(unsigned long addr, size_t size, bool is_write)
 		      (addr & WATCHPOINT_ADDR_MASK));
 }
 
-static inline bool decode_watchpoint(long watchpoint,
-				     unsigned long *addr_masked,
-				     size_t *size,
-				     bool *is_write)
+static __always_inline bool decode_watchpoint(long watchpoint,
+					      unsigned long *addr_masked,
+					      size_t *size,
+					      bool *is_write)
 {
 	if (watchpoint == INVALID_WATCHPOINT ||
 	    watchpoint == CONSUMED_WATCHPOINT)
@@ -78,13 +78,13 @@ static inline bool decode_watchpoint(long watchpoint,
 /*
  * Return watchpoint slot for an address.
  */
-static inline int watchpoint_slot(unsigned long addr)
+static __always_inline int watchpoint_slot(unsigned long addr)
 {
 	return (addr / PAGE_SIZE) % CONFIG_KCSAN_NUM_WATCHPOINTS;
 }
 
-static inline bool matching_access(unsigned long addr1, size_t size1,
-				   unsigned long addr2, size_t size2)
+static __always_inline bool matching_access(unsigned long addr1, size_t size1,
+					    unsigned long addr2, size_t size2)
 {
 	unsigned long end_range1 = addr1 + size1 - 1;
 	unsigned long end_range2 = addr2 + size2 - 1;
-- 
2.24.0.432.g9d3f5f5b63-goog



* Re: [PATCH v3 1/3] asm-generic/atomic: Use __always_inline for pure wrappers
  2019-11-26 14:04 [PATCH v3 1/3] asm-generic/atomic: Use __always_inline for pure wrappers Marco Elver
  2019-11-26 14:04 ` [PATCH v3 2/3] asm-generic/atomic: Use __always_inline for fallback wrappers Marco Elver
  2019-11-26 14:04 ` [PATCH v3 3/3] kcsan: Prefer __always_inline for fast-path Marco Elver
@ 2019-12-02 21:36 ` Paul E. McKenney
  2 siblings, 0 replies; 9+ messages in thread
From: Paul E. McKenney @ 2019-12-02 21:36 UTC (permalink / raw)
  To: Marco Elver
  Cc: mark.rutland, linux-kernel, will, peterz, boqun.feng, arnd,
	dvyukov, linux-arch, kasan-dev, Randy Dunlap

On Tue, Nov 26, 2019 at 03:04:04PM +0100, Marco Elver wrote:
> Prefer __always_inline for atomic wrappers. When building for size
> (CC_OPTIMIZE_FOR_SIZE), some compilers appear to be less inclined to
> inline even relatively small static inline functions that are assumed to
> be inlinable such as atomic ops. This can cause problems, for example in
> UACCESS regions.
> 
> By using __always_inline, we let the real implementation and not the
> wrapper determine the final inlining preference.
> 
> For x86 tinyconfig we observe:
> - vmlinux baseline: 1316204
> - vmlinux with patch: 1315988 (-216 bytes)
> 
> This came up when addressing UACCESS warnings with CC_OPTIMIZE_FOR_SIZE
> in the KCSAN runtime:
> http://lkml.kernel.org/r/58708908-84a0-0a81-a836-ad97e33dbb62@infradead.org
> 
> Reported-by: Randy Dunlap <rdunlap@infradead.org>
> Signed-off-by: Marco Elver <elver@google.com>
> Acked-by: Mark Rutland <mark.rutland@arm.com>

I queued the first two of these, thank you all!

I got conflicts on the third, probably due to mainline having moved on.
I will retry after I do my -rc1 rebase.

Randy, if you have time, could you please check whether the third patch
fixes things for you?

							Thanx, Paul

> ---
> v3:
> * Reorder includes alphabetically (except <asm/..>).
> 
> v2: http://lkml.kernel.org/r/20191126114121.85552-1-elver@google.com
> * Add missing '#include <linux/compiler.h>'
> * Add size diff to commit message.
> 
> v1: http://lkml.kernel.org/r/20191122154221.247680-1-elver@google.com
> ---
>  include/asm-generic/atomic-instrumented.h | 335 +++++++++++-----------
>  include/asm-generic/atomic-long.h         | 331 ++++++++++-----------
>  scripts/atomic/gen-atomic-instrumented.sh |   7 +-
>  scripts/atomic/gen-atomic-long.sh         |   3 +-
>  4 files changed, 340 insertions(+), 336 deletions(-)
> 
> diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h
> index 3dc0f38544f6..63869ded73ac 100644
> --- a/include/asm-generic/atomic-instrumented.h
> +++ b/include/asm-generic/atomic-instrumented.h
> @@ -18,22 +18,23 @@
>  #define _ASM_GENERIC_ATOMIC_INSTRUMENTED_H
>  
>  #include <linux/build_bug.h>
> +#include <linux/compiler.h>
>  #include <linux/kasan-checks.h>
>  #include <linux/kcsan-checks.h>
>  
> -static inline void __atomic_check_read(const volatile void *v, size_t size)
> +static __always_inline void __atomic_check_read(const volatile void *v, size_t size)
>  {
>  	kasan_check_read(v, size);
>  	kcsan_check_atomic_read(v, size);
>  }
>  
> -static inline void __atomic_check_write(const volatile void *v, size_t size)
> +static __always_inline void __atomic_check_write(const volatile void *v, size_t size)
>  {
>  	kasan_check_write(v, size);
>  	kcsan_check_atomic_write(v, size);
>  }
>  
> -static inline int
> +static __always_inline int
>  atomic_read(const atomic_t *v)
>  {
>  	__atomic_check_read(v, sizeof(*v));
> @@ -42,7 +43,7 @@ atomic_read(const atomic_t *v)
>  #define atomic_read atomic_read
>  
>  #if defined(arch_atomic_read_acquire)
> -static inline int
> +static __always_inline int
>  atomic_read_acquire(const atomic_t *v)
>  {
>  	__atomic_check_read(v, sizeof(*v));
> @@ -51,7 +52,7 @@ atomic_read_acquire(const atomic_t *v)
>  #define atomic_read_acquire atomic_read_acquire
>  #endif
>  
> -static inline void
> +static __always_inline void
>  atomic_set(atomic_t *v, int i)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -60,7 +61,7 @@ atomic_set(atomic_t *v, int i)
>  #define atomic_set atomic_set
>  
>  #if defined(arch_atomic_set_release)
> -static inline void
> +static __always_inline void
>  atomic_set_release(atomic_t *v, int i)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -69,7 +70,7 @@ atomic_set_release(atomic_t *v, int i)
>  #define atomic_set_release atomic_set_release
>  #endif
>  
> -static inline void
> +static __always_inline void
>  atomic_add(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -78,7 +79,7 @@ atomic_add(int i, atomic_t *v)
>  #define atomic_add atomic_add
>  
>  #if !defined(arch_atomic_add_return_relaxed) || defined(arch_atomic_add_return)
> -static inline int
> +static __always_inline int
>  atomic_add_return(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -88,7 +89,7 @@ atomic_add_return(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_add_return_acquire)
> -static inline int
> +static __always_inline int
>  atomic_add_return_acquire(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -98,7 +99,7 @@ atomic_add_return_acquire(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_add_return_release)
> -static inline int
> +static __always_inline int
>  atomic_add_return_release(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -108,7 +109,7 @@ atomic_add_return_release(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_add_return_relaxed)
> -static inline int
> +static __always_inline int
>  atomic_add_return_relaxed(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -118,7 +119,7 @@ atomic_add_return_relaxed(int i, atomic_t *v)
>  #endif
>  
>  #if !defined(arch_atomic_fetch_add_relaxed) || defined(arch_atomic_fetch_add)
> -static inline int
> +static __always_inline int
>  atomic_fetch_add(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -128,7 +129,7 @@ atomic_fetch_add(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_fetch_add_acquire)
> -static inline int
> +static __always_inline int
>  atomic_fetch_add_acquire(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -138,7 +139,7 @@ atomic_fetch_add_acquire(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_fetch_add_release)
> -static inline int
> +static __always_inline int
>  atomic_fetch_add_release(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -148,7 +149,7 @@ atomic_fetch_add_release(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_fetch_add_relaxed)
> -static inline int
> +static __always_inline int
>  atomic_fetch_add_relaxed(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -157,7 +158,7 @@ atomic_fetch_add_relaxed(int i, atomic_t *v)
>  #define atomic_fetch_add_relaxed atomic_fetch_add_relaxed
>  #endif
>  
> -static inline void
> +static __always_inline void
>  atomic_sub(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -166,7 +167,7 @@ atomic_sub(int i, atomic_t *v)
>  #define atomic_sub atomic_sub
>  
>  #if !defined(arch_atomic_sub_return_relaxed) || defined(arch_atomic_sub_return)
> -static inline int
> +static __always_inline int
>  atomic_sub_return(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -176,7 +177,7 @@ atomic_sub_return(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_sub_return_acquire)
> -static inline int
> +static __always_inline int
>  atomic_sub_return_acquire(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -186,7 +187,7 @@ atomic_sub_return_acquire(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_sub_return_release)
> -static inline int
> +static __always_inline int
>  atomic_sub_return_release(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -196,7 +197,7 @@ atomic_sub_return_release(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_sub_return_relaxed)
> -static inline int
> +static __always_inline int
>  atomic_sub_return_relaxed(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -206,7 +207,7 @@ atomic_sub_return_relaxed(int i, atomic_t *v)
>  #endif
>  
>  #if !defined(arch_atomic_fetch_sub_relaxed) || defined(arch_atomic_fetch_sub)
> -static inline int
> +static __always_inline int
>  atomic_fetch_sub(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -216,7 +217,7 @@ atomic_fetch_sub(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_fetch_sub_acquire)
> -static inline int
> +static __always_inline int
>  atomic_fetch_sub_acquire(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -226,7 +227,7 @@ atomic_fetch_sub_acquire(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_fetch_sub_release)
> -static inline int
> +static __always_inline int
>  atomic_fetch_sub_release(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -236,7 +237,7 @@ atomic_fetch_sub_release(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_fetch_sub_relaxed)
> -static inline int
> +static __always_inline int
>  atomic_fetch_sub_relaxed(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -246,7 +247,7 @@ atomic_fetch_sub_relaxed(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_inc)
> -static inline void
> +static __always_inline void
>  atomic_inc(atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -256,7 +257,7 @@ atomic_inc(atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_inc_return)
> -static inline int
> +static __always_inline int
>  atomic_inc_return(atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -266,7 +267,7 @@ atomic_inc_return(atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_inc_return_acquire)
> -static inline int
> +static __always_inline int
>  atomic_inc_return_acquire(atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -276,7 +277,7 @@ atomic_inc_return_acquire(atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_inc_return_release)
> -static inline int
> +static __always_inline int
>  atomic_inc_return_release(atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -286,7 +287,7 @@ atomic_inc_return_release(atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_inc_return_relaxed)
> -static inline int
> +static __always_inline int
>  atomic_inc_return_relaxed(atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -296,7 +297,7 @@ atomic_inc_return_relaxed(atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_fetch_inc)
> -static inline int
> +static __always_inline int
>  atomic_fetch_inc(atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -306,7 +307,7 @@ atomic_fetch_inc(atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_fetch_inc_acquire)
> -static inline int
> +static __always_inline int
>  atomic_fetch_inc_acquire(atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -316,7 +317,7 @@ atomic_fetch_inc_acquire(atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_fetch_inc_release)
> -static inline int
> +static __always_inline int
>  atomic_fetch_inc_release(atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -326,7 +327,7 @@ atomic_fetch_inc_release(atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_fetch_inc_relaxed)
> -static inline int
> +static __always_inline int
>  atomic_fetch_inc_relaxed(atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -336,7 +337,7 @@ atomic_fetch_inc_relaxed(atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_dec)
> -static inline void
> +static __always_inline void
>  atomic_dec(atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -346,7 +347,7 @@ atomic_dec(atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_dec_return)
> -static inline int
> +static __always_inline int
>  atomic_dec_return(atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -356,7 +357,7 @@ atomic_dec_return(atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_dec_return_acquire)
> -static inline int
> +static __always_inline int
>  atomic_dec_return_acquire(atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -366,7 +367,7 @@ atomic_dec_return_acquire(atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_dec_return_release)
> -static inline int
> +static __always_inline int
>  atomic_dec_return_release(atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -376,7 +377,7 @@ atomic_dec_return_release(atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_dec_return_relaxed)
> -static inline int
> +static __always_inline int
>  atomic_dec_return_relaxed(atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -386,7 +387,7 @@ atomic_dec_return_relaxed(atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_fetch_dec)
> -static inline int
> +static __always_inline int
>  atomic_fetch_dec(atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -396,7 +397,7 @@ atomic_fetch_dec(atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_fetch_dec_acquire)
> -static inline int
> +static __always_inline int
>  atomic_fetch_dec_acquire(atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -406,7 +407,7 @@ atomic_fetch_dec_acquire(atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_fetch_dec_release)
> -static inline int
> +static __always_inline int
>  atomic_fetch_dec_release(atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -416,7 +417,7 @@ atomic_fetch_dec_release(atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_fetch_dec_relaxed)
> -static inline int
> +static __always_inline int
>  atomic_fetch_dec_relaxed(atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -425,7 +426,7 @@ atomic_fetch_dec_relaxed(atomic_t *v)
>  #define atomic_fetch_dec_relaxed atomic_fetch_dec_relaxed
>  #endif
>  
> -static inline void
> +static __always_inline void
>  atomic_and(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -434,7 +435,7 @@ atomic_and(int i, atomic_t *v)
>  #define atomic_and atomic_and
>  
>  #if !defined(arch_atomic_fetch_and_relaxed) || defined(arch_atomic_fetch_and)
> -static inline int
> +static __always_inline int
>  atomic_fetch_and(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -444,7 +445,7 @@ atomic_fetch_and(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_fetch_and_acquire)
> -static inline int
> +static __always_inline int
>  atomic_fetch_and_acquire(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -454,7 +455,7 @@ atomic_fetch_and_acquire(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_fetch_and_release)
> -static inline int
> +static __always_inline int
>  atomic_fetch_and_release(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -464,7 +465,7 @@ atomic_fetch_and_release(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_fetch_and_relaxed)
> -static inline int
> +static __always_inline int
>  atomic_fetch_and_relaxed(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -474,7 +475,7 @@ atomic_fetch_and_relaxed(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_andnot)
> -static inline void
> +static __always_inline void
>  atomic_andnot(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -484,7 +485,7 @@ atomic_andnot(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_fetch_andnot)
> -static inline int
> +static __always_inline int
>  atomic_fetch_andnot(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -494,7 +495,7 @@ atomic_fetch_andnot(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_fetch_andnot_acquire)
> -static inline int
> +static __always_inline int
>  atomic_fetch_andnot_acquire(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -504,7 +505,7 @@ atomic_fetch_andnot_acquire(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_fetch_andnot_release)
> -static inline int
> +static __always_inline int
>  atomic_fetch_andnot_release(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -514,7 +515,7 @@ atomic_fetch_andnot_release(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_fetch_andnot_relaxed)
> -static inline int
> +static __always_inline int
>  atomic_fetch_andnot_relaxed(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -523,7 +524,7 @@ atomic_fetch_andnot_relaxed(int i, atomic_t *v)
>  #define atomic_fetch_andnot_relaxed atomic_fetch_andnot_relaxed
>  #endif
>  
> -static inline void
> +static __always_inline void
>  atomic_or(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -532,7 +533,7 @@ atomic_or(int i, atomic_t *v)
>  #define atomic_or atomic_or
>  
>  #if !defined(arch_atomic_fetch_or_relaxed) || defined(arch_atomic_fetch_or)
> -static inline int
> +static __always_inline int
>  atomic_fetch_or(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -542,7 +543,7 @@ atomic_fetch_or(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_fetch_or_acquire)
> -static inline int
> +static __always_inline int
>  atomic_fetch_or_acquire(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -552,7 +553,7 @@ atomic_fetch_or_acquire(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_fetch_or_release)
> -static inline int
> +static __always_inline int
>  atomic_fetch_or_release(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -562,7 +563,7 @@ atomic_fetch_or_release(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_fetch_or_relaxed)
> -static inline int
> +static __always_inline int
>  atomic_fetch_or_relaxed(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -571,7 +572,7 @@ atomic_fetch_or_relaxed(int i, atomic_t *v)
>  #define atomic_fetch_or_relaxed atomic_fetch_or_relaxed
>  #endif
>  
> -static inline void
> +static __always_inline void
>  atomic_xor(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -580,7 +581,7 @@ atomic_xor(int i, atomic_t *v)
>  #define atomic_xor atomic_xor
>  
>  #if !defined(arch_atomic_fetch_xor_relaxed) || defined(arch_atomic_fetch_xor)
> -static inline int
> +static __always_inline int
>  atomic_fetch_xor(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -590,7 +591,7 @@ atomic_fetch_xor(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_fetch_xor_acquire)
> -static inline int
> +static __always_inline int
>  atomic_fetch_xor_acquire(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -600,7 +601,7 @@ atomic_fetch_xor_acquire(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_fetch_xor_release)
> -static inline int
> +static __always_inline int
>  atomic_fetch_xor_release(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -610,7 +611,7 @@ atomic_fetch_xor_release(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_fetch_xor_relaxed)
> -static inline int
> +static __always_inline int
>  atomic_fetch_xor_relaxed(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -620,7 +621,7 @@ atomic_fetch_xor_relaxed(int i, atomic_t *v)
>  #endif
>  
>  #if !defined(arch_atomic_xchg_relaxed) || defined(arch_atomic_xchg)
> -static inline int
> +static __always_inline int
>  atomic_xchg(atomic_t *v, int i)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -630,7 +631,7 @@ atomic_xchg(atomic_t *v, int i)
>  #endif
>  
>  #if defined(arch_atomic_xchg_acquire)
> -static inline int
> +static __always_inline int
>  atomic_xchg_acquire(atomic_t *v, int i)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -640,7 +641,7 @@ atomic_xchg_acquire(atomic_t *v, int i)
>  #endif
>  
>  #if defined(arch_atomic_xchg_release)
> -static inline int
> +static __always_inline int
>  atomic_xchg_release(atomic_t *v, int i)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -650,7 +651,7 @@ atomic_xchg_release(atomic_t *v, int i)
>  #endif
>  
>  #if defined(arch_atomic_xchg_relaxed)
> -static inline int
> +static __always_inline int
>  atomic_xchg_relaxed(atomic_t *v, int i)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -660,7 +661,7 @@ atomic_xchg_relaxed(atomic_t *v, int i)
>  #endif
>  
>  #if !defined(arch_atomic_cmpxchg_relaxed) || defined(arch_atomic_cmpxchg)
> -static inline int
> +static __always_inline int
>  atomic_cmpxchg(atomic_t *v, int old, int new)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -670,7 +671,7 @@ atomic_cmpxchg(atomic_t *v, int old, int new)
>  #endif
>  
>  #if defined(arch_atomic_cmpxchg_acquire)
> -static inline int
> +static __always_inline int
>  atomic_cmpxchg_acquire(atomic_t *v, int old, int new)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -680,7 +681,7 @@ atomic_cmpxchg_acquire(atomic_t *v, int old, int new)
>  #endif
>  
>  #if defined(arch_atomic_cmpxchg_release)
> -static inline int
> +static __always_inline int
>  atomic_cmpxchg_release(atomic_t *v, int old, int new)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -690,7 +691,7 @@ atomic_cmpxchg_release(atomic_t *v, int old, int new)
>  #endif
>  
>  #if defined(arch_atomic_cmpxchg_relaxed)
> -static inline int
> +static __always_inline int
>  atomic_cmpxchg_relaxed(atomic_t *v, int old, int new)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -700,7 +701,7 @@ atomic_cmpxchg_relaxed(atomic_t *v, int old, int new)
>  #endif
>  
>  #if defined(arch_atomic_try_cmpxchg)
> -static inline bool
> +static __always_inline bool
>  atomic_try_cmpxchg(atomic_t *v, int *old, int new)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -711,7 +712,7 @@ atomic_try_cmpxchg(atomic_t *v, int *old, int new)
>  #endif
>  
>  #if defined(arch_atomic_try_cmpxchg_acquire)
> -static inline bool
> +static __always_inline bool
>  atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -722,7 +723,7 @@ atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new)
>  #endif
>  
>  #if defined(arch_atomic_try_cmpxchg_release)
> -static inline bool
> +static __always_inline bool
>  atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -733,7 +734,7 @@ atomic_try_cmpxchg_release(atomic_t *v, int *old, int new)
>  #endif
>  
>  #if defined(arch_atomic_try_cmpxchg_relaxed)
> -static inline bool
> +static __always_inline bool
>  atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -744,7 +745,7 @@ atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new)
>  #endif
>  
>  #if defined(arch_atomic_sub_and_test)
> -static inline bool
> +static __always_inline bool
>  atomic_sub_and_test(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -754,7 +755,7 @@ atomic_sub_and_test(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_dec_and_test)
> -static inline bool
> +static __always_inline bool
>  atomic_dec_and_test(atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -764,7 +765,7 @@ atomic_dec_and_test(atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_inc_and_test)
> -static inline bool
> +static __always_inline bool
>  atomic_inc_and_test(atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -774,7 +775,7 @@ atomic_inc_and_test(atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_add_negative)
> -static inline bool
> +static __always_inline bool
>  atomic_add_negative(int i, atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -784,7 +785,7 @@ atomic_add_negative(int i, atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_fetch_add_unless)
> -static inline int
> +static __always_inline int
>  atomic_fetch_add_unless(atomic_t *v, int a, int u)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -794,7 +795,7 @@ atomic_fetch_add_unless(atomic_t *v, int a, int u)
>  #endif
>  
>  #if defined(arch_atomic_add_unless)
> -static inline bool
> +static __always_inline bool
>  atomic_add_unless(atomic_t *v, int a, int u)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -804,7 +805,7 @@ atomic_add_unless(atomic_t *v, int a, int u)
>  #endif
>  
>  #if defined(arch_atomic_inc_not_zero)
> -static inline bool
> +static __always_inline bool
>  atomic_inc_not_zero(atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -814,7 +815,7 @@ atomic_inc_not_zero(atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_inc_unless_negative)
> -static inline bool
> +static __always_inline bool
>  atomic_inc_unless_negative(atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -824,7 +825,7 @@ atomic_inc_unless_negative(atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_dec_unless_positive)
> -static inline bool
> +static __always_inline bool
>  atomic_dec_unless_positive(atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -834,7 +835,7 @@ atomic_dec_unless_positive(atomic_t *v)
>  #endif
>  
>  #if defined(arch_atomic_dec_if_positive)
> -static inline int
> +static __always_inline int
>  atomic_dec_if_positive(atomic_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -843,7 +844,7 @@ atomic_dec_if_positive(atomic_t *v)
>  #define atomic_dec_if_positive atomic_dec_if_positive
>  #endif
>  
> -static inline s64
> +static __always_inline s64
>  atomic64_read(const atomic64_t *v)
>  {
>  	__atomic_check_read(v, sizeof(*v));
> @@ -852,7 +853,7 @@ atomic64_read(const atomic64_t *v)
>  #define atomic64_read atomic64_read
>  
>  #if defined(arch_atomic64_read_acquire)
> -static inline s64
> +static __always_inline s64
>  atomic64_read_acquire(const atomic64_t *v)
>  {
>  	__atomic_check_read(v, sizeof(*v));
> @@ -861,7 +862,7 @@ atomic64_read_acquire(const atomic64_t *v)
>  #define atomic64_read_acquire atomic64_read_acquire
>  #endif
>  
> -static inline void
> +static __always_inline void
>  atomic64_set(atomic64_t *v, s64 i)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -870,7 +871,7 @@ atomic64_set(atomic64_t *v, s64 i)
>  #define atomic64_set atomic64_set
>  
>  #if defined(arch_atomic64_set_release)
> -static inline void
> +static __always_inline void
>  atomic64_set_release(atomic64_t *v, s64 i)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -879,7 +880,7 @@ atomic64_set_release(atomic64_t *v, s64 i)
>  #define atomic64_set_release atomic64_set_release
>  #endif
>  
> -static inline void
> +static __always_inline void
>  atomic64_add(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -888,7 +889,7 @@ atomic64_add(s64 i, atomic64_t *v)
>  #define atomic64_add atomic64_add
>  
>  #if !defined(arch_atomic64_add_return_relaxed) || defined(arch_atomic64_add_return)
> -static inline s64
> +static __always_inline s64
>  atomic64_add_return(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -898,7 +899,7 @@ atomic64_add_return(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_add_return_acquire)
> -static inline s64
> +static __always_inline s64
>  atomic64_add_return_acquire(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -908,7 +909,7 @@ atomic64_add_return_acquire(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_add_return_release)
> -static inline s64
> +static __always_inline s64
>  atomic64_add_return_release(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -918,7 +919,7 @@ atomic64_add_return_release(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_add_return_relaxed)
> -static inline s64
> +static __always_inline s64
>  atomic64_add_return_relaxed(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -928,7 +929,7 @@ atomic64_add_return_relaxed(s64 i, atomic64_t *v)
>  #endif
>  
>  #if !defined(arch_atomic64_fetch_add_relaxed) || defined(arch_atomic64_fetch_add)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_add(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -938,7 +939,7 @@ atomic64_fetch_add(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_fetch_add_acquire)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_add_acquire(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -948,7 +949,7 @@ atomic64_fetch_add_acquire(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_fetch_add_release)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_add_release(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -958,7 +959,7 @@ atomic64_fetch_add_release(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_fetch_add_relaxed)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_add_relaxed(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -967,7 +968,7 @@ atomic64_fetch_add_relaxed(s64 i, atomic64_t *v)
>  #define atomic64_fetch_add_relaxed atomic64_fetch_add_relaxed
>  #endif
>  
> -static inline void
> +static __always_inline void
>  atomic64_sub(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -976,7 +977,7 @@ atomic64_sub(s64 i, atomic64_t *v)
>  #define atomic64_sub atomic64_sub
>  
>  #if !defined(arch_atomic64_sub_return_relaxed) || defined(arch_atomic64_sub_return)
> -static inline s64
> +static __always_inline s64
>  atomic64_sub_return(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -986,7 +987,7 @@ atomic64_sub_return(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_sub_return_acquire)
> -static inline s64
> +static __always_inline s64
>  atomic64_sub_return_acquire(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -996,7 +997,7 @@ atomic64_sub_return_acquire(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_sub_return_release)
> -static inline s64
> +static __always_inline s64
>  atomic64_sub_return_release(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1006,7 +1007,7 @@ atomic64_sub_return_release(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_sub_return_relaxed)
> -static inline s64
> +static __always_inline s64
>  atomic64_sub_return_relaxed(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1016,7 +1017,7 @@ atomic64_sub_return_relaxed(s64 i, atomic64_t *v)
>  #endif
>  
>  #if !defined(arch_atomic64_fetch_sub_relaxed) || defined(arch_atomic64_fetch_sub)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_sub(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1026,7 +1027,7 @@ atomic64_fetch_sub(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_fetch_sub_acquire)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_sub_acquire(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1036,7 +1037,7 @@ atomic64_fetch_sub_acquire(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_fetch_sub_release)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_sub_release(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1046,7 +1047,7 @@ atomic64_fetch_sub_release(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_fetch_sub_relaxed)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_sub_relaxed(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1056,7 +1057,7 @@ atomic64_fetch_sub_relaxed(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_inc)
> -static inline void
> +static __always_inline void
>  atomic64_inc(atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1066,7 +1067,7 @@ atomic64_inc(atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_inc_return)
> -static inline s64
> +static __always_inline s64
>  atomic64_inc_return(atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1076,7 +1077,7 @@ atomic64_inc_return(atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_inc_return_acquire)
> -static inline s64
> +static __always_inline s64
>  atomic64_inc_return_acquire(atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1086,7 +1087,7 @@ atomic64_inc_return_acquire(atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_inc_return_release)
> -static inline s64
> +static __always_inline s64
>  atomic64_inc_return_release(atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1096,7 +1097,7 @@ atomic64_inc_return_release(atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_inc_return_relaxed)
> -static inline s64
> +static __always_inline s64
>  atomic64_inc_return_relaxed(atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1106,7 +1107,7 @@ atomic64_inc_return_relaxed(atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_fetch_inc)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_inc(atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1116,7 +1117,7 @@ atomic64_fetch_inc(atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_fetch_inc_acquire)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_inc_acquire(atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1126,7 +1127,7 @@ atomic64_fetch_inc_acquire(atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_fetch_inc_release)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_inc_release(atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1136,7 +1137,7 @@ atomic64_fetch_inc_release(atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_fetch_inc_relaxed)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_inc_relaxed(atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1146,7 +1147,7 @@ atomic64_fetch_inc_relaxed(atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_dec)
> -static inline void
> +static __always_inline void
>  atomic64_dec(atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1156,7 +1157,7 @@ atomic64_dec(atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_dec_return)
> -static inline s64
> +static __always_inline s64
>  atomic64_dec_return(atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1166,7 +1167,7 @@ atomic64_dec_return(atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_dec_return_acquire)
> -static inline s64
> +static __always_inline s64
>  atomic64_dec_return_acquire(atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1176,7 +1177,7 @@ atomic64_dec_return_acquire(atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_dec_return_release)
> -static inline s64
> +static __always_inline s64
>  atomic64_dec_return_release(atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1186,7 +1187,7 @@ atomic64_dec_return_release(atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_dec_return_relaxed)
> -static inline s64
> +static __always_inline s64
>  atomic64_dec_return_relaxed(atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1196,7 +1197,7 @@ atomic64_dec_return_relaxed(atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_fetch_dec)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_dec(atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1206,7 +1207,7 @@ atomic64_fetch_dec(atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_fetch_dec_acquire)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_dec_acquire(atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1216,7 +1217,7 @@ atomic64_fetch_dec_acquire(atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_fetch_dec_release)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_dec_release(atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1226,7 +1227,7 @@ atomic64_fetch_dec_release(atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_fetch_dec_relaxed)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_dec_relaxed(atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1235,7 +1236,7 @@ atomic64_fetch_dec_relaxed(atomic64_t *v)
>  #define atomic64_fetch_dec_relaxed atomic64_fetch_dec_relaxed
>  #endif
>  
> -static inline void
> +static __always_inline void
>  atomic64_and(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1244,7 +1245,7 @@ atomic64_and(s64 i, atomic64_t *v)
>  #define atomic64_and atomic64_and
>  
>  #if !defined(arch_atomic64_fetch_and_relaxed) || defined(arch_atomic64_fetch_and)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_and(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1254,7 +1255,7 @@ atomic64_fetch_and(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_fetch_and_acquire)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_and_acquire(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1264,7 +1265,7 @@ atomic64_fetch_and_acquire(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_fetch_and_release)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_and_release(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1274,7 +1275,7 @@ atomic64_fetch_and_release(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_fetch_and_relaxed)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_and_relaxed(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1284,7 +1285,7 @@ atomic64_fetch_and_relaxed(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_andnot)
> -static inline void
> +static __always_inline void
>  atomic64_andnot(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1294,7 +1295,7 @@ atomic64_andnot(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_fetch_andnot)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_andnot(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1304,7 +1305,7 @@ atomic64_fetch_andnot(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_fetch_andnot_acquire)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1314,7 +1315,7 @@ atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_fetch_andnot_release)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_andnot_release(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1324,7 +1325,7 @@ atomic64_fetch_andnot_release(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_fetch_andnot_relaxed)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_andnot_relaxed(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1333,7 +1334,7 @@ atomic64_fetch_andnot_relaxed(s64 i, atomic64_t *v)
>  #define atomic64_fetch_andnot_relaxed atomic64_fetch_andnot_relaxed
>  #endif
>  
> -static inline void
> +static __always_inline void
>  atomic64_or(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1342,7 +1343,7 @@ atomic64_or(s64 i, atomic64_t *v)
>  #define atomic64_or atomic64_or
>  
>  #if !defined(arch_atomic64_fetch_or_relaxed) || defined(arch_atomic64_fetch_or)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_or(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1352,7 +1353,7 @@ atomic64_fetch_or(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_fetch_or_acquire)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_or_acquire(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1362,7 +1363,7 @@ atomic64_fetch_or_acquire(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_fetch_or_release)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_or_release(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1372,7 +1373,7 @@ atomic64_fetch_or_release(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_fetch_or_relaxed)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_or_relaxed(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1381,7 +1382,7 @@ atomic64_fetch_or_relaxed(s64 i, atomic64_t *v)
>  #define atomic64_fetch_or_relaxed atomic64_fetch_or_relaxed
>  #endif
>  
> -static inline void
> +static __always_inline void
>  atomic64_xor(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1390,7 +1391,7 @@ atomic64_xor(s64 i, atomic64_t *v)
>  #define atomic64_xor atomic64_xor
>  
>  #if !defined(arch_atomic64_fetch_xor_relaxed) || defined(arch_atomic64_fetch_xor)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_xor(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1400,7 +1401,7 @@ atomic64_fetch_xor(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_fetch_xor_acquire)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_xor_acquire(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1410,7 +1411,7 @@ atomic64_fetch_xor_acquire(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_fetch_xor_release)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_xor_release(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1420,7 +1421,7 @@ atomic64_fetch_xor_release(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_fetch_xor_relaxed)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_xor_relaxed(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1430,7 +1431,7 @@ atomic64_fetch_xor_relaxed(s64 i, atomic64_t *v)
>  #endif
>  
>  #if !defined(arch_atomic64_xchg_relaxed) || defined(arch_atomic64_xchg)
> -static inline s64
> +static __always_inline s64
>  atomic64_xchg(atomic64_t *v, s64 i)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1440,7 +1441,7 @@ atomic64_xchg(atomic64_t *v, s64 i)
>  #endif
>  
>  #if defined(arch_atomic64_xchg_acquire)
> -static inline s64
> +static __always_inline s64
>  atomic64_xchg_acquire(atomic64_t *v, s64 i)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1450,7 +1451,7 @@ atomic64_xchg_acquire(atomic64_t *v, s64 i)
>  #endif
>  
>  #if defined(arch_atomic64_xchg_release)
> -static inline s64
> +static __always_inline s64
>  atomic64_xchg_release(atomic64_t *v, s64 i)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1460,7 +1461,7 @@ atomic64_xchg_release(atomic64_t *v, s64 i)
>  #endif
>  
>  #if defined(arch_atomic64_xchg_relaxed)
> -static inline s64
> +static __always_inline s64
>  atomic64_xchg_relaxed(atomic64_t *v, s64 i)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1470,7 +1471,7 @@ atomic64_xchg_relaxed(atomic64_t *v, s64 i)
>  #endif
>  
>  #if !defined(arch_atomic64_cmpxchg_relaxed) || defined(arch_atomic64_cmpxchg)
> -static inline s64
> +static __always_inline s64
>  atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1480,7 +1481,7 @@ atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
>  #endif
>  
>  #if defined(arch_atomic64_cmpxchg_acquire)
> -static inline s64
> +static __always_inline s64
>  atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1490,7 +1491,7 @@ atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new)
>  #endif
>  
>  #if defined(arch_atomic64_cmpxchg_release)
> -static inline s64
> +static __always_inline s64
>  atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1500,7 +1501,7 @@ atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new)
>  #endif
>  
>  #if defined(arch_atomic64_cmpxchg_relaxed)
> -static inline s64
> +static __always_inline s64
>  atomic64_cmpxchg_relaxed(atomic64_t *v, s64 old, s64 new)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1510,7 +1511,7 @@ atomic64_cmpxchg_relaxed(atomic64_t *v, s64 old, s64 new)
>  #endif
>  
>  #if defined(arch_atomic64_try_cmpxchg)
> -static inline bool
> +static __always_inline bool
>  atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1521,7 +1522,7 @@ atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
>  #endif
>  
>  #if defined(arch_atomic64_try_cmpxchg_acquire)
> -static inline bool
> +static __always_inline bool
>  atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1532,7 +1533,7 @@ atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new)
>  #endif
>  
>  #if defined(arch_atomic64_try_cmpxchg_release)
> -static inline bool
> +static __always_inline bool
>  atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1543,7 +1544,7 @@ atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new)
>  #endif
>  
>  #if defined(arch_atomic64_try_cmpxchg_relaxed)
> -static inline bool
> +static __always_inline bool
>  atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1554,7 +1555,7 @@ atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new)
>  #endif
>  
>  #if defined(arch_atomic64_sub_and_test)
> -static inline bool
> +static __always_inline bool
>  atomic64_sub_and_test(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1564,7 +1565,7 @@ atomic64_sub_and_test(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_dec_and_test)
> -static inline bool
> +static __always_inline bool
>  atomic64_dec_and_test(atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1574,7 +1575,7 @@ atomic64_dec_and_test(atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_inc_and_test)
> -static inline bool
> +static __always_inline bool
>  atomic64_inc_and_test(atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1584,7 +1585,7 @@ atomic64_inc_and_test(atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_add_negative)
> -static inline bool
> +static __always_inline bool
>  atomic64_add_negative(s64 i, atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1594,7 +1595,7 @@ atomic64_add_negative(s64 i, atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_fetch_add_unless)
> -static inline s64
> +static __always_inline s64
>  atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1604,7 +1605,7 @@ atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u)
>  #endif
>  
>  #if defined(arch_atomic64_add_unless)
> -static inline bool
> +static __always_inline bool
>  atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1614,7 +1615,7 @@ atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
>  #endif
>  
>  #if defined(arch_atomic64_inc_not_zero)
> -static inline bool
> +static __always_inline bool
>  atomic64_inc_not_zero(atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1624,7 +1625,7 @@ atomic64_inc_not_zero(atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_inc_unless_negative)
> -static inline bool
> +static __always_inline bool
>  atomic64_inc_unless_negative(atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1634,7 +1635,7 @@ atomic64_inc_unless_negative(atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_dec_unless_positive)
> -static inline bool
> +static __always_inline bool
>  atomic64_dec_unless_positive(atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1644,7 +1645,7 @@ atomic64_dec_unless_positive(atomic64_t *v)
>  #endif
>  
>  #if defined(arch_atomic64_dec_if_positive)
> -static inline s64
> +static __always_inline s64
>  atomic64_dec_if_positive(atomic64_t *v)
>  {
>  	__atomic_check_write(v, sizeof(*v));
> @@ -1798,4 +1799,4 @@ atomic64_dec_if_positive(atomic64_t *v)
>  })
>  
>  #endif /* _ASM_GENERIC_ATOMIC_INSTRUMENTED_H */
> -// beea41c2a0f2c69e4958ed71bf26f59740fa4b12
> +// 7b7e2af0e75c8ecb6f02298a7075f503f30d244c
> diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h
> index 881c7e27af28..073cf40f431b 100644
> --- a/include/asm-generic/atomic-long.h
> +++ b/include/asm-generic/atomic-long.h
> @@ -6,6 +6,7 @@
>  #ifndef _ASM_GENERIC_ATOMIC_LONG_H
>  #define _ASM_GENERIC_ATOMIC_LONG_H
>  
> +#include <linux/compiler.h>
>  #include <asm/types.h>
>  
>  #ifdef CONFIG_64BIT
> @@ -22,493 +23,493 @@ typedef atomic_t atomic_long_t;
>  
>  #ifdef CONFIG_64BIT
>  
> -static inline long
> +static __always_inline long
>  atomic_long_read(const atomic_long_t *v)
>  {
>  	return atomic64_read(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_read_acquire(const atomic_long_t *v)
>  {
>  	return atomic64_read_acquire(v);
>  }
>  
> -static inline void
> +static __always_inline void
>  atomic_long_set(atomic_long_t *v, long i)
>  {
>  	atomic64_set(v, i);
>  }
>  
> -static inline void
> +static __always_inline void
>  atomic_long_set_release(atomic_long_t *v, long i)
>  {
>  	atomic64_set_release(v, i);
>  }
>  
> -static inline void
> +static __always_inline void
>  atomic_long_add(long i, atomic_long_t *v)
>  {
>  	atomic64_add(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_add_return(long i, atomic_long_t *v)
>  {
>  	return atomic64_add_return(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_add_return_acquire(long i, atomic_long_t *v)
>  {
>  	return atomic64_add_return_acquire(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_add_return_release(long i, atomic_long_t *v)
>  {
>  	return atomic64_add_return_release(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_add_return_relaxed(long i, atomic_long_t *v)
>  {
>  	return atomic64_add_return_relaxed(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_add(long i, atomic_long_t *v)
>  {
>  	return atomic64_fetch_add(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_add_acquire(long i, atomic_long_t *v)
>  {
>  	return atomic64_fetch_add_acquire(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_add_release(long i, atomic_long_t *v)
>  {
>  	return atomic64_fetch_add_release(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_add_relaxed(long i, atomic_long_t *v)
>  {
>  	return atomic64_fetch_add_relaxed(i, v);
>  }
>  
> -static inline void
> +static __always_inline void
>  atomic_long_sub(long i, atomic_long_t *v)
>  {
>  	atomic64_sub(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_sub_return(long i, atomic_long_t *v)
>  {
>  	return atomic64_sub_return(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_sub_return_acquire(long i, atomic_long_t *v)
>  {
>  	return atomic64_sub_return_acquire(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_sub_return_release(long i, atomic_long_t *v)
>  {
>  	return atomic64_sub_return_release(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_sub_return_relaxed(long i, atomic_long_t *v)
>  {
>  	return atomic64_sub_return_relaxed(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_sub(long i, atomic_long_t *v)
>  {
>  	return atomic64_fetch_sub(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_sub_acquire(long i, atomic_long_t *v)
>  {
>  	return atomic64_fetch_sub_acquire(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_sub_release(long i, atomic_long_t *v)
>  {
>  	return atomic64_fetch_sub_release(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_sub_relaxed(long i, atomic_long_t *v)
>  {
>  	return atomic64_fetch_sub_relaxed(i, v);
>  }
>  
> -static inline void
> +static __always_inline void
>  atomic_long_inc(atomic_long_t *v)
>  {
>  	atomic64_inc(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_inc_return(atomic_long_t *v)
>  {
>  	return atomic64_inc_return(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_inc_return_acquire(atomic_long_t *v)
>  {
>  	return atomic64_inc_return_acquire(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_inc_return_release(atomic_long_t *v)
>  {
>  	return atomic64_inc_return_release(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_inc_return_relaxed(atomic_long_t *v)
>  {
>  	return atomic64_inc_return_relaxed(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_inc(atomic_long_t *v)
>  {
>  	return atomic64_fetch_inc(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_inc_acquire(atomic_long_t *v)
>  {
>  	return atomic64_fetch_inc_acquire(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_inc_release(atomic_long_t *v)
>  {
>  	return atomic64_fetch_inc_release(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_inc_relaxed(atomic_long_t *v)
>  {
>  	return atomic64_fetch_inc_relaxed(v);
>  }
>  
> -static inline void
> +static __always_inline void
>  atomic_long_dec(atomic_long_t *v)
>  {
>  	atomic64_dec(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_dec_return(atomic_long_t *v)
>  {
>  	return atomic64_dec_return(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_dec_return_acquire(atomic_long_t *v)
>  {
>  	return atomic64_dec_return_acquire(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_dec_return_release(atomic_long_t *v)
>  {
>  	return atomic64_dec_return_release(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_dec_return_relaxed(atomic_long_t *v)
>  {
>  	return atomic64_dec_return_relaxed(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_dec(atomic_long_t *v)
>  {
>  	return atomic64_fetch_dec(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_dec_acquire(atomic_long_t *v)
>  {
>  	return atomic64_fetch_dec_acquire(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_dec_release(atomic_long_t *v)
>  {
>  	return atomic64_fetch_dec_release(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_dec_relaxed(atomic_long_t *v)
>  {
>  	return atomic64_fetch_dec_relaxed(v);
>  }
>  
> -static inline void
> +static __always_inline void
>  atomic_long_and(long i, atomic_long_t *v)
>  {
>  	atomic64_and(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_and(long i, atomic_long_t *v)
>  {
>  	return atomic64_fetch_and(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_and_acquire(long i, atomic_long_t *v)
>  {
>  	return atomic64_fetch_and_acquire(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_and_release(long i, atomic_long_t *v)
>  {
>  	return atomic64_fetch_and_release(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_and_relaxed(long i, atomic_long_t *v)
>  {
>  	return atomic64_fetch_and_relaxed(i, v);
>  }
>  
> -static inline void
> +static __always_inline void
>  atomic_long_andnot(long i, atomic_long_t *v)
>  {
>  	atomic64_andnot(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_andnot(long i, atomic_long_t *v)
>  {
>  	return atomic64_fetch_andnot(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_andnot_acquire(long i, atomic_long_t *v)
>  {
>  	return atomic64_fetch_andnot_acquire(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_andnot_release(long i, atomic_long_t *v)
>  {
>  	return atomic64_fetch_andnot_release(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_andnot_relaxed(long i, atomic_long_t *v)
>  {
>  	return atomic64_fetch_andnot_relaxed(i, v);
>  }
>  
> -static inline void
> +static __always_inline void
>  atomic_long_or(long i, atomic_long_t *v)
>  {
>  	atomic64_or(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_or(long i, atomic_long_t *v)
>  {
>  	return atomic64_fetch_or(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_or_acquire(long i, atomic_long_t *v)
>  {
>  	return atomic64_fetch_or_acquire(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_or_release(long i, atomic_long_t *v)
>  {
>  	return atomic64_fetch_or_release(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_or_relaxed(long i, atomic_long_t *v)
>  {
>  	return atomic64_fetch_or_relaxed(i, v);
>  }
>  
> -static inline void
> +static __always_inline void
>  atomic_long_xor(long i, atomic_long_t *v)
>  {
>  	atomic64_xor(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_xor(long i, atomic_long_t *v)
>  {
>  	return atomic64_fetch_xor(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_xor_acquire(long i, atomic_long_t *v)
>  {
>  	return atomic64_fetch_xor_acquire(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_xor_release(long i, atomic_long_t *v)
>  {
>  	return atomic64_fetch_xor_release(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_xor_relaxed(long i, atomic_long_t *v)
>  {
>  	return atomic64_fetch_xor_relaxed(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_xchg(atomic_long_t *v, long i)
>  {
>  	return atomic64_xchg(v, i);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_xchg_acquire(atomic_long_t *v, long i)
>  {
>  	return atomic64_xchg_acquire(v, i);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_xchg_release(atomic_long_t *v, long i)
>  {
>  	return atomic64_xchg_release(v, i);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_xchg_relaxed(atomic_long_t *v, long i)
>  {
>  	return atomic64_xchg_relaxed(v, i);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_cmpxchg(atomic_long_t *v, long old, long new)
>  {
>  	return atomic64_cmpxchg(v, old, new);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new)
>  {
>  	return atomic64_cmpxchg_acquire(v, old, new);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new)
>  {
>  	return atomic64_cmpxchg_release(v, old, new);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new)
>  {
>  	return atomic64_cmpxchg_relaxed(v, old, new);
>  }
>  
> -static inline bool
> +static __always_inline bool
>  atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new)
>  {
>  	return atomic64_try_cmpxchg(v, (s64 *)old, new);
>  }
>  
> -static inline bool
> +static __always_inline bool
>  atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new)
>  {
>  	return atomic64_try_cmpxchg_acquire(v, (s64 *)old, new);
>  }
>  
> -static inline bool
> +static __always_inline bool
>  atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new)
>  {
>  	return atomic64_try_cmpxchg_release(v, (s64 *)old, new);
>  }
>  
> -static inline bool
> +static __always_inline bool
>  atomic_long_try_cmpxchg_relaxed(atomic_long_t *v, long *old, long new)
>  {
>  	return atomic64_try_cmpxchg_relaxed(v, (s64 *)old, new);
>  }
>  
> -static inline bool
> +static __always_inline bool
>  atomic_long_sub_and_test(long i, atomic_long_t *v)
>  {
>  	return atomic64_sub_and_test(i, v);
>  }
>  
> -static inline bool
> +static __always_inline bool
>  atomic_long_dec_and_test(atomic_long_t *v)
>  {
>  	return atomic64_dec_and_test(v);
>  }
>  
> -static inline bool
> +static __always_inline bool
>  atomic_long_inc_and_test(atomic_long_t *v)
>  {
>  	return atomic64_inc_and_test(v);
>  }
>  
> -static inline bool
> +static __always_inline bool
>  atomic_long_add_negative(long i, atomic_long_t *v)
>  {
>  	return atomic64_add_negative(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u)
>  {
>  	return atomic64_fetch_add_unless(v, a, u);
>  }
>  
> -static inline bool
> +static __always_inline bool
>  atomic_long_add_unless(atomic_long_t *v, long a, long u)
>  {
>  	return atomic64_add_unless(v, a, u);
>  }
>  
> -static inline bool
> +static __always_inline bool
>  atomic_long_inc_not_zero(atomic_long_t *v)
>  {
>  	return atomic64_inc_not_zero(v);
>  }
>  
> -static inline bool
> +static __always_inline bool
>  atomic_long_inc_unless_negative(atomic_long_t *v)
>  {
>  	return atomic64_inc_unless_negative(v);
>  }
>  
> -static inline bool
> +static __always_inline bool
>  atomic_long_dec_unless_positive(atomic_long_t *v)
>  {
>  	return atomic64_dec_unless_positive(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_dec_if_positive(atomic_long_t *v)
>  {
>  	return atomic64_dec_if_positive(v);
> @@ -516,493 +517,493 @@ atomic_long_dec_if_positive(atomic_long_t *v)
>  
>  #else /* CONFIG_64BIT */
>  
> -static inline long
> +static __always_inline long
>  atomic_long_read(const atomic_long_t *v)
>  {
>  	return atomic_read(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_read_acquire(const atomic_long_t *v)
>  {
>  	return atomic_read_acquire(v);
>  }
>  
> -static inline void
> +static __always_inline void
>  atomic_long_set(atomic_long_t *v, long i)
>  {
>  	atomic_set(v, i);
>  }
>  
> -static inline void
> +static __always_inline void
>  atomic_long_set_release(atomic_long_t *v, long i)
>  {
>  	atomic_set_release(v, i);
>  }
>  
> -static inline void
> +static __always_inline void
>  atomic_long_add(long i, atomic_long_t *v)
>  {
>  	atomic_add(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_add_return(long i, atomic_long_t *v)
>  {
>  	return atomic_add_return(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_add_return_acquire(long i, atomic_long_t *v)
>  {
>  	return atomic_add_return_acquire(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_add_return_release(long i, atomic_long_t *v)
>  {
>  	return atomic_add_return_release(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_add_return_relaxed(long i, atomic_long_t *v)
>  {
>  	return atomic_add_return_relaxed(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_add(long i, atomic_long_t *v)
>  {
>  	return atomic_fetch_add(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_add_acquire(long i, atomic_long_t *v)
>  {
>  	return atomic_fetch_add_acquire(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_add_release(long i, atomic_long_t *v)
>  {
>  	return atomic_fetch_add_release(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_add_relaxed(long i, atomic_long_t *v)
>  {
>  	return atomic_fetch_add_relaxed(i, v);
>  }
>  
> -static inline void
> +static __always_inline void
>  atomic_long_sub(long i, atomic_long_t *v)
>  {
>  	atomic_sub(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_sub_return(long i, atomic_long_t *v)
>  {
>  	return atomic_sub_return(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_sub_return_acquire(long i, atomic_long_t *v)
>  {
>  	return atomic_sub_return_acquire(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_sub_return_release(long i, atomic_long_t *v)
>  {
>  	return atomic_sub_return_release(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_sub_return_relaxed(long i, atomic_long_t *v)
>  {
>  	return atomic_sub_return_relaxed(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_sub(long i, atomic_long_t *v)
>  {
>  	return atomic_fetch_sub(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_sub_acquire(long i, atomic_long_t *v)
>  {
>  	return atomic_fetch_sub_acquire(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_sub_release(long i, atomic_long_t *v)
>  {
>  	return atomic_fetch_sub_release(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_sub_relaxed(long i, atomic_long_t *v)
>  {
>  	return atomic_fetch_sub_relaxed(i, v);
>  }
>  
> -static inline void
> +static __always_inline void
>  atomic_long_inc(atomic_long_t *v)
>  {
>  	atomic_inc(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_inc_return(atomic_long_t *v)
>  {
>  	return atomic_inc_return(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_inc_return_acquire(atomic_long_t *v)
>  {
>  	return atomic_inc_return_acquire(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_inc_return_release(atomic_long_t *v)
>  {
>  	return atomic_inc_return_release(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_inc_return_relaxed(atomic_long_t *v)
>  {
>  	return atomic_inc_return_relaxed(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_inc(atomic_long_t *v)
>  {
>  	return atomic_fetch_inc(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_inc_acquire(atomic_long_t *v)
>  {
>  	return atomic_fetch_inc_acquire(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_inc_release(atomic_long_t *v)
>  {
>  	return atomic_fetch_inc_release(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_inc_relaxed(atomic_long_t *v)
>  {
>  	return atomic_fetch_inc_relaxed(v);
>  }
>  
> -static inline void
> +static __always_inline void
>  atomic_long_dec(atomic_long_t *v)
>  {
>  	atomic_dec(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_dec_return(atomic_long_t *v)
>  {
>  	return atomic_dec_return(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_dec_return_acquire(atomic_long_t *v)
>  {
>  	return atomic_dec_return_acquire(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_dec_return_release(atomic_long_t *v)
>  {
>  	return atomic_dec_return_release(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_dec_return_relaxed(atomic_long_t *v)
>  {
>  	return atomic_dec_return_relaxed(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_dec(atomic_long_t *v)
>  {
>  	return atomic_fetch_dec(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_dec_acquire(atomic_long_t *v)
>  {
>  	return atomic_fetch_dec_acquire(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_dec_release(atomic_long_t *v)
>  {
>  	return atomic_fetch_dec_release(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_dec_relaxed(atomic_long_t *v)
>  {
>  	return atomic_fetch_dec_relaxed(v);
>  }
>  
> -static inline void
> +static __always_inline void
>  atomic_long_and(long i, atomic_long_t *v)
>  {
>  	atomic_and(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_and(long i, atomic_long_t *v)
>  {
>  	return atomic_fetch_and(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_and_acquire(long i, atomic_long_t *v)
>  {
>  	return atomic_fetch_and_acquire(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_and_release(long i, atomic_long_t *v)
>  {
>  	return atomic_fetch_and_release(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_and_relaxed(long i, atomic_long_t *v)
>  {
>  	return atomic_fetch_and_relaxed(i, v);
>  }
>  
> -static inline void
> +static __always_inline void
>  atomic_long_andnot(long i, atomic_long_t *v)
>  {
>  	atomic_andnot(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_andnot(long i, atomic_long_t *v)
>  {
>  	return atomic_fetch_andnot(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_andnot_acquire(long i, atomic_long_t *v)
>  {
>  	return atomic_fetch_andnot_acquire(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_andnot_release(long i, atomic_long_t *v)
>  {
>  	return atomic_fetch_andnot_release(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_andnot_relaxed(long i, atomic_long_t *v)
>  {
>  	return atomic_fetch_andnot_relaxed(i, v);
>  }
>  
> -static inline void
> +static __always_inline void
>  atomic_long_or(long i, atomic_long_t *v)
>  {
>  	atomic_or(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_or(long i, atomic_long_t *v)
>  {
>  	return atomic_fetch_or(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_or_acquire(long i, atomic_long_t *v)
>  {
>  	return atomic_fetch_or_acquire(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_or_release(long i, atomic_long_t *v)
>  {
>  	return atomic_fetch_or_release(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_or_relaxed(long i, atomic_long_t *v)
>  {
>  	return atomic_fetch_or_relaxed(i, v);
>  }
>  
> -static inline void
> +static __always_inline void
>  atomic_long_xor(long i, atomic_long_t *v)
>  {
>  	atomic_xor(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_xor(long i, atomic_long_t *v)
>  {
>  	return atomic_fetch_xor(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_xor_acquire(long i, atomic_long_t *v)
>  {
>  	return atomic_fetch_xor_acquire(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_xor_release(long i, atomic_long_t *v)
>  {
>  	return atomic_fetch_xor_release(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_xor_relaxed(long i, atomic_long_t *v)
>  {
>  	return atomic_fetch_xor_relaxed(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_xchg(atomic_long_t *v, long i)
>  {
>  	return atomic_xchg(v, i);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_xchg_acquire(atomic_long_t *v, long i)
>  {
>  	return atomic_xchg_acquire(v, i);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_xchg_release(atomic_long_t *v, long i)
>  {
>  	return atomic_xchg_release(v, i);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_xchg_relaxed(atomic_long_t *v, long i)
>  {
>  	return atomic_xchg_relaxed(v, i);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_cmpxchg(atomic_long_t *v, long old, long new)
>  {
>  	return atomic_cmpxchg(v, old, new);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_cmpxchg_acquire(atomic_long_t *v, long old, long new)
>  {
>  	return atomic_cmpxchg_acquire(v, old, new);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_cmpxchg_release(atomic_long_t *v, long old, long new)
>  {
>  	return atomic_cmpxchg_release(v, old, new);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_cmpxchg_relaxed(atomic_long_t *v, long old, long new)
>  {
>  	return atomic_cmpxchg_relaxed(v, old, new);
>  }
>  
> -static inline bool
> +static __always_inline bool
>  atomic_long_try_cmpxchg(atomic_long_t *v, long *old, long new)
>  {
>  	return atomic_try_cmpxchg(v, (int *)old, new);
>  }
>  
> -static inline bool
> +static __always_inline bool
>  atomic_long_try_cmpxchg_acquire(atomic_long_t *v, long *old, long new)
>  {
>  	return atomic_try_cmpxchg_acquire(v, (int *)old, new);
>  }
>  
> -static inline bool
> +static __always_inline bool
>  atomic_long_try_cmpxchg_release(atomic_long_t *v, long *old, long new)
>  {
>  	return atomic_try_cmpxchg_release(v, (int *)old, new);
>  }
>  
> -static inline bool
> +static __always_inline bool
>  atomic_long_try_cmpxchg_relaxed(atomic_long_t *v, long *old, long new)
>  {
>  	return atomic_try_cmpxchg_relaxed(v, (int *)old, new);
>  }
>  
> -static inline bool
> +static __always_inline bool
>  atomic_long_sub_and_test(long i, atomic_long_t *v)
>  {
>  	return atomic_sub_and_test(i, v);
>  }
>  
> -static inline bool
> +static __always_inline bool
>  atomic_long_dec_and_test(atomic_long_t *v)
>  {
>  	return atomic_dec_and_test(v);
>  }
>  
> -static inline bool
> +static __always_inline bool
>  atomic_long_inc_and_test(atomic_long_t *v)
>  {
>  	return atomic_inc_and_test(v);
>  }
>  
> -static inline bool
> +static __always_inline bool
>  atomic_long_add_negative(long i, atomic_long_t *v)
>  {
>  	return atomic_add_negative(i, v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_fetch_add_unless(atomic_long_t *v, long a, long u)
>  {
>  	return atomic_fetch_add_unless(v, a, u);
>  }
>  
> -static inline bool
> +static __always_inline bool
>  atomic_long_add_unless(atomic_long_t *v, long a, long u)
>  {
>  	return atomic_add_unless(v, a, u);
>  }
>  
> -static inline bool
> +static __always_inline bool
>  atomic_long_inc_not_zero(atomic_long_t *v)
>  {
>  	return atomic_inc_not_zero(v);
>  }
>  
> -static inline bool
> +static __always_inline bool
>  atomic_long_inc_unless_negative(atomic_long_t *v)
>  {
>  	return atomic_inc_unless_negative(v);
>  }
>  
> -static inline bool
> +static __always_inline bool
>  atomic_long_dec_unless_positive(atomic_long_t *v)
>  {
>  	return atomic_dec_unless_positive(v);
>  }
>  
> -static inline long
> +static __always_inline long
>  atomic_long_dec_if_positive(atomic_long_t *v)
>  {
>  	return atomic_dec_if_positive(v);
> @@ -1010,4 +1011,4 @@ atomic_long_dec_if_positive(atomic_long_t *v)
>  
>  #endif /* CONFIG_64BIT */
>  #endif /* _ASM_GENERIC_ATOMIC_LONG_H */
> -// 77558968132ce4f911ad53f6f52ce423006f6268
> +// a624200981f552b2c6be4f32fe44da8289f30d87
> diff --git a/scripts/atomic/gen-atomic-instrumented.sh b/scripts/atomic/gen-atomic-instrumented.sh
> index 8b8b2a6f8d68..fb4222548b22 100755
> --- a/scripts/atomic/gen-atomic-instrumented.sh
> +++ b/scripts/atomic/gen-atomic-instrumented.sh
> @@ -84,7 +84,7 @@ gen_proto_order_variant()
>  	[ ! -z "${guard}" ] && printf "#if ${guard}\n"
>  
>  cat <<EOF
> -static inline ${ret}
> +static __always_inline ${ret}
>  ${atomicname}(${params})
>  {
>  ${checks}
> @@ -147,16 +147,17 @@ cat << EOF
>  #define _ASM_GENERIC_ATOMIC_INSTRUMENTED_H
>  
>  #include <linux/build_bug.h>
> +#include <linux/compiler.h>
>  #include <linux/kasan-checks.h>
>  #include <linux/kcsan-checks.h>
>  
> -static inline void __atomic_check_read(const volatile void *v, size_t size)
> +static __always_inline void __atomic_check_read(const volatile void *v, size_t size)
>  {
>  	kasan_check_read(v, size);
>  	kcsan_check_atomic_read(v, size);
>  }
>  
> -static inline void __atomic_check_write(const volatile void *v, size_t size)
> +static __always_inline void __atomic_check_write(const volatile void *v, size_t size)
>  {
>  	kasan_check_write(v, size);
>  	kcsan_check_atomic_write(v, size);
> diff --git a/scripts/atomic/gen-atomic-long.sh b/scripts/atomic/gen-atomic-long.sh
> index c240a7231b2e..e318d3f92e53 100755
> --- a/scripts/atomic/gen-atomic-long.sh
> +++ b/scripts/atomic/gen-atomic-long.sh
> @@ -46,7 +46,7 @@ gen_proto_order_variant()
>  	local retstmt="$(gen_ret_stmt "${meta}")"
>  
>  cat <<EOF
> -static inline ${ret}
> +static __always_inline ${ret}
>  atomic_long_${name}(${params})
>  {
>  	${retstmt}${atomic}_${name}(${argscast});
> @@ -64,6 +64,7 @@ cat << EOF
>  #ifndef _ASM_GENERIC_ATOMIC_LONG_H
>  #define _ASM_GENERIC_ATOMIC_LONG_H
>  
> +#include <linux/compiler.h>
>  #include <asm/types.h>
>  
>  #ifdef CONFIG_64BIT
> -- 
> 2.24.0.432.g9d3f5f5b63-goog
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v3 3/3] kcsan: Prefer __always_inline for fast-path
  2019-11-26 14:04 ` [PATCH v3 3/3] kcsan: Prefer __always_inline for fast-path Marco Elver
@ 2019-12-03  5:30   ` Randy Dunlap
  2019-12-03 16:01     ` Paul E. McKenney
  0 siblings, 1 reply; 9+ messages in thread
From: Randy Dunlap @ 2019-12-03  5:30 UTC (permalink / raw)
  To: Marco Elver
  Cc: mark.rutland, paulmck, linux-kernel, will, peterz, boqun.feng,
	arnd, dvyukov, linux-arch, kasan-dev

On 11/26/19 6:04 AM, Marco Elver wrote:
> Prefer __always_inline for fast-path functions that are called outside
> of user_access_save, to avoid generating UACCESS warnings when
> optimizing for size (CC_OPTIMIZE_FOR_SIZE). It will also avoid future
> surprises with compiler versions that change the inlining heuristic even
> when optimizing for performance.
> 
> Report: http://lkml.kernel.org/r/58708908-84a0-0a81-a836-ad97e33dbb62@infradead.org
> Reported-by: Randy Dunlap <rdunlap@infradead.org>
> Signed-off-by: Marco Elver <elver@google.com>

Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested

Thanks.
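
As a rough aside, the inlining behaviour the patch guards against can be
sketched in plain C.  This is only an illustration and not code from the
patch: it assumes the kernel's usual definition of __always_inline
(inline plus the always_inline function attribute) and uses trivial
stand-ins for the real fast-path helpers.  The point is that "static
inline" is only a hint, so when optimizing for size the compiler may keep
such a helper out of line, and a call to it from inside a
user_access_save() region is then flagged by objtool as a UACCESS
warning; always_inline takes that choice away from the compiler.

#include <stdio.h>

/* Assumed definition, mirroring the kernel's: inlining is forced. */
#define __always_inline inline __attribute__((__always_inline__))

/* May be emitted out of line when optimizing for size (-Os). */
static inline int check_maybe_outlined(const int *p)
{
	return *p != 0;
}

/* Always folded into the caller, regardless of -Os heuristics. */
static __always_inline int check_always_inlined(const int *p)
{
	return *p != 0;
}

int main(void)
{
	int v = 1;

	/* Both calls return 1; only the code generation differs. */
	printf("%d %d\n", check_maybe_outlined(&v), check_always_inlined(&v));
	return 0;
}

Whether the first helper is inlined is left to the compiler's size
heuristics (for something this small it usually is); the second is never
left to chance, which is what matters for fast-path functions called
outside of user_access_save().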

> ---
> Rebased on: locking/kcsan branch of tip tree.
> ---
>  kernel/kcsan/atomic.h   |  2 +-
>  kernel/kcsan/core.c     | 16 +++++++---------
>  kernel/kcsan/encoding.h | 14 +++++++-------
>  3 files changed, 15 insertions(+), 17 deletions(-)
> 
> diff --git a/kernel/kcsan/atomic.h b/kernel/kcsan/atomic.h
> index 576e03ddd6a3..a9c193053491 100644
> --- a/kernel/kcsan/atomic.h
> +++ b/kernel/kcsan/atomic.h
> @@ -18,7 +18,7 @@
>   * than cast to volatile. Eventually, we hope to be able to remove this
>   * function.
>   */
> -static inline bool kcsan_is_atomic(const volatile void *ptr)
> +static __always_inline bool kcsan_is_atomic(const volatile void *ptr)
>  {
>  	/* only jiffies for now */
>  	return ptr == &jiffies;
> diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
> index 3314fc29e236..c616fec639cd 100644
> --- a/kernel/kcsan/core.c
> +++ b/kernel/kcsan/core.c
> @@ -78,10 +78,8 @@ static atomic_long_t watchpoints[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1];
>   */
>  static DEFINE_PER_CPU(long, kcsan_skip);
>  
> -static inline atomic_long_t *find_watchpoint(unsigned long addr,
> -					     size_t size,
> -					     bool expect_write,
> -					     long *encoded_watchpoint)
> +static __always_inline atomic_long_t *
> +find_watchpoint(unsigned long addr, size_t size, bool expect_write, long *encoded_watchpoint)
>  {
>  	const int slot = watchpoint_slot(addr);
>  	const unsigned long addr_masked = addr & WATCHPOINT_ADDR_MASK;
> @@ -146,7 +144,7 @@ insert_watchpoint(unsigned long addr, size_t size, bool is_write)
>   *	2. the thread that set up the watchpoint already removed it;
>   *	3. the watchpoint was removed and then re-used.
>   */
> -static inline bool
> +static __always_inline bool
>  try_consume_watchpoint(atomic_long_t *watchpoint, long encoded_watchpoint)
>  {
>  	return atomic_long_try_cmpxchg_relaxed(watchpoint, &encoded_watchpoint, CONSUMED_WATCHPOINT);
> @@ -160,7 +158,7 @@ static inline bool remove_watchpoint(atomic_long_t *watchpoint)
>  	return atomic_long_xchg_relaxed(watchpoint, INVALID_WATCHPOINT) != CONSUMED_WATCHPOINT;
>  }
>  
> -static inline struct kcsan_ctx *get_ctx(void)
> +static __always_inline struct kcsan_ctx *get_ctx(void)
>  {
>  	/*
>  	 * In interrupts, use raw_cpu_ptr to avoid unnecessary checks, that would
> @@ -169,7 +167,7 @@ static inline struct kcsan_ctx *get_ctx(void)
>  	return in_task() ? &current->kcsan_ctx : raw_cpu_ptr(&kcsan_cpu_ctx);
>  }
>  
> -static inline bool is_atomic(const volatile void *ptr)
> +static __always_inline bool is_atomic(const volatile void *ptr)
>  {
>  	struct kcsan_ctx *ctx = get_ctx();
>  
> @@ -193,7 +191,7 @@ static inline bool is_atomic(const volatile void *ptr)
>  	return kcsan_is_atomic(ptr);
>  }
>  
> -static inline bool should_watch(const volatile void *ptr, int type)
> +static __always_inline bool should_watch(const volatile void *ptr, int type)
>  {
>  	/*
>  	 * Never set up watchpoints when memory operations are atomic.
> @@ -226,7 +224,7 @@ static inline void reset_kcsan_skip(void)
>  	this_cpu_write(kcsan_skip, skip_count);
>  }
>  
> -static inline bool kcsan_is_enabled(void)
> +static __always_inline bool kcsan_is_enabled(void)
>  {
>  	return READ_ONCE(kcsan_enabled) && get_ctx()->disable_count == 0;
>  }
> diff --git a/kernel/kcsan/encoding.h b/kernel/kcsan/encoding.h
> index b63890e86449..f03562aaf2eb 100644
> --- a/kernel/kcsan/encoding.h
> +++ b/kernel/kcsan/encoding.h
> @@ -59,10 +59,10 @@ encode_watchpoint(unsigned long addr, size_t size, bool is_write)
>  		      (addr & WATCHPOINT_ADDR_MASK));
>  }
>  
> -static inline bool decode_watchpoint(long watchpoint,
> -				     unsigned long *addr_masked,
> -				     size_t *size,
> -				     bool *is_write)
> +static __always_inline bool decode_watchpoint(long watchpoint,
> +					      unsigned long *addr_masked,
> +					      size_t *size,
> +					      bool *is_write)
>  {
>  	if (watchpoint == INVALID_WATCHPOINT ||
>  	    watchpoint == CONSUMED_WATCHPOINT)
> @@ -78,13 +78,13 @@ static inline bool decode_watchpoint(long watchpoint,
>  /*
>   * Return watchpoint slot for an address.
>   */
> -static inline int watchpoint_slot(unsigned long addr)
> +static __always_inline int watchpoint_slot(unsigned long addr)
>  {
>  	return (addr / PAGE_SIZE) % CONFIG_KCSAN_NUM_WATCHPOINTS;
>  }
>  
> -static inline bool matching_access(unsigned long addr1, size_t size1,
> -				   unsigned long addr2, size_t size2)
> +static __always_inline bool matching_access(unsigned long addr1, size_t size1,
> +					    unsigned long addr2, size_t size2)
>  {
>  	unsigned long end_range1 = addr1 + size1 - 1;
>  	unsigned long end_range2 = addr2 + size2 - 1;
> 


-- 
~Randy


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v3 3/3] kcsan: Prefer __always_inline for fast-path
  2019-12-03  5:30   ` Randy Dunlap
@ 2019-12-03 16:01     ` Paul E. McKenney
  2019-12-12 21:11       ` Marco Elver
  0 siblings, 1 reply; 9+ messages in thread
From: Paul E. McKenney @ 2019-12-03 16:01 UTC (permalink / raw)
  To: Randy Dunlap
  Cc: Marco Elver, mark.rutland, linux-kernel, will, peterz,
	boqun.feng, arnd, dvyukov, linux-arch, kasan-dev

On Mon, Dec 02, 2019 at 09:30:22PM -0800, Randy Dunlap wrote:
> On 11/26/19 6:04 AM, Marco Elver wrote:
> > Prefer __always_inline for fast-path functions that are called outside
> > of user_access_save, to avoid generating UACCESS warnings when
> > optimizing for size (CC_OPTIMIZE_FOR_SIZE). It will also avoid future
> > surprises with compiler versions that change the inlining heuristic even
> > when optimizing for performance.
> > 
> > Report: http://lkml.kernel.org/r/58708908-84a0-0a81-a836-ad97e33dbb62@infradead.org
> > Reported-by: Randy Dunlap <rdunlap@infradead.org>
> > Signed-off-by: Marco Elver <elver@google.com>
> 
> Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested

Thank you, Randy!

							Thanx, Paul

> Thanks.
> 
> > ---
> > Rebased on: locking/kcsan branch of tip tree.
> > ---
> >  kernel/kcsan/atomic.h   |  2 +-
> >  kernel/kcsan/core.c     | 16 +++++++---------
> >  kernel/kcsan/encoding.h | 14 +++++++-------
> >  3 files changed, 15 insertions(+), 17 deletions(-)
> > 
> > diff --git a/kernel/kcsan/atomic.h b/kernel/kcsan/atomic.h
> > index 576e03ddd6a3..a9c193053491 100644
> > --- a/kernel/kcsan/atomic.h
> > +++ b/kernel/kcsan/atomic.h
> > @@ -18,7 +18,7 @@
> >   * than cast to volatile. Eventually, we hope to be able to remove this
> >   * function.
> >   */
> > -static inline bool kcsan_is_atomic(const volatile void *ptr)
> > +static __always_inline bool kcsan_is_atomic(const volatile void *ptr)
> >  {
> >  	/* only jiffies for now */
> >  	return ptr == &jiffies;
> > diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
> > index 3314fc29e236..c616fec639cd 100644
> > --- a/kernel/kcsan/core.c
> > +++ b/kernel/kcsan/core.c
> > @@ -78,10 +78,8 @@ static atomic_long_t watchpoints[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1];
> >   */
> >  static DEFINE_PER_CPU(long, kcsan_skip);
> >  
> > -static inline atomic_long_t *find_watchpoint(unsigned long addr,
> > -					     size_t size,
> > -					     bool expect_write,
> > -					     long *encoded_watchpoint)
> > +static __always_inline atomic_long_t *
> > +find_watchpoint(unsigned long addr, size_t size, bool expect_write, long *encoded_watchpoint)
> >  {
> >  	const int slot = watchpoint_slot(addr);
> >  	const unsigned long addr_masked = addr & WATCHPOINT_ADDR_MASK;
> > @@ -146,7 +144,7 @@ insert_watchpoint(unsigned long addr, size_t size, bool is_write)
> >   *	2. the thread that set up the watchpoint already removed it;
> >   *	3. the watchpoint was removed and then re-used.
> >   */
> > -static inline bool
> > +static __always_inline bool
> >  try_consume_watchpoint(atomic_long_t *watchpoint, long encoded_watchpoint)
> >  {
> >  	return atomic_long_try_cmpxchg_relaxed(watchpoint, &encoded_watchpoint, CONSUMED_WATCHPOINT);
> > @@ -160,7 +158,7 @@ static inline bool remove_watchpoint(atomic_long_t *watchpoint)
> >  	return atomic_long_xchg_relaxed(watchpoint, INVALID_WATCHPOINT) != CONSUMED_WATCHPOINT;
> >  }
> >  
> > -static inline struct kcsan_ctx *get_ctx(void)
> > +static __always_inline struct kcsan_ctx *get_ctx(void)
> >  {
> >  	/*
> >  	 * In interrupts, use raw_cpu_ptr to avoid unnecessary checks, that would
> > @@ -169,7 +167,7 @@ static inline struct kcsan_ctx *get_ctx(void)
> >  	return in_task() ? &current->kcsan_ctx : raw_cpu_ptr(&kcsan_cpu_ctx);
> >  }
> >  
> > -static inline bool is_atomic(const volatile void *ptr)
> > +static __always_inline bool is_atomic(const volatile void *ptr)
> >  {
> >  	struct kcsan_ctx *ctx = get_ctx();
> >  
> > @@ -193,7 +191,7 @@ static inline bool is_atomic(const volatile void *ptr)
> >  	return kcsan_is_atomic(ptr);
> >  }
> >  
> > -static inline bool should_watch(const volatile void *ptr, int type)
> > +static __always_inline bool should_watch(const volatile void *ptr, int type)
> >  {
> >  	/*
> >  	 * Never set up watchpoints when memory operations are atomic.
> > @@ -226,7 +224,7 @@ static inline void reset_kcsan_skip(void)
> >  	this_cpu_write(kcsan_skip, skip_count);
> >  }
> >  
> > -static inline bool kcsan_is_enabled(void)
> > +static __always_inline bool kcsan_is_enabled(void)
> >  {
> >  	return READ_ONCE(kcsan_enabled) && get_ctx()->disable_count == 0;
> >  }
> > diff --git a/kernel/kcsan/encoding.h b/kernel/kcsan/encoding.h
> > index b63890e86449..f03562aaf2eb 100644
> > --- a/kernel/kcsan/encoding.h
> > +++ b/kernel/kcsan/encoding.h
> > @@ -59,10 +59,10 @@ encode_watchpoint(unsigned long addr, size_t size, bool is_write)
> >  		      (addr & WATCHPOINT_ADDR_MASK));
> >  }
> >  
> > -static inline bool decode_watchpoint(long watchpoint,
> > -				     unsigned long *addr_masked,
> > -				     size_t *size,
> > -				     bool *is_write)
> > +static __always_inline bool decode_watchpoint(long watchpoint,
> > +					      unsigned long *addr_masked,
> > +					      size_t *size,
> > +					      bool *is_write)
> >  {
> >  	if (watchpoint == INVALID_WATCHPOINT ||
> >  	    watchpoint == CONSUMED_WATCHPOINT)
> > @@ -78,13 +78,13 @@ static inline bool decode_watchpoint(long watchpoint,
> >  /*
> >   * Return watchpoint slot for an address.
> >   */
> > -static inline int watchpoint_slot(unsigned long addr)
> > +static __always_inline int watchpoint_slot(unsigned long addr)
> >  {
> >  	return (addr / PAGE_SIZE) % CONFIG_KCSAN_NUM_WATCHPOINTS;
> >  }
> >  
> > -static inline bool matching_access(unsigned long addr1, size_t size1,
> > -				   unsigned long addr2, size_t size2)
> > +static __always_inline bool matching_access(unsigned long addr1, size_t size1,
> > +					    unsigned long addr2, size_t size2)
> >  {
> >  	unsigned long end_range1 = addr1 + size1 - 1;
> >  	unsigned long end_range2 = addr2 + size2 - 1;
> > 
> 
> 
> -- 
> ~Randy
> 

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v3 3/3] kcsan: Prefer __always_inline for fast-path
  2019-12-03 16:01     ` Paul E. McKenney
@ 2019-12-12 21:11       ` Marco Elver
  2019-12-13  1:31         ` Paul E. McKenney
  0 siblings, 1 reply; 9+ messages in thread
From: Marco Elver @ 2019-12-12 21:11 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Randy Dunlap, Mark Rutland, LKML, Will Deacon, Peter Zijlstra,
	Boqun Feng, Arnd Bergmann, Dmitry Vyukov, linux-arch, kasan-dev

On Tue, 3 Dec 2019 at 17:01, Paul E. McKenney <paulmck@kernel.org> wrote:
>
> On Mon, Dec 02, 2019 at 09:30:22PM -0800, Randy Dunlap wrote:
> > On 11/26/19 6:04 AM, Marco Elver wrote:
> > > Prefer __always_inline for fast-path functions that are called outside
> > > of user_access_save, to avoid generating UACCESS warnings when
> > > optimizing for size (CC_OPTIMIZE_FOR_SIZE). It will also avoid future
> > > surprises with compiler versions that change the inlining heuristic even
> > > when optimizing for performance.
> > >
> > > Report: http://lkml.kernel.org/r/58708908-84a0-0a81-a836-ad97e33dbb62@infradead.org
> > > Reported-by: Randy Dunlap <rdunlap@infradead.org>
> > > Signed-off-by: Marco Elver <elver@google.com>
> >
> > Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
>
> Thank you, Randy!

I had hoped this would have been applied by now, but since KCSAN isn't
in mainline yet, should I send a version of this patch rebased on
-rcu/kcsan?
It will just conflict with the style cleanup that is in
-tip/locking/kcsan when another eventual merge happens. Alternatively,
we can delay it for now and just have to remember to apply it
eventually (and have to live with things being messy for a bit longer :-)).

The version as-is here applies on -tip/locking/kcsan and -next (which
merged -tip/locking/kcsan).

Thanks,
-- Marco


>                                                         Thanx, Paul
>
> > Thanks.
> >
> > > ---
> > > Rebased on: locking/kcsan branch of tip tree.
> > > ---
> > >  kernel/kcsan/atomic.h   |  2 +-
> > >  kernel/kcsan/core.c     | 16 +++++++---------
> > >  kernel/kcsan/encoding.h | 14 +++++++-------
> > >  3 files changed, 15 insertions(+), 17 deletions(-)
> > >
> > > diff --git a/kernel/kcsan/atomic.h b/kernel/kcsan/atomic.h
> > > index 576e03ddd6a3..a9c193053491 100644
> > > --- a/kernel/kcsan/atomic.h
> > > +++ b/kernel/kcsan/atomic.h
> > > @@ -18,7 +18,7 @@
> > >   * than cast to volatile. Eventually, we hope to be able to remove this
> > >   * function.
> > >   */
> > > -static inline bool kcsan_is_atomic(const volatile void *ptr)
> > > +static __always_inline bool kcsan_is_atomic(const volatile void *ptr)
> > >  {
> > >     /* only jiffies for now */
> > >     return ptr == &jiffies;
> > > diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
> > > index 3314fc29e236..c616fec639cd 100644
> > > --- a/kernel/kcsan/core.c
> > > +++ b/kernel/kcsan/core.c
> > > @@ -78,10 +78,8 @@ static atomic_long_t watchpoints[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1];
> > >   */
> > >  static DEFINE_PER_CPU(long, kcsan_skip);
> > >
> > > -static inline atomic_long_t *find_watchpoint(unsigned long addr,
> > > -                                        size_t size,
> > > -                                        bool expect_write,
> > > -                                        long *encoded_watchpoint)
> > > +static __always_inline atomic_long_t *
> > > +find_watchpoint(unsigned long addr, size_t size, bool expect_write, long *encoded_watchpoint)
> > >  {
> > >     const int slot = watchpoint_slot(addr);
> > >     const unsigned long addr_masked = addr & WATCHPOINT_ADDR_MASK;
> > > @@ -146,7 +144,7 @@ insert_watchpoint(unsigned long addr, size_t size, bool is_write)
> > >   * 2. the thread that set up the watchpoint already removed it;
> > >   * 3. the watchpoint was removed and then re-used.
> > >   */
> > > -static inline bool
> > > +static __always_inline bool
> > >  try_consume_watchpoint(atomic_long_t *watchpoint, long encoded_watchpoint)
> > >  {
> > >     return atomic_long_try_cmpxchg_relaxed(watchpoint, &encoded_watchpoint, CONSUMED_WATCHPOINT);
> > > @@ -160,7 +158,7 @@ static inline bool remove_watchpoint(atomic_long_t *watchpoint)
> > >     return atomic_long_xchg_relaxed(watchpoint, INVALID_WATCHPOINT) != CONSUMED_WATCHPOINT;
> > >  }
> > >
> > > -static inline struct kcsan_ctx *get_ctx(void)
> > > +static __always_inline struct kcsan_ctx *get_ctx(void)
> > >  {
> > >     /*
> > >      * In interrupts, use raw_cpu_ptr to avoid unnecessary checks, that would
> > > @@ -169,7 +167,7 @@ static inline struct kcsan_ctx *get_ctx(void)
> > >     return in_task() ? &current->kcsan_ctx : raw_cpu_ptr(&kcsan_cpu_ctx);
> > >  }
> > >
> > > -static inline bool is_atomic(const volatile void *ptr)
> > > +static __always_inline bool is_atomic(const volatile void *ptr)
> > >  {
> > >     struct kcsan_ctx *ctx = get_ctx();
> > >
> > > @@ -193,7 +191,7 @@ static inline bool is_atomic(const volatile void *ptr)
> > >     return kcsan_is_atomic(ptr);
> > >  }
> > >
> > > -static inline bool should_watch(const volatile void *ptr, int type)
> > > +static __always_inline bool should_watch(const volatile void *ptr, int type)
> > >  {
> > >     /*
> > >      * Never set up watchpoints when memory operations are atomic.
> > > @@ -226,7 +224,7 @@ static inline void reset_kcsan_skip(void)
> > >     this_cpu_write(kcsan_skip, skip_count);
> > >  }
> > >
> > > -static inline bool kcsan_is_enabled(void)
> > > +static __always_inline bool kcsan_is_enabled(void)
> > >  {
> > >     return READ_ONCE(kcsan_enabled) && get_ctx()->disable_count == 0;
> > >  }
> > > diff --git a/kernel/kcsan/encoding.h b/kernel/kcsan/encoding.h
> > > index b63890e86449..f03562aaf2eb 100644
> > > --- a/kernel/kcsan/encoding.h
> > > +++ b/kernel/kcsan/encoding.h
> > > @@ -59,10 +59,10 @@ encode_watchpoint(unsigned long addr, size_t size, bool is_write)
> > >                   (addr & WATCHPOINT_ADDR_MASK));
> > >  }
> > >
> > > -static inline bool decode_watchpoint(long watchpoint,
> > > -                                unsigned long *addr_masked,
> > > -                                size_t *size,
> > > -                                bool *is_write)
> > > +static __always_inline bool decode_watchpoint(long watchpoint,
> > > +                                         unsigned long *addr_masked,
> > > +                                         size_t *size,
> > > +                                         bool *is_write)
> > >  {
> > >     if (watchpoint == INVALID_WATCHPOINT ||
> > >         watchpoint == CONSUMED_WATCHPOINT)
> > > @@ -78,13 +78,13 @@ static inline bool decode_watchpoint(long watchpoint,
> > >  /*
> > >   * Return watchpoint slot for an address.
> > >   */
> > > -static inline int watchpoint_slot(unsigned long addr)
> > > +static __always_inline int watchpoint_slot(unsigned long addr)
> > >  {
> > >     return (addr / PAGE_SIZE) % CONFIG_KCSAN_NUM_WATCHPOINTS;
> > >  }
> > >
> > > -static inline bool matching_access(unsigned long addr1, size_t size1,
> > > -                              unsigned long addr2, size_t size2)
> > > +static __always_inline bool matching_access(unsigned long addr1, size_t size1,
> > > +                                       unsigned long addr2, size_t size2)
> > >  {
> > >     unsigned long end_range1 = addr1 + size1 - 1;
> > >     unsigned long end_range2 = addr2 + size2 - 1;
> > >
> >
> >
> > --
> > ~Randy
> >

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v3 3/3] kcsan: Prefer __always_inline for fast-path
  2019-12-12 21:11       ` Marco Elver
@ 2019-12-13  1:31         ` Paul E. McKenney
  2019-12-13 20:53           ` Marco Elver
  0 siblings, 1 reply; 9+ messages in thread
From: Paul E. McKenney @ 2019-12-13  1:31 UTC (permalink / raw)
  To: Marco Elver
  Cc: Randy Dunlap, Mark Rutland, LKML, Will Deacon, Peter Zijlstra,
	Boqun Feng, Arnd Bergmann, Dmitry Vyukov, linux-arch, kasan-dev

On Thu, Dec 12, 2019 at 10:11:59PM +0100, Marco Elver wrote:
> On Tue, 3 Dec 2019 at 17:01, Paul E. McKenney <paulmck@kernel.org> wrote:
> >
> > On Mon, Dec 02, 2019 at 09:30:22PM -0800, Randy Dunlap wrote:
> > > On 11/26/19 6:04 AM, Marco Elver wrote:
> > > > Prefer __always_inline for fast-path functions that are called outside
> > > > of user_access_save, to avoid generating UACCESS warnings when
> > > > optimizing for size (CC_OPTIMIZE_FOR_SIZE). It will also avoid future
> > > > surprises with compiler versions that change the inlining heuristic even
> > > > when optimizing for performance.
> > > >
> > > > Report: http://lkml.kernel.org/r/58708908-84a0-0a81-a836-ad97e33dbb62@infradead.org
> > > > Reported-by: Randy Dunlap <rdunlap@infradead.org>
> > > > Signed-off-by: Marco Elver <elver@google.com>
> > >
> > > Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
> >
> > Thank you, Randy!
> 
> Hoped this would have applied by now, but since KCSAN isn't in
> mainline yet, should I send a version of this patch rebased on
> -rcu/kcsan?
> It will just conflict with the style cleanup that is in
> -tip/locking/kcsan when another eventual merge happens. Alternatively,
> we can delay it for now and just have to remember to apply eventually
> (and have to live with things being messy for a bit longer :-)).

Excellent question.  ;-)

The first several commits are in -tip already, so they will go upstream
in their current state by default.  And a bunch of -tip commits have
already been merged on top of them, so it might not be easy to move them.

So please feel free to port the patch to -rcu/kcsan and let's see how that
plays out.  If it gets too ugly, then maybe wait until the current set
of patches go upstream.

Another option is to port them to the kcsan merge point in -rcu.  That
would bring in v5.5-rc1.  Would that help?

							Thanx, Paul

> The version as-is here applies on -tip/locking/kcsan and -next (which
> merged -tip/locking/kcsan).
> 
> Thanks,
> -- Marco
> 
> 
> >                                                         Thanx, Paul
> >
> > > Thanks.
> > >
> > > > ---
> > > > Rebased on: locking/kcsan branch of tip tree.
> > > > ---
> > > >  kernel/kcsan/atomic.h   |  2 +-
> > > >  kernel/kcsan/core.c     | 16 +++++++---------
> > > >  kernel/kcsan/encoding.h | 14 +++++++-------
> > > >  3 files changed, 15 insertions(+), 17 deletions(-)
> > > >
> > > > diff --git a/kernel/kcsan/atomic.h b/kernel/kcsan/atomic.h
> > > > index 576e03ddd6a3..a9c193053491 100644
> > > > --- a/kernel/kcsan/atomic.h
> > > > +++ b/kernel/kcsan/atomic.h
> > > > @@ -18,7 +18,7 @@
> > > >   * than cast to volatile. Eventually, we hope to be able to remove this
> > > >   * function.
> > > >   */
> > > > -static inline bool kcsan_is_atomic(const volatile void *ptr)
> > > > +static __always_inline bool kcsan_is_atomic(const volatile void *ptr)
> > > >  {
> > > >     /* only jiffies for now */
> > > >     return ptr == &jiffies;
> > > > diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c
> > > > index 3314fc29e236..c616fec639cd 100644
> > > > --- a/kernel/kcsan/core.c
> > > > +++ b/kernel/kcsan/core.c
> > > > @@ -78,10 +78,8 @@ static atomic_long_t watchpoints[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1];
> > > >   */
> > > >  static DEFINE_PER_CPU(long, kcsan_skip);
> > > >
> > > > -static inline atomic_long_t *find_watchpoint(unsigned long addr,
> > > > -                                        size_t size,
> > > > -                                        bool expect_write,
> > > > -                                        long *encoded_watchpoint)
> > > > +static __always_inline atomic_long_t *
> > > > +find_watchpoint(unsigned long addr, size_t size, bool expect_write, long *encoded_watchpoint)
> > > >  {
> > > >     const int slot = watchpoint_slot(addr);
> > > >     const unsigned long addr_masked = addr & WATCHPOINT_ADDR_MASK;
> > > > @@ -146,7 +144,7 @@ insert_watchpoint(unsigned long addr, size_t size, bool is_write)
> > > >   * 2. the thread that set up the watchpoint already removed it;
> > > >   * 3. the watchpoint was removed and then re-used.
> > > >   */
> > > > -static inline bool
> > > > +static __always_inline bool
> > > >  try_consume_watchpoint(atomic_long_t *watchpoint, long encoded_watchpoint)
> > > >  {
> > > >     return atomic_long_try_cmpxchg_relaxed(watchpoint, &encoded_watchpoint, CONSUMED_WATCHPOINT);
> > > > @@ -160,7 +158,7 @@ static inline bool remove_watchpoint(atomic_long_t *watchpoint)
> > > >     return atomic_long_xchg_relaxed(watchpoint, INVALID_WATCHPOINT) != CONSUMED_WATCHPOINT;
> > > >  }
> > > >
> > > > -static inline struct kcsan_ctx *get_ctx(void)
> > > > +static __always_inline struct kcsan_ctx *get_ctx(void)
> > > >  {
> > > >     /*
> > > >      * In interrupts, use raw_cpu_ptr to avoid unnecessary checks, that would
> > > > @@ -169,7 +167,7 @@ static inline struct kcsan_ctx *get_ctx(void)
> > > >     return in_task() ? &current->kcsan_ctx : raw_cpu_ptr(&kcsan_cpu_ctx);
> > > >  }
> > > >
> > > > -static inline bool is_atomic(const volatile void *ptr)
> > > > +static __always_inline bool is_atomic(const volatile void *ptr)
> > > >  {
> > > >     struct kcsan_ctx *ctx = get_ctx();
> > > >
> > > > @@ -193,7 +191,7 @@ static inline bool is_atomic(const volatile void *ptr)
> > > >     return kcsan_is_atomic(ptr);
> > > >  }
> > > >
> > > > -static inline bool should_watch(const volatile void *ptr, int type)
> > > > +static __always_inline bool should_watch(const volatile void *ptr, int type)
> > > >  {
> > > >     /*
> > > >      * Never set up watchpoints when memory operations are atomic.
> > > > @@ -226,7 +224,7 @@ static inline void reset_kcsan_skip(void)
> > > >     this_cpu_write(kcsan_skip, skip_count);
> > > >  }
> > > >
> > > > -static inline bool kcsan_is_enabled(void)
> > > > +static __always_inline bool kcsan_is_enabled(void)
> > > >  {
> > > >     return READ_ONCE(kcsan_enabled) && get_ctx()->disable_count == 0;
> > > >  }
> > > > diff --git a/kernel/kcsan/encoding.h b/kernel/kcsan/encoding.h
> > > > index b63890e86449..f03562aaf2eb 100644
> > > > --- a/kernel/kcsan/encoding.h
> > > > +++ b/kernel/kcsan/encoding.h
> > > > @@ -59,10 +59,10 @@ encode_watchpoint(unsigned long addr, size_t size, bool is_write)
> > > >                   (addr & WATCHPOINT_ADDR_MASK));
> > > >  }
> > > >
> > > > -static inline bool decode_watchpoint(long watchpoint,
> > > > -                                unsigned long *addr_masked,
> > > > -                                size_t *size,
> > > > -                                bool *is_write)
> > > > +static __always_inline bool decode_watchpoint(long watchpoint,
> > > > +                                         unsigned long *addr_masked,
> > > > +                                         size_t *size,
> > > > +                                         bool *is_write)
> > > >  {
> > > >     if (watchpoint == INVALID_WATCHPOINT ||
> > > >         watchpoint == CONSUMED_WATCHPOINT)
> > > > @@ -78,13 +78,13 @@ static inline bool decode_watchpoint(long watchpoint,
> > > >  /*
> > > >   * Return watchpoint slot for an address.
> > > >   */
> > > > -static inline int watchpoint_slot(unsigned long addr)
> > > > +static __always_inline int watchpoint_slot(unsigned long addr)
> > > >  {
> > > >     return (addr / PAGE_SIZE) % CONFIG_KCSAN_NUM_WATCHPOINTS;
> > > >  }
> > > >
> > > > -static inline bool matching_access(unsigned long addr1, size_t size1,
> > > > -                              unsigned long addr2, size_t size2)
> > > > +static __always_inline bool matching_access(unsigned long addr1, size_t size1,
> > > > +                                       unsigned long addr2, size_t size2)
> > > >  {
> > > >     unsigned long end_range1 = addr1 + size1 - 1;
> > > >     unsigned long end_range2 = addr2 + size2 - 1;
> > > >
> > >
> > >
> > > --
> > > ~Randy
> > >

^ permalink raw reply	[flat|nested] 9+ messages in thread
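To make the UACCESS point in the commit message above concrete, here is a minimal
sketch; it is not the actual KCSAN call chain, and fast_path_check() /
example_put_user() are made-up names. Between user_access_begin() and
user_access_end(), objtool warns about calls to functions that are not on its
UACCESS-safe list, so a plain 'static inline' helper that the compiler declines to
inline under CC_OPTIMIZE_FOR_SIZE turns into exactly such a flagged call;
__always_inline removes that degree of freedom.

#include <linux/compiler.h>
#include <linux/uaccess.h>

/* Hypothetical helper; stands in for the KCSAN fast-path functions. */
static __always_inline bool fast_path_check(const volatile void *ptr)
{
	/* A cheap check that must not become an out-of-line call. */
	return ptr != NULL;
}

/* Hypothetical caller containing a UACCESS-enabled (STAC) region. */
static int example_put_user(unsigned long __user *uptr, unsigned long val,
			    const volatile void *ptr)
{
	if (!user_access_begin(uptr, sizeof(*uptr)))
		return -EFAULT;

	/*
	 * UACCESS is enabled here.  If fast_path_check() were plain
	 * 'static inline' and the compiler emitted it out of line under
	 * -Os, objtool would report "call to fast_path_check() with
	 * UACCESS enabled" at this call site.
	 */
	if (fast_path_check(ptr))
		unsafe_put_user(val, uptr, Efault);

	user_access_end();
	return 0;

Efault:
	user_access_end();
	return -EFAULT;
}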

* Re: [PATCH v3 3/3] kcsan: Prefer __always_inline for fast-path
  2019-12-13  1:31         ` Paul E. McKenney
@ 2019-12-13 20:53           ` Marco Elver
  0 siblings, 0 replies; 9+ messages in thread
From: Marco Elver @ 2019-12-13 20:53 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: Randy Dunlap, Mark Rutland, LKML, Will Deacon, Peter Zijlstra,
	Boqun Feng, Arnd Bergmann, Dmitry Vyukov, linux-arch, kasan-dev

On Fri, 13 Dec 2019 at 02:31, Paul E. McKenney <paulmck@kernel.org> wrote:
>
> On Thu, Dec 12, 2019 at 10:11:59PM +0100, Marco Elver wrote:
> > On Tue, 3 Dec 2019 at 17:01, Paul E. McKenney <paulmck@kernel.org> wrote:
> > >
> > > On Mon, Dec 02, 2019 at 09:30:22PM -0800, Randy Dunlap wrote:
> > > > On 11/26/19 6:04 AM, Marco Elver wrote:
> > > > > Prefer __always_inline for fast-path functions that are called outside
> > > > > of user_access_save, to avoid generating UACCESS warnings when
> > > > > optimizing for size (CC_OPTIMIZE_FOR_SIZE). It will also avoid future
> > > > > surprises with compiler versions that change the inlining heuristic even
> > > > > when optimizing for performance.
> > > > >
> > > > > Report: http://lkml.kernel.org/r/58708908-84a0-0a81-a836-ad97e33dbb62@infradead.org
> > > > > Reported-by: Randy Dunlap <rdunlap@infradead.org>
> > > > > Signed-off-by: Marco Elver <elver@google.com>
> > > >
> > > > Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
> > >
> > > Thank you, Randy!
> >
> > Hoped this would have applied by now, but since KCSAN isn't in
> > mainline yet, should I send a version of this patch rebased on
> > -rcu/kcsan?
> > It will just conflict with the style cleanup that is in
> > -tip/locking/kcsan when another eventual merge happens. Alternatively,
> > we can delay it for now and just have to remember to apply eventually
> > (and have to live with things being messy for a bit longer :-)).
>
> Excellent question.  ;-)
>
> The first several commits are in -tip already, so they will go upstream
> in their current state by default.  And a bunch of -tip commits have
> already been merged on top of them, so it might not be easy to move them.
>
> So please feel free to port the patch to -rcu/kcsan and let's see how that
> plays out.  If it gets too ugly, then maybe wait until the current set
> of patches go upstream.
>
> Another option is to port them to the kcsan merge point in -rcu.  That
> would bring in v5.5-rc1.  Would that help?

For this patch it won't help, since it only conflicts with changes in
this commit which is not in v5.5-rc1:
https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/commit/?h=locking/kcsan&id=5cbaefe9743bf14c9d3106db0cc19f8cb0a3ca22

However, for this patch there are only 3 locations in
kernel/kcsan/{core.c,encoding.h} that conflict, and all of them should
be trivial to resolve. For the version rebased against -rcu/kcsan, in
the conflicting locations I simply carried over the better style, so
that upon eventual merge the resolution should be trivial (I hope). I
have sent the rebased version here:
http://lkml.kernel.org/r/20191213204946.251125-1-elver@google.com

Unrelated to this patch, we also deferred the updated bitops patch
which now applies on top of v5.5-rc1:
http://lkml.kernel.org/r/20191115115524.GA77379@google.com
but doesn't apply to -rcu/kcsan. I think the bitops patch isn't
terribly urgent, so it could wait to avoid further confusion.

Many thanks,
-- Marco



^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2019-12-13 20:53 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-11-26 14:04 [PATCH v3 1/3] asm-generic/atomic: Use __always_inline for pure wrappers Marco Elver
2019-11-26 14:04 ` [PATCH v3 2/3] asm-generic/atomic: Use __always_inline for fallback wrappers Marco Elver
2019-11-26 14:04 ` [PATCH v3 3/3] kcsan: Prefer __always_inline for fast-path Marco Elver
2019-12-03  5:30   ` Randy Dunlap
2019-12-03 16:01     ` Paul E. McKenney
2019-12-12 21:11       ` Marco Elver
2019-12-13  1:31         ` Paul E. McKenney
2019-12-13 20:53           ` Marco Elver
2019-12-02 21:36 ` [PATCH v3 1/3] asm-generic/atomic: Use __always_inline for pure wrappers Paul E. McKenney
