All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] x86/mce: Fix more noinstr fun
@ 2022-02-04  8:30 Borislav Petkov
  2022-02-04  8:30 ` [PATCH 1/3] cpumask: Add a x86-specific cpumask_clear_cpu() helper Borislav Petkov
                   ` (3 more replies)
  0 siblings, 4 replies; 11+ messages in thread
From: Borislav Petkov @ 2022-02-04  8:30 UTC (permalink / raw)
  To: Tony Luck; +Cc: Jakub Kicinski, Linus Torvalds, Marco Elver, X86 ML, LKML

From: Borislav Petkov <bp@suse.de>

Hi all,

here's a second small set of fixes for objtool noinstr validation issues
in the MCE code.

It goes ontop of tip:locking/core where there are some more commits
changing generic helpers to be always inlined.

As always, comments and suggestions are appreciated.

Thx.

Borislav Petkov (3):
  cpumask: Add a x86-specific cpumask_clear_cpu() helper
  x86/ptrace: Always inline v8086_mode() for instrumentation
  x86/mce: Use arch atomic and bit helpers

 arch/x86/include/asm/cpumask.h     | 10 ++++++
 arch/x86/include/asm/ptrace.h      |  2 +-
 arch/x86/kernel/cpu/mce/core.c     | 58 ++++++++++--------------------
 arch/x86/kernel/cpu/mce/internal.h | 23 ++++++++++--
 arch/x86/kernel/cpu/mce/severity.c |  2 +-
 include/linux/cpumask.h            |  4 +--
 6 files changed, 54 insertions(+), 45 deletions(-)

-- 
2.29.2


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/3] cpumask: Add a x86-specific cpumask_clear_cpu() helper
  2022-02-04  8:30 [PATCH 0/3] x86/mce: Fix more noinstr fun Borislav Petkov
@ 2022-02-04  8:30 ` Borislav Petkov
  2022-02-04 16:46   ` Luck, Tony
  2022-02-13 10:59   ` [tip: locking/core] " tip-bot2 for Borislav Petkov
  2022-02-04  8:30 ` [PATCH 2/3] x86/ptrace: Always inline v8086_mode() for instrumentation Borislav Petkov
                   ` (2 subsequent siblings)
  3 siblings, 2 replies; 11+ messages in thread
From: Borislav Petkov @ 2022-02-04  8:30 UTC (permalink / raw)
  To: Tony Luck; +Cc: Jakub Kicinski, Linus Torvalds, Marco Elver, X86 ML, LKML

From: Borislav Petkov <bp@suse.de>

Add a x86-specific cpumask_clear_cpu() helper which will be used in
places where the explicit KASAN-instrumentation in the *_bit() helpers
is unwanted.

Also, always inline two more cpumask generic helpers.

allyesconfig:

   text    data     bss     dec     hex filename
190553143       159425889       32076404        382055436       16c5b40c vmlinux.before
190551812       159424945       32076404        382053161       16c5ab29 vmlinux.after

Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/include/asm/cpumask.h | 10 ++++++++++
 include/linux/cpumask.h        |  4 ++--
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h
index 3afa990d756b..c5aed9e9226c 100644
--- a/arch/x86/include/asm/cpumask.h
+++ b/arch/x86/include/asm/cpumask.h
@@ -20,11 +20,21 @@ static __always_inline bool arch_cpu_online(int cpu)
 {
 	return arch_test_bit(cpu, cpumask_bits(cpu_online_mask));
 }
+
+static __always_inline void arch_cpumask_clear_cpu(int cpu, struct cpumask *dstp)
+{
+	arch_clear_bit(cpumask_check(cpu), cpumask_bits(dstp));
+}
 #else
 static __always_inline bool arch_cpu_online(int cpu)
 {
 	return cpu == 0;
 }
+
+static __always_inline void arch_cpumask_clear_cpu(int cpu, struct cpumask *dstp)
+{
+	return;
+}
 #endif
 
 #define arch_cpu_is_offline(cpu)	unlikely(!arch_cpu_online(cpu))
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 6b06c698cd2a..fe29ac7cc469 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -102,7 +102,7 @@ extern atomic_t __num_online_cpus;
 
 extern cpumask_t cpus_booted_once_mask;
 
-static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits)
+static __always_inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits)
 {
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
 	WARN_ON_ONCE(cpu >= bits);
@@ -110,7 +110,7 @@ static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits)
 }
 
 /* verify cpu argument to cpumask_* operators */
-static inline unsigned int cpumask_check(unsigned int cpu)
+static __always_inline unsigned int cpumask_check(unsigned int cpu)
 {
 	cpu_max_bits_warn(cpu, nr_cpumask_bits);
 	return cpu;
-- 
2.29.2


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/3] x86/ptrace: Always inline v8086_mode() for instrumentation
  2022-02-04  8:30 [PATCH 0/3] x86/mce: Fix more noinstr fun Borislav Petkov
  2022-02-04  8:30 ` [PATCH 1/3] cpumask: Add a x86-specific cpumask_clear_cpu() helper Borislav Petkov
@ 2022-02-04  8:30 ` Borislav Petkov
  2022-02-13 10:59   ` [tip: locking/core] " tip-bot2 for Borislav Petkov
  2022-02-04  8:30 ` [PATCH 3/3] x86/mce: Use arch atomic and bit helpers Borislav Petkov
  2022-02-04 11:24 ` [PATCH 0/3] x86/mce: Fix more noinstr fun Marco Elver
  3 siblings, 1 reply; 11+ messages in thread
From: Borislav Petkov @ 2022-02-04  8:30 UTC (permalink / raw)
  To: Tony Luck; +Cc: Jakub Kicinski, Linus Torvalds, Marco Elver, X86 ML, LKML

From: Borislav Petkov <bp@suse.de>

Instrumentation glue like KASAN causes the following warning:

  vmlinux.o: warning: objtool: mce_gather_info()+0x5f: call to v8086_mode.constprop.0() leaves .noinstr.text section

due to gcc creating a function call for that oneliner. Force-inline it
and even save some vmlinux bytes (.config is close to an allmodconfig):

     text    data     bss     dec     hex filename
  209431677       208257651       34411048        452100376       1af28118	vmlinux.before
  209431519       208257615       34411048        452100182       1af28056	vmlinux.after

Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/include/asm/ptrace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 703663175a5a..4357e0f2cd5f 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -137,7 +137,7 @@ static __always_inline int user_mode(struct pt_regs *regs)
 #endif
 }
 
-static inline int v8086_mode(struct pt_regs *regs)
+static __always_inline int v8086_mode(struct pt_regs *regs)
 {
 #ifdef CONFIG_X86_32
 	return (regs->flags & X86_VM_MASK);
-- 
2.29.2


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 3/3] x86/mce: Use arch atomic and bit helpers
  2022-02-04  8:30 [PATCH 0/3] x86/mce: Fix more noinstr fun Borislav Petkov
  2022-02-04  8:30 ` [PATCH 1/3] cpumask: Add a x86-specific cpumask_clear_cpu() helper Borislav Petkov
  2022-02-04  8:30 ` [PATCH 2/3] x86/ptrace: Always inline v8086_mode() for instrumentation Borislav Petkov
@ 2022-02-04  8:30 ` Borislav Petkov
  2022-02-14 10:03   ` [tip: ras/core] " tip-bot2 for Borislav Petkov
  2022-02-04 11:24 ` [PATCH 0/3] x86/mce: Fix more noinstr fun Marco Elver
  3 siblings, 1 reply; 11+ messages in thread
From: Borislav Petkov @ 2022-02-04  8:30 UTC (permalink / raw)
  To: Tony Luck; +Cc: Jakub Kicinski, Linus Torvalds, Marco Elver, X86 ML, LKML

From: Borislav Petkov <bp@suse.de>

The arch helpers do not have explicit KASAN instrumentation. Use them in
noinstr code.

Inline a couple more functions with single call sites, while at it:

mce_severity_amd_smca() has a single call-site which is noinstr so force
the inlining and fix:

  vmlinux.o: warning: objtool: mce_severity_amd.constprop.0()+0xca: call to \
	  mce_severity_amd_smca() leaves .noinstr.text section

Always inline mca_msr_reg():

   text    data     bss     dec     hex filename
16065240        128031326       36405368        180501934       ac23dae vmlinux.before
16065240        128031294       36405368        180501902       ac23d8e vmlinux.after

and mce_no_way_out() as the latter one is used only once, to fix:

  vmlinux.o: warning: objtool: mce_read_aux()+0x53: call to mca_msr_reg() leaves .noinstr.text section
  vmlinux.o: warning: objtool: do_machine_check()+0xc9: call to mce_no_way_out() leaves .noinstr.text section

Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/kernel/cpu/mce/core.c     | 58 ++++++++++--------------------
 arch/x86/kernel/cpu/mce/internal.h | 23 ++++++++++--
 arch/x86/kernel/cpu/mce/severity.c |  2 +-
 3 files changed, 41 insertions(+), 42 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 5818b837fd4d..0e7147430ec0 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -173,27 +173,6 @@ void mce_unregister_decode_chain(struct notifier_block *nb)
 }
 EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
 
-u32 mca_msr_reg(int bank, enum mca_msr reg)
-{
-	if (mce_flags.smca) {
-		switch (reg) {
-		case MCA_CTL:	 return MSR_AMD64_SMCA_MCx_CTL(bank);
-		case MCA_ADDR:	 return MSR_AMD64_SMCA_MCx_ADDR(bank);
-		case MCA_MISC:	 return MSR_AMD64_SMCA_MCx_MISC(bank);
-		case MCA_STATUS: return MSR_AMD64_SMCA_MCx_STATUS(bank);
-		}
-	}
-
-	switch (reg) {
-	case MCA_CTL:	 return MSR_IA32_MCx_CTL(bank);
-	case MCA_ADDR:	 return MSR_IA32_MCx_ADDR(bank);
-	case MCA_MISC:	 return MSR_IA32_MCx_MISC(bank);
-	case MCA_STATUS: return MSR_IA32_MCx_STATUS(bank);
-	}
-
-	return 0;
-}
-
 static void __print_mce(struct mce *m)
 {
 	pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n",
@@ -814,7 +793,8 @@ EXPORT_SYMBOL_GPL(machine_check_poll);
  * the severity assessment code. Pretend that EIPV was set, and take the
  * ip/cs values from the pt_regs that mce_gather_info() ignored earlier.
  */
-static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
+static __always_inline void
+quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
 {
 	if (bank != 0)
 		return;
@@ -838,8 +818,8 @@ static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
  * Do a quick check if any of the events requires a panic.
  * This decides if we keep the events around or clear them.
  */
-static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
-			  struct pt_regs *regs)
+static __always_inline int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
+					  struct pt_regs *regs)
 {
 	char *tmp = *msg;
 	int i;
@@ -849,7 +829,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
 		if (!(m->status & MCI_STATUS_VAL))
 			continue;
 
-		__set_bit(i, validp);
+		arch___set_bit(i, validp);
 		if (mce_flags.snb_ifu_quirk)
 			quirk_sandybridge_ifu(i, m, regs);
 
@@ -1015,13 +995,13 @@ static noinstr int mce_start(int *no_way_out)
 	if (!timeout)
 		return ret;
 
-	atomic_add(*no_way_out, &global_nwo);
+	arch_atomic_add(*no_way_out, &global_nwo);
 	/*
 	 * Rely on the implied barrier below, such that global_nwo
 	 * is updated before mce_callin.
 	 */
-	order = atomic_inc_return(&mce_callin);
-	cpumask_clear_cpu(smp_processor_id(), &mce_missing_cpus);
+	order = arch_atomic_inc_return(&mce_callin);
+	arch_cpumask_clear_cpu(smp_processor_id(), &mce_missing_cpus);
 
 	/* Enable instrumentation around calls to external facilities */
 	instrumentation_begin();
@@ -1029,10 +1009,10 @@ static noinstr int mce_start(int *no_way_out)
 	/*
 	 * Wait for everyone.
 	 */
-	while (atomic_read(&mce_callin) != num_online_cpus()) {
+	while (arch_atomic_read(&mce_callin) != num_online_cpus()) {
 		if (mce_timed_out(&timeout,
 				  "Timeout: Not all CPUs entered broadcast exception handler")) {
-			atomic_set(&global_nwo, 0);
+			arch_atomic_set(&global_nwo, 0);
 			goto out;
 		}
 		ndelay(SPINUNIT);
@@ -1047,7 +1027,7 @@ static noinstr int mce_start(int *no_way_out)
 		/*
 		 * Monarch: Starts executing now, the others wait.
 		 */
-		atomic_set(&mce_executing, 1);
+		arch_atomic_set(&mce_executing, 1);
 	} else {
 		/*
 		 * Subject: Now start the scanning loop one by one in
@@ -1055,10 +1035,10 @@ static noinstr int mce_start(int *no_way_out)
 		 * This way when there are any shared banks it will be
 		 * only seen by one CPU before cleared, avoiding duplicates.
 		 */
-		while (atomic_read(&mce_executing) < order) {
+		while (arch_atomic_read(&mce_executing) < order) {
 			if (mce_timed_out(&timeout,
 					  "Timeout: Subject CPUs unable to finish machine check processing")) {
-				atomic_set(&global_nwo, 0);
+				arch_atomic_set(&global_nwo, 0);
 				goto out;
 			}
 			ndelay(SPINUNIT);
@@ -1068,7 +1048,7 @@ static noinstr int mce_start(int *no_way_out)
 	/*
 	 * Cache the global no_way_out state.
 	 */
-	*no_way_out = atomic_read(&global_nwo);
+	*no_way_out = arch_atomic_read(&global_nwo);
 
 	ret = order;
 
@@ -1153,12 +1133,12 @@ static noinstr int mce_end(int order)
 	return ret;
 }
 
-static void mce_clear_state(unsigned long *toclear)
+static __always_inline void mce_clear_state(unsigned long *toclear)
 {
 	int i;
 
 	for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
-		if (test_bit(i, toclear))
+		if (arch_test_bit(i, toclear))
 			mce_wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
 	}
 }
@@ -1208,8 +1188,8 @@ __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
 	int severity, i, taint = 0;
 
 	for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
-		__clear_bit(i, toclear);
-		if (!test_bit(i, valid_banks))
+		arch___clear_bit(i, toclear);
+		if (!arch_test_bit(i, valid_banks))
 			continue;
 
 		if (!mce_banks[i].ctl)
@@ -1244,7 +1224,7 @@ __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
 		     severity == MCE_UCNA_SEVERITY) && !no_way_out)
 			continue;
 
-		__set_bit(i, toclear);
+		arch___set_bit(i, toclear);
 
 		/* Machine check event was not enabled. Clear, but ignore. */
 		if (severity == MCE_NO_SEVERITY)
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 52c633950b38..a04b61e27827 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -182,8 +182,6 @@ enum mca_msr {
 	MCA_MISC,
 };
 
-u32 mca_msr_reg(int bank, enum mca_msr reg);
-
 /* Decide whether to add MCE record to MCE event pool or filter it out. */
 extern bool filter_mce(struct mce *m);
 
@@ -209,4 +207,25 @@ static inline void winchip_machine_check(struct pt_regs *regs) {}
 
 noinstr u64 mce_rdmsrl(u32 msr);
 
+static __always_inline u32 mca_msr_reg(int bank, enum mca_msr reg)
+{
+	if (mce_flags.smca) {
+		switch (reg) {
+		case MCA_CTL:	 return MSR_AMD64_SMCA_MCx_CTL(bank);
+		case MCA_ADDR:	 return MSR_AMD64_SMCA_MCx_ADDR(bank);
+		case MCA_MISC:	 return MSR_AMD64_SMCA_MCx_MISC(bank);
+		case MCA_STATUS: return MSR_AMD64_SMCA_MCx_STATUS(bank);
+		}
+	}
+
+	switch (reg) {
+	case MCA_CTL:	 return MSR_IA32_MCx_CTL(bank);
+	case MCA_ADDR:	 return MSR_IA32_MCx_ADDR(bank);
+	case MCA_MISC:	 return MSR_IA32_MCx_MISC(bank);
+	case MCA_STATUS: return MSR_IA32_MCx_STATUS(bank);
+	}
+
+	return 0;
+}
+
 #endif /* __X86_MCE_INTERNAL_H__ */
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index 7aa2bda93cbb..ca0d775346fb 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -301,7 +301,7 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
 	}
 }
 
-static int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
+static __always_inline int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
 {
 	u64 mcx_cfg;
 
-- 
2.29.2


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 0/3] x86/mce: Fix more noinstr fun
  2022-02-04  8:30 [PATCH 0/3] x86/mce: Fix more noinstr fun Borislav Petkov
                   ` (2 preceding siblings ...)
  2022-02-04  8:30 ` [PATCH 3/3] x86/mce: Use arch atomic and bit helpers Borislav Petkov
@ 2022-02-04 11:24 ` Marco Elver
  2022-02-04 14:48   ` Borislav Petkov
  3 siblings, 1 reply; 11+ messages in thread
From: Marco Elver @ 2022-02-04 11:24 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: Tony Luck, Jakub Kicinski, Linus Torvalds, X86 ML, LKML

On Fri, 4 Feb 2022 at 09:30, Borislav Petkov <bp@alien8.de> wrote:
>
> From: Borislav Petkov <bp@suse.de>
>
> Hi all,
>
> here's a second small set of fixes for objtool noinstr validation issues
> in the MCE code.
>
> It goes ontop of tip:locking/core where there are some more commits
> changing generic helpers to be always inlined.
>
> As always, comments and suggestions are appreciated.
>
> Thx.
>
> Borislav Petkov (3):
>   cpumask: Add a x86-specific cpumask_clear_cpu() helper
>   x86/ptrace: Always inline v8086_mode() for instrumentation
>   x86/mce: Use arch atomic and bit helpers

I guess to solve noinstr "calling" something else the only solution is
to make the other function noinstr as well, or __always_inline.

Have you considered making some of these other functions 'noinstr' as
well? I guess __always_inline works, esp. if there's just 1 caller.
And by the looks of it you're getting a net .text reduction, so

Acked-by: Marco Elver <elver@google.com>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 0/3] x86/mce: Fix more noinstr fun
  2022-02-04 11:24 ` [PATCH 0/3] x86/mce: Fix more noinstr fun Marco Elver
@ 2022-02-04 14:48   ` Borislav Petkov
  0 siblings, 0 replies; 11+ messages in thread
From: Borislav Petkov @ 2022-02-04 14:48 UTC (permalink / raw)
  To: Marco Elver; +Cc: Tony Luck, Jakub Kicinski, Linus Torvalds, X86 ML, LKML

On Fri, Feb 04, 2022 at 12:24:00PM +0100, Marco Elver wrote:
> I guess to solve noinstr "calling" something else the only solution is
> to make the other function noinstr as well, or __always_inline.
> 
> Have you considered making some of these other functions 'noinstr' as
> well? I guess __always_inline works, esp. if there's just 1 caller.
> And by the looks of it you're getting a net .text reduction, so

Yeah, I started doing that and the savings were the persuasive
argument.

Even more so if the function has one caller only and gets inlined,
normally. I guess it doesn't get inlined when there's KASAN
instrumentation or so but I haven't verified it fully why.

Because even for oneliners like v8086_mode() which should get inlined
trivially, the compiler ends up doing this constprop thing - I guess
some constants folding optimization thing... it probably doesn't even
matter so much whether oneliners get inlined in KASAN-enabled builds so
I guess we might just as well force-inline them for the other configs.

> Acked-by: Marco Elver <elver@google.com>

Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 11+ messages in thread

* RE: [PATCH 1/3] cpumask: Add a x86-specific cpumask_clear_cpu() helper
  2022-02-04  8:30 ` [PATCH 1/3] cpumask: Add a x86-specific cpumask_clear_cpu() helper Borislav Petkov
@ 2022-02-04 16:46   ` Luck, Tony
  2022-02-04 18:04     ` Borislav Petkov
  2022-02-13 10:59   ` [tip: locking/core] " tip-bot2 for Borislav Petkov
  1 sibling, 1 reply; 11+ messages in thread
From: Luck, Tony @ 2022-02-04 16:46 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: Jakub Kicinski, Linus Torvalds, Marco Elver, X86 ML, LKML

> Also, always inline two more cpumask generic helpers.
>
> allyesconfig:
>
>    text    data     bss     dec     hex filename
> 190553143       159425889       32076404        382055436       16c5b40c vmlinux.before
> 190551812       159424945       32076404        382053161       16c5ab29 vmlinux.after

That looks odd. You added "always" to some inline functions, which I'd
have thought might cause *more* inlining, and thus an increase in text
size. But it actually went *down* by 1331 bytes.

-Tony

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/3] cpumask: Add a x86-specific cpumask_clear_cpu() helper
  2022-02-04 16:46   ` Luck, Tony
@ 2022-02-04 18:04     ` Borislav Petkov
  0 siblings, 0 replies; 11+ messages in thread
From: Borislav Petkov @ 2022-02-04 18:04 UTC (permalink / raw)
  To: Luck, Tony; +Cc: Jakub Kicinski, Linus Torvalds, Marco Elver, X86 ML, LKML

On Fri, Feb 04, 2022 at 04:46:24PM +0000, Luck, Tony wrote:
> > Also, always inline two more cpumask generic helpers.
> >
> > allyesconfig:
> >
> >    text    data     bss     dec     hex filename
> > 190553143       159425889       32076404        382055436       16c5b40c vmlinux.before
> > 190551812       159424945       32076404        382053161       16c5ab29 vmlinux.after
> 
> That looks odd. You added "always" to some inline functions, which I'd
> have thought might cause *more* inlining, and thus an increase in text
> size. But it actually went *down* by 1331 bytes.

Or maybe the compiler did some optimizations after the inlining. Here
are the numbers on an allmodconfig - 1073 bytes less with this patch.

   text    data     bss     dec     hex filename
27409158        31062465        26710144        85181767        513c547 vmlinux.before
27408085        31062821        26710144        85181050        513c27a vmlinux.after

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [tip: locking/core] x86/ptrace: Always inline v8086_mode() for instrumentation
  2022-02-04  8:30 ` [PATCH 2/3] x86/ptrace: Always inline v8086_mode() for instrumentation Borislav Petkov
@ 2022-02-13 10:59   ` tip-bot2 for Borislav Petkov
  0 siblings, 0 replies; 11+ messages in thread
From: tip-bot2 for Borislav Petkov @ 2022-02-13 10:59 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Borislav Petkov, Marco Elver, x86, linux-kernel

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     b008893b08dcc8c30d756db05c229a1491bcb992
Gitweb:        https://git.kernel.org/tip/b008893b08dcc8c30d756db05c229a1491bcb992
Author:        Borislav Petkov <bp@suse.de>
AuthorDate:    Fri, 04 Feb 2022 09:30:14 +01:00
Committer:     Borislav Petkov <bp@suse.de>
CommitterDate: Sat, 12 Feb 2022 22:07:13 +01:00

x86/ptrace: Always inline v8086_mode() for instrumentation

Instrumentation glue like KASAN causes the following warning:

  vmlinux.o: warning: objtool: mce_gather_info()+0x5f: call to v8086_mode.constprop.0() leaves .noinstr.text section

due to gcc creating a function call for that oneliner. Force-inline it
and even save some vmlinux bytes (.config is close to an allmodconfig):

     text    data     bss     dec     hex filename
  209431677       208257651       34411048        452100376       1af28118	vmlinux.before
  209431519       208257615       34411048        452100182       1af28056	vmlinux.after

Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Marco Elver <elver@google.com>
Link: https://lore.kernel.org/r/20220204083015.17317-3-bp@alien8.de
---
 arch/x86/include/asm/ptrace.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 7036631..4357e0f 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -137,7 +137,7 @@ static __always_inline int user_mode(struct pt_regs *regs)
 #endif
 }
 
-static inline int v8086_mode(struct pt_regs *regs)
+static __always_inline int v8086_mode(struct pt_regs *regs)
 {
 #ifdef CONFIG_X86_32
 	return (regs->flags & X86_VM_MASK);

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [tip: locking/core] cpumask: Add a x86-specific cpumask_clear_cpu() helper
  2022-02-04  8:30 ` [PATCH 1/3] cpumask: Add a x86-specific cpumask_clear_cpu() helper Borislav Petkov
  2022-02-04 16:46   ` Luck, Tony
@ 2022-02-13 10:59   ` tip-bot2 for Borislav Petkov
  1 sibling, 0 replies; 11+ messages in thread
From: tip-bot2 for Borislav Petkov @ 2022-02-13 10:59 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Borislav Petkov, Marco Elver, x86, linux-kernel

The following commit has been merged into the locking/core branch of tip:

Commit-ID:     f5c54f77b07b278cfde4a654e111c39996ac8b5b
Gitweb:        https://git.kernel.org/tip/f5c54f77b07b278cfde4a654e111c39996ac8b5b
Author:        Borislav Petkov <bp@suse.de>
AuthorDate:    Fri, 04 Feb 2022 09:30:13 +01:00
Committer:     Borislav Petkov <bp@suse.de>
CommitterDate: Sat, 12 Feb 2022 18:20:05 +01:00

cpumask: Add a x86-specific cpumask_clear_cpu() helper

Add a x86-specific cpumask_clear_cpu() helper which will be used in
places where the explicit KASAN-instrumentation in the *_bit() helpers
is unwanted.

Also, always inline two more cpumask generic helpers.

allyesconfig:

     text    data     bss     dec     hex filename
  190553143       159425889       32076404        382055436       16c5b40c vmlinux.before
  190551812       159424945       32076404        382053161       16c5ab29 vmlinux.after

Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Marco Elver <elver@google.com>
Link: https://lore.kernel.org/r/20220204083015.17317-2-bp@alien8.de
---
 arch/x86/include/asm/cpumask.h | 10 ++++++++++
 include/linux/cpumask.h        |  4 ++--
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h
index 3afa990..c5aed9e 100644
--- a/arch/x86/include/asm/cpumask.h
+++ b/arch/x86/include/asm/cpumask.h
@@ -20,11 +20,21 @@ static __always_inline bool arch_cpu_online(int cpu)
 {
 	return arch_test_bit(cpu, cpumask_bits(cpu_online_mask));
 }
+
+static __always_inline void arch_cpumask_clear_cpu(int cpu, struct cpumask *dstp)
+{
+	arch_clear_bit(cpumask_check(cpu), cpumask_bits(dstp));
+}
 #else
 static __always_inline bool arch_cpu_online(int cpu)
 {
 	return cpu == 0;
 }
+
+static __always_inline void arch_cpumask_clear_cpu(int cpu, struct cpumask *dstp)
+{
+	return;
+}
 #endif
 
 #define arch_cpu_is_offline(cpu)	unlikely(!arch_cpu_online(cpu))
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 6b06c69..fe29ac7 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -102,7 +102,7 @@ extern atomic_t __num_online_cpus;
 
 extern cpumask_t cpus_booted_once_mask;
 
-static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits)
+static __always_inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits)
 {
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
 	WARN_ON_ONCE(cpu >= bits);
@@ -110,7 +110,7 @@ static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits)
 }
 
 /* verify cpu argument to cpumask_* operators */
-static inline unsigned int cpumask_check(unsigned int cpu)
+static __always_inline unsigned int cpumask_check(unsigned int cpu)
 {
 	cpu_max_bits_warn(cpu, nr_cpumask_bits);
 	return cpu;

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [tip: ras/core] x86/mce: Use arch atomic and bit helpers
  2022-02-04  8:30 ` [PATCH 3/3] x86/mce: Use arch atomic and bit helpers Borislav Petkov
@ 2022-02-14 10:03   ` tip-bot2 for Borislav Petkov
  0 siblings, 0 replies; 11+ messages in thread
From: tip-bot2 for Borislav Petkov @ 2022-02-14 10:03 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: Borislav Petkov, Marco Elver, x86, linux-kernel

The following commit has been merged into the ras/core branch of tip:

Commit-ID:     f11445ba7a1160b87615e3f863a9e66c85189399
Gitweb:        https://git.kernel.org/tip/f11445ba7a1160b87615e3f863a9e66c85189399
Author:        Borislav Petkov <bp@suse.de>
AuthorDate:    Fri, 04 Feb 2022 09:30:15 +01:00
Committer:     Borislav Petkov <bp@suse.de>
CommitterDate: Sun, 13 Feb 2022 22:08:27 +01:00

x86/mce: Use arch atomic and bit helpers

The arch helpers do not have explicit KASAN instrumentation. Use them in
noinstr code.

Inline a couple more functions with single call sites, while at it:

mce_severity_amd_smca() has a single call-site which is noinstr so force
the inlining and fix:

  vmlinux.o: warning: objtool: mce_severity_amd.constprop.0()+0xca: call to \
	  mce_severity_amd_smca() leaves .noinstr.text section

Always inline mca_msr_reg():

     text    data     bss     dec     hex filename
  16065240        128031326       36405368        180501934       ac23dae vmlinux.before
  16065240        128031294       36405368        180501902       ac23d8e vmlinux.after

and mce_no_way_out() as the latter one is used only once, to fix:

  vmlinux.o: warning: objtool: mce_read_aux()+0x53: call to mca_msr_reg() leaves .noinstr.text section
  vmlinux.o: warning: objtool: do_machine_check()+0xc9: call to mce_no_way_out() leaves .noinstr.text section

Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Marco Elver <elver@google.com>
Link: https://lore.kernel.org/r/20220204083015.17317-4-bp@alien8.de
---
 arch/x86/kernel/cpu/mce/core.c     | 58 +++++++++--------------------
 arch/x86/kernel/cpu/mce/internal.h | 23 ++++++++++--
 arch/x86/kernel/cpu/mce/severity.c |  2 +-
 3 files changed, 41 insertions(+), 42 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 5818b83..0e71474 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -173,27 +173,6 @@ void mce_unregister_decode_chain(struct notifier_block *nb)
 }
 EXPORT_SYMBOL_GPL(mce_unregister_decode_chain);
 
-u32 mca_msr_reg(int bank, enum mca_msr reg)
-{
-	if (mce_flags.smca) {
-		switch (reg) {
-		case MCA_CTL:	 return MSR_AMD64_SMCA_MCx_CTL(bank);
-		case MCA_ADDR:	 return MSR_AMD64_SMCA_MCx_ADDR(bank);
-		case MCA_MISC:	 return MSR_AMD64_SMCA_MCx_MISC(bank);
-		case MCA_STATUS: return MSR_AMD64_SMCA_MCx_STATUS(bank);
-		}
-	}
-
-	switch (reg) {
-	case MCA_CTL:	 return MSR_IA32_MCx_CTL(bank);
-	case MCA_ADDR:	 return MSR_IA32_MCx_ADDR(bank);
-	case MCA_MISC:	 return MSR_IA32_MCx_MISC(bank);
-	case MCA_STATUS: return MSR_IA32_MCx_STATUS(bank);
-	}
-
-	return 0;
-}
-
 static void __print_mce(struct mce *m)
 {
 	pr_emerg(HW_ERR "CPU %d: Machine Check%s: %Lx Bank %d: %016Lx\n",
@@ -814,7 +793,8 @@ EXPORT_SYMBOL_GPL(machine_check_poll);
  * the severity assessment code. Pretend that EIPV was set, and take the
  * ip/cs values from the pt_regs that mce_gather_info() ignored earlier.
  */
-static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
+static __always_inline void
+quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
 {
 	if (bank != 0)
 		return;
@@ -838,8 +818,8 @@ static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
  * Do a quick check if any of the events requires a panic.
  * This decides if we keep the events around or clear them.
  */
-static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
-			  struct pt_regs *regs)
+static __always_inline int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
+					  struct pt_regs *regs)
 {
 	char *tmp = *msg;
 	int i;
@@ -849,7 +829,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
 		if (!(m->status & MCI_STATUS_VAL))
 			continue;
 
-		__set_bit(i, validp);
+		arch___set_bit(i, validp);
 		if (mce_flags.snb_ifu_quirk)
 			quirk_sandybridge_ifu(i, m, regs);
 
@@ -1015,13 +995,13 @@ static noinstr int mce_start(int *no_way_out)
 	if (!timeout)
 		return ret;
 
-	atomic_add(*no_way_out, &global_nwo);
+	arch_atomic_add(*no_way_out, &global_nwo);
 	/*
 	 * Rely on the implied barrier below, such that global_nwo
 	 * is updated before mce_callin.
 	 */
-	order = atomic_inc_return(&mce_callin);
-	cpumask_clear_cpu(smp_processor_id(), &mce_missing_cpus);
+	order = arch_atomic_inc_return(&mce_callin);
+	arch_cpumask_clear_cpu(smp_processor_id(), &mce_missing_cpus);
 
 	/* Enable instrumentation around calls to external facilities */
 	instrumentation_begin();
@@ -1029,10 +1009,10 @@ static noinstr int mce_start(int *no_way_out)
 	/*
 	 * Wait for everyone.
 	 */
-	while (atomic_read(&mce_callin) != num_online_cpus()) {
+	while (arch_atomic_read(&mce_callin) != num_online_cpus()) {
 		if (mce_timed_out(&timeout,
 				  "Timeout: Not all CPUs entered broadcast exception handler")) {
-			atomic_set(&global_nwo, 0);
+			arch_atomic_set(&global_nwo, 0);
 			goto out;
 		}
 		ndelay(SPINUNIT);
@@ -1047,7 +1027,7 @@ static noinstr int mce_start(int *no_way_out)
 		/*
 		 * Monarch: Starts executing now, the others wait.
 		 */
-		atomic_set(&mce_executing, 1);
+		arch_atomic_set(&mce_executing, 1);
 	} else {
 		/*
 		 * Subject: Now start the scanning loop one by one in
@@ -1055,10 +1035,10 @@ static noinstr int mce_start(int *no_way_out)
 		 * This way when there are any shared banks it will be
 		 * only seen by one CPU before cleared, avoiding duplicates.
 		 */
-		while (atomic_read(&mce_executing) < order) {
+		while (arch_atomic_read(&mce_executing) < order) {
 			if (mce_timed_out(&timeout,
 					  "Timeout: Subject CPUs unable to finish machine check processing")) {
-				atomic_set(&global_nwo, 0);
+				arch_atomic_set(&global_nwo, 0);
 				goto out;
 			}
 			ndelay(SPINUNIT);
@@ -1068,7 +1048,7 @@ static noinstr int mce_start(int *no_way_out)
 	/*
 	 * Cache the global no_way_out state.
 	 */
-	*no_way_out = atomic_read(&global_nwo);
+	*no_way_out = arch_atomic_read(&global_nwo);
 
 	ret = order;
 
@@ -1153,12 +1133,12 @@ out:
 	return ret;
 }
 
-static void mce_clear_state(unsigned long *toclear)
+static __always_inline void mce_clear_state(unsigned long *toclear)
 {
 	int i;
 
 	for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
-		if (test_bit(i, toclear))
+		if (arch_test_bit(i, toclear))
 			mce_wrmsrl(mca_msr_reg(i, MCA_STATUS), 0);
 	}
 }
@@ -1208,8 +1188,8 @@ __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
 	int severity, i, taint = 0;
 
 	for (i = 0; i < this_cpu_read(mce_num_banks); i++) {
-		__clear_bit(i, toclear);
-		if (!test_bit(i, valid_banks))
+		arch___clear_bit(i, toclear);
+		if (!arch_test_bit(i, valid_banks))
 			continue;
 
 		if (!mce_banks[i].ctl)
@@ -1244,7 +1224,7 @@ __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
 		     severity == MCE_UCNA_SEVERITY) && !no_way_out)
 			continue;
 
-		__set_bit(i, toclear);
+		arch___set_bit(i, toclear);
 
 		/* Machine check event was not enabled. Clear, but ignore. */
 		if (severity == MCE_NO_SEVERITY)
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index 52c6339..a04b61e 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -182,8 +182,6 @@ enum mca_msr {
 	MCA_MISC,
 };
 
-u32 mca_msr_reg(int bank, enum mca_msr reg);
-
 /* Decide whether to add MCE record to MCE event pool or filter it out. */
 extern bool filter_mce(struct mce *m);
 
@@ -209,4 +207,25 @@ static inline void winchip_machine_check(struct pt_regs *regs) {}
 
 noinstr u64 mce_rdmsrl(u32 msr);
 
+static __always_inline u32 mca_msr_reg(int bank, enum mca_msr reg)
+{
+	if (mce_flags.smca) {
+		switch (reg) {
+		case MCA_CTL:	 return MSR_AMD64_SMCA_MCx_CTL(bank);
+		case MCA_ADDR:	 return MSR_AMD64_SMCA_MCx_ADDR(bank);
+		case MCA_MISC:	 return MSR_AMD64_SMCA_MCx_MISC(bank);
+		case MCA_STATUS: return MSR_AMD64_SMCA_MCx_STATUS(bank);
+		}
+	}
+
+	switch (reg) {
+	case MCA_CTL:	 return MSR_IA32_MCx_CTL(bank);
+	case MCA_ADDR:	 return MSR_IA32_MCx_ADDR(bank);
+	case MCA_MISC:	 return MSR_IA32_MCx_MISC(bank);
+	case MCA_STATUS: return MSR_IA32_MCx_STATUS(bank);
+	}
+
+	return 0;
+}
+
 #endif /* __X86_MCE_INTERNAL_H__ */
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index 7aa2bda..ca0d775 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -301,7 +301,7 @@ static noinstr int error_context(struct mce *m, struct pt_regs *regs)
 	}
 }
 
-static int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
+static __always_inline int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
 {
 	u64 mcx_cfg;
 

^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2022-02-14 10:39 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-04  8:30 [PATCH 0/3] x86/mce: Fix more noinstr fun Borislav Petkov
2022-02-04  8:30 ` [PATCH 1/3] cpumask: Add a x86-specific cpumask_clear_cpu() helper Borislav Petkov
2022-02-04 16:46   ` Luck, Tony
2022-02-04 18:04     ` Borislav Petkov
2022-02-13 10:59   ` [tip: locking/core] " tip-bot2 for Borislav Petkov
2022-02-04  8:30 ` [PATCH 2/3] x86/ptrace: Always inline v8086_mode() for instrumentation Borislav Petkov
2022-02-13 10:59   ` [tip: locking/core] " tip-bot2 for Borislav Petkov
2022-02-04  8:30 ` [PATCH 3/3] x86/mce: Use arch atomic and bit helpers Borislav Petkov
2022-02-14 10:03   ` [tip: ras/core] " tip-bot2 for Borislav Petkov
2022-02-04 11:24 ` [PATCH 0/3] x86/mce: Fix more noinstr fun Marco Elver
2022-02-04 14:48   ` Borislav Petkov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.