All of lore.kernel.org
 help / color / mirror / Atom feed
From: Thomas Gleixner <tglx@linutronix.de>
To: LKML <linux-kernel@vger.kernel.org>
Cc: x86@kernel.org, Josh Poimboeuf <jpoimboe@redhat.com>,
	Helge Deller <deller@gmx.de>,
	"David S. Miller" <davem@davemloft.net>,
	Michael Ellerman <mpe@ellerman.id.au>,
	Rich Felker <dalias@libc.org>, Heiko Carstens <hca@linux.ibm.com>,
	Kees Cook <keescook@chromium.org>,
	Lai Jiangshan <jiangshanlai+lkml@gmail.com>
Subject: [patch V2 03/13] x86/irq/64: Adjust the per CPU irq stack pointer by 8
Date: Wed, 10 Feb 2021 00:40:44 +0100	[thread overview]
Message-ID: <20210210002512.354260928@linutronix.de> (raw)
In-Reply-To: 20210209234041.127454039@linutronix.de

From: Thomas Gleixner <tglx@linutronix.de>

The per CPU hardirq_stack_ptr contains the pointer to the irq stack in the
form that it is ready to be assigned to [ER]SP so that the first push ends
up on the top entry of the stack.

But the stack switching on 64 bit has the following rules:

    1) Store the current stack pointer (RSP) in the top most stack entry
       to allow the unwinder to link back to the previous stack

    2) Set RSP to the top most stack entry

    3) Invoke functions on the irq stack

    4) Pop RSP from the top most stack entry (stored in #1) so it's back
       to the original stack.

That requires all stack switching code to decrement the stored pointer by 8
in order to be able to store the current RSP and then set RSP to that
location. That's a pointless exercise.

Do the -8 adjustment right when storing the pointer and make the data type
a void pointer to avoid confusion vs. the struct irq_stack data type which
is on 64bit only used to declare the backing store. Move the definition
next to the inuse flag so they likely end up in the same cache
line. Sticking them into a struct to enforce it is a seperate change.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Kees Cook <keescook@chromium.org>
---
V2: Reworded changelog so that it's clear that ending up in the cache line
    is not guaranteed.
---
 arch/x86/include/asm/irq_stack.h |    6 +++---
 arch/x86/include/asm/processor.h |    7 +++----
 arch/x86/kernel/cpu/common.c     |    2 +-
 arch/x86/kernel/dumpstack_64.c   |   22 ++++++++++++++++------
 arch/x86/kernel/irq_64.c         |    6 ++++--
 5 files changed, 27 insertions(+), 16 deletions(-)

--- a/arch/x86/include/asm/irq_stack.h
+++ b/arch/x86/include/asm/irq_stack.h
@@ -23,7 +23,7 @@ static __always_inline void __run_on_irq
 	void *tos = __this_cpu_read(hardirq_stack_ptr);
 
 	__this_cpu_write(hardirq_stack_inuse, true);
-	asm_call_on_stack(tos - 8, func, NULL);
+	asm_call_on_stack(tos, func, NULL);
 	__this_cpu_write(hardirq_stack_inuse, false);
 }
 
@@ -34,7 +34,7 @@ static __always_inline void
 	void *tos = __this_cpu_read(hardirq_stack_ptr);
 
 	__this_cpu_write(hardirq_stack_inuse, true);
-	asm_call_sysvec_on_stack(tos - 8, func, regs);
+	asm_call_sysvec_on_stack(tos, func, regs);
 	__this_cpu_write(hardirq_stack_inuse, false);
 }
 
@@ -45,7 +45,7 @@ static __always_inline void
 	void *tos = __this_cpu_read(hardirq_stack_ptr);
 
 	__this_cpu_write(hardirq_stack_inuse, true);
-	asm_call_irq_on_stack(tos - 8, func, desc);
+	asm_call_irq_on_stack(tos, func, desc);
 	__this_cpu_write(hardirq_stack_inuse, false);
 }
 
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -426,8 +426,6 @@ struct irq_stack {
 	char		stack[IRQ_STACK_SIZE];
 } __aligned(IRQ_STACK_SIZE);
 
-DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
-
 #ifdef CONFIG_X86_32
 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
 #else
@@ -454,6 +452,7 @@ static inline unsigned long cpu_kernelmo
 	return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
 }
 
+DECLARE_PER_CPU(void *, hardirq_stack_ptr);
 DECLARE_PER_CPU(bool, hardirq_stack_inuse);
 extern asmlinkage void ignore_sysret(void);
 
@@ -473,9 +472,9 @@ struct stack_canary {
 };
 DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 #endif
-/* Per CPU softirq stack pointer */
+DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
 DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
-#endif	/* X86_64 */
+#endif	/* !X86_64 */
 
 extern unsigned int fpu_kernel_xstate_size;
 extern unsigned int fpu_user_xstate_size;
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1739,7 +1739,7 @@ DEFINE_PER_CPU(struct task_struct *, cur
 	&init_task;
 EXPORT_PER_CPU_SYMBOL(current_task);
 
-DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
+DEFINE_PER_CPU(void *, hardirq_stack_ptr);
 DEFINE_PER_CPU(bool, hardirq_stack_inuse);
 
 DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -128,12 +128,21 @@ static __always_inline bool in_exception
 
 static __always_inline bool in_irq_stack(unsigned long *stack, struct stack_info *info)
 {
-	unsigned long *end   = (unsigned long *)this_cpu_read(hardirq_stack_ptr);
-	unsigned long *begin = end - (IRQ_STACK_SIZE / sizeof(long));
+	unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr);
+	unsigned long *begin;
 
 	/*
-	 * This is a software stack, so 'end' can be a valid stack pointer.
-	 * It just means the stack is empty.
+	 * @end points directly to the top most stack entry to avoid a -8
+	 * adjustment in the stack switch hotpath. Adjust it back before
+	 * calculating @begin.
+	 */
+	end++;
+	begin = end - (IRQ_STACK_SIZE / sizeof(long));
+
+	/*
+	 * Due to the switching logic RSP can never be == @end because the
+	 * final operation is 'popq %rsp' which means after that RSP points
+	 * to the original stack and not to @end.
 	 */
 	if (stack < begin || stack >= end)
 		return false;
@@ -143,8 +152,9 @@ static __always_inline bool in_irq_stack
 	info->end	= end;
 
 	/*
-	 * The next stack pointer is the first thing pushed by the entry code
-	 * after switching to the irq stack.
+	 * The next stack pointer is stored at the top of the irq stack
+	 * before switching to the irq stack. Actual stack entries are all
+	 * below that.
 	 */
 	info->next_sp = (unsigned long *)*(end - 1);
 
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -48,7 +48,8 @@ static int map_irq_stack(unsigned int cp
 	if (!va)
 		return -ENOMEM;
 
-	per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE;
+	/* Store actual TOS to avoid adjustment in the hotpath */
+	per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8;
 	return 0;
 }
 #else
@@ -60,7 +61,8 @@ static int map_irq_stack(unsigned int cp
 {
 	void *va = per_cpu_ptr(&irq_stack_backing_store, cpu);
 
-	per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE;
+	/* Store actual TOS to avoid adjustment in the hotpath */
+	per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8;
 	return 0;
 }
 #endif


  parent reply	other threads:[~2021-02-10  2:08 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-09 23:40 [patch V2 00/13] x86/irq/64: Inline irq stack switching Thomas Gleixner
2021-02-09 23:40 ` [patch V2 01/13] x86/entry: Fix instrumentation annotation Thomas Gleixner
2021-02-11  0:50   ` [tip: x86/entry] " tip-bot2 for Thomas Gleixner
2021-02-09 23:40 ` [patch V2 02/13] x86/irq: Sanitize irq stack tracking Thomas Gleixner
2021-02-11  0:50   ` [tip: x86/entry] " tip-bot2 for Thomas Gleixner
2021-02-09 23:40 ` Thomas Gleixner [this message]
2021-02-10 11:41   ` [patch V2 03/13] x86/irq/64: Adjust the per CPU irq stack pointer by 8 David Laight
2021-02-11  0:50   ` [tip: x86/entry] " tip-bot2 for Thomas Gleixner
2021-02-09 23:40 ` [patch V2 04/13] x86/apic: Split out spurious handling code Thomas Gleixner
2021-02-11  0:50   ` [tip: x86/entry] " tip-bot2 for Thomas Gleixner
2021-02-09 23:40 ` [patch V2 05/13] x86/irq: Provide macro for inlining irq stack switching Thomas Gleixner
2021-02-11  0:50   ` [tip: x86/entry] " tip-bot2 for Thomas Gleixner
2021-02-09 23:40 ` [patch V2 06/13] x86/entry: Convert system vectors to irq stack macro Thomas Gleixner
2021-02-11  0:50   ` [tip: x86/entry] " tip-bot2 for Thomas Gleixner
2021-02-09 23:40 ` [patch V2 07/13] x86/entry: Convert device interrupts to inline stack switching Thomas Gleixner
2021-02-11  0:50   ` [tip: x86/entry] " tip-bot2 for Thomas Gleixner
2021-02-09 23:40 ` [patch V2 08/13] x86/entry: Use run_sysvec_on_irqstack_cond() for XEN upcall Thomas Gleixner
2021-02-11  0:50   ` [tip: x86/entry] " tip-bot2 for Thomas Gleixner
2021-02-09 23:40 ` [patch V2 09/13] x86/softirq: Remove indirection in do_softirq_own_stack() Thomas Gleixner
2021-02-11  0:50   ` [tip: x86/entry] " tip-bot2 for Thomas Gleixner
2021-02-09 23:40 ` [patch V2 10/13] x86: Select CONFIG_HAVE_IRQ_EXIT_ON_IRQ_STACK Thomas Gleixner
2021-02-11  0:50   ` [tip: x86/entry] " tip-bot2 for Thomas Gleixner
2021-02-09 23:40 ` [patch V2 11/13] softirq: Move __ARCH_HAS_DO_SOFTIRQ to Kconfig Thomas Gleixner
2021-02-10  7:03   ` Kees Cook
2021-02-11  0:50   ` [tip: x86/entry] " tip-bot2 for Thomas Gleixner
2021-02-09 23:40 ` [patch V2 12/13] softirq: Move do_softirq_own_stack() to generic asm header Thomas Gleixner
2021-02-10  7:04   ` Kees Cook
2021-02-11  0:50   ` [tip: x86/entry] " tip-bot2 for Thomas Gleixner
2021-02-09 23:40 ` [patch V2 13/13] x86/softirq/64: Inline do_softirq_own_stack() Thomas Gleixner
2021-02-11  0:50   ` [tip: x86/entry] " tip-bot2 for Thomas Gleixner
2021-02-15 16:39   ` [patch V2 13/13] " Guenter Roeck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210210002512.354260928@linutronix.de \
    --to=tglx@linutronix.de \
    --cc=dalias@libc.org \
    --cc=davem@davemloft.net \
    --cc=deller@gmx.de \
    --cc=hca@linux.ibm.com \
    --cc=jiangshanlai+lkml@gmail.com \
    --cc=jpoimboe@redhat.com \
    --cc=keescook@chromium.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mpe@ellerman.id.au \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.