Re: [patch V4 part 5 02/31] x86/entry: Provide helpers for execute on irqstack

From: Alexandre Chartre <alexandre.chartre@oracle.com>
To: Thomas Gleixner <tglx@linutronix.de>,
	LKML <linux-kernel@vger.kernel.org>
Cc: x86@kernel.org, "Paul E. McKenney" <paulmck@kernel.org>,
	Andy Lutomirski <luto@kernel.org>,
	Frederic Weisbecker <frederic@kernel.org>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Sean Christopherson <sean.j.christopherson@intel.com>,
	Masami Hiramatsu <mhiramat@kernel.org>,
	Petr Mladek <pmladek@suse.com>,
	Steven Rostedt <rostedt@goodmis.org>,
	Joel Fernandes <joel@joelfernandes.org>,
	Boris Ostrovsky <boris.ostrovsky@oracle.com>,
	Juergen Gross <jgross@suse.com>, Brian Gerst <brgerst@gmail.com>,
	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
	Josh Poimboeuf <jpoimboe@redhat.com>,
	Will Deacon <will@kernel.org>
Subject: Re: [patch V4 part 5 02/31] x86/entry: Provide helpers for execute on irqstack
Date: Mon, 11 May 2020 11:07:43 +0200	[thread overview]
Message-ID: <7477e59e-50d9-3446-dce7-3aa07e74cf5f@oracle.com> (raw)
In-Reply-To: <20200505135828.316937774@linutronix.de>

On 5/5/20 3:53 PM, Thomas Gleixner wrote:
> Device interrupt handlers and system vector handlers are executed on the
> interrupt stack. The stack switch happens in the low level assembly entry
> code. This conflicts with the efforts to consolidate the exit code in C to
> ensure correctness vs. RCU and tracing.
> 
> As there is no way to move #DB away from IST due to the MOV SS issue, the
> requirements vs. #DB and NMI for switching to the interrupt stack do not
> exist anymore. The only requirement is that interrupts are disabled.
> 
> That allows to move the stack switching to C code which simplifies the
> entry/exit handling further because it allows to switch stacks after
> handling the entry and on exit before handling RCU, return to usermode and
> kernel preemption in the same way as for regular exceptions.
> 
> That also allows to move the xen hypercall extra magic code and the softirq
> stack switching into C.
> 
> The mechanism is straight forward:
> 
>    1) Store the current stack pointer on top of the interrupt stack. That's
>       required for the unwinder.
> 
>    2) Switch the stack pointer
> 
>    3) Call the function
> 
>    4) Restore the stackpointer
> 
> The full code sequence to make the unwinder happy is:
> 
>      	pushq	%rbp
> 	movq	%rsp, %rbp
> 	movq    $(top_of_hardirq_stack - 8), %reg
> 	movq	%rsp, (%reg)
>    	movq	%reg , %rsp
> 	call    function
> 	popq	%rsp
> 	leaveq
> 	
> While the following sequence would spare the 'popq %rsp':
> 
>      	pushq	%rbp
> 	movq    $(top_of_hardirq_stack - 8), %rbp
> 	movq	%rsp, (%rrbp)

Typo in the changelog: this should be (%rbp) instead of (%rrbp).

>    	xchgq	%rbp, %rsp
> 	call    function
>   	movq	%rbp, %rsp
> 	leaveq
> 
> but that requires further changes to objtool so that the unwinder works
> correctly. Can be done on top and is not critical for now.
> 
> Provide helper functions to check whether the interrupt stack is already
> active and whether stack switching is required.
> 
> 64 bit only for now. 32 bit has a variant of that already. Once this is
> cleaned up the two implementations might be consolidated as a cleanup on
> top.
> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> ---
>   arch/x86/include/asm/irq_stack.h |   61 +++++++++++++++++++++++++++++++++++++++
>   1 file changed, 61 insertions(+)
> 
> --- /dev/null
> +++ b/arch/x86/include/asm/irq_stack.h
...
> +/*
> + * Macro to emit code for running @func on the irq stack.
> + */
> +#define RUN_ON_IRQSTACK(func)	{					\
> +	unsigned long tos;						\
> +									\
> +	lockdep_assert_irqs_disabled();					\
> +									\
> +	tos = ((unsigned long)__this_cpu_read(hardirq_stack_ptr)) - 8;	\
> +									\
> +	__this_cpu_add(irq_count, 1);					\
> +	asm volatile(							\
> +		"pushq  %%rbp					\n"	\
> +		"movq   %%rsp, %%rbp				\n"	\
> +		"movq	%%rsp, (%[ts])				\n"	\
> +		"movq	%[ts], %%rsp				\n"	\
> +		"1:						\n"	\
> +		"	.pushsection .discard.instr_begin	\n"	\
> +		"	.long 1b - .				\n"	\
> +		"	.popsection				\n"	\
> +		"call	" __ASM_FORM(func) "			\n"	\
> +		"2:						\n"	\
> +		"	.pushsection .discard.instr_end		\n"	\
> +		"	.long 2b - .				\n"	\
> +		"	.popsection				\n"	\
> +		"popq	%%rsp					\n"	\
> +		"leaveq						\n"	\
> +		:							\
> +		: [ts] "r" (tos)					\
> +		: "memory"						\
> +		);							\
> +	__this_cpu_sub(irq_count, 1);					\
> +}

The pushsection/popsection .discard.instr_begin/end sequences are used several
times in asm() statements in different places, so I wonder if it might be worth
having a macro for them.

In part 1, patch 20/36 adds instr_begin()/end(): they provide the sequence,
but already encapsulated in an asm() statement. If the raw sequence were split
out into its own macros, we could do something like this:

/*
 * Begin/end of an instrumentation safe region.
 *
 * instr_begin_insn(label) and instr_end_insn(label) expand to a run of
 * adjacent string literals, so they can be pasted directly into the
 * template of a larger asm() statement. Each of them:
 *
 *   - defines the local label "<label>:" at the current location,
 *   - switches to the .discard.instr_begin (resp. .discard.instr_end)
 *     section,
 *   - emits a .long holding "<label>b - .", i.e. the offset from that
 *     entry back to the label just defined,
 *   - switches back to the previous section.
 *
 * Every fragment ends in "\n\t", so the next instruction string of the
 * enclosing asm() template can follow without extra separators.
 *
 * @label is referenced with the "b" (backward) suffix, so it has to be a
 * numeric local label such as 1 or 2.
 *
 * NOTE(review): relies on __stringify() (not defined here) macro-expanding
 * its argument before turning it into a string; otherwise passing
 * __COUNTER__ would not yield a number. Confirm against its definition.
 */
#define instr_begin_insn(label)				\
	__stringify(label) ":\n\t"			\
	".pushsection .discard.instr_begin\n\t"		\
	".long " __stringify(label) "b - .\n\t"		\
	".popsection\n\t"

/* Counterpart of instr_begin_insn(): same sequence, .discard.instr_end section. */
#define instr_end_insn(label)				\
	__stringify(label) ":\n\t"			\
	".pushsection .discard.instr_end\n\t"		\
	".long " __stringify(label) "b - .\n\t"		\
	".popsection\n\t"

/*
 * Statement form: wraps the begin sequence in its own asm volatile() and
 * uses __COUNTER__ as the label number.
 *
 * NOTE(review): the label is used twice inside instr_begin_insn();
 * presumably __COUNTER__ is expanded once before substitution so both
 * uses see the same number - confirm with the compilers in use.
 */
#define instr_begin() ({asm volatile(instr_begin_insn(__COUNTER__));})
#define instr_end() ({asm volatile(instr_end_insn(__COUNTER__));})

> +#else /* CONFIG_X86_64 */

And the RUN_ON_IRQSTACK macro would become:

/*
 * Macro to emit code for running @func on the irq stack.
 *
 * Must be used with interrupts disabled (checked via
 * lockdep_assert_irqs_disabled()). irq_count is incremented around the
 * call and decremented afterwards.
 *
 * The asm sequence:
 *   pushq %rbp / movq %rsp, %rbp   set up a frame on the current stack
 *   movq %rsp, (tos)               store the current stack pointer in the
 *                                  top slot of the irq stack (tos is
 *                                  hardirq_stack_ptr - 8)
 *   movq tos, %rsp                 switch to the irq stack
 *   call func                      bracketed by the instr_begin/end
 *                                  annotations (labels 1 and 2)
 *   popq %rsp                      reload the stack pointer saved at tos
 *   leaveq                         tear down the frame, restoring %rbp
 *
 * The compiler cannot see the call inside the template, so every register
 * a C function is allowed to modify under the x86-64 calling convention
 * (rax, rcx, rdx, rsi, rdi, r8-r11) and the flags are listed as clobbers.
 * Without them the compiler may keep live values in those registers
 * across the asm statement. This assumes @func is an ordinary C function.
 *
 * NOTE(review): %rbp is overwritten before %[ts] is last read. If the
 * kernel is built without frame pointers the compiler could presumably
 * pick %rbp for the "r" operand; consider pinning tos to a fixed scratch
 * register - to be confirmed.
 */
#define RUN_ON_IRQSTACK(func)	{					\
	unsigned long tos;						\
									\
	lockdep_assert_irqs_disabled();					\
									\
	tos = ((unsigned long)__this_cpu_read(hardirq_stack_ptr)) - 8;	\
									\
	__this_cpu_add(irq_count, 1);					\
	asm volatile(							\
		"pushq  %%rbp					\n"	\
		"movq   %%rsp, %%rbp				\n"	\
		"movq	%%rsp, (%[ts])				\n"	\
		"movq	%[ts], %%rsp				\n"	\
		instr_begin_insn(1)					\
		"call	" __ASM_FORM(func) "			\n"	\
		instr_end_insn(2)					\
		"popq	%%rsp					\n"	\
		"leaveq						\n"	\
		:							\
		: [ts] "r" (tos)					\
		: "cc", "rax", "rcx", "rdx", "rsi", "rdi",		\
		  "r8", "r9", "r10", "r11", "memory"			\
		);							\
	__this_cpu_sub(irq_count, 1);					\
}

alex.