All of lore.kernel.org
 help / color / mirror / Atom feed
From: Xin Li <xin3.li@intel.com>
To: linux-kernel@vger.kernel.org, x86@kernel.org, kvm@vger.kernel.org
Cc: tglx@linutronix.de, mingo@redhat.com, bp@alien8.de,
	dave.hansen@linux.intel.com, hpa@zytor.com, peterz@infradead.org,
	andrew.cooper3@citrix.com, seanjc@google.com,
	pbonzini@redhat.com, ravi.v.shankar@intel.com
Subject: [RFC PATCH 31/32] x86/fred: allow dynamic stack frame size
Date: Mon, 19 Dec 2022 22:36:57 -0800	[thread overview]
Message-ID: <20221220063658.19271-32-xin3.li@intel.com> (raw)
In-Reply-To: <20221220063658.19271-1-xin3.li@intel.com>

A FRED stack frame could contain different amounts of information for
different event types, or perhaps even for different instances of the
same event type. Thus we must not rely on any advance knowledge of the
stack frame size, so that the stack frame size can be dynamic.

Implement it through:
  1) add a new field user_pt_regs to thread_info, and initialize it
     with a pointer to a virtual pt_regs structure at the top of a
     thread stack.
  2) save a pointer to the user-space pt_regs structure created by
     fred_entrypoint_user() to user_pt_regs in fred_entry_from_user().
  3) initialize init_thread_info's user_pt_regs with a pointer to
     a virtual pt_regs structure at the top of the init stack.

This approach also works for IDT, so the same code is used for both.

Suggested-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Xin Li <xin3.li@intel.com>
---
 arch/x86/entry/entry_32.S           |  2 +-
 arch/x86/entry/entry_fred.c         |  2 ++
 arch/x86/include/asm/entry-common.h |  3 +++
 arch/x86/include/asm/processor.h    | 12 +++------
 arch/x86/include/asm/switch_to.h    |  3 +--
 arch/x86/include/asm/thread_info.h  | 41 ++++-------------------------
 arch/x86/kernel/head_32.S           |  3 +--
 arch/x86/kernel/process.c           |  5 ++++
 kernel/fork.c                       |  6 +++++
 9 files changed, 27 insertions(+), 50 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index e309e7156038..d98cc64ca82b 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1244,7 +1244,7 @@ SYM_CODE_START(rewind_stack_and_make_dead)
 	xorl	%ebp, %ebp
 
 	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esi
-	leal	-TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp
+	leal	-PTREGS_SIZE(%esi), %esp
 
 	call	make_task_dead
 1:	jmp 1b
diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c
index 56814ab0b825..140d9110bc39 100644
--- a/arch/x86/entry/entry_fred.c
+++ b/arch/x86/entry/entry_fred.c
@@ -216,6 +216,8 @@ __visible noinstr void fred_entry_from_user(struct pt_regs *regs)
 		[EVENT_TYPE_OTHER]	= fred_syscall_slow
 	};
 
+	current->thread_info.user_pt_regs = regs;
+
 	/*
 	 * FRED employs a two-level event dispatch mechanism, with
 	 * the first-level on the type of an event and the second-level
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index 674ed46d3ced..21e1e3ef9e33 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -12,6 +12,9 @@
 /* Check that the stack and regs on entry from user mode are sane. */
 static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
 {
+	if (!cpu_feature_enabled(X86_FEATURE_FRED))
+		current->thread_info.user_pt_regs = regs;
+
 	if (IS_ENABLED(CONFIG_DEBUG_ENTRY)) {
 		/*
 		 * Make sure that the entry code gave us a sensible EFLAGS
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 67c9d73b31fa..6d573eeea074 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -747,17 +747,11 @@ static inline void spin_lock_prefetch(const void *x)
 	prefetchw(x);
 }
 
-#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
-			   TOP_OF_KERNEL_STACK_PADDING)
+#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack))
 
-#define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1))
+#define task_top_of_stack(task) ((unsigned long)task_stack_page(task) + THREAD_SIZE)
 
-#define task_pt_regs(task) \
-({									\
-	unsigned long __ptr = (unsigned long)task_stack_page(task);	\
-	__ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;		\
-	((struct pt_regs *)__ptr) - 1;					\
-})
+#define task_pt_regs(task) ((task)->thread_info.user_pt_regs)
 
 #ifdef CONFIG_X86_32
 #define INIT_THREAD  {							  \
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index c28170d4fbba..8ad5788da416 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -72,8 +72,7 @@ static inline void update_task_stack(struct task_struct *task)
 		this_cpu_write(cpu_tss_rw.x86_tss.sp1, task->thread.sp0);
 #else
 	if (cpu_feature_enabled(X86_FEATURE_FRED)) {
-		wrmsrl(MSR_IA32_FRED_RSP0,
-		       task_top_of_stack(task) + TOP_OF_KERNEL_STACK_PADDING);
+		wrmsrl(MSR_IA32_FRED_RSP0, task_top_of_stack(task));
 	} else if (static_cpu_has(X86_FEATURE_XENPV)) {
 		/* Xen PV enters the kernel on the thread stack. */
 		load_sp0(task_top_of_stack(task));
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index fea0e69fc3d4..9b88b7a04fda 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -13,42 +13,6 @@
 #include <asm/percpu.h>
 #include <asm/types.h>
 
-/*
- * TOP_OF_KERNEL_STACK_PADDING is a number of unused bytes that we
- * reserve at the top of the kernel stack.  We do it because of a nasty
- * 32-bit corner case.  On x86_32, the hardware stack frame is
- * variable-length.  Except for vm86 mode, struct pt_regs assumes a
- * maximum-length frame.  If we enter from CPL 0, the top 8 bytes of
- * pt_regs don't actually exist.  Ordinarily this doesn't matter, but it
- * does in at least one case:
- *
- * If we take an NMI early enough in SYSENTER, then we can end up with
- * pt_regs that extends above sp0.  On the way out, in the espfix code,
- * we can read the saved SS value, but that value will be above sp0.
- * Without this offset, that can result in a page fault.  (We are
- * careful that, in this case, the value we read doesn't matter.)
- *
- * In vm86 mode, the hardware frame is much longer still, so add 16
- * bytes to make room for the real-mode segments.
- *
- * x86-64 has a fixed-length stack frame, but it depends on whether
- * or not FRED is enabled. Future versions of FRED might make this
- * dynamic, but for now it is always 2 words longer.
- */
-#ifdef CONFIG_X86_32
-# ifdef CONFIG_VM86
-#  define TOP_OF_KERNEL_STACK_PADDING 16
-# else
-#  define TOP_OF_KERNEL_STACK_PADDING 8
-# endif
-#else /* x86-64 */
-# ifdef CONFIG_X86_FRED
-#  define TOP_OF_KERNEL_STACK_PADDING (2*8)
-# else
-#  define TOP_OF_KERNEL_STACK_PADDING 0
-# endif
-#endif
-
 /*
  * low level task data that entry.S needs immediate access to
  * - this struct should fit entirely inside of one cache line
@@ -56,6 +20,7 @@
  */
 #ifndef __ASSEMBLY__
 struct task_struct;
+struct pt_regs;
 #include <asm/cpufeature.h>
 #include <linux/atomic.h>
 
@@ -66,11 +31,14 @@ struct thread_info {
 #ifdef CONFIG_SMP
 	u32			cpu;		/* current CPU */
 #endif
+	struct pt_regs		*user_pt_regs;
 };
 
+#define INIT_TASK_PT_REGS ((struct pt_regs *)TOP_OF_INIT_STACK - 1)
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.flags		= 0,			\
+	.user_pt_regs   = INIT_TASK_PT_REGS,	\
 }
 
 #else /* !__ASSEMBLY__ */
@@ -235,6 +203,7 @@ static inline int arch_within_stack_frames(const void * const stack,
 
 extern void arch_task_cache_init(void);
 extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
+extern void arch_init_user_pt_regs(struct task_struct *tsk);
 extern void arch_release_task_struct(struct task_struct *tsk);
 extern void arch_setup_new_exec(void);
 #define arch_setup_new_exec arch_setup_new_exec
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 9b7acc9c7874..8961946f1418 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -539,8 +539,7 @@ SYM_DATA_END(initial_page_table)
  * reliably detect the end of the stack.
  */
 SYM_DATA(initial_stack,
-		.long init_thread_union + THREAD_SIZE -
-		SIZEOF_PTREGS - TOP_OF_KERNEL_STACK_PADDING)
+		.long init_thread_union + THREAD_SIZE - SIZEOF_PTREGS)
 
 __INITRODATA
 int_msg:
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e436c9c1ef3b..6294d41f7691 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -97,6 +97,11 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 	return 0;
 }
 
+void arch_init_user_pt_regs(struct task_struct *tsk)
+{
+	tsk->thread_info.user_pt_regs = (struct pt_regs *)task_top_of_stack(tsk)- 1;
+}
+
 #ifdef CONFIG_X86_64
 void arch_release_task_struct(struct task_struct *tsk)
 {
diff --git a/kernel/fork.c b/kernel/fork.c
index 08969f5aa38d..00bd585a4e07 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -948,6 +948,10 @@ int __weak arch_dup_task_struct(struct task_struct *dst,
 	return 0;
 }
 
+void __weak arch_init_user_pt_regs(struct task_struct *tsk)
+{
+}
+
 void set_task_stack_end_magic(struct task_struct *tsk)
 {
 	unsigned long *stackend;
@@ -975,6 +979,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	if (err)
 		goto free_tsk;
 
+	arch_init_user_pt_regs(tsk);
+
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 	refcount_set(&tsk->stack_refcount, 1);
 #endif
-- 
2.34.1


  parent reply	other threads:[~2022-12-20  7:04 UTC|newest]

Thread overview: 54+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-12-20  6:36 [RFC PATCH 00/32] x86: enable FRED for x86-64 Xin Li
2022-12-20  6:36 ` [RFC PATCH 01/32] x86/traps: let common_interrupt() handle IRQ_MOVE_CLEANUP_VECTOR Xin Li
2022-12-20  6:36 ` [RFC PATCH 02/32] x86/traps: add a system interrupt table for system interrupt dispatch Xin Li
2022-12-20  6:36 ` [RFC PATCH 03/32] x86/traps: add install_system_interrupt_handler() Xin Li
2022-12-20  6:36 ` [RFC PATCH 04/32] x86/traps: add external_interrupt() to dispatch external interrupts Xin Li
2022-12-20  6:36 ` [RFC PATCH 05/32] x86/traps: add exc_raise_irq() for VMX IRQ reinjection Xin Li
2023-01-09 18:20   ` Li, Xin3
2022-12-20  6:36 ` [RFC PATCH 06/32] x86/cpufeature: add the cpu feature bit for FRED Xin Li
2022-12-20  6:36 ` [RFC PATCH 07/32] x86/opcode: add ERETU, ERETS instructions to x86-opcode-map Xin Li
2022-12-20  6:36 ` [RFC PATCH 08/32] x86/objtool: teach objtool about ERETU and ERETS Xin Li
2022-12-20  6:36 ` [RFC PATCH 09/32] x86/cpu: add X86_CR4_FRED macro Xin Li
2022-12-20  6:36 ` [RFC PATCH 10/32] x86/fred: add Kconfig option for FRED (CONFIG_X86_FRED) Xin Li
2022-12-20  6:36 ` [RFC PATCH 11/32] x86/fred: if CONFIG_X86_FRED is disabled, disable FRED support Xin Li
2022-12-20  6:36 ` [RFC PATCH 12/32] x86/cpu: add MSR numbers for FRED configuration Xin Li
2022-12-20  6:36 ` [RFC PATCH 13/32] x86/fred: header file for event types Xin Li
2022-12-20  6:36 ` [RFC PATCH 14/32] x86/fred: header file with FRED definitions Xin Li
2022-12-20  8:56   ` Peter Zijlstra
2022-12-21  2:58     ` Li, Xin3
2022-12-22 13:03       ` Peter Zijlstra
2022-12-23 19:34         ` H. Peter Anvin
2022-12-20  6:36 ` [RFC PATCH 15/32] x86/fred: make unions for the cs and ss fields in struct pt_regs Xin Li
2022-12-20  6:36 ` [RFC PATCH 16/32] x86/fred: reserve space for the FRED stack frame Xin Li
2022-12-20  6:36 ` [RFC PATCH 17/32] x86/fred: add a page fault entry stub for FRED Xin Li
2022-12-20  6:36 ` [RFC PATCH 18/32] x86/fred: add a debug " Xin Li
2022-12-20  9:15   ` Peter Zijlstra
2022-12-20  6:36 ` [RFC PATCH 19/32] x86/fred: add a NMI " Xin Li
2022-12-20  6:36 ` [RFC PATCH 20/32] x86/fred: add a machine check " Xin Li
2022-12-20  6:36 ` [RFC PATCH 21/32] x86/fred: FRED entry/exit and dispatch code Xin Li
2022-12-20  9:35   ` Peter Zijlstra
2022-12-21  2:56     ` Li, Xin3
2022-12-22 13:08       ` Peter Zijlstra
2022-12-20  6:36 ` [RFC PATCH 22/32] x86/fred: FRED initialization code Xin Li
2022-12-20  9:45   ` Peter Zijlstra
2022-12-20  9:55     ` Andrew Cooper
2022-12-20 10:02       ` Peter Zijlstra
2022-12-21  5:28         ` Li, Xin3
2022-12-21  5:44           ` H. Peter Anvin
2022-12-22 13:09             ` Peter Zijlstra
2022-12-23 19:30               ` H. Peter Anvin
2022-12-23 19:37       ` H. Peter Anvin
2022-12-20  6:36 ` [RFC PATCH 23/32] x86/fred: update MSR_IA32_FRED_RSP0 during task switch Xin Li
2022-12-20  9:48   ` Peter Zijlstra
2022-12-20 18:47     ` Li, Xin3
2022-12-23 19:42     ` H. Peter Anvin
2022-12-24  3:02       ` Li, Xin3
2022-12-20  6:36 ` [RFC PATCH 24/32] x86/fred: let ret_from_fork() jmp to fred_exit_user when FRED is enabled Xin Li
2022-12-20  6:36 ` [RFC PATCH 25/32] x86/fred: disallow the swapgs instruction " Xin Li
2022-12-20  6:36 ` [RFC PATCH 26/32] x86/fred: no ESPFIX needed " Xin Li
2022-12-20  6:36 ` [RFC PATCH 27/32] x86/fred: allow single-step trap and NMI when starting a new thread Xin Li
2022-12-20  6:36 ` [RFC PATCH 28/32] x86/fred: fixup fault on ERETU by jumping to fred_entrypoint_user Xin Li
2022-12-20  6:36 ` [RFC PATCH 29/32] x86/ia32: do not modify the DPL bits for a null selector Xin Li
2022-12-20  6:36 ` [RFC PATCH 30/32] x86/fred: allow FRED systems to use interrupt vectors 0x10-0x1f Xin Li
2022-12-20  6:36 ` Xin Li [this message]
2022-12-20  6:36 ` [RFC PATCH 32/32] x86/fred: disable FRED by default in its early stage Xin Li

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221220063658.19271-32-xin3.li@intel.com \
    --to=xin3.li@intel.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=hpa@zytor.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=ravi.v.shankar@intel.com \
    --cc=seanjc@google.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.