* [PATCH v2 1/7] x86/mce: Pass pointer to saved pt_regs to severity calculation routines
2020-09-30 23:26 ` [PATCH v2 0/7] Add machine check recovery when copying from user space Tony Luck
@ 2020-09-30 23:26 ` Tony Luck
2020-09-30 23:26 ` [PATCH v2 2/7] x86/mce: Provide method to find out the type of exception handle Tony Luck
` (5 subsequent siblings)
6 siblings, 0 replies; 49+ messages in thread
From: Tony Luck @ 2020-09-30 23:26 UTC (permalink / raw)
To: Borislav Petkov; +Cc: Youquan Song, Tony Luck, x86, linux-kernel
From: Youquan Song <youquan.song@intel.com>
New recovery features require additional information about processor
state when a machine check occurred. Pass pt_regs down to the routines
that need it.
No functional change.
Signed-off-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/kernel/cpu/mce/core.c | 14 +++++++-------
arch/x86/kernel/cpu/mce/internal.h | 3 ++-
arch/x86/kernel/cpu/mce/severity.c | 14 ++++++++------
3 files changed, 17 insertions(+), 14 deletions(-)
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 5b1d5f33d60b..a2bb4d4df8b7 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -793,7 +793,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
goto clear_it;
mce_read_aux(&m, i);
- m.severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
+ m.severity = mce_severity(&m, NULL, mca_cfg.tolerant, NULL, false);
/*
* Don't get the IP here because it's unlikely to
* have anything to do with the actual error location.
@@ -842,7 +842,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
quirk_no_way_out(i, m, regs);
m->bank = i;
- if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
+ if (mce_severity(m, regs, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
mce_read_aux(m, i);
*msg = tmp;
return 1;
@@ -942,7 +942,7 @@ static void mce_reign(void)
*/
if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
/* call mce_severity() to get "msg" for panic */
- mce_severity(m, mca_cfg.tolerant, &msg, true);
+ mce_severity(m, NULL, mca_cfg.tolerant, &msg, true);
mce_panic("Fatal machine check", m, msg);
}
@@ -1153,7 +1153,7 @@ static noinstr bool mce_check_crashing_cpu(void)
return false;
}
-static void __mc_scan_banks(struct mce *m, struct mce *final,
+static void __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
unsigned long *toclear, unsigned long *valid_banks,
int no_way_out, int *worst)
{
@@ -1188,7 +1188,7 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
/* Set taint even when machine check was not enabled. */
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
- severity = mce_severity(m, cfg->tolerant, NULL, true);
+ severity = mce_severity(m, regs, cfg->tolerant, NULL, true);
/*
* When machine check was for corrected/deferred handler don't
@@ -1340,7 +1340,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
order = mce_start(&no_way_out);
}
- __mc_scan_banks(&m, final, toclear, valid_banks, no_way_out, &worst);
+ __mc_scan_banks(&m, regs, final, toclear, valid_banks, no_way_out, &worst);
if (!no_way_out)
mce_clear_state(toclear);
@@ -1362,7 +1362,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
* make sure we have the right "msg".
*/
if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
- mce_severity(&m, cfg->tolerant, &msg, true);
+ mce_severity(&m, regs, cfg->tolerant, &msg, true);
mce_panic("Local fatal machine check!", &m, msg);
}
}
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index b122610e9046..88dcc79cfb07 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -38,7 +38,8 @@ int mce_gen_pool_add(struct mce *mce);
int mce_gen_pool_init(void);
struct llist_node *mce_gen_pool_prepare_records(void);
-extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
+extern int (*mce_severity)(struct mce *a, struct pt_regs *regs,
+ int tolerant, char **msg, bool is_excp);
struct dentry *mce_get_debugfs_dir(void);
extern mce_banks_t mce_banks_ce_disabled;
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index e1da619add19..1983ef93aa25 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -209,7 +209,7 @@ static struct severity {
* distinguish an exception taken in user from from one
* taken in the kernel.
*/
-static int error_context(struct mce *m)
+static int error_context(struct mce *m, struct pt_regs *regs)
{
if ((m->cs & 3) == 3)
return IN_USER;
@@ -253,9 +253,10 @@ static int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
* See AMD Error Scope Hierarchy table in a newer BKDG. For example
* 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
*/
-static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_excp)
+static int mce_severity_amd(struct mce *m, struct pt_regs *regs, int tolerant,
+ char **msg, bool is_excp)
{
- enum context ctx = error_context(m);
+ enum context ctx = error_context(m, regs);
/* Processor Context Corrupt, no need to fumble too much, die! */
if (m->status & MCI_STATUS_PCC)
@@ -305,10 +306,11 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc
return MCE_KEEP_SEVERITY;
}
-static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_excp)
+static int mce_severity_intel(struct mce *m, struct pt_regs *regs,
+ int tolerant, char **msg, bool is_excp)
{
enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
- enum context ctx = error_context(m);
+ enum context ctx = error_context(m, regs);
struct severity *s;
for (s = severities;; s++) {
@@ -336,7 +338,7 @@ static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_e
}
/* Default to mce_severity_intel */
-int (*mce_severity)(struct mce *m, int tolerant, char **msg, bool is_excp) =
+int (*mce_severity)(struct mce *m, struct pt_regs *regs, int tolerant, char **msg, bool is_excp) =
mce_severity_intel;
void __init mcheck_vendor_init_severity(void)
--
2.21.1
^ permalink raw reply related [flat|nested] 49+ messages in thread
* [PATCH v2 2/7] x86/mce: Provide method to find out the type of exception handle
2020-09-30 23:26 ` [PATCH v2 0/7] Add machine check recovery when copying from user space Tony Luck
2020-09-30 23:26 ` [PATCH v2 1/7] x86/mce: Pass pointer to saved pt_regs to severity calculation routines Tony Luck
@ 2020-09-30 23:26 ` Tony Luck
2020-10-05 16:35 ` Borislav Petkov
2020-09-30 23:26 ` [PATCH v2 3/7] x86/mce: Add _ASM_EXTABLE_CPY for copy user access Tony Luck
` (4 subsequent siblings)
6 siblings, 1 reply; 49+ messages in thread
From: Tony Luck @ 2020-09-30 23:26 UTC (permalink / raw)
To: Borislav Petkov; +Cc: Tony Luck, Youquan Song, x86, linux-kernel
Avoid a proliferation of ex_has_*_handler() functions by having just
one function that returns the type of the handler (if any).
Drop the __visible attribute for this function. It is not called
from assembler so the attribute is not necessary.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/include/asm/extable.h | 9 ++++++++-
arch/x86/kernel/cpu/mce/severity.c | 5 ++++-
arch/x86/mm/extable.c | 12 ++++++++----
3 files changed, 20 insertions(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h
index d8c2198d543b..8847988c4a6d 100644
--- a/arch/x86/include/asm/extable.h
+++ b/arch/x86/include/asm/extable.h
@@ -29,10 +29,17 @@ struct pt_regs;
(b)->handler = (tmp).handler - (delta); \
} while (0)
+enum handler_type {
+ EX_HANDLER_NONE,
+ EX_HANDLER_FAULT,
+ EX_HANDLER_UACCESS,
+ EX_HANDLER_OTHER
+};
+
extern int fixup_exception(struct pt_regs *regs, int trapnr,
unsigned long error_code, unsigned long fault_addr);
extern int fixup_bug(struct pt_regs *regs, int trapnr);
-extern bool ex_has_fault_handler(unsigned long ip);
+extern enum handler_type ex_fault_handler_type(unsigned long ip);
extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
#endif
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index 1983ef93aa25..8517cbf7b184 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -211,10 +211,13 @@ static struct severity {
*/
static int error_context(struct mce *m, struct pt_regs *regs)
{
+ enum handler_type t;
+
if ((m->cs & 3) == 3)
return IN_USER;
- if (mc_recoverable(m->mcgstatus) && ex_has_fault_handler(m->ip)) {
+ t = ex_fault_handler_type(m->ip);
+ if (mc_recoverable(m->mcgstatus) && t == EX_HANDLER_FAULT) {
m->kflags |= MCE_IN_KERNEL_RECOV;
return IN_KERNEL_RECOV;
}
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 1d6cb07f4f86..de869665309e 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -125,17 +125,21 @@ __visible bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL(ex_handler_clear_fs);
-__visible bool ex_has_fault_handler(unsigned long ip)
+enum handler_type ex_fault_handler_type(unsigned long ip)
{
const struct exception_table_entry *e;
ex_handler_t handler;
e = search_exception_tables(ip);
if (!e)
- return false;
+ return EX_HANDLER_NONE;
handler = ex_fixup_handler(e);
-
- return handler == ex_handler_fault;
+ if (handler == ex_handler_fault)
+ return EX_HANDLER_FAULT;
+ else if (handler == ex_handler_uaccess)
+ return EX_HANDLER_UACCESS;
+ else
+ return EX_HANDLER_OTHER;
}
int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code,
--
2.21.1
^ permalink raw reply related [flat|nested] 49+ messages in thread
* Re: [PATCH v2 2/7] x86/mce: Provide method to find out the type of exception handle
2020-09-30 23:26 ` [PATCH v2 2/7] x86/mce: Provide method to find out the type of exception handle Tony Luck
@ 2020-10-05 16:35 ` Borislav Petkov
0 siblings, 0 replies; 49+ messages in thread
From: Borislav Petkov @ 2020-10-05 16:35 UTC (permalink / raw)
To: Tony Luck; +Cc: Youquan Song, x86, linux-kernel
On Wed, Sep 30, 2020 at 04:26:06PM -0700, Tony Luck wrote:
> diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
> index 1d6cb07f4f86..de869665309e 100644
> --- a/arch/x86/mm/extable.c
> +++ b/arch/x86/mm/extable.c
> @@ -125,17 +125,21 @@ __visible bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
> }
> EXPORT_SYMBOL(ex_handler_clear_fs);
>
> -__visible bool ex_has_fault_handler(unsigned long ip)
> +enum handler_type ex_fault_handler_type(unsigned long ip)
Function name needs a verb:
s!ex_fault_handler_type!ex_get_fault_handler_type!g
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
^ permalink raw reply [flat|nested] 49+ messages in thread
* [PATCH v2 3/7] x86/mce: Add _ASM_EXTABLE_CPY for copy user access
2020-09-30 23:26 ` [PATCH v2 0/7] Add machine check recovery when copying from user space Tony Luck
2020-09-30 23:26 ` [PATCH v2 1/7] x86/mce: Pass pointer to saved pt_regs to severity calculation routines Tony Luck
2020-09-30 23:26 ` [PATCH v2 2/7] x86/mce: Provide method to find out the type of exception handle Tony Luck
@ 2020-09-30 23:26 ` Tony Luck
2020-10-05 16:34 ` Borislav Petkov
2020-09-30 23:26 ` [PATCH v2 4/7] x86/mce: Avoid tail copy when machine check terminated a copy from user Tony Luck
` (3 subsequent siblings)
6 siblings, 1 reply; 49+ messages in thread
From: Tony Luck @ 2020-09-30 23:26 UTC (permalink / raw)
To: Borislav Petkov; +Cc: Youquan Song, Tony Luck, x86, linux-kernel
From: Youquan Song <youquan.song@intel.com>
_ASM_EXTABLE_UA is a general exception entry to record the exception fixup
for all exception spots between kernel and user space access.
To enable recovery from machine checks while coping data from user
addresses we need to be able to distinguish the places that are
looping copying data from those that copy a single byte/word/etc.
Add a new macro _ASM_EXTABLE_CPY and use it in place of _ASM_EXTABLE_UA
in the copy functions.
Record the exception reason number to regs->ax at
ex_handler_uaccess which is used to check MCE triggered.
The new fixup routine ex_handler_copy() is almost an exact copy of
ex_handler_uaccess() The difference is that it sets regs->ax to the trap
number. Following patches use this to avoid trying to copy remaining
bytes from the tail of the copy and possibly hitting the poison again.
New mce.kflags bit MCE_IN_KERNEL_COPYIN will be used by mce_severity()
calculation to indicate that a machine check is recoverable because the
kernel was copying from user space.
Signed-off-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/include/asm/asm.h | 6 +++
arch/x86/include/asm/mce.h | 15 ++++++
arch/x86/lib/copy_user_64.S | 96 ++++++++++++++++++-------------------
arch/x86/mm/extable.c | 14 +++++-
4 files changed, 82 insertions(+), 49 deletions(-)
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 5c15f95b1ba7..0359cbbd0f50 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -135,6 +135,9 @@
# define _ASM_EXTABLE_UA(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
+# define _ASM_EXTABLE_CPY(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_copy)
+
# define _ASM_EXTABLE_FAULT(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
@@ -160,6 +163,9 @@
# define _ASM_EXTABLE_UA(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
+# define _ASM_EXTABLE_CPY(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_copy)
+
# define _ASM_EXTABLE_FAULT(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 109af5c7f515..a9bf34bd4457 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -136,8 +136,23 @@
#define MCE_HANDLED_NFIT BIT_ULL(3)
#define MCE_HANDLED_EDAC BIT_ULL(4)
#define MCE_HANDLED_MCELOG BIT_ULL(5)
+
+/*
+ * Indicates an MCE which has happened in kernel space but from
+ * which the kernel can recover simply by executing fixup_exception()
+ * so that an error is returned to the caller of the function that
+ * hit the machine check.
+ */
#define MCE_IN_KERNEL_RECOV BIT_ULL(6)
+/*
+ * Indicates an MCE that happened in kernel space while copying data
+ * from user. In this case fixup_exception() gets the kernel to the
+ * error exit for the copy function. Machine check handler can then
+ * treat it like a fault taken in user mode.
+ */
+#define MCE_IN_KERNEL_COPYIN BIT_ULL(7)
+
/*
* This structure contains all data related to the MCE log. Also
* carries a signature to make it easier to find from external
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 816f128a6d52..5b68e945bf65 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -36,8 +36,8 @@
jmp .Lcopy_user_handle_tail
.previous
- _ASM_EXTABLE_UA(100b, 103b)
- _ASM_EXTABLE_UA(101b, 103b)
+ _ASM_EXTABLE_CPY(100b, 103b)
+ _ASM_EXTABLE_CPY(101b, 103b)
.endm
/*
@@ -116,26 +116,26 @@ SYM_FUNC_START(copy_user_generic_unrolled)
60: jmp .Lcopy_user_handle_tail /* ecx is zerorest also */
.previous
- _ASM_EXTABLE_UA(1b, 30b)
- _ASM_EXTABLE_UA(2b, 30b)
- _ASM_EXTABLE_UA(3b, 30b)
- _ASM_EXTABLE_UA(4b, 30b)
- _ASM_EXTABLE_UA(5b, 30b)
- _ASM_EXTABLE_UA(6b, 30b)
- _ASM_EXTABLE_UA(7b, 30b)
- _ASM_EXTABLE_UA(8b, 30b)
- _ASM_EXTABLE_UA(9b, 30b)
- _ASM_EXTABLE_UA(10b, 30b)
- _ASM_EXTABLE_UA(11b, 30b)
- _ASM_EXTABLE_UA(12b, 30b)
- _ASM_EXTABLE_UA(13b, 30b)
- _ASM_EXTABLE_UA(14b, 30b)
- _ASM_EXTABLE_UA(15b, 30b)
- _ASM_EXTABLE_UA(16b, 30b)
- _ASM_EXTABLE_UA(18b, 40b)
- _ASM_EXTABLE_UA(19b, 40b)
- _ASM_EXTABLE_UA(21b, 50b)
- _ASM_EXTABLE_UA(22b, 50b)
+ _ASM_EXTABLE_CPY(1b, 30b)
+ _ASM_EXTABLE_CPY(2b, 30b)
+ _ASM_EXTABLE_CPY(3b, 30b)
+ _ASM_EXTABLE_CPY(4b, 30b)
+ _ASM_EXTABLE_CPY(5b, 30b)
+ _ASM_EXTABLE_CPY(6b, 30b)
+ _ASM_EXTABLE_CPY(7b, 30b)
+ _ASM_EXTABLE_CPY(8b, 30b)
+ _ASM_EXTABLE_CPY(9b, 30b)
+ _ASM_EXTABLE_CPY(10b, 30b)
+ _ASM_EXTABLE_CPY(11b, 30b)
+ _ASM_EXTABLE_CPY(12b, 30b)
+ _ASM_EXTABLE_CPY(13b, 30b)
+ _ASM_EXTABLE_CPY(14b, 30b)
+ _ASM_EXTABLE_CPY(15b, 30b)
+ _ASM_EXTABLE_CPY(16b, 30b)
+ _ASM_EXTABLE_CPY(18b, 40b)
+ _ASM_EXTABLE_CPY(19b, 40b)
+ _ASM_EXTABLE_CPY(21b, 50b)
+ _ASM_EXTABLE_CPY(22b, 50b)
SYM_FUNC_END(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)
@@ -180,8 +180,8 @@ SYM_FUNC_START(copy_user_generic_string)
jmp .Lcopy_user_handle_tail
.previous
- _ASM_EXTABLE_UA(1b, 11b)
- _ASM_EXTABLE_UA(3b, 12b)
+ _ASM_EXTABLE_CPY(1b, 11b)
+ _ASM_EXTABLE_CPY(3b, 12b)
SYM_FUNC_END(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)
@@ -213,7 +213,7 @@ SYM_FUNC_START(copy_user_enhanced_fast_string)
jmp .Lcopy_user_handle_tail
.previous
- _ASM_EXTABLE_UA(1b, 12b)
+ _ASM_EXTABLE_CPY(1b, 12b)
SYM_FUNC_END(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)
@@ -237,7 +237,7 @@ SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
ASM_CLAC
ret
- _ASM_EXTABLE_UA(1b, 2b)
+ _ASM_EXTABLE_CPY(1b, 2b)
SYM_CODE_END(.Lcopy_user_handle_tail)
/*
@@ -366,27 +366,27 @@ SYM_FUNC_START(__copy_user_nocache)
jmp .Lcopy_user_handle_tail
.previous
- _ASM_EXTABLE_UA(1b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(2b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(3b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(4b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(5b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(6b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(7b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(8b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(9b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(10b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(11b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(12b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(13b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(14b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(15b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(16b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(20b, .L_fixup_8b_copy)
- _ASM_EXTABLE_UA(21b, .L_fixup_8b_copy)
- _ASM_EXTABLE_UA(30b, .L_fixup_4b_copy)
- _ASM_EXTABLE_UA(31b, .L_fixup_4b_copy)
- _ASM_EXTABLE_UA(40b, .L_fixup_1b_copy)
- _ASM_EXTABLE_UA(41b, .L_fixup_1b_copy)
+ _ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(3b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(4b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(5b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(6b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(7b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(8b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(9b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(10b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(11b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(12b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(13b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(14b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(15b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(16b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(20b, .L_fixup_8b_copy)
+ _ASM_EXTABLE_CPY(21b, .L_fixup_8b_copy)
+ _ASM_EXTABLE_CPY(30b, .L_fixup_4b_copy)
+ _ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy)
+ _ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy)
+ _ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy)
SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index de869665309e..45d7026eda6a 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -80,6 +80,18 @@ __visible bool ex_handler_uaccess(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL(ex_handler_uaccess);
+__visible bool ex_handler_copy(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code,
+ unsigned long fault_addr)
+{
+ WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?");
+ regs->ip = ex_fixup_addr(fixup);
+ regs->ax = trapnr;
+ return true;
+}
+EXPORT_SYMBOL(ex_handler_copy);
+
__visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
@@ -136,7 +148,7 @@ enum handler_type ex_fault_handler_type(unsigned long ip)
handler = ex_fixup_handler(e);
if (handler == ex_handler_fault)
return EX_HANDLER_FAULT;
- else if (handler == ex_handler_uaccess)
+ else if (handler == ex_handler_uaccess || handler == ex_handler_copy)
return EX_HANDLER_UACCESS;
else
return EX_HANDLER_OTHER;
--
2.21.1
^ permalink raw reply related [flat|nested] 49+ messages in thread
* [PATCH v2 4/7] x86/mce: Avoid tail copy when machine check terminated a copy from user
2020-09-30 23:26 ` [PATCH v2 0/7] Add machine check recovery when copying from user space Tony Luck
` (2 preceding siblings ...)
2020-09-30 23:26 ` [PATCH v2 3/7] x86/mce: Add _ASM_EXTABLE_CPY for copy user access Tony Luck
@ 2020-09-30 23:26 ` Tony Luck
2020-09-30 23:26 ` [PATCH v2 5/7] x86/mce: Change fault_in_kernel_space() from static to global Tony Luck
` (2 subsequent siblings)
6 siblings, 0 replies; 49+ messages in thread
From: Tony Luck @ 2020-09-30 23:26 UTC (permalink / raw)
To: Borislav Petkov; +Cc: Tony Luck, Youquan Song, x86, linux-kernel
In the page fault case it is ok to see if a few more unaligned bytes
can be copied from the source address. Worst case is that the page fault
will be triggered again.
Machine checks are more serious. Just give up at the point where the
main copy loop triggered the #MC and return from the copy code as if
the copy succeeded. The machine check handler will use task_work_add() to
make sure that the task is sent a SIGBUS.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/lib/copy_user_64.S | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 5b68e945bf65..77b9b2a3b5c8 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -15,6 +15,7 @@
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>
+#include <asm/trapnr.h>
.macro ALIGN_DESTINATION
/* check for bad alignment of destination */
@@ -221,6 +222,7 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
* Try to copy last bytes and clear the rest if needed.
* Since protection fault in copy_from/to_user is not a normal situation,
* it is not necessary to optimize tail handling.
+ * Don't try to copy the tail if machine check happened
*
* Input:
* rdi destination
@@ -232,11 +234,24 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
*/
SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
movl %edx,%ecx
+ cmp $X86_TRAP_MC,%eax /* check if X86_TRAP_MC */
+ je 3f
1: rep movsb
2: mov %ecx,%eax
ASM_CLAC
ret
+ /*
+ * Return zero to pretend that this copy succeeded. This
+ * is counter-intuitive, but needed to prevent the code
+ * in lib/iov_iter.c from retrying and running back into
+ * the poison cache line again. The machine check handler
+ * will ensure that a SIGBUS is sent to the task.
+ */
+3: xorl %eax,%eax
+ ASM_CLAC
+ ret
+
_ASM_EXTABLE_CPY(1b, 2b)
SYM_CODE_END(.Lcopy_user_handle_tail)
--
2.21.1
^ permalink raw reply related [flat|nested] 49+ messages in thread
* [PATCH v2 5/7] x86/mce: Change fault_in_kernel_space() from static to global
2020-09-30 23:26 ` [PATCH v2 0/7] Add machine check recovery when copying from user space Tony Luck
` (3 preceding siblings ...)
2020-09-30 23:26 ` [PATCH v2 4/7] x86/mce: Avoid tail copy when machine check terminated a copy from user Tony Luck
@ 2020-09-30 23:26 ` Tony Luck
2020-10-05 16:33 ` Borislav Petkov
2020-09-30 23:26 ` [PATCH v2 6/7] x86/mce: Recover from poison found while copying from user space Tony Luck
2020-09-30 23:26 ` [PATCH v2 7/7] x86/mce: Decode a kernel instruction to determine if it is copying from user Tony Luck
6 siblings, 1 reply; 49+ messages in thread
From: Tony Luck @ 2020-09-30 23:26 UTC (permalink / raw)
To: Borislav Petkov; +Cc: Youquan Song, tony.luck, x86, linux-kernel
From: Youquan Song <youquan.song@intel.com>
Machine check code needs to be able to determine if a faulting address
is in user or kernel space. There is already a function to do this.
Change from "static int" to "bool" and add declaration to <asm/traps.h>
No functional change.
Signed-off-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: <tony.luck@intel.com>
---
arch/x86/include/asm/traps.h | 2 ++
arch/x86/mm/fault.c | 2 +-
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 714b1a30e7b0..df0b7bfc1234 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -35,6 +35,8 @@ extern int panic_on_unrecovered_nmi;
void math_emulate(struct math_emu_info *);
+bool fault_in_kernel_space(unsigned long address);
+
#ifdef CONFIG_VMAP_STACK
void __noreturn handle_stack_overflow(const char *message,
struct pt_regs *regs,
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 35f1498e9832..88ae443e4e5f 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1081,7 +1081,7 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
return 0;
}
-static int fault_in_kernel_space(unsigned long address)
+bool fault_in_kernel_space(unsigned long address)
{
/*
* On 64-bit systems, the vsyscall page is at an address above
--
2.21.1
^ permalink raw reply related [flat|nested] 49+ messages in thread
* Re: [PATCH v2 5/7] x86/mce: Change fault_in_kernel_space() from static to global
2020-09-30 23:26 ` [PATCH v2 5/7] x86/mce: Change fault_in_kernel_space() from static to global Tony Luck
@ 2020-10-05 16:33 ` Borislav Petkov
0 siblings, 0 replies; 49+ messages in thread
From: Borislav Petkov @ 2020-10-05 16:33 UTC (permalink / raw)
To: Tony Luck; +Cc: Youquan Song, x86, linux-kernel
On Wed, Sep 30, 2020 at 04:26:09PM -0700, Tony Luck wrote:
> From: Youquan Song <youquan.song@intel.com>
>
> Machine check code needs to be able to determine if a faulting address
> is in user or kernel space. There is already a function to do this.
>
> Change from "static int" to "bool" and add declaration to <asm/traps.h>
>
> No functional change.
>
> Signed-off-by: Youquan Song <youquan.song@intel.com>
> Signed-off-by: <tony.luck@intel.com>
> ---
> arch/x86/include/asm/traps.h | 2 ++
> arch/x86/mm/fault.c | 2 +-
> 2 files changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
> index 714b1a30e7b0..df0b7bfc1234 100644
> --- a/arch/x86/include/asm/traps.h
> +++ b/arch/x86/include/asm/traps.h
> @@ -35,6 +35,8 @@ extern int panic_on_unrecovered_nmi;
>
> void math_emulate(struct math_emu_info *);
>
> +bool fault_in_kernel_space(unsigned long address);
> +
> #ifdef CONFIG_VMAP_STACK
> void __noreturn handle_stack_overflow(const char *message,
> struct pt_regs *regs,
> diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
> index 35f1498e9832..88ae443e4e5f 100644
> --- a/arch/x86/mm/fault.c
> +++ b/arch/x86/mm/fault.c
> @@ -1081,7 +1081,7 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
> return 0;
> }
>
> -static int fault_in_kernel_space(unsigned long address)
> +bool fault_in_kernel_space(unsigned long address)
> {
> /*
> * On 64-bit systems, the vsyscall page is at an address above
> --
Yeah, merge this one into the last patch where this function is used.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
^ permalink raw reply [flat|nested] 49+ messages in thread
* [PATCH v2 6/7] x86/mce: Recover from poison found while copying from user space
2020-09-30 23:26 ` [PATCH v2 0/7] Add machine check recovery when copying from user space Tony Luck
` (4 preceding siblings ...)
2020-09-30 23:26 ` [PATCH v2 5/7] x86/mce: Change fault_in_kernel_space() from static to global Tony Luck
@ 2020-09-30 23:26 ` Tony Luck
2020-10-05 16:32 ` Borislav Petkov
2020-09-30 23:26 ` [PATCH v2 7/7] x86/mce: Decode a kernel instruction to determine if it is copying from user Tony Luck
6 siblings, 1 reply; 49+ messages in thread
From: Tony Luck @ 2020-09-30 23:26 UTC (permalink / raw)
To: Borislav Petkov; +Cc: Youquan Song, Tony Luck, x86, linux-kernel
From: Youquan Song <youquan.song@intel.com>
Existing kernel code can only recover from a machine check on code that
is tagged in the exception table with a fault handling recovery path.
Two new fields in the task structure to pass information from machine
check handler to the "task_work" that is queued to run before the task
returns to user mode:
+ mce_vaddr: will be initialized to the user virtual address of the fault
in the case where the fault occurred in the kernel copying data from
a user address. This is so that kill_me_maybe() can provide that
information to the user SIGBUS handler.
+ mce_kflags: copy of the struct mce.kflags needed by kill_me_maybe()
to determine if mce_vaddr is applicable to this error.
Add code to recover from a machine check while copying data from user
space to the kernel. Action for this case is the same as if the user
touched the poison directly; unmap the page and send a SIGBUS to the task.
Refactor the recovery code path to share common code between the "fault
in user mode" case and the "fault while copying from user" case.
New code paths will be activated by the next patch which sets
MCE_IN_KERNEL_COPYIN.
Signed-off-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/kernel/cpu/mce/core.c | 33 +++++++++++++++++++++------------
include/linux/sched.h | 2 ++
2 files changed, 23 insertions(+), 12 deletions(-)
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index a2bb4d4df8b7..9713825e6745 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1386,15 +1386,10 @@ noinstr void do_machine_check(struct pt_regs *regs)
if ((m.cs & 3) == 3) {
/* If this triggers there is no way to recover. Die hard. */
BUG_ON(!on_thread_stack() || !user_mode(regs));
-
- current->mce_addr = m.addr;
- current->mce_ripv = !!(m.mcgstatus & MCG_STATUS_RIPV);
- current->mce_whole_page = whole_page(&m);
- current->mce_kill_me.func = kill_me_maybe;
- if (kill_it)
- current->mce_kill_me.func = kill_me_now;
- task_work_add(current, ¤t->mce_kill_me, true);
} else {
+ /* (m.kflags MCE_IN_KERNEL_RECOV must have been set to get here */
+ BUG_ON(!(m.kflags & MCE_IN_KERNEL_RECOV));
+
/*
* Handle an MCE which has happened in kernel space but from
* which the kernel can recover: ex_has_fault_handler() has
@@ -1404,11 +1399,25 @@ noinstr void do_machine_check(struct pt_regs *regs)
* corresponding exception handler which would do that is the
* proper one.
*/
- if (m.kflags & MCE_IN_KERNEL_RECOV) {
- if (!fixup_exception(regs, X86_TRAP_MC, 0, 0))
- mce_panic("Failed kernel mode recovery", &m, msg);
- }
+ if (!fixup_exception(regs, X86_TRAP_MC, 0, 0))
+ mce_panic("Failed kernel mode recovery", &m, msg);
+
+ /*
+ * If this wasn't a copy from user, then recovery is complete.
+ */
+ if (!(m.kflags & MCE_IN_KERNEL_COPYIN))
+ goto out;
}
+
+ current->mce_addr = m.addr;
+ current->mce_kflags = m.kflags;
+ current->mce_ripv = !!(m.mcgstatus & MCG_STATUS_RIPV);
+ current->mce_whole_page = whole_page(&m);
+ current->mce_kill_me.func = kill_me_maybe;
+ if (kill_it)
+ current->mce_kill_me.func = kill_me_now;
+ task_work_add(current, ¤t->mce_kill_me, true);
+
out:
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 93ecd930efd3..2cbba3e2b150 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1308,6 +1308,8 @@ struct task_struct {
#endif
#ifdef CONFIG_X86_MCE
+ void __user *mce_vaddr;
+ __u64 mce_kflags;
u64 mce_addr;
__u64 mce_ripv : 1,
mce_whole_page : 1,
--
2.21.1
^ permalink raw reply related [flat|nested] 49+ messages in thread
* Re: [PATCH v2 6/7] x86/mce: Recover from poison found while copying from user space
2020-09-30 23:26 ` [PATCH v2 6/7] x86/mce: Recover from poison found while copying from user space Tony Luck
@ 2020-10-05 16:32 ` Borislav Petkov
2020-10-05 17:47 ` Luck, Tony
0 siblings, 1 reply; 49+ messages in thread
From: Borislav Petkov @ 2020-10-05 16:32 UTC (permalink / raw)
To: Tony Luck; +Cc: Youquan Song, x86, linux-kernel
On Wed, Sep 30, 2020 at 04:26:10PM -0700, Tony Luck wrote:
> From: Youquan Song <youquan.song@intel.com>
>
> Existing kernel code can only recover from a machine check on code that
> is tagged in the exception table with a fault handling recovery path.
>
> Two new fields in the task structure to pass information from machine
> check handler to the "task_work" that is queued to run before the task
> returns to user mode:
>
> + mce_vaddr: will be initialized to the user virtual address of the fault
> in the case where the fault occurred in the kernel copying data from
> a user address. This is so that kill_me_maybe() can provide that
> information to the user SIGBUS handler.
>
> + mce_kflags: copy of the struct mce.kflags needed by kill_me_maybe()
> to determine if mce_vaddr is applicable to this error.
>
> Add code to recover from a machine check while copying data from user
> space to the kernel. Action for this case is the same as if the user
> touched the poison directly; unmap the page and send a SIGBUS to the task.
>
> Refactor the recovery code path to share common code between the "fault
> in user mode" case and the "fault while copying from user" case.
>
> New code paths will be activated by the next patch which sets
> MCE_IN_KERNEL_COPYIN.
>
> Signed-off-by: Youquan Song <youquan.song@intel.com>
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
> arch/x86/kernel/cpu/mce/core.c | 33 +++++++++++++++++++++------------
> include/linux/sched.h | 2 ++
> 2 files changed, 23 insertions(+), 12 deletions(-)
Isn't that just simpler?
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 4d2cf08820af..dc6c83aa2ec1 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1261,6 +1261,21 @@ static void kill_me_maybe(struct callback_head *cb)
kill_me_now(cb);
}
+static inline void queue_task_work(struct mce *m, int kill_it)
+{
+ current->mce_addr = m->addr;
+ current->mce_kflags = m->kflags;
+ current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV);
+ current->mce_whole_page = whole_page(m);
+
+ if (kill_it)
+ current->mce_kill_me.func = kill_me_now;
+ else
+ current->mce_kill_me.func = kill_me_maybe;
+
+ task_work_add(current, ¤t->mce_kill_me, true);
+}
+
/*
* The actual machine check handler. This only handles real
* exceptions when something got corrupted coming in through int 18.
@@ -1402,13 +1417,8 @@ noinstr void do_machine_check(struct pt_regs *regs)
/* If this triggers there is no way to recover. Die hard. */
BUG_ON(!on_thread_stack() || !user_mode(regs));
- current->mce_addr = m.addr;
- current->mce_ripv = !!(m.mcgstatus & MCG_STATUS_RIPV);
- current->mce_whole_page = whole_page(&m);
- current->mce_kill_me.func = kill_me_maybe;
- if (kill_it)
- current->mce_kill_me.func = kill_me_now;
- task_work_add(current, ¤t->mce_kill_me, true);
+ queue_task_work(&m, kill_it);
+
} else {
/*
* Handle an MCE which has happened in kernel space but from
@@ -1423,6 +1433,9 @@ noinstr void do_machine_check(struct pt_regs *regs)
if (!fixup_exception(regs, X86_TRAP_MC, 0, 0))
mce_panic("Failed kernel mode recovery", &m, msg);
}
+
+ if (m.kflags & MCE_IN_KERNEL_COPYIN)
+ queue_task_work(&m, kill_it);
}
out:
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e7f65573dde3..d383cf09e78f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1308,6 +1308,8 @@ struct task_struct {
#endif
#ifdef CONFIG_X86_MCE
+ void __user *mce_vaddr;
+ __u64 mce_kflags;
u64 mce_addr;
__u64 mce_ripv : 1,
mce_whole_page : 1,
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
^ permalink raw reply related [flat|nested] 49+ messages in thread
* Re: [PATCH v2 6/7] x86/mce: Recover from poison found while copying from user space
2020-10-05 16:32 ` Borislav Petkov
@ 2020-10-05 17:47 ` Luck, Tony
0 siblings, 0 replies; 49+ messages in thread
From: Luck, Tony @ 2020-10-05 17:47 UTC (permalink / raw)
To: Borislav Petkov; +Cc: Youquan Song, x86, linux-kernel
On Mon, Oct 05, 2020 at 06:32:47PM +0200, Borislav Petkov wrote:
> On Wed, Sep 30, 2020 at 04:26:10PM -0700, Tony Luck wrote:
> > arch/x86/kernel/cpu/mce/core.c | 33 +++++++++++++++++++++------------
> > include/linux/sched.h | 2 ++
> > 2 files changed, 23 insertions(+), 12 deletions(-)
>
> Isn't that just simpler?
Yes. A helper function avoids making the code a mess of if/else with subtle
fall through cases.
> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
> index 4d2cf08820af..dc6c83aa2ec1 100644
> --- a/arch/x86/kernel/cpu/mce/core.c
> +++ b/arch/x86/kernel/cpu/mce/core.c
> @@ -1261,6 +1261,21 @@ static void kill_me_maybe(struct callback_head *cb)
> kill_me_now(cb);
> }
>
> +static inline void queue_task_work(struct mce *m, int kill_it)
Does it need to be "inline" though? I hope machine check processing
is never in the critical code path for anyone!
> +{
> + current->mce_addr = m->addr;
> + current->mce_kflags = m->kflags;
> + current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV);
> + current->mce_whole_page = whole_page(m);
> +
> + if (kill_it)
> + current->mce_kill_me.func = kill_me_now;
> + else
> + current->mce_kill_me.func = kill_me_maybe;
> +
> + task_work_add(current, ¤t->mce_kill_me, true);
> +}
> +
> /*
> * The actual machine check handler. This only handles real
> * exceptions when something got corrupted coming in through int 18.
> @@ -1402,13 +1417,8 @@ noinstr void do_machine_check(struct pt_regs *regs)
> /* If this triggers there is no way to recover. Die hard. */
> BUG_ON(!on_thread_stack() || !user_mode(regs));
>
> - current->mce_addr = m.addr;
> - current->mce_ripv = !!(m.mcgstatus & MCG_STATUS_RIPV);
> - current->mce_whole_page = whole_page(&m);
> - current->mce_kill_me.func = kill_me_maybe;
> - if (kill_it)
> - current->mce_kill_me.func = kill_me_now;
> - task_work_add(current, ¤t->mce_kill_me, true);
> + queue_task_work(&m, kill_it);
> +
> } else {
> /*
> * Handle an MCE which has happened in kernel space but from
> @@ -1423,6 +1433,9 @@ noinstr void do_machine_check(struct pt_regs *regs)
> if (!fixup_exception(regs, X86_TRAP_MC, 0, 0))
> mce_panic("Failed kernel mode recovery", &m, msg);
> }
> +
> + if (m.kflags & MCE_IN_KERNEL_COPYIN)
> + queue_task_work(&m, kill_it);
> }
> out:
> mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
-Tony
^ permalink raw reply [flat|nested] 49+ messages in thread
* [PATCH v2 7/7] x86/mce: Decode a kernel instruction to determine if it is copying from user
2020-09-30 23:26 ` [PATCH v2 0/7] Add machine check recovery when copying from user space Tony Luck
` (5 preceding siblings ...)
2020-09-30 23:26 ` [PATCH v2 6/7] x86/mce: Recover from poison found while copying from user space Tony Luck
@ 2020-09-30 23:26 ` Tony Luck
2020-10-05 16:31 ` Borislav Petkov
6 siblings, 1 reply; 49+ messages in thread
From: Tony Luck @ 2020-09-30 23:26 UTC (permalink / raw)
To: Borislav Petkov; +Cc: Tony Luck, Youquan Song, x86, linux-kernel
All instructions copying data between kernel and user memory
are tagged with either _ASM_EXTABLE_UA or _ASM_EXTABLE_CPY
entries in the exception table. ex_fault_handler_type() returns
EX_HANDLER_UACCESS for both of these.
Recovery is only possible when the machine check was triggered
on a read from user memory. In this case the same strategy for
recovery applies as if the user had made the access in ring3. If
the fault was in kernel memory while copying to user we do not
currently have a recovery plan.
For MOV and MOVZ instructions a full decode of the instruction
is done to find the source address. For MOVS instructions
the source address is in the %rsi register. The function
fault_in_kernel_space() determines whether the source address is
kernel or user.
Co-developed-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/kernel/cpu/mce/core.c | 11 +++++--
arch/x86/kernel/cpu/mce/severity.c | 51 +++++++++++++++++++++++++++++-
2 files changed, 58 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 9713825e6745..60bacf6e0501 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1236,14 +1236,19 @@ static void kill_me_maybe(struct callback_head *cb)
if (!p->mce_ripv)
flags |= MF_MUST_KILL;
- if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags)) {
+ if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags) &&
+ !(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) {
set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
sync_core();
return;
}
- pr_err("Memory error not recovered");
- kill_me_now(cb);
+ if (p->mce_vaddr != (void __user *)~0ul) {
+ force_sig_mceerr(BUS_MCEERR_AR, p->mce_vaddr, PAGE_SHIFT);
+ } else {
+ pr_err("Memory error not recovered");
+ kill_me_now(cb);
+ }
}
/*
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index 8517cbf7b184..6e8b38cf52d9 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -10,6 +10,9 @@
#include <linux/init.h>
#include <linux/debugfs.h>
#include <asm/mce.h>
+#include <asm/traps.h>
+#include <asm/insn.h>
+#include <asm/insn-eval.h>
#include <linux/uaccess.h>
#include "internal.h"
@@ -198,6 +201,45 @@ static struct severity {
#define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) == \
(MCG_STATUS_RIPV|MCG_STATUS_EIPV))
+static bool is_copy_from_user(struct pt_regs *regs)
+{
+ u8 insn_buf[MAX_INSN_SIZE];
+ struct insn insn;
+ unsigned long addr;
+
+ if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, MAX_INSN_SIZE))
+ return false;
+
+ kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE);
+ insn_get_opcode(&insn);
+ if (!insn.opcode.got)
+ return false;
+
+ switch (insn.opcode.value) {
+ /* MOV mem,reg */
+ case 0x8A: case 0x8B:
+ /* MOVZ mem,reg */
+ case 0xB60F: case 0xB70F:
+ insn_get_modrm(&insn);
+ insn_get_sib(&insn);
+ addr = (unsigned long)insn_get_addr_ref(&insn, regs);
+ break;
+ /* REP MOVS */
+ case 0xA4: case 0xA5:
+ addr = regs->si;
+ break;
+ default:
+ return false;
+ }
+
+ if (fault_in_kernel_space(addr))
+ return false;
+
+ current->mce_vaddr = (void __user *)addr;
+
+ return true;
+}
+
/*
* If mcgstatus indicated that ip/cs on the stack were
* no good, then "m->cs" will be zero and we will have
@@ -215,10 +257,17 @@ static int error_context(struct mce *m, struct pt_regs *regs)
if ((m->cs & 3) == 3)
return IN_USER;
+ if (!mc_recoverable(m->mcgstatus))
+ return IN_KERNEL;
t = ex_fault_handler_type(m->ip);
- if (mc_recoverable(m->mcgstatus) && t == EX_HANDLER_FAULT) {
+ if (t == EX_HANDLER_FAULT) {
+ m->kflags |= MCE_IN_KERNEL_RECOV;
+ return IN_KERNEL_RECOV;
+ }
+ if (t == EX_HANDLER_UACCESS && regs && is_copy_from_user(regs)) {
m->kflags |= MCE_IN_KERNEL_RECOV;
+ m->kflags |= MCE_IN_KERNEL_COPYIN;
return IN_KERNEL_RECOV;
}
--
2.21.1
^ permalink raw reply related [flat|nested] 49+ messages in thread
* Re: [PATCH v2 7/7] x86/mce: Decode a kernel instruction to determine if it is copying from user
2020-09-30 23:26 ` [PATCH v2 7/7] x86/mce: Decode a kernel instruction to determine if it is copying from user Tony Luck
@ 2020-10-05 16:31 ` Borislav Petkov
2020-10-06 21:09 ` [PATCH v3 0/6] Add machine check recovery when copying from user space Tony Luck
0 siblings, 1 reply; 49+ messages in thread
From: Borislav Petkov @ 2020-10-05 16:31 UTC (permalink / raw)
To: Tony Luck; +Cc: Youquan Song, x86, linux-kernel
On Wed, Sep 30, 2020 at 04:26:11PM -0700, Tony Luck wrote:
> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
> index 9713825e6745..60bacf6e0501 100644
> --- a/arch/x86/kernel/cpu/mce/core.c
> +++ b/arch/x86/kernel/cpu/mce/core.c
> @@ -1236,14 +1236,19 @@ static void kill_me_maybe(struct callback_head *cb)
> if (!p->mce_ripv)
> flags |= MF_MUST_KILL;
>
> - if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags)) {
> + if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags) &&
> + !(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) {
> set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
> sync_core();
> return;
> }
>
> - pr_err("Memory error not recovered");
> - kill_me_now(cb);
> + if (p->mce_vaddr != (void __user *)~0ul) {
As previously pointed out, pls test against -1L even if it is the
same value so that it is obvious this is the error value coming from
insn_get_addr_ref().
> + force_sig_mceerr(BUS_MCEERR_AR, p->mce_vaddr, PAGE_SHIFT);
> + } else {
> + pr_err("Memory error not recovered");
> + kill_me_now(cb);
> + }
> }
>
> /*
> diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
> index 8517cbf7b184..6e8b38cf52d9 100644
> --- a/arch/x86/kernel/cpu/mce/severity.c
> +++ b/arch/x86/kernel/cpu/mce/severity.c
> @@ -10,6 +10,9 @@
> #include <linux/init.h>
> #include <linux/debugfs.h>
> #include <asm/mce.h>
> +#include <asm/traps.h>
> +#include <asm/insn.h>
> +#include <asm/insn-eval.h>
> #include <linux/uaccess.h>
>
> #include "internal.h"
> @@ -198,6 +201,45 @@ static struct severity {
> #define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) == \
> (MCG_STATUS_RIPV|MCG_STATUS_EIPV))
>
> +static bool is_copy_from_user(struct pt_regs *regs)
> +{
> + u8 insn_buf[MAX_INSN_SIZE];
> + struct insn insn;
> + unsigned long addr;
> +
> + if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, MAX_INSN_SIZE))
> + return false;
> +
> + kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE);
> + insn_get_opcode(&insn);
> + if (!insn.opcode.got)
> + return false;
> +
> + switch (insn.opcode.value) {
> + /* MOV mem,reg */
> + case 0x8A: case 0x8B:
> + /* MOVZ mem,reg */
> + case 0xB60F: case 0xB70F:
> + insn_get_modrm(&insn);
> + insn_get_sib(&insn);
You need to test here:
insn->modrm.got = 1;
and
insn->sib.got = 1;
I know, this is weird - those functions should return an error value
instead of being void and I've asked Masami in the past but no reply.
Who knows, one fine day I might convert the crap to do that instead.
> + addr = (unsigned long)insn_get_addr_ref(&insn, regs);
> + break;
> + /* REP MOVS */
> + case 0xA4: case 0xA5:
> + addr = regs->si;
> + break;
> + default:
> + return false;
> + }
> +
> + if (fault_in_kernel_space(addr))
> + return false;
> +
> + current->mce_vaddr = (void __user *)addr;
> +
> + return true;
> +}
> +
> /*
> * If mcgstatus indicated that ip/cs on the stack were
> * no good, then "m->cs" will be zero and we will have
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
^ permalink raw reply [flat|nested] 49+ messages in thread
* [PATCH v3 0/6] Add machine check recovery when copying from user space
2020-10-05 16:31 ` Borislav Petkov
@ 2020-10-06 21:09 ` Tony Luck
2020-10-06 21:09 ` [PATCH v3 1/6] x86/mce: Pass pointer to saved pt_regs to severity calculation routines Tony Luck
` (5 more replies)
0 siblings, 6 replies; 49+ messages in thread
From: Tony Luck @ 2020-10-06 21:09 UTC (permalink / raw)
To: Borislav Petkov; +Cc: Tony Luck, Youquan Song, x86, linux-kernel
Machine check recovery from uncorrected memory errors currently focusses
primarily on errors that are detected while running in user mode. There
is a mechanism for recovering from errors in kernel code, but it is
currently only used for memcpy_mcsafe().
The existing recover actions for errors found in user mode (unmap the
page and send SIGBUS to the task) can also be applied when the error is
found while copying data from user space to the kernel.
Roadmap to this series:
Series is based on top of tip ras/core branch because original part 0001
has already been applied to tip ras/core branch:
13c877f4b48b ("x86/mce: Stop mce_reign() from re-computing severity for every CPU")
so new part 0001 below used to be part 0002 in v1 series.
In v3 part 0005 has been merged with the final part, so now just 6 parts.
0001: First piece of infrastructure update. Severity calculations need
access to the saved registers. So pass pointer down the call
chain.
0002: Need to know what type of exception handler is present
for a given kernel instruction. Rather than proliferate more
functions like ex_has_fault_handler() for each type, replace
with a function that looks up the handler and returns an enum
describing the type.
0003: Need slightly different handling for *copy_user*() faults from
get_user() faults. Create a new exception table tag and apply
to the copy functions.
Change since v2: Reword commit message to avoid use of "we".
0004: In fixup path of copy functions avoid dealing with the tail
when the copy took a machine check by returning that there
are no bytes left to be copied.
0005: Changes to do_machine_check() to support the new recovery flow.
Some re-factoring to avoid code duplication (since the flows
for "error in user mode" and "error while copying from user
mode" are almost identical). Couple of new fields added to the
task structure.
Change since v2: Boris supplied a helper function to make the re-factor
much simpler. Use it instead of the spaghetti code in v2.
0006: Finally the keystone patch that pulls all the parts together.
An instruction decoder figures out whether an instruction
tagged as accessing user space is reading from or writing
to user space. The instructions in the switch were found
experimentally by looking at what instructions in the base
kernel are tagged in the exception table. I didn't add the
atomic operations (0x87 = XCHG etc.) that both read and write
user addresses. I think they should be safe, but I need a test
case where a futex has been poisoned to check. Probably this
switch should be expanded with all the instructions that the
compiler could possibly generate that read from user space.
Change since v2: Merged old part 0005 into this piece since this is
where function fault_in_kernel_space() is used.
Check modrm.got and sib.got fields in "insn" were set before
calling insn_get_addr()
Change type of constant from ~0ul to -1l when checking whether
address returned by insn_get_addr() is valid.
Tony Luck (4):
x86/mce: Provide method to find out the type of exception handle
x86/mce: Avoid tail copy when machine check terminated a copy from
user
x86/mce: Recover from poison found while copying from user space
x86/mce: Decode a kernel instruction to determine if it is copying
from user
Youquan Song (2):
x86/mce: Pass pointer to saved pt_regs to severity calculation
routines
x86/mce: Add _ASM_EXTABLE_CPY for copy user access
arch/x86/include/asm/asm.h | 6 ++
arch/x86/include/asm/extable.h | 9 ++-
arch/x86/include/asm/mce.h | 15 ++++
arch/x86/include/asm/traps.h | 2 +
arch/x86/kernel/cpu/mce/core.c | 52 +++++++++-----
arch/x86/kernel/cpu/mce/internal.h | 3 +-
arch/x86/kernel/cpu/mce/severity.c | 70 ++++++++++++++++--
arch/x86/lib/copy_user_64.S | 111 ++++++++++++++++-------------
arch/x86/mm/extable.c | 24 +++++--
arch/x86/mm/fault.c | 2 +-
include/linux/sched.h | 2 +
11 files changed, 217 insertions(+), 79 deletions(-)
base-commit: 5da8e4a658109e3b7e1f45ae672b7c06ac3e7158
--
2.21.1
^ permalink raw reply [flat|nested] 49+ messages in thread
* [PATCH v3 1/6] x86/mce: Pass pointer to saved pt_regs to severity calculation routines
2020-10-06 21:09 ` [PATCH v3 0/6] Add machine check recovery when copying from user space Tony Luck
@ 2020-10-06 21:09 ` Tony Luck
2020-10-07 10:02 ` [tip: ras/core] " tip-bot2 for Youquan Song
2020-10-06 21:09 ` [PATCH v3 2/6] x86/mce: Provide method to find out the type of exception handle Tony Luck
` (4 subsequent siblings)
5 siblings, 1 reply; 49+ messages in thread
From: Tony Luck @ 2020-10-06 21:09 UTC (permalink / raw)
To: Borislav Petkov; +Cc: Youquan Song, Tony Luck, x86, linux-kernel
From: Youquan Song <youquan.song@intel.com>
New recovery features require additional information about processor
state when a machine check occurred. Pass pt_regs down to the routines
that need it.
No functional change.
Signed-off-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/kernel/cpu/mce/core.c | 14 +++++++-------
arch/x86/kernel/cpu/mce/internal.h | 3 ++-
arch/x86/kernel/cpu/mce/severity.c | 14 ++++++++------
3 files changed, 17 insertions(+), 14 deletions(-)
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index b5b70f4b351d..2d6caf09e8e6 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -807,7 +807,7 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
goto clear_it;
mce_read_aux(&m, i);
- m.severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
+ m.severity = mce_severity(&m, NULL, mca_cfg.tolerant, NULL, false);
/*
* Don't get the IP here because it's unlikely to
* have anything to do with the actual error location.
@@ -856,7 +856,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
quirk_no_way_out(i, m, regs);
m->bank = i;
- if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
+ if (mce_severity(m, regs, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
mce_read_aux(m, i);
*msg = tmp;
return 1;
@@ -956,7 +956,7 @@ static void mce_reign(void)
*/
if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
/* call mce_severity() to get "msg" for panic */
- mce_severity(m, mca_cfg.tolerant, &msg, true);
+ mce_severity(m, NULL, mca_cfg.tolerant, &msg, true);
mce_panic("Fatal machine check", m, msg);
}
@@ -1167,7 +1167,7 @@ static noinstr bool mce_check_crashing_cpu(void)
return false;
}
-static void __mc_scan_banks(struct mce *m, struct mce *final,
+static void __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
unsigned long *toclear, unsigned long *valid_banks,
int no_way_out, int *worst)
{
@@ -1202,7 +1202,7 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
/* Set taint even when machine check was not enabled. */
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
- severity = mce_severity(m, cfg->tolerant, NULL, true);
+ severity = mce_severity(m, regs, cfg->tolerant, NULL, true);
/*
* When machine check was for corrected/deferred handler don't
@@ -1354,7 +1354,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
order = mce_start(&no_way_out);
}
- __mc_scan_banks(&m, final, toclear, valid_banks, no_way_out, &worst);
+ __mc_scan_banks(&m, regs, final, toclear, valid_banks, no_way_out, &worst);
if (!no_way_out)
mce_clear_state(toclear);
@@ -1376,7 +1376,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
* make sure we have the right "msg".
*/
if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
- mce_severity(&m, cfg->tolerant, &msg, true);
+ mce_severity(&m, regs, cfg->tolerant, &msg, true);
mce_panic("Local fatal machine check!", &m, msg);
}
}
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index b122610e9046..88dcc79cfb07 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -38,7 +38,8 @@ int mce_gen_pool_add(struct mce *mce);
int mce_gen_pool_init(void);
struct llist_node *mce_gen_pool_prepare_records(void);
-extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
+extern int (*mce_severity)(struct mce *a, struct pt_regs *regs,
+ int tolerant, char **msg, bool is_excp);
struct dentry *mce_get_debugfs_dir(void);
extern mce_banks_t mce_banks_ce_disabled;
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index e0722461bb57..0b072dc231ad 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -223,7 +223,7 @@ static struct severity {
* distinguish an exception taken in user from from one
* taken in the kernel.
*/
-static int error_context(struct mce *m)
+static int error_context(struct mce *m, struct pt_regs *regs)
{
if ((m->cs & 3) == 3)
return IN_USER;
@@ -267,9 +267,10 @@ static int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
* See AMD Error Scope Hierarchy table in a newer BKDG. For example
* 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
*/
-static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_excp)
+static int mce_severity_amd(struct mce *m, struct pt_regs *regs, int tolerant,
+ char **msg, bool is_excp)
{
- enum context ctx = error_context(m);
+ enum context ctx = error_context(m, regs);
/* Processor Context Corrupt, no need to fumble too much, die! */
if (m->status & MCI_STATUS_PCC)
@@ -319,10 +320,11 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc
return MCE_KEEP_SEVERITY;
}
-static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_excp)
+static int mce_severity_intel(struct mce *m, struct pt_regs *regs,
+ int tolerant, char **msg, bool is_excp)
{
enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
- enum context ctx = error_context(m);
+ enum context ctx = error_context(m, regs);
struct severity *s;
for (s = severities;; s++) {
@@ -356,7 +358,7 @@ static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_e
}
/* Default to mce_severity_intel */
-int (*mce_severity)(struct mce *m, int tolerant, char **msg, bool is_excp) =
+int (*mce_severity)(struct mce *m, struct pt_regs *regs, int tolerant, char **msg, bool is_excp) =
mce_severity_intel;
void __init mcheck_vendor_init_severity(void)
--
2.21.1
^ permalink raw reply related [flat|nested] 49+ messages in thread
* [tip: ras/core] x86/mce: Pass pointer to saved pt_regs to severity calculation routines
2020-10-06 21:09 ` [PATCH v3 1/6] x86/mce: Pass pointer to saved pt_regs to severity calculation routines Tony Luck
@ 2020-10-07 10:02 ` tip-bot2 for Youquan Song
0 siblings, 0 replies; 49+ messages in thread
From: tip-bot2 for Youquan Song @ 2020-10-07 10:02 UTC (permalink / raw)
To: linux-tip-commits; +Cc: Youquan Song, Tony Luck, Borislav Petkov, x86, LKML
The following commit has been merged into the ras/core branch of tip:
Commit-ID: 41ce0564bfe2e129d56730418d8c0a9f9f2d31b5
Gitweb: https://git.kernel.org/tip/41ce0564bfe2e129d56730418d8c0a9f9f2d31b5
Author: Youquan Song <youquan.song@intel.com>
AuthorDate: Tue, 06 Oct 2020 14:09:05 -07:00
Committer: Borislav Petkov <bp@suse.de>
CommitterDate: Wed, 07 Oct 2020 10:51:42 +02:00
x86/mce: Pass pointer to saved pt_regs to severity calculation routines
New recovery features require additional information about processor
state when a machine check occurred. Pass pt_regs down to the routines
that need it.
No functional change.
Signed-off-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20201006210910.21062-2-tony.luck@intel.com
---
arch/x86/kernel/cpu/mce/core.c | 14 +++++++-------
arch/x86/kernel/cpu/mce/internal.h | 3 ++-
arch/x86/kernel/cpu/mce/severity.c | 14 ++++++++------
3 files changed, 17 insertions(+), 14 deletions(-)
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index b5b70f4..2d6caf0 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -807,7 +807,7 @@ log_it:
goto clear_it;
mce_read_aux(&m, i);
- m.severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
+ m.severity = mce_severity(&m, NULL, mca_cfg.tolerant, NULL, false);
/*
* Don't get the IP here because it's unlikely to
* have anything to do with the actual error location.
@@ -856,7 +856,7 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
quirk_no_way_out(i, m, regs);
m->bank = i;
- if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
+ if (mce_severity(m, regs, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
mce_read_aux(m, i);
*msg = tmp;
return 1;
@@ -956,7 +956,7 @@ static void mce_reign(void)
*/
if (m && global_worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
/* call mce_severity() to get "msg" for panic */
- mce_severity(m, mca_cfg.tolerant, &msg, true);
+ mce_severity(m, NULL, mca_cfg.tolerant, &msg, true);
mce_panic("Fatal machine check", m, msg);
}
@@ -1167,7 +1167,7 @@ static noinstr bool mce_check_crashing_cpu(void)
return false;
}
-static void __mc_scan_banks(struct mce *m, struct mce *final,
+static void __mc_scan_banks(struct mce *m, struct pt_regs *regs, struct mce *final,
unsigned long *toclear, unsigned long *valid_banks,
int no_way_out, int *worst)
{
@@ -1202,7 +1202,7 @@ static void __mc_scan_banks(struct mce *m, struct mce *final,
/* Set taint even when machine check was not enabled. */
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
- severity = mce_severity(m, cfg->tolerant, NULL, true);
+ severity = mce_severity(m, regs, cfg->tolerant, NULL, true);
/*
* When machine check was for corrected/deferred handler don't
@@ -1354,7 +1354,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
order = mce_start(&no_way_out);
}
- __mc_scan_banks(&m, final, toclear, valid_banks, no_way_out, &worst);
+ __mc_scan_banks(&m, regs, final, toclear, valid_banks, no_way_out, &worst);
if (!no_way_out)
mce_clear_state(toclear);
@@ -1376,7 +1376,7 @@ noinstr void do_machine_check(struct pt_regs *regs)
* make sure we have the right "msg".
*/
if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
- mce_severity(&m, cfg->tolerant, &msg, true);
+ mce_severity(&m, regs, cfg->tolerant, &msg, true);
mce_panic("Local fatal machine check!", &m, msg);
}
}
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index b122610..88dcc79 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -38,7 +38,8 @@ int mce_gen_pool_add(struct mce *mce);
int mce_gen_pool_init(void);
struct llist_node *mce_gen_pool_prepare_records(void);
-extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
+extern int (*mce_severity)(struct mce *a, struct pt_regs *regs,
+ int tolerant, char **msg, bool is_excp);
struct dentry *mce_get_debugfs_dir(void);
extern mce_banks_t mce_banks_ce_disabled;
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index e072246..0b072dc 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -223,7 +223,7 @@ static struct severity {
* distinguish an exception taken in user from from one
* taken in the kernel.
*/
-static int error_context(struct mce *m)
+static int error_context(struct mce *m, struct pt_regs *regs)
{
if ((m->cs & 3) == 3)
return IN_USER;
@@ -267,9 +267,10 @@ static int mce_severity_amd_smca(struct mce *m, enum context err_ctx)
* See AMD Error Scope Hierarchy table in a newer BKDG. For example
* 49125_15h_Models_30h-3Fh_BKDG.pdf, section "RAS Features"
*/
-static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_excp)
+static int mce_severity_amd(struct mce *m, struct pt_regs *regs, int tolerant,
+ char **msg, bool is_excp)
{
- enum context ctx = error_context(m);
+ enum context ctx = error_context(m, regs);
/* Processor Context Corrupt, no need to fumble too much, die! */
if (m->status & MCI_STATUS_PCC)
@@ -319,10 +320,11 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc
return MCE_KEEP_SEVERITY;
}
-static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_excp)
+static int mce_severity_intel(struct mce *m, struct pt_regs *regs,
+ int tolerant, char **msg, bool is_excp)
{
enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
- enum context ctx = error_context(m);
+ enum context ctx = error_context(m, regs);
struct severity *s;
for (s = severities;; s++) {
@@ -356,7 +358,7 @@ static int mce_severity_intel(struct mce *m, int tolerant, char **msg, bool is_e
}
/* Default to mce_severity_intel */
-int (*mce_severity)(struct mce *m, int tolerant, char **msg, bool is_excp) =
+int (*mce_severity)(struct mce *m, struct pt_regs *regs, int tolerant, char **msg, bool is_excp) =
mce_severity_intel;
void __init mcheck_vendor_init_severity(void)
^ permalink raw reply related [flat|nested] 49+ messages in thread
* [PATCH v3 2/6] x86/mce: Provide method to find out the type of exception handle
2020-10-06 21:09 ` [PATCH v3 0/6] Add machine check recovery when copying from user space Tony Luck
2020-10-06 21:09 ` [PATCH v3 1/6] x86/mce: Pass pointer to saved pt_regs to severity calculation routines Tony Luck
@ 2020-10-06 21:09 ` Tony Luck
2020-10-07 10:02 ` [tip: ras/core] x86/mce: Provide method to find out the type of an exception handler tip-bot2 for Tony Luck
2020-10-06 21:09 ` [PATCH v3 3/6] x86/mce: Add _ASM_EXTABLE_CPY for copy user access Tony Luck
` (3 subsequent siblings)
5 siblings, 1 reply; 49+ messages in thread
From: Tony Luck @ 2020-10-06 21:09 UTC (permalink / raw)
To: Borislav Petkov; +Cc: Tony Luck, Youquan Song, x86, linux-kernel
Avoid a proliferation of ex_has_*_handler() functions by having just
one function that returns the type of the handler (if any).
Drop the __visible attribute for this function. It is not called
from assembler so the attribute is not necessary.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/include/asm/extable.h | 9 ++++++++-
arch/x86/kernel/cpu/mce/severity.c | 5 ++++-
arch/x86/mm/extable.c | 12 ++++++++----
3 files changed, 20 insertions(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h
index d8c2198d543b..1f0cbc52937c 100644
--- a/arch/x86/include/asm/extable.h
+++ b/arch/x86/include/asm/extable.h
@@ -29,10 +29,17 @@ struct pt_regs;
(b)->handler = (tmp).handler - (delta); \
} while (0)
+enum handler_type {
+ EX_HANDLER_NONE,
+ EX_HANDLER_FAULT,
+ EX_HANDLER_UACCESS,
+ EX_HANDLER_OTHER
+};
+
extern int fixup_exception(struct pt_regs *regs, int trapnr,
unsigned long error_code, unsigned long fault_addr);
extern int fixup_bug(struct pt_regs *regs, int trapnr);
-extern bool ex_has_fault_handler(unsigned long ip);
+extern enum handler_type ex_get_fault_handler_type(unsigned long ip);
extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
#endif
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index 0b072dc231ad..c6494e6579c1 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -225,10 +225,13 @@ static struct severity {
*/
static int error_context(struct mce *m, struct pt_regs *regs)
{
+ enum handler_type t;
+
if ((m->cs & 3) == 3)
return IN_USER;
- if (mc_recoverable(m->mcgstatus) && ex_has_fault_handler(m->ip)) {
+ t = ex_get_fault_handler_type(m->ip);
+ if (mc_recoverable(m->mcgstatus) && t == EX_HANDLER_FAULT) {
m->kflags |= MCE_IN_KERNEL_RECOV;
return IN_KERNEL_RECOV;
}
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 1d6cb07f4f86..de43525df69b 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -125,17 +125,21 @@ __visible bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL(ex_handler_clear_fs);
-__visible bool ex_has_fault_handler(unsigned long ip)
+enum handler_type ex_get_fault_handler_type(unsigned long ip)
{
const struct exception_table_entry *e;
ex_handler_t handler;
e = search_exception_tables(ip);
if (!e)
- return false;
+ return EX_HANDLER_NONE;
handler = ex_fixup_handler(e);
-
- return handler == ex_handler_fault;
+ if (handler == ex_handler_fault)
+ return EX_HANDLER_FAULT;
+ else if (handler == ex_handler_uaccess)
+ return EX_HANDLER_UACCESS;
+ else
+ return EX_HANDLER_OTHER;
}
int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code,
--
2.21.1
^ permalink raw reply related [flat|nested] 49+ messages in thread
* [tip: ras/core] x86/mce: Provide method to find out the type of an exception handler
2020-10-06 21:09 ` [PATCH v3 2/6] x86/mce: Provide method to find out the type of exception handle Tony Luck
@ 2020-10-07 10:02 ` tip-bot2 for Tony Luck
0 siblings, 0 replies; 49+ messages in thread
From: tip-bot2 for Tony Luck @ 2020-10-07 10:02 UTC (permalink / raw)
To: linux-tip-commits; +Cc: Tony Luck, Borislav Petkov, x86, LKML
The following commit has been merged into the ras/core branch of tip:
Commit-ID: a05d54c41ecfa1a322b229b4e5ce50c157284f74
Gitweb: https://git.kernel.org/tip/a05d54c41ecfa1a322b229b4e5ce50c157284f74
Author: Tony Luck <tony.luck@intel.com>
AuthorDate: Tue, 06 Oct 2020 14:09:06 -07:00
Committer: Borislav Petkov <bp@suse.de>
CommitterDate: Wed, 07 Oct 2020 11:08:59 +02:00
x86/mce: Provide method to find out the type of an exception handler
Avoid a proliferation of ex_has_*_handler() functions by having just
one function that returns the type of the handler (if any).
Drop the __visible attribute for this function. It is not called
from assembler so the attribute is not necessary.
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20201006210910.21062-3-tony.luck@intel.com
---
arch/x86/include/asm/extable.h | 9 ++++++++-
arch/x86/kernel/cpu/mce/severity.c | 5 ++++-
arch/x86/mm/extable.c | 12 ++++++++----
3 files changed, 20 insertions(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h
index d8c2198..1f0cbc5 100644
--- a/arch/x86/include/asm/extable.h
+++ b/arch/x86/include/asm/extable.h
@@ -29,10 +29,17 @@ struct pt_regs;
(b)->handler = (tmp).handler - (delta); \
} while (0)
+enum handler_type {
+ EX_HANDLER_NONE,
+ EX_HANDLER_FAULT,
+ EX_HANDLER_UACCESS,
+ EX_HANDLER_OTHER
+};
+
extern int fixup_exception(struct pt_regs *regs, int trapnr,
unsigned long error_code, unsigned long fault_addr);
extern int fixup_bug(struct pt_regs *regs, int trapnr);
-extern bool ex_has_fault_handler(unsigned long ip);
+extern enum handler_type ex_get_fault_handler_type(unsigned long ip);
extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
#endif
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index 0b072dc..c6494e6 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -225,10 +225,13 @@ static struct severity {
*/
static int error_context(struct mce *m, struct pt_regs *regs)
{
+ enum handler_type t;
+
if ((m->cs & 3) == 3)
return IN_USER;
- if (mc_recoverable(m->mcgstatus) && ex_has_fault_handler(m->ip)) {
+ t = ex_get_fault_handler_type(m->ip);
+ if (mc_recoverable(m->mcgstatus) && t == EX_HANDLER_FAULT) {
m->kflags |= MCE_IN_KERNEL_RECOV;
return IN_KERNEL_RECOV;
}
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 1d6cb07..de43525 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -125,17 +125,21 @@ __visible bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL(ex_handler_clear_fs);
-__visible bool ex_has_fault_handler(unsigned long ip)
+enum handler_type ex_get_fault_handler_type(unsigned long ip)
{
const struct exception_table_entry *e;
ex_handler_t handler;
e = search_exception_tables(ip);
if (!e)
- return false;
+ return EX_HANDLER_NONE;
handler = ex_fixup_handler(e);
-
- return handler == ex_handler_fault;
+ if (handler == ex_handler_fault)
+ return EX_HANDLER_FAULT;
+ else if (handler == ex_handler_uaccess)
+ return EX_HANDLER_UACCESS;
+ else
+ return EX_HANDLER_OTHER;
}
int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code,
^ permalink raw reply related [flat|nested] 49+ messages in thread
* [PATCH v3 3/6] x86/mce: Add _ASM_EXTABLE_CPY for copy user access
2020-10-06 21:09 ` [PATCH v3 0/6] Add machine check recovery when copying from user space Tony Luck
2020-10-06 21:09 ` [PATCH v3 1/6] x86/mce: Pass pointer to saved pt_regs to severity calculation routines Tony Luck
2020-10-06 21:09 ` [PATCH v3 2/6] x86/mce: Provide method to find out the type of exception handle Tony Luck
@ 2020-10-06 21:09 ` Tony Luck
2020-10-07 10:02 ` [tip: ras/core] " tip-bot2 for Youquan Song
2020-10-06 21:09 ` [PATCH v3 4/6] x86/mce: Avoid tail copy when machine check terminated a copy from user Tony Luck
` (2 subsequent siblings)
5 siblings, 1 reply; 49+ messages in thread
From: Tony Luck @ 2020-10-06 21:09 UTC (permalink / raw)
To: Borislav Petkov; +Cc: Youquan Song, Tony Luck, x86, linux-kernel
From: Youquan Song <youquan.song@intel.com>
_ASM_EXTABLE_UA is a general exception entry to record the exception fixup
for all exception spots between kernel and user space access.
To enable recovery from machine checks while coping data from user
addresses it is necessary to be able to distinguish the places that are
looping copying data from those that copy a single byte/word/etc.
Add a new macro _ASM_EXTABLE_CPY and use it in place of _ASM_EXTABLE_UA
in the copy functions.
Record the exception reason number to regs->ax at
ex_handler_uaccess which is used to check MCE triggered.
The new fixup routine ex_handler_copy() is almost an exact copy of
ex_handler_uaccess() The difference is that it sets regs->ax to the trap
number. Following patches use this to avoid trying to copy remaining
bytes from the tail of the copy and possibly hitting the poison again.
New mce.kflags bit MCE_IN_KERNEL_COPYIN will be used by mce_severity()
calculation to indicate that a machine check is recoverable because the
kernel was copying from user space.
Signed-off-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/include/asm/asm.h | 6 +++
arch/x86/include/asm/mce.h | 15 ++++++
arch/x86/lib/copy_user_64.S | 96 ++++++++++++++++++-------------------
arch/x86/mm/extable.c | 14 +++++-
4 files changed, 82 insertions(+), 49 deletions(-)
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 5c15f95b1ba7..0359cbbd0f50 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -135,6 +135,9 @@
# define _ASM_EXTABLE_UA(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
+# define _ASM_EXTABLE_CPY(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_copy)
+
# define _ASM_EXTABLE_FAULT(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
@@ -160,6 +163,9 @@
# define _ASM_EXTABLE_UA(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
+# define _ASM_EXTABLE_CPY(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_copy)
+
# define _ASM_EXTABLE_FAULT(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index ba2062d6df92..a0f147893a04 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -136,8 +136,23 @@
#define MCE_HANDLED_NFIT BIT_ULL(3)
#define MCE_HANDLED_EDAC BIT_ULL(4)
#define MCE_HANDLED_MCELOG BIT_ULL(5)
+
+/*
+ * Indicates an MCE which has happened in kernel space but from
+ * which the kernel can recover simply by executing fixup_exception()
+ * so that an error is returned to the caller of the function that
+ * hit the machine check.
+ */
#define MCE_IN_KERNEL_RECOV BIT_ULL(6)
+/*
+ * Indicates an MCE that happened in kernel space while copying data
+ * from user. In this case fixup_exception() gets the kernel to the
+ * error exit for the copy function. Machine check handler can then
+ * treat it like a fault taken in user mode.
+ */
+#define MCE_IN_KERNEL_COPYIN BIT_ULL(7)
+
/*
* This structure contains all data related to the MCE log. Also
* carries a signature to make it easier to find from external
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 816f128a6d52..5b68e945bf65 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -36,8 +36,8 @@
jmp .Lcopy_user_handle_tail
.previous
- _ASM_EXTABLE_UA(100b, 103b)
- _ASM_EXTABLE_UA(101b, 103b)
+ _ASM_EXTABLE_CPY(100b, 103b)
+ _ASM_EXTABLE_CPY(101b, 103b)
.endm
/*
@@ -116,26 +116,26 @@ SYM_FUNC_START(copy_user_generic_unrolled)
60: jmp .Lcopy_user_handle_tail /* ecx is zerorest also */
.previous
- _ASM_EXTABLE_UA(1b, 30b)
- _ASM_EXTABLE_UA(2b, 30b)
- _ASM_EXTABLE_UA(3b, 30b)
- _ASM_EXTABLE_UA(4b, 30b)
- _ASM_EXTABLE_UA(5b, 30b)
- _ASM_EXTABLE_UA(6b, 30b)
- _ASM_EXTABLE_UA(7b, 30b)
- _ASM_EXTABLE_UA(8b, 30b)
- _ASM_EXTABLE_UA(9b, 30b)
- _ASM_EXTABLE_UA(10b, 30b)
- _ASM_EXTABLE_UA(11b, 30b)
- _ASM_EXTABLE_UA(12b, 30b)
- _ASM_EXTABLE_UA(13b, 30b)
- _ASM_EXTABLE_UA(14b, 30b)
- _ASM_EXTABLE_UA(15b, 30b)
- _ASM_EXTABLE_UA(16b, 30b)
- _ASM_EXTABLE_UA(18b, 40b)
- _ASM_EXTABLE_UA(19b, 40b)
- _ASM_EXTABLE_UA(21b, 50b)
- _ASM_EXTABLE_UA(22b, 50b)
+ _ASM_EXTABLE_CPY(1b, 30b)
+ _ASM_EXTABLE_CPY(2b, 30b)
+ _ASM_EXTABLE_CPY(3b, 30b)
+ _ASM_EXTABLE_CPY(4b, 30b)
+ _ASM_EXTABLE_CPY(5b, 30b)
+ _ASM_EXTABLE_CPY(6b, 30b)
+ _ASM_EXTABLE_CPY(7b, 30b)
+ _ASM_EXTABLE_CPY(8b, 30b)
+ _ASM_EXTABLE_CPY(9b, 30b)
+ _ASM_EXTABLE_CPY(10b, 30b)
+ _ASM_EXTABLE_CPY(11b, 30b)
+ _ASM_EXTABLE_CPY(12b, 30b)
+ _ASM_EXTABLE_CPY(13b, 30b)
+ _ASM_EXTABLE_CPY(14b, 30b)
+ _ASM_EXTABLE_CPY(15b, 30b)
+ _ASM_EXTABLE_CPY(16b, 30b)
+ _ASM_EXTABLE_CPY(18b, 40b)
+ _ASM_EXTABLE_CPY(19b, 40b)
+ _ASM_EXTABLE_CPY(21b, 50b)
+ _ASM_EXTABLE_CPY(22b, 50b)
SYM_FUNC_END(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)
@@ -180,8 +180,8 @@ SYM_FUNC_START(copy_user_generic_string)
jmp .Lcopy_user_handle_tail
.previous
- _ASM_EXTABLE_UA(1b, 11b)
- _ASM_EXTABLE_UA(3b, 12b)
+ _ASM_EXTABLE_CPY(1b, 11b)
+ _ASM_EXTABLE_CPY(3b, 12b)
SYM_FUNC_END(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)
@@ -213,7 +213,7 @@ SYM_FUNC_START(copy_user_enhanced_fast_string)
jmp .Lcopy_user_handle_tail
.previous
- _ASM_EXTABLE_UA(1b, 12b)
+ _ASM_EXTABLE_CPY(1b, 12b)
SYM_FUNC_END(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)
@@ -237,7 +237,7 @@ SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
ASM_CLAC
ret
- _ASM_EXTABLE_UA(1b, 2b)
+ _ASM_EXTABLE_CPY(1b, 2b)
SYM_CODE_END(.Lcopy_user_handle_tail)
/*
@@ -366,27 +366,27 @@ SYM_FUNC_START(__copy_user_nocache)
jmp .Lcopy_user_handle_tail
.previous
- _ASM_EXTABLE_UA(1b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(2b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(3b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(4b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(5b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(6b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(7b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(8b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(9b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(10b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(11b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(12b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(13b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(14b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(15b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(16b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(20b, .L_fixup_8b_copy)
- _ASM_EXTABLE_UA(21b, .L_fixup_8b_copy)
- _ASM_EXTABLE_UA(30b, .L_fixup_4b_copy)
- _ASM_EXTABLE_UA(31b, .L_fixup_4b_copy)
- _ASM_EXTABLE_UA(40b, .L_fixup_1b_copy)
- _ASM_EXTABLE_UA(41b, .L_fixup_1b_copy)
+ _ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(3b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(4b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(5b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(6b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(7b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(8b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(9b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(10b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(11b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(12b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(13b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(14b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(15b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(16b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(20b, .L_fixup_8b_copy)
+ _ASM_EXTABLE_CPY(21b, .L_fixup_8b_copy)
+ _ASM_EXTABLE_CPY(30b, .L_fixup_4b_copy)
+ _ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy)
+ _ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy)
+ _ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy)
SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index de43525df69b..5829457f7ca3 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -80,6 +80,18 @@ __visible bool ex_handler_uaccess(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL(ex_handler_uaccess);
+__visible bool ex_handler_copy(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code,
+ unsigned long fault_addr)
+{
+ WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?");
+ regs->ip = ex_fixup_addr(fixup);
+ regs->ax = trapnr;
+ return true;
+}
+EXPORT_SYMBOL(ex_handler_copy);
+
__visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
@@ -136,7 +148,7 @@ enum handler_type ex_get_fault_handler_type(unsigned long ip)
handler = ex_fixup_handler(e);
if (handler == ex_handler_fault)
return EX_HANDLER_FAULT;
- else if (handler == ex_handler_uaccess)
+ else if (handler == ex_handler_uaccess || handler == ex_handler_copy)
return EX_HANDLER_UACCESS;
else
return EX_HANDLER_OTHER;
--
2.21.1
^ permalink raw reply related [flat|nested] 49+ messages in thread
* [tip: ras/core] x86/mce: Add _ASM_EXTABLE_CPY for copy user access
2020-10-06 21:09 ` [PATCH v3 3/6] x86/mce: Add _ASM_EXTABLE_CPY for copy user access Tony Luck
@ 2020-10-07 10:02 ` tip-bot2 for Youquan Song
0 siblings, 0 replies; 49+ messages in thread
From: tip-bot2 for Youquan Song @ 2020-10-07 10:02 UTC (permalink / raw)
To: linux-tip-commits; +Cc: Youquan Song, Tony Luck, Borislav Petkov, x86, LKML
The following commit has been merged into the ras/core branch of tip:
Commit-ID: 278b917f8cb9b02923c15249f9d1a5769d2c1976
Gitweb: https://git.kernel.org/tip/278b917f8cb9b02923c15249f9d1a5769d2c1976
Author: Youquan Song <youquan.song@intel.com>
AuthorDate: Tue, 06 Oct 2020 14:09:07 -07:00
Committer: Borislav Petkov <bp@suse.de>
CommitterDate: Wed, 07 Oct 2020 11:19:11 +02:00
x86/mce: Add _ASM_EXTABLE_CPY for copy user access
_ASM_EXTABLE_UA is a general exception entry to record the exception fixup
for all exception spots between kernel and user space access.
To enable recovery from machine checks while coping data from user
addresses it is necessary to be able to distinguish the places that are
looping copying data from those that copy a single byte/word/etc.
Add a new macro _ASM_EXTABLE_CPY and use it in place of _ASM_EXTABLE_UA
in the copy functions.
Record the exception reason number to regs->ax at
ex_handler_uaccess which is used to check MCE triggered.
The new fixup routine ex_handler_copy() is almost an exact copy of
ex_handler_uaccess() The difference is that it sets regs->ax to the trap
number. Following patches use this to avoid trying to copy remaining
bytes from the tail of the copy and possibly hitting the poison again.
New mce.kflags bit MCE_IN_KERNEL_COPYIN will be used by mce_severity()
calculation to indicate that a machine check is recoverable because the
kernel was copying from user space.
Signed-off-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20201006210910.21062-4-tony.luck@intel.com
---
arch/x86/include/asm/asm.h | 6 ++-
arch/x86/include/asm/mce.h | 15 ++++++-
arch/x86/lib/copy_user_64.S | 96 ++++++++++++++++++------------------
arch/x86/mm/extable.c | 14 ++++-
4 files changed, 82 insertions(+), 49 deletions(-)
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 5c15f95..0359cbb 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -135,6 +135,9 @@
# define _ASM_EXTABLE_UA(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
+# define _ASM_EXTABLE_CPY(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_copy)
+
# define _ASM_EXTABLE_FAULT(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
@@ -160,6 +163,9 @@
# define _ASM_EXTABLE_UA(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_uaccess)
+# define _ASM_EXTABLE_CPY(from, to) \
+ _ASM_EXTABLE_HANDLE(from, to, ex_handler_copy)
+
# define _ASM_EXTABLE_FAULT(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_fault)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index ba2062d..a0f1478 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -136,9 +136,24 @@
#define MCE_HANDLED_NFIT BIT_ULL(3)
#define MCE_HANDLED_EDAC BIT_ULL(4)
#define MCE_HANDLED_MCELOG BIT_ULL(5)
+
+/*
+ * Indicates an MCE which has happened in kernel space but from
+ * which the kernel can recover simply by executing fixup_exception()
+ * so that an error is returned to the caller of the function that
+ * hit the machine check.
+ */
#define MCE_IN_KERNEL_RECOV BIT_ULL(6)
/*
+ * Indicates an MCE that happened in kernel space while copying data
+ * from user. In this case fixup_exception() gets the kernel to the
+ * error exit for the copy function. Machine check handler can then
+ * treat it like a fault taken in user mode.
+ */
+#define MCE_IN_KERNEL_COPYIN BIT_ULL(7)
+
+/*
* This structure contains all data related to the MCE log. Also
* carries a signature to make it easier to find from external
* debugging tools. Each entry is only valid when its finished flag
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 816f128..5b68e94 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -36,8 +36,8 @@
jmp .Lcopy_user_handle_tail
.previous
- _ASM_EXTABLE_UA(100b, 103b)
- _ASM_EXTABLE_UA(101b, 103b)
+ _ASM_EXTABLE_CPY(100b, 103b)
+ _ASM_EXTABLE_CPY(101b, 103b)
.endm
/*
@@ -116,26 +116,26 @@ SYM_FUNC_START(copy_user_generic_unrolled)
60: jmp .Lcopy_user_handle_tail /* ecx is zerorest also */
.previous
- _ASM_EXTABLE_UA(1b, 30b)
- _ASM_EXTABLE_UA(2b, 30b)
- _ASM_EXTABLE_UA(3b, 30b)
- _ASM_EXTABLE_UA(4b, 30b)
- _ASM_EXTABLE_UA(5b, 30b)
- _ASM_EXTABLE_UA(6b, 30b)
- _ASM_EXTABLE_UA(7b, 30b)
- _ASM_EXTABLE_UA(8b, 30b)
- _ASM_EXTABLE_UA(9b, 30b)
- _ASM_EXTABLE_UA(10b, 30b)
- _ASM_EXTABLE_UA(11b, 30b)
- _ASM_EXTABLE_UA(12b, 30b)
- _ASM_EXTABLE_UA(13b, 30b)
- _ASM_EXTABLE_UA(14b, 30b)
- _ASM_EXTABLE_UA(15b, 30b)
- _ASM_EXTABLE_UA(16b, 30b)
- _ASM_EXTABLE_UA(18b, 40b)
- _ASM_EXTABLE_UA(19b, 40b)
- _ASM_EXTABLE_UA(21b, 50b)
- _ASM_EXTABLE_UA(22b, 50b)
+ _ASM_EXTABLE_CPY(1b, 30b)
+ _ASM_EXTABLE_CPY(2b, 30b)
+ _ASM_EXTABLE_CPY(3b, 30b)
+ _ASM_EXTABLE_CPY(4b, 30b)
+ _ASM_EXTABLE_CPY(5b, 30b)
+ _ASM_EXTABLE_CPY(6b, 30b)
+ _ASM_EXTABLE_CPY(7b, 30b)
+ _ASM_EXTABLE_CPY(8b, 30b)
+ _ASM_EXTABLE_CPY(9b, 30b)
+ _ASM_EXTABLE_CPY(10b, 30b)
+ _ASM_EXTABLE_CPY(11b, 30b)
+ _ASM_EXTABLE_CPY(12b, 30b)
+ _ASM_EXTABLE_CPY(13b, 30b)
+ _ASM_EXTABLE_CPY(14b, 30b)
+ _ASM_EXTABLE_CPY(15b, 30b)
+ _ASM_EXTABLE_CPY(16b, 30b)
+ _ASM_EXTABLE_CPY(18b, 40b)
+ _ASM_EXTABLE_CPY(19b, 40b)
+ _ASM_EXTABLE_CPY(21b, 50b)
+ _ASM_EXTABLE_CPY(22b, 50b)
SYM_FUNC_END(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)
@@ -180,8 +180,8 @@ SYM_FUNC_START(copy_user_generic_string)
jmp .Lcopy_user_handle_tail
.previous
- _ASM_EXTABLE_UA(1b, 11b)
- _ASM_EXTABLE_UA(3b, 12b)
+ _ASM_EXTABLE_CPY(1b, 11b)
+ _ASM_EXTABLE_CPY(3b, 12b)
SYM_FUNC_END(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)
@@ -213,7 +213,7 @@ SYM_FUNC_START(copy_user_enhanced_fast_string)
jmp .Lcopy_user_handle_tail
.previous
- _ASM_EXTABLE_UA(1b, 12b)
+ _ASM_EXTABLE_CPY(1b, 12b)
SYM_FUNC_END(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)
@@ -237,7 +237,7 @@ SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
ASM_CLAC
ret
- _ASM_EXTABLE_UA(1b, 2b)
+ _ASM_EXTABLE_CPY(1b, 2b)
SYM_CODE_END(.Lcopy_user_handle_tail)
/*
@@ -366,27 +366,27 @@ SYM_FUNC_START(__copy_user_nocache)
jmp .Lcopy_user_handle_tail
.previous
- _ASM_EXTABLE_UA(1b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(2b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(3b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(4b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(5b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(6b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(7b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(8b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(9b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(10b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(11b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(12b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(13b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(14b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(15b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(16b, .L_fixup_4x8b_copy)
- _ASM_EXTABLE_UA(20b, .L_fixup_8b_copy)
- _ASM_EXTABLE_UA(21b, .L_fixup_8b_copy)
- _ASM_EXTABLE_UA(30b, .L_fixup_4b_copy)
- _ASM_EXTABLE_UA(31b, .L_fixup_4b_copy)
- _ASM_EXTABLE_UA(40b, .L_fixup_1b_copy)
- _ASM_EXTABLE_UA(41b, .L_fixup_1b_copy)
+ _ASM_EXTABLE_CPY(1b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(2b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(3b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(4b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(5b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(6b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(7b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(8b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(9b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(10b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(11b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(12b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(13b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(14b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(15b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(16b, .L_fixup_4x8b_copy)
+ _ASM_EXTABLE_CPY(20b, .L_fixup_8b_copy)
+ _ASM_EXTABLE_CPY(21b, .L_fixup_8b_copy)
+ _ASM_EXTABLE_CPY(30b, .L_fixup_4b_copy)
+ _ASM_EXTABLE_CPY(31b, .L_fixup_4b_copy)
+ _ASM_EXTABLE_CPY(40b, .L_fixup_1b_copy)
+ _ASM_EXTABLE_CPY(41b, .L_fixup_1b_copy)
SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index de43525..5829457 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -80,6 +80,18 @@ __visible bool ex_handler_uaccess(const struct exception_table_entry *fixup,
}
EXPORT_SYMBOL(ex_handler_uaccess);
+__visible bool ex_handler_copy(const struct exception_table_entry *fixup,
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code,
+ unsigned long fault_addr)
+{
+ WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?");
+ regs->ip = ex_fixup_addr(fixup);
+ regs->ax = trapnr;
+ return true;
+}
+EXPORT_SYMBOL(ex_handler_copy);
+
__visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
struct pt_regs *regs, int trapnr,
unsigned long error_code,
@@ -136,7 +148,7 @@ enum handler_type ex_get_fault_handler_type(unsigned long ip)
handler = ex_fixup_handler(e);
if (handler == ex_handler_fault)
return EX_HANDLER_FAULT;
- else if (handler == ex_handler_uaccess)
+ else if (handler == ex_handler_uaccess || handler == ex_handler_copy)
return EX_HANDLER_UACCESS;
else
return EX_HANDLER_OTHER;
^ permalink raw reply related [flat|nested] 49+ messages in thread
* [PATCH v3 4/6] x86/mce: Avoid tail copy when machine check terminated a copy from user
2020-10-06 21:09 ` [PATCH v3 0/6] Add machine check recovery when copying from user space Tony Luck
` (2 preceding siblings ...)
2020-10-06 21:09 ` [PATCH v3 3/6] x86/mce: Add _ASM_EXTABLE_CPY for copy user access Tony Luck
@ 2020-10-06 21:09 ` Tony Luck
2020-10-07 8:23 ` David Laight
2020-10-07 10:02 ` [tip: ras/core] " tip-bot2 for Tony Luck
2020-10-06 21:09 ` [PATCH v3 5/6] x86/mce: Recover from poison found while copying from user space Tony Luck
2020-10-06 21:09 ` [PATCH v3 6/6] x86/mce: Decode a kernel instruction to determine if it is copying from user Tony Luck
5 siblings, 2 replies; 49+ messages in thread
From: Tony Luck @ 2020-10-06 21:09 UTC (permalink / raw)
To: Borislav Petkov; +Cc: Tony Luck, Youquan Song, x86, linux-kernel
In the page fault case it is ok to see if a few more unaligned bytes
can be copied from the source address. Worst case is that the page fault
will be triggered again.
Machine checks are more serious. Just give up at the point where the
main copy loop triggered the #MC and return from the copy code as if
the copy succeeded. The machine check handler will use task_work_add() to
make sure that the task is sent a SIGBUS.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/lib/copy_user_64.S | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 5b68e945bf65..77b9b2a3b5c8 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -15,6 +15,7 @@
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>
+#include <asm/trapnr.h>
.macro ALIGN_DESTINATION
/* check for bad alignment of destination */
@@ -221,6 +222,7 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
* Try to copy last bytes and clear the rest if needed.
* Since protection fault in copy_from/to_user is not a normal situation,
* it is not necessary to optimize tail handling.
+ * Don't try to copy the tail if machine check happened
*
* Input:
* rdi destination
@@ -232,11 +234,24 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
*/
SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
movl %edx,%ecx
+ cmp $X86_TRAP_MC,%eax /* check if X86_TRAP_MC */
+ je 3f
1: rep movsb
2: mov %ecx,%eax
ASM_CLAC
ret
+ /*
+ * Return zero to pretend that this copy succeeded. This
+ * is counter-intuitive, but needed to prevent the code
+ * in lib/iov_iter.c from retrying and running back into
+ * the poison cache line again. The machine check handler
+ * will ensure that a SIGBUS is sent to the task.
+ */
+3: xorl %eax,%eax
+ ASM_CLAC
+ ret
+
_ASM_EXTABLE_CPY(1b, 2b)
SYM_CODE_END(.Lcopy_user_handle_tail)
--
2.21.1
^ permalink raw reply related [flat|nested] 49+ messages in thread
* RE: [PATCH v3 4/6] x86/mce: Avoid tail copy when machine check terminated a copy from user
2020-10-06 21:09 ` [PATCH v3 4/6] x86/mce: Avoid tail copy when machine check terminated a copy from user Tony Luck
@ 2020-10-07 8:23 ` David Laight
2020-10-07 18:49 ` Luck, Tony
2020-10-07 10:02 ` [tip: ras/core] " tip-bot2 for Tony Luck
1 sibling, 1 reply; 49+ messages in thread
From: David Laight @ 2020-10-07 8:23 UTC (permalink / raw)
To: 'Tony Luck', Borislav Petkov; +Cc: Youquan Song, x86, linux-kernel
From: Tony Luck
> Sent: 06 October 2020 22:09
>
> In the page fault case it is ok to see if a few more unaligned bytes
> can be copied from the source address. Worst case is that the page fault
> will be triggered again.
>
> Machine checks are more serious. Just give up at the point where the
> main copy loop triggered the #MC and return from the copy code as if
> the copy succeeded. The machine check handler will use task_work_add() to
> make sure that the task is sent a SIGBUS.
Isn't that just plain wrong?
If copy is reported as succeeding the kernel code will use the 'old'
data that is in the buffer as if it had been read from userspace.
This could end up with kernel stack data being written to a file.
Even zeroing the rest of the kernel buffer is wrong.
IIRC the code to try to maximise the copy has been removed.
So the 'slow' retry wont happen any more.
David
>
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
> arch/x86/lib/copy_user_64.S | 15 +++++++++++++++
> 1 file changed, 15 insertions(+)
>
> diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
> index 5b68e945bf65..77b9b2a3b5c8 100644
> --- a/arch/x86/lib/copy_user_64.S
> +++ b/arch/x86/lib/copy_user_64.S
> @@ -15,6 +15,7 @@
> #include <asm/asm.h>
> #include <asm/smap.h>
> #include <asm/export.h>
> +#include <asm/trapnr.h>
>
> .macro ALIGN_DESTINATION
> /* check for bad alignment of destination */
> @@ -221,6 +222,7 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
> * Try to copy last bytes and clear the rest if needed.
> * Since protection fault in copy_from/to_user is not a normal situation,
> * it is not necessary to optimize tail handling.
> + * Don't try to copy the tail if machine check happened
> *
> * Input:
> * rdi destination
> @@ -232,11 +234,24 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
> */
> SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
> movl %edx,%ecx
> + cmp $X86_TRAP_MC,%eax /* check if X86_TRAP_MC */
> + je 3f
> 1: rep movsb
> 2: mov %ecx,%eax
> ASM_CLAC
> ret
>
> + /*
> + * Return zero to pretend that this copy succeeded. This
> + * is counter-intuitive, but needed to prevent the code
> + * in lib/iov_iter.c from retrying and running back into
> + * the poison cache line again. The machine check handler
> + * will ensure that a SIGBUS is sent to the task.
> + */
> +3: xorl %eax,%eax
> + ASM_CLAC
> + ret
> +
> _ASM_EXTABLE_CPY(1b, 2b)
> SYM_CODE_END(.Lcopy_user_handle_tail)
>
> --
> 2.21.1
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
^ permalink raw reply [flat|nested] 49+ messages in thread
* RE: [PATCH v3 4/6] x86/mce: Avoid tail copy when machine check terminated a copy from user
2020-10-07 8:23 ` David Laight
@ 2020-10-07 18:49 ` Luck, Tony
2020-10-07 21:11 ` David Laight
0 siblings, 1 reply; 49+ messages in thread
From: Luck, Tony @ 2020-10-07 18:49 UTC (permalink / raw)
To: David Laight, Borislav Petkov; +Cc: Song, Youquan, x86, linux-kernel
>> Machine checks are more serious. Just give up at the point where the
>> main copy loop triggered the #MC and return from the copy code as if
>> the copy succeeded. The machine check handler will use task_work_add() to
>> make sure that the task is sent a SIGBUS.
>
> Isn't that just plain wrong?
It isn't pretty. I'm not sure how wrong it is.
> If copy is reported as succeeding the kernel code will use the 'old'
> data that is in the buffer as if it had been read from userspace.
> This could end up with kernel stack data being written to a file.
I ran a test with:
write(fd, buf, 512)
With poison injected into buf[256] to force a machine check mid-copy.
The size of the file did get incremented by 512 rather than 256. Which isn't good.
The data in the file up to the 256 byte mark was the user data from buf[0 ... 255].
The data in the file past offset 256 was all zeroes. I suspect that isn't by chance.
The kernel has to defend against a user writing a partial page and using mmap(2)
on the same file to peek at data past EOF and up to the next PAGE_SIZE boundary.
So I think it must zero new pages allocated in page cache as they are allocated to
a file.
> Even zeroing the rest of the kernel buffer is wrong.
It wouldn't help/change anything.
> IIRC the code to try to maximise the copy has been removed.
> So the 'slow' retry wont happen any more.
Which code has been removed (and when ... TIP, and my testing, is based on 5.9-rc1)
-Tony
^ permalink raw reply [flat|nested] 49+ messages in thread
* RE: [PATCH v3 4/6] x86/mce: Avoid tail copy when machine check terminated a copy from user
2020-10-07 18:49 ` Luck, Tony
@ 2020-10-07 21:11 ` David Laight
0 siblings, 0 replies; 49+ messages in thread
From: David Laight @ 2020-10-07 21:11 UTC (permalink / raw)
To: 'Luck, Tony', Borislav Petkov; +Cc: Song, Youquan, x86, linux-kernel
From: Luck, Tony
> Sent: 07 October 2020 19:50
> >> Machine checks are more serious. Just give up at the point where the
> >> main copy loop triggered the #MC and return from the copy code as if
> >> the copy succeeded. The machine check handler will use task_work_add() to
> >> make sure that the task is sent a SIGBUS.
> >
> > Isn't that just plain wrong?
>
> It isn't pretty. I'm not sure how wrong it is.
>
> > If copy is reported as succeeding the kernel code will use the 'old'
> > data that is in the buffer as if it had been read from userspace.
> > This could end up with kernel stack data being written to a file.
>
> I ran a test with:
>
> write(fd, buf, 512)
>
> With poison injected into buf[256] to force a machine check mid-copy.
>
> The size of the file did get incremented by 512 rather than 256. Which isn't good.
>
> The data in the file up to the 256 byte mark was the user data from buf[0 ... 255].
>
> The data in the file past offset 256 was all zeroes. I suspect that isn't by chance.
> The kernel has to defend against a user writing a partial page and using mmap(2)
> on the same file to peek at data past EOF and up to the next PAGE_SIZE boundary.
> So I think it must zero new pages allocated in page cache as they are allocated to
> a file.
Think about what happens to a device write or an ioctl request.
These typically get copied into on-stack buffers.
> > IIRC the code to try to maximise the copy has been removed.
> > So the 'slow' retry wont happen any more.
>
> Which code has been removed (and when ... TIP, and my testing, is based on 5.9-rc1)
The code that retries byte by byte after an initial fault.
Might only be removed from 'next' and for some architectures.
Basically almost nothing ever relies on partial copies (except mount).
IIRC there is 'magic' in the syscall exit path to convert EFAULT
into SIGSEGV.
You probably want to hijack it to generate SIGBUS?
David
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
^ permalink raw reply [flat|nested] 49+ messages in thread
* [tip: ras/core] x86/mce: Avoid tail copy when machine check terminated a copy from user
2020-10-06 21:09 ` [PATCH v3 4/6] x86/mce: Avoid tail copy when machine check terminated a copy from user Tony Luck
2020-10-07 8:23 ` David Laight
@ 2020-10-07 10:02 ` tip-bot2 for Tony Luck
1 sibling, 0 replies; 49+ messages in thread
From: tip-bot2 for Tony Luck @ 2020-10-07 10:02 UTC (permalink / raw)
To: linux-tip-commits; +Cc: Tony Luck, Borislav Petkov, x86, LKML
The following commit has been merged into the ras/core branch of tip:
Commit-ID: a2f73400e4dfd13f673c6e1b4b98d180fd1e47b3
Gitweb: https://git.kernel.org/tip/a2f73400e4dfd13f673c6e1b4b98d180fd1e47b3
Author: Tony Luck <tony.luck@intel.com>
AuthorDate: Tue, 06 Oct 2020 14:09:08 -07:00
Committer: Borislav Petkov <bp@suse.de>
CommitterDate: Wed, 07 Oct 2020 11:26:56 +02:00
x86/mce: Avoid tail copy when machine check terminated a copy from user
In the page fault case it is ok to see if a few more unaligned bytes
can be copied from the source address. Worst case is that the page fault
will be triggered again.
Machine checks are more serious. Just give up at the point where the
main copy loop triggered the #MC and return from the copy code as if
the copy succeeded. The machine check handler will use task_work_add() to
make sure that the task is sent a SIGBUS.
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20201006210910.21062-5-tony.luck@intel.com
---
arch/x86/lib/copy_user_64.S | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 5b68e94..77b9b2a 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -15,6 +15,7 @@
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/export.h>
+#include <asm/trapnr.h>
.macro ALIGN_DESTINATION
/* check for bad alignment of destination */
@@ -221,6 +222,7 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
* Try to copy last bytes and clear the rest if needed.
* Since protection fault in copy_from/to_user is not a normal situation,
* it is not necessary to optimize tail handling.
+ * Don't try to copy the tail if machine check happened
*
* Input:
* rdi destination
@@ -232,11 +234,24 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
*/
SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
movl %edx,%ecx
+ cmp $X86_TRAP_MC,%eax /* check if X86_TRAP_MC */
+ je 3f
1: rep movsb
2: mov %ecx,%eax
ASM_CLAC
ret
+ /*
+ * Return zero to pretend that this copy succeeded. This
+ * is counter-intuitive, but needed to prevent the code
+ * in lib/iov_iter.c from retrying and running back into
+ * the poison cache line again. The machine check handler
+ * will ensure that a SIGBUS is sent to the task.
+ */
+3: xorl %eax,%eax
+ ASM_CLAC
+ ret
+
_ASM_EXTABLE_CPY(1b, 2b)
SYM_CODE_END(.Lcopy_user_handle_tail)
^ permalink raw reply related [flat|nested] 49+ messages in thread
* [PATCH v3 5/6] x86/mce: Recover from poison found while copying from user space
2020-10-06 21:09 ` [PATCH v3 0/6] Add machine check recovery when copying from user space Tony Luck
` (3 preceding siblings ...)
2020-10-06 21:09 ` [PATCH v3 4/6] x86/mce: Avoid tail copy when machine check terminated a copy from user Tony Luck
@ 2020-10-06 21:09 ` Tony Luck
2020-10-07 10:02 ` [tip: ras/core] " tip-bot2 for Tony Luck
2020-10-06 21:09 ` [PATCH v3 6/6] x86/mce: Decode a kernel instruction to determine if it is copying from user Tony Luck
5 siblings, 1 reply; 49+ messages in thread
From: Tony Luck @ 2020-10-06 21:09 UTC (permalink / raw)
To: Borislav Petkov; +Cc: Tony Luck, Youquan Song, x86, linux-kernel
Existing kernel code can only recover from a machine check on code that
is tagged in the exception table with a fault handling recovery path.
Two new fields in the task structure to pass information from machine
check handler to the "task_work" that is queued to run before the task
returns to user mode:
+ mce_vaddr: will be initialized to the user virtual address of the fault
in the case where the fault occurred in the kernel copying data from
a user address. This is so that kill_me_maybe() can provide that
information to the user SIGBUS handler.
+ mce_kflags: copy of the struct mce.kflags needed by kill_me_maybe()
to determine if mce_vaddr is applicable to this error.
Add code to recover from a machine check while copying data from user
space to the kernel. Action for this case is the same as if the user
touched the poison directly; unmap the page and send a SIGBUS to the task.
Use a new helper function to share common code between the "fault
in user mode" case and the "fault while copying from user" case.
New code paths will be activated by the next patch which sets
MCE_IN_KERNEL_COPYIN.
Suggested-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/kernel/cpu/mce/core.c | 27 ++++++++++++++++++++-------
include/linux/sched.h | 2 ++
2 files changed, 22 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 2d6caf09e8e6..5c423c4bab68 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1260,6 +1260,21 @@ static void kill_me_maybe(struct callback_head *cb)
kill_me_now(cb);
}
+static void queue_task_work(struct mce *m, int kill_it)
+{
+ current->mce_addr = m->addr;
+ current->mce_kflags = m->kflags;
+ current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV);
+ current->mce_whole_page = whole_page(m);
+
+ if (kill_it)
+ current->mce_kill_me.func = kill_me_now;
+ else
+ current->mce_kill_me.func = kill_me_maybe;
+
+ task_work_add(current, ¤t->mce_kill_me, true);
+}
+
/*
* The actual machine check handler. This only handles real
* exceptions when something got corrupted coming in through int 18.
@@ -1401,13 +1416,8 @@ noinstr void do_machine_check(struct pt_regs *regs)
/* If this triggers there is no way to recover. Die hard. */
BUG_ON(!on_thread_stack() || !user_mode(regs));
- current->mce_addr = m.addr;
- current->mce_ripv = !!(m.mcgstatus & MCG_STATUS_RIPV);
- current->mce_whole_page = whole_page(&m);
- current->mce_kill_me.func = kill_me_maybe;
- if (kill_it)
- current->mce_kill_me.func = kill_me_now;
- task_work_add(current, ¤t->mce_kill_me, true);
+ queue_task_work(&m, kill_it);
+
} else {
/*
* Handle an MCE which has happened in kernel space but from
@@ -1422,6 +1432,9 @@ noinstr void do_machine_check(struct pt_regs *regs)
if (!fixup_exception(regs, X86_TRAP_MC, 0, 0))
mce_panic("Failed kernel mode recovery", &m, msg);
}
+
+ if (m.kflags & MCE_IN_KERNEL_COPYIN)
+ queue_task_work(&m, kill_it);
}
out:
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 93ecd930efd3..2cbba3e2b150 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1308,6 +1308,8 @@ struct task_struct {
#endif
#ifdef CONFIG_X86_MCE
+ void __user *mce_vaddr;
+ __u64 mce_kflags;
u64 mce_addr;
__u64 mce_ripv : 1,
mce_whole_page : 1,
--
2.21.1
^ permalink raw reply related [flat|nested] 49+ messages in thread
* [tip: ras/core] x86/mce: Recover from poison found while copying from user space
2020-10-06 21:09 ` [PATCH v3 5/6] x86/mce: Recover from poison found while copying from user space Tony Luck
@ 2020-10-07 10:02 ` tip-bot2 for Tony Luck
0 siblings, 0 replies; 49+ messages in thread
From: tip-bot2 for Tony Luck @ 2020-10-07 10:02 UTC (permalink / raw)
To: linux-tip-commits; +Cc: Borislav Petkov, Tony Luck, Borislav Petkov, x86, LKML
The following commit has been merged into the ras/core branch of tip:
Commit-ID: c0ab7ffce275d3f83bd253c70889c28821d4a41d
Gitweb: https://git.kernel.org/tip/c0ab7ffce275d3f83bd253c70889c28821d4a41d
Author: Tony Luck <tony.luck@intel.com>
AuthorDate: Tue, 06 Oct 2020 14:09:09 -07:00
Committer: Borislav Petkov <bp@suse.de>
CommitterDate: Wed, 07 Oct 2020 11:29:41 +02:00
x86/mce: Recover from poison found while copying from user space
Existing kernel code can only recover from a machine check on code that
is tagged in the exception table with a fault handling recovery path.
Add two new fields in the task structure to pass information from
machine check handler to the "task_work" that is queued to run before
the task returns to user mode:
+ mce_vaddr: will be initialized to the user virtual address of the fault
in the case where the fault occurred in the kernel copying data from
a user address. This is so that kill_me_maybe() can provide that
information to the user SIGBUS handler.
+ mce_kflags: copy of the struct mce.kflags needed by kill_me_maybe()
to determine if mce_vaddr is applicable to this error.
Add code to recover from a machine check while copying data from user
space to the kernel. Action for this case is the same as if the user
touched the poison directly; unmap the page and send a SIGBUS to the task.
Use a new helper function to share common code between the "fault
in user mode" case and the "fault while copying from user" case.
New code paths will be activated by the next patch which sets
MCE_IN_KERNEL_COPYIN.
Suggested-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20201006210910.21062-6-tony.luck@intel.com
---
arch/x86/kernel/cpu/mce/core.c | 27 ++++++++++++++++++++-------
include/linux/sched.h | 2 ++
2 files changed, 22 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 2d6caf0..5c423c4 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1260,6 +1260,21 @@ static void kill_me_maybe(struct callback_head *cb)
kill_me_now(cb);
}
+static void queue_task_work(struct mce *m, int kill_it)
+{
+ current->mce_addr = m->addr;
+ current->mce_kflags = m->kflags;
+ current->mce_ripv = !!(m->mcgstatus & MCG_STATUS_RIPV);
+ current->mce_whole_page = whole_page(m);
+
+ if (kill_it)
+ current->mce_kill_me.func = kill_me_now;
+ else
+ current->mce_kill_me.func = kill_me_maybe;
+
+ task_work_add(current, ¤t->mce_kill_me, true);
+}
+
/*
* The actual machine check handler. This only handles real
* exceptions when something got corrupted coming in through int 18.
@@ -1401,13 +1416,8 @@ noinstr void do_machine_check(struct pt_regs *regs)
/* If this triggers there is no way to recover. Die hard. */
BUG_ON(!on_thread_stack() || !user_mode(regs));
- current->mce_addr = m.addr;
- current->mce_ripv = !!(m.mcgstatus & MCG_STATUS_RIPV);
- current->mce_whole_page = whole_page(&m);
- current->mce_kill_me.func = kill_me_maybe;
- if (kill_it)
- current->mce_kill_me.func = kill_me_now;
- task_work_add(current, ¤t->mce_kill_me, true);
+ queue_task_work(&m, kill_it);
+
} else {
/*
* Handle an MCE which has happened in kernel space but from
@@ -1422,6 +1432,9 @@ noinstr void do_machine_check(struct pt_regs *regs)
if (!fixup_exception(regs, X86_TRAP_MC, 0, 0))
mce_panic("Failed kernel mode recovery", &m, msg);
}
+
+ if (m.kflags & MCE_IN_KERNEL_COPYIN)
+ queue_task_work(&m, kill_it);
}
out:
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 93ecd93..2cbba3e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1308,6 +1308,8 @@ struct task_struct {
#endif
#ifdef CONFIG_X86_MCE
+ void __user *mce_vaddr;
+ __u64 mce_kflags;
u64 mce_addr;
__u64 mce_ripv : 1,
mce_whole_page : 1,
^ permalink raw reply related [flat|nested] 49+ messages in thread
* [PATCH v3 6/6] x86/mce: Decode a kernel instruction to determine if it is copying from user
2020-10-06 21:09 ` [PATCH v3 0/6] Add machine check recovery when copying from user space Tony Luck
` (4 preceding siblings ...)
2020-10-06 21:09 ` [PATCH v3 5/6] x86/mce: Recover from poison found while copying from user space Tony Luck
@ 2020-10-06 21:09 ` Tony Luck
2020-10-07 10:02 ` [tip: ras/core] " tip-bot2 for Tony Luck
5 siblings, 1 reply; 49+ messages in thread
From: Tony Luck @ 2020-10-06 21:09 UTC (permalink / raw)
To: Borislav Petkov; +Cc: Tony Luck, Youquan Song, x86, linux-kernel
All instructions copying data between kernel and user memory
are tagged with either _ASM_EXTABLE_UA or _ASM_EXTABLE_CPY
entries in the exception table. ex_fault_handler_type() returns
EX_HANDLER_UACCESS for both of these.
Recovery is only possible when the machine check was triggered
on a read from user memory. In this case the same strategy for
recovery applies as if the user had made the access in ring3. If
the fault was in kernel memory while copying to user there is no
current recovery plan.
For MOV and MOVZ instructions a full decode of the instruction
is done to find the source address. For MOVS instructions
the source address is in the %rsi register. The function
fault_in_kernel_space() determines whether the source address is
kernel or user, upgrade it from "static" so it can be used here.
Co-developed-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/include/asm/traps.h | 2 ++
arch/x86/kernel/cpu/mce/core.c | 11 +++++--
arch/x86/kernel/cpu/mce/severity.c | 53 +++++++++++++++++++++++++++++-
arch/x86/mm/fault.c | 2 +-
4 files changed, 63 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 714b1a30e7b0..df0b7bfc1234 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -35,6 +35,8 @@ extern int panic_on_unrecovered_nmi;
void math_emulate(struct math_emu_info *);
+bool fault_in_kernel_space(unsigned long address);
+
#ifdef CONFIG_VMAP_STACK
void __noreturn handle_stack_overflow(const char *message,
struct pt_regs *regs,
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 5c423c4bab68..3d6e1bfa4f9d 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1250,14 +1250,19 @@ static void kill_me_maybe(struct callback_head *cb)
if (!p->mce_ripv)
flags |= MF_MUST_KILL;
- if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags)) {
+ if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags) &&
+ !(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) {
set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
sync_core();
return;
}
- pr_err("Memory error not recovered");
- kill_me_now(cb);
+ if (p->mce_vaddr != (void __user *)-1l) {
+ force_sig_mceerr(BUS_MCEERR_AR, p->mce_vaddr, PAGE_SHIFT);
+ } else {
+ pr_err("Memory error not recovered");
+ kill_me_now(cb);
+ }
}
static void queue_task_work(struct mce *m, int kill_it)
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index c6494e6579c1..83df991314c5 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -13,6 +13,9 @@
#include <asm/mce.h>
#include <asm/intel-family.h>
+#include <asm/traps.h>
+#include <asm/insn.h>
+#include <asm/insn-eval.h>
#include "internal.h"
@@ -212,6 +215,47 @@ static struct severity {
#define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) == \
(MCG_STATUS_RIPV|MCG_STATUS_EIPV))
+static bool is_copy_from_user(struct pt_regs *regs)
+{
+ u8 insn_buf[MAX_INSN_SIZE];
+ struct insn insn;
+ unsigned long addr;
+
+ if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, MAX_INSN_SIZE))
+ return false;
+
+ kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE);
+ insn_get_opcode(&insn);
+ if (!insn.opcode.got)
+ return false;
+
+ switch (insn.opcode.value) {
+ /* MOV mem,reg */
+ case 0x8A: case 0x8B:
+ /* MOVZ mem,reg */
+ case 0xB60F: case 0xB70F:
+ insn_get_modrm(&insn);
+ insn_get_sib(&insn);
+ if (!insn.modrm.got || !insn.sib.got)
+ return false;
+ addr = (unsigned long)insn_get_addr_ref(&insn, regs);
+ break;
+ /* REP MOVS */
+ case 0xA4: case 0xA5:
+ addr = regs->si;
+ break;
+ default:
+ return false;
+ }
+
+ if (fault_in_kernel_space(addr))
+ return false;
+
+ current->mce_vaddr = (void __user *)addr;
+
+ return true;
+}
+
/*
* If mcgstatus indicated that ip/cs on the stack were
* no good, then "m->cs" will be zero and we will have
@@ -229,10 +273,17 @@ static int error_context(struct mce *m, struct pt_regs *regs)
if ((m->cs & 3) == 3)
return IN_USER;
+ if (!mc_recoverable(m->mcgstatus))
+ return IN_KERNEL;
t = ex_get_fault_handler_type(m->ip);
- if (mc_recoverable(m->mcgstatus) && t == EX_HANDLER_FAULT) {
+ if (t == EX_HANDLER_FAULT) {
+ m->kflags |= MCE_IN_KERNEL_RECOV;
+ return IN_KERNEL_RECOV;
+ }
+ if (t == EX_HANDLER_UACCESS && regs && is_copy_from_user(regs)) {
m->kflags |= MCE_IN_KERNEL_RECOV;
+ m->kflags |= MCE_IN_KERNEL_COPYIN;
return IN_KERNEL_RECOV;
}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 35f1498e9832..88ae443e4e5f 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1081,7 +1081,7 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
return 0;
}
-static int fault_in_kernel_space(unsigned long address)
+bool fault_in_kernel_space(unsigned long address)
{
/*
* On 64-bit systems, the vsyscall page is at an address above
--
2.21.1
^ permalink raw reply related [flat|nested] 49+ messages in thread
* [tip: ras/core] x86/mce: Decode a kernel instruction to determine if it is copying from user
2020-10-06 21:09 ` [PATCH v3 6/6] x86/mce: Decode a kernel instruction to determine if it is copying from user Tony Luck
@ 2020-10-07 10:02 ` tip-bot2 for Tony Luck
0 siblings, 0 replies; 49+ messages in thread
From: tip-bot2 for Tony Luck @ 2020-10-07 10:02 UTC (permalink / raw)
To: linux-tip-commits; +Cc: Youquan Song, Tony Luck, Borislav Petkov, x86, LKML
The following commit has been merged into the ras/core branch of tip:
Commit-ID: 300638101329e8f1569115f3d7197ef5ef754a3a
Gitweb: https://git.kernel.org/tip/300638101329e8f1569115f3d7197ef5ef754a3a
Author: Tony Luck <tony.luck@intel.com>
AuthorDate: Tue, 06 Oct 2020 14:09:10 -07:00
Committer: Borislav Petkov <bp@suse.de>
CommitterDate: Wed, 07 Oct 2020 11:32:40 +02:00
x86/mce: Decode a kernel instruction to determine if it is copying from user
All instructions copying data between kernel and user memory
are tagged with either _ASM_EXTABLE_UA or _ASM_EXTABLE_CPY
entries in the exception table. ex_fault_handler_type() returns
EX_HANDLER_UACCESS for both of these.
Recovery is only possible when the machine check was triggered
on a read from user memory. In this case the same strategy for
recovery applies as if the user had made the access in ring3. If
the fault was in kernel memory while copying to user there is no
current recovery plan.
For MOV and MOVZ instructions a full decode of the instruction
is done to find the source address. For MOVS instructions
the source address is in the %rsi register. The function
fault_in_kernel_space() determines whether the source address is
kernel or user, upgrade it from "static" so it can be used here.
Co-developed-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: Youquan Song <youquan.song@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Link: https://lkml.kernel.org/r/20201006210910.21062-7-tony.luck@intel.com
---
arch/x86/include/asm/traps.h | 2 +-
arch/x86/kernel/cpu/mce/core.c | 11 ++++--
arch/x86/kernel/cpu/mce/severity.c | 53 ++++++++++++++++++++++++++++-
arch/x86/mm/fault.c | 2 +-
4 files changed, 63 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 714b1a3..df0b7bf 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -35,6 +35,8 @@ extern int panic_on_unrecovered_nmi;
void math_emulate(struct math_emu_info *);
+bool fault_in_kernel_space(unsigned long address);
+
#ifdef CONFIG_VMAP_STACK
void __noreturn handle_stack_overflow(const char *message,
struct pt_regs *regs,
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 5c423c4..3d6e1bf 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1250,14 +1250,19 @@ static void kill_me_maybe(struct callback_head *cb)
if (!p->mce_ripv)
flags |= MF_MUST_KILL;
- if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags)) {
+ if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags) &&
+ !(p->mce_kflags & MCE_IN_KERNEL_COPYIN)) {
set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
sync_core();
return;
}
- pr_err("Memory error not recovered");
- kill_me_now(cb);
+ if (p->mce_vaddr != (void __user *)-1l) {
+ force_sig_mceerr(BUS_MCEERR_AR, p->mce_vaddr, PAGE_SHIFT);
+ } else {
+ pr_err("Memory error not recovered");
+ kill_me_now(cb);
+ }
}
static void queue_task_work(struct mce *m, int kill_it)
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index c6494e6..83df991 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -13,6 +13,9 @@
#include <asm/mce.h>
#include <asm/intel-family.h>
+#include <asm/traps.h>
+#include <asm/insn.h>
+#include <asm/insn-eval.h>
#include "internal.h"
@@ -212,6 +215,47 @@ static struct severity {
#define mc_recoverable(mcg) (((mcg) & (MCG_STATUS_RIPV|MCG_STATUS_EIPV)) == \
(MCG_STATUS_RIPV|MCG_STATUS_EIPV))
+static bool is_copy_from_user(struct pt_regs *regs)
+{
+ u8 insn_buf[MAX_INSN_SIZE];
+ struct insn insn;
+ unsigned long addr;
+
+ if (copy_from_kernel_nofault(insn_buf, (void *)regs->ip, MAX_INSN_SIZE))
+ return false;
+
+ kernel_insn_init(&insn, insn_buf, MAX_INSN_SIZE);
+ insn_get_opcode(&insn);
+ if (!insn.opcode.got)
+ return false;
+
+ switch (insn.opcode.value) {
+ /* MOV mem,reg */
+ case 0x8A: case 0x8B:
+ /* MOVZ mem,reg */
+ case 0xB60F: case 0xB70F:
+ insn_get_modrm(&insn);
+ insn_get_sib(&insn);
+ if (!insn.modrm.got || !insn.sib.got)
+ return false;
+ addr = (unsigned long)insn_get_addr_ref(&insn, regs);
+ break;
+ /* REP MOVS */
+ case 0xA4: case 0xA5:
+ addr = regs->si;
+ break;
+ default:
+ return false;
+ }
+
+ if (fault_in_kernel_space(addr))
+ return false;
+
+ current->mce_vaddr = (void __user *)addr;
+
+ return true;
+}
+
/*
* If mcgstatus indicated that ip/cs on the stack were
* no good, then "m->cs" will be zero and we will have
@@ -229,10 +273,17 @@ static int error_context(struct mce *m, struct pt_regs *regs)
if ((m->cs & 3) == 3)
return IN_USER;
+ if (!mc_recoverable(m->mcgstatus))
+ return IN_KERNEL;
t = ex_get_fault_handler_type(m->ip);
- if (mc_recoverable(m->mcgstatus) && t == EX_HANDLER_FAULT) {
+ if (t == EX_HANDLER_FAULT) {
+ m->kflags |= MCE_IN_KERNEL_RECOV;
+ return IN_KERNEL_RECOV;
+ }
+ if (t == EX_HANDLER_UACCESS && regs && is_copy_from_user(regs)) {
m->kflags |= MCE_IN_KERNEL_RECOV;
+ m->kflags |= MCE_IN_KERNEL_COPYIN;
return IN_KERNEL_RECOV;
}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 35f1498..88ae443 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1081,7 +1081,7 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
return 0;
}
-static int fault_in_kernel_space(unsigned long address)
+bool fault_in_kernel_space(unsigned long address)
{
/*
* On 64-bit systems, the vsyscall page is at an address above
^ permalink raw reply related [flat|nested] 49+ messages in thread