From: Balbir Singh <bsingharora@gmail.com>
To: mpe@ellerman.id.au
Cc: linuxppc-dev@lists.ozlabs.org, Balbir Singh <bsingharora@gmail.com>
Subject: [PATCH v4 5/5] powerpc/mce: hookup memory_failure for UE errors
Date: Fri, 29 Sep 2017 14:26:55 +1000 [thread overview]
Message-ID: <20170929042655.14570-6-bsingharora@gmail.com> (raw)
In-Reply-To: <20170929042655.14570-1-bsingharora@gmail.com>
If we are in user space and hit a UE error, we now have the
basic infrastructure to walk the page tables and find out
the effective address that was accessed, since the DAR
is not valid.
We use a work_queue content to hookup the bad pfn, any
other context causes problems, since memory_failure itself
can call into schedule() via lru_drain_ bits.
We could probably poison the struct page to avoid a race
between detection and taking corrective action.
Signed-off-by: Balbir Singh <bsingharora@gmail.com>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/kernel/mce.c | 70 +++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 67 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index ee148f4..299553e 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -39,11 +39,21 @@ static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event);
static DEFINE_PER_CPU(int, mce_queue_count);
static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT], mce_event_queue);
+/* Queue for delayed MCE UE events. */
+static DEFINE_PER_CPU(int, mce_ue_count);
+static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
+ mce_ue_event_queue);
+
static void machine_check_process_queued_event(struct irq_work *work);
+void machine_check_ue_event(struct machine_check_event *evt);
+static void machine_process_ue_event(struct work_struct *work);
+
static struct irq_work mce_event_process_work = {
.func = machine_check_process_queued_event,
};
+DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
+
static void mce_set_error_info(struct machine_check_event *mce,
struct mce_error_info *mce_err)
{
@@ -143,6 +153,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
if (phys_addr != ULONG_MAX) {
mce->u.ue_error.physical_address_provided = true;
mce->u.ue_error.physical_address = phys_addr;
+ machine_check_ue_event(mce);
}
}
return;
@@ -197,6 +208,26 @@ void release_mce_event(void)
get_mce_event(NULL, true);
}
+
+/*
+ * Queue up the MCE event which then can be handled later.
+ */
+void machine_check_ue_event(struct machine_check_event *evt)
+{
+ int index;
+
+ index = __this_cpu_inc_return(mce_ue_count) - 1;
+ /* If queue is full, just return for now. */
+ if (index >= MAX_MC_EVT) {
+ __this_cpu_dec(mce_ue_count);
+ return;
+ }
+ memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
+
+ /* Queue work to process this event later. */
+ schedule_work(&mce_ue_event_work);
+}
+
/*
* Queue up the MCE event which then can be handled later.
*/
@@ -219,7 +250,39 @@ void machine_check_queue_event(void)
/* Queue irq work to process this event later. */
irq_work_queue(&mce_event_process_work);
}
-
+/*
+ * process pending MCE event from the mce event queue. This function will be
+ * called during syscall exit.
+ */
+static void machine_process_ue_event(struct work_struct *work)
+{
+ int index;
+ struct machine_check_event *evt;
+
+ while (__this_cpu_read(mce_ue_count) > 0) {
+ index = __this_cpu_read(mce_ue_count) - 1;
+ evt = this_cpu_ptr(&mce_ue_event_queue[index]);
+#ifdef CONFIG_MEMORY_FAILURE
+ /*
+ * This should probably queued elsewhere, but
+ * oh! well
+ */
+ if (evt->error_type == MCE_ERROR_TYPE_UE) {
+ if (evt->u.ue_error.physical_address_provided) {
+ unsigned long pfn;
+
+ pfn = evt->u.ue_error.physical_address >>
+ PAGE_SHIFT;
+ memory_failure(pfn, SIGBUS, 0);
+ } else
+ pr_warn("Failed to identify bad address from "
+ "where the uncorrectable error (UE) "
+ "was generated\n");
+ }
+#endif
+ __this_cpu_dec(mce_ue_count);
+ }
+}
/*
* process pending MCE event from the mce event queue. This function will be
* called during syscall exit.
@@ -227,6 +290,7 @@ void machine_check_queue_event(void)
static void machine_check_process_queued_event(struct irq_work *work)
{
int index;
+ struct machine_check_event *evt;
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
@@ -236,8 +300,8 @@ static void machine_check_process_queued_event(struct irq_work *work)
*/
while (__this_cpu_read(mce_queue_count) > 0) {
index = __this_cpu_read(mce_queue_count) - 1;
- machine_check_print_event_info(
- this_cpu_ptr(&mce_event_queue[index]), false);
+ evt = this_cpu_ptr(&mce_event_queue[index]);
+ machine_check_print_event_info(evt, false);
__this_cpu_dec(mce_queue_count);
}
}
--
2.9.5
next prev parent reply other threads:[~2017-09-29 4:27 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-09-29 4:26 [PATCH v4 0/5] Revisit MCE handling for UE Errors Balbir Singh
2017-09-29 4:26 ` [PATCH v4 1/5] powerpc/mce.c: Remove unused function get_mce_fault_addr() Balbir Singh
2017-10-16 5:13 ` Michael Ellerman
2017-10-16 5:23 ` Balbir Singh
2017-10-19 4:48 ` [v4, " Michael Ellerman
2017-09-29 4:26 ` [PATCH v4 2/5] powerpc/mce: align the print of physical address better Balbir Singh
2017-10-16 5:18 ` Michael Ellerman
2017-10-16 5:36 ` Balbir Singh
2017-09-29 4:26 ` [PATCH v4 3/5] powerpc/mce: Hookup derror (load/store) UE errors Balbir Singh
2017-10-16 5:36 ` Michael Ellerman
2017-10-16 5:38 ` Balbir Singh
2017-09-29 4:26 ` [PATCH v4 4/5] powerpc/mce: Hookup ierror (instruction) " Balbir Singh
2017-09-29 4:26 ` Balbir Singh [this message]
2017-10-16 5:38 ` [PATCH v4 5/5] powerpc/mce: hookup memory_failure for " Michael Ellerman
2017-10-16 5:40 ` Balbir Singh
2017-10-16 12:07 ` Michael Ellerman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170929042655.14570-6-bsingharora@gmail.com \
--to=bsingharora@gmail.com \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=mpe@ellerman.id.au \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).