From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755461AbdHYKYy (ORCPT ); Fri, 25 Aug 2017 06:24:54 -0400 Received: from mail.skyhub.de ([5.9.137.197]:55656 "EHLO mail.skyhub.de" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1755155AbdHYKYZ (ORCPT ); Fri, 25 Aug 2017 06:24:25 -0400 From: Borislav Petkov To: linux-edac Cc: Steven Rostedt , Tony Luck , Yazen Ghannam , X86 ML , LKML Subject: [PATCH 6/7] EDAC, mce_amd: Issue the decoded info through the TP or printk() Date: Fri, 25 Aug 2017 12:24:10 +0200 Message-Id: <20170825102411.8682-7-bp@alien8.de> X-Mailer: git-send-email 2.13.0 In-Reply-To: <20170825102411.8682-1-bp@alien8.de> References: <20170825102411.8682-1-bp@alien8.de> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org From: Borislav Petkov ... depending on whether we have userspace consumers. Handle the HW_ERR prefix accordingly: the decode helpers no longer embed it in the decode buffer, so the printk() path splits the buffer into lines with strsep() and prepends HW_ERR to each line it prints. 
Signed-off-by: Borislav Petkov --- drivers/edac/mce_amd.c | 106 +++++++++++++++++++++++++++++++------------------ 1 file changed, 68 insertions(+), 38 deletions(-) diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index b7c1f8f7e871..41c09d5b81f0 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -2,9 +2,12 @@ #include #include #include +#include #include +#include + #include "mce_amd.h" static struct amd_decoder_ops *fam_ops; @@ -465,7 +468,7 @@ static void decode_mc0_mce(struct mce *m) u16 ec = EC(m->status); u8 xec = XEC(m->status, xec_mask); - seq_buf_printf(&sb, HW_ERR "MC0 Error: "); + seq_buf_printf(&sb, "MC0 Error: "); /* TLB error signatures are the same across families */ if (TLB_ERROR(ec)) { @@ -575,7 +578,7 @@ static void decode_mc1_mce(struct mce *m) u16 ec = EC(m->status); u8 xec = XEC(m->status, xec_mask); - seq_buf_printf(&sb, HW_ERR "MC1 Error: "); + seq_buf_printf(&sb, "MC1 Error: "); if (TLB_ERROR(ec)) seq_buf_printf(&sb, "%s TLB %s.\n", LL_MSG(ec), @@ -721,7 +724,7 @@ static void decode_mc2_mce(struct mce *m) u16 ec = EC(m->status); u8 xec = XEC(m->status, xec_mask); - seq_buf_printf(&sb, HW_ERR "MC2 Error: "); + seq_buf_printf(&sb, "MC2 Error: "); if (!fam_ops->mc2_mce(ec, xec)) pr_emerg(HW_ERR "Corrupted MC2 MCE info?\n"); @@ -738,7 +741,7 @@ static void decode_mc3_mce(struct mce *m) return; } - seq_buf_printf(&sb, HW_ERR "MC3 Error"); + seq_buf_printf(&sb, "MC3 Error"); if (xec == 0x0) { u8 r4 = R4(ec); @@ -764,7 +767,7 @@ static void decode_mc4_mce(struct mce *m) u8 xec = XEC(m->status, 0x1f); u8 offset = 0; - seq_buf_printf(&sb, HW_ERR "MC4 Error (node %d): ", node_id); + seq_buf_printf(&sb, "MC4 Error (node %d): ", node_id); switch (xec) { case 0x0 ... 
0xe: @@ -823,7 +826,7 @@ static void decode_mc5_mce(struct mce *m) if (fam == 0xf || fam == 0x11) goto wrong_mc5_mce; - seq_buf_printf(&sb, HW_ERR "MC5 Error: "); + seq_buf_printf(&sb, "MC5 Error: "); if (INT_ERROR(ec)) { if (xec <= 0x1f) { @@ -850,7 +853,7 @@ static void decode_mc6_mce(struct mce *m) { u8 xec = XEC(m->status, xec_mask); - seq_buf_printf(&sb, HW_ERR "MC6 Error: "); + seq_buf_printf(&sb, "MC6 Error: "); if (xec > 0x5) goto wrong_mc6_mce; @@ -883,12 +886,12 @@ static void decode_smca_error(struct mce *m) bank_type = hwid->bank_type; ip_name = smca_get_long_name(bank_type); - seq_buf_printf(&sb, HW_ERR "%s Extended Error Code: %d\n", ip_name, xec); + seq_buf_printf(&sb, "%s Extended Error Code: %d\n", ip_name, xec); /* Only print the decode of valid error codes */ if (xec < smca_mce_descs[bank_type].num_descs && (hwid->xec_bitmap & BIT_ULL(xec))) { - seq_buf_printf(&sb, HW_ERR "%s Error: ", ip_name); + seq_buf_printf(&sb, "%s Error: ", ip_name); seq_buf_printf(&sb, "%s.\n", smca_mce_descs[bank_type].descs[xec]); } @@ -950,26 +953,10 @@ static const char *decode_error_status(struct mce *m) return "Corrected error, no action required."; } -static int -amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) +static void __decode_mce(struct mce *m) { - struct mce *m = (struct mce *)data; unsigned int fam = x86_family(m->cpuid); int ecc; - char *dec_buf; - - if (amd_filter_mce(m)) - return NOTIFY_STOP; - - dec_buf = (void *)gen_pool_alloc(dec_pool, ELEM_SIZE); - if (!dec_buf) { - pr_warn("Decode buffer full!\n"); - return NOTIFY_STOP; - } - - /* \0 terminated */ - seq_buf_init(&sb, dec_buf, ELEM_SIZE); - seq_buf_clear_buf(&sb); pr_emerg(HW_ERR "%s\n", decode_error_status(m)); @@ -1011,7 +998,12 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) pr_cont("]: 0x%016llx\n", m->status); if (m->status & MCI_STATUS_ADDRV) - pr_emerg(HW_ERR "Error Addr: 0x%016llx\n", m->addr); + pr_emerg(HW_ERR "Error Addr: 0x%016llx ", 
m->addr); + + if (m->tsc) + pr_cont("TSC: %llu", m->tsc); + + pr_cont("\n"); if (boot_cpu_has(X86_FEATURE_SMCA)) { pr_emerg(HW_ERR "IPID: 0x%016llx", m->ipid); @@ -1020,16 +1012,13 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) pr_cont(", Syndrome: 0x%016llx", m->synd); pr_cont("\n"); - - decode_smca_error(m); - goto err_code; } +} - if (m->tsc) - pr_emerg(HW_ERR "TSC: %llu\n", m->tsc); - +static void decode_legacy_error(struct mce *m) +{ if (!fam_ops) - goto err_code; + return; switch (m->bank) { case 0: @@ -1063,11 +1052,52 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) default: break; } +} - err_code: - amd_decode_err_code(m->status & 0xffff); +static int +amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) +{ + struct mce *m = (struct mce *)data; + char *dec_buf; + + if (amd_filter_mce(m)) + return NOTIFY_STOP; - pr_emerg("%.*s\n", (int)sb.len, sb.buffer); + dec_buf = (void *)gen_pool_alloc(dec_pool, ELEM_SIZE); + if (!dec_buf) { + pr_warn("Decode buffer full!\n"); + return NOTIFY_STOP; + } + + /* \0 terminated */ + seq_buf_init(&sb, dec_buf, ELEM_SIZE); + seq_buf_clear_buf(&sb); + + if (!ras_userspace_consumers()) + __decode_mce(m); + + if (boot_cpu_has(X86_FEATURE_SMCA)) + decode_smca_error(m); + else + decode_legacy_error(m); + + if (ras_userspace_consumers()) { + trace_mce_record(m, sb.buffer); + } else { + char *l; + + while ((l = strsep(&sb.buffer, "\n"))) { + if (!strnlen(l, ELEM_SIZE)) + break; + + pr_emerg(HW_ERR "%s\n", l); + } + + /* Restore original address because strsep() mangles it. 
*/ + sb.buffer = __err_buf; + + amd_decode_err_code(m->status & 0xffff); + } gen_pool_free(dec_pool, (unsigned long)dec_buf, ELEM_SIZE); @@ -1076,7 +1106,7 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) static struct notifier_block amd_mce_dec_nb = { .notifier_call = amd_decode_mce, - .priority = MCE_PRIO_EDAC, + .priority = MCE_PRIO_DECODER, }; static int __init mce_amd_init(void) -- 2.13.0