From: "Baicar, Tyler" <tbaicar@codeaurora.org>
To: Suzuki K Poulose <Suzuki.Poulose@arm.com>,
marc.zyngier@arm.com, pbonzini@redhat.com, rkrcmar@redhat.com,
linux@armlinux.org.uk, catalin.marinas@arm.com,
will.deacon@arm.com, rjw@rjwysocki.net, lenb@kernel.org,
matt@codeblueprint.co.uk, robert.moore@intel.com,
lv.zheng@intel.com, nkaje@codeaurora.org, zjzhang@codeaurora.org,
mark.rutland@arm.com, james.morse@arm.com,
akpm@linux-foundation.org, eun.taik.lee@samsung.com,
sandeepa.s.prabhu@gmail.com, shijie.huang@arm.com,
rruigrok@codeaurora.org, paul.gortmaker@windriver.com,
tomasz.nowicki@linaro.org, fu.wei@linaro.org,
rostedt@goodmis.org, bristot@redhat.com,
linux-arm-kernel@lists.infradead.org,
kvmarm@lists.cs.columbia.edu, kvm@vger.kernel.org,
linux-kernel@vger.kernel.org, linux-acpi@vger.kernel.org,
linux-efi@vger.kernel.org, punit.agrawal@arm.com,
astone@redhat.com, harba@codeaurora.org, hanjun.guo@linaro.org
Subject: Re: [PATCH V4 02/10] ras: acpi/apei: cper: generic error data entry v3 per ACPI 6.1
Date: Mon, 24 Oct 2016 14:33:54 -0600 [thread overview]
Message-ID: <2422a0c6-ebe4-7f51-2b8d-c42b9bb468b1@codeaurora.org> (raw)
In-Reply-To: <da257bb4-48f3-b187-1d43-c68669427cef@arm.com>
On 10/24/2016 3:50 AM, Suzuki K Poulose wrote:
> On 21/10/16 18:30, Tyler Baicar wrote:
>> Currently when a RAS error is reported it is not timestamped.
>> The ACPI 6.1 spec adds the timestamp field to the generic error
>> data entry v3 structure. The timestamp of when the firmware
>> generated the error is now being reported.
>>
>> Signed-off-by: Jonathan (Zhixiong) Zhang <zjzhang@codeaurora.org>
>> Signed-off-by: Richard Ruigrok <rruigrok@codeaurora.org>
>> Signed-off-by: Tyler Baicar <tbaicar@codeaurora.org>
>> Signed-off-by: Naveen Kaje <nkaje@codeaurora.org>
>> ---
>> drivers/acpi/apei/ghes.c | 14 +++++++---
>> drivers/firmware/efi/cper.c | 67
>> +++++++++++++++++++++++++++++++++++++--------
>> include/acpi/ghes.h | 10 +++++++
>> include/linux/cper.h | 12 ++++++++
>> 4 files changed, 88 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
>> index 7d020b0..7610f08 100644
>> --- a/drivers/acpi/apei/ghes.c
>> +++ b/drivers/acpi/apei/ghes.c
>> @@ -419,7 +419,8 @@ static void ghes_handle_memory_failure(struct
>> acpi_hest_generic_data *gdata, int
>> int flags = -1;
>> int sec_sev = ghes_severity(gdata->error_severity);
>> struct cper_sec_mem_err *mem_err;
>> - mem_err = (struct cper_sec_mem_err *)(gdata + 1);
>> +
>> + mem_err = acpi_hest_generic_data_payload(gdata);
>>
>> if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
>> return;
>> @@ -449,14 +450,18 @@ static void ghes_do_proc(struct ghes *ghes,
>> {
>> int sev, sec_sev;
>> struct acpi_hest_generic_data *gdata;
>> + uuid_le sec_type;
>>
>> sev = ghes_severity(estatus->error_severity);
>> apei_estatus_for_each_section(estatus, gdata) {
>> sec_sev = ghes_severity(gdata->error_severity);
>> - if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
>> + sec_type = *(uuid_le *)gdata->section_type;
>> +
>> + if (!uuid_le_cmp(sec_type,
>> CPER_SEC_PLATFORM_MEM)) {
>> struct cper_sec_mem_err *mem_err;
>> - mem_err = (struct cper_sec_mem_err *)(gdata+1);
>> +
>> + mem_err = acpi_hest_generic_data_payload(gdata);
>> ghes_edac_report_mem_error(ghes, sev, mem_err);
>>
>> arch_apei_report_mem_error(sev, mem_err);
>> @@ -466,7 +471,8 @@ static void ghes_do_proc(struct ghes *ghes,
>> else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
>> CPER_SEC_PCIE)) {
>> struct cper_sec_pcie *pcie_err;
>> - pcie_err = (struct cper_sec_pcie *)(gdata+1);
>> +
>> + pcie_err = acpi_hest_generic_data_payload(gdata);
>> if (sev == GHES_SEV_RECOVERABLE &&
>> sec_sev == GHES_SEV_RECOVERABLE &&
>> pcie_err->validation_bits &
>> CPER_PCIE_VALID_DEVICE_ID &&
>> diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
>> index d425374..af7e1e9 100644
>> --- a/drivers/firmware/efi/cper.c
>> +++ b/drivers/firmware/efi/cper.c
>> @@ -32,6 +32,9 @@
>> #include <linux/acpi.h>
>> #include <linux/pci.h>
>> #include <linux/aer.h>
>> +#include <linux/printk.h>
>> +#include <linux/bcd.h>
>> +#include <acpi/ghes.h>
>>
>> #define INDENT_SP " "
>>
>> @@ -386,13 +389,37 @@ static void cper_print_pcie(const char *pfx,
>> const struct cper_sec_pcie *pcie,
>> pfx, pcie->bridge.secondary_status, pcie->bridge.control);
>> }
>>
>> +static void cper_estatus_print_section_v300(const char *pfx,
>> + const struct acpi_hest_generic_data_v300 *gdata)
>> +{
>> + __u8 hour, min, sec, day, mon, year, century, *timestamp;
>> +
>> + if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
>> + timestamp = (__u8 *)&(gdata->time_stamp);
>> + sec = bcd2bin(timestamp[0]);
>> + min = bcd2bin(timestamp[1]);
>> + hour = bcd2bin(timestamp[2]);
>> + day = bcd2bin(timestamp[4]);
>> + mon = bcd2bin(timestamp[5]);
>> + year = bcd2bin(timestamp[6]);
>> + century = bcd2bin(timestamp[7]);
>> + printk("%stime: %7s %02d%02d-%02d-%02d %02d:%02d:%02d\n", pfx,
>> + 0x01 & *(timestamp + 3) ? "precise" : "", century,
>> + year, mon, day, hour, min, sec);
>> + }
>> +}
>> +
>> static void cper_estatus_print_section(
>> - const char *pfx, const struct acpi_hest_generic_data *gdata, int
>> sec_no)
>> + const char *pfx, struct acpi_hest_generic_data *gdata, int sec_no)
>> {
>> uuid_le *sec_type = (uuid_le *)gdata->section_type;
>> __u16 severity;
>> char newpfx[64];
>>
>> + if (acpi_hest_generic_data_version(gdata))
>> + cper_estatus_print_section_v300(pfx,
>> + (const struct acpi_hest_generic_data_v300 *)gdata);
>> +
>> severity = gdata->error_severity;
>> printk("%s""Error %d, type: %s\n", pfx, sec_no,
>> cper_severity_str(severity));
>> @@ -403,14 +430,18 @@ static void cper_estatus_print_section(
>>
>> snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
>> if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
>> - struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
>> + struct cper_sec_proc_generic *proc_err;
>> +
>> + proc_err = acpi_hest_generic_data_payload(gdata);
>> printk("%s""section_type: general processor error\n", newpfx);
>> if (gdata->error_data_length >= sizeof(*proc_err))
>> cper_print_proc_generic(newpfx, proc_err);
>> else
>> goto err_section_too_small;
>> } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
>> - struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
>> + struct cper_sec_mem_err *mem_err;
>> +
>> + mem_err = acpi_hest_generic_data_payload(gdata);
>> printk("%s""section_type: memory error\n", newpfx);
>> if (gdata->error_data_length >=
>> sizeof(struct cper_sec_mem_err_old))
>> @@ -419,7 +450,9 @@ static void cper_estatus_print_section(
>> else
>> goto err_section_too_small;
>> } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
>> - struct cper_sec_pcie *pcie = (void *)(gdata + 1);
>> + struct cper_sec_pcie *pcie;
>> +
>> + pcie = acpi_hest_generic_data_payload(gdata);
>> printk("%s""section_type: PCIe error\n", newpfx);
>> if (gdata->error_data_length >= sizeof(*pcie))
>> cper_print_pcie(newpfx, pcie, gdata);
>> @@ -438,6 +471,7 @@ void cper_estatus_print(const char *pfx,
>> const struct acpi_hest_generic_status *estatus)
>> {
>> struct acpi_hest_generic_data *gdata;
>> + struct acpi_hest_generic_data_v300 *gdata_v3 = NULL;
>> unsigned int data_len, gedata_len;
>> int sec_no = 0;
>> char newpfx[64];
>> @@ -451,12 +485,22 @@ void cper_estatus_print(const char *pfx,
>> printk("%s""event severity: %s\n", pfx,
>> cper_severity_str(severity));
>> data_len = estatus->data_length;
>> gdata = (struct acpi_hest_generic_data *)(estatus + 1);
>> + if (acpi_hest_generic_data_version(gdata))
>> + gdata_v3 = (struct acpi_hest_generic_data_v300 *)gdata;
>
> I think the acpi_hest_generic_data_version() doesn't check if the
> version is
> V3 or higher ?
Oops :) I need to make sure that returns 3.
>
>> +
>> snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
>> +
>> while (data_len >= sizeof(*gdata)) {
>> gedata_len = gdata->error_data_length;
>> cper_estatus_print_section(newpfx, gdata, sec_no);
>> - data_len -= gedata_len + sizeof(*gdata);
>> - gdata = (void *)(gdata + 1) + gedata_len;
>> + if(gdata_v3) {
>> + data_len -= gedata_len + sizeof(*gdata_v3);
>> + gdata_v3 = (void *)(gdata_v3 + 1) + gedata_len;
>> + gdata = (struct acpi_hest_generic_data *)gdata_v3;
>> + } else {
>> + data_len -= gedata_len + sizeof(*gdata);
>> + gdata = (void *)(gdata + 1) + gedata_len;
>> + }
>
> Could we not use the helpers we define below to unify the code here
> and avoid the
> switch ?
>
Yes, those helpers should be able to reduce the code here, I'll update this.
>
>> sec_no++;
>> }
>> }
>> @@ -486,12 +530,13 @@ int cper_estatus_check(const struct
>> acpi_hest_generic_status *estatus)
>> return rc;
>> data_len = estatus->data_length;
>> gdata = (struct acpi_hest_generic_data *)(estatus + 1);
>> - while (data_len >= sizeof(*gdata)) {
>> - gedata_len = gdata->error_data_length;
>> - if (gedata_len > data_len - sizeof(*gdata))
>> +
>> + while (data_len >= acpi_hest_generic_data_size(gdata)) {
>> + gedata_len = acpi_hest_generic_data_error_length(gdata);
>> + if (gedata_len > data_len - acpi_hest_generic_data_size(gdata))
>> return -EINVAL;
>> - data_len -= gedata_len + sizeof(*gdata);
>> - gdata = (void *)(gdata + 1) + gedata_len;
>> + data_len -= gedata_len + acpi_hest_generic_data_size(gdata);
>> + gdata = acpi_hest_generic_data_next(gdata);
>> }
>> if (data_len)
>> return -EINVAL;
>> diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
>> index 68f088a..56b9679 100644
>> --- a/include/acpi/ghes.h
>> +++ b/include/acpi/ghes.h
>> @@ -73,3 +73,13 @@ static inline void ghes_edac_unregister(struct
>> ghes *ghes)
>> {
>> }
>> #endif
>> +
>> +#define acpi_hest_generic_data_version(gdata) \
>> + (gdata->revision >> 8)
>> +
>
>
>>
>> +#define acpi_hest_generic_data_error_length(gdata) \
>> + (((struct acpi_hest_generic_data *)(gdata))->error_data_length)
>> +#define acpi_hest_generic_data_size(gdata) \
>> + ((acpi_hest_generic_data_version(gdata) >= 3) ? \
>> + sizeof(struct acpi_hest_generic_data_v300) : \
>> + sizeof(struct acpi_hest_generic_data))
>> +#define acpi_hest_generic_data_record_size(gdata) \
>> + (acpi_hest_generic_data_size(gdata) + \
>> + acpi_hest_generic_data_error_length(gdata))
>> +#define acpi_hest_generic_data_next(gdata) \
>> + ((void *)(gdata) + acpi_hest_generic_data_record_size(gdata))
>> +
>
> Rest looks good
>
> Cheers
> Suzuki
Thanks,
Tyler
--
Qualcomm Datacenter Technologies, Inc. as an affiliate of Qualcomm Technologies, Inc.
Qualcomm Technologies, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project.
next prev parent reply other threads:[~2016-10-24 20:33 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-10-21 17:30 [PATCH V4 00/10] Add UEFI 2.6 and ACPI 6.1 updates for RAS on ARM64 Tyler Baicar
2016-10-21 17:30 ` [PATCH V4 01/10] acpi: apei: read ack upon ghes record consumption Tyler Baicar
2016-10-24 8:51 ` Suzuki K Poulose
2016-10-24 20:28 ` Baicar, Tyler
2016-10-21 17:30 ` [PATCH V4 02/10] ras: acpi/apei: cper: generic error data entry v3 per ACPI 6.1 Tyler Baicar
2016-10-24 9:50 ` Suzuki K Poulose
2016-10-24 20:33 ` Baicar, Tyler [this message]
2016-10-21 17:30 ` [PATCH V4 03/10] efi: parse ARMv8 processor error Tyler Baicar
2016-10-21 17:30 ` [PATCH V4 04/10] arm64: exception: handle Synchronous External Abort Tyler Baicar
2016-10-21 17:30 ` [PATCH V4 05/10] acpi: apei: handle SEA notification type for ARMv8 Tyler Baicar
2016-10-21 17:30 ` [PATCH V4 06/10] acpi: apei: panic OS with fatal error status block Tyler Baicar
2016-10-21 17:30 ` [PATCH V4 07/10] efi: print unrecognized CPER section Tyler Baicar
2016-10-21 17:30 ` [PATCH V4 08/10] ras: acpi / apei: generate trace event for " Tyler Baicar
2016-10-21 17:30 ` [PATCH V4 09/10] trace, ras: add ARM processor error trace event Tyler Baicar
2016-10-21 20:34 ` Steven Rostedt
2016-10-24 17:21 ` Baicar, Tyler
2016-10-21 17:30 ` [PATCH V4 10/10] arm64: KVM: add guest SEA support Tyler Baicar
2016-10-31 10:02 ` Russell King - ARM Linux
2016-10-31 22:36 ` Baicar, Tyler
[not found] ` <85443ee1-9981-c7e8-fe88-a3aac48c3e80-sgV2jX0FEOL9JmXXK+q4OQ@public.gmane.org>
2016-11-01 9:36 ` Russell King - ARM Linux
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=2422a0c6-ebe4-7f51-2b8d-c42b9bb468b1@codeaurora.org \
--to=tbaicar@codeaurora.org \
--cc=Suzuki.Poulose@arm.com \
--cc=akpm@linux-foundation.org \
--cc=astone@redhat.com \
--cc=bristot@redhat.com \
--cc=catalin.marinas@arm.com \
--cc=eun.taik.lee@samsung.com \
--cc=fu.wei@linaro.org \
--cc=hanjun.guo@linaro.org \
--cc=harba@codeaurora.org \
--cc=james.morse@arm.com \
--cc=kvm@vger.kernel.org \
--cc=kvmarm@lists.cs.columbia.edu \
--cc=lenb@kernel.org \
--cc=linux-acpi@vger.kernel.org \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-efi@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux@armlinux.org.uk \
--cc=lv.zheng@intel.com \
--cc=marc.zyngier@arm.com \
--cc=mark.rutland@arm.com \
--cc=matt@codeblueprint.co.uk \
--cc=nkaje@codeaurora.org \
--cc=paul.gortmaker@windriver.com \
--cc=pbonzini@redhat.com \
--cc=punit.agrawal@arm.com \
--cc=rjw@rjwysocki.net \
--cc=rkrcmar@redhat.com \
--cc=robert.moore@intel.com \
--cc=rostedt@goodmis.org \
--cc=rruigrok@codeaurora.org \
--cc=sandeepa.s.prabhu@gmail.com \
--cc=shijie.huang@arm.com \
--cc=tomasz.nowicki@linaro.org \
--cc=will.deacon@arm.com \
--cc=zjzhang@codeaurora.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).