linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Borislav Petkov <bp@alien8.de>
To: Tyler Baicar <tbaicar@codeaurora.org>
Cc: christoffer.dall@linaro.org, marc.zyngier@arm.com,
	pbonzini@redhat.com, rkrcmar@redhat.com, linux@armlinux.org.uk,
	catalin.marinas@arm.com, will.deacon@arm.com, rjw@rjwysocki.net,
	lenb@kernel.org, matt@codeblueprint.co.uk,
	robert.moore@intel.com, lv.zheng@intel.com, nkaje@codeaurora.org,
	zjzhang@codeaurora.org, mark.rutland@arm.com,
	james.morse@arm.com, akpm@linux-foundation.org,
	eun.taik.lee@samsung.com, sandeepa.s.prabhu@gmail.com,
	labbott@redhat.com, shijie.huang@arm.com,
	rruigrok@codeaurora.org, paul.gortmaker@windriver.com,
	tn@semihalf.com, fu.wei@linaro.org, rostedt@goodmis.org,
	bristot@redhat.com, linux-arm-kernel@lists.infradead.org,
	kvmarm@lists.cs.columbia.edu, kvm@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-acpi@vger.kernel.org,
	linux-efi@vger.kernel.org, devel@acpica.org,
	Suzuki.Poulose@arm.com, punit.agrawal@arm.com, astone@redhat.com,
	harba@codeaurora.org, hanjun.guo@linaro.org,
	john.garry@huawei.com, shiju.jose@huawei.com, joe@perches.com,
	rafael@kernel.org, tony.luck@intel.com, gengdongjiu@huawei.com,
	xiexiuqi@huawei.com
Subject: Re: [PATCH V15 04/11] efi: parse ARM processor error
Date: Fri, 21 Apr 2017 19:55:27 +0200	[thread overview]
Message-ID: <20170421175527.fjwnqd22jz7br5yu@pd.tnic> (raw)
In-Reply-To: <1492556723-9189-5-git-send-email-tbaicar@codeaurora.org>

On Tue, Apr 18, 2017 at 05:05:16PM -0600, Tyler Baicar wrote:
> Add support for ARM Common Platform Error Record (CPER).
> UEFI 2.6 specification adds support for ARM specific
> processor error information to be reported as part of the
> CPER records. This provides more detail on for processor error logs.
> 
> Signed-off-by: Tyler Baicar <tbaicar@codeaurora.org>
> CC: Jonathan (Zhixiong) Zhang <zjzhang@codeaurora.org>
> Reviewed-by: James Morse <james.morse@arm.com>
> Reviewed-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---
>  drivers/firmware/efi/cper.c | 135 ++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/cper.h        |  54 ++++++++++++++++++
>  2 files changed, 189 insertions(+)
> 
> diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
> index 46585f9..f959185 100644
> --- a/drivers/firmware/efi/cper.c
> +++ b/drivers/firmware/efi/cper.c
> @@ -110,12 +110,15 @@ void cper_print_bits(const char *pfx, unsigned int bits,
>  static const char * const proc_type_strs[] = {
>  	"IA32/X64",
>  	"IA64",
> +	"ARM",
>  };
>  
>  static const char * const proc_isa_strs[] = {
>  	"IA32",
>  	"IA64",
>  	"X64",
> +	"ARM A32/T32",
> +	"ARM A64",
>  };
>  
>  static const char * const proc_error_type_strs[] = {
> @@ -184,6 +187,128 @@ static void cper_print_proc_generic(const char *pfx,
>  		printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
>  }
>  
> +#if defined(CONFIG_ARM64) || defined(CONFIG_ARM)
> +static const char * const arm_reg_ctx_strs[] = {
> +	"AArch32 general purpose registers",
> +	"AArch32 EL1 context registers",
> +	"AArch32 EL2 context registers",
> +	"AArch32 secure context registers",
> +	"AArch64 general purpose registers",
> +	"AArch64 EL1 context registers",
> +	"AArch64 EL2 context registers",
> +	"AArch64 EL3 context registers",
> +	"Misc. system register structure",
> +};
> +
> +static void cper_print_proc_arm(const char *pfx,
> +				const struct cper_sec_proc_arm *proc)
> +{
> +	int i, len, max_ctx_type;
> +	struct cper_arm_err_info *err_info;
> +	struct cper_arm_ctx_info *ctx_info;
> +	char newpfx[64];
> +
> +	printk("%ssection length: %d\n", pfx, proc->section_length);

We need to dump section length because?

> +	printk("%sMIDR: 0x%016llx\n", pfx, proc->midr);
> +
> +	len = proc->section_length - (sizeof(*proc) +
> +		proc->err_info_num * (sizeof(*err_info)));
> +	if (len < 0) {
> +		printk("%ssection length is too small\n", pfx);

Now here we *can* dump it.

> +		printk("%sfirmware-generated error record is incorrect\n", pfx);
> +		printk("%sERR_INFO_NUM is %d\n", pfx, proc->err_info_num);
> +		return;
> +	}
> +
> +	if (proc->validation_bits & CPER_ARM_VALID_MPIDR)
> +		printk("%sMPIDR: 0x%016llx\n", pfx, proc->mpidr);


<---- newline here.

Also, what is MPIDR and can it be written in a more user-friendly manner
and not be an abbreviation?

> +	if (proc->validation_bits & CPER_ARM_VALID_AFFINITY_LEVEL)
> +		printk("%serror affinity level: %d\n", pfx,
> +			proc->affinity_level);
> +	if (proc->validation_bits & CPER_ARM_VALID_RUNNING_STATE) {
> +		printk("%srunning state: 0x%x\n", pfx, proc->running_state);
> +		printk("%sPSCI state: %d\n", pfx, proc->psci_state);

One more abbreviation. Please consider whether having the abbreviations
or actually writing them out is more user-friendly.

> +	}
> +
> +	snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);

That INDENT_SP thing is just silly, someone should kill it.

> +
> +	err_info = (struct cper_arm_err_info *)(proc + 1);
> +	for (i = 0; i < proc->err_info_num; i++) {
> +		printk("%sError info structure %d:\n", pfx, i);
> +		printk("%sversion:%d\n", newpfx, err_info->version);
> +		printk("%slength:%d\n", newpfx, err_info->length);

<---- newline here.

Why do we even dump version and info for *every* err_info structure?

> +		if (err_info->validation_bits &
> +		    CPER_ARM_INFO_VALID_MULTI_ERR) {
> +			if (err_info->multiple_error == 0)
> +				printk("%ssingle error\n", newpfx);
> +			else if (err_info->multiple_error == 1)
> +				printk("%smultiple errors\n", newpfx);
> +			else
> +				printk("%smultiple errors count:%u\n",
> +				newpfx, err_info->multiple_error);

So this can be simply: "num errors: %d", err_info->multiple_error+1...

Without checking CPER_ARM_INFO_VALID_MULTI_ERR.

> +		}

<---- newline here.

> +		if (err_info->validation_bits & CPER_ARM_INFO_VALID_FLAGS) {
> +			if (err_info->flags & CPER_ARM_INFO_FLAGS_FIRST)
> +				printk("%sfirst error captured\n", newpfx);
> +			if (err_info->flags & CPER_ARM_INFO_FLAGS_LAST)
> +				printk("%slast error captured\n", newpfx);
> +			if (err_info->flags & CPER_ARM_INFO_FLAGS_PROPAGATED)
> +				printk("%spropagated error captured\n",
> +				       newpfx);
> +			if (err_info->flags & CPER_ARM_INFO_FLAGS_OVERFLOW)
> +				printk("%soverflow occurred, error info is incomplete\n",
> +				       newpfx);
> +		}

<---- newline here.

> +		printk("%serror_type: %d, %s\n", newpfx, err_info->type,
> +			err_info->type < ARRAY_SIZE(proc_error_type_strs) ?
> +			proc_error_type_strs[err_info->type] : "unknown");
> +		if (err_info->validation_bits & CPER_ARM_INFO_VALID_ERR_INFO)
> +			printk("%serror_info: 0x%016llx\n", newpfx,
> +			       err_info->error_info);

err_info->error_info ?

What is that supposed to mean? A u64 value of some sorts.

> +		if (err_info->validation_bits & CPER_ARM_INFO_VALID_VIRT_ADDR)
> +			printk("%svirtual fault address: 0x%016llx\n",
> +				newpfx, err_info->virt_fault_addr);
> +		if (err_info->validation_bits &
> +		    CPER_ARM_INFO_VALID_PHYSICAL_ADDR)

Just let that line stick out.

> +			printk("%sphysical fault address: 0x%016llx\n",
> +				newpfx, err_info->physical_fault_addr);
> +		err_info += 1;
> +	}

<---- newline here.

That function is kinda missing newlines.

> +	ctx_info = (struct cper_arm_ctx_info *)err_info;
> +	max_ctx_type = ARRAY_SIZE(arm_reg_ctx_strs) - 1;
> +	for (i = 0; i < proc->context_info_num; i++) {
> +		int size = sizeof(*ctx_info) + ctx_info->size;
> +
> +		printk("%sContext info structure %d:\n", pfx, i);
> +		if (len < size) {
> +			printk("%ssection length is too small\n", newpfx);
> +			printk("%sfirmware-generated error record is incorrect\n", pfx);
> +			return;
> +		}
> +		if (ctx_info->type > max_ctx_type) {
> +			printk("%sInvalid context type: %d\n", newpfx,
> +							ctx_info->type);
> +			printk("%sMax context type: %d\n", newpfx,
> +							max_ctx_type);
> +			return;

You can combine those into:

                        printk("%sInvalid context type: %d (max: %d)\n",
				newpfx, ctx_info->type, max_ctx_type);


> +		}
> +		printk("%sregister context type %d: %s\n", newpfx,
> +			ctx_info->type, arm_reg_ctx_strs[ctx_info->type]);

Why dump the type as %d and as a string too? String should be enough, no?

> +		print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4,
> +				(ctx_info + 1), ctx_info->size, 0);
> +		len -= size;
> +		ctx_info = (struct cper_arm_ctx_info *)((long)ctx_info + size);
> +	}
> +
> +	if (len > 0) {
> +		printk("%sVendor specific error info has %u bytes:\n", pfx,
> +		       len);
> +		print_hex_dump(newpfx, "", DUMP_PREFIX_OFFSET, 16, 4, ctx_info,
> +				len, true);

That looks like it should be a debug printk...

> +	}
> +}
> +#endif
> +
>  static const char * const mem_err_type_strs[] = {
>  	"unknown",
>  	"no error",
> @@ -461,6 +586,16 @@ static void cper_estatus_timestamp(const char *pfx,
>  			cper_print_pcie(newpfx, pcie, gdata);
>  		else
>  			goto err_section_too_small;
> +	} else if ((IS_ENABLED(CONFIG_ARM64) || IS_ENABLED(CONFIG_ARM)) &&
> +		   !uuid_le_cmp(*sec_type, CPER_SEC_PROC_ARM)) {
> +		struct cper_sec_proc_arm *arm_err;
> +
> +		arm_err = acpi_hest_get_payload(gdata);

		struct cper_sec_proc_arm *arm_err = acpi_hest_get_payload(gdata);

> +		printk("%ssection_type: ARM processor error\n", newpfx);
> +		if (gdata->error_data_length >= sizeof(*arm_err))
> +			cper_print_proc_arm(newpfx, arm_err);
> +		else
> +			goto err_section_too_small;

You need to build-test your patches before submitting:

drivers/firmware/efi/cper.c: In function ‘cper_estatus_print_section’:
drivers/firmware/efi/cper.c:596:4: error: implicit declaration of function ‘cper_print_proc_arm’ [-Werror=implicit-function-declaration]
    cper_print_proc_arm(newpfx, arm_err);
    ^~~~~~~~~~~~~~~~~~~
cc1: some warnings being treated as errors
make[3]: *** [drivers/firmware/efi/cper.o] Error 1
make[2]: *** [drivers/firmware/efi] Error 2
make[1]: *** [drivers/firmware] Error 2
make[1]: *** Waiting for unfinished jobs....
make: *** [drivers] Error 2
make: *** Waiting for unfinished jobs....

this is a x86 build.

>  	} else
>  		printk("%s""section type: unknown, %pUl\n", newpfx, sec_type);
>  
> diff --git a/include/linux/cper.h b/include/linux/cper.h
> index dcacb1a..85450f3 100644
> --- a/include/linux/cper.h
> +++ b/include/linux/cper.h
> @@ -180,6 +180,10 @@ enum {
>  #define CPER_SEC_PROC_IPF						\
>  	UUID_LE(0xE429FAF1, 0x3CB7, 0x11D4, 0x0B, 0xCA, 0x07, 0x00,	\
>  		0x80, 0xC7, 0x3C, 0x88, 0x81)
> +/* Processor Specific: ARM */
> +#define CPER_SEC_PROC_ARM						\
> +	UUID_LE(0xE19E3D16, 0xBC11, 0x11E4, 0x9C, 0xAA, 0xC2, 0x05,	\
> +		0x1D, 0x5D, 0x46, 0xB0)
>  /* Platform Memory */
>  #define CPER_SEC_PLATFORM_MEM						\
>  	UUID_LE(0xA5BC1114, 0x6F64, 0x4EDE, 0xB8, 0x63, 0x3E, 0x83,	\
> @@ -255,6 +259,22 @@ enum {
>  
>  #define CPER_PCIE_SLOT_SHIFT			3
>  
> +#define CPER_ARM_VALID_MPIDR			0x00000001
> +#define CPER_ARM_VALID_AFFINITY_LEVEL		0x00000002
> +#define CPER_ARM_VALID_RUNNING_STATE		0x00000004
> +#define CPER_ARM_VALID_VENDOR_INFO		0x00000008
> +
> +#define CPER_ARM_INFO_VALID_MULTI_ERR		0x0001
> +#define CPER_ARM_INFO_VALID_FLAGS		0x0002
> +#define CPER_ARM_INFO_VALID_ERR_INFO		0x0004
> +#define CPER_ARM_INFO_VALID_VIRT_ADDR		0x0008
> +#define CPER_ARM_INFO_VALID_PHYSICAL_ADDR	0x0010
> +
> +#define CPER_ARM_INFO_FLAGS_FIRST		0x0001
> +#define CPER_ARM_INFO_FLAGS_LAST		0x0002
> +#define CPER_ARM_INFO_FLAGS_PROPAGATED		0x0004
> +#define CPER_ARM_INFO_FLAGS_OVERFLOW		0x0008

For all of the above use BIT().

> +
>  /*
>   * All tables and structs must be byte-packed to match CPER
>   * specification, since the tables are provided by the system BIOS
> @@ -340,6 +360,40 @@ struct cper_ia_proc_ctx {
>  	__u64	mm_reg_addr;
>  };
>  
> +/* ARM Processor Error Section */
> +struct cper_sec_proc_arm {
> +	__u32	validation_bits;
> +	__u16	err_info_num; /* Number of Processor Error Info */
> +	__u16	context_info_num; /* Number of Processor Context Info Records*/
> +	__u32	section_length;
> +	__u8	affinity_level;
> +	__u8	reserved[3];	/* must be zero */
> +	__u64	mpidr;
> +	__u64	midr;
> +	__u32	running_state; /* Bit 0 set - Processor running. PSCI = 0 */
> +	__u32	psci_state;

Align comments vertically pls.

> +};
> +
> +/* ARM Processor Error Information Structure */
> +struct cper_arm_err_info {
> +	__u8	version;
> +	__u8	length;
> +	__u16	validation_bits;
> +	__u8	type;
> +	__u16	multiple_error;
> +	__u8	flags;
> +	__u64	error_info;
> +	__u64	virt_fault_addr;
> +	__u64	physical_fault_addr;
> +};
> +
> +/* ARM Processor Context Information Structure */
> +struct cper_arm_ctx_info {
> +	__u16	version;
> +	__u16	type;
> +	__u32	size;
> +};

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.

  reply	other threads:[~2017-04-21 18:20 UTC|newest]

Thread overview: 41+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-04-18 23:05 [PATCH V15 00/11] Add UEFI 2.6 and ACPI 6.1 updates for RAS on ARM64 Tyler Baicar
2017-04-18 23:05 ` [PATCH V15 01/11] acpi: apei: read ack upon ghes record consumption Tyler Baicar
2017-04-19 18:31   ` Borislav Petkov
2017-04-19 20:31     ` Baicar, Tyler
2017-04-19 20:41       ` Borislav Petkov
2017-04-18 23:05 ` [PATCH V15 02/11] ras: acpi/apei: cper: add support for generic data v3 structure Tyler Baicar
2017-04-20 11:14   ` Borislav Petkov
2017-04-18 23:05 ` [PATCH V15 03/11] cper: add timestamp print to CPER status printing Tyler Baicar
2017-04-21 12:21   ` Borislav Petkov
2017-04-21 16:04     ` Baicar, Tyler
2017-04-21 17:26       ` Borislav Petkov
2017-04-21 18:08         ` Baicar, Tyler
2017-04-21 18:12           ` Borislav Petkov
2017-04-18 23:05 ` [PATCH V15 04/11] efi: parse ARM processor error Tyler Baicar
2017-04-21 17:55   ` Borislav Petkov [this message]
2017-04-21 18:22     ` Baicar, Tyler
2017-04-24 17:52       ` Borislav Petkov
2017-04-25 16:05         ` Baicar, Tyler
2017-04-25 16:31           ` Borislav Petkov
2017-04-18 23:05 ` [PATCH V15 05/11] arm64: exception: handle Synchronous External Abort Tyler Baicar
2017-04-18 23:05 ` [PATCH V15 06/11] acpi: apei: handle SEA notification type for ARMv8 Tyler Baicar
2017-04-25 17:21   ` Borislav Petkov
2017-04-25 17:41     ` Baicar, Tyler
2017-04-25 17:46       ` Borislav Petkov
2017-05-08 17:28   ` James Morse
2017-05-08 19:59     ` Baicar, Tyler
2017-05-12 16:45       ` James Morse
2017-08-14  7:55   ` Xiongfeng Wang
2017-04-18 23:05 ` [PATCH V15 07/11] acpi: apei: panic OS with fatal error status block Tyler Baicar
2017-04-28 13:07   ` Borislav Petkov
2017-04-18 23:05 ` [PATCH V15 08/11] efi: print unrecognized CPER section Tyler Baicar
2017-05-05 13:27   ` Borislav Petkov
2017-04-18 23:05 ` [PATCH V15 09/11] ras: acpi / apei: generate trace event for " Tyler Baicar
2017-05-05 17:53   ` Borislav Petkov
2017-05-05 18:44   ` Steven Rostedt
2017-04-18 23:05 ` [PATCH V15 10/11] trace, ras: add ARM processor error trace event Tyler Baicar
2017-05-08 17:34   ` Borislav Petkov
2017-04-18 23:05 ` [PATCH V15 11/11] arm/arm64: KVM: add guest SEA support Tyler Baicar
2017-05-08 17:40   ` Borislav Petkov
2017-05-08 19:54     ` Baicar, Tyler
2017-05-08 20:22       ` Borislav Petkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170421175527.fjwnqd22jz7br5yu@pd.tnic \
    --to=bp@alien8.de \
    --cc=Suzuki.Poulose@arm.com \
    --cc=akpm@linux-foundation.org \
    --cc=astone@redhat.com \
    --cc=bristot@redhat.com \
    --cc=catalin.marinas@arm.com \
    --cc=christoffer.dall@linaro.org \
    --cc=devel@acpica.org \
    --cc=eun.taik.lee@samsung.com \
    --cc=fu.wei@linaro.org \
    --cc=gengdongjiu@huawei.com \
    --cc=hanjun.guo@linaro.org \
    --cc=harba@codeaurora.org \
    --cc=james.morse@arm.com \
    --cc=joe@perches.com \
    --cc=john.garry@huawei.com \
    --cc=kvm@vger.kernel.org \
    --cc=kvmarm@lists.cs.columbia.edu \
    --cc=labbott@redhat.com \
    --cc=lenb@kernel.org \
    --cc=linux-acpi@vger.kernel.org \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-efi@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux@armlinux.org.uk \
    --cc=lv.zheng@intel.com \
    --cc=marc.zyngier@arm.com \
    --cc=mark.rutland@arm.com \
    --cc=matt@codeblueprint.co.uk \
    --cc=nkaje@codeaurora.org \
    --cc=paul.gortmaker@windriver.com \
    --cc=pbonzini@redhat.com \
    --cc=punit.agrawal@arm.com \
    --cc=rafael@kernel.org \
    --cc=rjw@rjwysocki.net \
    --cc=rkrcmar@redhat.com \
    --cc=robert.moore@intel.com \
    --cc=rostedt@goodmis.org \
    --cc=rruigrok@codeaurora.org \
    --cc=sandeepa.s.prabhu@gmail.com \
    --cc=shijie.huang@arm.com \
    --cc=shiju.jose@huawei.com \
    --cc=tbaicar@codeaurora.org \
    --cc=tn@semihalf.com \
    --cc=tony.luck@intel.com \
    --cc=will.deacon@arm.com \
    --cc=xiexiuqi@huawei.com \
    --cc=zjzhang@codeaurora.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).