linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Robert Richter <rrichter@marvell.com>
To: Hanna Hawa <hhhawa@amazon.com>
Cc: "bp@alien8.de" <bp@alien8.de>,
	"mchehab@kernel.org" <mchehab@kernel.org>,
	"james.morse@arm.com" <james.morse@arm.com>,
	"linux-edac@vger.kernel.org" <linux-edac@vger.kernel.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"dwmw@amazon.co.uk" <dwmw@amazon.co.uk>,
	"benh@amazon.com" <benh@amazon.com>,
	"ronenk@amazon.com" <ronenk@amazon.com>,
	"talel@amazon.com" <talel@amazon.com>,
	"jonnyc@amazon.com" <jonnyc@amazon.com>,
	"hanochu@amazon.com" <hanochu@amazon.com>
Subject: Re: [PATCH 1/1] edac: Add an API for edac device to report for multiple errors
Date: Thu, 5 Sep 2019 09:56:51 +0000	[thread overview]
Message-ID: <20190905095642.ohqkcllm7wufx6sc@rric.localdomain> (raw)
In-Reply-To: <20190905083745.6899-1-hhhawa@amazon.com>

Hi Hanna,

thanks for the update. See below.

On 05.09.19 09:37:45, Hanna Hawa wrote:
> Add an API for edac device to report multiple errors with same type.
> 
> Signed-off-by: Hanna Hawa <hhhawa@amazon.com>
> ---
>  drivers/edac/edac_device.c | 66 +++++++++++++++++++++++++++++---------
>  drivers/edac/edac_device.h | 31 ++++++++++++++++--
>  2 files changed, 79 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c
> index 65cf2b9355c4..bf6a4fd9831b 100644
> --- a/drivers/edac/edac_device.c
> +++ b/drivers/edac/edac_device.c
> @@ -555,12 +555,15 @@ static inline int edac_device_get_panic_on_ue(struct edac_device_ctl_info
>  	return edac_dev->panic_on_ue;
>  }
>  
> -void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
> -			int inst_nr, int block_nr, const char *msg)
> +static void __edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
> +			   u16 error_count, int inst_nr, int block_nr,

Just curious: why u16? Is it derived from some register mask size? Maybe just use unsigned int?

I think the variable can be shortened to 'count', the meaning should
still be clear.

> +			   const char *msg)
>  {
>  	struct edac_device_instance *instance;
>  	struct edac_device_block *block = NULL;
>  
> +	WARN_ON(!error_count);

The function should return early in this case instead of continuing with a zero count.

Better use WARN_ON_ONCE() to avoid flooding.

The check should be moved to edac_device_handle_ce_count().

> +
>  	if ((inst_nr >= edac_dev->nr_instances) || (inst_nr < 0)) {
>  		edac_device_printk(edac_dev, KERN_ERR,
>  				"INTERNAL ERROR: 'instance' out of range "
> @@ -582,27 +585,44 @@ void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
>  
>  	if (instance->nr_blocks > 0) {
>  		block = instance->blocks + block_nr;
> -		block->counters.ce_count++;
> +		block->counters.ce_count += error_count;
>  	}
>  
>  	/* Propagate the count up the 'totals' tree */
> -	instance->counters.ce_count++;
> -	edac_dev->counters.ce_count++;
> +	instance->counters.ce_count += error_count;
> +	edac_dev->counters.ce_count += error_count;
>  
>  	if (edac_device_get_log_ce(edac_dev))
>  		edac_device_printk(edac_dev, KERN_WARNING,
> -				"CE: %s instance: %s block: %s '%s'\n",
> +				"CE: %s instance: %s block: %s count: %d '%s'\n",
>  				edac_dev->ctl_name, instance->name,
> -				block ? block->name : "N/A", msg);
> +				block ? block->name : "N/A", error_count, msg);
> +}
> +
> +void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
> +			   int inst_nr, int block_nr, const char *msg)
> +{
> +	__edac_device_handle_ce(edac_dev, 1, inst_nr, block_nr, msg);
>  }
>  EXPORT_SYMBOL_GPL(edac_device_handle_ce);

We could just export the __*() versions of those functions and make
everything else inline in the header file. Though it would be better to
do this in two patches, to avoid an ABI breakage in case someone wants
to backport it. Let's see what others say here.

>  
> -void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
> -			int inst_nr, int block_nr, const char *msg)
> +void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev,
> +				 u16 error_count, int inst_nr, int block_nr,
> +				 const char *msg)
> +{
> +	__edac_device_handle_ce(edac_dev, error_count, inst_nr, block_nr, msg);
> +}
> +EXPORT_SYMBOL_GPL(edac_device_handle_ce_count);
> +
> +static void __edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
> +			   u16 error_count, int inst_nr, int block_nr,
> +			   const char *msg)

All the above applies for this function too.

>  {
>  	struct edac_device_instance *instance;
>  	struct edac_device_block *block = NULL;
>  
> +	WARN_ON(!error_count);
> +
>  	if ((inst_nr >= edac_dev->nr_instances) || (inst_nr < 0)) {
>  		edac_device_printk(edac_dev, KERN_ERR,
>  				"INTERNAL ERROR: 'instance' out of range "
> @@ -624,22 +644,36 @@ void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
>  
>  	if (instance->nr_blocks > 0) {
>  		block = instance->blocks + block_nr;
> -		block->counters.ue_count++;
> +		block->counters.ue_count += error_count;
>  	}
>  
>  	/* Propagate the count up the 'totals' tree */
> -	instance->counters.ue_count++;
> -	edac_dev->counters.ue_count++;
> +	instance->counters.ue_count += error_count;
> +	edac_dev->counters.ue_count += error_count;
>  
>  	if (edac_device_get_log_ue(edac_dev))
>  		edac_device_printk(edac_dev, KERN_EMERG,
> -				"UE: %s instance: %s block: %s '%s'\n",
> +				"UE: %s instance: %s block: %s count: %d '%s'\n",
>  				edac_dev->ctl_name, instance->name,
> -				block ? block->name : "N/A", msg);
> +				block ? block->name : "N/A", error_count, msg);
>  
>  	if (edac_device_get_panic_on_ue(edac_dev))
> -		panic("EDAC %s: UE instance: %s block %s '%s'\n",
> +		panic("EDAC %s: UE instance: %s block %s count: %d '%s'\n",
>  			edac_dev->ctl_name, instance->name,
> -			block ? block->name : "N/A", msg);
> +			block ? block->name : "N/A", error_count, msg);
> +}
> +
> +void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
> +			   int inst_nr, int block_nr, const char *msg)
> +{
> +	__edac_device_handle_ue(edac_dev, 1, inst_nr, block_nr, msg);
>  }
>  EXPORT_SYMBOL_GPL(edac_device_handle_ue);
> +
> +void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev,
> +				 u16 error_count, int inst_nr, int block_nr,
> +				 const char *msg)
> +{
> +	__edac_device_handle_ue(edac_dev, error_count, inst_nr, block_nr, msg);
> +}
> +EXPORT_SYMBOL_GPL(edac_device_handle_ue_count);
> diff --git a/drivers/edac/edac_device.h b/drivers/edac/edac_device.h
> index 1aaba74ae411..c8dc83eda64f 100644
> --- a/drivers/edac/edac_device.h
> +++ b/drivers/edac/edac_device.h
> @@ -287,7 +287,7 @@ extern struct edac_device_ctl_info *edac_device_del_device(struct device *dev);
>  
>  /**
>   * edac_device_handle_ue():
> - *	perform a common output and handling of an 'edac_dev' UE event
> + *	perform a common output and handling of an 'edac_dev' single UE event
>   *
>   * @edac_dev: pointer to struct &edac_device_ctl_info
>   * @inst_nr: number of the instance where the UE error happened
> @@ -298,7 +298,7 @@ extern void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
>  				int inst_nr, int block_nr, const char *msg);
>  /**
>   * edac_device_handle_ce():
> - *	perform a common output and handling of an 'edac_dev' CE event
> + *	perform a common output and handling of an 'edac_dev' single CE event
>   *
>   * @edac_dev: pointer to struct &edac_device_ctl_info
>   * @inst_nr: number of the instance where the CE error happened
> @@ -308,6 +308,33 @@ extern void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
>  extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
>  				int inst_nr, int block_nr, const char *msg);
>  
> +/**
> + * edac_device_handle_ue_count():
> + *	perform a common output and handling of an 'edac_dev'
> + *
> + * @edac_dev: pointer to struct &edac_device_ctl_info
> + * @error_count: number of errors of the same type
> + * @inst_nr: number of the instance where the UE error happened
> + * @block_nr: number of the block where the UE error happened
> + * @msg: message to be printed
> + */
> +extern void edac_device_handle_ue_count(struct edac_device_ctl_info *edac_dev,
> +					u16 error_count, int inst_nr,
> +					int block_nr, const char *msg);
> +/**
> + * edac_device_handle_ce_count():
> + *	perform a common output and handling of an 'edac_dev'
> + *
> + * @edac_dev: pointer to struct &edac_device_ctl_info
> + * @error_count: number of errors of the same type
> + * @inst_nr: number of the instance where the CE error happened
> + * @block_nr: number of the block where the CE error happened
> + * @msg: message to be printed
> + */
> +extern void edac_device_handle_ce_count(struct edac_device_ctl_info *edac_dev,
> +					u16 error_count, int inst_nr,
> +					int block_nr, const char *msg);
> +

Looks otherwise good to me.

Thanks,

-Robert

>  /**
>   * edac_device_alloc_index: Allocate a unique device index number
>   *
> -- 
> 2.17.1
> 

  reply	other threads:[~2019-09-05  9:57 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-09-05  8:37 [PATCH 1/1] edac: Add an API for edac device to report for multiple errors Hanna Hawa
2019-09-05  9:56 ` Robert Richter [this message]
2019-09-08  7:58   ` Hawa, Hanna
2019-09-08  8:16     ` Borislav Petkov
2019-09-08  8:35       ` Borislav Petkov
2019-09-10 11:10         ` Hawa, Hanna
2019-09-10 12:42     ` Robert Richter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190905095642.ohqkcllm7wufx6sc@rric.localdomain \
    --to=rrichter@marvell.com \
    --cc=benh@amazon.com \
    --cc=bp@alien8.de \
    --cc=dwmw@amazon.co.uk \
    --cc=hanochu@amazon.com \
    --cc=hhhawa@amazon.com \
    --cc=james.morse@arm.com \
    --cc=jonnyc@amazon.com \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mchehab@kernel.org \
    --cc=ronenk@amazon.com \
    --cc=talel@amazon.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).