netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Jakub Kicinski <kuba@kernel.org>
To: Michael Chan <michael.chan@broadcom.com>, Jiri Pirko <jiri@resnulli.us>
Cc: davem@davemloft.net, netdev@vger.kernel.org, gospo@broadcom.com,
	eranbe@nvidia.com
Subject: Re: [PATCH net-next 9/9] bnxt_en: add an nvm test for hw diagnose
Date: Mon, 7 Mar 2022 14:24:52 -0800	[thread overview]
Message-ID: <20220307142452.70c95fd1@kicinski-fedora-pc1c0hjn.dhcp.thefacebook.com> (raw)
In-Reply-To: <1646470482-13763-10-git-send-email-michael.chan@broadcom.com>

On Sat,  5 Mar 2022 03:54:42 -0500 Michael Chan wrote:
> From: Vikas Gupta <vikas.gupta@broadcom.com>
> 
> Add an NVM test function for devlink hw reporter.
> In this function an NVM VPD area is read followed by
> a write. Test result is cached and if it is successful then
> the next test can be conducted only after HW_RETEST_MIN_TIME to
> avoid frequent writes to the NVM.

You seem to execute a self-test from the .diagnose callback.
That really seems like an abuse of the API. It's not hard to
add a separate self-test callback.

Jiri, WDYT?

> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
> index fa0df43ddc1a..9dd878def3c2 100644
> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
> @@ -1544,17 +1544,29 @@ struct bnxt_ctx_mem_info {
>  };
>  
>  enum bnxt_hw_err {
> -	BNXT_HW_STATUS_HEALTHY		= 0x0,
> -	BNXT_HW_STATUS_NVM_WRITE_ERR	= 0x1,
> -	BNXT_HW_STATUS_NVM_ERASE_ERR	= 0x2,
> -	BNXT_HW_STATUS_NVM_UNKNOWN_ERR	= 0x3,
> +	BNXT_HW_STATUS_HEALTHY			= 0x0,
> +	BNXT_HW_STATUS_NVM_WRITE_ERR		= 0x1,
> +	BNXT_HW_STATUS_NVM_ERASE_ERR		= 0x2,
> +	BNXT_HW_STATUS_NVM_UNKNOWN_ERR		= 0x3,
> +	BNXT_HW_STATUS_NVM_TEST_VPD_ENT_ERR	= 0x4,
> +	BNXT_HW_STATUS_NVM_TEST_VPD_READ_ERR	= 0x5,
> +	BNXT_HW_STATUS_NVM_TEST_VPD_WRITE_ERR	= 0x6,
> +	BNXT_HW_STATUS_NVM_TEST_INCMPL_ERR	= 0x7,
>  };
>  
>  struct bnxt_hw_health {
>  	u32 nvm_err_address;
>  	u32 nvm_write_errors;
>  	u32 nvm_erase_errors;
> +	u32 nvm_test_vpd_ent_errors;
> +	u32 nvm_test_vpd_read_errors;
> +	u32 nvm_test_vpd_write_errors;
> +	u32 nvm_test_incmpl_errors;
>  	u8 synd;
> +	/* max a test in a day if previous test was successful */
> +#define HW_RETEST_MIN_TIME	(1000 * 3600 * 24)
> +	u8 nvm_test_result;
> +	unsigned long nvm_test_timestamp;
>  	struct devlink_health_reporter *hw_reporter;
>  };
>  
> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
> index a802bbda1c27..77e55105d645 100644
> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
> @@ -20,6 +20,7 @@
>  #include "bnxt_ulp.h"
>  #include "bnxt_ptp.h"
>  #include "bnxt_coredump.h"
> +#include "bnxt_nvm_defs.h"	/* NVRAM content constant and structure defs */
>  
>  static void __bnxt_fw_recover(struct bnxt *bp)
>  {
> @@ -263,20 +264,82 @@ static const char *hw_err_str(u8 synd)
>  		return "nvm erase error";
>  	case BNXT_HW_STATUS_NVM_UNKNOWN_ERR:
>  		return "unrecognized nvm error";
> +	case BNXT_HW_STATUS_NVM_TEST_VPD_ENT_ERR:
> +		return "nvm test vpd entry error";
> +	case BNXT_HW_STATUS_NVM_TEST_VPD_READ_ERR:
> +		return "nvm test vpd read error";
> +	case BNXT_HW_STATUS_NVM_TEST_VPD_WRITE_ERR:
> +		return "nvm test vpd write error";
> +	case BNXT_HW_STATUS_NVM_TEST_INCMPL_ERR:
> +		return "nvm test incomplete error";
>  	default:
>  		return "unknown hw error";
>  	}
>  }
>  
> +static void bnxt_nvm_test(struct bnxt *bp)
> +{
> +	struct bnxt_hw_health *h = &bp->hw_health;
> +	u32 datalen;
> +	u16 index;
> +	u8 *buf;
> +
> +	if (!h->nvm_test_result) {
> +		if (!h->nvm_test_timestamp ||
> +		    time_after(jiffies, h->nvm_test_timestamp +
> +					msecs_to_jiffies(HW_RETEST_MIN_TIME)))
> +			h->nvm_test_timestamp = jiffies;
> +		else
> +			return;
> +	}
> +
> +	if (bnxt_find_nvram_item(bp->dev, BNX_DIR_TYPE_VPD,
> +				 BNX_DIR_ORDINAL_FIRST, BNX_DIR_EXT_NONE,
> +				 &index, NULL, &datalen) || !datalen) {
> +		h->nvm_test_result = BNXT_HW_STATUS_NVM_TEST_VPD_ENT_ERR;
> +		h->nvm_test_vpd_ent_errors++;
> +		return;
> +	}
> +
> +	buf = kzalloc(datalen, GFP_KERNEL);
> +	if (!buf) {
> +		h->nvm_test_result = BNXT_HW_STATUS_NVM_TEST_INCMPL_ERR;
> +		h->nvm_test_incmpl_errors++;
> +		return;
> +	}
> +
> +	if (bnxt_get_nvram_item(bp->dev, index, 0, datalen, buf)) {
> +		h->nvm_test_result = BNXT_HW_STATUS_NVM_TEST_VPD_READ_ERR;
> +		h->nvm_test_vpd_read_errors++;
> +		goto err;
> +	}
> +
> +	if (bnxt_flash_nvram(bp->dev, BNX_DIR_TYPE_VPD, BNX_DIR_ORDINAL_FIRST,
> +			     BNX_DIR_EXT_NONE, 0, 0, buf, datalen)) {
> +		h->nvm_test_result = BNXT_HW_STATUS_NVM_TEST_VPD_WRITE_ERR;
> +		h->nvm_test_vpd_write_errors++;
> +	}
> +
> +err:
> +	kfree(buf);
> +}
> +
>  static int bnxt_hw_diagnose(struct devlink_health_reporter *reporter,
>  			    struct devlink_fmsg *fmsg,
>  			    struct netlink_ext_ack *extack)
>  {
>  	struct bnxt *bp = devlink_health_reporter_priv(reporter);
>  	struct bnxt_hw_health *h = &bp->hw_health;
> +	u8 synd = h->synd;
>  	int rc;
>  
> -	rc = devlink_fmsg_string_pair_put(fmsg, "Status", hw_err_str(h->synd));
> +	bnxt_nvm_test(bp);
> +	if (h->nvm_test_result) {
> +		synd = h->nvm_test_result;
> +		devlink_health_report(h->hw_reporter, hw_err_str(synd), NULL);
> +	}
> +
> +	rc = devlink_fmsg_string_pair_put(fmsg, "Status", hw_err_str(synd));
>  	if (rc)
>  		return rc;
>  	rc = devlink_fmsg_u32_pair_put(fmsg, "nvm_write_errors", h->nvm_write_errors);
> @@ -285,6 +348,23 @@ static int bnxt_hw_diagnose(struct devlink_health_reporter *reporter,
>  	rc = devlink_fmsg_u32_pair_put(fmsg, "nvm_erase_errors", h->nvm_erase_errors);
>  	if (rc)
>  		return rc;
> +	rc = devlink_fmsg_u32_pair_put(fmsg, "nvm_test_vpd_ent_errors",
> +				       h->nvm_test_vpd_ent_errors);
> +	if (rc)
> +		return rc;
> +	rc = devlink_fmsg_u32_pair_put(fmsg, "nvm_test_vpd_read_errors",
> +				       h->nvm_test_vpd_read_errors);
> +	if (rc)
> +		return rc;
> +	rc = devlink_fmsg_u32_pair_put(fmsg, "nvm_test_vpd_write_errors",
> +				       h->nvm_test_vpd_write_errors);
> +	if (rc)
> +		return rc;
> +	rc = devlink_fmsg_u32_pair_put(fmsg, "nvm_test_incomplete_errors",
> +				       h->nvm_test_incmpl_errors);
> +	if (rc)
> +		return rc;
> +
>  	return 0;
>  }
>  
> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
> index eadaca42ed96..178074795b27 100644
> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
> @@ -2168,14 +2168,10 @@ static void bnxt_print_admin_err(struct bnxt *bp)
>  	netdev_info(bp->dev, "PF does not have admin privileges to flash or reset the device\n");
>  }
>  
> -static int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal,
> -				u16 ext, u16 *index, u32 *item_length,
> -				u32 *data_length);
> -
> -static int bnxt_flash_nvram(struct net_device *dev, u16 dir_type,
> -			    u16 dir_ordinal, u16 dir_ext, u16 dir_attr,
> -			    u32 dir_item_len, const u8 *data,
> -			    size_t data_len)
> +int bnxt_flash_nvram(struct net_device *dev, u16 dir_type,
> +		     u16 dir_ordinal, u16 dir_ext, u16 dir_attr,
> +		     u32 dir_item_len, const u8 *data,
> +		     size_t data_len)
>  {
>  	struct bnxt *bp = netdev_priv(dev);
>  	struct hwrm_nvm_write_input *req;
> @@ -2819,8 +2815,8 @@ static int bnxt_get_nvram_directory(struct net_device *dev, u32 len, u8 *data)
>  	return rc;
>  }
>  
> -static int bnxt_get_nvram_item(struct net_device *dev, u32 index, u32 offset,
> -			       u32 length, u8 *data)
> +int bnxt_get_nvram_item(struct net_device *dev, u32 index, u32 offset,
> +			u32 length, u8 *data)
>  {
>  	struct bnxt *bp = netdev_priv(dev);
>  	int rc;
> @@ -2854,9 +2850,9 @@ static int bnxt_get_nvram_item(struct net_device *dev, u32 index, u32 offset,
>  	return rc;
>  }
>  
> -static int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal,
> -				u16 ext, u16 *index, u32 *item_length,
> -				u32 *data_length)
> +int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal,
> +			 u16 ext, u16 *index, u32 *item_length,
> +			 u32 *data_length)
>  {
>  	struct hwrm_nvm_find_dir_entry_output *output;
>  	struct hwrm_nvm_find_dir_entry_input *req;
> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
> index 6aa44840f13a..2593e0049582 100644
> --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
> +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
> @@ -56,6 +56,13 @@ int bnxt_hwrm_firmware_reset(struct net_device *dev, u8 proc_type,
>  int bnxt_flash_package_from_fw_obj(struct net_device *dev, const struct firmware *fw,
>  				   u32 install_type);
>  int bnxt_get_pkginfo(struct net_device *dev, char *ver, int size);
> +int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal, u16 ext,
> +			 u16 *index, u32 *item_length, u32 *data_length);
> +int bnxt_get_nvram_item(struct net_device *dev, u32 index, u32 offset,
> +			u32 length, u8 *data);
> +int bnxt_flash_nvram(struct net_device *dev, u16 dir_type, u16 dir_ordinal,
> +		     u16 dir_ext, u16 dir_attr, u32 dir_item_len,
> +		     const u8 *data, size_t data_len);
>  void bnxt_ethtool_init(struct bnxt *bp);
>  void bnxt_ethtool_free(struct bnxt *bp);
>  


  reply	other threads:[~2022-03-07 22:25 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-03-05  8:54 [PATCH net-next 0/9] bnxt_en: Updates Michael Chan
2022-03-05  8:54 ` [PATCH net-next 1/9] bnxt_en: refactor error handling of HWRM_NVM_INSTALL_UPDATE Michael Chan
2022-03-05  8:54 ` [PATCH net-next 2/9] bnxt_en: add more error checks to HWRM_NVM_INSTALL_UPDATE Michael Chan
2022-03-05  8:54 ` [PATCH net-next 3/9] bnxt_en: parse result field when NVRAM package install fails Michael Chan
2022-03-07 22:13   ` Jakub Kicinski
2022-03-05  8:54 ` [PATCH net-next 4/9] bnxt_en: introduce initial link state of unknown Michael Chan
2022-03-05  8:54 ` [PATCH net-next 5/9] bnxt_en: Properly report no pause support on some cards Michael Chan
2022-03-05  8:54 ` [PATCH net-next 6/9] bnxt_en: Eliminate unintended link toggle during FW reset Michael Chan
2022-03-05  8:54 ` [PATCH net-next 7/9] bnxt_en: Do not destroy health reporters during reset Michael Chan
2022-03-05  8:54 ` [PATCH net-next 8/9] bnxt_en: implement hw health reporter Michael Chan
2022-03-07 22:21   ` Jakub Kicinski
2022-03-05  8:54 ` [PATCH net-next 9/9] bnxt_en: add an nvm test for hw diagnose Michael Chan
2022-03-07 22:24   ` Jakub Kicinski [this message]
2022-03-05 11:20 ` [PATCH net-next 0/9] bnxt_en: Updates patchwork-bot+netdevbpf
2022-03-07 22:27 ` Jakub Kicinski
2022-03-07 23:56   ` Michael Chan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220307142452.70c95fd1@kicinski-fedora-pc1c0hjn.dhcp.thefacebook.com \
    --to=kuba@kernel.org \
    --cc=davem@davemloft.net \
    --cc=eranbe@nvidia.com \
    --cc=gospo@broadcom.com \
    --cc=jiri@resnulli.us \
    --cc=michael.chan@broadcom.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).