linux-hwmon.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Limonciello, Mario" <mario.limonciello@amd.com>
To: Guenter Roeck <linux@roeck-us.net>
Cc: Clemens Ladisch <clemens@ladisch.de>,
	linux-hwmon@vger.kernel.org,
	Gabriel Craciunescu <nix.or.die@googlemail.com>,
	Wei Huang <wei.huang2@amd.com>, Jean Delvare <jdelvare@suse.com>,
	open list <linux-kernel@vger.kernel.org>
Subject: Re: [PATCH v2 3/3] hwmon: (k10temp): Show errors failing to read
Date: Fri, 27 Aug 2021 16:10:06 -0500	[thread overview]
Message-ID: <90d283d0-06f6-baa5-b41b-fcd2f4b3ba99@amd.com> (raw)
In-Reply-To: <20210827210648.GA678755@roeck-us.net>

On 8/27/2021 16:06, Guenter Roeck wrote:
> On Fri, Aug 27, 2021 at 03:15:27PM -0500, Mario Limonciello wrote:
>> Enabling Yellow Carp was initially not working "properly"
>> because extra IDs were needed, but this wasn't obvious because fail values
>> from `amd_smn_read` were ignored.
>>
>> Don't discard errors from any functions providing them, instead pass up
>> to the caller.
>>
>> Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
>> ---
>>   drivers/hwmon/k10temp.c | 87 ++++++++++++++++++++++++-----------------
>>   1 file changed, 52 insertions(+), 35 deletions(-)
>>
>> diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
>> index 38bc35ac8135..2edb49d39d22 100644
>> --- a/drivers/hwmon/k10temp.c
>> +++ b/drivers/hwmon/k10temp.c
>> @@ -98,8 +98,8 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);
>>   
>>   struct k10temp_data {
>>   	struct pci_dev *pdev;
>> -	void (*read_htcreg)(struct pci_dev *pdev, u32 *regval);
>> -	void (*read_tempreg)(struct pci_dev *pdev, u32 *regval);
>> +	int (*read_htcreg)(struct pci_dev *pdev, u32 *regval);
>> +	int (*read_tempreg)(struct pci_dev *pdev, u32 *regval);
>>   	int temp_offset;
>>   	u32 temp_adjust_mask;
>>   	u32 show_temp;
>> @@ -129,55 +129,65 @@ static const struct tctl_offset tctl_offset_table[] = {
>>   	{ 0x17, "AMD Ryzen Threadripper 29", 27000 }, /* 29{20,50,70,90}[W]X */
>>   };
>>   
>> -static void read_htcreg_pci(struct pci_dev *pdev, u32 *regval)
>> +static int read_htcreg_pci(struct pci_dev *pdev, u32 *regval)
>>   {
>> -	pci_read_config_dword(pdev, REG_HARDWARE_THERMAL_CONTROL, regval);
>> +	return pci_read_config_dword(pdev, REG_HARDWARE_THERMAL_CONTROL, regval);
>>   }
>>   
>> -static void read_tempreg_pci(struct pci_dev *pdev, u32 *regval)
>> +static int read_tempreg_pci(struct pci_dev *pdev, u32 *regval)
>>   {
>> -	pci_read_config_dword(pdev, REG_REPORTED_TEMPERATURE, regval);
>> +	return pci_read_config_dword(pdev, REG_REPORTED_TEMPERATURE, regval);
>>   }
>>   
>> -static void amd_nb_index_read(struct pci_dev *pdev, unsigned int devfn,
>> +static int amd_nb_index_read(struct pci_dev *pdev, unsigned int devfn,
>>   			      unsigned int base, int offset, u32 *val)
>>   {
>> +	int ret;
>> +
>>   	mutex_lock(&nb_smu_ind_mutex);
>> -	pci_bus_write_config_dword(pdev->bus, devfn,
>> -				   base, offset);
>> -	pci_bus_read_config_dword(pdev->bus, devfn,
>> -				  base + 4, val);
>> +	ret = pci_bus_write_config_dword(pdev->bus, devfn,
>> +					 base, offset);
>> +	if (ret)
>> +		goto out;
>> +	ret = pci_bus_read_config_dword(pdev->bus, devfn,
>> +					base + 4, val);
>> +out:
>>   	mutex_unlock(&nb_smu_ind_mutex);
>> +	return ret;
>>   }
>>   
>> -static void read_htcreg_nb_f15(struct pci_dev *pdev, u32 *regval)
>> +static int read_htcreg_nb_f15(struct pci_dev *pdev, u32 *regval)
>>   {
>> -	amd_nb_index_read(pdev, PCI_DEVFN(0, 0), 0xb8,
>> -			  F15H_M60H_HARDWARE_TEMP_CTRL_OFFSET, regval);
>> +	return amd_nb_index_read(pdev, PCI_DEVFN(0, 0), 0xb8,
>> +				F15H_M60H_HARDWARE_TEMP_CTRL_OFFSET, regval);
>>   }
>>   
>> -static void read_tempreg_nb_f15(struct pci_dev *pdev, u32 *regval)
>> +static int read_tempreg_nb_f15(struct pci_dev *pdev, u32 *regval)
>>   {
>> -	amd_nb_index_read(pdev, PCI_DEVFN(0, 0), 0xb8,
>> -			  F15H_M60H_REPORTED_TEMP_CTRL_OFFSET, regval);
>> +	return amd_nb_index_read(pdev, PCI_DEVFN(0, 0), 0xb8,
>> +				F15H_M60H_REPORTED_TEMP_CTRL_OFFSET, regval);
>>   }
>>   
>> -static void read_tempreg_nb_zen(struct pci_dev *pdev, u32 *regval)
>> +static int read_tempreg_nb_zen(struct pci_dev *pdev, u32 *regval)
>>   {
>> -	amd_smn_read(amd_pci_dev_to_node_id(pdev),
>> -		     ZEN_REPORTED_TEMP_CTRL_BASE, regval);
>> +	return amd_smn_read(amd_pci_dev_to_node_id(pdev),
>> +			    ZEN_REPORTED_TEMP_CTRL_BASE, regval);
>>   }
>>   
>> -static long get_raw_temp(struct k10temp_data *data)
>> +static int get_raw_temp(struct k10temp_data *data, long *val)
>>   {
>>   	u32 regval;
>> -	long temp;
>> +	int ret;
>>   
>> -	data->read_tempreg(data->pdev, &regval);
>> -	temp = (regval >> ZEN_CUR_TEMP_SHIFT) * 125;
>> +	ret = data->read_tempreg(data->pdev, &regval);
>> +	if (ret)
>> +		return ret;
>> +	*val = (regval >> ZEN_CUR_TEMP_SHIFT) * 125;
>>   	if (regval & data->temp_adjust_mask)
>> -		temp -= 49000;
>> -	return temp;
>> +		*val -= 49000;
>> +	if (*val < 0)
>> +		return -EINVAL;
> 
> Please don't do that. More on that see below.
> 
>> +	return 0;
>>   }
>>   
>>   static const char *k10temp_temp_label[] = {
>> @@ -212,24 +222,27 @@ static int k10temp_read_temp(struct device *dev, u32 attr, int channel,
>>   {
>>   	struct k10temp_data *data = dev_get_drvdata(dev);
>>   	u32 regval;
>> +	int ret;
>>   
>>   	switch (attr) {
>>   	case hwmon_temp_input:
>>   		switch (channel) {
>>   		case 0:		/* Tctl */
>> -			*val = get_raw_temp(data);
>> -			if (*val < 0)
>> -				*val = 0;
> 
> We have to take the history into account here. A negative value
> is not an error per se, but it suggests that the chip returns wrong
> data. See commit aef17ca12719 ("hwmon: (k10temp) Only apply temperature
> offset if result is positive") for some of the background. I don't really
> want to change that into an error return just because we don't know
> what the chip is doing. Please retain the above code, either by fixing
> the values up here or in get_raw_temp().

Actually I thought what I was doing *was* making it a lot less ambiguous.

The caller getting -EINVAL from get_raw_temp() will indicate that the 
data shouldn't be trusted rather than a surely wrong "0".

> 
> Thanks,
> Guenter
> 
>> +			ret = get_raw_temp(data, val);
>> +			if (ret)
>> +				return ret;
>>   			break;
>>   		case 1:		/* Tdie */
>> -			*val = get_raw_temp(data) - data->temp_offset;
>> -			if (*val < 0)
>> -				*val = 0;
>> +			ret = get_raw_temp(data, val) - data->temp_offset;
>> +			if (ret)
>> +				return ret;
>>   			break;
>>   		case 2 ... 9:		/* Tccd{1-8} */
>> -			amd_smn_read(amd_pci_dev_to_node_id(data->pdev),
>> +			ret = amd_smn_read(amd_pci_dev_to_node_id(data->pdev),
>>   				     ZEN_CCD_TEMP(data->ccd_offset, channel - 2),
>>   						  &regval);
>> +			if (ret)
>> +				return ret;
>>   			*val = (regval & ZEN_CCD_TEMP_MASK) * 125 - 49000;
>>   			break;
>>   		default:
>> @@ -240,11 +253,15 @@ static int k10temp_read_temp(struct device *dev, u32 attr, int channel,
>>   		*val = 70 * 1000;
>>   		break;
>>   	case hwmon_temp_crit:
>> -		data->read_htcreg(data->pdev, &regval);
>> +		ret = data->read_htcreg(data->pdev, &regval);
>> +		if (ret)
>> +			return ret;
>>   		*val = ((regval >> 16) & 0x7f) * 500 + 52000;
>>   		break;
>>   	case hwmon_temp_crit_hyst:
>> -		data->read_htcreg(data->pdev, &regval);
>> +		ret = data->read_htcreg(data->pdev, &regval);
>> +		if (ret)
>> +			return ret;
>>   		*val = (((regval >> 16) & 0x7f)
>>   			- ((regval >> 24) & 0xf)) * 500 + 52000;
>>   		break;


  reply	other threads:[~2021-08-27 21:10 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-27 20:15 [PATCH v2 0/3] Extend k10temp support for more APUs Mario Limonciello
2021-08-27 20:15 ` [PATCH v2 1/3] hwmon: (k10temp): Rework the temperature offset calculation Mario Limonciello
2021-08-27 20:55   ` Guenter Roeck
2021-08-27 20:15 ` [PATCH v2 2/3] hwmon: (k10temp): Add support for yellow carp Mario Limonciello
2021-08-27 20:20   ` Borislav Petkov
2021-08-27 20:55   ` Guenter Roeck
2021-08-27 20:15 ` [PATCH v2 3/3] hwmon: (k10temp): Show errors failing to read Mario Limonciello
2021-08-27 21:06   ` Guenter Roeck
2021-08-27 21:10     ` Limonciello, Mario [this message]
2021-08-27 21:45       ` Guenter Roeck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=90d283d0-06f6-baa5-b41b-fcd2f4b3ba99@amd.com \
    --to=mario.limonciello@amd.com \
    --cc=clemens@ladisch.de \
    --cc=jdelvare@suse.com \
    --cc=linux-hwmon@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux@roeck-us.net \
    --cc=nix.or.die@googlemail.com \
    --cc=wei.huang2@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).