stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Sumeet Pawnikar <sumeet.r.pawnikar@intel.com>,
	Zhang Rui <rui.zhang@intel.com>,
	Daniel Lezcano <daniel.lezcano@linaro.org>,
	Sasha Levin <sashal@kernel.org>,
	linux-pm@vger.kernel.org
Subject: [PATCH AUTOSEL 5.10 051/217] thermal: intel: pch: fix S0ix failure due to PCH temperature above threshold
Date: Tue, 22 Dec 2020 21:13:40 -0500	[thread overview]
Message-ID: <20201223021626.2790791-51-sashal@kernel.org> (raw)
In-Reply-To: <20201223021626.2790791-1-sashal@kernel.org>

From: Sumeet Pawnikar <sumeet.r.pawnikar@intel.com>

[ Upstream commit ef63b043ac8645d2540d7b50dd3e09c53db3d504 ]

When system tries to enter S0ix suspend state, just after active load
scenarios, it fails due to PCH current temperature is higher than set
threshold.
This patch introduces delay loop mechanism that allows PCH temperature
to go down below threshold during suspend so it won't fail to enter S0ix.
Add delay loop timeout and count as module parameters for user to tune it,
if required based on system design. This change notifies the different
warning messages like when PCH temperature above the threshold and
executing delay loop. Also, notify the messages when it success or
failure for S0ix entry.
Previously out of 1000 runs around 3 to 5 times it might fail to enter
S0ix just after heavy workload. With this change, S0ix failures reduced
as PCH cools down below threshold.

Signed-off-by: Sumeet Pawnikar <sumeet.r.pawnikar@intel.com>
Reviewed-by: Zhang Rui <rui.zhang@intel.com>
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/20201106170633.20838-1-sumeet.r.pawnikar@intel.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/thermal/intel/intel_pch_thermal.c | 76 +++++++++++++++++++++--
 1 file changed, 70 insertions(+), 6 deletions(-)

diff --git a/drivers/thermal/intel/intel_pch_thermal.c b/drivers/thermal/intel/intel_pch_thermal.c
index 3b813ebb6ca1d..0a9e4458bc3a5 100644
--- a/drivers/thermal/intel/intel_pch_thermal.c
+++ b/drivers/thermal/intel/intel_pch_thermal.c
@@ -7,14 +7,16 @@
  *     Tushar Dave <tushar.n.dave@intel.com>
  */
 
+#include <linux/acpi.h>
+#include <linux/delay.h>
 #include <linux/module.h>
-#include <linux/types.h>
 #include <linux/init.h>
 #include <linux/pci.h>
-#include <linux/acpi.h>
+#include <linux/pm.h>
+#include <linux/suspend.h>
 #include <linux/thermal.h>
+#include <linux/types.h>
 #include <linux/units.h>
-#include <linux/pm.h>
 
 /* Intel PCH thermal Device IDs */
 #define PCH_THERMAL_DID_HSW_1	0x9C24 /* Haswell PCH */
@@ -35,6 +37,7 @@
 #define WPT_TSREL	0x0A	/* Thermal Sensor Report Enable and Lock */
 #define WPT_TSMIC	0x0C	/* Thermal Sensor SMI Control */
 #define WPT_CTT	0x0010	/* Catastrophic Trip Point */
+#define WPT_TSPM	0x001C	/* Thermal Sensor Power Management */
 #define WPT_TAHV	0x0014	/* Thermal Alert High Value */
 #define WPT_TALV	0x0018	/* Thermal Alert Low Value */
 #define WPT_TL		0x00000040	/* Throttle Value */
@@ -55,6 +58,22 @@
 #define WPT_TL_T1L	0x1ff00000	/* T1 Level */
 #define WPT_TL_TTEN	0x20000000	/* TT Enable */
 
+/* Resolution of 1/2 degree C and an offset of -50C */
+#define PCH_TEMP_OFFSET	(-50)
+#define GET_WPT_TEMP(x)	((x) * MILLIDEGREE_PER_DEGREE / 2 + WPT_TEMP_OFFSET)
+#define WPT_TEMP_OFFSET	(PCH_TEMP_OFFSET * MILLIDEGREE_PER_DEGREE)
+#define GET_PCH_TEMP(x)	(((x) / 2) + PCH_TEMP_OFFSET)
+
+/* Amount of time for each cooling delay, 100ms by default for now */
+static unsigned int delay_timeout = 100;
+module_param(delay_timeout, int, 0644);
+MODULE_PARM_DESC(delay_timeout, "amount of time delay for each iteration.");
+
+/* Number of iterations for cooling delay, 10 counts by default for now */
+static unsigned int delay_cnt = 10;
+module_param(delay_cnt, int, 0644);
+MODULE_PARM_DESC(delay_cnt, "total number of iterations for time delay.");
+
 static char driver_name[] = "Intel PCH thermal driver";
 
 struct pch_thermal_device {
@@ -183,13 +202,58 @@ static int pch_wpt_get_temp(struct pch_thermal_device *ptd, int *temp)
 static int pch_wpt_suspend(struct pch_thermal_device *ptd)
 {
 	u8 tsel;
+	u8 pch_delay_cnt = 1;
+	u16 pch_thr_temp, pch_cur_temp;
 
-	if (ptd->bios_enabled)
+	/* Shutdown the thermal sensor if it is not enabled by BIOS */
+	if (!ptd->bios_enabled) {
+		tsel = readb(ptd->hw_base + WPT_TSEL);
+		writeb(tsel & 0xFE, ptd->hw_base + WPT_TSEL);
+		return 0;
+	}
+
+	/* Do not check temperature if it is not a S0ix capable platform */
+	if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0))
 		return 0;
 
-	tsel = readb(ptd->hw_base + WPT_TSEL);
+	/* Do not check temperature if it is not s2idle */
+	if (pm_suspend_via_firmware())
+		return 0;
+
+	/* Get the PCH temperature threshold value */
+	pch_thr_temp = GET_PCH_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TSPM));
+
+	/* Get the PCH current temperature value */
+	pch_cur_temp = GET_PCH_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP));
 
-	writeb(tsel & 0xFE, ptd->hw_base + WPT_TSEL);
+	/*
+	 * If current PCH temperature is higher than configured PCH threshold
+	 * value, run some delay loop with sleep to let the current temperature
+	 * go down below the threshold value which helps to allow system enter
+	 * lower power S0ix suspend state. Even after delay loop if PCH current
+	 * temperature stays above threshold, notify the warning message
+	 * which helps to indentify the reason why S0ix entry was rejected.
+	 */
+	while (pch_delay_cnt <= delay_cnt) {
+		if (pch_cur_temp <= pch_thr_temp)
+			break;
+
+		dev_warn(&ptd->pdev->dev,
+			"CPU-PCH current temp [%dC] higher than the threshold temp [%dC], sleep %d times for %d ms duration\n",
+			pch_cur_temp, pch_thr_temp, pch_delay_cnt, delay_timeout);
+		msleep(delay_timeout);
+		/* Read the PCH current temperature for next cycle. */
+		pch_cur_temp = GET_PCH_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP));
+		pch_delay_cnt++;
+	}
+
+	if (pch_cur_temp > pch_thr_temp)
+		dev_warn(&ptd->pdev->dev,
+			"CPU-PCH is hot [%dC] even after delay, continue to suspend. S0ix might fail\n",
+			pch_cur_temp);
+	else
+		dev_info(&ptd->pdev->dev,
+			"CPU-PCH is cool [%dC], continue to suspend\n", pch_cur_temp);
 
 	return 0;
 }
-- 
2.27.0


  parent reply	other threads:[~2020-12-23  3:09 UTC|newest]

Thread overview: 80+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-23  2:12 [PATCH AUTOSEL 5.10 001/217] soc: aspeed-lpc-ctrl: Fail probe of lpc-ctrl if reserved memory is not aligned Sasha Levin
2020-12-23  2:12 ` [PATCH AUTOSEL 5.10 002/217] drm/ingenic: Reset pixclock rate when parent clock rate changes Sasha Levin
2020-12-23  2:12 ` [PATCH AUTOSEL 5.10 003/217] drm/bridge: ti-sn65dsi86: Add retries for link training Sasha Levin
2020-12-23  2:12 ` [PATCH AUTOSEL 5.10 004/217] drm/amd/display: setup system context in dm_init Sasha Levin
2020-12-23  2:12 ` [PATCH AUTOSEL 5.10 005/217] drm/amd/display: Fix the display corruption issue on Navi10 Sasha Levin
2020-12-23  2:12 ` [PATCH AUTOSEL 5.10 006/217] locks: Fix UBSAN undefined behaviour in flock64_to_posix_lock Sasha Levin
2020-12-23  2:12 ` [PATCH AUTOSEL 5.10 007/217] platform/x86: asus-wmi: Add support for SW_TABLET_MODE on UX360 Sasha Levin
2020-12-23  2:12 ` [PATCH AUTOSEL 5.10 008/217] tomoyo: fix clang pointer arithmetic warning Sasha Levin
2020-12-23  2:12 ` [PATCH AUTOSEL 5.10 009/217] HID: hid-input: occasionally report stylus battery even if not changed Sasha Levin
2020-12-23  2:12 ` [PATCH AUTOSEL 5.10 010/217] drm/amdgpu: change to save bad pages in UMC error interrupt callback Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 011/217] crypto: omap-aes - fix the reference count leak of omap device Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 012/217] drm/amd/display: Do not silently accept DCC for multiplane formats Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 013/217] ASoC: intel: sof_rt5682: Add quirk for Dooly Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 014/217] bpf: Use separate lockdep class for each hashtab Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 015/217] drm/msm: Fix race condition in msm driver with async layer updates Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 016/217] staging: wimax: depends on NET Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 017/217] RDMA/siw: Fix typo of EAGAIN not -EAGAIN in siw_cm_work_handler() Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 018/217] drm/amd/display: Fix compilation error Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 019/217] drm/amd/display: Force prefetch mode to 0 Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 020/217] drm/amd/display: Keep GSL for full updates with planes that flip VSYNC Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 021/217] drm/amd/display: stop top_mgr when type change to non-MST during s3 Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 022/217] drm/amd/display: correct eDP T9 delay Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 023/217] drm/amd/display: Update connector on DSC property change Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 024/217] drm/amd/display: fix recout calculation for left side clip Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 025/217] drm/amdgpu: disable gfxoff if VCN is busy Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 026/217] net: ethernet: ti: am65-cpsw: fix tx csum offload for multi mac mode Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 027/217] net: mscc: ocelot: don't reset the pvid to 0 when deleting it Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 028/217] drm/amdgpu: set LDS_CONFIG=0x20 on Navy Flounder to fix a GPU hang (v2) Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 029/217] scsi: target: Fix cmd_count ref leak Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 030/217] scsi: pm80xx: Make mpi_build_cmd locking consistent Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 031/217] scsi: pm80xx: Make running_req atomic Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 032/217] scsi: pm80xx: Avoid busywait in FW ready check Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 033/217] scsi: pm80xx: Fix pm8001_mpi_get_nvmd_resp() race condition Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 034/217] scsi: ufs: Allow an error return value from ->device_reset() Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 035/217] fcntl: Fix potential deadlock in send_sig{io, urg}() Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 036/217] drm/amdgpu: add missing clock gating info in amdgpu_pm_info Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 037/217] mac80211: don't overwrite QoS TID of injected frames Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 038/217] staging: ks7010: fix missing destroy_workqueue() on error in ks7010_sdio_probe Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 039/217] staging: rtl8192u: fix wrong judgement in rtl8192_rx_isr Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 040/217] mips: ar7: add missing iounmap() on error in ar7_gpio_init Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 041/217] mips: cm: add missing iounmap() on error in mips_cm_probe() Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 042/217] usb: typec: ucsi: Work around PPM losing change information Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 043/217] rcuscale: Prevent hangs for invalid arguments Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 044/217] refscale: " Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 045/217] locktorture: " Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 046/217] torture: Prevent jitter processes from delaying failed run Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 047/217] rcutorture: Prevent hangs for invalid arguments Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 048/217] kcsan: Fix encoding masks and regain address bit Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 049/217] ath10k: fix compilation warning Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 050/217] rsi: Fix TX EAPOL packet handling against iwlwifi AP Sasha Levin
2021-03-02 19:25   ` Marek Vasut
2021-03-04 20:47     ` Sasha Levin
2021-03-04 21:07       ` Marek Vasut
2020-12-23  2:13 ` Sasha Levin [this message]
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 052/217] drm: panel: simple: add missing platform_driver_unregister() in panel_simple_init Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 053/217] drm/bridge: lvds-codec: Use dev_err_probe for error handling Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 054/217] drm/ast: Fixed 1920x1080 sync. polarity issue Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 055/217] s390: make sure vmemmap is top region table entry aligned Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 056/217] s390/pci: remove races against pte updates Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 057/217] s390/trng: set quality to 1024 Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 058/217] Bluetooth: btqca: Add valid le states quirk Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 059/217] Bluetooth: Resume advertising after LE connection Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 060/217] Bluetooth: Move force_bredr_smp debugfs into hci_debugfs_create_bredr Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 061/217] Bluetooth: hidp: use correct wait queue when removing ctrl_wait Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 062/217] binder: change error code from postive to negative in binder_transaction Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 063/217] net: skb_vlan_untag(): don't reset transport offset if set by GRO layer Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 064/217] drm/omap: Fix runtime PM imbalance on error Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 065/217] pinctrl: qcom: Kconfig: Rework PINCTRL_MSM to be a depenency rather then a selected config Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 066/217] ASoC: Fix vaud18 power leakage of mt6359 Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 067/217] mwifiex: pcie: skip cancel_work_sync() on reset failure path Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 068/217] drm/amd/amdgpu: Fix incorrect logic to increment VCN doorbell index Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 069/217] drm/amd/amdgpu: Add rev_id workaround logic for SRIOV setup Sasha Levin
2020-12-23  2:13 ` [PATCH AUTOSEL 5.10 070/217] ASoC: Intel: sof_sdw: add quirk for new TigerLake-SDCA device Sasha Levin
2020-12-23  2:14 ` [PATCH AUTOSEL 5.10 071/217] RDMA/core: Postpone uobject cleanup on failure till FD close Sasha Levin
2020-12-23  2:14 ` [PATCH AUTOSEL 5.10 072/217] MIPS: BMC47xx: fix kconfig dependency bug for BCM47XX_SSB Sasha Levin
2020-12-23  2:14 ` [PATCH AUTOSEL 5.10 073/217] net: ipconfig: Avoid spurious blank lines in boot log Sasha Levin
2020-12-23  2:14 ` [PATCH AUTOSEL 5.10 074/217] drm/amd/amdgpu: Update VCN initizalization behvaior Sasha Levin
2020-12-23  2:14 ` [PATCH AUTOSEL 5.10 075/217] drm/amdgpu: check hive pointer before access Sasha Levin
2020-12-23  2:14 ` [PATCH AUTOSEL 5.10 076/217] jfs: Fix memleak in dbAdjCtl Sasha Levin
2020-12-23  2:14 ` [PATCH AUTOSEL 5.10 077/217] r8169: use READ_ONCE in rtl_tx_slots_avail Sasha Levin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201223021626.2790791-51-sashal@kernel.org \
    --to=sashal@kernel.org \
    --cc=daniel.lezcano@linaro.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=rui.zhang@intel.com \
    --cc=stable@vger.kernel.org \
    --cc=sumeet.r.pawnikar@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).