From: Loic Poulain <loic.poulain@linaro.org>
To: manivannan.sadhasivam@linaro.org
Cc: linux-arm-msm@vger.kernel.org, bbhatt@codeaurora.org,
hemantk@codeaurora.org, Loic Poulain <loic.poulain@linaro.org>
Subject: [PATCH v6 08/10] mhi: pci_generic: Add health-check
Date: Tue, 29 Dec 2020 09:43:49 +0100 [thread overview]
Message-ID: <1609231431-10048-9-git-send-email-loic.poulain@linaro.org> (raw)
In-Reply-To: <1609231431-10048-1-git-send-email-loic.poulain@linaro.org>
If the modem crashes for any reason, we may not be able to detect
it at the MHI level (the MHI registers are no longer reachable).
This patch implements a health-check mechanism that regularly checks
that the device is alive (i.e. the MHI layer can still communicate
with it). If the device is not alive (because of a crash or an
unexpected reset), the recovery procedure is triggered.
Tested successfully with Telit FN980m module.
Signed-off-by: Loic Poulain <loic.poulain@linaro.org>
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Reviewed-by: Hemant Kumar <hemantk@codeaurora.org>
---
drivers/bus/mhi/pci_generic.c | 35 +++++++++++++++++++++++++++++++++++
1 file changed, 35 insertions(+)
diff --git a/drivers/bus/mhi/pci_generic.c b/drivers/bus/mhi/pci_generic.c
index 9fe1e30..812d54f 100644
--- a/drivers/bus/mhi/pci_generic.c
+++ b/drivers/bus/mhi/pci_generic.c
@@ -14,11 +14,15 @@
#include <linux/mhi.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/timer.h>
#include <linux/workqueue.h>
#define MHI_PCI_DEFAULT_BAR_NUM 0
#define MHI_POST_RESET_DELAY_MS 500
+
+#define HEALTH_CHECK_PERIOD (HZ * 2)
+
/**
* struct mhi_pci_dev_info - MHI PCI device specific information
* @config: MHI controller configuration
@@ -189,6 +193,7 @@ struct mhi_pci_device {
struct mhi_controller mhi_cntrl;
struct pci_saved_state *pci_state;
struct work_struct recovery_work;
+ struct timer_list health_check_timer;
unsigned long status;
};
@@ -326,6 +331,8 @@ static void mhi_pci_recovery_work(struct work_struct *work)
dev_warn(&pdev->dev, "device recovery started\n");
+ del_timer(&mhi_pdev->health_check_timer);
+
/* Clean up MHI state */
if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
mhi_power_down(mhi_cntrl, false);
@@ -351,6 +358,7 @@ static void mhi_pci_recovery_work(struct work_struct *work)
dev_dbg(&pdev->dev, "Recovery completed\n");
set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status);
+ mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
return;
err_unprepare:
@@ -360,6 +368,21 @@ static void mhi_pci_recovery_work(struct work_struct *work)
dev_err(&pdev->dev, "Recovery failed\n");
}
+static void health_check(struct timer_list *t)
+{
+ struct mhi_pci_device *mhi_pdev = from_timer(mhi_pdev, t, health_check_timer);
+ struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+
+ if (!mhi_pci_is_alive(mhi_cntrl)) {
+ dev_err(mhi_cntrl->cntrl_dev, "Device died\n");
+ queue_work(system_long_wq, &mhi_pdev->recovery_work);
+ return;
+ }
+
+ /* device is alive: re-arm the timer, next check in HEALTH_CHECK_PERIOD */
+ mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
+}
+
static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
const struct mhi_pci_dev_info *info = (struct mhi_pci_dev_info *) id->driver_data;
@@ -375,6 +398,7 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return -ENOMEM;
INIT_WORK(&mhi_pdev->recovery_work, mhi_pci_recovery_work);
+ timer_setup(&mhi_pdev->health_check_timer, health_check, 0);
mhi_cntrl_config = info->config;
mhi_cntrl = &mhi_pdev->mhi_cntrl;
@@ -427,6 +451,9 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status);
+ /* start health check */
+ mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
+
return 0;
err_unprepare:
@@ -442,6 +469,7 @@ static void mhi_pci_remove(struct pci_dev *pdev)
struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+ del_timer(&mhi_pdev->health_check_timer);
cancel_work_sync(&mhi_pdev->recovery_work);
if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
@@ -459,6 +487,8 @@ static void mhi_pci_reset_prepare(struct pci_dev *pdev)
dev_info(&pdev->dev, "reset\n");
+ del_timer(&mhi_pdev->health_check_timer);
+
/* Clean up MHI state */
if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
mhi_power_down(mhi_cntrl, false);
@@ -502,6 +532,7 @@ static void mhi_pci_reset_done(struct pci_dev *pdev)
}
set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status);
+ mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
}
static pci_ers_result_t mhi_pci_error_detected(struct pci_dev *pdev,
@@ -562,6 +593,7 @@ static int __maybe_unused mhi_pci_suspend(struct device *dev)
struct mhi_pci_device *mhi_pdev = dev_get_drvdata(dev);
struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+ del_timer(&mhi_pdev->health_check_timer);
cancel_work_sync(&mhi_pdev->recovery_work);
/* Transition to M3 state */
@@ -597,6 +629,9 @@ static int __maybe_unused mhi_pci_resume(struct device *dev)
goto err_recovery;
}
+ /* Resume health check */
+ mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
+
return 0;
err_recovery:
--
2.7.4
next prev parent reply other threads:[~2020-12-29 8:38 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-12-29 8:43 [PATCH v6 00/10] mhi: pci_generic: Misc improvements Loic Poulain
2020-12-29 8:43 ` [PATCH v6 01/10] mhi: Add mhi_controller_initialize helper Loic Poulain
2020-12-31 6:48 ` Manivannan Sadhasivam
2020-12-29 8:43 ` [PATCH v6 02/10] bus: mhi: core: Add device hardware reset support Loic Poulain
2020-12-31 7:01 ` Manivannan Sadhasivam
2020-12-29 8:43 ` [PATCH v6 03/10] mhi: pci-generic: Increase number of hardware events Loic Poulain
2020-12-29 8:43 ` [PATCH v6 04/10] mhi: pci_generic: Enable burst mode for hardware channels Loic Poulain
2020-12-29 8:43 ` [PATCH v6 05/10] mhi: pci_generic: Add support for reset Loic Poulain
2020-12-29 8:43 ` [PATCH v6 06/10] mhi: pci_generic: Add suspend/resume/recovery procedure Loic Poulain
2020-12-31 7:05 ` Manivannan Sadhasivam
2020-12-29 8:43 ` [PATCH v6 07/10] mhi: pci_generic: Add PCI error handlers Loic Poulain
2020-12-31 7:18 ` Manivannan Sadhasivam
2020-12-31 9:27 ` Loic Poulain
2020-12-31 11:14 ` Manivannan Sadhasivam
2020-12-29 8:43 ` Loic Poulain [this message]
2020-12-29 8:43 ` [PATCH v6 09/10] mhi: pci_generic: Increase controller timeout value Loic Poulain
2020-12-31 7:18 ` Manivannan Sadhasivam
2020-12-29 8:43 ` [PATCH v6 10/10] mhi: pci_generic: Add diag channels Loic Poulain
2020-12-31 7:20 ` Manivannan Sadhasivam
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1609231431-10048-9-git-send-email-loic.poulain@linaro.org \
--to=loic.poulain@linaro.org \
--cc=bbhatt@codeaurora.org \
--cc=hemantk@codeaurora.org \
--cc=linux-arm-msm@vger.kernel.org \
--cc=manivannan.sadhasivam@linaro.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).