All of lore.kernel.org
 help / color / mirror / Atom feed
From: Cezary Rojewski <cezary.rojewski@intel.com>
To: alsa-devel@alsa-project.org
Cc: Cezary Rojewski <cezary.rojewski@intel.com>,
	upstream@semihalf.com, harshapriya.n@intel.com,
	yung-chuan.liao@linux.intel.com, rad@semihalf.com,
	pierre-louis.bossart@linux.intel.com, tiwai@suse.com,
	hdegoede@redhat.com, broonie@kernel.org,
	ranjani.sridharan@linux.intel.com,
	amadeuszx.slawinski@linux.intel.com, cujomalainey@chromium.org,
	peter.ujfalusi@linux.intel.com, lma@semihalf.com
Subject: [RFC 29/37] ASoC: Intel: avs: Coredump and recovery flow
Date: Wed,  8 Dec 2021 12:12:53 +0100	[thread overview]
Message-ID: <20211208111301.1817725-30-cezary.rojewski@intel.com> (raw)
In-Reply-To: <20211208111301.1817725-1-cezary.rojewski@intel.com>

In rare occassions, under stress conditions or hardware malfunction, DSP
firmware may fail. Software is notified about such situation with
EXCEPTION_CAUGHT notification. IPC timeout is also counted as critical
device failure. More often than not, driver can recover from such
situations by performing full reset: killing and restarting ADSP.

Signed-off-by: Amadeusz Sławiński <amadeuszx.slawinski@linux.intel.com>
Signed-off-by: Cezary Rojewski <cezary.rojewski@intel.com>
---
 sound/soc/intel/Kconfig        |  1 +
 sound/soc/intel/avs/avs.h      |  3 ++
 sound/soc/intel/avs/ipc.c      | 87 ++++++++++++++++++++++++++++++++--
 sound/soc/intel/avs/messages.h |  5 ++
 4 files changed, 93 insertions(+), 3 deletions(-)

diff --git a/sound/soc/intel/Kconfig b/sound/soc/intel/Kconfig
index b01c492d3514..8c059e2a5a36 100644
--- a/sound/soc/intel/Kconfig
+++ b/sound/soc/intel/Kconfig
@@ -220,6 +220,7 @@ config SND_SOC_INTEL_AVS
 	select SND_HDA_EXT_CORE
 	select SND_HDA_DSP_LOADER
 	select SND_INTEL_NHLT
+	select WANT_DEV_COREDUMP
 	help
 	  Enable support for Intel(R) cAVS 1.5 platforms with DSP
 	  capabilities. This includes Skylake, Kabylake, Amberlake and
diff --git a/sound/soc/intel/avs/avs.h b/sound/soc/intel/avs/avs.h
index 74194d8c04fe..4562f187939d 100644
--- a/sound/soc/intel/avs/avs.h
+++ b/sound/soc/intel/avs/avs.h
@@ -33,6 +33,7 @@ struct avs_dsp_ops {
 	int (* const load_lib)(struct avs_dev *, struct firmware *, u32);
 	int (* const transfer_mods)(struct avs_dev *, bool,
 				    struct avs_module_entry *, u32);
+	int (* const coredump)(struct avs_dev *, union avs_notify_msg *);
 };
 
 #define avs_dsp_op(adev, op, ...) \
@@ -147,6 +148,8 @@ struct avs_ipc {
 	struct mutex msg_mutex;
 	struct completion done_completion;
 	struct completion busy_completion;
+
+	struct work_struct recovery_work;
 };
 
 #define AVS_EIPC	EREMOTEIO
diff --git a/sound/soc/intel/avs/ipc.c b/sound/soc/intel/avs/ipc.c
index c461f7db3683..a8d2323e07a7 100644
--- a/sound/soc/intel/avs/ipc.c
+++ b/sound/soc/intel/avs/ipc.c
@@ -14,6 +14,78 @@
 
 #define AVS_IPC_TIMEOUT_MS	300
 
+static void avs_dsp_recovery(struct avs_dev *adev)
+{
+	struct avs_soc_component *acomp;
+	unsigned int core_mask;
+	int ret;
+
+	mutex_lock(&adev->comp_list_mutex);
+	/* disconnect all running streams */
+	list_for_each_entry(acomp, &adev->comp_list, node) {
+		struct snd_soc_pcm_runtime *rtd;
+		struct snd_soc_card *card;
+
+		card = acomp->base.card;
+		if (!card)
+			continue;
+
+		for_each_card_rtds(card, rtd) {
+			struct snd_pcm *pcm;
+			int dir;
+
+			pcm = rtd->pcm;
+			if (!pcm || rtd->dai_link->no_pcm)
+				continue;
+
+			for_each_pcm_streams(dir) {
+				struct snd_pcm_substream *substream;
+
+				substream = pcm->streams[dir].substream;
+				if (!substream || !substream->runtime)
+					continue;
+
+				snd_pcm_stop(substream, SNDRV_PCM_STATE_DISCONNECTED);
+			}
+		}
+	}
+	mutex_unlock(&adev->comp_list_mutex);
+
+	/* forcibly shutdown all cores */
+	core_mask = GENMASK(adev->hw_cfg.dsp_cores - 1, 0);
+	avs_dsp_core_disable(adev, core_mask);
+
+	/* attempt dsp reboot */
+	ret = avs_dsp_boot_firmware(adev, true);
+	if (ret < 0)
+		dev_err(adev->dev, "firmware reboot failed: %d\n", ret);
+
+	pm_runtime_mark_last_busy(adev->dev);
+	pm_runtime_enable(adev->dev);
+	pm_request_autosuspend(adev->dev);
+}
+
+static void avs_dsp_recovery_work(struct work_struct *work)
+{
+	struct avs_ipc *ipc = container_of(work, struct avs_ipc, recovery_work);
+
+	avs_dsp_recovery(to_avs_dev(ipc->dev));
+}
+
+static void avs_dsp_exception_caught(struct avs_dev *adev, union avs_notify_msg *msg)
+{
+	dev_crit(adev->dev, "communication severed, rebooting dsp..\n");
+
+	/* Re-enabled on recovery completion. */
+	avs_ipc_block(adev->ipc);
+	pm_runtime_disable(adev->dev);
+
+	/* Process received notification. */
+	avs_dsp_op(adev, coredump, msg);
+
+	schedule_work(&adev->ipc->recovery_work);
+}
+
 static void avs_dsp_receive_rx(struct avs_dev *adev, u64 header)
 {
 	struct avs_ipc *ipc = adev->ipc;
@@ -51,6 +123,9 @@ static void avs_dsp_process_notification(struct avs_dev *adev, u64 header)
 		data_size = sizeof(struct avs_notify_res_data);
 		break;
 
+	case AVS_NOTIFY_EXCEPTION_CAUGHT:
+		break;
+
 	case AVS_NOTIFY_MODULE_EVENT:
 		memcpy_fromio(&mod_data, avs_uplink_addr(adev), sizeof(mod_data));
 		data_size = sizeof(mod_data) + mod_data.data_size;
@@ -78,6 +153,10 @@ static void avs_dsp_process_notification(struct avs_dev *adev, u64 header)
 		complete(&adev->fw_ready);
 		break;
 
+	case AVS_NOTIFY_EXCEPTION_CAUGHT:
+		avs_dsp_exception_caught(adev, &msg);
+		break;
+
 	default:
 		break;
 	}
@@ -264,10 +343,10 @@ static int avs_dsp_do_send_msg(struct avs_dev *adev, struct avs_ipc_msg *request
 	ret = avs_ipc_wait_busy_completion(ipc, timeout);
 	if (ret) {
 		if (ret == -ETIMEDOUT) {
-			dev_crit(adev->dev, "communication severed: %d, rebooting dsp..\n",
-				 ret);
+			union avs_notify_msg msg = AVS_NOTIFICATION(EXCEPTION_CAUGHT);
 
-			avs_ipc_block(ipc);
+			/* Same treatment as on exception, just stack_dump=0. */
+			avs_dsp_exception_caught(adev, &msg);
 		}
 		goto exit;
 	}
@@ -389,6 +468,7 @@ int avs_ipc_init(struct avs_ipc *ipc, struct device *dev)
 	ipc->dev = dev;
 	ipc->ready = false;
 	ipc->default_timeout_ms = AVS_IPC_TIMEOUT_MS;
+	INIT_WORK(&ipc->recovery_work, avs_dsp_recovery_work);
 	init_completion(&ipc->done_completion);
 	init_completion(&ipc->busy_completion);
 	spin_lock_init(&ipc->rx_lock);
@@ -400,4 +480,5 @@ int avs_ipc_init(struct avs_ipc *ipc, struct device *dev)
 void avs_ipc_block(struct avs_ipc *ipc)
 {
 	ipc->ready = false;
+	cancel_work_sync(&ipc->recovery_work);
 }
diff --git a/sound/soc/intel/avs/messages.h b/sound/soc/intel/avs/messages.h
index a79aadba161d..f289c3498096 100644
--- a/sound/soc/intel/avs/messages.h
+++ b/sound/soc/intel/avs/messages.h
@@ -192,6 +192,7 @@ enum avs_notify_msg_type {
 	AVS_NOTIFY_PHRASE_DETECTED = 4,
 	AVS_NOTIFY_RESOURCE_EVENT = 5,
 	AVS_NOTIFY_FW_READY = 8,
+	AVS_NOTIFY_EXCEPTION_CAUGHT = 10,
 	AVS_NOTIFY_MODULE_EVENT = 12,
 };
 
@@ -210,6 +211,10 @@ union avs_notify_msg {
 		};
 		union {
 			u32 val;
+			struct {
+				u32 core_id:2;
+				u32 stack_dump_size:16;
+			} coredump;
 		} ext;
 	};
 } __packed;
-- 
2.25.1


  parent reply	other threads:[~2021-12-08 11:22 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-12-08 11:12 [RFC 00/37] ASoC: Intel: AVS - Audio DSP for cAVS Cezary Rojewski
2021-12-08 11:12 ` [RFC 01/37] ALSA: hda: Add snd_hdac_ext_bus_link_at() helper Cezary Rojewski
2021-12-08 11:12 ` [RFC 02/37] ALSA: hda: Update and expose snd_hda_codec_device_init() Cezary Rojewski
2021-12-08 11:12 ` [RFC 03/37] ALSA: hda: Update and expose codec register procedures Cezary Rojewski
2021-12-08 11:12 ` [RFC 04/37] ALSA: hda: Expose codec cleanup and power-save functions Cezary Rojewski
2021-12-08 11:12 ` [RFC 05/37] ALSA: hda: Add helper macros for DSP capable devices Cezary Rojewski
2021-12-08 11:12 ` [RFC 06/37] ASoC: Export DAI register and widget ctor and dctor functions Cezary Rojewski
2021-12-21 13:41   ` Mark Brown
2021-12-21 16:40     ` Cezary Rojewski
2021-12-08 11:12 ` [RFC 07/37] ASoC: Intel: Introduce AVS driver Cezary Rojewski
2021-12-08 11:12 ` [RFC 08/37] ASoC: Intel: avs: Inter process communication Cezary Rojewski
2021-12-08 11:12 ` [RFC 09/37] ASoC: Intel: avs: Add code loading requests Cezary Rojewski
2021-12-08 11:12 ` [RFC 10/37] ASoC: Intel: avs: Add pipeline management requests Cezary Rojewski
2021-12-08 11:12 ` [RFC 11/37] ASoC: Intel: avs: Add module " Cezary Rojewski
2021-12-08 11:12 ` [RFC 12/37] ASoC: Intel: avs: Add power " Cezary Rojewski
2021-12-08 11:12 ` [RFC 13/37] ASoC: Intel: avs: Add ROM requests Cezary Rojewski
2021-12-08 11:12 ` [RFC 14/37] ASoC: Intel: avs: Add basefw runtime-parameter requests Cezary Rojewski
2021-12-08 11:12 ` [RFC 15/37] ASoC: Intel: avs: Firmware resources management utilities Cezary Rojewski
2021-12-08 11:12 ` [RFC 16/37] ASoC: Intel: avs: Declare module configuration types Cezary Rojewski
2021-12-08 11:12 ` [RFC 17/37] ASoC: Intel: avs: Dynamic firmware resources management Cezary Rojewski
2021-12-21 14:40   ` Mark Brown
2021-12-21 17:07     ` Cezary Rojewski
2021-12-08 11:12 ` [RFC 18/37] ASoC: Intel: avs: Topology parsing Cezary Rojewski
2021-12-21 17:39   ` Mark Brown
2021-12-22 14:21     ` Cezary Rojewski
2021-12-08 11:12 ` [RFC 19/37] ASoC: Intel: avs: Path management Cezary Rojewski
2021-12-08 11:12 ` [RFC 20/37] ASoC: Intel: avs: Conditional-path support Cezary Rojewski
2021-12-08 11:12 ` [RFC 21/37] ASoC: Intel: avs: General code loading flow Cezary Rojewski
2021-12-08 11:12 ` [RFC 22/37] ASoC: Intel: avs: Implement CLDMA transfer Cezary Rojewski
2021-12-08 11:12 ` [RFC 23/37] ASoC: Intel: avs: Code loading over CLDMA Cezary Rojewski
2021-12-08 11:12 ` [RFC 24/37] ASoC: Intel: avs: Code loading over HDA Cezary Rojewski
2021-12-08 11:12 ` [RFC 25/37] ASoC: Intel: avs: Generic soc component driver Cezary Rojewski
2021-12-08 11:12 ` [RFC 26/37] ASoC: Intel: avs: Generic PCM FE operations Cezary Rojewski
2021-12-08 11:12 ` [RFC 27/37] ASoC: Intel: avs: non-HDA PCM BE operations Cezary Rojewski
2021-12-08 11:12 ` [RFC 28/37] ASoC: Intel: avs: HDA " Cezary Rojewski
2021-12-08 11:12 ` Cezary Rojewski [this message]
2021-12-08 11:12 ` [RFC 30/37] ASoC: Intel: avs: Prepare for firmware tracing Cezary Rojewski
2021-12-08 11:12 ` [RFC 31/37] ASoC: Intel: avs: D0ix power state support Cezary Rojewski
2021-12-08 11:12 ` [RFC 32/37] ASoC: Intel: avs: Event tracing Cezary Rojewski
2021-12-08 11:12 ` [RFC 33/37] ASoC: Intel: avs: Machine board registration Cezary Rojewski
2021-12-08 11:12 ` [RFC 34/37] ASoC: Intel: avs: PCI driver implementation Cezary Rojewski
2021-12-08 11:12 ` [RFC 35/37] ASoC: Intel: avs: Power management Cezary Rojewski
2021-12-08 11:13 ` [RFC 36/37] ASoC: Intel: avs: SKL-based platforms support Cezary Rojewski
2021-12-08 11:13 ` [RFC 37/37] ASoC: Intel: avs: APL-based " Cezary Rojewski
2021-12-08 16:27 ` [RFC 00/37] ASoC: Intel: AVS - Audio DSP for cAVS Pierre-Louis Bossart
2021-12-08 17:51   ` Mark Brown
2021-12-09  9:59   ` Cezary Rojewski
2021-12-24 13:06 ` Mark Brown
2022-01-06 13:39   ` Cezary Rojewski
2022-01-18  9:42     ` Cezary Rojewski
2022-01-25 13:25       ` Mark Brown
2022-01-28 17:00     ` Mark Brown
2022-01-30 19:15       ` Cezary Rojewski
2022-02-02 13:26         ` Amadeusz Sławiński
2022-02-02 16:08           ` Mark Brown
2022-02-02 14:41         ` Mark Brown
2022-02-07 13:42           ` Cezary Rojewski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211208111301.1817725-30-cezary.rojewski@intel.com \
    --to=cezary.rojewski@intel.com \
    --cc=alsa-devel@alsa-project.org \
    --cc=amadeuszx.slawinski@linux.intel.com \
    --cc=broonie@kernel.org \
    --cc=cujomalainey@chromium.org \
    --cc=harshapriya.n@intel.com \
    --cc=hdegoede@redhat.com \
    --cc=lma@semihalf.com \
    --cc=peter.ujfalusi@linux.intel.com \
    --cc=pierre-louis.bossart@linux.intel.com \
    --cc=rad@semihalf.com \
    --cc=ranjani.sridharan@linux.intel.com \
    --cc=tiwai@suse.com \
    --cc=upstream@semihalf.com \
    --cc=yung-chuan.liao@linux.intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.