All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2/2] DSPBRIDGE: DSP recovery feature
@ 2010-03-05 10:12 ` Guzman Lugo, Fernando
  2010-03-19 11:51   ` Felipe Contreras
  2010-03-24 21:14   ` Felipe Contreras
  0 siblings, 2 replies; 14+ messages in thread
From: Guzman Lugo, Fernando @ 2010-03-05 10:12 UTC (permalink / raw)
  To: linux-omap; +Cc: Hiroshi DOYU, Ameya Palande, felipe.contreras

>From 7db8ef8ac43e42a486b02c0647417da2b2387552 Mon Sep 17 00:00:00 2001
From: Fernando Guzman Lugo <x0095840@ti.com>
Date: Fri, 5 Mar 2010 03:57:19 -0600
Subject: [PATCH 2/2] DSPBRIDGE: DSP recovery feature

This patch implements a workqueue in charge of reseting
DSP in case of fatal error.

Original idea taken from:
http://android.git.kernel.org/?p=kernel/omap.git;a=commit;h=8af1fbcda4b05a8777b4f3819da98340bd5d9de2

Features:
-Recovery is done reloading baseimage.
-Workqueue will load last baseimage loaded.
-In order workqueue can reload the baseimage absolute path for
baseimage must be used.
-Workqueue will wait until all the dspbridge handles are closed
-During recovery process all ioctl calls will return -EIO
-After DSP crash applications must close their dspbridge handles
and open a new dspbridge handle, bridge_open function will block until
recovery process is done or timeout.

Signed-off-by: Fernando Guzman Lugo <x0095840@ti.com>
---
 arch/arm/plat-omap/include/dspbridge/drv.h |    3 +
 drivers/dsp/bridge/Kconfig                 |    8 ++++
 drivers/dsp/bridge/pmgr/wcd.c              |   14 +++----
 drivers/dsp/bridge/rmgr/drv_interface.c    |   63 ++++++++++++++++++++++++++++
 drivers/dsp/bridge/rmgr/proc.c             |    9 +---
 drivers/dsp/bridge/wmd/ue_deh.c            |    6 ++-
 6 files changed, 88 insertions(+), 15 deletions(-)

diff --git a/arch/arm/plat-omap/include/dspbridge/drv.h b/arch/arm/plat-omap/include/dspbridge/drv.h
index 2191a87..d6e1c73 100644
--- a/arch/arm/plat-omap/include/dspbridge/drv.h
+++ b/arch/arm/plat-omap/include/dspbridge/drv.h
@@ -389,4 +389,7 @@ extern u32 drv_request_resources(u32 dw_context, u32 *pDevNodeString);
  */
 extern u32 drv_release_resources(u32 dw_context, struct drv_object *hdrv_obj);
 
+#ifdef CONFIG_BRIDGE_RECOVERY
+	void bridge_recover_schedule(void);
+#endif
 #endif /* DRV_ */
diff --git a/drivers/dsp/bridge/Kconfig b/drivers/dsp/bridge/Kconfig
index 8d33142..a0cf5f0 100644
--- a/drivers/dsp/bridge/Kconfig
+++ b/drivers/dsp/bridge/Kconfig
@@ -52,6 +52,14 @@ config WDT_TIMEOUT
 	   Watchdog timer timeout value, after that time if the watchdog timer
 	   counter is not reset the wdt overflow interrupt will be triggered
 
+config BRIDGE_RECOVERY
+	bool "DSP Recovery Support"
+	depends on MPU_BRIDGE
+	help
+	  In case of DSP fatal error, BRIDGE driver will try to
+	  recover itself.
+
+
 comment "Bridge Notifications"
 	depends on MPU_BRIDGE
 
diff --git a/drivers/dsp/bridge/pmgr/wcd.c b/drivers/dsp/bridge/pmgr/wcd.c
index 3338cca..c8b53c2 100644
--- a/drivers/dsp/bridge/pmgr/wcd.c
+++ b/drivers/dsp/bridge/pmgr/wcd.c
@@ -381,7 +381,7 @@ u32 wcd_init_complete2(void)
 	u32 status = DSP_SOK;
 	struct cfg_devnode *dev_node;
 	struct dev_object *hdev_obj;
-	u32 dev_type;
+	u32 dev_type, tmp;
 
 	DBC_REQUIRE(wcd_c_refs > 0);
 
@@ -396,13 +396,11 @@ u32 wcd_init_complete2(void)
 		if (DSP_FAILED(dev_get_dev_type(hdev_obj, &dev_type)))
 			continue;
 
-		if ((dev_type == DSP_UNIT) || (dev_type == IVA_UNIT)) {
-			if (DSP_FAILED(proc_auto_start(dev_node, hdev_obj))) {
-				status = DSP_EFAIL;
-				/* break; */
-			}
-		}
-	}			/* End For Loop */
+		if ((dev_type == DSP_UNIT) || (dev_type == IVA_UNIT))
+			if (cfg_get_auto_start(dev_node, &tmp) == DSP_SOK
+									&& tmp)
+				proc_auto_start(dev_node, hdev_obj);
+	}
 
 	return status;
 }
diff --git a/drivers/dsp/bridge/rmgr/drv_interface.c b/drivers/dsp/bridge/rmgr/drv_interface.c
index b866661..0fe7ed3 100644
--- a/drivers/dsp/bridge/rmgr/drv_interface.c
+++ b/drivers/dsp/bridge/rmgr/drv_interface.c
@@ -91,6 +91,15 @@ static s32 shm_size = 0x500000;	/* 5 MB */
 static u32 phys_mempool_base;
 static u32 phys_mempool_size;
 static int tc_wordswapon;	/* Default value is always false */
+#ifdef CONFIG_BRIDGE_RECOVERY
+#define REC_TIMEOUT 5000	/*recovery timeout in msecs */
+static atomic_t bridge_cref;	/* number of bridge open handles */
+static struct workqueue_struct *bridge_rec_queue;
+static struct work_struct bridge_recovery_work;
+static DECLARE_COMPLETION_ONSTACK(bridge_comp);
+static DECLARE_COMPLETION_ONSTACK(bridge_open_comp);
+static bool recover;
+#endif
 
 #ifdef CONFIG_PM
 struct omap34_xx_bridge_suspend_data {
@@ -189,6 +198,33 @@ u32 vdd1_dsp_freq[6][4] = {
 	{0, 430000, 355000, 430000},
 };
 
+#ifdef CONFIG_BRIDGE_RECOVERY
+static void bridge_recover(struct work_struct *work)
+{
+	struct dev_object *dev;
+	struct cfg_devnode *dev_node;
+	if (atomic_read(&bridge_cref)) {
+		INIT_COMPLETION(bridge_comp);
+		while (!wait_for_completion_timeout(&bridge_comp,
+						msecs_to_jiffies(REC_TIMEOUT)))
+			pr_info("%s:%d handle(s) still opened\n",
+					__func__, atomic_read(&bridge_cref));
+	}
+	dev = dev_get_first();
+	dev_get_dev_node(dev, &dev_node);
+	if (!dev_node || DSP_FAILED(proc_auto_start(dev_node, dev)))
+		pr_err("DSP could not be restarted\n");
+	recover = false;
+	complete_all(&bridge_open_comp);
+}
+
+void bridge_recover_schedule(void)
+{
+	INIT_COMPLETION(bridge_open_comp);
+	recover = true;
+	queue_work(bridge_rec_queue, &bridge_recovery_work);
+}
+#endif
 #ifdef CONFIG_BRIDGE_DVFS
 static int dspbridge_post_scale(struct notifier_block *op, unsigned long level,
 				void *ptr)
@@ -323,6 +359,12 @@ static int __devinit omap34_xx_bridge_probe(struct platform_device *pdev)
 		}
 	}
 
+#ifdef CONFIG_BRIDGE_RECOVERY
+	bridge_rec_queue = create_workqueue("bridge_rec_queue");
+	INIT_WORK(&bridge_recovery_work, bridge_recover);
+	INIT_COMPLETION(bridge_comp);
+#endif
+
 	DBC_ASSERT(status == 0);
 	DBC_ASSERT(DSP_SUCCEEDED(init_status));
 
@@ -443,6 +485,13 @@ static int bridge_open(struct inode *ip, struct file *filp)
 	 * dload_allocate a new process context and insert it into global
 	 * process context list.
 	 */
+
+#ifdef CONFIG_BRIDGE_RECOVERY
+	if (recover)
+		if (wait_for_completion_interruptible_timeout(&bridge_open_comp,
+				msecs_to_jiffies(REC_TIMEOUT * 3)) <= 0)
+			return -EBUSY;
+#endif
 	pr_ctxt = mem_calloc(sizeof(struct process_context), MEM_PAGED);
 	if (pr_ctxt) {
 		pr_ctxt->res_state = PROC_RES_ALLOCATED;
@@ -458,6 +507,10 @@ static int bridge_open(struct inode *ip, struct file *filp)
 
 	filp->private_data = pr_ctxt;
 
+#ifdef CONFIG_BRIDGE_RECOVERY
+	if (!status)
+		atomic_inc(&bridge_cref);
+#endif
 	return status;
 }
 
@@ -484,6 +537,10 @@ static int bridge_release(struct inode *ip, struct file *filp)
 	filp->private_data = NULL;
 
 err:
+#ifdef CONFIG_BRIDGE_RECOVERY
+	if (!atomic_dec_return(&bridge_cref))
+		complete(&bridge_comp);
+#endif
 	return status;
 }
 
@@ -496,6 +553,12 @@ static long bridge_ioctl(struct file *filp, unsigned int code,
 	union Trapped_Args buf_in;
 
 	DBC_REQUIRE(filp != NULL);
+#ifdef CONFIG_BRIDGE_RECOVERY
+	if (recover) {
+		status = -EIO;
+		goto err;
+	}
+#endif
 #ifdef CONFIG_PM
 	status = omap34_xxbridge_suspend_lockout(&bridge_suspend_data, filp);
 	if (status != 0)
diff --git a/drivers/dsp/bridge/rmgr/proc.c b/drivers/dsp/bridge/rmgr/proc.c
index 5bc7ec7..1c94729 100644
--- a/drivers/dsp/bridge/rmgr/proc.c
+++ b/drivers/dsp/bridge/rmgr/proc.c
@@ -53,6 +53,7 @@
 #include <dspbridge/msg.h>
 #include <dspbridge/wmdioctl.h>
 #include <dspbridge/drv.h>
+#include <dspbridge/reg.h>
 
 /*  ----------------------------------- This */
 #include <dspbridge/proc.h>
@@ -257,7 +258,6 @@ u32 proc_auto_start(struct cfg_devnode *dev_node_obj,
 		    struct dev_object *hdev_obj)
 {
 	u32 status = DSP_EFAIL;
-	u32 dw_auto_start = 0;	/* autostart flag */
 	struct proc_object *pProcObject;
 	char sz_exec_file[MAXCMDLINELEN];
 	char *argv[2];
@@ -293,11 +293,6 @@ u32 proc_auto_start(struct cfg_devnode *dev_node_obj,
 	if (DSP_FAILED(status))
 		goto func_cont;
 
-	status = cfg_get_auto_start(dev_node_obj, &dw_auto_start);
-	if (DSP_FAILED(status) || !dw_auto_start) {
-		status = DSP_EFAIL;
-		goto func_cont;
-	}
 	/* Get the default executable for this board... */
 	dev_get_dev_type(hdev_obj, (u32 *) &dev_type);
 	pProcObject->processor_id = dev_type;
@@ -1024,6 +1019,8 @@ u32 proc_load(void *hprocessor, const s32 argc_index, const char **user_args,
 		if (DSP_SUCCEEDED((*pProcObject->intf_fxns->pfn_brd_status)
 				  (pProcObject->hwmd_context, &brd_state))) {
 			pr_info("%s: Processor Loaded %s\n", __func__, pargv0);
+			reg_set_value(DEFEXEC, (u8 *)pargv0,
+							strlen(pargv0) + 1);
 			DBC_ASSERT(brd_state == BRD_LOADED);
 		}
 	}
diff --git a/drivers/dsp/bridge/wmd/ue_deh.c b/drivers/dsp/bridge/wmd/ue_deh.c
index 5f1ea43..8b4e9be 100644
--- a/drivers/dsp/bridge/wmd/ue_deh.c
+++ b/drivers/dsp/bridge/wmd/ue_deh.c
@@ -287,8 +287,12 @@ void bridge_deh_notify(struct deh_mgr *hdeh_mgr, u32 ulEventMask, u32 dwErrInfo)
 		}
 
 		/* Filter subsequent notifications when an error occurs */
-		if (dev_context->dw_brd_state != BRD_ERROR)
+		if (dev_context->dw_brd_state != BRD_ERROR) {
 			ntfy_notify(deh_mgr_obj->ntfy_obj, ulEventMask);
+#ifdef CONFIG_BRIDGE_RECOVERY
+			bridge_recover_schedule();
+#endif
+		}
 
 		/* Set the Board state as ERROR */
 		dev_context->dw_brd_state = BRD_ERROR;
-- 
1.6.0.4


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/2] DSPBRIDGE: DSP recovery feature
  2010-03-05 10:12 ` [PATCH 2/2] DSPBRIDGE: DSP recovery feature Guzman Lugo, Fernando
@ 2010-03-19 11:51   ` Felipe Contreras
  2010-03-19 15:53     ` Felipe Contreras
  2010-03-24 21:14   ` Felipe Contreras
  1 sibling, 1 reply; 14+ messages in thread
From: Felipe Contreras @ 2010-03-19 11:51 UTC (permalink / raw)
  To: Guzman Lugo, Fernando
  Cc: linux-omap, Hiroshi DOYU, Ameya Palande, felipe.contreras

On Fri, Mar 5, 2010 at 12:12 PM, Guzman Lugo, Fernando <x0095840@ti.com> wrote:
> --- a/arch/arm/plat-omap/include/dspbridge/drv.h
> +++ b/arch/arm/plat-omap/include/dspbridge/drv.h
> @@ -389,4 +389,7 @@ extern u32 drv_request_resources(u32 dw_context, u32 *pDevNodeString);
>  */
>  extern u32 drv_release_resources(u32 dw_context, struct drv_object *hdrv_obj);
>
> +#ifdef CONFIG_BRIDGE_RECOVERY
> +       void bridge_recover_schedule(void);

The preprocessor macros shouldn't affect indenting AFAIK.

> +#endif
>  #endif /* DRV_ */

> diff --git a/drivers/dsp/bridge/Kconfig b/drivers/dsp/bridge/Kconfig
> index 8d33142..a0cf5f0 100644
> --- a/drivers/dsp/bridge/Kconfig
> +++ b/drivers/dsp/bridge/Kconfig
> @@ -52,6 +52,14 @@ config WDT_TIMEOUT
>           Watchdog timer timeout value, after that time if the watchdog timer
>           counter is not reset the wdt overflow interrupt will be triggered
>
> +config BRIDGE_RECOVERY
> +       bool "DSP Recovery Support"
> +       depends on MPU_BRIDGE
> +       help
> +         In case of DSP fatal error, BRIDGE driver will try to
> +         recover itself.
> +
> +

Extra unnecessary space.

>  comment "Bridge Notifications"
>        depends on MPU_BRIDGE
>

I tried to rebase this patch on top of the latest head and the
user-space client never gets notified of the MMUFAULT. After manually
killing the process, the DSP is restarted correctly though.

Cheers.

-- 
Felipe Contreras
--
To unsubscribe from this list: send the line "unsubscribe linux-omap" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/2] DSPBRIDGE: DSP recovery feature
  2010-03-19 11:51   ` Felipe Contreras
@ 2010-03-19 15:53     ` Felipe Contreras
  2010-03-19 16:05       ` Hebbar, Shivananda
  0 siblings, 1 reply; 14+ messages in thread
From: Felipe Contreras @ 2010-03-19 15:53 UTC (permalink / raw)
  To: Guzman Lugo, Fernando
  Cc: linux-omap, Hiroshi DOYU, Ameya Palande, felipe.contreras

On Fri, Mar 19, 2010 at 1:51 PM, Felipe Contreras
<felipe.contreras@gmail.com> wrote:
> I tried to rebase this patch on top of the latest head and the
> user-space client never gets notified of the MMUFAULT. After manually
> killing the process, the DSP is restarted correctly though.

Strike that. MMUFAULTS are not notified even before this patch... great.

-- 
Felipe Contreras

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH 2/2] DSPBRIDGE: DSP recovery feature
  2010-03-19 15:53     ` Felipe Contreras
@ 2010-03-19 16:05       ` Hebbar, Shivananda
  2010-03-19 16:18         ` Felipe Contreras
  0 siblings, 1 reply; 14+ messages in thread
From: Hebbar, Shivananda @ 2010-03-19 16:05 UTC (permalink / raw)
  To: Felipe Contreras, Guzman Lugo, Fernando
  Cc: linux-omap, Hiroshi DOYU, Ameya Palande, felipe.contreras

> -----Original Message-----
> From: linux-omap-owner@vger.kernel.org 
> [mailto:linux-omap-owner@vger.kernel.org] On Behalf Of Felipe 
> Contreras
> Sent: Friday, March 19, 2010 10:54 AM
> To: Guzman Lugo, Fernando
> Cc: linux-omap@vger.kernel.org; Hiroshi DOYU; Ameya Palande; 
> felipe.contreras@nokia.com
> Subject: Re: [PATCH 2/2] DSPBRIDGE: DSP recovery feature
> 
> On Fri, Mar 19, 2010 at 1:51 PM, Felipe Contreras 
> <felipe.contreras@gmail.com> wrote:
> > I tried to rebase this patch on top of the latest head and the 
> > user-space client never gets notified of the MMUFAULT. 
> After manually 
> > killing the process, the DSP is restarted correctly though.
> 
> Strike that. MMUFAULTS are not notified even before this 
> patch... great.
Client app must register for MMUFault/DSPSysError events. Then only
You will receive notifications.

--Shivananda


> 
> --
> Felipe Contreras
> --
> To unsubscribe from this list: send the line "unsubscribe 
> linux-omap" in the body of a message to 
> majordomo@vger.kernel.org More majordomo info at  
> http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/2] DSPBRIDGE: DSP recovery feature
  2010-03-19 16:05       ` Hebbar, Shivananda
@ 2010-03-19 16:18         ` Felipe Contreras
  2010-03-19 19:00           ` Felipe Contreras
  0 siblings, 1 reply; 14+ messages in thread
From: Felipe Contreras @ 2010-03-19 16:18 UTC (permalink / raw)
  To: Hebbar, Shivananda
  Cc: Guzman Lugo, Fernando, linux-omap, Hiroshi DOYU, Ameya Palande,
	felipe.contreras

On Fri, Mar 19, 2010 at 6:05 PM, Hebbar, Shivananda <x0hebbar@ti.com> wrote:
> Client app must register for MMUFault/DSPSysError events. Then only
> You will receive notifications.

It is registered, and it was receiving notifications on old versions
of the bridge... not any more.

-- 
Felipe Contreras

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/2] DSPBRIDGE: DSP recovery feature
  2010-03-19 16:18         ` Felipe Contreras
@ 2010-03-19 19:00           ` Felipe Contreras
  2010-03-19 21:49             ` Guzman Lugo, Fernando
  0 siblings, 1 reply; 14+ messages in thread
From: Felipe Contreras @ 2010-03-19 19:00 UTC (permalink / raw)
  To: Hebbar, Shivananda
  Cc: Guzman Lugo, Fernando, linux-omap, Hiroshi DOYU, Ameya Palande,
	felipe.contreras

On Fri, Mar 19, 2010 at 6:18 PM, Felipe Contreras
<felipe.contreras@gmail.com> wrote:
> On Fri, Mar 19, 2010 at 6:05 PM, Hebbar, Shivananda <x0hebbar@ti.com> wrote:
>> Client app must register for MMUFault/DSPSysError events. Then only
>> You will receive notifications.
>
> It is registered, and it was receiving notifications on old versions
> of the bridge... not any more.

Strike that strike... the regression happens only with this patch.
Apparently the get_events ioctl fails constantly... that's why the
MMUFAULT is not reported.

-- 
Felipe Contreras

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH 2/2] DSPBRIDGE: DSP recovery feature
  2010-03-19 19:00           ` Felipe Contreras
@ 2010-03-19 21:49             ` Guzman Lugo, Fernando
  2010-03-19 22:11               ` Felipe Contreras
  0 siblings, 1 reply; 14+ messages in thread
From: Guzman Lugo, Fernando @ 2010-03-19 21:49 UTC (permalink / raw)
  To: Felipe Contreras, Hebbar, Shivananda
  Cc: linux-omap, Hiroshi DOYU, Ameya Palande, felipe.contreras



>-----Original Message-----
>From: Felipe Contreras [mailto:felipe.contreras@gmail.com]
>Sent: Friday, March 19, 2010 1:00 PM
>To: Hebbar, Shivananda
>Cc: Guzman Lugo, Fernando; linux-omap@vger.kernel.org; Hiroshi DOYU; Ameya
>Palande; felipe.contreras@nokia.com
>Subject: Re: [PATCH 2/2] DSPBRIDGE: DSP recovery feature
>
>On Fri, Mar 19, 2010 at 6:18 PM, Felipe Contreras
><felipe.contreras@gmail.com> wrote:
>> On Fri, Mar 19, 2010 at 6:05 PM, Hebbar, Shivananda <x0hebbar@ti.com>
>wrote:
>>> Client app must register for MMUFault/DSPSysError events. Then only
>>> You will receive notifications.
>>
>> It is registered, and it was receiving notifications on old versions
>> of the bridge... not any more.
>
>Strike that strike... the regression happens only with this patch.
>Apparently the get_events ioctl fails constantly... that's why the
>MMUFAULT is not reported.

Do you mean applying DSP recovery process you are no able to receive MMUFault notifications?

I am going to check that case. Is there any possibility that the process is stuck waiting other event?

Regards,
Fernando.

>
>--
>Felipe Contreras

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/2] DSPBRIDGE: DSP recovery feature
  2010-03-19 21:49             ` Guzman Lugo, Fernando
@ 2010-03-19 22:11               ` Felipe Contreras
  2010-03-19 22:30                 ` Guzman Lugo, Fernando
  0 siblings, 1 reply; 14+ messages in thread
From: Felipe Contreras @ 2010-03-19 22:11 UTC (permalink / raw)
  To: Guzman Lugo, Fernando
  Cc: Hebbar, Shivananda, linux-omap, Hiroshi DOYU, Ameya Palande,
	felipe.contreras

On Fri, Mar 19, 2010 at 11:49 PM, Guzman Lugo, Fernando <x0095840@ti.com> wrote:
> Do you mean applying DSP recovery process you are no able to receive MMUFault notifications?
>
> I am going to check that case. Is there any possibility that the process is stuck waiting other event?

I think mgr_wait_for_bridge_events is constantly failing, so no
MMUFAULT notifications come through.

-- 
Felipe Contreras

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH 2/2] DSPBRIDGE: DSP recovery feature
  2010-03-19 22:11               ` Felipe Contreras
@ 2010-03-19 22:30                 ` Guzman Lugo, Fernando
  2010-03-23 19:29                   ` Guzman Lugo, Fernando
  0 siblings, 1 reply; 14+ messages in thread
From: Guzman Lugo, Fernando @ 2010-03-19 22:30 UTC (permalink / raw)
  To: Felipe Contreras
  Cc: Hebbar, Shivananda, linux-omap, Hiroshi DOYU, Ameya Palande,
	felipe.contreras



>-----Original Message-----
>From: Felipe Contreras [mailto:felipe.contreras@gmail.com]
>Sent: Friday, March 19, 2010 4:11 PM
>To: Guzman Lugo, Fernando
>Cc: Hebbar, Shivananda; linux-omap@vger.kernel.org; Hiroshi DOYU; Ameya
>Palande; felipe.contreras@nokia.com
>Subject: Re: [PATCH 2/2] DSPBRIDGE: DSP recovery feature
>
>On Fri, Mar 19, 2010 at 11:49 PM, Guzman Lugo, Fernando <x0095840@ti.com>
>wrote:
>> Do you mean applying DSP recovery process you are no able to receive
>MMUFault notifications?
>>
>> I am going to check that case. Is there any possibility that the process
>is stuck waiting other event?
>
>I think mgr_wait_for_bridge_events is constantly failing, so no
>MMUFAULT notifications come through.

Ok, I am going to see if the patch is changing something related to that function and debug the problem.

Regards,
Fernando.

>
>--
>Felipe Contreras

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH 2/2] DSPBRIDGE: DSP recovery feature
  2010-03-19 22:30                 ` Guzman Lugo, Fernando
@ 2010-03-23 19:29                   ` Guzman Lugo, Fernando
  2010-03-24 18:15                     ` Felipe Contreras
  0 siblings, 1 reply; 14+ messages in thread
From: Guzman Lugo, Fernando @ 2010-03-23 19:29 UTC (permalink / raw)
  To: Guzman Lugo, Fernando, Felipe Contreras
  Cc: Hebbar, Shivananda, linux-omap, Hiroshi DOYU, Ameya Palande,
	felipe.contreras

[-- Attachment #1: Type: text/plain, Size: 5197 bytes --]



I have tested just using an application (base on bridged code) which is registered for fatal events and once it receives an event it prints the received event:

status = DSPManager_WaitForEvents(notifier, EVENTS, &index,
								DSP_FOREVER);
printf("Event received %s\n", evt_name[index]);  <<< print event received
try_err_out("Catch notification signal", status);


And this is what I get:

#####Load baseimage with absolute path, so that kernel thread can load #####baseimage

# ./cexec.out /dspbridge_reco/ddspbase_tiomap3430.dof64P


#####Run listener.out

# ./listener.out
# DspManager_Open succeeded
DSPProcessor_Attach succeeded
DSP node register notify DSP_MMUFAULT succeeded
DSP node register notify DSP_SYSERROR succeeded
DSP node register notify DSP_PWRERROR succeeded
DSP node register notify DSP_StateChange succeeded
#


#####Cause MMUFault.

# ./faultapp.out ddspbase_tiomap3430.dof64P test
DSP device detected !!
DSPProcessor_Attach succeeded.
DSPNode_Allocate succeeded.
DSPNode_Create succeeded.
DSPNod***** DSPMMU FAULT ***** IRQStatus 0x2  <<< MMUFault caused
e_Run succeeded.
DSPProcessor_R***** DSPMMU FAULT ***** fault_addr 0x80000000
eserveMemory succeeded. dspAddrSbridge_deh_notify: ********** DEVICE EXCEPTION *
*********
end= 0x203f0000
DSPProcessor_Rbridge_deh_notify: DSP_MMUFAULT,err_info = 0x0
eserveMemory succeeded. dspAddrdbridge_deh_notify: DSP_MMUFAULT, High Address =
0x8000
Recv= 0x2040a000
DSPProcessor_bridge_deh_notify: DSP_MMUFAULT, Low Address = 0x0
Map succeeded.
DSPProcessor_Mapbridge_deh_notify: DSP_MMUFAULT, fault address = 0x80000000
 succeeded.
Sending DMM BUFs toprint_dsp_trace_buffer:
DSP MMU FAULT currtask:0x20061114

 DSP cmd=SETUP, DspRecvBuf=0x2b2DSPTrace: DSP MMU FAULT currtask:0x20061114

f0, DspSendBuf=0x122e8
Read 102400 bytes from input file.
Event received MMU_FAULT   <<< MMUFault detected by the application (notification was really received) >>>
Catch notification signal succeeded

^C   <<< kill faultapp.out it is stuck by doing Ctrl + C
# proc_load: Processor Loaded /dspbridge_reco/ddspbase_tiomap3430.dof64P
			<<< baseimage was reloaded successfully >>>
proc_start: dsp in running state
DspManager_Open succeeded
DSPProcessor_Attach succeeded
DSP node register notify DSP_MMUFAULT succeeded
DSP node register notify DSP_SYSERROR succeeded
DSP node register notify DSP_PWRERROR succeeded
DSP node register notify DSP_StateChange succeeded

#


#####Run dmmcopy.out sample to make sure DSP was recovered successfully

# ./dmmcopy.out  ddspbase_tiomap3430.dof64P test
DSP device detected !!
DSPProcessor_Attach succeeded.
DSPNode_Allocate succeeded.
DSPNode_Create succeeded.
DSPNode_Run succeeded.
DSPProcessor_ReserveMemory succeeded. dspAddrSend= 0x203f0000
DSPProcessor_ReserveMemory succeeded. dspAddrdRecv= 0x2040a000
DSPProcessor_Map succeeded.
DSPProcessor_Map succeeded.
Sending DMM BUFs to DSP cmd=SETUP, DspRecvBuf=0x203f02e8, DspSendBuf=0x2040a2f0
Read 102400 bytes from input file.
Writing 102400 bytes to output file.
Read 33684 bytes from input file.
Writing 33684 bytes to output file.
DSPProcessor_UnMap succeeded.
DSPProcessor_UnMap succeeded.
DSPProcessor_UnReserveMemory succeeded.
DSPProcessor_UnReserveMemory succeeded.
RunTask succeeded.

DSPNode_Terminate succeeded.procwrap_detach: deprecated dspbridge ioctl

DSPNode_Delete succeeded.
DSPProcessor_Detach succeeded.
#


You can use the application attached to see if you are able to receive notifications, if you still don't receives the notifications can you share the code you are using?


Regards,
Fernando.


>-----Original Message-----
>From: linux-omap-owner@vger.kernel.org [mailto:linux-omap-
>owner@vger.kernel.org] On Behalf Of Guzman Lugo, Fernando
>Sent: Friday, March 19, 2010 4:31 PM
>To: Felipe Contreras
>Cc: Hebbar, Shivananda; linux-omap@vger.kernel.org; Hiroshi DOYU; Ameya
>Palande; felipe.contreras@nokia.com
>Subject: RE: [PATCH 2/2] DSPBRIDGE: DSP recovery feature
>
>
>
>>-----Original Message-----
>>From: Felipe Contreras [mailto:felipe.contreras@gmail.com]
>>Sent: Friday, March 19, 2010 4:11 PM
>>To: Guzman Lugo, Fernando
>>Cc: Hebbar, Shivananda; linux-omap@vger.kernel.org; Hiroshi DOYU; Ameya
>>Palande; felipe.contreras@nokia.com
>>Subject: Re: [PATCH 2/2] DSPBRIDGE: DSP recovery feature
>>
>>On Fri, Mar 19, 2010 at 11:49 PM, Guzman Lugo, Fernando <x0095840@ti.com>
>>wrote:
>>> Do you mean applying DSP recovery process you are no able to receive
>>MMUFault notifications?
>>>
>>> I am going to check that case. Is there any possibility that the process
>>is stuck waiting other event?
>>
>>I think mgr_wait_for_bridge_events is constantly failing, so no
>>MMUFAULT notifications come through.
>
>Ok, I am going to see if the patch is changing something related to that
>function and debug the problem.
>
>Regards,
>Fernando.
>
>>
>>--
>>Felipe Contreras
>--
>To unsubscribe from this list: send the line "unsubscribe linux-omap" in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at  http://vger.kernel.org/majordomo-info.html

[-- Attachment #2: listener.c --]
[-- Type: text/plain, Size: 3003 bytes --]

/*
 * Bridge driver Daemon
 * DSP Recovery feature for TI OMAP processors.
 *
 * Copyright (C) 2009 Texas Instruments, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * THIS PROGRAM IS PROVIDED ''AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 */

#include <stdio.h>
#include <string.h>

#include <dbapi.h>

#define try_err_out(msg, err)						\
do {									\
    if (DSP_FAILED(err)) {						\
	printf("%s failed : Err Num = %lx\n", msg, err);		\
	goto out;							\
    } else								\
	printf("%s succeeded\n", msg);					\
} while (0);

#define EVENTS 4
#define ROOT_ACCESS	1406

char *evt_name[5] = {"MMU_FAULT", "SYS_ERROR", "PWR_ERROR",
		 "STATE_CHANGE", "UNKNOWN"};


unsigned long bridge_listener(void)
{
	DSP_HPROCESSOR proc;
	unsigned int index = 0, i;
	unsigned long status = DSP_SOK;
	struct DSP_NOTIFICATION *notifier[EVENTS];

	for (i = 0; i < EVENTS; i++) {
		notifier[i] = malloc(sizeof(struct DSP_NOTIFICATION));
		if (!notifier[i])
			return DSP_EMEMORY;
		memset(notifier[i], 0, sizeof(struct DSP_NOTIFICATION));
	}

	/* Big listener loop */
	while (1) {
		status = DspManager_Open(ROOT_ACCESS, NULL);
		try_err_out("DspManager_Open", status);
		status = DSPProcessor_Attach(0, NULL, &proc);
		try_err_out("DSPProcessor_Attach", status);

		/* Register notify objects */
		status = DSPProcessor_RegisterNotify(proc, DSP_MMUFAULT,
					DSP_SIGNALEVENT, notifier[0]);
		try_err_out("DSP node register notify DSP_MMUFAULT", status);

		status = DSPProcessor_RegisterNotify(proc, DSP_SYSERROR,
					DSP_SIGNALEVENT, notifier[1]);
		try_err_out("DSP node register notify DSP_SYSERROR", status);

		status = DSPProcessor_RegisterNotify(proc, DSP_PWRERROR,
					DSP_SIGNALEVENT, notifier[2]);
		try_err_out("DSP node register notify DSP_PWRERROR", status);

		status = DSPProcessor_RegisterNotify(proc,
			DSP_PROCESSORSTATECHANGE, DSP_SIGNALEVENT, notifier[3]);
		try_err_out("DSP node register notify DSP_StateChange", status);

		status = DSPManager_WaitForEvents(notifier, EVENTS, &index,
								DSP_FOREVER);
		printf("Event received %s\n", evt_name[index]);
		try_err_out("Catch notification signal", status);
		status = DspManager_Close(0, NULL);
	}

out:
	status = DSPProcessor_Detach(proc);
	status = DspManager_Close(0, NULL);

	for (i = 0; i < EVENTS; i++)
		free(notifier[i]);

	return status;
}


int main ()
{
	pid_t child_pid, child_sid;

	/* Fork off the parent process */
	child_pid = fork();
	if (child_pid < 0) {
		exit(1); 	/* Failure */
	}
	/* If we got a good PID, then we can exit the parent process. */
	if (child_pid > 0) {
		exit(0);	/* Succeess */
	}
	/* Create a new SID for the child process */
	child_sid = setsid();
	if (child_sid < 0)
		exit(0);

	bridge_listener();

	return 0;
}



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/2] DSPBRIDGE: DSP recovery feature
  2010-03-23 19:29                   ` Guzman Lugo, Fernando
@ 2010-03-24 18:15                     ` Felipe Contreras
  2010-03-24 19:18                       ` Guzman Lugo, Fernando
  0 siblings, 1 reply; 14+ messages in thread
From: Felipe Contreras @ 2010-03-24 18:15 UTC (permalink / raw)
  To: Guzman Lugo, Fernando
  Cc: Hebbar, Shivananda, linux-omap, Hiroshi DOYU, Ameya Palande,
	felipe.contreras

[-- Attachment #1: Type: text/plain, Size: 1150 bytes --]

On Tue, Mar 23, 2010 at 9:29 PM, Guzman Lugo, Fernando <x0095840@ti.com> wrote:
> I have tested just using an application (base on bridged code) which is registered for fatal events and once it receives an event it prints the received event:

Ok, I ran your test and it works, however, I tried to modify it to fit
what I'm doing in gst-dsp and it turns out it's very easy for this
code to reboot the device. I'm attaching the test.

Also, I found the exact reason why my code fails but yours not. I do
something like this:

while (!done) {
  dsp_wait_for_events(&index);

  if (index == 0) {
    /* node message */
    while (true) {
      if (!dsp_node_get_message(&msg))
        break;
      handle_message(msg);
    }
  }
}

So, before your patch, the get_message() failed, and the next
wait_for_events() succeeded and returned the MMU fault. Now, the
get_message() fails, and so does the wait_for_events().

The only way to make that code work with your patch is to remove the
inner while, so wait_for_events() and get_message() are always run one
after the other.

That is breaking old behavior and should be fixed, right?

-- 
Felipe Contreras

[-- Attachment #2: reboot.c --]
[-- Type: text/x-csrc, Size: 1334 bytes --]

#include <stdio.h>
#include <string.h>

#include <dbapi.h>

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))

static char *e_name[] = { "MMU_FAULT", "SYS_ERROR", "PWR_ERROR", "STATE_CHANGE", "UNKNOWN" };

void bridge_listener(void)
{
	DSP_HPROCESSOR proc;
	unsigned index = 0, i;
	long status;
	struct DSP_NOTIFICATION *events[3];

	for (i = 0; i < ARRAY_SIZE(events); i++)
		events[i] = calloc(1, sizeof(**events));

	status = DspManager_Open(0, NULL);
	if (status < 0) {
		printf("open failed\n");
		goto out;
	}

	status = DSPProcessor_Attach(0, NULL, &proc);
	if (status < 0) {
		printf("attach failed\n");
		goto out;
	}

	status = DSPProcessor_RegisterNotify(proc, DSP_MMUFAULT, DSP_SIGNALEVENT, events[0]);
	if (status < 0) {
		printf("register mmu fault failed\n");
		goto out;
	}

	status = DSPProcessor_RegisterNotify(proc, DSP_SYSERROR, DSP_SIGNALEVENT, events[1]);
	if (status < 0) {
		printf("register sys error failed\n");
		goto out;
	}

	status = DSPManager_WaitForEvents(events, ARRAY_SIZE(events), &index, DSP_FOREVER);
	if (status >= 0)
		printf("event received %s\n", e_name[index]);
	else
		printf("wait for events failed\n");

out:
	DSPProcessor_Detach(proc);
	DspManager_Close(0, NULL);

	for (i = 0; i < ARRAY_SIZE(events); i++)
		free(events[i]);
}

int main(void)
{
	bridge_listener();

	return 0;
}

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH 2/2] DSPBRIDGE: DSP recovery feature
  2010-03-24 18:15                     ` Felipe Contreras
@ 2010-03-24 19:18                       ` Guzman Lugo, Fernando
  2010-03-24 19:49                         ` Felipe Contreras
  0 siblings, 1 reply; 14+ messages in thread
From: Guzman Lugo, Fernando @ 2010-03-24 19:18 UTC (permalink / raw)
  To: Felipe Contreras
  Cc: Hebbar, Shivananda, linux-omap, Hiroshi DOYU, Ameya Palande,
	felipe.contreras


Hi Felipe,

>-----Original Message-----
>From: Felipe Contreras [mailto:felipe.contreras@gmail.com]
>Sent: Wednesday, March 24, 2010 12:15 PM
>To: Guzman Lugo, Fernando
>Cc: Hebbar, Shivananda; linux-omap@vger.kernel.org; Hiroshi DOYU; Ameya
>Palande; felipe.contreras@nokia.com
>Subject: Re: [PATCH 2/2] DSPBRIDGE: DSP recovery feature
>
>On Tue, Mar 23, 2010 at 9:29 PM, Guzman Lugo, Fernando <x0095840@ti.com>
>wrote:
>> I have tested just using an application (base on bridged code) which is
>registered for fatal events and once it receives an event it prints the
>received event:
>
>Ok, I ran your test and it works, however, I tried to modify it to fit
>what I'm doing in gst-dsp and it turns out it's very easy for this
>code to reboot the device. I'm attaching the test.
>
>Also, I found the exact reason why my code fails but yours not. I do
>something like this:
>
>while (!done) {
>  dsp_wait_for_events(&index);
>
>  if (index == 0) {
>    /* node message */
>    while (true) {
>      if (!dsp_node_get_message(&msg))
>        break;
>      handle_message(msg);
>    }
>  }
>}
>
>So, before your patch, the get_message() failed, and the next
>wait_for_events() succeeded and returned the MMU fault. Now, the
>get_message() fails, and so does the wait_for_events().

The think is that the now after fatal error and recovery process start all bridge ioctl's will fail with -EIO (that was the requirement when I did the code)

@@ -496,6 +553,12 @@ static long bridge_ioctl(struct file *filp, unsigned int code,
 	union Trapped_Args buf_in;
 
 	DBC_REQUIRE(filp != NULL);
+#ifdef CONFIG_BRIDGE_RECOVERY
+	if (recover) {
+		status = -EIO;
+		goto err;
+	}
+#endif

So you if you call dsp_node_get_message and then a fatal error happens, dsp_node_get_message will failed and if you what to call dsp_wait_for_events will fail because we are in recovery process (before it was succeeded because wait_for_events ioctl can go through and the MMUFault event was already signaled).

To sum up:
With this patch in order to be notified of fatal errors you need to register for those and call dsp_wait_for_events before the fatal error occurs. Otherwise dsp_wait_for_events will fail.

So I think you need a dedicate thread to receive fatal errors.

Regards,
Fernando.
>
>The only way to make that code work with your patch is to remove the
>inner while, so wait_for_events() and get_message() are always run one
>after the other.
>
>That is breaking old behavior and should be fixed, right?
>
>--
>Felipe Contreras

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/2] DSPBRIDGE: DSP recovery feature
  2010-03-24 19:18                       ` Guzman Lugo, Fernando
@ 2010-03-24 19:49                         ` Felipe Contreras
  0 siblings, 0 replies; 14+ messages in thread
From: Felipe Contreras @ 2010-03-24 19:49 UTC (permalink / raw)
  To: ext Guzman Lugo, Fernando
  Cc: Hebbar, Shivananda, linux-omap, Doyu Hiroshi (Nokia-D/Helsinki),
	Palande Ameya (Nokia-D/Helsinki)

Hey Fernando,

On Wed, Mar 24, 2010 at 08:18:36PM +0100, ext Guzman Lugo, Fernando wrote:
> 
> Hi Felipe,
> 
> >So, before your patch, the get_message() failed, and the next
> >wait_for_events() succeeded and returned the MMU fault. Now, the
> >get_message() fails, and so does the wait_for_events().
> 
> The think is that the now after fatal error and recovery process start
> all bridge ioctl's will fail with -EIO (that was the requirement when
> I did the code)
> 
> @@ -496,6 +553,12 @@ static long bridge_ioctl(struct file *filp, unsigned int code,
>  	union Trapped_Args buf_in;
>  
>  	DBC_REQUIRE(filp != NULL);
> +#ifdef CONFIG_BRIDGE_RECOVERY
> +	if (recover) {
> +		status = -EIO;
> +		goto err;
> +	}
> +#endif
> 
> So you if you call dsp_node_get_message and then a fatal error
> happens, dsp_node_get_message will failed and if you what to call
> dsp_wait_for_events will fail because we are in recovery process
> (before it was succeeded because wait_for_events ioctl can go through
> and the MMUFault event was already signaled).
> 
> To sum up:
> With this patch in order to be notified of fatal errors you need to
> register for those and call dsp_wait_for_events before the fatal error
> occurs. Otherwise dsp_wait_for_events will fail.
> 
> So I think you need a dedicate thread to receive fatal errors.

One thread to fetch messages and another thread to receive fatal errors?
Come on, that's overkill... threads are not free.

I'm ok changing gst-dsp code, but let's be clear about this; this is yet
another ABI break.

If I can detect fatal errors in wait_for_events() then I don't need to
register for MMU faults, sys erros and all that stuff. Now, how do I
distinguish a fatal wait_for_events() vs a non-fatal one (timeout).

Cheers.

-- 
Felipe Contreras

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/2] DSPBRIDGE: DSP recovery feature
  2010-03-05 10:12 ` [PATCH 2/2] DSPBRIDGE: DSP recovery feature Guzman Lugo, Fernando
  2010-03-19 11:51   ` Felipe Contreras
@ 2010-03-24 21:14   ` Felipe Contreras
  1 sibling, 0 replies; 14+ messages in thread
From: Felipe Contreras @ 2010-03-24 21:14 UTC (permalink / raw)
  To: Guzman Lugo, Fernando
  Cc: linux-omap, Hiroshi DOYU, Ameya Palande, felipe.contreras

On Fri, Mar 5, 2010 at 12:12 PM, Guzman Lugo, Fernando <x0095840@ti.com> wrote:
> This patch implements a workqueue in charge of reseting
> DSP in case of fatal error.

I found another problem. It seems if after the mmu fault I close the
driver handle, and open it again on the same process, the driver
thinks the handle was never closed.

-- 
Felipe Contreras

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2010-03-24 21:14 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <Acq8TGBkJRXTEjd3QLyd0bJ27/2C7g==>
2010-03-05 10:12 ` [PATCH 2/2] DSPBRIDGE: DSP recovery feature Guzman Lugo, Fernando
2010-03-19 11:51   ` Felipe Contreras
2010-03-19 15:53     ` Felipe Contreras
2010-03-19 16:05       ` Hebbar, Shivananda
2010-03-19 16:18         ` Felipe Contreras
2010-03-19 19:00           ` Felipe Contreras
2010-03-19 21:49             ` Guzman Lugo, Fernando
2010-03-19 22:11               ` Felipe Contreras
2010-03-19 22:30                 ` Guzman Lugo, Fernando
2010-03-23 19:29                   ` Guzman Lugo, Fernando
2010-03-24 18:15                     ` Felipe Contreras
2010-03-24 19:18                       ` Guzman Lugo, Fernando
2010-03-24 19:49                         ` Felipe Contreras
2010-03-24 21:14   ` Felipe Contreras

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.