All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC] gatchat: implement timeout setting for commands
@ 2019-09-02 21:10 Martin =?unknown-8bit?q?Hundeb=C3=B8ll?=
  2019-09-02 21:25 ` Denis Kenzior
  2019-09-05 11:07 ` [PATCHv2] " Martin =?unknown-8bit?q?Hundeb=C3=B8ll?=
  0 siblings, 2 replies; 5+ messages in thread
From: Martin =?unknown-8bit?q?Hundeb=C3=B8ll?= @ 2019-09-02 21:10 UTC (permalink / raw)
  To: ofono

[-- Attachment #1: Type: text/plain, Size: 5442 bytes --]

The adds a function to the set a chat-wide timeout for at commands. It
allows plugins to handle cases where the modem fails to respond with
either an OK or an error.

Without these timeouts, the plugin is never notified about a hanging
command, and it has no way to detect it; effectively leaving the device
in a broken state, where only a full restart of the ofono service
unbreaks it.

If a timeout occurs, the passed callback is invoked with 'ok' set to
false, and no final response in the result. This allows callbacks to
identify the timeout with this snippet:

  if (!ok) {
    decode_at_error(&error, g_at_result_final_response(result));
    if (error.type == OFONO_ERROR_TYPE_FAILURE) {
      /* handle timeout */
    }
  }
---
 gatchat/gatchat.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++
 gatchat/gatchat.h |  9 +++++++
 plugins/quectel.c |  3 ---
 3 files changed, 69 insertions(+), 3 deletions(-)

diff --git a/gatchat/gatchat.c b/gatchat/gatchat.c
index 9e777107..ba4d2346 100644
--- a/gatchat/gatchat.c
+++ b/gatchat/gatchat.c
@@ -97,6 +97,8 @@ struct at_chat {
 	gdouble inactivity_time;		/* Period of inactivity */
 	guint wakeup_timeout;			/* How long to wait for resp */
 	GTimer *wakeup_timer;			/* Keep track of elapsed time */
+	guint command_timeout;			/* How long to wait for command resp */
+	gint command_source;			/* Handle commands with no response */
 	GAtSyntax *syntax;
 	gboolean destroyed;			/* Re-entrancy guard */
 	gboolean in_read_handler;		/* Re-entrancy guard */
@@ -353,6 +355,11 @@ static void chat_cleanup(struct at_chat *chat)
 		chat->timeout_source = 0;
 	}
 
+	if (chat->command_source) {
+		g_source_remove(chat->command_source);
+		chat->command_source = 0;
+	}
+
 	g_at_syntax_unref(chat->syntax);
 	chat->syntax = NULL;
 
@@ -436,6 +443,11 @@ static void at_chat_finish_command(struct at_chat *p, gboolean ok, char *final)
 	struct at_command *cmd = g_queue_pop_head(p->command_queue);
 	GSList *response_lines;
 
+	if (p->command_source) {
+		g_source_remove(p->command_source);
+		p->command_source = 0;
+	}
+
 	/* Cannot happen, but lets be paranoid */
 	if (cmd == NULL)
 		return;
@@ -823,6 +835,22 @@ static gboolean wakeup_no_response(gpointer user_data)
 	return TRUE;
 }
 
+static gboolean command_no_response(gpointer user_data)
+{
+	struct at_chat *chat = user_data;
+	struct at_command *cmd = g_queue_peek_head(chat->command_queue);
+
+	if (chat->debugf)
+		chat->debugf("command got no response\n", chat->debug_data);
+
+	if (cmd == NULL)
+		return FALSE;
+
+	at_chat_finish_command(chat, FALSE, NULL);
+
+	return TRUE;
+}
+
 static gboolean can_write_data(gpointer data)
 {
 	struct at_chat *chat = data;
@@ -923,6 +951,12 @@ static gboolean can_write_data(gpointer data)
 	if (chat->wakeup_timer)
 		g_timer_start(chat->wakeup_timer);
 
+	/* Handle no response */
+	if (chat->command_timeout)
+		chat->command_source = g_timeout_add(chat->command_timeout,
+							command_no_response,
+							chat);
+
 	return FALSE;
 }
 
@@ -1065,11 +1099,29 @@ static gboolean at_chat_retry(struct at_chat *chat, guint id)
 	/* reset number of written bytes to re-write command */
 	chat->cmd_bytes_written = 0;
 
+	/* cancel potential timeout */
+	if (chat->command_source) {
+		g_source_remove(chat->command_source);
+		chat->command_source = 0;
+	}
+
 	chat_wakeup_writer(chat);
 
 	return TRUE;
 }
 
+static gboolean at_chat_set_timeout(struct at_chat *chat, size_t seconds)
+{
+	if (chat->command_source) {
+		g_source_remove(chat->command_source);
+		chat->command_source = 0;
+	}
+
+	chat->command_timeout = seconds * 1000;
+
+	return TRUE;
+}
+
 static struct at_notify *at_notify_create(struct at_chat *chat,
 						const char *prefix,
 						gboolean pdu)
@@ -1574,6 +1626,14 @@ gboolean g_at_chat_retry(GAtChat *chat, guint id)
 	return at_chat_retry(chat->parent, id);
 }
 
+gboolean g_at_chat_set_timeout(GAtChat *chat, size_t seconds)
+{
+	if (chat == NULL)
+		return FALSE;
+
+	return at_chat_set_timeout(chat->parent, seconds);
+}
+
 gboolean g_at_chat_cancel(GAtChat *chat, guint id)
 {
 	/* We use id 0 for wakeup commands */
diff --git a/gatchat/gatchat.h b/gatchat/gatchat.h
index 32870318..4514740b 100644
--- a/gatchat/gatchat.h
+++ b/gatchat/gatchat.h
@@ -154,6 +154,15 @@ guint g_at_chat_send_and_expect_short_prompt(GAtChat *chat, const char *cmd,
  */
 gboolean g_at_chat_retry(GAtChat *chat, guint id);
 
+/*!
+ * Set the timeout for commands. If the timeout is configured and no response
+ * is received from a written command within the timeout, the passed callback
+ * is called with ok as false, and a result that decodes to
+ * OFONO_ERROR_TYPE_FAILURE. Setting the timeout to zero disables the calling
+ * of the callback.
+ */
+gboolean g_at_chat_set_timeout(GAtChat *chat, size_t seconds);
+
 gboolean g_at_chat_cancel(GAtChat *chat, guint id);
 gboolean g_at_chat_cancel_all(GAtChat *chat);
 
diff --git a/plugins/quectel.c b/plugins/quectel.c
index ccfc6c5f..13947c78 100644
--- a/plugins/quectel.c
+++ b/plugins/quectel.c
@@ -879,9 +879,6 @@ static void init_cmd_cb(gboolean ok, GAtResult *result, void *user_data)
 
 	DBG("%p", modem);
 
-	if (!ok)
-		return;
-
 	rts_cts = ofono_modem_get_string(modem, "RtsCts");
 
 	if (strcmp(rts_cts, "on") == 0)
-- 
2.22.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [RFC] gatchat: implement timeout setting for commands
  2019-09-02 21:10 [RFC] gatchat: implement timeout setting for commands Martin =?unknown-8bit?q?Hundeb=C3=B8ll?=
@ 2019-09-02 21:25 ` Denis Kenzior
  2019-09-05 11:41   ` Martin =?unknown-8bit?q?Hundeb=C3=B8ll?=
  2019-09-05 11:07 ` [PATCHv2] " Martin =?unknown-8bit?q?Hundeb=C3=B8ll?=
  1 sibling, 1 reply; 5+ messages in thread
From: Denis Kenzior @ 2019-09-02 21:25 UTC (permalink / raw)
  To: ofono

[-- Attachment #1: Type: text/plain, Size: 1759 bytes --]

Hi Martin,

On 9/2/19 4:10 PM, Martin Hundebøll wrote:
> The adds a function to the set a chat-wide timeout for at commands. It
> allows plugins to handle cases where the modem fails to respond with
> either an OK or an error.

Okay, this is sort of a known problem without a really good solution...

> 
> Without these timeouts, the plugin is never notified about a hanging
> command, and it has no way to detect it; effectively leaving the device
> in a broken state, where only a full restart of the ofono service
> unbreaks it.

The trouble is, what if this happens on a command handled by a generic 
atmodem driver atom?  Are you going in and adding timeout handling to 
all command callbacks?

If this timeout only happens in special cases, then those can be taken 
care of by setting up your own timeout...

> 
> If a timeout occurs, the passed callback is invoked with 'ok' set to
> false, and no final response in the result. This allows callbacks to
> identify the timeout with this snippet:
> 
>    if (!ok) {
>      decode_at_error(&error, g_at_result_final_response(result));
>      if (error.type == OFONO_ERROR_TYPE_FAILURE) {
>        /* handle timeout */
>      }
>    }

About the only thing I can suggest is to create a separate timeout 
callback for the GAtChat object that the modem driver can set.  That way 
if the timeout happens, the modem driver can just reset the entire 
setup.  E.g. maybe via ofono_modem_reset.

> ---
>   gatchat/gatchat.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++
>   gatchat/gatchat.h |  9 +++++++
>   plugins/quectel.c |  3 ---

Also, I guess this part was extraneous :)

>   3 files changed, 69 insertions(+), 3 deletions(-)
> 

Regards,
-Denis

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC] gatchat: implement timeout setting for commands
  2019-09-05 11:41   ` Martin =?unknown-8bit?q?Hundeb=C3=B8ll?=
@ 2019-09-04 13:15     ` Denis Kenzior
  0 siblings, 0 replies; 5+ messages in thread
From: Denis Kenzior @ 2019-09-04 13:15 UTC (permalink / raw)
  To: ofono

[-- Attachment #1: Type: text/plain, Size: 3935 bytes --]

Hi Martin,

On 9/5/19 6:41 AM, Martin Hundebøll wrote:
> Hi Denis,
> 
> On 02/09/2019 23.25, Denis Kenzior wrote:
>> Hi Martin,
>>
>> On 9/2/19 4:10 PM, Martin Hundebøll wrote:
>>> The adds a function to the set a chat-wide timeout for at commands. It
>>> allows plugins to handle cases where the modem fails to respond with
>>> either an OK or an error.
>>
>> Okay, this is sort of a known problem without a really good solution...
>>
>>>
>>> Without these timeouts, the plugin is never notified about a hanging
>>> command, and it has no way to detect it; effectively leaving the device
>>> in a broken state, where only a full restart of the ofono service
>>> unbreaks it.
>>
>> The trouble is, what if this happens on a command handled by a generic 
>> atmodem driver atom?  Are you going in and adding timeout handling to 
>> all command callbacks?
> 
> My initial thought was that calling the command callback with ok=false 
> would be handled already. But that might be too optimistic?
> 

Sort of.  The problem is that there's no concept of a timeout at the 
protocol level.  And we don't have any idea whether the timeout really 
happened, or things are just taking a while.  Nor do we know if it is a 
bug, e.g. in GAtSyntax or just the modem not adhering to the spec.

Another issue is that even if a timeout happens, we have no idea whether 
it is safe to send any subsequent commands.  Command cancelation is a 
gray area, with some manufacturers supporting it partially and some not 
at all.  Even just blindly sending on the next command would not 
necessarily work.

Hence my point that special (and probably vendor specific) handling of 
timeouts would need to be added _everywhere_ in the generic callbacks. 
So basically exposing this is generally a bad idea.

>> If this timeout only happens in special cases, then those can be taken 
>> care of by setting up your own timeout...
> 
> Our problem is that this was seen during SIM initialization when doing 
> so too early on the quectel modem (worked around in my latest quectel 
> sim patches.) So it needs to be chat-wide.
> 

SIM initialization or modem initialization?  Maybe you want to take care 
of this prior to setting up the MUX?

>>>
>>> If a timeout occurs, the passed callback is invoked with 'ok' set to
>>> false, and no final response in the result. This allows callbacks to
>>> identify the timeout with this snippet:
>>>
>>>    if (!ok) {
>>>      decode_at_error(&error, g_at_result_final_response(result));
>>>      if (error.type == OFONO_ERROR_TYPE_FAILURE) {
>>>        /* handle timeout */
>>>      }
>>>    }
>>
>> About the only thing I can suggest is to create a separate timeout 
>> callback for the GAtChat object that the modem driver can set.  That 
>> way if the timeout happens, the modem driver can just reset the entire 
>> setup.  E.g. maybe via ofono_modem_reset.
> 
> That could work too.
> 
> I have one concern with my current patches though:
> The timeout needs to accommodate long-lasting commands like AT+COPS, 
> which is can take up to 80 seconds or so. But that is much longer than 
> the timeout for powering up the modem.

That's the other problem.  The timeout should be variable and per 
command.  Which means g_at_chat_send should be modified with a timeout 
value and that change would be quite invasive.  Given that timeouts 
shouldn't even happen, it just makes the whole thing a huge mess.

> 
> So if:
>    1: user enables modem
>    2: gatchat gets stuck with no command
>    3: set_powered_timeout() triggers
>    4: gatchat timeout fires
> 
> What happens is the user tries to enable the modem again after step 3? 
> Should the plugin enable function clean up after potential timeouts to 
> avoid step 4?

No clue.  That is completely manufacturer dependent.

Regards,
-Denis

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCHv2] gatchat: implement timeout setting for commands
  2019-09-02 21:10 [RFC] gatchat: implement timeout setting for commands Martin =?unknown-8bit?q?Hundeb=C3=B8ll?=
  2019-09-02 21:25 ` Denis Kenzior
@ 2019-09-05 11:07 ` Martin =?unknown-8bit?q?Hundeb=C3=B8ll?=
  1 sibling, 0 replies; 5+ messages in thread
From: Martin =?unknown-8bit?q?Hundeb=C3=B8ll?= @ 2019-09-05 11:07 UTC (permalink / raw)
  To: ofono

[-- Attachment #1: Type: text/plain, Size: 5391 bytes --]

The adds a function to the set a chat-wide timeout for at commands. It
allows plugins to handle cases where the modem fails to respond with
either an OK or an error.

Without these timeouts, the plugin is never notified about a hanging
command, and it has no way to detect it; effectively leaving the device
in a broken state, where only a full restart of the ofono service
unbreaks it.

If a timeout occurs, the passed callback so the plugin can reset the
modem to a known state.
---
 gatchat/gatchat.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++
 gatchat/gatchat.h | 10 +++++++
 2 files changed, 76 insertions(+)

diff --git a/gatchat/gatchat.c b/gatchat/gatchat.c
index 9e777107..8e36ca1a 100644
--- a/gatchat/gatchat.c
+++ b/gatchat/gatchat.c
@@ -97,6 +97,10 @@ struct at_chat {
 	gdouble inactivity_time;		/* Period of inactivity */
 	guint wakeup_timeout;			/* How long to wait for resp */
 	GTimer *wakeup_timer;			/* Keep track of elapsed time */
+	guint cmd_timeout_ms;			/* How long to wait for command resp */
+	gint cmd_timeout_source;		/* Handle commands with no response */
+	GAtTimeoutFunc cmd_timeout_func;	/* Function to call in case of timeout */
+	gpointer cmd_timeout_data;		/* Data to pass to cmd_timeout_func */
 	GAtSyntax *syntax;
 	gboolean destroyed;			/* Re-entrancy guard */
 	gboolean in_read_handler;		/* Re-entrancy guard */
@@ -353,6 +357,11 @@ static void chat_cleanup(struct at_chat *chat)
 		chat->timeout_source = 0;
 	}
 
+	if (chat->cmd_timeout_source) {
+		g_source_remove(chat->cmd_timeout_source);
+		chat->cmd_timeout_source = 0;
+	}
+
 	g_at_syntax_unref(chat->syntax);
 	chat->syntax = NULL;
 
@@ -436,6 +445,11 @@ static void at_chat_finish_command(struct at_chat *p, gboolean ok, char *final)
 	struct at_command *cmd = g_queue_pop_head(p->command_queue);
 	GSList *response_lines;
 
+	if (p->cmd_timeout_source) {
+		g_source_remove(p->cmd_timeout_source);
+		p->cmd_timeout_source = 0;
+	}
+
 	/* Cannot happen, but lets be paranoid */
 	if (cmd == NULL)
 		return;
@@ -823,6 +837,19 @@ static gboolean wakeup_no_response(gpointer user_data)
 	return TRUE;
 }
 
+static gboolean command_no_response(gpointer user_data)
+{
+	struct at_chat *chat = user_data;
+
+	if (chat->debugf)
+		chat->debugf("command got no response\n", chat->debug_data);
+
+	if (chat->cmd_timeout_func)
+		chat->cmd_timeout_func(chat->cmd_timeout_data);
+
+	return TRUE;
+}
+
 static gboolean can_write_data(gpointer data)
 {
 	struct at_chat *chat = data;
@@ -923,6 +950,12 @@ static gboolean can_write_data(gpointer data)
 	if (chat->wakeup_timer)
 		g_timer_start(chat->wakeup_timer);
 
+	/* Handle no response */
+	if (chat->cmd_timeout_ms)
+		chat->cmd_timeout_source = g_timeout_add(chat->cmd_timeout_ms,
+							command_no_response,
+							chat);
+
 	return FALSE;
 }
 
@@ -1065,11 +1098,34 @@ static gboolean at_chat_retry(struct at_chat *chat, guint id)
 	/* reset number of written bytes to re-write command */
 	chat->cmd_bytes_written = 0;
 
+	/* cancel potential timeout */
+	if (chat->cmd_timeout_source) {
+		g_source_remove(chat->cmd_timeout_source);
+		chat->cmd_timeout_source = 0;
+	}
+
 	chat_wakeup_writer(chat);
 
 	return TRUE;
 }
 
+static gboolean at_chat_set_command_timeout(struct at_chat *chat,
+						size_t seconds,
+						GAtTimeoutFunc func,
+						gpointer user_data)
+{
+	if (chat->cmd_timeout_source) {
+		g_source_remove(chat->cmd_timeout_source);
+		chat->cmd_timeout_source = 0;
+	}
+
+	chat->cmd_timeout_ms = seconds * 1000;
+	chat->cmd_timeout_func = func;
+	chat->cmd_timeout_data = user_data;
+
+	return TRUE;
+}
+
 static struct at_notify *at_notify_create(struct at_chat *chat,
 						const char *prefix,
 						gboolean pdu)
@@ -1574,6 +1630,16 @@ gboolean g_at_chat_retry(GAtChat *chat, guint id)
 	return at_chat_retry(chat->parent, id);
 }
 
+gboolean g_at_chat_set_command_timeout(GAtChat *chat, size_t seconds,
+					GAtTimeoutFunc func, gpointer user_data)
+{
+	if (chat == NULL)
+		return FALSE;
+
+	return at_chat_set_command_timeout(chat->parent, seconds, func,
+						user_data);
+}
+
 gboolean g_at_chat_cancel(GAtChat *chat, guint id)
 {
 	/* We use id 0 for wakeup commands */
diff --git a/gatchat/gatchat.h b/gatchat/gatchat.h
index 32870318..eb42aac5 100644
--- a/gatchat/gatchat.h
+++ b/gatchat/gatchat.h
@@ -38,6 +38,7 @@ typedef struct _GAtChat GAtChat;
 typedef void (*GAtResultFunc)(gboolean success, GAtResult *result,
 				gpointer user_data);
 typedef void (*GAtNotifyFunc)(GAtResult *result, gpointer user_data);
+typedef void (*GAtTimeoutFunc)(gpointer user_data);
 
 enum _GAtChatTerminator {
 	G_AT_CHAT_TERMINATOR_OK,
@@ -154,6 +155,15 @@ guint g_at_chat_send_and_expect_short_prompt(GAtChat *chat, const char *cmd,
  */
 gboolean g_at_chat_retry(GAtChat *chat, guint id);
 
+/*!
+ * Set the timeout for commands. If the timeout is configured and no response
+ * is received from a written command within the timeout, the callback is
+ * called to the plugin handle it.
+ */
+gboolean g_at_chat_set_command_timeout(GAtChat *chat, size_t seconds,
+					GAtTimeoutFunc func,
+					gpointer user_data);
+
 gboolean g_at_chat_cancel(GAtChat *chat, guint id);
 gboolean g_at_chat_cancel_all(GAtChat *chat);
 
-- 
2.23.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [RFC] gatchat: implement timeout setting for commands
  2019-09-02 21:25 ` Denis Kenzior
@ 2019-09-05 11:41   ` Martin =?unknown-8bit?q?Hundeb=C3=B8ll?=
  2019-09-04 13:15     ` Denis Kenzior
  0 siblings, 1 reply; 5+ messages in thread
From: Martin =?unknown-8bit?q?Hundeb=C3=B8ll?= @ 2019-09-05 11:41 UTC (permalink / raw)
  To: ofono

[-- Attachment #1: Type: text/plain, Size: 2521 bytes --]

Hi Denis,

On 02/09/2019 23.25, Denis Kenzior wrote:
> Hi Martin,
> 
> On 9/2/19 4:10 PM, Martin Hundebøll wrote:
>> The adds a function to the set a chat-wide timeout for at commands. It
>> allows plugins to handle cases where the modem fails to respond with
>> either an OK or an error.
> 
> Okay, this is sort of a known problem without a really good solution...
> 
>>
>> Without these timeouts, the plugin is never notified about a hanging
>> command, and it has no way to detect it; effectively leaving the device
>> in a broken state, where only a full restart of the ofono service
>> unbreaks it.
> 
> The trouble is, what if this happens on a command handled by a generic 
> atmodem driver atom?  Are you going in and adding timeout handling to 
> all command callbacks?

My initial thought was that calling the command callback with ok=false 
would be handled already. But that might be too optimistic?

> If this timeout only happens in special cases, then those can be taken 
> care of by setting up your own timeout...

Our problem is that this was seen during SIM initialization when doing 
so too early on the quectel modem (worked around in my latest quectel 
sim patches.) So it needs to be chat-wide.

>>
>> If a timeout occurs, the passed callback is invoked with 'ok' set to
>> false, and no final response in the result. This allows callbacks to
>> identify the timeout with this snippet:
>>
>>    if (!ok) {
>>      decode_at_error(&error, g_at_result_final_response(result));
>>      if (error.type == OFONO_ERROR_TYPE_FAILURE) {
>>        /* handle timeout */
>>      }
>>    }
> 
> About the only thing I can suggest is to create a separate timeout 
> callback for the GAtChat object that the modem driver can set.  That way 
> if the timeout happens, the modem driver can just reset the entire 
> setup.  E.g. maybe via ofono_modem_reset.

That could work too.

I have one concern with my current patches though:
The timeout needs to accommodate long-lasting commands like AT+COPS, 
which is can take up to 80 seconds or so. But that is much longer than 
the timeout for powering up the modem.

So if:
   1: user enables modem
   2: gatchat gets stuck with no command
   3: set_powered_timeout() triggers
   4: gatchat timeout fires

What happens is the user tries to enable the modem again after step 3? 
Should the plugin enable function clean up after potential timeouts to 
avoid step 4?

// Martin

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2019-09-05 11:41 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-02 21:10 [RFC] gatchat: implement timeout setting for commands Martin =?unknown-8bit?q?Hundeb=C3=B8ll?=
2019-09-02 21:25 ` Denis Kenzior
2019-09-05 11:41   ` Martin =?unknown-8bit?q?Hundeb=C3=B8ll?=
2019-09-04 13:15     ` Denis Kenzior
2019-09-05 11:07 ` [PATCHv2] " Martin =?unknown-8bit?q?Hundeb=C3=B8ll?=

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.