linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3] Bluetooth: Add ncmd=0 recovery handling
@ 2021-04-22 17:17 Manish Mandlik
  2021-04-23  7:06 ` Marcel Holtmann
  0 siblings, 1 reply; 2+ messages in thread
From: Manish Mandlik @ 2021-04-22 17:17 UTC (permalink / raw)
  To: marcel, luiz.dentz
  Cc: Alain Michaud, linux-bluetooth, chromeos-bluetooth-upstreaming,
	Manish Mandlik, Abhishek Pandit-Subedi, David S. Miller,
	Jakub Kicinski, Johan Hedberg, linux-kernel, netdev

During command status or command complete event, the controller may set
ncmd=0 indicating that it is not accepting any more commands. In such a
case, host holds off sending any more commands to the controller. If the
controller doesn't recover from such condition, host will wait forever,
until the user decides that Bluetooth is broken and power cycles
it.

This patch triggers the hardware error to reset the controller and
driver when it gets into such a state, as there is no other way out.

Reviewed-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Signed-off-by: Manish Mandlik <mmandlik@google.com>
---

Changes in v3:
- Restructure ncmd_timer scheduling in hci_event.c
- Cancel delayed work in hci_dev_do_close
- Do not inject hw error during HCI_INIT
- Update comment, add log message while injecting hw error

Changes in v2:
- Emit the hardware error when ncmd=0 occurs

 include/net/bluetooth/hci.h      |  1 +
 include/net/bluetooth/hci_core.h |  1 +
 net/bluetooth/hci_core.c         | 22 ++++++++++++++++++++++
 net/bluetooth/hci_event.c        | 22 ++++++++++++++++++----
 4 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index ea4ae551c426..c4b0650fb9ae 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -339,6 +339,7 @@ enum {
 #define HCI_PAIRING_TIMEOUT	msecs_to_jiffies(60000)	/* 60 seconds */
 #define HCI_INIT_TIMEOUT	msecs_to_jiffies(10000)	/* 10 seconds */
 #define HCI_CMD_TIMEOUT		msecs_to_jiffies(2000)	/* 2 seconds */
+#define HCI_NCMD_TIMEOUT	msecs_to_jiffies(4000)	/* 4 seconds */
 #define HCI_ACL_TX_TIMEOUT	msecs_to_jiffies(45000)	/* 45 seconds */
 #define HCI_AUTO_OFF_TIMEOUT	msecs_to_jiffies(2000)	/* 2 seconds */
 #define HCI_POWER_OFF_TIMEOUT	msecs_to_jiffies(5000)	/* 5 seconds */
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index ebdd4afe30d2..f14692b39fd5 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -470,6 +470,7 @@ struct hci_dev {
 	struct delayed_work	service_cache;
 
 	struct delayed_work	cmd_timer;
+	struct delayed_work	ncmd_timer;
 
 	struct work_struct	rx_work;
 	struct work_struct	cmd_work;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index b0d9c36acc03..37789c5d0579 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1723,6 +1723,7 @@ int hci_dev_do_close(struct hci_dev *hdev)
 	}
 
 	cancel_delayed_work(&hdev->power_off);
+	cancel_delayed_work(&hdev->ncmd_timer);
 
 	hci_request_cancel_all(hdev);
 	hci_req_sync_lock(hdev);
@@ -2769,6 +2770,24 @@ static void hci_cmd_timeout(struct work_struct *work)
 	queue_work(hdev->workqueue, &hdev->cmd_work);
 }
 
+/* HCI ncmd timer function */
+static void hci_ncmd_timeout(struct work_struct *work)
+{
+	struct hci_dev *hdev = container_of(work, struct hci_dev,
+					    ncmd_timer.work);
+
+	bt_dev_err(hdev, "Controller not accepting commands anymore: ncmd = 0");
+
+	/* No hardware error event needs to be injected if the ncmd timer
+	 * triggers during HCI_INIT.
+	 */
+	if (test_bit(HCI_INIT, &hdev->flags))
+		return;
+
+	/* This is an irrecoverable state, inject hardware error event */
+	hci_reset_dev(hdev);
+}
+
 struct oob_data *hci_find_remote_oob_data(struct hci_dev *hdev,
 					  bdaddr_t *bdaddr, u8 bdaddr_type)
 {
@@ -3831,6 +3850,7 @@ struct hci_dev *hci_alloc_dev(void)
 	init_waitqueue_head(&hdev->suspend_wait_q);
 
 	INIT_DELAYED_WORK(&hdev->cmd_timer, hci_cmd_timeout);
+	INIT_DELAYED_WORK(&hdev->ncmd_timer, hci_ncmd_timeout);
 
 	hci_request_setup(hdev);
 
@@ -4068,6 +4088,8 @@ int hci_reset_dev(struct hci_dev *hdev)
 	hci_skb_pkt_type(skb) = HCI_EVENT_PKT;
 	skb_put_data(skb, hw_err, 3);
 
+	bt_dev_err(hdev, "Injecting HCI hardware error event");
+
 	/* Send Hardware Error to upper stack */
 	return hci_recv_frame(hdev, skb);
 }
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index cf2f4a0abdbd..8cd4bcf5dd00 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -3635,8 +3635,15 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
 	if (*opcode != HCI_OP_NOP)
 		cancel_delayed_work(&hdev->cmd_timer);
 
-	if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags))
-		atomic_set(&hdev->cmd_cnt, 1);
+	if (!test_bit(HCI_RESET, &hdev->flags)) {
+		if (ev->ncmd) {
+			cancel_delayed_work(&hdev->ncmd_timer);
+			atomic_set(&hdev->cmd_cnt, 1);
+		} else {
+			schedule_delayed_work(&hdev->ncmd_timer,
+					      HCI_NCMD_TIMEOUT);
+		}
+	}
 
 	hci_req_cmd_complete(hdev, *opcode, *status, req_complete,
 			     req_complete_skb);
@@ -3740,8 +3747,15 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb,
 	if (*opcode != HCI_OP_NOP)
 		cancel_delayed_work(&hdev->cmd_timer);
 
-	if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags))
-		atomic_set(&hdev->cmd_cnt, 1);
+	if (!test_bit(HCI_RESET, &hdev->flags)) {
+		if (ev->ncmd) {
+			cancel_delayed_work(&hdev->ncmd_timer);
+			atomic_set(&hdev->cmd_cnt, 1);
+		} else {
+			schedule_delayed_work(&hdev->ncmd_timer,
+					      HCI_NCMD_TIMEOUT);
+		}
+	}
 
 	/* Indicate request completion if the command failed. Also, if
 	 * we're not waiting for a special event and we get a success
-- 
2.31.1.498.g6c1eba8ee3d-goog


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH v3] Bluetooth: Add ncmd=0 recovery handling
  2021-04-22 17:17 [PATCH v3] Bluetooth: Add ncmd=0 recovery handling Manish Mandlik
@ 2021-04-23  7:06 ` Marcel Holtmann
  0 siblings, 0 replies; 2+ messages in thread
From: Marcel Holtmann @ 2021-04-23  7:06 UTC (permalink / raw)
  To: Manish Mandlik
  Cc: Luiz Augusto von Dentz, Alain Michaud,
	Bluetooth Kernel Mailing List, CrosBT Upstreaming,
	Abhishek Pandit-Subedi, David S. Miller, Jakub Kicinski,
	Johan Hedberg, open list, netdev

Hi Manish,

> During command status or command complete event, the controller may set
> ncmd=0 indicating that it is not accepting any more commands. In such a
> case, host holds off sending any more commands to the controller. If the
> controller doesn't recover from such condition, host will wait forever,
> until the user decides that the Bluetooth is broken and may power cycles
> the Bluetooth.
> 
> This patch triggers the hardware error to reset the controller and
> driver when it gets into such state as there is no other wat out.
> 
> Reviewed-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
> Signed-off-by: Manish Mandlik <mmandlik@google.com>
> ---
> 
> Changes in v3:
> - Restructure ncmd_timer scheduling in hci_event.c
> - Cancel delayed work in hci_dev_do_close
> - Do not inject hw error during HCI_INIT
> - Update comment, add log message while injecting hw error
> 
> Changes in v2:
> - Emit the hardware error when ncmd=0 occurs
> 
> include/net/bluetooth/hci.h      |  1 +
> include/net/bluetooth/hci_core.h |  1 +
> net/bluetooth/hci_core.c         | 22 ++++++++++++++++++++++
> net/bluetooth/hci_event.c        | 22 ++++++++++++++++++----
> 4 files changed, 42 insertions(+), 4 deletions(-)
> 
> diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
> index ea4ae551c426..c4b0650fb9ae 100644
> --- a/include/net/bluetooth/hci.h
> +++ b/include/net/bluetooth/hci.h
> @@ -339,6 +339,7 @@ enum {
> #define HCI_PAIRING_TIMEOUT	msecs_to_jiffies(60000)	/* 60 seconds */
> #define HCI_INIT_TIMEOUT	msecs_to_jiffies(10000)	/* 10 seconds */
> #define HCI_CMD_TIMEOUT		msecs_to_jiffies(2000)	/* 2 seconds */
> +#define HCI_NCMD_TIMEOUT	msecs_to_jiffies(4000)	/* 4 seconds */
> #define HCI_ACL_TX_TIMEOUT	msecs_to_jiffies(45000)	/* 45 seconds */
> #define HCI_AUTO_OFF_TIMEOUT	msecs_to_jiffies(2000)	/* 2 seconds */
> #define HCI_POWER_OFF_TIMEOUT	msecs_to_jiffies(5000)	/* 5 seconds */
> diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
> index ebdd4afe30d2..f14692b39fd5 100644
> --- a/include/net/bluetooth/hci_core.h
> +++ b/include/net/bluetooth/hci_core.h
> @@ -470,6 +470,7 @@ struct hci_dev {
> 	struct delayed_work	service_cache;
> 
> 	struct delayed_work	cmd_timer;
> +	struct delayed_work	ncmd_timer;
> 
> 	struct work_struct	rx_work;
> 	struct work_struct	cmd_work;
> diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
> index b0d9c36acc03..37789c5d0579 100644
> --- a/net/bluetooth/hci_core.c
> +++ b/net/bluetooth/hci_core.c
> @@ -1723,6 +1723,7 @@ int hci_dev_do_close(struct hci_dev *hdev)
> 	}
> 
> 	cancel_delayed_work(&hdev->power_off);
> +	cancel_delayed_work(&hdev->ncmd_timer);
> 
> 	hci_request_cancel_all(hdev);
> 	hci_req_sync_lock(hdev);
> @@ -2769,6 +2770,24 @@ static void hci_cmd_timeout(struct work_struct *work)
> 	queue_work(hdev->workqueue, &hdev->cmd_work);
> }
> 
> +/* HCI ncmd timer function */
> +static void hci_ncmd_timeout(struct work_struct *work)
> +{
> +	struct hci_dev *hdev = container_of(work, struct hci_dev,
> +					    ncmd_timer.work);
> +
> +	bt_dev_err(hdev, "Controller not accepting commands anymore: ncmd = 0");
> +
> +	/* No hardware error event needs to be injected if the ncmd timer
> +	 * triggers during HCI_INIT.
> +	 */

While the patch looks good, I would use stronger wording here.

	/* During HCI_INIT phase no events can be injected if the ncmd timer
	 * triggers since the procedure has its own timeout handling.
	 */

> +	if (test_bit(HCI_INIT, &hdev->flags))
> +		return;
> +
> +	/* This is an irrecoverable state, inject hardware error event */
> +	hci_reset_dev(hdev);
> +}
> +
> struct oob_data *hci_find_remote_oob_data(struct hci_dev *hdev,
> 					  bdaddr_t *bdaddr, u8 bdaddr_type)
> {
> @@ -3831,6 +3850,7 @@ struct hci_dev *hci_alloc_dev(void)
> 	init_waitqueue_head(&hdev->suspend_wait_q);
> 
> 	INIT_DELAYED_WORK(&hdev->cmd_timer, hci_cmd_timeout);
> +	INIT_DELAYED_WORK(&hdev->ncmd_timer, hci_ncmd_timeout);
> 
> 	hci_request_setup(hdev);
> 
> @@ -4068,6 +4088,8 @@ int hci_reset_dev(struct hci_dev *hdev)
> 	hci_skb_pkt_type(skb) = HCI_EVENT_PKT;
> 	skb_put_data(skb, hw_err, 3);
> 
> +	bt_dev_err(hdev, "Injecting HCI hardware error event");
> +
> 	/* Send Hardware Error to upper stack */
> 	return hci_recv_frame(hdev, skb);
> }
> diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
> index cf2f4a0abdbd..8cd4bcf5dd00 100644
> --- a/net/bluetooth/hci_event.c
> +++ b/net/bluetooth/hci_event.c
> @@ -3635,8 +3635,15 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
> 	if (*opcode != HCI_OP_NOP)
> 		cancel_delayed_work(&hdev->cmd_timer);
> 
> -	if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags))
> -		atomic_set(&hdev->cmd_cnt, 1);
> +	if (!test_bit(HCI_RESET, &hdev->flags)) {
> +		if (ev->ncmd) {
> +			cancel_delayed_work(&hdev->ncmd_timer);
> +			atomic_set(&hdev->cmd_cnt, 1);
> +		} else {
> +			schedule_delayed_work(&hdev->ncmd_timer,
> +					      HCI_NCMD_TIMEOUT);
> +		}
> +	}
> 
> 	hci_req_cmd_complete(hdev, *opcode, *status, req_complete,
> 			     req_complete_skb);
> @@ -3740,8 +3747,15 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb,
> 	if (*opcode != HCI_OP_NOP)
> 		cancel_delayed_work(&hdev->cmd_timer);
> 
> -	if (ev->ncmd && !test_bit(HCI_RESET, &hdev->flags))
> -		atomic_set(&hdev->cmd_cnt, 1);
> +	if (!test_bit(HCI_RESET, &hdev->flags)) {
> +		if (ev->ncmd) {
> +			cancel_delayed_work(&hdev->ncmd_timer);
> +			atomic_set(&hdev->cmd_cnt, 1);
> +		} else {
> +			schedule_delayed_work(&hdev->ncmd_timer,
> +					      HCI_NCMD_TIMEOUT);
> +		}
> +	}
> 

Since the code is getting a bit more complex now, I would prefer that in a follow-up patch we provide a common helper function for this.

	static inline void handle_cmd_cnt_and_timer(struct hci_dev *hdev, bool is_nop)
	{
		if (!is_nop)
			cancel_delayed_work(&hdev->cmd_timer);

		if (!test_bit(HCI_RESET, ..) {
			..
		}
	}

And then you can just do:

	handle_cmd_cnt_and_timer(hdev, *opcode == HCI_OP_NOP);

Or something similar to this.

Regards

Marcel


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2021-04-23  7:06 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-22 17:17 [PATCH v3] Bluetooth: Add ncmd=0 recovery handling Manish Mandlik
2021-04-23  7:06 ` Marcel Holtmann

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).