linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] Bluetooth: hci_qca: Bug fixes while collecting controller memory dump
@ 2020-02-12 15:48 Venkata Lakshmi Narayana Gubba
  2020-02-12 16:57 ` Abhishek Pandit-Subedi
  0 siblings, 1 reply; 3+ messages in thread
From: Venkata Lakshmi Narayana Gubba @ 2020-02-12 15:48 UTC (permalink / raw)
  To: marcel, johan.hedberg
  Cc: mka, linux-kernel, linux-bluetooth, robh, hemantg, linux-arm-msm,
	bgodavar, tientzu, seanpaul, rjliao, yshavit,
	Venkata Lakshmi Narayana Gubba

This patch will fix the below issues
   1.Fixed race conditions while accessing memory dump state flags.
   2.Updated with actual context of timer in hci_memdump_timeout()
   3.Updated injecting hardware error event if the dumps failed to receive.
   4.Once timeout is triggered, stopping the memory dump collections.

Possible scenarios while collecting memory dump:

Scenario 1:

Memdump event from firmware
Some number of memdump events with seq #
Hw error event
Reset

Scenario 2:

Memdump event from firmware
Some number of memdump events with seq #
Timeout schedules hw_error_event if hw error event is not received already
hw_error_event clears the memdump activity
reset

Scenario 3:

hw_error_event sends memdump command to firmware and waits for completion
Some number of memdump events with seq #
hw error event
reset

Fixes: d841502c79e3 ("Bluetooth: hci_qca: Collect controller memory dump during SSR")
Reported-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Signed-off-by: Venkata Lakshmi Narayana Gubba <gubbaven@codeaurora.org>
---
 drivers/bluetooth/hci_qca.c | 96 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 69 insertions(+), 27 deletions(-)

diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index eacc65b..80ee838 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -29,6 +29,7 @@
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
 #include <linux/serdev.h>
+#include <linux/mutex.h>
 #include <asm/unaligned.h>
 
 #include <net/bluetooth/bluetooth.h>
@@ -69,7 +70,8 @@ enum qca_flags {
 	QCA_IBS_ENABLED,
 	QCA_DROP_VENDOR_EVENT,
 	QCA_SUSPENDING,
-	QCA_MEMDUMP_COLLECTION
+	QCA_MEMDUMP_COLLECTION,
+	QCA_HW_ERROR_EVENT
 };
 
 
@@ -145,11 +147,13 @@ struct qca_data {
 	struct work_struct ws_rx_vote_off;
 	struct work_struct ws_tx_vote_off;
 	struct work_struct ctrl_memdump_evt;
+	struct work_struct ctrl_memdump_timeout;
 	struct qca_memdump_data *qca_memdump;
 	unsigned long flags;
 	struct completion drop_ev_comp;
 	wait_queue_head_t suspend_wait_q;
 	enum qca_memdump_states memdump_state;
+	struct mutex hci_memdump_lock;
 
 	/* For debugging purpose */
 	u64 ibs_sent_wacks;
@@ -524,21 +528,33 @@ static void hci_ibs_wake_retrans_timeout(struct timer_list *t)
 
 static void hci_memdump_timeout(struct timer_list *t)
 {
-	struct qca_data *qca = from_timer(qca, t, tx_idle_timer);
-	struct hci_uart *hu = qca->hu;
-	struct qca_memdump_data *qca_memdump = qca->qca_memdump;
-	char *memdump_buf = qca_memdump->memdump_buf_tail;
-
-	bt_dev_err(hu->hdev, "clearing allocated memory due to memdump timeout");
-	/* Inject hw error event to reset the device and driver. */
-	hci_reset_dev(hu->hdev);
-	vfree(memdump_buf);
-	kfree(qca_memdump);
-	qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
+	struct qca_data *qca = from_timer(qca, t, memdump_timer);
+
+	queue_work(qca->workqueue, &qca->ctrl_memdump_timeout);
 	del_timer(&qca->memdump_timer);
-	cancel_work_sync(&qca->ctrl_memdump_evt);
 }
 
+static void qca_controller_memdump_timeout(struct work_struct *work)
+{
+	struct qca_data *qca = container_of(work, struct qca_data,
+					ctrl_memdump_timeout);
+	struct hci_uart *hu = qca->hu;
+
+	mutex_lock(&qca->hci_memdump_lock);
+	if (test_bit(QCA_MEMDUMP_COLLECTION, &qca->flags)) {
+		qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
+		if (!test_bit(QCA_HW_ERROR_EVENT, &qca->flags)) {
+			/* Inject hw error event to reset the device
+			 * and driver.
+			 */
+			hci_reset_dev(hu->hdev);
+		}
+	}
+
+	mutex_unlock(&qca->hci_memdump_lock);
+}
+
+
 /* Initialize protocol */
 static int qca_open(struct hci_uart *hu)
 {
@@ -558,6 +574,7 @@ static int qca_open(struct hci_uart *hu)
 	skb_queue_head_init(&qca->tx_wait_q);
 	skb_queue_head_init(&qca->rx_memdump_q);
 	spin_lock_init(&qca->hci_ibs_lock);
+	mutex_init(&qca->hci_memdump_lock);
 	qca->workqueue = alloc_ordered_workqueue("qca_wq", 0);
 	if (!qca->workqueue) {
 		BT_ERR("QCA Workqueue not initialized properly");
@@ -570,6 +587,7 @@ static int qca_open(struct hci_uart *hu)
 	INIT_WORK(&qca->ws_rx_vote_off, qca_wq_serial_rx_clock_vote_off);
 	INIT_WORK(&qca->ws_tx_vote_off, qca_wq_serial_tx_clock_vote_off);
 	INIT_WORK(&qca->ctrl_memdump_evt, qca_controller_memdump);
+	INIT_WORK(&qca->ctrl_memdump_timeout, qca_controller_memdump_timeout);
 	init_waitqueue_head(&qca->suspend_wait_q);
 
 	qca->hu = hu;
@@ -963,11 +981,20 @@ static void qca_controller_memdump(struct work_struct *work)
 
 	while ((skb = skb_dequeue(&qca->rx_memdump_q))) {
 
+		mutex_lock(&qca->hci_memdump_lock);
+		/* Skip processing the received packets if timeout detected. */
+		if (qca->memdump_state == QCA_MEMDUMP_TIMEOUT) {
+			mutex_unlock(&qca->hci_memdump_lock);
+			return;
+		}
+
 		if (!qca_memdump) {
 			qca_memdump = kzalloc(sizeof(struct qca_memdump_data),
 					      GFP_ATOMIC);
-			if (!qca_memdump)
+			if (!qca_memdump) {
+				mutex_unlock(&qca->hci_memdump_lock);
 				return;
+			}
 
 			qca->qca_memdump = qca_memdump;
 		}
@@ -992,6 +1019,7 @@ static void qca_controller_memdump(struct work_struct *work)
 			if (!(dump_size)) {
 				bt_dev_err(hu->hdev, "Rx invalid memdump size");
 				kfree_skb(skb);
+				mutex_unlock(&qca->hci_memdump_lock);
 				return;
 			}
 
@@ -1016,6 +1044,7 @@ static void qca_controller_memdump(struct work_struct *work)
 			kfree(qca_memdump);
 			kfree_skb(skb);
 			qca->qca_memdump = NULL;
+			mutex_unlock(&qca->hci_memdump_lock);
 			return;
 		}
 
@@ -1050,12 +1079,16 @@ static void qca_controller_memdump(struct work_struct *work)
 			kfree(qca->qca_memdump);
 			qca->qca_memdump = NULL;
 			qca->memdump_state = QCA_MEMDUMP_COLLECTED;
+			clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags);
 		}
+
+		mutex_unlock(&qca->hci_memdump_lock);
 	}
 
 }
 
-int qca_controller_memdump_event(struct hci_dev *hdev, struct sk_buff *skb)
+static int qca_controller_memdump_event(struct hci_dev *hdev,
+					struct sk_buff *skb)
 {
 	struct hci_uart *hu = hci_get_drvdata(hdev);
 	struct qca_data *qca = hu->priv;
@@ -1406,30 +1439,21 @@ static void qca_wait_for_dump_collection(struct hci_dev *hdev)
 {
 	struct hci_uart *hu = hci_get_drvdata(hdev);
 	struct qca_data *qca = hu->priv;
-	struct qca_memdump_data *qca_memdump = qca->qca_memdump;
-	char *memdump_buf = NULL;
 
 	wait_on_bit_timeout(&qca->flags, QCA_MEMDUMP_COLLECTION,
 			    TASK_UNINTERRUPTIBLE, MEMDUMP_TIMEOUT_MS);
 
 	clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags);
-	if (qca->memdump_state == QCA_MEMDUMP_IDLE) {
-		bt_dev_err(hu->hdev, "Clearing the buffers due to timeout");
-		if (qca_memdump)
-			memdump_buf = qca_memdump->memdump_buf_tail;
-		vfree(memdump_buf);
-		kfree(qca_memdump);
-		qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
-		del_timer(&qca->memdump_timer);
-		cancel_work_sync(&qca->ctrl_memdump_evt);
-	}
 }
 
 static void qca_hw_error(struct hci_dev *hdev, u8 code)
 {
 	struct hci_uart *hu = hci_get_drvdata(hdev);
 	struct qca_data *qca = hu->priv;
+	struct qca_memdump_data *qca_memdump = qca->qca_memdump;
+	char *memdump_buf = NULL;
 
+	set_bit(QCA_HW_ERROR_EVENT, &qca->flags);
 	bt_dev_info(hdev, "mem_dump_status: %d", qca->memdump_state);
 
 	if (qca->memdump_state == QCA_MEMDUMP_IDLE) {
@@ -1449,6 +1473,24 @@ static void qca_hw_error(struct hci_dev *hdev, u8 code)
 		bt_dev_info(hdev, "waiting for dump to complete");
 		qca_wait_for_dump_collection(hdev);
 	}
+
+	if (qca->memdump_state != QCA_MEMDUMP_COLLECTED) {
+		bt_dev_err(hu->hdev, "clearing allocated memory due to memdump timeout");
+		mutex_lock(&qca->hci_memdump_lock);
+		if (qca_memdump)
+			memdump_buf = qca_memdump->memdump_buf_head;
+		vfree(memdump_buf);
+		kfree(qca_memdump);
+		qca->qca_memdump = NULL;
+		qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
+		del_timer(&qca->memdump_timer);
+		skb_queue_purge(&qca->rx_memdump_q);
+		mutex_unlock(&qca->hci_memdump_lock);
+		cancel_work_sync(&qca->ctrl_memdump_timeout);
+		cancel_work_sync(&qca->ctrl_memdump_evt);
+	}
+
+	clear_bit(QCA_HW_ERROR_EVENT, &qca->flags);
 }
 
 static void qca_cmd_timeout(struct hci_dev *hdev)
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member 
of Code Aurora Forum, hosted by The Linux Foundation


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] Bluetooth: hci_qca: Bug fixes while collecting controller memory dump
  2020-02-12 15:48 [PATCH v2] Bluetooth: hci_qca: Bug fixes while collecting controller memory dump Venkata Lakshmi Narayana Gubba
@ 2020-02-12 16:57 ` Abhishek Pandit-Subedi
  2020-02-13  9:07   ` gubbaven
  0 siblings, 1 reply; 3+ messages in thread
From: Abhishek Pandit-Subedi @ 2020-02-12 16:57 UTC (permalink / raw)
  To: Venkata Lakshmi Narayana Gubba
  Cc: Marcel Holtmann, Johan Hedberg, Matthias Kaehlcke, LKML,
	Bluez mailing list, robh, hemantg, linux-arm-msm, bgodavar,
	Claire Chang, Sean Paul, rjliao, Yoni Shavit

Hi Venkata,

I would suggest removing the memdump_timer entirely and making the
ctrl_memdump_timeout into struct delayed_work.

Instead of using mod_timer to get the callback ready, you would
instead call `queue_delayed_work(qca->workqueue,
&qca->ctrl_memdump_timeout, MEMDUMP_TIMEOUT_MS);` and instead of
del_timer, you would instead
`cancel_delayed_work(&qca->ctrl_memdump_timeout)` if mutex is held or
`cancel_delayed_work_sync(&qca->ctrl_memdump_timeout)` if mutex is not
held.

Other than that, everything else looks good to me.

On Wed, Feb 12, 2020 at 7:51 AM Venkata Lakshmi Narayana Gubba
<gubbaven@codeaurora.org> wrote:
>
> This patch will fix the below issues
>    1.Fixed race conditions while accessing memory dump state flags.
>    2.Updated with actual context of timer in hci_memdump_timeout()
>    3.Updated injecting hardware error event if the dumps failed to receive.
>    4.Once timeout is triggered, stopping the memory dump collections.
>
> Possible scenarios while collecting memory dump:
>
> Scenario 1:
>
> Memdump event from firmware
> Some number of memdump events with seq #
> Hw error event
> Reset
>
> Scenario 2:
>
> Memdump event from firmware
> Some number of memdump events with seq #
> Timeout schedules hw_error_event if hw error event is not received already
> hw_error_event clears the memdump activity
> reset
>
> Scenario 3:
>
> hw_error_event sends memdump command to firmware and waits for completion
> Some number of memdump events with seq #
> hw error event
> reset
>
> Fixes: d841502c79e3 ("Bluetooth: hci_qca: Collect controller memory dump during SSR")
> Reported-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
> Signed-off-by: Venkata Lakshmi Narayana Gubba <gubbaven@codeaurora.org>
> ---
>  drivers/bluetooth/hci_qca.c | 96 ++++++++++++++++++++++++++++++++-------------
>  1 file changed, 69 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
> index eacc65b..80ee838 100644
> --- a/drivers/bluetooth/hci_qca.c
> +++ b/drivers/bluetooth/hci_qca.c
> @@ -29,6 +29,7 @@
>  #include <linux/platform_device.h>
>  #include <linux/regulator/consumer.h>
>  #include <linux/serdev.h>
> +#include <linux/mutex.h>
>  #include <asm/unaligned.h>
>
>  #include <net/bluetooth/bluetooth.h>
> @@ -69,7 +70,8 @@ enum qca_flags {
>         QCA_IBS_ENABLED,
>         QCA_DROP_VENDOR_EVENT,
>         QCA_SUSPENDING,
> -       QCA_MEMDUMP_COLLECTION
> +       QCA_MEMDUMP_COLLECTION,
> +       QCA_HW_ERROR_EVENT
>  };
>
>
> @@ -145,11 +147,13 @@ struct qca_data {
>         struct work_struct ws_rx_vote_off;
>         struct work_struct ws_tx_vote_off;
>         struct work_struct ctrl_memdump_evt;
> +       struct work_struct ctrl_memdump_timeout;
>         struct qca_memdump_data *qca_memdump;
>         unsigned long flags;
>         struct completion drop_ev_comp;
>         wait_queue_head_t suspend_wait_q;
>         enum qca_memdump_states memdump_state;
> +       struct mutex hci_memdump_lock;
>
>         /* For debugging purpose */
>         u64 ibs_sent_wacks;
> @@ -524,21 +528,33 @@ static void hci_ibs_wake_retrans_timeout(struct timer_list *t)
>
>  static void hci_memdump_timeout(struct timer_list *t)
>  {
> -       struct qca_data *qca = from_timer(qca, t, tx_idle_timer);
> -       struct hci_uart *hu = qca->hu;
> -       struct qca_memdump_data *qca_memdump = qca->qca_memdump;
> -       char *memdump_buf = qca_memdump->memdump_buf_tail;
> -
> -       bt_dev_err(hu->hdev, "clearing allocated memory due to memdump timeout");
> -       /* Inject hw error event to reset the device and driver. */
> -       hci_reset_dev(hu->hdev);
> -       vfree(memdump_buf);
> -       kfree(qca_memdump);
> -       qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
> +       struct qca_data *qca = from_timer(qca, t, memdump_timer);
> +
> +       queue_work(qca->workqueue, &qca->ctrl_memdump_timeout);
>         del_timer(&qca->memdump_timer);
> -       cancel_work_sync(&qca->ctrl_memdump_evt);
>  }
>
> +static void qca_controller_memdump_timeout(struct work_struct *work)
> +{
> +       struct qca_data *qca = container_of(work, struct qca_data,
> +                                       ctrl_memdump_timeout);
> +       struct hci_uart *hu = qca->hu;
> +
> +       mutex_lock(&qca->hci_memdump_lock);
> +       if (test_bit(QCA_MEMDUMP_COLLECTION, &qca->flags)) {
> +               qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
> +               if (!test_bit(QCA_HW_ERROR_EVENT, &qca->flags)) {
> +                       /* Inject hw error event to reset the device
> +                        * and driver.
> +                        */
> +                       hci_reset_dev(hu->hdev);
> +               }
> +       }
> +
> +       mutex_unlock(&qca->hci_memdump_lock);
> +}
> +
> +
>  /* Initialize protocol */
>  static int qca_open(struct hci_uart *hu)
>  {
> @@ -558,6 +574,7 @@ static int qca_open(struct hci_uart *hu)
>         skb_queue_head_init(&qca->tx_wait_q);
>         skb_queue_head_init(&qca->rx_memdump_q);
>         spin_lock_init(&qca->hci_ibs_lock);
> +       mutex_init(&qca->hci_memdump_lock);
>         qca->workqueue = alloc_ordered_workqueue("qca_wq", 0);
>         if (!qca->workqueue) {
>                 BT_ERR("QCA Workqueue not initialized properly");
> @@ -570,6 +587,7 @@ static int qca_open(struct hci_uart *hu)
>         INIT_WORK(&qca->ws_rx_vote_off, qca_wq_serial_rx_clock_vote_off);
>         INIT_WORK(&qca->ws_tx_vote_off, qca_wq_serial_tx_clock_vote_off);
>         INIT_WORK(&qca->ctrl_memdump_evt, qca_controller_memdump);
> +       INIT_WORK(&qca->ctrl_memdump_timeout, qca_controller_memdump_timeout);
>         init_waitqueue_head(&qca->suspend_wait_q);
>
>         qca->hu = hu;
> @@ -963,11 +981,20 @@ static void qca_controller_memdump(struct work_struct *work)
>
>         while ((skb = skb_dequeue(&qca->rx_memdump_q))) {
>
> +               mutex_lock(&qca->hci_memdump_lock);
> +               /* Skip processing the received packets if timeout detected. */
> +               if (qca->memdump_state == QCA_MEMDUMP_TIMEOUT) {
> +                       mutex_unlock(&qca->hci_memdump_lock);
> +                       return;
> +               }
> +
>                 if (!qca_memdump) {
>                         qca_memdump = kzalloc(sizeof(struct qca_memdump_data),
>                                               GFP_ATOMIC);
> -                       if (!qca_memdump)
> +                       if (!qca_memdump) {
> +                               mutex_unlock(&qca->hci_memdump_lock);
>                                 return;
> +                       }
>
>                         qca->qca_memdump = qca_memdump;
>                 }
> @@ -992,6 +1019,7 @@ static void qca_controller_memdump(struct work_struct *work)
>                         if (!(dump_size)) {
>                                 bt_dev_err(hu->hdev, "Rx invalid memdump size");
>                                 kfree_skb(skb);
> +                               mutex_unlock(&qca->hci_memdump_lock);
>                                 return;
>                         }
>
> @@ -1016,6 +1044,7 @@ static void qca_controller_memdump(struct work_struct *work)
>                         kfree(qca_memdump);
>                         kfree_skb(skb);
>                         qca->qca_memdump = NULL;
> +                       mutex_unlock(&qca->hci_memdump_lock);
>                         return;
>                 }
>
> @@ -1050,12 +1079,16 @@ static void qca_controller_memdump(struct work_struct *work)
>                         kfree(qca->qca_memdump);
>                         qca->qca_memdump = NULL;
>                         qca->memdump_state = QCA_MEMDUMP_COLLECTED;
> +                       clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags);
>                 }
> +
> +               mutex_unlock(&qca->hci_memdump_lock);
>         }
>
>  }
>
> -int qca_controller_memdump_event(struct hci_dev *hdev, struct sk_buff *skb)
> +static int qca_controller_memdump_event(struct hci_dev *hdev,
> +                                       struct sk_buff *skb)
>  {
>         struct hci_uart *hu = hci_get_drvdata(hdev);
>         struct qca_data *qca = hu->priv;
> @@ -1406,30 +1439,21 @@ static void qca_wait_for_dump_collection(struct hci_dev *hdev)
>  {
>         struct hci_uart *hu = hci_get_drvdata(hdev);
>         struct qca_data *qca = hu->priv;
> -       struct qca_memdump_data *qca_memdump = qca->qca_memdump;
> -       char *memdump_buf = NULL;
>
>         wait_on_bit_timeout(&qca->flags, QCA_MEMDUMP_COLLECTION,
>                             TASK_UNINTERRUPTIBLE, MEMDUMP_TIMEOUT_MS);
>
>         clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags);
> -       if (qca->memdump_state == QCA_MEMDUMP_IDLE) {
> -               bt_dev_err(hu->hdev, "Clearing the buffers due to timeout");
> -               if (qca_memdump)
> -                       memdump_buf = qca_memdump->memdump_buf_tail;
> -               vfree(memdump_buf);
> -               kfree(qca_memdump);
> -               qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
> -               del_timer(&qca->memdump_timer);
> -               cancel_work_sync(&qca->ctrl_memdump_evt);
> -       }
>  }
>
>  static void qca_hw_error(struct hci_dev *hdev, u8 code)
>  {
>         struct hci_uart *hu = hci_get_drvdata(hdev);
>         struct qca_data *qca = hu->priv;
> +       struct qca_memdump_data *qca_memdump = qca->qca_memdump;
> +       char *memdump_buf = NULL;
>
> +       set_bit(QCA_HW_ERROR_EVENT, &qca->flags);
>         bt_dev_info(hdev, "mem_dump_status: %d", qca->memdump_state);
>
>         if (qca->memdump_state == QCA_MEMDUMP_IDLE) {
> @@ -1449,6 +1473,24 @@ static void qca_hw_error(struct hci_dev *hdev, u8 code)
>                 bt_dev_info(hdev, "waiting for dump to complete");
>                 qca_wait_for_dump_collection(hdev);
>         }
> +
> +       if (qca->memdump_state != QCA_MEMDUMP_COLLECTED) {
> +               bt_dev_err(hu->hdev, "clearing allocated memory due to memdump timeout");
> +               mutex_lock(&qca->hci_memdump_lock);
> +               if (qca_memdump)
> +                       memdump_buf = qca_memdump->memdump_buf_head;
> +               vfree(memdump_buf);
> +               kfree(qca_memdump);
> +               qca->qca_memdump = NULL;
> +               qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
> +               del_timer(&qca->memdump_timer);
> +               skb_queue_purge(&qca->rx_memdump_q);
> +               mutex_unlock(&qca->hci_memdump_lock);
> +               cancel_work_sync(&qca->ctrl_memdump_timeout);
> +               cancel_work_sync(&qca->ctrl_memdump_evt);
> +       }
> +
> +       clear_bit(QCA_HW_ERROR_EVENT, &qca->flags);
>  }
>
>  static void qca_cmd_timeout(struct hci_dev *hdev)
> --
> QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
> of Code Aurora Forum, hosted by The Linux Foundation
>

Thanks
Abhishek

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH v2] Bluetooth: hci_qca: Bug fixes while collecting controller memory dump
  2020-02-12 16:57 ` Abhishek Pandit-Subedi
@ 2020-02-13  9:07   ` gubbaven
  0 siblings, 0 replies; 3+ messages in thread
From: gubbaven @ 2020-02-13  9:07 UTC (permalink / raw)
  To: Abhishek Pandit-Subedi
  Cc: Marcel Holtmann, Johan Hedberg, Matthias Kaehlcke, LKML,
	Bluez mailing list, robh, hemantg, linux-arm-msm, bgodavar,
	Claire Chang, Sean Paul, rjliao, Yoni Shavit

Hi Abhishek,

Sure I will update and post next patch set.

Regards,
Lakshmi Narayana.
On 2020-02-12 22:27, Abhishek Pandit-Subedi wrote:
> Hi Venkata,
> 
> I would suggest removing the memdump_timer entirely and making the
> ctrl_memdump_timeout into struct delayed_work.
> 
> Instead of using mod_timer to get the callback ready, you would
> instead call `queue_delayed_work(qca->workqueue,
> &qca->ctrl_memdump_timeout, MEMDUMP_TIMEOUT_MS);` and instead of
> del_timer, you would instead
> `cancel_delayed_work(&qca->ctrl_memdump_timeout)` if mutex is held or
> `cancel_delayed_work_sync(&qca->ctrl_memdump_timeout)` if mutex is not
> held.
> 
> Other than that, everything else looks good to me.
> 
> On Wed, Feb 12, 2020 at 7:51 AM Venkata Lakshmi Narayana Gubba
> <gubbaven@codeaurora.org> wrote:
>> 
>> This patch will fix the below issues
>>    1.Fixed race conditions while accessing memory dump state flags.
>>    2.Updated with actual context of timer in hci_memdump_timeout()
>>    3.Updated injecting hardware error event if the dumps failed to 
>> receive.
>>    4.Once timeout is triggered, stopping the memory dump collections.
>> 
>> Possible scenarios while collecting memory dump:
>> 
>> Scenario 1:
>> 
>> Memdump event from firmware
>> Some number of memdump events with seq #
>> Hw error event
>> Reset
>> 
>> Scenario 2:
>> 
>> Memdump event from firmware
>> Some number of memdump events with seq #
>> Timeout schedules hw_error_event if hw error event is not received 
>> already
>> hw_error_event clears the memdump activity
>> reset
>> 
>> Scenario 3:
>> 
>> hw_error_event sends memdump command to firmware and waits for 
>> completion
>> Some number of memdump events with seq #
>> hw error event
>> reset
>> 
>> Fixes: d841502c79e3 ("Bluetooth: hci_qca: Collect controller memory 
>> dump during SSR")
>> Reported-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
>> Signed-off-by: Venkata Lakshmi Narayana Gubba 
>> <gubbaven@codeaurora.org>
>> ---
>>  drivers/bluetooth/hci_qca.c | 96 
>> ++++++++++++++++++++++++++++++++-------------
>>  1 file changed, 69 insertions(+), 27 deletions(-)
>> 
>> diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
>> index eacc65b..80ee838 100644
>> --- a/drivers/bluetooth/hci_qca.c
>> +++ b/drivers/bluetooth/hci_qca.c
>> @@ -29,6 +29,7 @@
>>  #include <linux/platform_device.h>
>>  #include <linux/regulator/consumer.h>
>>  #include <linux/serdev.h>
>> +#include <linux/mutex.h>
>>  #include <asm/unaligned.h>
>> 
>>  #include <net/bluetooth/bluetooth.h>
>> @@ -69,7 +70,8 @@ enum qca_flags {
>>         QCA_IBS_ENABLED,
>>         QCA_DROP_VENDOR_EVENT,
>>         QCA_SUSPENDING,
>> -       QCA_MEMDUMP_COLLECTION
>> +       QCA_MEMDUMP_COLLECTION,
>> +       QCA_HW_ERROR_EVENT
>>  };
>> 
>> 
>> @@ -145,11 +147,13 @@ struct qca_data {
>>         struct work_struct ws_rx_vote_off;
>>         struct work_struct ws_tx_vote_off;
>>         struct work_struct ctrl_memdump_evt;
>> +       struct work_struct ctrl_memdump_timeout;
>>         struct qca_memdump_data *qca_memdump;
>>         unsigned long flags;
>>         struct completion drop_ev_comp;
>>         wait_queue_head_t suspend_wait_q;
>>         enum qca_memdump_states memdump_state;
>> +       struct mutex hci_memdump_lock;
>> 
>>         /* For debugging purpose */
>>         u64 ibs_sent_wacks;
>> @@ -524,21 +528,33 @@ static void hci_ibs_wake_retrans_timeout(struct 
>> timer_list *t)
>> 
>>  static void hci_memdump_timeout(struct timer_list *t)
>>  {
>> -       struct qca_data *qca = from_timer(qca, t, tx_idle_timer);
>> -       struct hci_uart *hu = qca->hu;
>> -       struct qca_memdump_data *qca_memdump = qca->qca_memdump;
>> -       char *memdump_buf = qca_memdump->memdump_buf_tail;
>> -
>> -       bt_dev_err(hu->hdev, "clearing allocated memory due to memdump 
>> timeout");
>> -       /* Inject hw error event to reset the device and driver. */
>> -       hci_reset_dev(hu->hdev);
>> -       vfree(memdump_buf);
>> -       kfree(qca_memdump);
>> -       qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
>> +       struct qca_data *qca = from_timer(qca, t, memdump_timer);
>> +
>> +       queue_work(qca->workqueue, &qca->ctrl_memdump_timeout);
>>         del_timer(&qca->memdump_timer);
>> -       cancel_work_sync(&qca->ctrl_memdump_evt);
>>  }
>> 
>> +static void qca_controller_memdump_timeout(struct work_struct *work)
>> +{
>> +       struct qca_data *qca = container_of(work, struct qca_data,
>> +                                       ctrl_memdump_timeout);
>> +       struct hci_uart *hu = qca->hu;
>> +
>> +       mutex_lock(&qca->hci_memdump_lock);
>> +       if (test_bit(QCA_MEMDUMP_COLLECTION, &qca->flags)) {
>> +               qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
>> +               if (!test_bit(QCA_HW_ERROR_EVENT, &qca->flags)) {
>> +                       /* Inject hw error event to reset the device
>> +                        * and driver.
>> +                        */
>> +                       hci_reset_dev(hu->hdev);
>> +               }
>> +       }
>> +
>> +       mutex_unlock(&qca->hci_memdump_lock);
>> +}
>> +
>> +
>>  /* Initialize protocol */
>>  static int qca_open(struct hci_uart *hu)
>>  {
>> @@ -558,6 +574,7 @@ static int qca_open(struct hci_uart *hu)
>>         skb_queue_head_init(&qca->tx_wait_q);
>>         skb_queue_head_init(&qca->rx_memdump_q);
>>         spin_lock_init(&qca->hci_ibs_lock);
>> +       mutex_init(&qca->hci_memdump_lock);
>>         qca->workqueue = alloc_ordered_workqueue("qca_wq", 0);
>>         if (!qca->workqueue) {
>>                 BT_ERR("QCA Workqueue not initialized properly");
>> @@ -570,6 +587,7 @@ static int qca_open(struct hci_uart *hu)
>>         INIT_WORK(&qca->ws_rx_vote_off, 
>> qca_wq_serial_rx_clock_vote_off);
>>         INIT_WORK(&qca->ws_tx_vote_off, 
>> qca_wq_serial_tx_clock_vote_off);
>>         INIT_WORK(&qca->ctrl_memdump_evt, qca_controller_memdump);
>> +       INIT_WORK(&qca->ctrl_memdump_timeout, 
>> qca_controller_memdump_timeout);
>>         init_waitqueue_head(&qca->suspend_wait_q);
>> 
>>         qca->hu = hu;
>> @@ -963,11 +981,20 @@ static void qca_controller_memdump(struct 
>> work_struct *work)
>> 
>>         while ((skb = skb_dequeue(&qca->rx_memdump_q))) {
>> 
>> +               mutex_lock(&qca->hci_memdump_lock);
>> +               /* Skip processing the received packets if timeout 
>> detected. */
>> +               if (qca->memdump_state == QCA_MEMDUMP_TIMEOUT) {
>> +                       mutex_unlock(&qca->hci_memdump_lock);
>> +                       return;
>> +               }
>> +
>>                 if (!qca_memdump) {
>>                         qca_memdump = kzalloc(sizeof(struct 
>> qca_memdump_data),
>>                                               GFP_ATOMIC);
>> -                       if (!qca_memdump)
>> +                       if (!qca_memdump) {
>> +                               mutex_unlock(&qca->hci_memdump_lock);
>>                                 return;
>> +                       }
>> 
>>                         qca->qca_memdump = qca_memdump;
>>                 }
>> @@ -992,6 +1019,7 @@ static void qca_controller_memdump(struct 
>> work_struct *work)
>>                         if (!(dump_size)) {
>>                                 bt_dev_err(hu->hdev, "Rx invalid 
>> memdump size");
>>                                 kfree_skb(skb);
>> +                               mutex_unlock(&qca->hci_memdump_lock);
>>                                 return;
>>                         }
>> 
>> @@ -1016,6 +1044,7 @@ static void qca_controller_memdump(struct 
>> work_struct *work)
>>                         kfree(qca_memdump);
>>                         kfree_skb(skb);
>>                         qca->qca_memdump = NULL;
>> +                       mutex_unlock(&qca->hci_memdump_lock);
>>                         return;
>>                 }
>> 
>> @@ -1050,12 +1079,16 @@ static void qca_controller_memdump(struct 
>> work_struct *work)
>>                         kfree(qca->qca_memdump);
>>                         qca->qca_memdump = NULL;
>>                         qca->memdump_state = QCA_MEMDUMP_COLLECTED;
>> +                       clear_bit(QCA_MEMDUMP_COLLECTION, 
>> &qca->flags);
>>                 }
>> +
>> +               mutex_unlock(&qca->hci_memdump_lock);
>>         }
>> 
>>  }
>> 
>> -int qca_controller_memdump_event(struct hci_dev *hdev, struct sk_buff 
>> *skb)
>> +static int qca_controller_memdump_event(struct hci_dev *hdev,
>> +                                       struct sk_buff *skb)
>>  {
>>         struct hci_uart *hu = hci_get_drvdata(hdev);
>>         struct qca_data *qca = hu->priv;
>> @@ -1406,30 +1439,21 @@ static void 
>> qca_wait_for_dump_collection(struct hci_dev *hdev)
>>  {
>>         struct hci_uart *hu = hci_get_drvdata(hdev);
>>         struct qca_data *qca = hu->priv;
>> -       struct qca_memdump_data *qca_memdump = qca->qca_memdump;
>> -       char *memdump_buf = NULL;
>> 
>>         wait_on_bit_timeout(&qca->flags, QCA_MEMDUMP_COLLECTION,
>>                             TASK_UNINTERRUPTIBLE, MEMDUMP_TIMEOUT_MS);
>> 
>>         clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags);
>> -       if (qca->memdump_state == QCA_MEMDUMP_IDLE) {
>> -               bt_dev_err(hu->hdev, "Clearing the buffers due to 
>> timeout");
>> -               if (qca_memdump)
>> -                       memdump_buf = qca_memdump->memdump_buf_tail;
>> -               vfree(memdump_buf);
>> -               kfree(qca_memdump);
>> -               qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
>> -               del_timer(&qca->memdump_timer);
>> -               cancel_work_sync(&qca->ctrl_memdump_evt);
>> -       }
>>  }
>> 
>>  static void qca_hw_error(struct hci_dev *hdev, u8 code)
>>  {
>>         struct hci_uart *hu = hci_get_drvdata(hdev);
>>         struct qca_data *qca = hu->priv;
>> +       struct qca_memdump_data *qca_memdump = qca->qca_memdump;
>> +       char *memdump_buf = NULL;
>> 
>> +       set_bit(QCA_HW_ERROR_EVENT, &qca->flags);
>>         bt_dev_info(hdev, "mem_dump_status: %d", qca->memdump_state);
>> 
>>         if (qca->memdump_state == QCA_MEMDUMP_IDLE) {
>> @@ -1449,6 +1473,24 @@ static void qca_hw_error(struct hci_dev *hdev, 
>> u8 code)
>>                 bt_dev_info(hdev, "waiting for dump to complete");
>>                 qca_wait_for_dump_collection(hdev);
>>         }
>> +
>> +       if (qca->memdump_state != QCA_MEMDUMP_COLLECTED) {
>> +               bt_dev_err(hu->hdev, "clearing allocated memory due to 
>> memdump timeout");
>> +               mutex_lock(&qca->hci_memdump_lock);
>> +               if (qca_memdump)
>> +                       memdump_buf = qca_memdump->memdump_buf_head;
>> +               vfree(memdump_buf);
>> +               kfree(qca_memdump);
>> +               qca->qca_memdump = NULL;
>> +               qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
>> +               del_timer(&qca->memdump_timer);
>> +               skb_queue_purge(&qca->rx_memdump_q);
>> +               mutex_unlock(&qca->hci_memdump_lock);
>> +               cancel_work_sync(&qca->ctrl_memdump_timeout);
>> +               cancel_work_sync(&qca->ctrl_memdump_evt);
>> +       }
>> +
>> +       clear_bit(QCA_HW_ERROR_EVENT, &qca->flags);
>>  }
>> 
>>  static void qca_cmd_timeout(struct hci_dev *hdev)
>> --
>> QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a 
>> member
>> of Code Aurora Forum, hosted by The Linux Foundation
>> 
> 
> Thanks
> Abhishek

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-02-13  9:08 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-12 15:48 [PATCH v2] Bluetooth: hci_qca: Bug fixes while collecting controller memory dump Venkata Lakshmi Narayana Gubba
2020-02-12 16:57 ` Abhishek Pandit-Subedi
2020-02-13  9:07   ` gubbaven

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).