linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v3] Bluetooth: hci_qca: Bug fixes while collecting controller memory dump
@ 2020-02-13 15:56 Venkata Lakshmi Narayana Gubba
  2020-02-13 17:18 ` Abhishek Pandit-Subedi
  2020-02-14  2:22 ` Stephen Boyd
  0 siblings, 2 replies; 4+ messages in thread
From: Venkata Lakshmi Narayana Gubba @ 2020-02-13 15:56 UTC (permalink / raw)
  To: marcel, johan.hedberg
  Cc: mka, linux-kernel, linux-bluetooth, robh, hemantg, linux-arm-msm,
	bgodavar, tientzu, seanpaul, rjliao, yshavit,
	Venkata Lakshmi Narayana Gubba

This patch fixes the following issues:
   1. Fixed race conditions while accessing the memory dump state flags.
   2. Updated hci_memdump_timeout() to use the actual timer context.
   3. Inject a hardware error event if the memory dump is not received.
   4. Stop memory dump collection once the timeout is triggered.

Possible scenarios while collecting memory dump:

Scenario 1:

Memdump event from firmware
Some number of memdump events with seq #
Hw error event
Reset

Scenario 2:

Memdump event from firmware
Some number of memdump events with seq #
Timeout schedules hw_error_event if the hw error event has not already been received
hw_error_event clears the memdump activity
reset

Scenario 3:

hw_error_event sends memdump command to firmware and waits for completion
Some number of memdump events with seq #
hw error event
reset

Fixes: d841502c79e3 ("Bluetooth: hci_qca: Collect controller memory dump during SSR")
Reported-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Signed-off-by: Venkata Lakshmi Narayana Gubba <gubbaven@codeaurora.org>
---
v3:
  * Removed memdump_timer completely.
  * Used delayed work queue.
--- 
 drivers/bluetooth/hci_qca.c | 101 +++++++++++++++++++++++++++++---------------
 1 file changed, 67 insertions(+), 34 deletions(-)

diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index eacc65b..9cae5fe 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -29,6 +29,7 @@
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
 #include <linux/serdev.h>
+#include <linux/mutex.h>
 #include <asm/unaligned.h>
 
 #include <net/bluetooth/bluetooth.h>
@@ -69,7 +70,8 @@ enum qca_flags {
 	QCA_IBS_ENABLED,
 	QCA_DROP_VENDOR_EVENT,
 	QCA_SUSPENDING,
-	QCA_MEMDUMP_COLLECTION
+	QCA_MEMDUMP_COLLECTION,
+	QCA_HW_ERROR_EVENT
 };
 
 
@@ -138,18 +140,19 @@ struct qca_data {
 	u32 tx_idle_delay;
 	struct timer_list wake_retrans_timer;
 	u32 wake_retrans;
-	struct timer_list memdump_timer;
 	struct workqueue_struct *workqueue;
 	struct work_struct ws_awake_rx;
 	struct work_struct ws_awake_device;
 	struct work_struct ws_rx_vote_off;
 	struct work_struct ws_tx_vote_off;
 	struct work_struct ctrl_memdump_evt;
+	struct delayed_work ctrl_memdump_timeout;
 	struct qca_memdump_data *qca_memdump;
 	unsigned long flags;
 	struct completion drop_ev_comp;
 	wait_queue_head_t suspend_wait_q;
 	enum qca_memdump_states memdump_state;
+	struct mutex hci_memdump_lock;
 
 	/* For debugging purpose */
 	u64 ibs_sent_wacks;
@@ -522,23 +525,28 @@ static void hci_ibs_wake_retrans_timeout(struct timer_list *t)
 		hci_uart_tx_wakeup(hu);
 }
 
-static void hci_memdump_timeout(struct timer_list *t)
+
+static void qca_controller_memdump_timeout(struct work_struct *work)
 {
-	struct qca_data *qca = from_timer(qca, t, tx_idle_timer);
+	struct qca_data *qca = container_of(work, struct qca_data,
+					ctrl_memdump_timeout.work);
 	struct hci_uart *hu = qca->hu;
-	struct qca_memdump_data *qca_memdump = qca->qca_memdump;
-	char *memdump_buf = qca_memdump->memdump_buf_tail;
-
-	bt_dev_err(hu->hdev, "clearing allocated memory due to memdump timeout");
-	/* Inject hw error event to reset the device and driver. */
-	hci_reset_dev(hu->hdev);
-	vfree(memdump_buf);
-	kfree(qca_memdump);
-	qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
-	del_timer(&qca->memdump_timer);
-	cancel_work_sync(&qca->ctrl_memdump_evt);
+
+	mutex_lock(&qca->hci_memdump_lock);
+	if (test_bit(QCA_MEMDUMP_COLLECTION, &qca->flags)) {
+		qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
+		if (!test_bit(QCA_HW_ERROR_EVENT, &qca->flags)) {
+			/* Inject hw error event to reset the device
+			 * and driver.
+			 */
+			hci_reset_dev(hu->hdev);
+		}
+	}
+
+	mutex_unlock(&qca->hci_memdump_lock);
 }
 
+
 /* Initialize protocol */
 static int qca_open(struct hci_uart *hu)
 {
@@ -558,6 +566,7 @@ static int qca_open(struct hci_uart *hu)
 	skb_queue_head_init(&qca->tx_wait_q);
 	skb_queue_head_init(&qca->rx_memdump_q);
 	spin_lock_init(&qca->hci_ibs_lock);
+	mutex_init(&qca->hci_memdump_lock);
 	qca->workqueue = alloc_ordered_workqueue("qca_wq", 0);
 	if (!qca->workqueue) {
 		BT_ERR("QCA Workqueue not initialized properly");
@@ -570,6 +579,8 @@ static int qca_open(struct hci_uart *hu)
 	INIT_WORK(&qca->ws_rx_vote_off, qca_wq_serial_rx_clock_vote_off);
 	INIT_WORK(&qca->ws_tx_vote_off, qca_wq_serial_tx_clock_vote_off);
 	INIT_WORK(&qca->ctrl_memdump_evt, qca_controller_memdump);
+	INIT_DELAYED_WORK(&qca->ctrl_memdump_timeout,
+			  qca_controller_memdump_timeout);
 	init_waitqueue_head(&qca->suspend_wait_q);
 
 	qca->hu = hu;
@@ -596,7 +607,6 @@ static int qca_open(struct hci_uart *hu)
 
 	timer_setup(&qca->tx_idle_timer, hci_ibs_tx_idle_timeout, 0);
 	qca->tx_idle_delay = IBS_HOST_TX_IDLE_TIMEOUT_MS;
-	timer_setup(&qca->memdump_timer, hci_memdump_timeout, 0);
 
 	BT_DBG("HCI_UART_QCA open, tx_idle_delay=%u, wake_retrans=%u",
 	       qca->tx_idle_delay, qca->wake_retrans);
@@ -677,7 +687,6 @@ static int qca_close(struct hci_uart *hu)
 	skb_queue_purge(&qca->rx_memdump_q);
 	del_timer(&qca->tx_idle_timer);
 	del_timer(&qca->wake_retrans_timer);
-	del_timer(&qca->memdump_timer);
 	destroy_workqueue(qca->workqueue);
 	qca->hu = NULL;
 
@@ -963,11 +972,20 @@ static void qca_controller_memdump(struct work_struct *work)
 
 	while ((skb = skb_dequeue(&qca->rx_memdump_q))) {
 
+		mutex_lock(&qca->hci_memdump_lock);
+		/* Skip processing the received packets if timeout detected. */
+		if (qca->memdump_state == QCA_MEMDUMP_TIMEOUT) {
+			mutex_unlock(&qca->hci_memdump_lock);
+			return;
+		}
+
 		if (!qca_memdump) {
 			qca_memdump = kzalloc(sizeof(struct qca_memdump_data),
 					      GFP_ATOMIC);
-			if (!qca_memdump)
+			if (!qca_memdump) {
+				mutex_unlock(&qca->hci_memdump_lock);
 				return;
+			}
 
 			qca->qca_memdump = qca_memdump;
 		}
@@ -992,13 +1010,15 @@ static void qca_controller_memdump(struct work_struct *work)
 			if (!(dump_size)) {
 				bt_dev_err(hu->hdev, "Rx invalid memdump size");
 				kfree_skb(skb);
+				mutex_unlock(&qca->hci_memdump_lock);
 				return;
 			}
 
 			bt_dev_info(hu->hdev, "QCA collecting dump of size:%u",
 				    dump_size);
-			mod_timer(&qca->memdump_timer, (jiffies +
-				  msecs_to_jiffies(MEMDUMP_TIMEOUT_MS)));
+			queue_delayed_work(qca->workqueue,
+					   &qca->ctrl_memdump_timeout,
+					msecs_to_jiffies(MEMDUMP_TIMEOUT_MS));
 
 			skb_pull(skb, sizeof(dump_size));
 			memdump_buf = vmalloc(dump_size);
@@ -1016,6 +1036,7 @@ static void qca_controller_memdump(struct work_struct *work)
 			kfree(qca_memdump);
 			kfree_skb(skb);
 			qca->qca_memdump = NULL;
+			mutex_unlock(&qca->hci_memdump_lock);
 			return;
 		}
 
@@ -1046,16 +1067,20 @@ static void qca_controller_memdump(struct work_struct *work)
 			memdump_buf = qca_memdump->memdump_buf_head;
 			dev_coredumpv(&hu->serdev->dev, memdump_buf,
 				      qca_memdump->received_dump, GFP_KERNEL);
-			del_timer(&qca->memdump_timer);
+			cancel_delayed_work(&qca->ctrl_memdump_timeout);
 			kfree(qca->qca_memdump);
 			qca->qca_memdump = NULL;
 			qca->memdump_state = QCA_MEMDUMP_COLLECTED;
+			clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags);
 		}
+
+		mutex_unlock(&qca->hci_memdump_lock);
 	}
 
 }
 
-int qca_controller_memdump_event(struct hci_dev *hdev, struct sk_buff *skb)
+static int qca_controller_memdump_event(struct hci_dev *hdev,
+					struct sk_buff *skb)
 {
 	struct hci_uart *hu = hci_get_drvdata(hdev);
 	struct qca_data *qca = hu->priv;
@@ -1406,30 +1431,21 @@ static void qca_wait_for_dump_collection(struct hci_dev *hdev)
 {
 	struct hci_uart *hu = hci_get_drvdata(hdev);
 	struct qca_data *qca = hu->priv;
-	struct qca_memdump_data *qca_memdump = qca->qca_memdump;
-	char *memdump_buf = NULL;
 
 	wait_on_bit_timeout(&qca->flags, QCA_MEMDUMP_COLLECTION,
 			    TASK_UNINTERRUPTIBLE, MEMDUMP_TIMEOUT_MS);
 
 	clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags);
-	if (qca->memdump_state == QCA_MEMDUMP_IDLE) {
-		bt_dev_err(hu->hdev, "Clearing the buffers due to timeout");
-		if (qca_memdump)
-			memdump_buf = qca_memdump->memdump_buf_tail;
-		vfree(memdump_buf);
-		kfree(qca_memdump);
-		qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
-		del_timer(&qca->memdump_timer);
-		cancel_work_sync(&qca->ctrl_memdump_evt);
-	}
 }
 
 static void qca_hw_error(struct hci_dev *hdev, u8 code)
 {
 	struct hci_uart *hu = hci_get_drvdata(hdev);
 	struct qca_data *qca = hu->priv;
+	struct qca_memdump_data *qca_memdump = qca->qca_memdump;
+	char *memdump_buf = NULL;
 
+	set_bit(QCA_HW_ERROR_EVENT, &qca->flags);
 	bt_dev_info(hdev, "mem_dump_status: %d", qca->memdump_state);
 
 	if (qca->memdump_state == QCA_MEMDUMP_IDLE) {
@@ -1449,6 +1465,23 @@ static void qca_hw_error(struct hci_dev *hdev, u8 code)
 		bt_dev_info(hdev, "waiting for dump to complete");
 		qca_wait_for_dump_collection(hdev);
 	}
+
+	if (qca->memdump_state != QCA_MEMDUMP_COLLECTED) {
+		bt_dev_err(hu->hdev, "clearing allocated memory due to memdump timeout");
+		mutex_lock(&qca->hci_memdump_lock);
+		if (qca_memdump)
+			memdump_buf = qca_memdump->memdump_buf_head;
+		vfree(memdump_buf);
+		kfree(qca_memdump);
+		qca->qca_memdump = NULL;
+		qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
+		cancel_delayed_work(&qca->ctrl_memdump_timeout);
+		skb_queue_purge(&qca->rx_memdump_q);
+		mutex_unlock(&qca->hci_memdump_lock);
+		cancel_work_sync(&qca->ctrl_memdump_evt);
+	}
+
+	clear_bit(QCA_HW_ERROR_EVENT, &qca->flags);
 }
 
 static void qca_cmd_timeout(struct hci_dev *hdev)
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member 
of Code Aurora Forum, hosted by The Linux Foundation


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v3] Bluetooth: hci_qca: Bug fixes while collecting controller memory dump
  2020-02-13 15:56 [PATCH v3] Bluetooth: hci_qca: Bug fixes while collecting controller memory dump Venkata Lakshmi Narayana Gubba
@ 2020-02-13 17:18 ` Abhishek Pandit-Subedi
  2020-02-14  2:22 ` Stephen Boyd
  1 sibling, 0 replies; 4+ messages in thread
From: Abhishek Pandit-Subedi @ 2020-02-13 17:18 UTC (permalink / raw)
  To: Venkata Lakshmi Narayana Gubba
  Cc: Marcel Holtmann, Johan Hedberg, Matthias Kaehlcke, LKML,
	Bluez mailing list, robh, hemantg, linux-arm-msm, bgodavar,
	Claire Chang, Sean Paul, rjliao, Yoni Shavit

On Thu, Feb 13, 2020 at 7:56 AM Venkata Lakshmi Narayana Gubba
<gubbaven@codeaurora.org> wrote:
>
> This patch will fix the below issues
>    1.Fixed race conditions while accessing memory dump state flags.
>    2.Updated with actual context of timer in hci_memdump_timeout()
>    3.Updated injecting hardware error event if the dumps failed to receive.
>    4.Once timeout is triggered, stopping the memory dump collections.
>
> Possible scenarios while collecting memory dump:
>
> Scenario 1:
>
> Memdump event from firmware
> Some number of memdump events with seq #
> Hw error event
> Reset
>
> Scenario 2:
>
> Memdump event from firmware
> Some number of memdump events with seq #
> Timeout schedules hw_error_event if hw error event is not received already
> hw_error_event clears the memdump activity
> reset
>
> Scenario 3:
>
> hw_error_event sends memdump command to firmware and waits for completion
> Some number of memdump events with seq #
> hw error event
> reset
>
> Fixes: d841502c79e3 ("Bluetooth: hci_qca: Collect controller memory dump during SSR")
> Reported-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
> Signed-off-by: Venkata Lakshmi Narayana Gubba <gubbaven@codeaurora.org>
> ---
> v3:
>   * Removed memdump_timer completely.
>   * Used delayed work queue.
> ---
>  drivers/bluetooth/hci_qca.c | 101 +++++++++++++++++++++++++++++---------------
>  1 file changed, 67 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
> index eacc65b..9cae5fe 100644
> --- a/drivers/bluetooth/hci_qca.c
> +++ b/drivers/bluetooth/hci_qca.c
> @@ -29,6 +29,7 @@
>  #include <linux/platform_device.h>
>  #include <linux/regulator/consumer.h>
>  #include <linux/serdev.h>
> +#include <linux/mutex.h>
>  #include <asm/unaligned.h>
>
>  #include <net/bluetooth/bluetooth.h>
> @@ -69,7 +70,8 @@ enum qca_flags {
>         QCA_IBS_ENABLED,
>         QCA_DROP_VENDOR_EVENT,
>         QCA_SUSPENDING,
> -       QCA_MEMDUMP_COLLECTION
> +       QCA_MEMDUMP_COLLECTION,
> +       QCA_HW_ERROR_EVENT
>  };
>
>
> @@ -138,18 +140,19 @@ struct qca_data {
>         u32 tx_idle_delay;
>         struct timer_list wake_retrans_timer;
>         u32 wake_retrans;
> -       struct timer_list memdump_timer;
>         struct workqueue_struct *workqueue;
>         struct work_struct ws_awake_rx;
>         struct work_struct ws_awake_device;
>         struct work_struct ws_rx_vote_off;
>         struct work_struct ws_tx_vote_off;
>         struct work_struct ctrl_memdump_evt;
> +       struct delayed_work ctrl_memdump_timeout;
>         struct qca_memdump_data *qca_memdump;
>         unsigned long flags;
>         struct completion drop_ev_comp;
>         wait_queue_head_t suspend_wait_q;
>         enum qca_memdump_states memdump_state;
> +       struct mutex hci_memdump_lock;
>
>         /* For debugging purpose */
>         u64 ibs_sent_wacks;
> @@ -522,23 +525,28 @@ static void hci_ibs_wake_retrans_timeout(struct timer_list *t)
>                 hci_uart_tx_wakeup(hu);
>  }
>
> -static void hci_memdump_timeout(struct timer_list *t)
> +
> +static void qca_controller_memdump_timeout(struct work_struct *work)
>  {
> -       struct qca_data *qca = from_timer(qca, t, tx_idle_timer);
> +       struct qca_data *qca = container_of(work, struct qca_data,
> +                                       ctrl_memdump_timeout.work);
>         struct hci_uart *hu = qca->hu;
> -       struct qca_memdump_data *qca_memdump = qca->qca_memdump;
> -       char *memdump_buf = qca_memdump->memdump_buf_tail;
> -
> -       bt_dev_err(hu->hdev, "clearing allocated memory due to memdump timeout");
> -       /* Inject hw error event to reset the device and driver. */
> -       hci_reset_dev(hu->hdev);
> -       vfree(memdump_buf);
> -       kfree(qca_memdump);
> -       qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
> -       del_timer(&qca->memdump_timer);
> -       cancel_work_sync(&qca->ctrl_memdump_evt);
> +
> +       mutex_lock(&qca->hci_memdump_lock);
> +       if (test_bit(QCA_MEMDUMP_COLLECTION, &qca->flags)) {
> +               qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
> +               if (!test_bit(QCA_HW_ERROR_EVENT, &qca->flags)) {
> +                       /* Inject hw error event to reset the device
> +                        * and driver.
> +                        */
> +                       hci_reset_dev(hu->hdev);
> +               }
> +       }
> +
> +       mutex_unlock(&qca->hci_memdump_lock);
>  }
>
> +
>  /* Initialize protocol */
>  static int qca_open(struct hci_uart *hu)
>  {
> @@ -558,6 +566,7 @@ static int qca_open(struct hci_uart *hu)
>         skb_queue_head_init(&qca->tx_wait_q);
>         skb_queue_head_init(&qca->rx_memdump_q);
>         spin_lock_init(&qca->hci_ibs_lock);
> +       mutex_init(&qca->hci_memdump_lock);
>         qca->workqueue = alloc_ordered_workqueue("qca_wq", 0);
>         if (!qca->workqueue) {
>                 BT_ERR("QCA Workqueue not initialized properly");
> @@ -570,6 +579,8 @@ static int qca_open(struct hci_uart *hu)
>         INIT_WORK(&qca->ws_rx_vote_off, qca_wq_serial_rx_clock_vote_off);
>         INIT_WORK(&qca->ws_tx_vote_off, qca_wq_serial_tx_clock_vote_off);
>         INIT_WORK(&qca->ctrl_memdump_evt, qca_controller_memdump);
> +       INIT_DELAYED_WORK(&qca->ctrl_memdump_timeout,
> +                         qca_controller_memdump_timeout);
>         init_waitqueue_head(&qca->suspend_wait_q);
>
>         qca->hu = hu;
> @@ -596,7 +607,6 @@ static int qca_open(struct hci_uart *hu)
>
>         timer_setup(&qca->tx_idle_timer, hci_ibs_tx_idle_timeout, 0);
>         qca->tx_idle_delay = IBS_HOST_TX_IDLE_TIMEOUT_MS;
> -       timer_setup(&qca->memdump_timer, hci_memdump_timeout, 0);
>
>         BT_DBG("HCI_UART_QCA open, tx_idle_delay=%u, wake_retrans=%u",
>                qca->tx_idle_delay, qca->wake_retrans);
> @@ -677,7 +687,6 @@ static int qca_close(struct hci_uart *hu)
>         skb_queue_purge(&qca->rx_memdump_q);
>         del_timer(&qca->tx_idle_timer);
>         del_timer(&qca->wake_retrans_timer);
> -       del_timer(&qca->memdump_timer);
>         destroy_workqueue(qca->workqueue);
>         qca->hu = NULL;
>
> @@ -963,11 +972,20 @@ static void qca_controller_memdump(struct work_struct *work)
>
>         while ((skb = skb_dequeue(&qca->rx_memdump_q))) {
>
> +               mutex_lock(&qca->hci_memdump_lock);
> +               /* Skip processing the received packets if timeout detected. */
> +               if (qca->memdump_state == QCA_MEMDUMP_TIMEOUT) {
> +                       mutex_unlock(&qca->hci_memdump_lock);
> +                       return;
> +               }
> +
>                 if (!qca_memdump) {
>                         qca_memdump = kzalloc(sizeof(struct qca_memdump_data),
>                                               GFP_ATOMIC);
> -                       if (!qca_memdump)
> +                       if (!qca_memdump) {
> +                               mutex_unlock(&qca->hci_memdump_lock);
>                                 return;
> +                       }
>
>                         qca->qca_memdump = qca_memdump;
>                 }
> @@ -992,13 +1010,15 @@ static void qca_controller_memdump(struct work_struct *work)
>                         if (!(dump_size)) {
>                                 bt_dev_err(hu->hdev, "Rx invalid memdump size");
>                                 kfree_skb(skb);
> +                               mutex_unlock(&qca->hci_memdump_lock);
>                                 return;
>                         }
>
>                         bt_dev_info(hu->hdev, "QCA collecting dump of size:%u",
>                                     dump_size);
> -                       mod_timer(&qca->memdump_timer, (jiffies +
> -                                 msecs_to_jiffies(MEMDUMP_TIMEOUT_MS)));
> +                       queue_delayed_work(qca->workqueue,
> +                                          &qca->ctrl_memdump_timeout,
> +                                       msecs_to_jiffies(MEMDUMP_TIMEOUT_MS));
>
>                         skb_pull(skb, sizeof(dump_size));
>                         memdump_buf = vmalloc(dump_size);
> @@ -1016,6 +1036,7 @@ static void qca_controller_memdump(struct work_struct *work)
>                         kfree(qca_memdump);
>                         kfree_skb(skb);
>                         qca->qca_memdump = NULL;
> +                       mutex_unlock(&qca->hci_memdump_lock);
>                         return;
>                 }
>
> @@ -1046,16 +1067,20 @@ static void qca_controller_memdump(struct work_struct *work)
>                         memdump_buf = qca_memdump->memdump_buf_head;
>                         dev_coredumpv(&hu->serdev->dev, memdump_buf,
>                                       qca_memdump->received_dump, GFP_KERNEL);
> -                       del_timer(&qca->memdump_timer);
> +                       cancel_delayed_work(&qca->ctrl_memdump_timeout);
>                         kfree(qca->qca_memdump);
>                         qca->qca_memdump = NULL;
>                         qca->memdump_state = QCA_MEMDUMP_COLLECTED;
> +                       clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags);
>                 }
> +
> +               mutex_unlock(&qca->hci_memdump_lock);
>         }
>
>  }
>
> -int qca_controller_memdump_event(struct hci_dev *hdev, struct sk_buff *skb)
> +static int qca_controller_memdump_event(struct hci_dev *hdev,
> +                                       struct sk_buff *skb)
>  {
>         struct hci_uart *hu = hci_get_drvdata(hdev);
>         struct qca_data *qca = hu->priv;
> @@ -1406,30 +1431,21 @@ static void qca_wait_for_dump_collection(struct hci_dev *hdev)
>  {
>         struct hci_uart *hu = hci_get_drvdata(hdev);
>         struct qca_data *qca = hu->priv;
> -       struct qca_memdump_data *qca_memdump = qca->qca_memdump;
> -       char *memdump_buf = NULL;
>
>         wait_on_bit_timeout(&qca->flags, QCA_MEMDUMP_COLLECTION,
>                             TASK_UNINTERRUPTIBLE, MEMDUMP_TIMEOUT_MS);
>
>         clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags);
> -       if (qca->memdump_state == QCA_MEMDUMP_IDLE) {
> -               bt_dev_err(hu->hdev, "Clearing the buffers due to timeout");
> -               if (qca_memdump)
> -                       memdump_buf = qca_memdump->memdump_buf_tail;
> -               vfree(memdump_buf);
> -               kfree(qca_memdump);
> -               qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
> -               del_timer(&qca->memdump_timer);
> -               cancel_work_sync(&qca->ctrl_memdump_evt);
> -       }
>  }
>
>  static void qca_hw_error(struct hci_dev *hdev, u8 code)
>  {
>         struct hci_uart *hu = hci_get_drvdata(hdev);
>         struct qca_data *qca = hu->priv;
> +       struct qca_memdump_data *qca_memdump = qca->qca_memdump;
> +       char *memdump_buf = NULL;
>
> +       set_bit(QCA_HW_ERROR_EVENT, &qca->flags);
>         bt_dev_info(hdev, "mem_dump_status: %d", qca->memdump_state);
>
>         if (qca->memdump_state == QCA_MEMDUMP_IDLE) {
> @@ -1449,6 +1465,23 @@ static void qca_hw_error(struct hci_dev *hdev, u8 code)
>                 bt_dev_info(hdev, "waiting for dump to complete");
>                 qca_wait_for_dump_collection(hdev);
>         }
> +
> +       if (qca->memdump_state != QCA_MEMDUMP_COLLECTED) {
> +               bt_dev_err(hu->hdev, "clearing allocated memory due to memdump timeout");
> +               mutex_lock(&qca->hci_memdump_lock);
> +               if (qca_memdump)
> +                       memdump_buf = qca_memdump->memdump_buf_head;
> +               vfree(memdump_buf);
> +               kfree(qca_memdump);
> +               qca->qca_memdump = NULL;
> +               qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
> +               cancel_delayed_work(&qca->ctrl_memdump_timeout);
> +               skb_queue_purge(&qca->rx_memdump_q);
> +               mutex_unlock(&qca->hci_memdump_lock);
> +               cancel_work_sync(&qca->ctrl_memdump_evt);
> +       }
> +
> +       clear_bit(QCA_HW_ERROR_EVENT, &qca->flags);
>  }
>
>  static void qca_cmd_timeout(struct hci_dev *hdev)
> --
> QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
> of Code Aurora Forum, hosted by The Linux Foundation
>

Reviewed-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v3] Bluetooth: hci_qca: Bug fixes while collecting controller memory dump
  2020-02-13 15:56 [PATCH v3] Bluetooth: hci_qca: Bug fixes while collecting controller memory dump Venkata Lakshmi Narayana Gubba
  2020-02-13 17:18 ` Abhishek Pandit-Subedi
@ 2020-02-14  2:22 ` Stephen Boyd
  2020-02-14 11:48   ` gubbaven
  1 sibling, 1 reply; 4+ messages in thread
From: Stephen Boyd @ 2020-02-14  2:22 UTC (permalink / raw)
  To: Venkata Lakshmi Narayana Gubba, johan.hedberg, marcel
  Cc: mka, linux-kernel, linux-bluetooth, robh, hemantg, linux-arm-msm,
	bgodavar, tientzu, seanpaul, rjliao, yshavit,
	Venkata Lakshmi Narayana Gubba

Quoting Venkata Lakshmi Narayana Gubba (2020-02-13 07:56:04)
> This patch will fix the below issues
>    1.Fixed race conditions while accessing memory dump state flags.

What sort of race condition?

>    2.Updated with actual context of timer in hci_memdump_timeout()

What does this mean?

>    3.Updated injecting hardware error event if the dumps failed to receive.
>    4.Once timeout is triggered, stopping the memory dump collections.
> 
> Possible scenarios while collecting memory dump:
> 
> Scenario 1:
> 
> Memdump event from firmware
> Some number of memdump events with seq #
> Hw error event
> Reset
> 
> Scenario 2:
> 
> Memdump event from firmware
> Some number of memdump events with seq #
> Timeout schedules hw_error_event if hw error event is not received already
> hw_error_event clears the memdump activity
> reset
> 
> Scenario 3:
> 
> hw_error_event sends memdump command to firmware and waits for completion
> Some number of memdump events with seq #
> hw error event
> reset
> 
> Fixes: d841502c79e3 ("Bluetooth: hci_qca: Collect controller memory dump during SSR")
> Reported-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
> Signed-off-by: Venkata Lakshmi Narayana Gubba <gubbaven@codeaurora.org>
> ---
[...]
> @@ -1449,6 +1465,23 @@ static void qca_hw_error(struct hci_dev *hdev, u8 code)
>                 bt_dev_info(hdev, "waiting for dump to complete");
>                 qca_wait_for_dump_collection(hdev);
>         }
> +
> +       if (qca->memdump_state != QCA_MEMDUMP_COLLECTED) {
> +               bt_dev_err(hu->hdev, "clearing allocated memory due to memdump timeout");
> +               mutex_lock(&qca->hci_memdump_lock);

Why is a mutex needed? Are crashes happening in parallel? It would be
nice if the commit text mentioned why the mutex is added so that the
reader doesn't have to figure it out.

> +               if (qca_memdump)
> +                       memdump_buf = qca_memdump->memdump_buf_head;

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v3] Bluetooth: hci_qca: Bug fixes while collecting controller memory dump
  2020-02-14  2:22 ` Stephen Boyd
@ 2020-02-14 11:48   ` gubbaven
  0 siblings, 0 replies; 4+ messages in thread
From: gubbaven @ 2020-02-14 11:48 UTC (permalink / raw)
  To: Stephen Boyd
  Cc: johan.hedberg, marcel, mka, linux-kernel, linux-bluetooth, robh,
	hemantg, linux-arm-msm, bgodavar, tientzu, seanpaul, rjliao,
	yshavit

Hi Stephen,
On 2020-02-14 07:52, Stephen Boyd wrote:
> Quoting Venkata Lakshmi Narayana Gubba (2020-02-13 07:56:04)
>> This patch will fix the below issues
>>    1.Fixed race conditions while accessing memory dump state flags.
> 
> What sort of race condition?
[Venkat]:
A mutex is added to avoid a race condition between qca_hw_error() and
qca_controller_memdump() while accessing the memory dump buffer.
In the timeout scenario, qca_hw_error() frees the memory dump buffers while
qca_controller_memdump() might still be accessing the same buffers.
Using a mutex avoids this situation.
> 
>>    2.Updated with actual context of timer in hci_memdump_timeout()
> 
> What does this mean?
[Venkat]:
I will update the commit text and post it in the next patch set.
> 
>>    3.Updated injecting hardware error event if the dumps failed to 
>> receive.
>>    4.Once timeout is triggered, stopping the memory dump collections.
>> 
>> Possible scenarios while collecting memory dump:
>> 
>> Scenario 1:
>> 
>> Memdump event from firmware
>> Some number of memdump events with seq #
>> Hw error event
>> Reset
>> 
>> Scenario 2:
>> 
>> Memdump event from firmware
>> Some number of memdump events with seq #
>> Timeout schedules hw_error_event if hw error event is not received 
>> already
>> hw_error_event clears the memdump activity
>> reset
>> 
>> Scenario 3:
>> 
>> hw_error_event sends memdump command to firmware and waits for 
>> completion
>> Some number of memdump events with seq #
>> hw error event
>> reset
>> 
>> Fixes: d841502c79e3 ("Bluetooth: hci_qca: Collect controller memory 
>> dump during SSR")
>> Reported-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
>> Signed-off-by: Venkata Lakshmi Narayana Gubba 
>> <gubbaven@codeaurora.org>
>> ---
> [...]
>> @@ -1449,6 +1465,23 @@ static void qca_hw_error(struct hci_dev *hdev, 
>> u8 code)
>>                 bt_dev_info(hdev, "waiting for dump to complete");
>>                 qca_wait_for_dump_collection(hdev);
>>         }
>> +
>> +       if (qca->memdump_state != QCA_MEMDUMP_COLLECTED) {
>> +               bt_dev_err(hu->hdev, "clearing allocated memory due to 
>> memdump timeout");
>> +               mutex_lock(&qca->hci_memdump_lock);
> 
> Why is a mutex needed? Are crashes happening in parallel? It would be
> nice if the commit text mentioned why the mutex is added so that the
> reader doesn't have to figure it out.
> 
[Venkat]: Explained in the answer above.
>> +               if (qca_memdump)
>> +                       memdump_buf = qca_memdump->memdump_buf_head;

Regards,
Lakshmi Narayana.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-02-14 11:48 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-13 15:56 [PATCH v3] Bluetooth: hci_qca: Bug fixes while collecting controller memory dump Venkata Lakshmi Narayana Gubba
2020-02-13 17:18 ` Abhishek Pandit-Subedi
2020-02-14  2:22 ` Stephen Boyd
2020-02-14 11:48   ` gubbaven

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).