linux-arm-msm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
To: Venkata Lakshmi Narayana Gubba <gubbaven@codeaurora.org>
Cc: Marcel Holtmann <marcel@holtmann.org>,
	Johan Hedberg <johan.hedberg@gmail.com>,
	Matthias Kaehlcke <mka@chromium.org>,
	LKML <linux-kernel@vger.kernel.org>,
	Bluez mailing list <linux-bluetooth@vger.kernel.org>,
	robh@kernel.org, hemantg@codeaurora.org,
	linux-arm-msm@vger.kernel.org, bgodavar@codeaurora.org,
	Claire Chang <tientzu@chromium.org>,
	Sean Paul <seanpaul@chromium.org>,
	rjliao@codeaurora.org, Yoni Shavit <yshavit@google.com>
Subject: Re: [PATCH v3] Bluetooth: hci_qca: Bug fixes while collecting controller memory dump
Date: Thu, 13 Feb 2020 09:18:17 -0800	[thread overview]
Message-ID: <CANFp7mVLagCME4oTfMaFnkFnktn7h9bLiXJ09nPkNqJyveELVw@mail.gmail.com> (raw)
In-Reply-To: <1581609364-21824-1-git-send-email-gubbaven@codeaurora.org>

On Thu, Feb 13, 2020 at 7:56 AM Venkata Lakshmi Narayana Gubba
<gubbaven@codeaurora.org> wrote:
>
> This patch will fix the below issues
>    1.Fixed race conditions while accessing memory dump state flags.
>    2.Updated with actual context of timer in hci_memdump_timeout()
>    3.Updated injecting hardware error event if the dumps failed to receive.
>    4.Once timeout is triggered, stopping the memory dump collections.
>
> Possible scenarios while collecting memory dump:
>
> Scenario 1:
>
> Memdump event from firmware
> Some number of memdump events with seq #
> Hw error event
> Reset
>
> Scenario 2:
>
> Memdump event from firmware
> Some number of memdump events with seq #
> Timeout schedules hw_error_event if hw error event is not received already
> hw_error_event clears the memdump activity
> reset
>
> Scenario 3:
>
> hw_error_event sends memdump command to firmware and waits for completion
> Some number of memdump events with seq #
> hw error event
> reset
>
> Fixes: d841502c79e3 ("Bluetooth: hci_qca: Collect controller memory dump during SSR")
> Reported-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
> Signed-off-by: Venkata Lakshmi Narayana Gubba <gubbaven@codeaurora.org>
> ---
> v3:
>   * Removed memdump_timer completely.
>   * Used delayed work queue.
> ---
>  drivers/bluetooth/hci_qca.c | 101 +++++++++++++++++++++++++++++---------------
>  1 file changed, 67 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
> index eacc65b..9cae5fe 100644
> --- a/drivers/bluetooth/hci_qca.c
> +++ b/drivers/bluetooth/hci_qca.c
> @@ -29,6 +29,7 @@
>  #include <linux/platform_device.h>
>  #include <linux/regulator/consumer.h>
>  #include <linux/serdev.h>
> +#include <linux/mutex.h>
>  #include <asm/unaligned.h>
>
>  #include <net/bluetooth/bluetooth.h>
> @@ -69,7 +70,8 @@ enum qca_flags {
>         QCA_IBS_ENABLED,
>         QCA_DROP_VENDOR_EVENT,
>         QCA_SUSPENDING,
> -       QCA_MEMDUMP_COLLECTION
> +       QCA_MEMDUMP_COLLECTION,
> +       QCA_HW_ERROR_EVENT
>  };
>
>
> @@ -138,18 +140,19 @@ struct qca_data {
>         u32 tx_idle_delay;
>         struct timer_list wake_retrans_timer;
>         u32 wake_retrans;
> -       struct timer_list memdump_timer;
>         struct workqueue_struct *workqueue;
>         struct work_struct ws_awake_rx;
>         struct work_struct ws_awake_device;
>         struct work_struct ws_rx_vote_off;
>         struct work_struct ws_tx_vote_off;
>         struct work_struct ctrl_memdump_evt;
> +       struct delayed_work ctrl_memdump_timeout;
>         struct qca_memdump_data *qca_memdump;
>         unsigned long flags;
>         struct completion drop_ev_comp;
>         wait_queue_head_t suspend_wait_q;
>         enum qca_memdump_states memdump_state;
> +       struct mutex hci_memdump_lock;
>
>         /* For debugging purpose */
>         u64 ibs_sent_wacks;
> @@ -522,23 +525,28 @@ static void hci_ibs_wake_retrans_timeout(struct timer_list *t)
>                 hci_uart_tx_wakeup(hu);
>  }
>
> -static void hci_memdump_timeout(struct timer_list *t)
> +
> +static void qca_controller_memdump_timeout(struct work_struct *work)
>  {
> -       struct qca_data *qca = from_timer(qca, t, tx_idle_timer);
> +       struct qca_data *qca = container_of(work, struct qca_data,
> +                                       ctrl_memdump_timeout.work);
>         struct hci_uart *hu = qca->hu;
> -       struct qca_memdump_data *qca_memdump = qca->qca_memdump;
> -       char *memdump_buf = qca_memdump->memdump_buf_tail;
> -
> -       bt_dev_err(hu->hdev, "clearing allocated memory due to memdump timeout");
> -       /* Inject hw error event to reset the device and driver. */
> -       hci_reset_dev(hu->hdev);
> -       vfree(memdump_buf);
> -       kfree(qca_memdump);
> -       qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
> -       del_timer(&qca->memdump_timer);
> -       cancel_work_sync(&qca->ctrl_memdump_evt);
> +
> +       mutex_lock(&qca->hci_memdump_lock);
> +       if (test_bit(QCA_MEMDUMP_COLLECTION, &qca->flags)) {
> +               qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
> +               if (!test_bit(QCA_HW_ERROR_EVENT, &qca->flags)) {
> +                       /* Inject hw error event to reset the device
> +                        * and driver.
> +                        */
> +                       hci_reset_dev(hu->hdev);
> +               }
> +       }
> +
> +       mutex_unlock(&qca->hci_memdump_lock);
>  }
>
> +
>  /* Initialize protocol */
>  static int qca_open(struct hci_uart *hu)
>  {
> @@ -558,6 +566,7 @@ static int qca_open(struct hci_uart *hu)
>         skb_queue_head_init(&qca->tx_wait_q);
>         skb_queue_head_init(&qca->rx_memdump_q);
>         spin_lock_init(&qca->hci_ibs_lock);
> +       mutex_init(&qca->hci_memdump_lock);
>         qca->workqueue = alloc_ordered_workqueue("qca_wq", 0);
>         if (!qca->workqueue) {
>                 BT_ERR("QCA Workqueue not initialized properly");
> @@ -570,6 +579,8 @@ static int qca_open(struct hci_uart *hu)
>         INIT_WORK(&qca->ws_rx_vote_off, qca_wq_serial_rx_clock_vote_off);
>         INIT_WORK(&qca->ws_tx_vote_off, qca_wq_serial_tx_clock_vote_off);
>         INIT_WORK(&qca->ctrl_memdump_evt, qca_controller_memdump);
> +       INIT_DELAYED_WORK(&qca->ctrl_memdump_timeout,
> +                         qca_controller_memdump_timeout);
>         init_waitqueue_head(&qca->suspend_wait_q);
>
>         qca->hu = hu;
> @@ -596,7 +607,6 @@ static int qca_open(struct hci_uart *hu)
>
>         timer_setup(&qca->tx_idle_timer, hci_ibs_tx_idle_timeout, 0);
>         qca->tx_idle_delay = IBS_HOST_TX_IDLE_TIMEOUT_MS;
> -       timer_setup(&qca->memdump_timer, hci_memdump_timeout, 0);
>
>         BT_DBG("HCI_UART_QCA open, tx_idle_delay=%u, wake_retrans=%u",
>                qca->tx_idle_delay, qca->wake_retrans);
> @@ -677,7 +687,6 @@ static int qca_close(struct hci_uart *hu)
>         skb_queue_purge(&qca->rx_memdump_q);
>         del_timer(&qca->tx_idle_timer);
>         del_timer(&qca->wake_retrans_timer);
> -       del_timer(&qca->memdump_timer);
>         destroy_workqueue(qca->workqueue);
>         qca->hu = NULL;
>
> @@ -963,11 +972,20 @@ static void qca_controller_memdump(struct work_struct *work)
>
>         while ((skb = skb_dequeue(&qca->rx_memdump_q))) {
>
> +               mutex_lock(&qca->hci_memdump_lock);
> +               /* Skip processing the received packets if timeout detected. */
> +               if (qca->memdump_state == QCA_MEMDUMP_TIMEOUT) {
> +                       mutex_unlock(&qca->hci_memdump_lock);
> +                       return;
> +               }
> +
>                 if (!qca_memdump) {
>                         qca_memdump = kzalloc(sizeof(struct qca_memdump_data),
>                                               GFP_ATOMIC);
> -                       if (!qca_memdump)
> +                       if (!qca_memdump) {
> +                               mutex_unlock(&qca->hci_memdump_lock);
>                                 return;
> +                       }
>
>                         qca->qca_memdump = qca_memdump;
>                 }
> @@ -992,13 +1010,15 @@ static void qca_controller_memdump(struct work_struct *work)
>                         if (!(dump_size)) {
>                                 bt_dev_err(hu->hdev, "Rx invalid memdump size");
>                                 kfree_skb(skb);
> +                               mutex_unlock(&qca->hci_memdump_lock);
>                                 return;
>                         }
>
>                         bt_dev_info(hu->hdev, "QCA collecting dump of size:%u",
>                                     dump_size);
> -                       mod_timer(&qca->memdump_timer, (jiffies +
> -                                 msecs_to_jiffies(MEMDUMP_TIMEOUT_MS)));
> +                       queue_delayed_work(qca->workqueue,
> +                                          &qca->ctrl_memdump_timeout,
> +                                       msecs_to_jiffies(MEMDUMP_TIMEOUT_MS));
>
>                         skb_pull(skb, sizeof(dump_size));
>                         memdump_buf = vmalloc(dump_size);
> @@ -1016,6 +1036,7 @@ static void qca_controller_memdump(struct work_struct *work)
>                         kfree(qca_memdump);
>                         kfree_skb(skb);
>                         qca->qca_memdump = NULL;
> +                       mutex_unlock(&qca->hci_memdump_lock);
>                         return;
>                 }
>
> @@ -1046,16 +1067,20 @@ static void qca_controller_memdump(struct work_struct *work)
>                         memdump_buf = qca_memdump->memdump_buf_head;
>                         dev_coredumpv(&hu->serdev->dev, memdump_buf,
>                                       qca_memdump->received_dump, GFP_KERNEL);
> -                       del_timer(&qca->memdump_timer);
> +                       cancel_delayed_work(&qca->ctrl_memdump_timeout);
>                         kfree(qca->qca_memdump);
>                         qca->qca_memdump = NULL;
>                         qca->memdump_state = QCA_MEMDUMP_COLLECTED;
> +                       clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags);
>                 }
> +
> +               mutex_unlock(&qca->hci_memdump_lock);
>         }
>
>  }
>
> -int qca_controller_memdump_event(struct hci_dev *hdev, struct sk_buff *skb)
> +static int qca_controller_memdump_event(struct hci_dev *hdev,
> +                                       struct sk_buff *skb)
>  {
>         struct hci_uart *hu = hci_get_drvdata(hdev);
>         struct qca_data *qca = hu->priv;
> @@ -1406,30 +1431,21 @@ static void qca_wait_for_dump_collection(struct hci_dev *hdev)
>  {
>         struct hci_uart *hu = hci_get_drvdata(hdev);
>         struct qca_data *qca = hu->priv;
> -       struct qca_memdump_data *qca_memdump = qca->qca_memdump;
> -       char *memdump_buf = NULL;
>
>         wait_on_bit_timeout(&qca->flags, QCA_MEMDUMP_COLLECTION,
>                             TASK_UNINTERRUPTIBLE, MEMDUMP_TIMEOUT_MS);
>
>         clear_bit(QCA_MEMDUMP_COLLECTION, &qca->flags);
> -       if (qca->memdump_state == QCA_MEMDUMP_IDLE) {
> -               bt_dev_err(hu->hdev, "Clearing the buffers due to timeout");
> -               if (qca_memdump)
> -                       memdump_buf = qca_memdump->memdump_buf_tail;
> -               vfree(memdump_buf);
> -               kfree(qca_memdump);
> -               qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
> -               del_timer(&qca->memdump_timer);
> -               cancel_work_sync(&qca->ctrl_memdump_evt);
> -       }
>  }
>
>  static void qca_hw_error(struct hci_dev *hdev, u8 code)
>  {
>         struct hci_uart *hu = hci_get_drvdata(hdev);
>         struct qca_data *qca = hu->priv;
> +       struct qca_memdump_data *qca_memdump = qca->qca_memdump;
> +       char *memdump_buf = NULL;
>
> +       set_bit(QCA_HW_ERROR_EVENT, &qca->flags);
>         bt_dev_info(hdev, "mem_dump_status: %d", qca->memdump_state);
>
>         if (qca->memdump_state == QCA_MEMDUMP_IDLE) {
> @@ -1449,6 +1465,23 @@ static void qca_hw_error(struct hci_dev *hdev, u8 code)
>                 bt_dev_info(hdev, "waiting for dump to complete");
>                 qca_wait_for_dump_collection(hdev);
>         }
> +
> +       if (qca->memdump_state != QCA_MEMDUMP_COLLECTED) {
> +               bt_dev_err(hu->hdev, "clearing allocated memory due to memdump timeout");
> +               mutex_lock(&qca->hci_memdump_lock);
> +               if (qca_memdump)
> +                       memdump_buf = qca_memdump->memdump_buf_head;
> +               vfree(memdump_buf);
> +               kfree(qca_memdump);
> +               qca->qca_memdump = NULL;
> +               qca->memdump_state = QCA_MEMDUMP_TIMEOUT;
> +               cancel_delayed_work(&qca->ctrl_memdump_timeout);
> +               skb_queue_purge(&qca->rx_memdump_q);
> +               mutex_unlock(&qca->hci_memdump_lock);
> +               cancel_work_sync(&qca->ctrl_memdump_evt);
> +       }
> +
> +       clear_bit(QCA_HW_ERROR_EVENT, &qca->flags);
>  }
>
>  static void qca_cmd_timeout(struct hci_dev *hdev)
> --
> QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
> of Code Aurora Forum, hosted by The Linux Foundation
>

Reviewed-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>

  reply	other threads:[~2020-02-13 17:18 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-13 15:56 [PATCH v3] Bluetooth: hci_qca: Bug fixes while collecting controller memory dump Venkata Lakshmi Narayana Gubba
2020-02-13 17:18 ` Abhishek Pandit-Subedi [this message]
2020-02-14  2:22 ` Stephen Boyd
2020-02-14 11:48   ` gubbaven

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CANFp7mVLagCME4oTfMaFnkFnktn7h9bLiXJ09nPkNqJyveELVw@mail.gmail.com \
    --to=abhishekpandit@chromium.org \
    --cc=bgodavar@codeaurora.org \
    --cc=gubbaven@codeaurora.org \
    --cc=hemantg@codeaurora.org \
    --cc=johan.hedberg@gmail.com \
    --cc=linux-arm-msm@vger.kernel.org \
    --cc=linux-bluetooth@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=marcel@holtmann.org \
    --cc=mka@chromium.org \
    --cc=rjliao@codeaurora.org \
    --cc=robh@kernel.org \
    --cc=seanpaul@chromium.org \
    --cc=tientzu@chromium.org \
    --cc=yshavit@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).