All of lore.kernel.org
 help / color / mirror / Atom feed
From: Lorenzo Bianconi <lorenzo@kernel.org>
To: Felix Fietkau <nbd@nbd.name>
Cc: Mikhail Gavrilov <mikhail.v.gavrilov@gmail.com>,
	sujuan.chen@mediatek.com,
	Linux List Kernel Mailing <linux-wireless@vger.kernel.org>,
	Linux List Kernel Mailing <linux-kernel@vger.kernel.org>
Subject: Re: [6.2][regression] after commit cd372b8c99c5a5cf6a464acebb7e4a79af7ec8ae stopping working wifi mt7921e
Date: Wed, 21 Dec 2022 17:07:10 +0100	[thread overview]
Message-ID: <Y6MvLszMEvtQjcOk@lore-desk> (raw)
In-Reply-To: <a30d8580-936a-79e4-c1c7-70f3d3b8da35@nbd.name>

[-- Attachment #1: Type: text/plain, Size: 8381 bytes --]

> On 21.12.22 14:10, Mikhail Gavrilov wrote:
> > On Wed, Dec 21, 2022 at 3:45 PM Felix Fietkau <nbd@nbd.name> wrote:
> > > 
> > > I'm pretty sure that commit is unrelated to this issue. However, while
> > > looking at the code I found a bug that would explain your issue.
> > > 
> > > Please try this patch:
> > > ---
> > > --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c
> > > +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c
> > > @@ -422,15 +422,15 @@ void mt7921_roc_timer(struct timer_list *timer)
> > > 
> > >   static int mt7921_abort_roc(struct mt7921_phy *phy, struct mt7921_vif *vif)
> > >   {
> > > -       int err;
> > > -
> > > -       if (!test_and_clear_bit(MT76_STATE_ROC, &phy->mt76->state))
> > > -               return 0;
> > > +       int err = 0;
> > > 
> > >         del_timer_sync(&phy->roc_timer);
> > >         cancel_work_sync(&phy->roc_work);
> > > -       err = mt7921_mcu_abort_roc(phy, vif, phy->roc_token_id);
> > > -       clear_bit(MT76_STATE_ROC, &phy->mt76->state);
> > > +
> > > +       mt7921_mutex_acquire(phy->dev);
> > > +       if (test_and_clear_bit(MT76_STATE_ROC, &phy->mt76->state))
> > > +               err = mt7921_mcu_abort_roc(phy, vif, phy->roc_token_id);
> > > +       mt7921_mutex_release(phy->dev);
> > > 
> > >         return err;
> > >   }
> > > @@ -487,13 +487,8 @@ static int mt7921_cancel_remain_on_channel(struct ieee80211_hw *hw,
> > >   {
> > >         struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv;
> > >         struct mt7921_phy *phy = mt7921_hw_phy(hw);
> > > -       int err;
> > > 
> > > -       mt7921_mutex_acquire(phy->dev);
> > > -       err = mt7921_abort_roc(phy, mvif);
> > > -       mt7921_mutex_release(phy->dev);
> > > -
> > > -       return err;
> > > +       return mt7921_abort_roc(phy, mvif);
> > >   }
> > > 
> > >   static int mt7921_set_channel(struct mt7921_phy *phy)
> > > @@ -1778,11 +1773,8 @@ static void mt7921_mgd_complete_tx(struct ieee80211_hw *hw,
> > >                                    struct ieee80211_prep_tx_info *info)
> > >   {
> > >         struct mt7921_vif *mvif = (struct mt7921_vif *)vif->drv_priv;
> > > -       struct mt7921_dev *dev = mt7921_hw_dev(hw);
> > > 
> > > -       mt7921_mutex_acquire(dev);
> > >         mt7921_abort_roc(mvif->phy, mvif);
> > > -       mt7921_mutex_release(dev);
> > >   }
> > > 
> > >   const struct ieee80211_ops mt7921_ops = {
> > > 
> > 
> > Unfortunately this patch did not fix the issue.
> > There are still many messages in the logs "mt7921e 0000:05:00.0:
> > AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0010 address=0xffdc6a80
> > flags=0x0050]"
> Thanks! I guess I focused on the wrong part of your kernel log
> initially. After more code review, I found that there is in fact a DMA
> related bug in the commit that your bisection pointed to, which happened
> to uncover and trigger the deadlock fixed by my other patch.
> 
> So here's my fix for the DMA issue:

Thx for fixing this issue, I tested the patch with mt7986 w and w/o WED enabled and it works
fine.

Tested-by: Lorenzo Bianconi <lorenzo@kernel.org>

> ---
> --- a/drivers/net/wireless/mediatek/mt76/dma.c
> +++ b/drivers/net/wireless/mediatek/mt76/dma.c
> @@ -205,6 +205,52 @@ mt76_dma_queue_reset(struct mt76_dev *dev, struct mt76_queue *q)
>  	mt76_dma_sync_idx(dev, q);
>  }
> +static int
> +mt76_dma_add_rx_buf(struct mt76_dev *dev, struct mt76_queue *q,
> +		    struct mt76_queue_buf *buf, void *data)
> +{
> +	struct mt76_desc *desc = &q->desc[q->head];
> +	struct mt76_queue_entry *entry = &q->entry[q->head];
> +	struct mt76_txwi_cache *txwi = NULL;
> +	u32 buf1 = 0, ctrl;
> +	int idx = q->head;
> +	int rx_token;
> +
> +	ctrl = FIELD_PREP(MT_DMA_CTL_SD_LEN0, buf[0].len);
> +
> +	if ((q->flags & MT_QFLAG_WED) &&
> +	    FIELD_GET(MT_QFLAG_WED_TYPE, q->flags) == MT76_WED_Q_RX) {
> +		txwi = mt76_get_rxwi(dev);
> +		if (!txwi)
> +			return -ENOMEM;
> +
> +		rx_token = mt76_rx_token_consume(dev, data, txwi, buf->addr);
> +		if (rx_token < 0) {
> +			mt76_put_rxwi(dev, txwi);
> +			return -ENOMEM;
> +		}
> +
> +		buf1 |= FIELD_PREP(MT_DMA_CTL_TOKEN, rx_token);
> +		ctrl |= MT_DMA_CTL_TO_HOST;
> +	}
> +
> +	WRITE_ONCE(desc->buf0, cpu_to_le32(buf->addr));
> +	WRITE_ONCE(desc->buf1, cpu_to_le32(buf1));
> +	WRITE_ONCE(desc->ctrl, cpu_to_le32(ctrl));
> +	WRITE_ONCE(desc->info, 0);
> +
> +	entry->dma_addr[0] = buf->addr;
> +	entry->dma_len[0] = buf->len;
> +	entry->txwi = txwi;
> +	entry->buf = data;
> +	entry->wcid = 0xffff;
> +	entry->skip_buf1 = true;
> +	q->head = (q->head + 1) % q->ndesc;
> +	q->queued++;
> +
> +	return idx;
> +}
> +
>  static int
>  mt76_dma_add_buf(struct mt76_dev *dev, struct mt76_queue *q,
>  		 struct mt76_queue_buf *buf, int nbufs, u32 info,
> @@ -215,6 +261,11 @@ mt76_dma_add_buf(struct mt76_dev *dev, struct mt76_queue *q,
>  	int i, idx = -1;
>  	u32 ctrl, next;
> +	if (txwi) {
> +		q->entry[q->head].txwi = DMA_DUMMY_DATA;
> +		q->entry[q->head].skip_buf0 = true;
> +	}
> +
>  	for (i = 0; i < nbufs; i += 2, buf += 2) {
>  		u32 buf0 = buf[0].addr, buf1 = 0;
> @@ -224,51 +275,28 @@ mt76_dma_add_buf(struct mt76_dev *dev, struct mt76_queue *q,
>  		desc = &q->desc[idx];
>  		entry = &q->entry[idx];
> -		if ((q->flags & MT_QFLAG_WED) &&
> -		    FIELD_GET(MT_QFLAG_WED_TYPE, q->flags) == MT76_WED_Q_RX) {
> -			struct mt76_txwi_cache *t = txwi;
> -			int rx_token;
> -
> -			if (!t)
> -				return -ENOMEM;
> -
> -			rx_token = mt76_rx_token_consume(dev, (void *)skb, t,
> -							 buf[0].addr);
> -			if (rx_token < 0)
> -				return -ENOMEM;
> -
> -			buf1 |= FIELD_PREP(MT_DMA_CTL_TOKEN, rx_token);
> -			ctrl = FIELD_PREP(MT_DMA_CTL_SD_LEN0, buf[0].len) |
> -			       MT_DMA_CTL_TO_HOST;
> -		} else {
> -			if (txwi) {
> -				q->entry[next].txwi = DMA_DUMMY_DATA;
> -				q->entry[next].skip_buf0 = true;
> -			}
> -
> -			if (buf[0].skip_unmap)
> -				entry->skip_buf0 = true;
> -			entry->skip_buf1 = i == nbufs - 1;
> -
> -			entry->dma_addr[0] = buf[0].addr;
> -			entry->dma_len[0] = buf[0].len;
> -
> -			ctrl = FIELD_PREP(MT_DMA_CTL_SD_LEN0, buf[0].len);
> -			if (i < nbufs - 1) {
> -				entry->dma_addr[1] = buf[1].addr;
> -				entry->dma_len[1] = buf[1].len;
> -				buf1 = buf[1].addr;
> -				ctrl |= FIELD_PREP(MT_DMA_CTL_SD_LEN1, buf[1].len);
> -				if (buf[1].skip_unmap)
> -					entry->skip_buf1 = true;
> -			}
> -
> -			if (i == nbufs - 1)
> -				ctrl |= MT_DMA_CTL_LAST_SEC0;
> -			else if (i == nbufs - 2)
> -				ctrl |= MT_DMA_CTL_LAST_SEC1;
> +		if (buf[0].skip_unmap)
> +			entry->skip_buf0 = true;
> +		entry->skip_buf1 = i == nbufs - 1;
> +
> +		entry->dma_addr[0] = buf[0].addr;
> +		entry->dma_len[0] = buf[0].len;
> +
> +		ctrl = FIELD_PREP(MT_DMA_CTL_SD_LEN0, buf[0].len);
> +		if (i < nbufs - 1) {
> +			entry->dma_addr[1] = buf[1].addr;
> +			entry->dma_len[1] = buf[1].len;
> +			buf1 = buf[1].addr;
> +			ctrl |= FIELD_PREP(MT_DMA_CTL_SD_LEN1, buf[1].len);
> +			if (buf[1].skip_unmap)
> +				entry->skip_buf1 = true;
>  		}
> +		if (i == nbufs - 1)
> +			ctrl |= MT_DMA_CTL_LAST_SEC0;
> +		else if (i == nbufs - 2)
> +			ctrl |= MT_DMA_CTL_LAST_SEC1;
> +
>  		WRITE_ONCE(desc->buf0, cpu_to_le32(buf0));
>  		WRITE_ONCE(desc->buf1, cpu_to_le32(buf1));
>  		WRITE_ONCE(desc->info, cpu_to_le32(info));
> @@ -567,17 +595,9 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
>  	spin_lock_bh(&q->lock);
>  	while (q->queued < q->ndesc - 1) {
> -		struct mt76_txwi_cache *t = NULL;
>  		struct mt76_queue_buf qbuf;
>  		void *buf = NULL;
> -		if ((q->flags & MT_QFLAG_WED) &&
> -		    FIELD_GET(MT_QFLAG_WED_TYPE, q->flags) == MT76_WED_Q_RX) {
> -			t = mt76_get_rxwi(dev);
> -			if (!t)
> -				break;
> -		}
> -
>  		buf = page_frag_alloc(&q->rx_page, q->buf_size, GFP_ATOMIC);
>  		if (!buf)
>  			break;
> @@ -591,7 +611,7 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
>  		qbuf.addr = addr + offset;
>  		qbuf.len = len - offset;
>  		qbuf.skip_unmap = false;
> -		if (mt76_dma_add_buf(dev, q, &qbuf, 1, 0, buf, t) < 0) {
> +		if (mt76_dma_add_rx_buf(dev, q, &qbuf, buf) < 0) {
>  			dma_unmap_single(dev->dma_dev, addr, len,
>  					 DMA_FROM_DEVICE);
>  			skb_free_frag(buf);
> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

  reply	other threads:[~2022-12-21 16:07 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-12-21  1:10 [6.2][regression] after commit cd372b8c99c5a5cf6a464acebb7e4a79af7ec8ae stopping working wifi mt7921e Mikhail Gavrilov
2022-12-21 10:45 ` Felix Fietkau
2022-12-21 11:26   ` Lorenzo Bianconi
2022-12-21 13:10   ` Mikhail Gavrilov
2022-12-21 14:12     ` Felix Fietkau
2022-12-21 16:07       ` Lorenzo Bianconi [this message]
2022-12-21 16:46       ` Mikhail Gavrilov
2022-12-21 17:17         ` Felix Fietkau
2022-12-22  6:47           ` Mikhail Gavrilov
2022-12-24  7:55             ` Thorsten Leemhuis
2022-12-26 10:59               ` Thorsten Leemhuis
2023-01-04 14:20           ` Thorsten Leemhuis
2023-01-09  7:32             ` Linux kernel regression tracking (Thorsten Leemhuis)
2023-01-10  7:16               ` Linux kernel regression tracking (Thorsten Leemhuis)
2023-01-10  8:00                 ` Felix Fietkau
2023-01-10  8:41                   ` Linux kernel regression tracking (Thorsten Leemhuis)
2023-01-13 14:11                     ` Kalle Valo
2023-01-10 21:52                   ` Mikhail Gavrilov
2022-12-22 12:36 ` [6.2][regression] after commit cd372b8c99c5a5cf6a464acebb7e4a79af7ec8ae stopping working wifi mt7921e #forregzbot Thorsten Leemhuis
2023-01-27 11:36   ` Linux kernel regression tracking (#update)
2023-01-17  0:33 [6.2][regression] after commit cd372b8c99c5a5cf6a464acebb7e4a79af7ec8ae stopping working wifi mt7921e Mike Lothian
2023-01-17  5:42 ` Mikhail Gavrilov
2023-01-17  6:42   ` Kalle Valo
2023-01-17 13:06   ` Mike Lothian
2023-01-17 13:13     ` Mikhail Gavrilov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Y6MvLszMEvtQjcOk@lore-desk \
    --to=lorenzo@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-wireless@vger.kernel.org \
    --cc=mikhail.v.gavrilov@gmail.com \
    --cc=nbd@nbd.name \
    --cc=sujuan.chen@mediatek.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.