All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] rt2x00: rt2800usb: fix races in tx queue
@ 2011-08-04 12:46 Stanislaw Gruszka
  2011-08-06 11:06 ` Ivo Van Doorn
  0 siblings, 1 reply; 14+ messages in thread
From: Stanislaw Gruszka @ 2011-08-04 12:46 UTC (permalink / raw)
  To: John W. Linville
  Cc: Justin Piszcz, Ivo van Doorn, Gertjan van Wingerde, Helmut Schaa,
	linux-wireless

We can perform parallel putting new entries in a queue
(rt2x00queue_write_tx_frame()) and removing entries after finishing
transmitting (rt2800usb_work_txdone()). There are cases when txdone
may process an entry that was not fully send and nullify entry->skb.
What in a result crash the kernel in rt2800usb_write_tx_desc() or
rt2800usb_get_txwi() or other place where entry->skb is used.

To fix stop processing entries in txdone, which flags do not indicate
transition was finished.

Also change flags as soon as we get confirmation transmision is done.

Reported-and-tested-by: Justin Piszcz <jpiszcz@lucidpixels.com>
Cc: stable@kernel.org
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
---
 drivers/net/wireless/rt2x00/rt2800usb.c |   36 ++++++++++++++++++++++--------
 drivers/net/wireless/rt2x00/rt2x00usb.c |    6 ++--
 2 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c
index 5075593..edd0518 100644
--- a/drivers/net/wireless/rt2x00/rt2800usb.c
+++ b/drivers/net/wireless/rt2x00/rt2800usb.c
@@ -500,14 +500,14 @@ static bool rt2800usb_txdone_entry_check(struct queue_entry *entry, u32 reg)
 	return true;
 }
 
-static void rt2800usb_txdone(struct rt2x00_dev *rt2x00dev)
+static int rt2800usb_txdone(struct rt2x00_dev *rt2x00dev)
 {
 	struct data_queue *queue;
 	struct queue_entry *entry;
 	u32 reg;
 	u8 qid;
 
-	while (kfifo_get(&rt2x00dev->txstatus_fifo, &reg)) {
+	while (kfifo_peek(&rt2x00dev->txstatus_fifo, &reg)) {
 
 		/* TX_STA_FIFO_PID_QUEUE is a 2-bit field, thus
 		 * qid is guaranteed to be one of the TX QIDs
@@ -517,25 +517,39 @@ static void rt2800usb_txdone(struct rt2x00_dev *rt2x00dev)
 		if (unlikely(!queue)) {
 			WARNING(rt2x00dev, "Got TX status for an unavailable "
 					   "queue %u, dropping\n", qid);
-			continue;
+			goto next_reg;
 		}
 
 		/*
 		 * Inside each queue, we process each entry in a chronological
 		 * order. We first check that the queue is not empty.
 		 */
-		entry = NULL;
-		while (!rt2x00queue_empty(queue)) {
+		while (1) {
+			entry = NULL;
+
+			if (rt2x00queue_empty(queue))
+				break;
+
 			entry = rt2x00queue_get_entry(queue, Q_INDEX_DONE);
+
+			if (test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags) ||
+			    !test_bit(ENTRY_DATA_STATUS_PENDING, &entry->flags)) {
+				WARNING(rt2x00dev, "Data pending for entry %u"
+					"in queue %u\n", entry->entry_idx, qid);
+				return 1;
+			}
+
 			if (rt2800usb_txdone_entry_check(entry, reg))
 				break;
 		}
 
-		if (!entry || rt2x00queue_empty(queue))
-			break;
-
-		rt2800_txdone_entry(entry, reg);
+		if (entry)
+			rt2800_txdone_entry(entry, reg);
+next_reg:
+		kfifo_skip(&rt2x00dev->txstatus_fifo);
 	}
+
+	return 0;
 }
 
 static void rt2800usb_work_txdone(struct work_struct *work)
@@ -545,7 +559,8 @@ static void rt2800usb_work_txdone(struct work_struct *work)
 	struct data_queue *queue;
 	struct queue_entry *entry;
 
-	rt2800usb_txdone(rt2x00dev);
+	if (rt2800usb_txdone(rt2x00dev))
+		goto out;
 
 	/*
 	 * Process any trailing TX status reports for IO failures,
@@ -569,6 +584,7 @@ static void rt2800usb_work_txdone(struct work_struct *work)
 		}
 	}
 
+out:
 	/*
 	 * The hw may delay sending the packet after DMA complete
 	 * if the medium is busy, thus the TX_STA_FIFO entry is
diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.c b/drivers/net/wireless/rt2x00/rt2x00usb.c
index b6b4542..46c6d36 100644
--- a/drivers/net/wireless/rt2x00/rt2x00usb.c
+++ b/drivers/net/wireless/rt2x00/rt2x00usb.c
@@ -265,14 +265,14 @@ static void rt2x00usb_interrupt_txdone(struct urb *urb)
 	if (!test_and_clear_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags))
 		return;
 
-	if (rt2x00dev->ops->lib->tx_dma_done)
-		rt2x00dev->ops->lib->tx_dma_done(entry);
-
 	/*
 	 * Report the frame as DMA done
 	 */
 	rt2x00lib_dmadone(entry);
 
+	if (rt2x00dev->ops->lib->tx_dma_done)
+		rt2x00dev->ops->lib->tx_dma_done(entry);
+
 	/*
 	 * Check if the frame was correctly uploaded
 	 */
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] rt2x00: rt2800usb: fix races in tx queue
  2011-08-04 12:46 [PATCH] rt2x00: rt2800usb: fix races in tx queue Stanislaw Gruszka
@ 2011-08-06 11:06 ` Ivo Van Doorn
  2011-08-08  9:29   ` Stanislaw Gruszka
  0 siblings, 1 reply; 14+ messages in thread
From: Ivo Van Doorn @ 2011-08-06 11:06 UTC (permalink / raw)
  To: Stanislaw Gruszka
  Cc: John W. Linville, Justin Piszcz, Gertjan van Wingerde,
	Helmut Schaa, linux-wireless

Hi,

On Thu, Aug 4, 2011 at 2:46 PM, Stanislaw Gruszka <sgruszka@redhat.com> wrote:
> We can perform parallel putting new entries in a queue
> (rt2x00queue_write_tx_frame()) and removing entries after finishing
> transmitting (rt2800usb_work_txdone()). There are cases when txdone
> may process an entry that was not fully send and nullify entry->skb.
> What in a result crash the kernel in rt2800usb_write_tx_desc() or
> rt2800usb_get_txwi() or other place where entry->skb is used.
>
> To fix stop processing entries in txdone, which flags do not indicate
> transition was finished.
>
> Also change flags as soon as we get confirmation transmision is done.
>
> Reported-and-tested-by: Justin Piszcz <jpiszcz@lucidpixels.com>
> Cc: stable@kernel.org
> Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
> ---
>  drivers/net/wireless/rt2x00/rt2800usb.c |   36 ++++++++++++++++++++++--------
>  drivers/net/wireless/rt2x00/rt2x00usb.c |    6 ++--
>  2 files changed, 29 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c
> index 5075593..edd0518 100644
> --- a/drivers/net/wireless/rt2x00/rt2800usb.c
> +++ b/drivers/net/wireless/rt2x00/rt2800usb.c
> @@ -500,14 +500,14 @@ static bool rt2800usb_txdone_entry_check(struct queue_entry *entry, u32 reg)
>        return true;
>  }
>
> -static void rt2800usb_txdone(struct rt2x00_dev *rt2x00dev)
> +static int rt2800usb_txdone(struct rt2x00_dev *rt2x00dev)
>  {
>        struct data_queue *queue;
>        struct queue_entry *entry;
>        u32 reg;
>        u8 qid;
>
> -       while (kfifo_get(&rt2x00dev->txstatus_fifo, &reg)) {
> +       while (kfifo_peek(&rt2x00dev->txstatus_fifo, &reg)) {

I'm not too sure about this change, why do you need to do kfifo_peek
and add gotos to the end of the while-loop to remove the item from the queue?
There is no condition in which the obtained value from kfifo-peek
will require it to be read again later (because when the value couldn't be
handled we are throwing it away anyway using kfifo_skip).

Ivo

>                /* TX_STA_FIFO_PID_QUEUE is a 2-bit field, thus
>                 * qid is guaranteed to be one of the TX QIDs
> @@ -517,25 +517,39 @@ static void rt2800usb_txdone(struct rt2x00_dev *rt2x00dev)
>                if (unlikely(!queue)) {
>                        WARNING(rt2x00dev, "Got TX status for an unavailable "
>                                           "queue %u, dropping\n", qid);
> -                       continue;
> +                       goto next_reg;
>                }
>
>                /*
>                 * Inside each queue, we process each entry in a chronological
>                 * order. We first check that the queue is not empty.
>                 */
> -               entry = NULL;
> -               while (!rt2x00queue_empty(queue)) {
> +               while (1) {
> +                       entry = NULL;
> +
> +                       if (rt2x00queue_empty(queue))
> +                               break;
> +
>                        entry = rt2x00queue_get_entry(queue, Q_INDEX_DONE);
> +
> +                       if (test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags) ||
> +                           !test_bit(ENTRY_DATA_STATUS_PENDING, &entry->flags)) {
> +                               WARNING(rt2x00dev, "Data pending for entry %u"
> +                                       "in queue %u\n", entry->entry_idx, qid);
> +                               return 1;
> +                       }
> +
>                        if (rt2800usb_txdone_entry_check(entry, reg))
>                                break;
>                }
>
> -               if (!entry || rt2x00queue_empty(queue))
> -                       break;
> -
> -               rt2800_txdone_entry(entry, reg);
> +               if (entry)
> +                       rt2800_txdone_entry(entry, reg);
> +next_reg:
> +               kfifo_skip(&rt2x00dev->txstatus_fifo);
>        }
> +
> +       return 0;
>  }
>
>  static void rt2800usb_work_txdone(struct work_struct *work)
> @@ -545,7 +559,8 @@ static void rt2800usb_work_txdone(struct work_struct *work)
>        struct data_queue *queue;
>        struct queue_entry *entry;
>
> -       rt2800usb_txdone(rt2x00dev);
> +       if (rt2800usb_txdone(rt2x00dev))
> +               goto out;
>
>        /*
>         * Process any trailing TX status reports for IO failures,
> @@ -569,6 +584,7 @@ static void rt2800usb_work_txdone(struct work_struct *work)
>                }
>        }
>
> +out:
>        /*
>         * The hw may delay sending the packet after DMA complete
>         * if the medium is busy, thus the TX_STA_FIFO entry is
> diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.c b/drivers/net/wireless/rt2x00/rt2x00usb.c
> index b6b4542..46c6d36 100644
> --- a/drivers/net/wireless/rt2x00/rt2x00usb.c
> +++ b/drivers/net/wireless/rt2x00/rt2x00usb.c
> @@ -265,14 +265,14 @@ static void rt2x00usb_interrupt_txdone(struct urb *urb)
>        if (!test_and_clear_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags))
>                return;
>
> -       if (rt2x00dev->ops->lib->tx_dma_done)
> -               rt2x00dev->ops->lib->tx_dma_done(entry);
> -
>        /*
>         * Report the frame as DMA done
>         */
>        rt2x00lib_dmadone(entry);
>
> +       if (rt2x00dev->ops->lib->tx_dma_done)
> +               rt2x00dev->ops->lib->tx_dma_done(entry);
> +
>        /*
>         * Check if the frame was correctly uploaded
>         */
> --
> 1.7.1
>
>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] rt2x00: rt2800usb: fix races in tx queue
  2011-08-06 11:06 ` Ivo Van Doorn
@ 2011-08-08  9:29   ` Stanislaw Gruszka
  2011-08-08  9:35     ` [PATCH v2] " Stanislaw Gruszka
  2011-08-08  9:43     ` [PATCH] " Ivo Van Doorn
  0 siblings, 2 replies; 14+ messages in thread
From: Stanislaw Gruszka @ 2011-08-08  9:29 UTC (permalink / raw)
  To: Ivo Van Doorn
  Cc: John W. Linville, Justin Piszcz, Gertjan van Wingerde,
	Helmut Schaa, linux-wireless

Hi Ivo

On Sat, Aug 06, 2011 at 01:06:51PM +0200, Ivo Van Doorn wrote:
> On Thu, Aug 4, 2011 at 2:46 PM, Stanislaw Gruszka <sgruszka@redhat.com> wrote:
> > -static void rt2800usb_txdone(struct rt2x00_dev *rt2x00dev)
> > +static int rt2800usb_txdone(struct rt2x00_dev *rt2x00dev)
> >  {
> >        struct data_queue *queue;
> >        struct queue_entry *entry;
> >        u32 reg;
> >        u8 qid;
> >
> > -       while (kfifo_get(&rt2x00dev->txstatus_fifo, &reg)) {
> > +       while (kfifo_peek(&rt2x00dev->txstatus_fifo, &reg)) {
> 
> I'm not too sure about this change, why do you need to do kfifo_peek
> and add gotos to the end of the while-loop to remove the item from the queue?
> There is no condition in which the obtained value from kfifo-peek
> will require it to be read again later (because when the value couldn't be
> handled we are throwing it away anyway using kfifo_skip).

There is new case (see below) where it is needed. I can get rid of goto,
that will make code a bit cleaner. There is place for optimization, mainly
make tx_status fifo per queue, but for now I just want to fix kernel crashes.

> >                /* TX_STA_FIFO_PID_QUEUE is a 2-bit field, thus
> >                 * qid is guaranteed to be one of the TX QIDs
> > @@ -517,25 +517,39 @@ static void rt2800usb_txdone(struct rt2x00_dev *rt2x00dev)
> >                if (unlikely(!queue)) {
> >                        WARNING(rt2x00dev, "Got TX status for an unavailable "
> >                                           "queue %u, dropping\n", qid);
> > -                       continue;
> > +                       goto next_reg;
> >                }
> >
> >                /*
> >                 * Inside each queue, we process each entry in a chronological
> >                 * order. We first check that the queue is not empty.
> >                 */
> > -               entry = NULL;
> > -               while (!rt2x00queue_empty(queue)) {
> > +               while (1) {
> > +                       entry = NULL;
> > +
> > +                       if (rt2x00queue_empty(queue))
> > +                               break;
> > +
> >                        entry = rt2x00queue_get_entry(queue, Q_INDEX_DONE);
> > +
> > +                       if (test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags) ||
> > +                           !test_bit(ENTRY_DATA_STATUS_PENDING, &entry->flags)) {
> > +                               WARNING(rt2x00dev, "Data pending for entry %u"
> > +                                       "in queue %u\n", entry->entry_idx, qid);
> > +                               return 1;

Here is part of code where we exit the loop (and whole function) and do
not remove head "reg" from tx_status fifo - and read it again when
_txdone work is called next time.

Stanislaw

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v2] rt2x00: rt2800usb: fix races in tx queue
  2011-08-08  9:29   ` Stanislaw Gruszka
@ 2011-08-08  9:35     ` Stanislaw Gruszka
  2011-08-08 20:55       ` Gertjan van Wingerde
  2011-08-08  9:43     ` [PATCH] " Ivo Van Doorn
  1 sibling, 1 reply; 14+ messages in thread
From: Stanislaw Gruszka @ 2011-08-08  9:35 UTC (permalink / raw)
  To: Ivo Van Doorn
  Cc: John W. Linville, Justin Piszcz, Gertjan van Wingerde,
	Helmut Schaa, linux-wireless

We can perform parallel putting new entries in a queue
(rt2x00queue_write_tx_frame()) and removing entries after
finishing transmitting (rt2800usb_work_txdone()). There are
cases when txdone may process an entry that was not fully
send and nullify entry->skb. What in a result crash the kernel
in rt2800usb_write_tx_desc() or rt2800usb_get_txwi() or other
place where entry->skb is used.

To fix stop processing entries in txdone, which flags do not
indicate transition was finished.

Reported-and-tested-by: Justin Piszcz <jpiszcz@lucidpixels.com>
Cc: stable@kernel.org
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
---
v1 -> v2: remove goto 

 drivers/net/wireless/rt2x00/rt2800usb.c |   33 +++++++++++++++++++++++-------
 drivers/net/wireless/rt2x00/rt2x00usb.c |    6 ++--
 2 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c
index 5075593..670a30d 100644
--- a/drivers/net/wireless/rt2x00/rt2800usb.c
+++ b/drivers/net/wireless/rt2x00/rt2800usb.c
@@ -500,14 +500,14 @@ static bool rt2800usb_txdone_entry_check(struct queue_entry *entry, u32 reg)
 	return true;
 }
 
-static void rt2800usb_txdone(struct rt2x00_dev *rt2x00dev)
+static int rt2800usb_txdone(struct rt2x00_dev *rt2x00dev)
 {
 	struct data_queue *queue;
 	struct queue_entry *entry;
 	u32 reg;
 	u8 qid;
 
-	while (kfifo_get(&rt2x00dev->txstatus_fifo, &reg)) {
+	while (kfifo_peek(&rt2x00dev->txstatus_fifo, &reg)) {
 
 		/* TX_STA_FIFO_PID_QUEUE is a 2-bit field, thus
 		 * qid is guaranteed to be one of the TX QIDs
@@ -517,6 +517,7 @@ static void rt2800usb_txdone(struct rt2x00_dev *rt2x00dev)
 		if (unlikely(!queue)) {
 			WARNING(rt2x00dev, "Got TX status for an unavailable "
 					   "queue %u, dropping\n", qid);
+			kfifo_skip(&rt2x00dev->txstatus_fifo);
 			continue;
 		}
 
@@ -524,18 +525,32 @@ static void rt2800usb_txdone(struct rt2x00_dev *rt2x00dev)
 		 * Inside each queue, we process each entry in a chronological
 		 * order. We first check that the queue is not empty.
 		 */
-		entry = NULL;
-		while (!rt2x00queue_empty(queue)) {
+		while (1) {
+			entry = NULL;
+
+			if (rt2x00queue_empty(queue))
+				break;
+
 			entry = rt2x00queue_get_entry(queue, Q_INDEX_DONE);
+
+			if (test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags) ||
+			    !test_bit(ENTRY_DATA_STATUS_PENDING, &entry->flags)) {
+				WARNING(rt2x00dev, "Data pending for entry %u"
+					"in queue %u\n", entry->entry_idx, qid);
+				return 1;
+			}
+
 			if (rt2800usb_txdone_entry_check(entry, reg))
 				break;
 		}
 
-		if (!entry || rt2x00queue_empty(queue))
-			break;
+		if (entry)
+			rt2800_txdone_entry(entry, reg);
 
-		rt2800_txdone_entry(entry, reg);
+		kfifo_skip(&rt2x00dev->txstatus_fifo);
 	}
+
+	return 0;
 }
 
 static void rt2800usb_work_txdone(struct work_struct *work)
@@ -545,7 +560,8 @@ static void rt2800usb_work_txdone(struct work_struct *work)
 	struct data_queue *queue;
 	struct queue_entry *entry;
 
-	rt2800usb_txdone(rt2x00dev);
+	if (rt2800usb_txdone(rt2x00dev))
+		goto out;
 
 	/*
 	 * Process any trailing TX status reports for IO failures,
@@ -569,6 +585,7 @@ static void rt2800usb_work_txdone(struct work_struct *work)
 		}
 	}
 
+out:
 	/*
 	 * The hw may delay sending the packet after DMA complete
 	 * if the medium is busy, thus the TX_STA_FIFO entry is
diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.c b/drivers/net/wireless/rt2x00/rt2x00usb.c
index b6b4542..46c6d36 100644
--- a/drivers/net/wireless/rt2x00/rt2x00usb.c
+++ b/drivers/net/wireless/rt2x00/rt2x00usb.c
@@ -265,14 +265,14 @@ static void rt2x00usb_interrupt_txdone(struct urb *urb)
 	if (!test_and_clear_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags))
 		return;
 
-	if (rt2x00dev->ops->lib->tx_dma_done)
-		rt2x00dev->ops->lib->tx_dma_done(entry);
-
 	/*
 	 * Report the frame as DMA done
 	 */
 	rt2x00lib_dmadone(entry);
 
+	if (rt2x00dev->ops->lib->tx_dma_done)
+		rt2x00dev->ops->lib->tx_dma_done(entry);
+
 	/*
 	 * Check if the frame was correctly uploaded
 	 */
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] rt2x00: rt2800usb: fix races in tx queue
  2011-08-08  9:29   ` Stanislaw Gruszka
  2011-08-08  9:35     ` [PATCH v2] " Stanislaw Gruszka
@ 2011-08-08  9:43     ` Ivo Van Doorn
  2011-08-08 14:28       ` Stanislaw Gruszka
  2011-08-08 20:45       ` Gertjan van Wingerde
  1 sibling, 2 replies; 14+ messages in thread
From: Ivo Van Doorn @ 2011-08-08  9:43 UTC (permalink / raw)
  To: Stanislaw Gruszka
  Cc: John W. Linville, Justin Piszcz, Gertjan van Wingerde,
	Helmut Schaa, linux-wireless

Hi,

> On Sat, Aug 06, 2011 at 01:06:51PM +0200, Ivo Van Doorn wrote:
>> On Thu, Aug 4, 2011 at 2:46 PM, Stanislaw Gruszka <sgruszka@redhat.com> wrote:
>> > -static void rt2800usb_txdone(struct rt2x00_dev *rt2x00dev)
>> > +static int rt2800usb_txdone(struct rt2x00_dev *rt2x00dev)
>> >  {
>> >        struct data_queue *queue;
>> >        struct queue_entry *entry;
>> >        u32 reg;
>> >        u8 qid;
>> >
>> > -       while (kfifo_get(&rt2x00dev->txstatus_fifo, &reg)) {
>> > +       while (kfifo_peek(&rt2x00dev->txstatus_fifo, &reg)) {
>>
>> I'm not too sure about this change, why do you need to do kfifo_peek
>> and add gotos to the end of the while-loop to remove the item from the queue?
>> There is no condition in which the obtained value from kfifo-peek
>> will require it to be read again later (because when the value couldn't be
>> handled we are throwing it away anyway using kfifo_skip).
>
> There is new case (see below) where it is needed. I can get rid of goto,
> that will make code a bit cleaner. There is place for optimization, mainly
> make tx_status fifo per queue, but for now I just want to fix kernel crashes.
>
>> >                /* TX_STA_FIFO_PID_QUEUE is a 2-bit field, thus
>> >                 * qid is guaranteed to be one of the TX QIDs
>> > @@ -517,25 +517,39 @@ static void rt2800usb_txdone(struct rt2x00_dev *rt2x00dev)
>> >                if (unlikely(!queue)) {
>> >                        WARNING(rt2x00dev, "Got TX status for an unavailable "
>> >                                           "queue %u, dropping\n", qid);
>> > -                       continue;
>> > +                       goto next_reg;
>> >                }
>> >
>> >                /*
>> >                 * Inside each queue, we process each entry in a chronological
>> >                 * order. We first check that the queue is not empty.
>> >                 */
>> > -               entry = NULL;
>> > -               while (!rt2x00queue_empty(queue)) {
>> > +               while (1) {
>> > +                       entry = NULL;
>> > +
>> > +                       if (rt2x00queue_empty(queue))
>> > +                               break;
>> > +
>> >                        entry = rt2x00queue_get_entry(queue, Q_INDEX_DONE);
>> > +
>> > +                       if (test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags) ||
>> > +                           !test_bit(ENTRY_DATA_STATUS_PENDING, &entry->flags)) {
>> > +                               WARNING(rt2x00dev, "Data pending for entry %u"
>> > +                                       "in queue %u\n", entry->entry_idx, qid);
>> > +                               return 1;
>
> Here is part of code where we exit the loop (and whole function) and do
> not remove head "reg" from tx_status fifo - and read it again when
> _txdone work is called next time.

Well but for what reason would we want to read the register again? If
we found an status report
for a queue which does not have pending items, then in this change it
would mean that the
status report is intended for a TX frame which has yet to be enqueued
to the hardware.

Obviously this means a mismatch between the TX status report and the
actual frame to which it
is being connected.

Ivo

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] rt2x00: rt2800usb: fix races in tx queue
  2011-08-08  9:43     ` [PATCH] " Ivo Van Doorn
@ 2011-08-08 14:28       ` Stanislaw Gruszka
  2011-08-08 20:45       ` Gertjan van Wingerde
  1 sibling, 0 replies; 14+ messages in thread
From: Stanislaw Gruszka @ 2011-08-08 14:28 UTC (permalink / raw)
  To: Ivo Van Doorn
  Cc: John W. Linville, Justin Piszcz, Gertjan van Wingerde,
	Helmut Schaa, linux-wireless

On Mon, Aug 08, 2011 at 11:43:55AM +0200, Ivo Van Doorn wrote:
> Well but for what reason would we want to read the register again? If
> we found an status report
> for a queue which does not have pending items, then in this change it
> would mean that the
> status report is intended for a TX frame which has yet to be enqueued
> to the hardware.
> 
> Obviously this means a mismatch between the TX status report and the
> actual frame to which it
> is being connected.

Ok, I will rewrite patch to skip TX status fifo reg if entry is pending.

Stanislaw

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] rt2x00: rt2800usb: fix races in tx queue
  2011-08-08  9:43     ` [PATCH] " Ivo Van Doorn
  2011-08-08 14:28       ` Stanislaw Gruszka
@ 2011-08-08 20:45       ` Gertjan van Wingerde
  2011-08-09 10:01         ` Stanislaw Gruszka
  1 sibling, 1 reply; 14+ messages in thread
From: Gertjan van Wingerde @ 2011-08-08 20:45 UTC (permalink / raw)
  To: Ivo Van Doorn
  Cc: Stanislaw Gruszka, John W. Linville, Justin Piszcz, Helmut Schaa,
	linux-wireless

On 08/08/11 11:43, Ivo Van Doorn wrote:
> Hi,
> 
>> On Sat, Aug 06, 2011 at 01:06:51PM +0200, Ivo Van Doorn wrote:
>>> On Thu, Aug 4, 2011 at 2:46 PM, Stanislaw Gruszka <sgruszka@redhat.com> wrote:
>>>> -static void rt2800usb_txdone(struct rt2x00_dev *rt2x00dev)
>>>> +static int rt2800usb_txdone(struct rt2x00_dev *rt2x00dev)
>>>>  {
>>>>        struct data_queue *queue;
>>>>        struct queue_entry *entry;
>>>>        u32 reg;
>>>>        u8 qid;
>>>>
>>>> -       while (kfifo_get(&rt2x00dev->txstatus_fifo, &reg)) {
>>>> +       while (kfifo_peek(&rt2x00dev->txstatus_fifo, &reg)) {
>>>
>>> I'm not too sure about this change, why do you need to do kfifo_peek
>>> and add gotos to the end of the while-loop to remove the item from the queue?
>>> There is no condition in which the obtained value from kfifo-peek
>>> will require it to be read again later (because when the value couldn't be
>>> handled we are throwing it away anyway using kfifo_skip).
>>
>> There is new case (see below) where it is needed. I can get rid of goto,
>> that will make code a bit cleaner. There is place for optimization, mainly
>> make tx_status fifo per queue, but for now I just want to fix kernel crashes.
>>
>>>>                /* TX_STA_FIFO_PID_QUEUE is a 2-bit field, thus
>>>>                 * qid is guaranteed to be one of the TX QIDs
>>>> @@ -517,25 +517,39 @@ static void rt2800usb_txdone(struct rt2x00_dev *rt2x00dev)
>>>>                if (unlikely(!queue)) {
>>>>                        WARNING(rt2x00dev, "Got TX status for an unavailable "
>>>>                                           "queue %u, dropping\n", qid);
>>>> -                       continue;
>>>> +                       goto next_reg;
>>>>                }
>>>>
>>>>                /*
>>>>                 * Inside each queue, we process each entry in a chronological
>>>>                 * order. We first check that the queue is not empty.
>>>>                 */
>>>> -               entry = NULL;
>>>> -               while (!rt2x00queue_empty(queue)) {
>>>> +               while (1) {
>>>> +                       entry = NULL;
>>>> +
>>>> +                       if (rt2x00queue_empty(queue))
>>>> +                               break;
>>>> +
>>>>                        entry = rt2x00queue_get_entry(queue, Q_INDEX_DONE);
>>>> +
>>>> +                       if (test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags) ||
>>>> +                           !test_bit(ENTRY_DATA_STATUS_PENDING, &entry->flags)) {
>>>> +                               WARNING(rt2x00dev, "Data pending for entry %u"
>>>> +                                       "in queue %u\n", entry->entry_idx, qid);
>>>> +                               return 1;
>>
>> Here is part of code where we exit the loop (and whole function) and do
>> not remove head "reg" from tx_status fifo - and read it again when
>> _txdone work is called next time.
> 
> Well but for what reason would we want to read the register again? If
> we found an status report
> for a queue which does not have pending items, then in this change it
> would mean that the
> status report is intended for a TX frame which has yet to be enqueued
> to the hardware.
> 
> Obviously this means a mismatch between the TX status report and the
> actual frame to which it
> is being connected.

Hmm, if I understood the patch description correctly, then there may be a race in the code,
where we actually read the TX status report before we have had the chance to handle the TX done
URB interrupt. As far as I understood it, it are not spurious TX status reports for frames that
were never submitted to the HW.
If that is really the case then it is quite reasonable to wait a little bit until the TX done
code has been executed and then process the TX status report again. 
However, we must be damn sure that this is really what happens.

Stanislaw, please confirm (or deny) that my understanding is correct.

Note that I have some comments of my own to the patch, which I will send as a response to
the v2 patch.

---
Gertjan

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] rt2x00: rt2800usb: fix races in tx queue
  2011-08-08  9:35     ` [PATCH v2] " Stanislaw Gruszka
@ 2011-08-08 20:55       ` Gertjan van Wingerde
  2011-08-09  9:50         ` Stanislaw Gruszka
  0 siblings, 1 reply; 14+ messages in thread
From: Gertjan van Wingerde @ 2011-08-08 20:55 UTC (permalink / raw)
  To: Stanislaw Gruszka
  Cc: Ivo Van Doorn, John W. Linville, Justin Piszcz, Helmut Schaa,
	linux-wireless

Hi Stanislaw,

On 08/08/11 11:35, Stanislaw Gruszka wrote:
> We can perform parallel putting new entries in a queue
> (rt2x00queue_write_tx_frame()) and removing entries after
> finishing transmitting (rt2800usb_work_txdone()). There are
> cases when txdone may process an entry that was not fully
> send and nullify entry->skb. What in a result crash the kernel
> in rt2800usb_write_tx_desc() or rt2800usb_get_txwi() or other
> place where entry->skb is used.
> 
> To fix stop processing entries in txdone, which flags do not
> indicate transition was finished.
> 
> Reported-and-tested-by: Justin Piszcz <jpiszcz@lucidpixels.com>
> Cc: stable@kernel.org
> Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>

Sorry for responding this late. I just didn't find the bandwidth to look at this earlier :-(

First of all, I don't think stable should be cc-ed on this particular patch. Although the ideas
of the patch are certainly applicable to the stable trees, this patch will simply not apply to
3.0 or earlier, since the code you are patching has been moved around between 3.0 and 3.1.

Second, I think it would be appropriate to split the patch in 2, or maybe 3, parts:
	1. The hunk to rt2x00usb to reverse the entry flag handling and the tx dma done handling.
	2. The hunk that checks that the entry on which the TX status is being reported has
           already been properly completed its TX done handling.
	3. The remainder, i.e. the retrying of handling a TX status report if the entry hasn't been
           fully completed its TX done handling yet. 

The code in this area has been proven to be very fragile, so I prefer to make mini changes to it in
small steps, so that we can properly bisect which change exactly has caused a problem.

See further down for more thoughts.

> ---
> v1 -> v2: remove goto 
> 
>  drivers/net/wireless/rt2x00/rt2800usb.c |   33 +++++++++++++++++++++++-------
>  drivers/net/wireless/rt2x00/rt2x00usb.c |    6 ++--
>  2 files changed, 28 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c
> index 5075593..670a30d 100644
> --- a/drivers/net/wireless/rt2x00/rt2800usb.c
> +++ b/drivers/net/wireless/rt2x00/rt2800usb.c
> @@ -500,14 +500,14 @@ static bool rt2800usb_txdone_entry_check(struct queue_entry *entry, u32 reg)
>  	return true;
>  }
>  
> -static void rt2800usb_txdone(struct rt2x00_dev *rt2x00dev)
> +static int rt2800usb_txdone(struct rt2x00_dev *rt2x00dev)
>  {
>  	struct data_queue *queue;
>  	struct queue_entry *entry;
>  	u32 reg;
>  	u8 qid;
>  
> -	while (kfifo_get(&rt2x00dev->txstatus_fifo, &reg)) {
> +	while (kfifo_peek(&rt2x00dev->txstatus_fifo, &reg)) {
>  
>  		/* TX_STA_FIFO_PID_QUEUE is a 2-bit field, thus
>  		 * qid is guaranteed to be one of the TX QIDs
> @@ -517,6 +517,7 @@ static void rt2800usb_txdone(struct rt2x00_dev *rt2x00dev)
>  		if (unlikely(!queue)) {
>  			WARNING(rt2x00dev, "Got TX status for an unavailable "
>  					   "queue %u, dropping\n", qid);
> +			kfifo_skip(&rt2x00dev->txstatus_fifo);
>  			continue;
>  		}
>  
> @@ -524,18 +525,32 @@ static void rt2800usb_txdone(struct rt2x00_dev *rt2x00dev)
>  		 * Inside each queue, we process each entry in a chronological
>  		 * order. We first check that the queue is not empty.
>  		 */
> -		entry = NULL;
> -		while (!rt2x00queue_empty(queue)) {
> +		while (1) {
> +			entry = NULL;
> +
> +			if (rt2x00queue_empty(queue))
> +				break;
> +

Do we really have to change this in a while (1) loop? I really hate those kind of
loops, as it is quite easy to make an uneducated change later on and create a never
ending loop. I don't think it is necessary to change the while loop conditions, if
you just move the entry = NULL assignment to the end of the loop. Then you can keep
the original while condition.

>  			entry = rt2x00queue_get_entry(queue, Q_INDEX_DONE);
> +
> +			if (test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags) ||
> +			    !test_bit(ENTRY_DATA_STATUS_PENDING, &entry->flags)) {
> +				WARNING(rt2x00dev, "Data pending for entry %u"
> +					"in queue %u\n", entry->entry_idx, qid);
> +				return 1;
> +			}
> +

Do we have to do this check inside the while loop?
Wouldn't it be enough to keep the loop as-is and have this check done after the while
loop, on the found entry?

>  			if (rt2800usb_txdone_entry_check(entry, reg))
>  				break;
>  		}
>  
> -		if (!entry || rt2x00queue_empty(queue))
> -			break;
> +		if (entry)
> +			rt2800_txdone_entry(entry, reg);
>  
> -		rt2800_txdone_entry(entry, reg);
> +		kfifo_skip(&rt2x00dev->txstatus_fifo);
>  	}
> +
> +	return 0;
>  }
>  
>  static void rt2800usb_work_txdone(struct work_struct *work)
> @@ -545,7 +560,8 @@ static void rt2800usb_work_txdone(struct work_struct *work)
>  	struct data_queue *queue;
>  	struct queue_entry *entry;
>  
> -	rt2800usb_txdone(rt2x00dev);
> +	if (rt2800usb_txdone(rt2x00dev))
> +		goto out;
>  
>  	/*
>  	 * Process any trailing TX status reports for IO failures,
> @@ -569,6 +585,7 @@ static void rt2800usb_work_txdone(struct work_struct *work)
>  		}
>  	}
>  
> +out:
>  	/*
>  	 * The hw may delay sending the packet after DMA complete
>  	 * if the medium is busy, thus the TX_STA_FIFO entry is
> diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.c b/drivers/net/wireless/rt2x00/rt2x00usb.c
> index b6b4542..46c6d36 100644
> --- a/drivers/net/wireless/rt2x00/rt2x00usb.c
> +++ b/drivers/net/wireless/rt2x00/rt2x00usb.c
> @@ -265,14 +265,14 @@ static void rt2x00usb_interrupt_txdone(struct urb *urb)
>  	if (!test_and_clear_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags))
>  		return;
>  
> -	if (rt2x00dev->ops->lib->tx_dma_done)
> -		rt2x00dev->ops->lib->tx_dma_done(entry);
> -
>  	/*
>  	 * Report the frame as DMA done
>  	 */
>  	rt2x00lib_dmadone(entry);
>  
> +	if (rt2x00dev->ops->lib->tx_dma_done)
> +		rt2x00dev->ops->lib->tx_dma_done(entry);
> +
>  	/*
>  	 * Check if the frame was correctly uploaded
>  	 */

---
Gertjan

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] rt2x00: rt2800usb: fix races in tx queue
  2011-08-08 20:55       ` Gertjan van Wingerde
@ 2011-08-09  9:50         ` Stanislaw Gruszka
  2011-08-09 11:26           ` Stanislaw Gruszka
  0 siblings, 1 reply; 14+ messages in thread
From: Stanislaw Gruszka @ 2011-08-09  9:50 UTC (permalink / raw)
  To: Gertjan van Wingerde
  Cc: Ivo Van Doorn, John W. Linville, Justin Piszcz, Helmut Schaa,
	linux-wireless

Hello Gertjan 

On Mon, Aug 08, 2011 at 10:55:56PM +0200, Gertjan van Wingerde wrote:
> Sorry for responding this late. I just didn't find the bandwidth to look at this earlier :-(
> 
> First of all, I don't think stable should be cc-ed on this particular patch. Although the ideas
> of the patch are certainly applicable to the stable trees, this patch will simply not apply to
> 3.0 or earlier, since the code you are patching has been moved around between 3.0 and 3.1.

CCing stable mean that we want that fix in stable kernel, not that patch
have to apply cleanly. When greg-kh will be applying patch on stable tries,
I'll provide proper backport.

> Second, I think it would be appropriate to split the patch in 2, or maybe 3, parts:
> 	1. The hunk to rt2x00usb to reverse the entry flag handling and the tx dma done handling.
> 	2. The hunk that checks that the entry on which the TX status is being reported has
>            already been properly completed its TX done handling.
> 	3. The remainder, i.e. the retrying of handling a TX status report if the entry hasn't been
>            fully completed its TX done handling yet. 
> 
> The code in this area has been proven to be very fragile, so I prefer to make mini changes to it in
> small steps, so that we can properly bisect which change exactly has caused a problem.
> 
> See further down for more thoughts.

Thanks for comments. I'll repost small patch that should fix the bug
and don't do things you dislike.

Stanislaw

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] rt2x00: rt2800usb: fix races in tx queue
  2011-08-08 20:45       ` Gertjan van Wingerde
@ 2011-08-09 10:01         ` Stanislaw Gruszka
  0 siblings, 0 replies; 14+ messages in thread
From: Stanislaw Gruszka @ 2011-08-09 10:01 UTC (permalink / raw)
  To: Gertjan van Wingerde
  Cc: Ivo Van Doorn, John W. Linville, Justin Piszcz, Helmut Schaa,
	linux-wireless

On Mon, Aug 08, 2011 at 10:45:25PM +0200, Gertjan van Wingerde wrote:
> >> Here is part of code where we exit the loop (and whole function) and do
> >> not remove head "reg" from tx_status fifo - and read it again when
> >> _txdone work is called next time.
> > 
> > Well but for what reason would we want to read the register again? If
> > we found an status report
> > for a queue which does not have pending items, then in this change it
> > would mean that the
> > status report is intended for a TX frame which has yet to be enqueued
> > to the hardware.
> > 
> > Obviously this means a mismatch between the TX status report and the
> > actual frame to which it
> > is being connected.
> 
> Hmm, if I understood the patch description correctly, then there may be a race in the code,
> where we actually read the TX status report before we have had the chance to handle the TX done
> URB interrupt. As far as I understood it, it are not spurious TX status reports for frames that
> were never submitted to the HW.
> If that is really the case then it is quite reasonable to wait a little bit until the TX done
> code has been executed and then process the TX status report again. 
> However, we must be damn sure that this is really what happens.
> 
> Stanislaw, please confirm (or deny) that my understanding is correct.


In this loop

                while (!rt2x00queue_empty(queue)) {
                        entry = rt2x00queue_get_entry(queue,Q_INDEX_DONE);
                        if (rt2800usb_txdone_entry_check(entry, reg))
                                break;
                }
we can process entry that is currently added to tx queue and nulify skb.

This can happen if we return false from rt2800usb_txdone_entry_check(),
so only when reg does not match ((wcid != tx_wcid) || (ack != tx_ack) ||
(pid != tx_pid)), or previous entry is ENTRY_DATA_IO_FAILED.

Stanislaw

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] rt2x00: rt2800usb: fix races in tx queue
  2011-08-09  9:50         ` Stanislaw Gruszka
@ 2011-08-09 11:26           ` Stanislaw Gruszka
  2011-08-09 15:45             ` Stanislaw Gruszka
  0 siblings, 1 reply; 14+ messages in thread
From: Stanislaw Gruszka @ 2011-08-09 11:26 UTC (permalink / raw)
  To: Gertjan van Wingerde
  Cc: Ivo Van Doorn, John W. Linville, Justin Piszcz, Helmut Schaa,
	linux-wireless

On Tue, Aug 09, 2011 at 11:50:50AM +0200, Stanislaw Gruszka wrote:
> Hello Gertjan 
> 
> On Mon, Aug 08, 2011 at 10:55:56PM +0200, Gertjan van Wingerde wrote:
> > Sorry for responding this late. I just didn't find the bandwidth to look at this earlier :-(
> > 
> > First of all, I don't think stable should be cc-ed on this particular patch. Although the ideas
> > of the patch are certainly applicable to the stable trees, this patch will simply not apply to
> > 3.0 or earlier, since the code you are patching has been moved around between 3.0 and 3.1.
> 
> CCing stable mean that we want that fix in stable kernel, not that patch
> have to apply cleanly. When greg-kh will be applying patch on stable tries,
> I'll provide proper backport.
> 
> > Second, I think it would be appropriate to split the patch in 2, or maybe 3, parts:
> > 	1. The hunk to rt2x00usb to reverse the entry flag handling and the tx dma done handling.
> > 	2. The hunk that checks that the entry on which the TX status is being reported has
> >            already been properly completed its TX done handling.
> > 	3. The remainder, i.e. the retrying of handling a TX status report if the entry hasn't been
> >            fully completed its TX done handling yet. 
> > 
> > The code in this area has been proven to be very fragile, so I prefer to make mini changes to it in
> > small steps, so that we can properly bisect which change exactly has caused a problem.
> > 
> > See further down for more thoughts.
> 
> Thanks for comments. I'll repost small patch that should fix the bug
> and don't do things you dislike.

Hmm, I planed to post the below patch, but unfortunately it does not fix
the crash on my system (rare reproducible after an hour of working). Seems
there are more problems here. Looks like there is possibility to mishmash
indexes i.e. make indexes like {Q_INDEX, Q_INDEX_DMA_DONE, Q_INDEX_DONE}
= {44, 54, 44}, whereas they should be {44, 44, 44} or {45, 43, 41}. 
Original patch seems to preventing this (fix or mask the problem), but 
honestly I do not understand way. I have to look more closely at it.

Stanislaw

diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c
index 5075593..d8b27ae 100644
--- a/drivers/net/wireless/rt2x00/rt2800usb.c
+++ b/drivers/net/wireless/rt2x00/rt2800usb.c
@@ -464,6 +464,15 @@ static bool rt2800usb_txdone_entry_check(struct queue_entry *entry, u32 reg)
 	int wcid, ack, pid;
 	int tx_wcid, tx_ack, tx_pid;
 
+	if (test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags) ||
+	    !test_bit(ENTRY_DATA_STATUS_PENDING, &entry->flags)) {
+		WARNING(entry->queue->rt2x00dev,
+			"Data pending for entry %u in queue %u\n",
+			entry->entry_idx, entry->queue->qid);
+		cond_resched();
+		return false;
+	}
+
 	wcid	= rt2x00_get_field32(reg, TX_STA_FIFO_WCID);
 	ack	= rt2x00_get_field32(reg, TX_STA_FIFO_TX_ACK_REQUIRED);
 	pid	= rt2x00_get_field32(reg, TX_STA_FIFO_PID_TYPE);
diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.c b/drivers/net/wireless/rt2x00/rt2x00usb.c
index b6b4542..32eb5aa 100644
--- a/drivers/net/wireless/rt2x00/rt2x00usb.c
+++ b/drivers/net/wireless/rt2x00/rt2x00usb.c
@@ -265,14 +265,13 @@ static void rt2x00usb_interrupt_txdone(struct urb *urb)
 	if (!test_and_clear_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags))
 		return;
 
-	if (rt2x00dev->ops->lib->tx_dma_done)
-		rt2x00dev->ops->lib->tx_dma_done(entry);
-
 	/*
 	 * Report the frame as DMA done
 	 */
 	rt2x00lib_dmadone(entry);
 
+	if (rt2x00dev->ops->lib->tx_dma_done)
+		rt2x00dev->ops->lib->tx_dma_done(entry);
 	/*
 	 * Check if the frame was correctly uploaded
 	 */

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] rt2x00: rt2800usb: fix races in tx queue
  2011-08-09 11:26           ` Stanislaw Gruszka
@ 2011-08-09 15:45             ` Stanislaw Gruszka
  2011-08-10 10:39               ` Stanislaw Gruszka
  0 siblings, 1 reply; 14+ messages in thread
From: Stanislaw Gruszka @ 2011-08-09 15:45 UTC (permalink / raw)
  To: Gertjan van Wingerde
  Cc: Ivo Van Doorn, John W. Linville, Justin Piszcz, Helmut Schaa,
	linux-wireless

On Tue, Aug 09, 2011 at 01:26:24PM +0200, Stanislaw Gruszka wrote:
> > > Second, I think it would be appropriate to split the patch in 2, or maybe 3, parts:
> > > 	1. The hunk to rt2x00usb to reverse the entry flag handling and the tx dma done handling.
> > > 	2. The hunk that checks that the entry on which the TX status is being reported has
> > >            already been properly completed its TX done handling.
> > > 	3. The remainder, i.e. the retrying of handling a TX status report if the entry hasn't been
> > >            fully completed its TX done handling yet. 
> > > 
> > > The code in this area has been proven to be very fragile, so I prefer to make mini changes to it in
> > > small steps, so that we can properly bisect which change exactly has caused a problem.
> > > 
> > > See further down for more thoughts.
> > 
> > Thanks for comments. I'll repost small patch that should fix the bug
> > and don't do things you dislike.
> 
> Hmm, I planed to post the below patch, but unfortunately it does not fix
> the crash on my system (rare reproducible after an hour of working). Seems
> there are more problems here. Looks like there is possibility to mishmash
> indexes i.e. make indexes like {Q_INDEX, Q_INDEX_DMA_DONE, Q_INDEX_DONE}
> = {44, 54, 44}, whereas they should be {44, 44, 44} or {45, 43, 41}. 
> Original patch seems to preventing this (fix or mask the problem), but 
> honestly I do not understand way. I have to look more closely at it.

Ok, I think I found these other problems, seems we have also check 
ENTRY_DATA_PENDING flags and add similar checks in rt2800usb_work_txdone
when checking against failed I/O.

Justin, if you have opportunity test below patch (for 3.0 kernel). It does
not crash here so far, but on my system bug is very rarely reproducible,
so I have to test whole night or more to be sure.

Comments welcome. If patch is ok, I will split it into 2 parts and post
officially.

diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c
index 2a6aa85..49a9c76 100644
--- a/drivers/net/wireless/rt2x00/rt2800lib.c
+++ b/drivers/net/wireless/rt2x00/rt2800lib.c
@@ -607,6 +607,16 @@ static bool rt2800_txdone_entry_check(struct queue_entry *entry, u32 reg)
 	int wcid, ack, pid;
 	int tx_wcid, tx_ack, tx_pid;
 
+	if (test_bit(ENTRY_DATA_PENDING, &entry->flags) ||
+	    test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags) ||
+	    !test_bit(ENTRY_DATA_STATUS_PENDING, &entry->flags)) {
+		WARNING(entry->queue->rt2x00dev,
+			"Data pending for entry %u in queue %u\n",
+			entry->entry_idx, entry->queue->qid);
+		cond_resched();
+		return false;
+	}
+
 	wcid	= rt2x00_get_field32(reg, TX_STA_FIFO_WCID);
 	ack	= rt2x00_get_field32(reg, TX_STA_FIFO_TX_ACK_REQUIRED);
 	pid	= rt2x00_get_field32(reg, TX_STA_FIFO_PID_TYPE);
diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c
index ba82c97..3dfb4f3 100644
--- a/drivers/net/wireless/rt2x00/rt2800usb.c
+++ b/drivers/net/wireless/rt2x00/rt2800usb.c
@@ -477,8 +477,11 @@ static void rt2800usb_work_txdone(struct work_struct *work)
 		while (!rt2x00queue_empty(queue)) {
 			entry = rt2x00queue_get_entry(queue, Q_INDEX_DONE);
 
-			if (test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags))
+			if (test_bit(ENTRY_DATA_PENDING, &entry->flags) ||
+			    test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags) ||
+			    !test_bit(ENTRY_DATA_STATUS_PENDING, &entry->flags))
 				break;
+
 			if (test_bit(ENTRY_DATA_IO_FAILED, &entry->flags))
 				rt2x00lib_txdone_noinfo(entry, TXDONE_FAILURE);
 			else if (rt2x00queue_status_timeout(entry))
diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.c b/drivers/net/wireless/rt2x00/rt2x00usb.c
index 8f90f62..7ec9e4f 100644
--- a/drivers/net/wireless/rt2x00/rt2x00usb.c
+++ b/drivers/net/wireless/rt2x00/rt2x00usb.c
@@ -265,14 +265,13 @@ static void rt2x00usb_interrupt_txdone(struct urb *urb)
 	if (!test_and_clear_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags))
 		return;
 
-	if (rt2x00dev->ops->lib->tx_dma_done)
-		rt2x00dev->ops->lib->tx_dma_done(entry);
-
 	/*
 	 * Report the frame as DMA done
 	 */
 	rt2x00lib_dmadone(entry);
 
+	if (rt2x00dev->ops->lib->tx_dma_done)
+		rt2x00dev->ops->lib->tx_dma_done(entry);
 	/*
 	 * Check if the frame was correctly uploaded
 	 */

 

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] rt2x00: rt2800usb: fix races in tx queue
  2011-08-09 15:45             ` Stanislaw Gruszka
@ 2011-08-10 10:39               ` Stanislaw Gruszka
  2011-08-10 13:28                 ` Stanislaw Gruszka
  0 siblings, 1 reply; 14+ messages in thread
From: Stanislaw Gruszka @ 2011-08-10 10:39 UTC (permalink / raw)
  To: Gertjan van Wingerde
  Cc: Ivo Van Doorn, John W. Linville, Justin Piszcz, Helmut Schaa,
	linux-wireless

On Tue, Aug 09, 2011 at 05:45:40PM +0200, Stanislaw Gruszka wrote:
> On Tue, Aug 09, 2011 at 01:26:24PM +0200, Stanislaw Gruszka wrote:
> > > > Second, I think it would be appropriate to split the patch in 2, or maybe 3, parts:
> > > > 	1. The hunk to rt2x00usb to reverse the entry flag handling and the tx dma done handling.
> > > > 	2. The hunk that checks that the entry on which the TX status is being reported has
> > > >            already been properly completed its TX done handling.
> > > > 	3. The remainder, i.e. the retrying of handling a TX status report if the entry hasn't been
> > > >            fully completed its TX done handling yet. 
> > > > 
> > > > The code in this area has been proven to be very fragile, so I prefer to make mini changes to it in
> > > > small steps, so that we can properly bisect which change exactly has caused a problem.
> > > > 
> > > > See further down for more thoughts.
> > > 
> > > Thanks for comments. I'll repost small patch that should fix the bug
> > > and don't do things you dislike.
> > 
> > Hmm, I planed to post the below patch, but unfortunately it does not fix
> > the crash on my system (rare reproducible after an hour of working). Seems
> > there are more problems here. Looks like there is possibility to mishmash
> > indexes i.e. make indexes like {Q_INDEX, Q_INDEX_DMA_DONE, Q_INDEX_DONE}
> > = {44, 54, 44}, whereas they should be {44, 44, 44} or {45, 43, 41}. 
> > Original patch seems to preventing this (fix or mask the problem), but 
> > honestly I do not understand way. I have to look more closely at it.
> 
> Ok, I think I found these other problems, seems we have also check 
> ENTRY_DATA_PENDING flags and add similar checks in rt2800usb_work_txdone
> when checking against failed I/O.
> 
> Justin, if you have opportunity test below patch (for 3.0 kernel). It does
> not crash here so far, but on my system bug is very rarely reproducible,
> so I have to test whole night or more to be sure.
> 
> Comments welcome. If patch is ok, I will split it into 2 parts and post
> officially.

This patch crashes as well. I have to debug this issue a bit more ...

Stanislaw 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] rt2x00: rt2800usb: fix races in tx queue
  2011-08-10 10:39               ` Stanislaw Gruszka
@ 2011-08-10 13:28                 ` Stanislaw Gruszka
  0 siblings, 0 replies; 14+ messages in thread
From: Stanislaw Gruszka @ 2011-08-10 13:28 UTC (permalink / raw)
  To: Gertjan van Wingerde
  Cc: Ivo Van Doorn, John W. Linville, Justin Piszcz, Helmut Schaa,
	linux-wireless

On Wed, Aug 10, 2011 at 12:39:04PM +0200, Stanislaw Gruszka wrote:
> This patch crashes as well. I have to debug this issue a bit more ...

Figured this out, while(1) change helped in my original patch, without
that we can have ops in rt2800usb_get_txwi. I'm going to post reworked
patches now.

Stanislaw 

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2011-08-10 13:28 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-08-04 12:46 [PATCH] rt2x00: rt2800usb: fix races in tx queue Stanislaw Gruszka
2011-08-06 11:06 ` Ivo Van Doorn
2011-08-08  9:29   ` Stanislaw Gruszka
2011-08-08  9:35     ` [PATCH v2] " Stanislaw Gruszka
2011-08-08 20:55       ` Gertjan van Wingerde
2011-08-09  9:50         ` Stanislaw Gruszka
2011-08-09 11:26           ` Stanislaw Gruszka
2011-08-09 15:45             ` Stanislaw Gruszka
2011-08-10 10:39               ` Stanislaw Gruszka
2011-08-10 13:28                 ` Stanislaw Gruszka
2011-08-08  9:43     ` [PATCH] " Ivo Van Doorn
2011-08-08 14:28       ` Stanislaw Gruszka
2011-08-08 20:45       ` Gertjan van Wingerde
2011-08-09 10:01         ` Stanislaw Gruszka

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.