linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] synclink_gt fix transmit race and timeout
@ 2009-06-16 19:44 Paul Fulghum
  2009-06-23  6:19 ` Andrew Morton
  0 siblings, 1 reply; 7+ messages in thread
From: Paul Fulghum @ 2009-06-16 19:44 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Alan Cox, linux-kernel

Fix race condition when adding transmit data to active DMA buffer ring
that can cause transmit stall.
Update transmit timeout when adding data to active DMA buffer ring.
Base transmit timeout on amount of buffered data instead of
using fixed value.

Signed-off-by: Paul Fulghum <paulkf@microgate.com>

--- a/drivers/char/synclink_gt.c	2009-06-09 22:05:27.000000000 -0500
+++ b/drivers/char/synclink_gt.c	2009-06-16 13:58:14.000000000 -0500
@@ -463,7 +463,6 @@ static unsigned int free_tbuf_count(stru
 static unsigned int tbuf_bytes(struct slgt_info *info);
 static void reset_tbufs(struct slgt_info *info);
 static void tdma_reset(struct slgt_info *info);
-static void tdma_start(struct slgt_info *info);
 static void tx_load(struct slgt_info *info, const char *buf, unsigned int count);
 
 static void get_signals(struct slgt_info *info);
@@ -791,6 +790,18 @@ static void set_termios(struct tty_struc
 	}
 }
 
+static void update_tx_timer(struct slgt_info *info)
+{
+	/*
+	 * use worst case speed of 1200bps to calculate transmit timeout
+	 * based on data in buffers (tbuf_bytes) and FIFO (128 bytes)
+	 */
+	if (info->params.mode == MGSL_MODE_HDLC) {
+		int timeout  = (tbuf_bytes(info) * 7) + 1000;
+		mod_timer(&info->tx_timer, jiffies + msecs_to_jiffies(timeout));
+	}
+}
+
 static int write(struct tty_struct *tty,
 		 const unsigned char *buf, int count)
 {
@@ -834,8 +845,18 @@ start:
 		spin_lock_irqsave(&info->lock,flags);
 		if (!info->tx_active)
 		 	tx_start(info);
-		else
-			tdma_start(info);
+		else if (!(rd_reg32(info, TDCSR) & BIT0)) {
+			/* transmit still active but transmit DMA stopped */
+			unsigned int i = info->tbuf_current;
+			if (!i)
+				i = info->tbuf_count;
+			i--;
+			/* if DMA buf unsent must try later after tx idle */
+			if (desc_count(info->tbufs[i]))
+				ret = 0;
+		}
+		if (ret > 0)
+			update_tx_timer(info);
 		spin_unlock_irqrestore(&info->lock,flags);
  	}
 
@@ -1498,10 +1519,9 @@ static int hdlcdev_xmit(struct sk_buff *
 	/* save start time for transmit timeout detection */
 	dev->trans_start = jiffies;
 
-	/* start hardware transmitter if necessary */
 	spin_lock_irqsave(&info->lock,flags);
-	if (!info->tx_active)
-	 	tx_start(info);
+	tx_start(info);
+	update_tx_timer(info);
 	spin_unlock_irqrestore(&info->lock,flags);
 
 	return 0;
@@ -3886,50 +3906,19 @@ static void tx_start(struct slgt_info *i
 			slgt_irq_on(info, IRQ_TXUNDER + IRQ_TXIDLE);
 			/* clear tx idle and underrun status bits */
 			wr_reg16(info, SSR, (unsigned short)(IRQ_TXIDLE + IRQ_TXUNDER));
-			if (info->params.mode == MGSL_MODE_HDLC)
-				mod_timer(&info->tx_timer, jiffies +
-						msecs_to_jiffies(5000));
 		} else {
 			slgt_irq_off(info, IRQ_TXDATA);
 			slgt_irq_on(info, IRQ_TXIDLE);
 			/* clear tx idle status bit */
 			wr_reg16(info, SSR, IRQ_TXIDLE);
 		}
-		tdma_start(info);
+		/* set 1st descriptor address and start DMA */
+		wr_reg32(info, TDDAR, info->tbufs[info->tbuf_start].pdesc);
+		wr_reg32(info, TDCSR, BIT2 + BIT0);
 		info->tx_active = true;
 	}
 }
 
-/*
- * start transmit DMA if inactive and there are unsent buffers
- */
-static void tdma_start(struct slgt_info *info)
-{
-	unsigned int i;
-
-	if (rd_reg32(info, TDCSR) & BIT0)
-		return;
-
-	/* transmit DMA inactive, check for unsent buffers */
-	i = info->tbuf_start;
-	while (!desc_count(info->tbufs[i])) {
-		if (++i == info->tbuf_count)
-			i = 0;
-		if (i == info->tbuf_current)
-			return;
-	}
-	info->tbuf_start = i;
-
-	/* there are unsent buffers, start transmit DMA */
-
-	/* reset needed if previous error condition */
-	tdma_reset(info);
-
-	/* set 1st descriptor address */
-	wr_reg32(info, TDDAR, info->tbufs[info->tbuf_start].pdesc);
-	wr_reg32(info, TDCSR, BIT2 + BIT0); /* IRQ + DMA enable */
-}
-
 static void tx_stop(struct slgt_info *info)
 {
 	unsigned short val;
@@ -4942,8 +4931,7 @@ static void tx_timeout(unsigned long con
 		info->icount.txtimeout++;
 	}
 	spin_lock_irqsave(&info->lock,flags);
-	info->tx_active = false;
-	info->tx_count = 0;
+	tx_stop(info);
 	spin_unlock_irqrestore(&info->lock,flags);
 
 #if SYNCLINK_GENERIC_HDLC



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] synclink_gt fix transmit race and timeout
  2009-06-16 19:44 [PATCH] synclink_gt fix transmit race and timeout Paul Fulghum
@ 2009-06-23  6:19 ` Andrew Morton
  2009-06-23 15:16   ` Paul Fulghum
  0 siblings, 1 reply; 7+ messages in thread
From: Andrew Morton @ 2009-06-23  6:19 UTC (permalink / raw)
  To: Paul Fulghum; +Cc: Alan Cox, linux-kernel

On Tue, 16 Jun 2009 14:44:11 -0500 Paul Fulghum <paulkf@microgate.com> wrote:

> Fix race condition when adding transmit data to active DMA buffer ring
> that can cause transmit stall.
> Update transmit timeout when adding data to active DMA buffer ring.
> Base transmit timeout on amount of buffered data instead of
> using fixed value.
> 

It's not a terribly good changelog, sorry.

It fails to describe the race condition?

It fails to explain the user-visible effects of the bug which was
fixed.  Those who need to make should-we-backport-this-to-stable
decisions need to know this.  Often we are told, often we can guess. 
Sometimes neither.


> 
> --- a/drivers/char/synclink_gt.c	2009-06-09 22:05:27.000000000 -0500
> +++ b/drivers/char/synclink_gt.c	2009-06-16 13:58:14.000000000 -0500
> @@ -463,7 +463,6 @@ static unsigned int free_tbuf_count(stru
>  static unsigned int tbuf_bytes(struct slgt_info *info);
>  static void reset_tbufs(struct slgt_info *info);
>  static void tdma_reset(struct slgt_info *info);
> -static void tdma_start(struct slgt_info *info);
>  static void tx_load(struct slgt_info *info, const char *buf, unsigned int count);
>  
>  static void get_signals(struct slgt_info *info);
> @@ -791,6 +790,18 @@ static void set_termios(struct tty_struc
>  	}
>  }
>  
> +static void update_tx_timer(struct slgt_info *info)
> +{
> +	/*
> +	 * use worst case speed of 1200bps to calculate transmit timeout
> +	 * based on data in buffers (tbuf_bytes) and FIFO (128 bytes)
> +	 */
> +	if (info->params.mode == MGSL_MODE_HDLC) {
> +		int timeout  = (tbuf_bytes(info) * 7) + 1000;
> +		mod_timer(&info->tx_timer, jiffies + msecs_to_jiffies(timeout));
> +	}
> +}

Where did the "7" come from?

>  static int write(struct tty_struct *tty,
>  		 const unsigned char *buf, int count)
>  {
> @@ -834,8 +845,18 @@ start:
>  		spin_lock_irqsave(&info->lock,flags);
>  		if (!info->tx_active)
>  		 	tx_start(info);
> -		else
> -			tdma_start(info);
> +		else if (!(rd_reg32(info, TDCSR) & BIT0)) {
> +			/* transmit still active but transmit DMA stopped */
> +			unsigned int i = info->tbuf_current;
> +			if (!i)
> +				i = info->tbuf_count;
> +			i--;
> +			/* if DMA buf unsent must try later after tx idle */
> +			if (desc_count(info->tbufs[i]))
> +				ret = 0;
> +		}
> +		if (ret > 0)
> +			update_tx_timer(info);
>  		spin_unlock_irqrestore(&info->lock,flags);
>   	}
>  
> @@ -1498,10 +1519,9 @@ static int hdlcdev_xmit(struct sk_buff *
>  	/* save start time for transmit timeout detection */
>  	dev->trans_start = jiffies;
>  
> -	/* start hardware transmitter if necessary */
>  	spin_lock_irqsave(&info->lock,flags);
> -	if (!info->tx_active)
> -	 	tx_start(info);
> +	tx_start(info);
> +	update_tx_timer(info);
>  	spin_unlock_irqrestore(&info->lock,flags);
>  
>  	return 0;
> @@ -3886,50 +3906,19 @@ static void tx_start(struct slgt_info *i
>  			slgt_irq_on(info, IRQ_TXUNDER + IRQ_TXIDLE);
>  			/* clear tx idle and underrun status bits */
>  			wr_reg16(info, SSR, (unsigned short)(IRQ_TXIDLE + IRQ_TXUNDER));
> -			if (info->params.mode == MGSL_MODE_HDLC)
> -				mod_timer(&info->tx_timer, jiffies +
> -						msecs_to_jiffies(5000));
>  		} else {
>  			slgt_irq_off(info, IRQ_TXDATA);
>  			slgt_irq_on(info, IRQ_TXIDLE);
>  			/* clear tx idle status bit */
>  			wr_reg16(info, SSR, IRQ_TXIDLE);
>  		}
> -		tdma_start(info);
> +		/* set 1st descriptor address and start DMA */
> +		wr_reg32(info, TDDAR, info->tbufs[info->tbuf_start].pdesc);
> +		wr_reg32(info, TDCSR, BIT2 + BIT0);
>  		info->tx_active = true;
>  	}
>  }
>  
> ...
>
> @@ -4942,8 +4931,7 @@ static void tx_timeout(unsigned long con
>  		info->icount.txtimeout++;
>  	}
>  	spin_lock_irqsave(&info->lock,flags);
> -	info->tx_active = false;
> -	info->tx_count = 0;
> +	tx_stop(info);

I have a suspicion that tx_stop() should use del_timer_sync(), not
del_timer().  What happens if the timer handler is concurrently
running?

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] synclink_gt fix transmit race and timeout
  2009-06-23  6:19 ` Andrew Morton
@ 2009-06-23 15:16   ` Paul Fulghum
  2009-06-23 16:27     ` Andrew Morton
  0 siblings, 1 reply; 7+ messages in thread
From: Paul Fulghum @ 2009-06-23 15:16 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Alan Cox, linux-kernel

On Mon, 2009-06-22 at 23:19 -0700, Andrew Morton wrote:
> On Tue, 16 Jun 2009 14:44:11 -0500 Paul Fulghum <paulkf@microgate.com> wrote:
> 
> > Fix race condition when adding transmit data to active DMA buffer ring
> > that can cause transmit stall.

> It's not a terribly good changelog, sorry.
> It fails to describe the race condition?

I attempted to say what was done in the change log and
let the patch describe the details of how it was done.
But I can see that might not be enough.

How's this? :

If after adding transmit data to the transmit DMA ring the
transmit DMA controller has become inactive before reading
the new data and the serial transmitter is still active sending
data already in the transmit FIFO and/or shift register,
then wait for the serial transmitter to become idle before
reactivating the transmit DMA controller.

Otherwise the transmit DMA controller may present the
new data to the serial transmitter in a small timing window
just as the serial transmitter transitions to the idle state.
Hitting this window results in the serial transmitter entering
an inconsistent state where it does not process the new data
and does not signal the transition to the idle state.

>From the driver perspective, this condition appears as an
active transmit DMA controller (correct) with unsent DMA buffers 
(correct) and an active serial transmitter (wrong), even though
no data is actually being sent. When all the DMA buffers are full,
no further data is accepted from a user application calling
write() until a timeout occurs and the transmitter and
DMA controller are reset.

--

If this is good enough, I will resubmit the patch with
the updated change log.

--
Paul Fulghum
Microgate Systems, Ltd


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] synclink_gt fix transmit race and timeout
  2009-06-23 15:16   ` Paul Fulghum
@ 2009-06-23 16:27     ` Andrew Morton
  2009-06-23 17:55       ` Paul Fulghum
  0 siblings, 1 reply; 7+ messages in thread
From: Andrew Morton @ 2009-06-23 16:27 UTC (permalink / raw)
  To: Paul Fulghum; +Cc: Alan Cox, linux-kernel

On Tue, 23 Jun 2009 10:16:01 -0500 Paul Fulghum <paulkf@microgate.com> wrote:

> On Mon, 2009-06-22 at 23:19 -0700, Andrew Morton wrote:
> > On Tue, 16 Jun 2009 14:44:11 -0500 Paul Fulghum <paulkf@microgate.com> wrote:
> > 
> > > Fix race condition when adding transmit data to active DMA buffer ring
> > > that can cause transmit stall.
> 
> > It's not a terribly good changelog, sorry.
> > It fails to describe the race condition?
> 
> I attempted to say what was done in the change log and
> let the patch describe the details of how it was done.
> But I can see that might not be enough.
> 
> How's this? :
> 
> If after adding transmit data to the transmit DMA ring the
> transmit DMA controller has become inactive before reading
> the new data and the serial transmitter is still active sending
> data already in the transmit FIFO and/or shift register,
> then wait for the serial transmitter to become idle before
> reactivating the transmit DMA controller.
> 
> Otherwise the transmit DMA controller may present the
> new data to the serial transmitter in a small timing window
> just as the serial transmitter transitions to the idle state.
> Hitting this window results in the serial transmitter entering
> an inconsistent state where it does not process the new data
> and does not signal the transition to the idle state.
> 
> >From the driver perspective, this condition appears as an
> active transmit DMA controller (correct) with unsent DMA buffers 
> (correct) and an active serial transmitter (wrong), even though
> no data is actually being sent. When all the DMA buffers are full,
> no further data is accepted from a user application calling
> write() until a timeout occurs and the transmitter and
> DMA controller are reset.
> 

That covers it ;) But I stil lcan't work out whether we shoul backport
this into -stable.

> 
> If this is good enough, I will resubmit the patch with
> the updated change log.

I updated the patch.

I did have a couple of other comments on the patch which seem to have
been missed?


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] synclink_gt fix transmit race and timeout
  2009-06-23 17:55       ` Paul Fulghum
@ 2009-06-23 17:02         ` Andrew Morton
  2009-06-23 18:29           ` Paul Fulghum
  0 siblings, 1 reply; 7+ messages in thread
From: Andrew Morton @ 2009-06-23 17:02 UTC (permalink / raw)
  To: Paul Fulghum; +Cc: Alan Cox, linux-kernel

On Tue, 23 Jun 2009 11:55:06 -0600 Paul Fulghum <paulkf@microgate.com> wrote:

> Andrew Morton wrote:
> > I did have a couple of other comments on the patch which seem to have
> > been missed?
> 
> Let's see, there was:
> "It fails to explain the user-visible effects of the bug which was fixed."

I'm referring to

  Where did the "7" come from?

and

  I have a suspicion that tx_stop() should use del_timer_sync(), not
  del_timer().  What happens if the timer handler is concurrently
  running?


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] synclink_gt fix transmit race and timeout
  2009-06-23 16:27     ` Andrew Morton
@ 2009-06-23 17:55       ` Paul Fulghum
  2009-06-23 17:02         ` Andrew Morton
  0 siblings, 1 reply; 7+ messages in thread
From: Paul Fulghum @ 2009-06-23 17:55 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Alan Cox, linux-kernel

Andrew Morton wrote:
> I did have a couple of other comments on the patch which seem to have
> been missed?

Let's see, there was:
"It fails to explain the user-visible effects of the bug which was fixed."

On Tue, 23 Jun 2009 10:16:01 -0500 Paul Fulghum <paulkf@microgate.com> wrote:
> When all the DMA buffers are full,
> no further data is accepted from a user application calling
> write() until a timeout occurs and the transmitter and
> DMA controller are reset.

That addresses the user-visible effects.

Then there was:
"Those who need to make should-we-backport-this-to-stable
decisions need to know this. Often we are told, often we can guess."

My opinion on if this should be back ported to a stable branch:

Probably not. It effects a small minority of users
in rare instances with non-fatal consequences, just reduced
performance. Updated drivers for all 2.6 and 2.4 kernels are already
available on our website. If stable maintainers want to back port it
I'm all for it, but they are likely busy enough with critical changes.

-- 
Paul Fulghum
MicroGate Systems, Ltd.
=Customer Driven, by Design=
(800)444-1982
(512)345-7791 (Direct)
(512)343-9046 (Fax)
Central Time Zone (GMT -5h)
www.microgate.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] synclink_gt fix transmit race and timeout
  2009-06-23 17:02         ` Andrew Morton
@ 2009-06-23 18:29           ` Paul Fulghum
  0 siblings, 0 replies; 7+ messages in thread
From: Paul Fulghum @ 2009-06-23 18:29 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Alan Cox, linux-kernel

Andrew Morton wrote:
> I'm referring to
> 
>   Where did the "7" come from?

Oops, I did not scroll down far enough.

+	/*
+	 * use worst case speed of 1200bps to calculate transmit timeout
+	 * based on data in buffers (tbuf_bytes) and FIFO (128 bytes)
+	 */
+	if (info->params.mode == MGSL_MODE_HDLC) {
+		int timeout  = (tbuf_bytes(info) * 7) + 1000;
+		mod_timer(&info->tx_timer, jiffies + msecs_to_jiffies(timeout));
+	}

7 is roughly the number of milliseconds to send a byte at 1200bps.

The problem with externally provided data clocks is that you
don't necessarily know the data rate before hand, so a somewhat
arbitrary worst case assumption is used.

> and
> 
>   I have a suspicion that tx_stop() should use del_timer_sync(), not
>   del_timer().  What happens if the timer handler is concurrently
>   running?

Everything is synchronized with info->lock spin_lock,
so nothing critical runs concurrently. tx_stop() is sometimes
called in interrupt context so it can't call del_timer_sync().
If the timer has already fired but has not
run yet it does nothing more than call tx_stop() itself and wake
any transmit waiters so there are no ill effects.

-- 
Paul Fulghum
MicroGate Systems, Ltd.
=Customer Driven, by Design=
(800)444-1982
(512)345-7791 (Direct)
(512)343-9046 (Fax)
Central Time Zone (GMT -5h)
www.microgate.com

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2009-06-23 17:29 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-06-16 19:44 [PATCH] synclink_gt fix transmit race and timeout Paul Fulghum
2009-06-23  6:19 ` Andrew Morton
2009-06-23 15:16   ` Paul Fulghum
2009-06-23 16:27     ` Andrew Morton
2009-06-23 17:55       ` Paul Fulghum
2009-06-23 17:02         ` Andrew Morton
2009-06-23 18:29           ` Paul Fulghum

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).