linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] ARM: PL011: Add support for Rx DMA buffer polling.
@ 2013-03-27  9:38 Chanho Min
  2013-03-29 16:19 ` Greg Kroah-Hartman
  0 siblings, 1 reply; 10+ messages in thread
From: Chanho Min @ 2013-03-27  9:38 UTC (permalink / raw)
  To: Greg Kroah-Hartman, Russell King - ARM Linux
  Cc: Linus Walleij, Alan Cox, linux-kernel, linux-serial, Chanho Min

In DMA support, The received data is not pushed to tty until the DMA buffer
is filled. But some megabyte rate chips such as BT expect fast response and
data should be pushed immediately. In order to fix this issue, We suggest
the use of the timer for polling DMA buffer.
In our test, no data loss occurred at high-baudrate as compared with interrupt-
driven (We tested with 3Mbps).
We changes:

- We add timer for polling. If we set poll_timer to 10, every 10ms,
 timer handler checks the residue in the dma buffer and transfer data
 to the tty. Also, last_residue is updated for the next polling.

- poll_timeout is used to prevent the timer's system cost.
  If poll_timeout is set to 3000 and no data is received in 3 seconds,
  we inactivate poll timer and driver falls back to interrupt-driven.
  When data is received again in FIFO and UART irq is occurred, we switch
  back to DMA mode and start polling.

- We use consistent DMA mappings to avoid from the frequent cache operation
  of the timer function for default.

- pl011_dma_rx_chars is modified. the pending size is recalculated because
  data can be taken by polling.

- the polling time is adjusted if dma rx poll is enabled but no rate is
  specified. Ideal polling interval to push 1 character at every interval
  is the reciprocal of 'baud rate / 10 line bits per character / 1000 ms
  per sec'. But It is very aggressive to system. Experimentally,
 '10000000 / baud' is suitable to receive dozens of characters. the poll rate
 can be specified statically by dma_rx_poll_rate of the platform data as well.

Changes compared to v1:
 - Use of consistent DMA mappings.
 - Added dma_rx_poll_rate in platform data to specify the polling interval.
 - Added dma_rx_poll_timeout in platform data to specify the polling timeout.

Changes compared to v2:
 - Use of consistent DMA mappings for default.
 - Added dma_rx_poll_enable in platform data to adjust the polling time
   according to the baud rate.
 - remove unnecessary lock from the polling function.

Signed-off-by: Chanho Min <chanho.min@lge.com>
---
 drivers/tty/serial/amba-pl011.c |  157 ++++++++++++++++++++++++++++++++++-----
 include/linux/amba/serial.h     |    3 +
 2 files changed, 141 insertions(+), 19 deletions(-)

diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index 3ea5408..3b093b5 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -29,6 +29,7 @@
  * and hooked into this driver.
  */

+
 #if defined(CONFIG_SERIAL_AMBA_PL011_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
 #define SUPPORT_SYSRQ
 #endif
@@ -117,6 +118,12 @@ struct pl011_dmarx_data {
 	struct pl011_sgbuf	sgbuf_b;
 	dma_cookie_t		cookie;
 	bool			running;
+	struct timer_list	timer;
+	unsigned int last_residue;
+	unsigned long last_jiffies;
+	bool auto_poll_rate;
+	unsigned int poll_rate;
+	unsigned int poll_timeout;
 };

 struct pl011_dmatx_data {
@@ -223,16 +230,18 @@ static int pl011_fifo_to_tty(struct uart_amba_port *uap)
 static int pl011_sgbuf_init(struct dma_chan *chan, struct pl011_sgbuf *sg,
 	enum dma_data_direction dir)
 {
-	sg->buf = kmalloc(PL011_DMA_BUFFER_SIZE, GFP_KERNEL);
+	dma_addr_t dma_addr;
+
+	sg->buf = dma_alloc_coherent(chan->device->dev,
+		PL011_DMA_BUFFER_SIZE, &dma_addr, GFP_KERNEL);
 	if (!sg->buf)
 		return -ENOMEM;

-	sg_init_one(&sg->sg, sg->buf, PL011_DMA_BUFFER_SIZE);
+	sg_init_table(&sg->sg, 1);
+	sg_set_page(&sg->sg, phys_to_page(dma_addr),
+		PL011_DMA_BUFFER_SIZE, offset_in_page(dma_addr));
+	sg_dma_address(&sg->sg) = dma_addr;

-	if (dma_map_sg(chan->device->dev, &sg->sg, 1, dir) != 1) {
-		kfree(sg->buf);
-		return -EINVAL;
-	}
 	return 0;
 }

@@ -240,8 +249,9 @@ static void pl011_sgbuf_free(struct dma_chan
*chan, struct pl011_sgbuf *sg,
 	enum dma_data_direction dir)
 {
 	if (sg->buf) {
-		dma_unmap_sg(chan->device->dev, &sg->sg, 1, dir);
-		kfree(sg->buf);
+		dma_free_coherent(chan->device->dev,
+			PL011_DMA_BUFFER_SIZE, sg->buf,
+			sg_dma_address(&sg->sg));
 	}
 }

@@ -300,6 +310,29 @@ static void pl011_dma_probe_initcall(struct
uart_amba_port *uap)
 		dmaengine_slave_config(chan, &rx_conf);
 		uap->dmarx.chan = chan;

+		if (plat->dma_rx_poll_enable) {
+			/* Set poll rate if specified. */
+			if (plat->dma_rx_poll_rate) {
+				uap->dmarx.auto_poll_rate = false;
+				uap->dmarx.poll_rate = plat->dma_rx_poll_rate;
+			} else {
+				/*
+				 * 100 ms defaults to poll rate if not
+				 * specified. This will be adjusted with
+				 * the baud rate at set_termios.
+				 */
+				uap->dmarx.auto_poll_rate = true;
+				uap->dmarx.poll_rate =  100;
+			}
+			/* 3 secs defaults poll_timeout if not specified. */
+			if (plat->dma_rx_poll_timeout)
+				uap->dmarx.poll_timeout =
+					plat->dma_rx_poll_timeout;
+			else
+				uap->dmarx.poll_timeout = 3000;
+		} else
+			uap->dmarx.auto_poll_rate = false;
+
 		dev_info(uap->port.dev, "DMA channel RX %s\n",
 			 dma_chan_name(uap->dmarx.chan));
 	}
@@ -701,24 +734,30 @@ static void pl011_dma_rx_chars(struct uart_amba_port *uap,
 	struct tty_port *port = &uap->port.state->port;
 	struct pl011_sgbuf *sgbuf = use_buf_b ?
 		&uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a;
-	struct device *dev = uap->dmarx.chan->device->dev;
 	int dma_count = 0;
 	u32 fifotaken = 0; /* only used for vdbg() */

-	/* Pick everything from the DMA first */
+	struct pl011_dmarx_data *dmarx = &uap->dmarx;
+	int dmataken = 0;
+
+	if (uap->dmarx.poll_rate) {
+		/* The data can be taken by polling */
+		dmataken = sgbuf->sg.length - dmarx->last_residue;
+		/* Recalculate the pending size */
+		if (pending >= dmataken)
+			pending -= dmataken;
+	}
+
+	/* Pick the remain data from the DMA */
 	if (pending) {
-		/* Sync in buffer */
-		dma_sync_sg_for_cpu(dev, &sgbuf->sg, 1, DMA_FROM_DEVICE);

 		/*
 		 * First take all chars in the DMA pipe, then look in the FIFO.
 		 * Note that tty_insert_flip_buf() tries to take as many chars
 		 * as it can.
 		 */
-		dma_count = tty_insert_flip_string(port, sgbuf->buf, pending);
-
-		/* Return buffer to device */
-		dma_sync_sg_for_device(dev, &sgbuf->sg, 1, DMA_FROM_DEVICE);
+		dma_count = tty_insert_flip_string(port, sgbuf->buf + dmataken,
+				pending);

 		uap->port.icount.rx += dma_count;
 		if (dma_count < pending)
@@ -726,6 +765,10 @@ static void pl011_dma_rx_chars(struct uart_amba_port *uap,
 				 "couldn't insert all characters (TTY is full?)\n");
 	}

+	/* Reset the last_residue for Rx DMA poll */
+	if (uap->dmarx.poll_rate)
+		dmarx->last_residue = sgbuf->sg.length;
+
 	/*
 	 * Only continue with trying to read the FIFO if all DMA chars have
 	 * been taken first.
@@ -865,6 +908,57 @@ static inline void pl011_dma_rx_stop(struct
uart_amba_port *uap)
 	writew(uap->dmacr, uap->port.membase + UART011_DMACR);
 }

+/*
+ * Timer handler for Rx DMA polling.
+ * Every polling, It checks the residue in the dma buffer and transfer
+ * data to the tty. Also, last_residue is updated for the next polling.
+ */
+static void pl011_dma_rx_poll(unsigned long args)
+{
+	struct uart_amba_port *uap = (struct uart_amba_port *)args;
+	struct tty_port *port = &uap->port.state->port;
+	struct pl011_dmarx_data *dmarx = &uap->dmarx;
+	struct dma_chan *rxchan = uap->dmarx.chan;
+	unsigned long flags = 0;
+	unsigned int dmataken = 0;
+	unsigned int size = 0;
+	struct pl011_sgbuf *sgbuf;
+	int dma_count;
+	struct dma_tx_state state;
+
+	sgbuf = dmarx->use_buf_b ? &uap->dmarx.sgbuf_b : &uap->dmarx.sgbuf_a;
+	rxchan->device->device_tx_status(rxchan, dmarx->cookie, &state);
+	if (likely(state.residue < dmarx->last_residue)) {
+		dmataken = sgbuf->sg.length - dmarx->last_residue;
+		size = dmarx->last_residue - state.residue;
+		dma_count = tty_insert_flip_string(port, sgbuf->buf + dmataken,
+				size);
+		if (dma_count == size)
+			dmarx->last_residue =  state.residue;
+		dmarx->last_jiffies = jiffies;
+	}
+	tty_flip_buffer_push(port);
+
+	/*
+	 * If no data is received in poll_timeout, the driver will fall back
+	 * to interrupt mode. We will retrigger DMA at the first interrupt.
+	 */
+	if (jiffies_to_msecs(jiffies - dmarx->last_jiffies)
+			> uap->dmarx.poll_timeout) {
+
+		spin_lock_irqsave(&uap->port.lock, flags);
+		pl011_dma_rx_stop(uap);
+		spin_unlock_irqrestore(&uap->port.lock, flags);
+
+		uap->dmarx.running = false;
+		dmaengine_terminate_all(rxchan);
+		del_timer(&uap->dmarx.timer);
+	} else {
+		mod_timer(&uap->dmarx.timer,
+			jiffies + msecs_to_jiffies(uap->dmarx.poll_rate));
+	}
+}
+
 static void pl011_dma_startup(struct uart_amba_port *uap)
 {
 	int ret;
@@ -927,6 +1021,16 @@ skip_rx:
 		if (pl011_dma_rx_trigger_dma(uap))
 			dev_dbg(uap->port.dev, "could not trigger initial "
 				"RX DMA job, fall back to interrupt mode\n");
+		if (uap->dmarx.poll_rate) {
+			init_timer(&(uap->dmarx.timer));
+			uap->dmarx.timer.function = pl011_dma_rx_poll;
+			uap->dmarx.timer.data = (unsigned long)uap;
+			mod_timer(&uap->dmarx.timer,
+				jiffies +
+				msecs_to_jiffies(uap->dmarx.poll_rate));
+			uap->dmarx.last_residue = PL011_DMA_BUFFER_SIZE;
+			uap->dmarx.last_jiffies = jiffies;
+		}
 	}
 }

@@ -962,6 +1066,8 @@ static void pl011_dma_shutdown(struct uart_amba_port *uap)
 		/* Clean up the RX DMA */
 		pl011_sgbuf_free(uap->dmarx.chan, &uap->dmarx.sgbuf_a, DMA_FROM_DEVICE);
 		pl011_sgbuf_free(uap->dmarx.chan, &uap->dmarx.sgbuf_b, DMA_FROM_DEVICE);
+		if (uap->dmarx.poll_rate)
+			del_timer_sync(&uap->dmarx.timer);
 		uap->using_rx_dma = false;
 	}
 }
@@ -976,7 +1082,6 @@ static inline bool pl011_dma_rx_running(struct
uart_amba_port *uap)
 	return uap->using_rx_dma && uap->dmarx.running;
 }

-
 #else
 /* Blank functions if the DMA engine is not available */
 static inline void pl011_dma_probe(struct uart_amba_port *uap)
@@ -1088,8 +1193,18 @@ static void pl011_rx_chars(struct uart_amba_port *uap)
 			dev_dbg(uap->port.dev, "could not trigger RX DMA job "
 				"fall back to interrupt mode again\n");
 			uap->im |= UART011_RXIM;
-		} else
+		} else {
 			uap->im &= ~UART011_RXIM;
+			/* Start Rx DMA poll */
+			if (uap->dmarx.poll_rate) {
+				uap->dmarx.last_jiffies = jiffies;
+				uap->dmarx.last_residue	= PL011_DMA_BUFFER_SIZE;
+				mod_timer(&uap->dmarx.timer,
+					jiffies +
+					msecs_to_jiffies(uap->dmarx.poll_rate));
+			}
+		}
+
 		writew(uap->im, uap->port.membase + UART011_IMSC);
 	}
 	spin_lock(&uap->port.lock);
@@ -1164,7 +1279,6 @@ static irqreturn_t pl011_int(int irq, void *dev_id)
 	unsigned int dummy_read;

 	spin_lock_irqsave(&uap->port.lock, flags);
-
 	status = readw(uap->port.membase + UART011_MIS);
 	if (status) {
 		do {
@@ -1551,6 +1665,11 @@ pl011_set_termios(struct uart_port *port,
struct ktermios *termios,
 	 */
 	baud = uart_get_baud_rate(port, termios, old, 0,
 				  port->uartclk / clkdiv);
+	/*
+	 * Adjust RX DMA polling rate with baud rate if not specified.
+	 */
+	if (uap->dmarx.auto_poll_rate)
+		uap->dmarx.poll_rate = DIV_ROUND_UP(10000000, baud);

 	if (baud > port->uartclk/16)
 		quot = DIV_ROUND_CLOSEST(port->uartclk * 8, baud);
diff --git a/include/linux/amba/serial.h b/include/linux/amba/serial.h
index f612c78..62d9303 100644
--- a/include/linux/amba/serial.h
+++ b/include/linux/amba/serial.h
@@ -203,6 +203,9 @@ struct amba_pl011_data {
 	bool (*dma_filter)(struct dma_chan *chan, void *filter_param);
 	void *dma_rx_param;
 	void *dma_tx_param;
+	bool dma_rx_poll_enable;
+	unsigned int dma_rx_poll_rate;
+	unsigned int dma_rx_poll_timeout;
         void (*init) (void);
 	void (*exit) (void);
 };
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] ARM: PL011: Add support for Rx DMA buffer polling.
  2013-03-27  9:38 [PATCH] ARM: PL011: Add support for Rx DMA buffer polling Chanho Min
@ 2013-03-29 16:19 ` Greg Kroah-Hartman
  0 siblings, 0 replies; 10+ messages in thread
From: Greg Kroah-Hartman @ 2013-03-29 16:19 UTC (permalink / raw)
  To: Chanho Min
  Cc: Russell King - ARM Linux, Linus Walleij, Alan Cox, linux-kernel,
	linux-serial, Chanho Min

On Wed, Mar 27, 2013 at 06:38:11PM +0900, Chanho Min wrote:
> In DMA support, The received data is not pushed to tty until the DMA buffer
> is filled. But some megabyte rate chips such as BT expect fast response and
> data should be pushed immediately. In order to fix this issue, We suggest
> the use of the timer for polling DMA buffer.
> In our test, no data loss occurred at high-baudrate as compared with interrupt-
> driven (We tested with 3Mbps).
> We changes:
> 
> - We add timer for polling. If we set poll_timer to 10, every 10ms,
>  timer handler checks the residue in the dma buffer and transfer data
>  to the tty. Also, last_residue is updated for the next polling.
> 
> - poll_timeout is used to prevent the timer's system cost.
>   If poll_timeout is set to 3000 and no data is received in 3 seconds,
>   we inactivate poll timer and driver falls back to interrupt-driven.
>   When data is received again in FIFO and UART irq is occurred, we switch
>   back to DMA mode and start polling.
> 
> - We use consistent DMA mappings to avoid from the frequent cache operation
>   of the timer function for default.
> 
> - pl011_dma_rx_chars is modified. the pending size is recalculated because
>   data can be taken by polling.
> 
> - the polling time is adjusted if dma rx poll is enabled but no rate is
>   specified. Ideal polling interval to push 1 character at every interval
>   is the reciprocal of 'baud rate / 10 line bits per character / 1000 ms
>   per sec'. But It is very aggressive to system. Experimentally,
>  '10000000 / baud' is suitable to receive dozens of characters. the poll rate
>  can be specified statically by dma_rx_poll_rate of the platform data as well.
> 
> Changes compared to v1:
>  - Use of consistent DMA mappings.
>  - Added dma_rx_poll_rate in platform data to specify the polling interval.
>  - Added dma_rx_poll_timeout in platform data to specify the polling timeout.
> 
> Changes compared to v2:
>  - Use of consistent DMA mappings for default.
>  - Added dma_rx_poll_enable in platform data to adjust the polling time
>    according to the baud rate.
>  - remove unnecessary lock from the polling function.
> 
> Signed-off-by: Chanho Min <chanho.min@lge.com>

This patch was line-wrapped and I had to fix it up by editing it by
hand.  Please fix your email client to not have this happen again.

greg k-h

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] ARM: PL011: Add support for Rx DMA buffer polling
  2013-01-14  7:27           ` Chanho Min
  2013-01-14  7:56             ` Linus Walleij
@ 2013-01-14  8:41             ` Chanho Min
  1 sibling, 0 replies; 10+ messages in thread
From: Chanho Min @ 2013-01-14  8:41 UTC (permalink / raw)
  To: Linus Walleij
  Cc: Russell King - ARM Linux, Alan Cox, Greg Kroah-Hartman,
	linux-kernel, linux-serial, Pawel Moll, Chanho Min

>Looking back at the patch, do you know if we can get rid of
>some of the #ifdefs and have this like a runtime switch set in
>struct amba_pl011_data or Device Tree?
Yes, I know. I'll come back with v2 soon.

Thanks,
Chanho

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] ARM: PL011: Add support for Rx DMA buffer polling
  2013-01-14  7:27           ` Chanho Min
@ 2013-01-14  7:56             ` Linus Walleij
  2013-01-14  8:41             ` Chanho Min
  1 sibling, 0 replies; 10+ messages in thread
From: Linus Walleij @ 2013-01-14  7:56 UTC (permalink / raw)
  To: Chanho Min
  Cc: Russell King - ARM Linux, Alan Cox, Greg Kroah-Hartman,
	linux-kernel, linux-serial, Pawel Moll

On Mon, Jan 14, 2013 at 8:27 AM, Chanho Min <chanho.min@lge.com> wrote:

> We just want to use this HW function even if SW support is needed.
> Anyway, It was very useful for megabyte rate chips.

So RX DMA will not be enabled for things like console, but
accelerators etc. OK that's perfectly valid.

Looking back at the patch, do you know if we can get rid of
some of the #ifdefs and have this like a runtime switch set in
struct amba_pl011_data or Device Tree?

Yours,
Linus Walleij

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] ARM: PL011: Add support for Rx DMA buffer polling
  2013-01-14  6:46         ` Linus Walleij
@ 2013-01-14  7:27           ` Chanho Min
  2013-01-14  7:56             ` Linus Walleij
  2013-01-14  8:41             ` Chanho Min
  0 siblings, 2 replies; 10+ messages in thread
From: Chanho Min @ 2013-01-14  7:27 UTC (permalink / raw)
  To: Linus Walleij
  Cc: Russell King - ARM Linux, Alan Cox, Greg Kroah-Hartman,
	linux-kernel, linux-serial, Pawel Moll

On Mon, Jan 14, 2013 at 3:46 PM, Linus Walleij <linus.walleij@linaro.org> wrote:
> On Mon, Jan 14, 2013 at 1:26 AM, Russell King - ARM Linux
>> What you describe above is exactly the problem I see on the Versatile
>> platform with it's PL080 and PL011.  I made the comment that this setup
>> can't work properly as it stands.  And I don't think that setting a
>> timer which fires every jiffy count is really a solution to the problem.
>> That's an expensive way of having a per-jiffy callback for something
>> that's probably going to remain idle most of the time.
>
> OK then that's what we can expect from an unmodified PL011.
>
>> Not only that, but a 1 jiffy timeout even for slower baud rates is
>> just utterly silly.
>>
>> The point of using DMA is to move the workload off the CPU onto hardware,
>> so that the CPU can go off and do other stuff.  If it's having to poll
>> it once every jiffy, then...
>
> Agreed.

I agree also. So We hope to suggest a way to reduce such cost.
Rx DMA is required for the fast baud only. PIO is sufficient for the
slow baud rate.
So this feature is optional for the specific port.
1 jiffy timeout is experimental and adjustable. Even 100ms, It will be
better than
remain in the DMA buffer. Also, the polling interval can be adjusted
according to
baud rate as Russel mentioned.

> Chanho, have you considered just leaving RX DMA undefined
> and only use DMA for TX?

No, We need RX DMA to reduce data loss rather than TX DMA.

>> The solution I came up with on Versatile was to just not enable
>> receive DMA - at all.

We just want to use this HW function even if SW support is needed.
Anyway, It was very useful for megabyte rate chips.

Thanks
Chanho

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] ARM: PL011: Add support for Rx DMA buffer polling
  2013-01-14  0:26       ` Russell King - ARM Linux
@ 2013-01-14  6:46         ` Linus Walleij
  2013-01-14  7:27           ` Chanho Min
  0 siblings, 1 reply; 10+ messages in thread
From: Linus Walleij @ 2013-01-14  6:46 UTC (permalink / raw)
  To: Russell King - ARM Linux
  Cc: Chanho Min, Alan Cox, Greg Kroah-Hartman, linux-kernel,
	linux-serial, Pawel Moll, chanho0207

On Mon, Jan 14, 2013 at 1:26 AM, Russell King - ARM Linux
<linux@arm.linux.org.uk> wrote:
> [Me]
>> But it may very well be that the single request can not be
>> enabled for the PL011 for it to work properly.
>
> What you describe above is exactly the problem I see on the Versatile
> platform with it's PL080 and PL011.  I made the comment that this setup
> can't work properly as it stands.  And I don't think that setting a
> timer which fires every jiffy count is really a solution to the problem.
> That's an expensive way of having a per-jiffy callback for something
> that's probably going to remain idle most of the time.

OK then that's what we can expect from an unmodified PL011.

> Not only that, but a 1 jiffy timeout even for slower baud rates is
> just utterly silly.
>
> The point of using DMA is to move the workload off the CPU onto hardware,
> so that the CPU can go off and do other stuff.  If it's having to poll
> it once every jiffy, then...

Agreed.

Chanho, have you considered just leaving RX DMA undefined
and only use DMA for TX?

I know sometimes a programmer will get assigned to "implement
DMA for IP block <foo>", and usually they want to deliver, but
sometimes you have to come back and say "you know that
is not such a good idea".

>> You may not be able to turn this off, and in that case this
>> kind of code is the only way forward indeed.
>
> Unless there's something inbetween the PL080 and PL011 to do that.
> The peripherals themselves don't have any means of controlling this.

We have it working perfectly on U300 and Ux500 but I think
it must be thanks to some hardware modifications.

On the Ux500 it is strictly required since it is used for megabit
rate chips like Bluetooth, and we get successful DMA support
on these.

> The solution I came up with on Versatile was to just not enable
> receive DMA - at all.

Then I suspect I can reproduce it with the PB11MPCore
and PL081 also given some time. I probably just did the tests
the wrong way.

Yours,
Linus Walleij

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] ARM: PL011: Add support for Rx DMA buffer polling
  2013-01-14  0:04     ` Linus Walleij
@ 2013-01-14  0:26       ` Russell King - ARM Linux
  2013-01-14  6:46         ` Linus Walleij
  0 siblings, 1 reply; 10+ messages in thread
From: Russell King - ARM Linux @ 2013-01-14  0:26 UTC (permalink / raw)
  To: Linus Walleij
  Cc: Chanho Min, Alan Cox, Greg Kroah-Hartman, linux-kernel,
	linux-serial, Pawel Moll, chanho0207

On Mon, Jan 14, 2013 at 01:04:58AM +0100, Linus Walleij wrote:
> At some point we reasoned that we may actually be saved
> by the reverse phenomena - if single request is *NOT*
> connected, there will very often be some characters in the
> FIFO, and that's enough to trigger the RTIM IRQ, and we get
> a nice flow. So maybe the platforms that actually work, work
> because they have bursts only.
> 
> But that would pose a problem when an even burst of
> characters arrive. Say 16 characters, this will be transferred
> in two bursts and leave the FIFO empty, but no DMA TC
> IRQ since the buffer is not full, and we'd get your
> phenomena.
> 
> But I could never reproduce the latter theoretical problem,
> so I think the hardware actually has a fix for this, around
> the burst signal logic.
> 
> But it may very well be that the single request can not be
> enabled for the PL011 for it to work properly.

What you describe above is exactly the problem I see on the Versatile
platform with it's PL080 and PL011.  I made the comment that this setup
can't work properly as it stands.  And I don't think that setting a
timer which fires every jiffy count is really a solution to the problem.
That's an expensive way of having a per-jiffy callback for something
that's probably going to remain idle most of the time.

Not only that, but a 1 jiffy timeout even for slower baud rates is
just utterly silly.

The point of using DMA is to move the workload off the CPU onto hardware,
so that the CPU can go off and do other stuff.  If it's having to poll
it once every jiffy, then...

> You may not be able to turn this off, and in that case this
> kind of code is the only way forward indeed.

Unless there's something inbetween the PL080 and PL011 to do that.
The peripherals themselves don't have any means of controlling this.

The solution I came up with on Versatile was to just not enable
receive DMA - at all.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] ARM: PL011: Add support for Rx DMA buffer polling
       [not found]   ` <50f107aa.894e420a.596a.ffffe3f6SMTPIN_ADDED_BROKEN@mx.google.com>
@ 2013-01-14  0:04     ` Linus Walleij
  2013-01-14  0:26       ` Russell King - ARM Linux
  0 siblings, 1 reply; 10+ messages in thread
From: Linus Walleij @ 2013-01-14  0:04 UTC (permalink / raw)
  To: Chanho Min
  Cc: Alan Cox, Russell King, Greg Kroah-Hartman, linux-kernel,
	linux-serial, Pawel Moll, chanho0207

On Sat, Jan 12, 2013 at 7:50 AM, Chanho Min <chanho.min@lge.com> wrote:

> We tested as follows:
> We set baud rate to 3Mbps and transfer 5000 bytes via /dev/tty at
> a time. Then 4096 bytes is received by DMA irq. But, There is no way
> to get remaining 904 bytes, unless another 3192 bytes are coming,

OK clearly cut phenomena: IRQs seem to appear from the
PL080 TC (transfer complete) IRQ only, not from any of the
PL011 IRQs.

So there will be zero IRQs from the uart in /proc/interrupts
but a few of them from the PL080, correct?

>>- What DMA controller are you using with the
>>  PL011?
>
> PL080.
> We don't add any customized configuration from the mainline.

OK sorry I was thinking of the old times when we has
.cctl configured from the platform.

>> - If they cannot be software-configured, how are
>>   they then configured in hardware? Burst-only?
>> Burst single
>
> As far as I know, single request is not disabled at PL080.
> Could you explain what single request has to do with this?

At some point we reasoned that we may actually be saved
by the reverse phenomena - if single request is *NOT*
connected, there will very often be some characters in the
FIFO, and that's enough to trigger the RTIM IRQ, and we get
a nice flow. So maybe the platforms that actually work, work
because they have bursts only.

But that would pose a problem when an even burst of
characters arrive. Say 16 characters, this will be transferred
in two bursts and leave the FIFO empty, but no DMA TC
IRQ since the buffer is not full, and we'd get your
phenomena.

But I could never reproduce the latter theoretical problem,
so I think the hardware actually has a fix for this, around
the burst signal logic.

But it may very well be that the single request can not be
enabled for the PL011 for it to work properly.

You may not be able to turn this off, and in that case this
kind of code is the only way forward indeed.

Yours,
Linus Walleij

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] ARM: PL011: Add support for Rx DMA buffer polling
       [not found] <50efad8d.84fc440a.589e.ffff9546SMTPIN_ADDED_BROKEN@mx.google.com>
@ 2013-01-11 18:34 ` Linus Walleij
       [not found]   ` <50f107aa.894e420a.596a.ffffe3f6SMTPIN_ADDED_BROKEN@mx.google.com>
  0 siblings, 1 reply; 10+ messages in thread
From: Linus Walleij @ 2013-01-11 18:34 UTC (permalink / raw)
  To: Chanho Min
  Cc: Alan Cox, Russell King, Greg Kroah-Hartman, linux-kernel,
	linux-serial, Pawel Moll

On Fri, Jan 11, 2013 at 7:13 AM, Chanho Min <chanho.min@lge.com> wrote:

> In DMA support, Received data is not triggered until the DMA buffer is filled.

Interesting! As you can see in commits:

commit ead76f329f777c7301e0a5456a0a1c7a081570bd
"ARM: 6763/1: pl011: add optional RX DMA to PL011 v2"

commit 5d7b8467e18b14ed44c5781d77993bfdcd8c826b
"mach-ux500: config Ux500 PL011 PL022 PL180 for DMA"

commit ec8f12533b4a439a8feb0d1a3bf15516781804be
"mach-u300: config U300 PL180 PL011 PL022 for DMA"

We have enabled RX DMA for Ux500 and U300.
It does work on our machines, I'm using it every day
and deploying the ux500 systems in the millions.

Let's recap how this can work for us. As you can see, when
using DMA we turn off UART011_RXIM and instead rely on
either:

1) DMA completes and we read in the entire buffer to
 the TTY

or

2) UART011_RTIM (recieve timeout) to happen. This is a
 timeout that need to be set low enough so that interactivity
 on a console will not be hampered.

Apparently this timeout cannot be configured on the ARM
variants while the ST variant has a register for that (which
is currently unused.)

The amazing thing is that we're using this. We're getting
timeouts, so the small set of characters in the FIFO on
an interactive console is polled perfectly.

This even works on the U300, which has the ARM version
of the PL011 and I have once tested it on the RealView
PB11MPcore (the only reference design with a working
PL08x DMA controller) and it worked on that system
as well.

As we've discussed before, in order for this to work,
the PL011 refman says:

----------8<------------------8<-----------------
UARTRXDMASREQ
Single character DMA transfer request, asserted by the UART.
For receive, one character consists of up to 12 bits. This signal
is asserted when the receive FIFO contains at least one character.

UARTRXDMABREQ
Burst DMA transfer request, asserted by the UART. This
signal is asserted when the receive FIFO contains more
characters than the programmed watermark level. You can
program the watermark level for each FIFO through the
UARTIFLS register.

UARTRXDMACLR
DMA request clear, asserted by the DMA controller to clear
the receive request signals. If DMA burst transfer is
requested, the clear signal is asserted during the transfer of
the last data in the burst.
----------8<------------------8<-----------------

We were worried that DMA would suck out characters
of the FIFO so quickly that it would never contain the
one character necessary to trigger the timeout, so that
we would read out the data from the DMA page, due
to this part of the refman:

----------8<------------------8<-----------------
UARTRTINTR
The receive timeout interrupt is asserted when the receive FIFO is
not empty, and no further data is received over a 32-bit period. The
receive timeout interrupt is cleared either when the FIFO becomes
empty through reading all the data (or by reading the holding register),
or when a 1 is written to the corresponding bit of the UARTICR
register.
----------8<------------------8<-----------------

The crucial part is "FIFO is not empty" - when using DMA
the FIFO will of course be empty, as DMA is sucking out
anything immediately. See this discussion:
http://comments.gmane.org/gmane.linux.ports.arm.kernel/92513

But when I tried it out in practice, it must be
wired differently or contain some VHDL hacks, because it
worked like a charm. I got the timeout IRQs and all. I never
managed to get it to fail. Sadly I don't have the VHDL code
for the PL011 so I cannot confirm how they (ARMs
IP core engineers) solved this. (Paging Pawel on this.)

So I suspect that maybe your DMA controller is badly
configured, or erroneous, atleast if this is a stock PL011:

- What DMA controller are you using with the
  PL011?

- How are the burst lines configured in the DMA
  controller? (This is often a platform-specific
  property.) Did you make sure both single and burst
  request lines are enabled? If only burst is enabled
  you will get *exactly* the phenomena you are trying
  to work around above...

- If they cannot be software-configured, how are
  they then configured in hardware? Burst-only?

Yours,
Linus Walleij

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] ARM: PL011: Add support for Rx DMA buffer polling
       [not found] <006c01cdefc2$c7e1cb90$57a562b0$@min@lge.com>
@ 2013-01-11 16:18 ` Russell King - ARM Linux
  0 siblings, 0 replies; 10+ messages in thread
From: Russell King - ARM Linux @ 2013-01-11 16:18 UTC (permalink / raw)
  To: Chanho Min
  Cc: 'Alan Cox', 'Greg Kroah-Hartman',
	'Linus Walleij',
	linux-kernel, linux-serial

On Fri, Jan 11, 2013 at 03:13:31PM +0900, Chanho Min wrote:
> In DMA support, Received data is not triggered until the DMA buffer is filled.
> In order to actually use Rx DMA, We would like to suggest the use of the timer
> for polling DMA buffer. It makes possible character-level trigger.
> In our test, no data loss occurred at high-baudrate as compared with interrupt
> driven (We tested with 3Mbps). We know it will be because of the increased Rx
> buffer rather than DMA effect. but,It is very useful for high speed uart device.
> We changes:
> 
> - CONFIG_SERIAL_AMBA_PL011_DMAPOLL is added to select this feature.
> 
> - add timer and last_residue for polling. Every polling, timer handler checks
>   the residue in the dma buffer and transfer data to the tty. Also, last_residue
>   is updated for the next polling.
> 
> - We use consistent DMA mappings instead of streaming DMA mappings to avoid from
>   the frequent cache operation.
> 
> - pl011_dma_rx_chars is modified. the pending size is recalculated because data
>   can be taken by polling.
> 
> - add module parameter for adjusting polling interval.

Here's the question: what is the effect of having to poll every tick on
the overall system?

What I'm getting at is: what is the cost of that polling with DMA enabled
over not having DMA on the receive side at all?  What is the trade-off
vs baud rate?  Should we scale the polling interval according to baud
rate?

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2013-03-29 16:20 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-03-27  9:38 [PATCH] ARM: PL011: Add support for Rx DMA buffer polling Chanho Min
2013-03-29 16:19 ` Greg Kroah-Hartman
     [not found] <50efad8d.84fc440a.589e.ffff9546SMTPIN_ADDED_BROKEN@mx.google.com>
2013-01-11 18:34 ` Linus Walleij
     [not found]   ` <50f107aa.894e420a.596a.ffffe3f6SMTPIN_ADDED_BROKEN@mx.google.com>
2013-01-14  0:04     ` Linus Walleij
2013-01-14  0:26       ` Russell King - ARM Linux
2013-01-14  6:46         ` Linus Walleij
2013-01-14  7:27           ` Chanho Min
2013-01-14  7:56             ` Linus Walleij
2013-01-14  8:41             ` Chanho Min
     [not found] <006c01cdefc2$c7e1cb90$57a562b0$@min@lge.com>
2013-01-11 16:18 ` Russell King - ARM Linux

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).