linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] tty: serial: Use fifo in 8250 console driver
@ 2021-11-04 17:17 wander
  2021-12-20 15:45 ` Greg Kroah-Hartman
  0 siblings, 1 reply; 5+ messages in thread
From: wander @ 2021-11-04 17:17 UTC (permalink / raw)
  To: Greg Kroah-Hartman, Jiri Slaby, Andrew Jeffery,
	Wander Lairson Costa, Maciej W. Rozycki, Johan Hovold,
	Serge Semin, Pali Rohár, open list:SERIAL DRIVERS,
	open list

From: Wander Lairson Costa <wander@redhat.com>

Note: I am using a small test app + driver located at [0] for the
problem description. serco is a driver whose write function dispatches
to the serial controller. sertest is a user-mode app that writes n bytes
to the serial console using the serco driver.

Recently I got a report of a soft lockup while loading a bunch a
scsi_debug devices (> 500).

While investigating it, I noticed that the serial console throughput
(called by the printk code) is way below the configured speed of 115200
bps in a HP Proliant DL380 Gen9 server. I was expecting something above
10KB/s, but I got 2.5KB/s. I then built a simple driver [0] to isolate
the console from the printk code. Here it is:

$ time ./sertest -n 2500 /tmp/serco

real    0m0.997s
user    0m0.000s
sys     0m0.997s

With the help of the function tracer, I then noticed the serial
controller was taking around 410us seconds to dispatch one single byte:

$ trace-cmd record -p function_graph -g serial8250_console_write \
   ./sertest -n 1 /tmp/serco

$ trace-cmd report

            |  serial8250_console_write() {
 0.384 us   |    _raw_spin_lock_irqsave();
 1.836 us   |    io_serial_in();
 1.667 us   |    io_serial_out();
            |    uart_console_write() {
            |      serial8250_console_putchar() {
            |        wait_for_xmitr() {
 1.870 us   |          io_serial_in();
 2.238 us   |        }
 1.737 us   |        io_serial_out();
 4.318 us   |      }
 4.675 us   |    }
            |    wait_for_xmitr() {
 1.635 us   |      io_serial_in();
            |      __const_udelay() {
 1.125 us   |        delay_tsc();
 1.429 us   |      }
...
...
...
 1.683 us   |      io_serial_in();
            |      __const_udelay() {
 1.248 us   |        delay_tsc();
 1.486 us   |      }
 1.671 us   |      io_serial_in();
 411.342 us |    }

In another machine, I measured a throughput of 11.5KB/s, with the serial
controller taking between 80-90us to send each byte. That matches the
expected throughput for a configuration of 115200 bps.

This patch changes the serial8250_console_write to use the 16550 fifo
if available. In my artificial benchmark I could get a throughput
increase up to 100% in some cases, but in the real case described at the
beginning the gain was of about 25%.

[0] https://github.com/walac/serial-console-test

Signed-off-by: Wander Lairson Costa <wander@redhat.com>
---
 drivers/tty/serial/8250/8250.h      |  3 ++
 drivers/tty/serial/8250/8250_port.c | 63 +++++++++++++++++++++++++----
 2 files changed, 59 insertions(+), 7 deletions(-)

diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h
index 6473361525d1..c711bf118cc1 100644
--- a/drivers/tty/serial/8250/8250.h
+++ b/drivers/tty/serial/8250/8250.h
@@ -83,6 +83,9 @@ struct serial8250_config {
 #define UART_CAP_MINI	BIT(17)	/* Mini UART on BCM283X family lacks:
 					 * STOP PARITY EPAR SPAR WLEN5 WLEN6
 					 */
+#define UART_CAP_CWFIFO BIT(18) /* Use the UART Fifo in
+				 * serial8250_console_write
+				 */
 
 #define UART_BUG_QUOT	BIT(0)	/* UART has buggy quot LSB */
 #define UART_BUG_TXEN	BIT(1)	/* UART has buggy TX IIR status */
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 5775cbff8f6e..e58938c435c9 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -82,7 +82,7 @@ static const struct serial8250_config uart_config[] = {
 		.tx_loadsz	= 16,
 		.fcr		= UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10,
 		.rxtrig_bytes	= {1, 4, 8, 14},
-		.flags		= UART_CAP_FIFO,
+		.flags		= UART_CAP_FIFO | UART_CAP_CWFIFO,
 	},
 	[PORT_CIRRUS] = {
 		.name		= "Cirrus",
@@ -2063,10 +2063,7 @@ static void serial8250_break_ctl(struct uart_port *port, int break_state)
 	serial8250_rpm_put(up);
 }
 
-/*
- *	Wait for transmitter & holding register to empty
- */
-static void wait_for_xmitr(struct uart_8250_port *up, int bits)
+static void wait_for_lsr(struct uart_8250_port *up, int bits)
 {
 	unsigned int status, tmout = 10000;
 
@@ -2083,6 +2080,16 @@ static void wait_for_xmitr(struct uart_8250_port *up, int bits)
 		udelay(1);
 		touch_nmi_watchdog();
 	}
+}
+
+/*
+ *	Wait for transmitter & holding register to empty
+ */
+static void wait_for_xmitr(struct uart_8250_port *up, int bits)
+{
+	unsigned int tmout;
+
+	wait_for_lsr(up, bits);
 
 	/* Wait up to 1s for flow control if necessary */
 	if (up->port.flags & UPF_CONS_FLOW) {
@@ -3332,6 +3339,35 @@ static void serial8250_console_restore(struct uart_8250_port *up)
 	serial8250_out_MCR(up, UART_MCR_DTR | UART_MCR_RTS);
 }
 
+/*
+ * Print a string to the serial port using the device FIFO
+ *
+ * It sends fifosize bytes and then waits for the fifo
+ * to get empty.
+ */
+static void serial8250_console_fifo_write(struct uart_8250_port *up,
+		const char *s, unsigned int count)
+{
+	int i;
+	const char *end = s + count;
+	unsigned int fifosize = up->port.fifosize;
+	bool cr_sent = false;
+
+	while (s != end) {
+		wait_for_lsr(up, UART_LSR_THRE);
+
+		for (i = 0; i < fifosize && s != end; ++i) {
+			if (*s == '\n' && !cr_sent) {
+				serial_out(up, UART_TX, '\r');
+				cr_sent = true;
+			} else {
+				serial_out(up, UART_TX, *s++);
+				cr_sent = false;
+			}
+		}
+	}
+}
+
 /*
  *	Print a string to the serial port trying not to disturb
  *	any possible real use of the port...
@@ -3347,7 +3383,7 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s,
 	struct uart_8250_em485 *em485 = up->em485;
 	struct uart_port *port = &up->port;
 	unsigned long flags;
-	unsigned int ier;
+	unsigned int ier, use_fifo;
 	int locked = 1;
 
 	touch_nmi_watchdog();
@@ -3379,7 +3415,20 @@ void serial8250_console_write(struct uart_8250_port *up, const char *s,
 		mdelay(port->rs485.delay_rts_before_send);
 	}
 
-	uart_console_write(port, s, count, serial8250_console_putchar);
+	use_fifo = (up->capabilities & UART_CAP_CWFIFO)
+		&& port->fifosize > 1
+		&& (serial_port_in(port, UART_FCR) & UART_FCR_ENABLE_FIFO)
+		/*
+		 * After we put a data in the fifo, the controller will send
+		 * it regardless of the CTS state. Therefore, only use fifo
+		 * if we don't use control flow.
+		 */
+		&& !(up->port.flags & UPF_CONS_FLOW);
+
+	if (use_fifo)
+		serial8250_console_fifo_write(up, s, count);
+	else
+		uart_console_write(port, s, count, serial8250_console_putchar);
 
 	/*
 	 *	Finally, wait for transmitter to become empty
-- 
2.27.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] tty: serial: Use fifo in 8250 console driver
  2021-11-04 17:17 [PATCH v2] tty: serial: Use fifo in 8250 console driver wander
@ 2021-12-20 15:45 ` Greg Kroah-Hartman
  2021-12-20 17:02   ` Wander Costa
  0 siblings, 1 reply; 5+ messages in thread
From: Greg Kroah-Hartman @ 2021-12-20 15:45 UTC (permalink / raw)
  To: wander
  Cc: Jiri Slaby, Andrew Jeffery, Maciej W. Rozycki, Johan Hovold,
	Serge Semin, Pali Rohár, open list:SERIAL DRIVERS,
	open list

On Thu, Nov 04, 2021 at 02:17:31PM -0300, wander@redhat.com wrote:
> From: Wander Lairson Costa <wander@redhat.com>
> 
> Note: I am using a small test app + driver located at [0] for the
> problem description. serco is a driver whose write function dispatches
> to the serial controller. sertest is a user-mode app that writes n bytes
> to the serial console using the serco driver.
> 
> Recently I got a report of a soft lockup while loading a bunch a
> scsi_debug devices (> 500).
> 
> While investigating it, I noticed that the serial console throughput
> (called by the printk code) is way below the configured speed of 115200
> bps in a HP Proliant DL380 Gen9 server. I was expecting something above
> 10KB/s, but I got 2.5KB/s. I then built a simple driver [0] to isolate
> the console from the printk code. Here it is:
> 
> $ time ./sertest -n 2500 /tmp/serco
> 
> real    0m0.997s
> user    0m0.000s
> sys     0m0.997s
> 
> With the help of the function tracer, I then noticed the serial
> controller was taking around 410us seconds to dispatch one single byte:
> 
> $ trace-cmd record -p function_graph -g serial8250_console_write \
>    ./sertest -n 1 /tmp/serco
> 
> $ trace-cmd report
> 
>             |  serial8250_console_write() {
>  0.384 us   |    _raw_spin_lock_irqsave();
>  1.836 us   |    io_serial_in();
>  1.667 us   |    io_serial_out();
>             |    uart_console_write() {
>             |      serial8250_console_putchar() {
>             |        wait_for_xmitr() {
>  1.870 us   |          io_serial_in();
>  2.238 us   |        }
>  1.737 us   |        io_serial_out();
>  4.318 us   |      }
>  4.675 us   |    }
>             |    wait_for_xmitr() {
>  1.635 us   |      io_serial_in();
>             |      __const_udelay() {
>  1.125 us   |        delay_tsc();
>  1.429 us   |      }
> ...
> ...
> ...
>  1.683 us   |      io_serial_in();
>             |      __const_udelay() {
>  1.248 us   |        delay_tsc();
>  1.486 us   |      }
>  1.671 us   |      io_serial_in();
>  411.342 us |    }
> 
> In another machine, I measured a throughput of 11.5KB/s, with the serial
> controller taking between 80-90us to send each byte. That matches the
> expected throughput for a configuration of 115200 bps.
> 
> This patch changes the serial8250_console_write to use the 16550 fifo
> if available. In my artificial benchmark I could get a throughput
> increase up to 100% in some cases, but in the real case described at the
> beginning the gain was of about 25%.
> 
> [0] https://github.com/walac/serial-console-test
> 
> Signed-off-by: Wander Lairson Costa <wander@redhat.com>
> ---
>  drivers/tty/serial/8250/8250.h      |  3 ++
>  drivers/tty/serial/8250/8250_port.c | 63 +++++++++++++++++++++++++----
>  2 files changed, 59 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h
> index 6473361525d1..c711bf118cc1 100644
> --- a/drivers/tty/serial/8250/8250.h
> +++ b/drivers/tty/serial/8250/8250.h
> @@ -83,6 +83,9 @@ struct serial8250_config {
>  #define UART_CAP_MINI	BIT(17)	/* Mini UART on BCM283X family lacks:
>  					 * STOP PARITY EPAR SPAR WLEN5 WLEN6
>  					 */
> +#define UART_CAP_CWFIFO BIT(18) /* Use the UART Fifo in
> +				 * serial8250_console_write
> +				 */

Why do you need a new bit?  Why can't you just do this change for all
devices that have a fifo?  Why would you _not_ want to do this for all
devices that have a fifo?

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] tty: serial: Use fifo in 8250 console driver
  2021-12-20 15:45 ` Greg Kroah-Hartman
@ 2021-12-20 17:02   ` Wander Costa
  2021-12-21  8:12     ` Greg Kroah-Hartman
  0 siblings, 1 reply; 5+ messages in thread
From: Wander Costa @ 2021-12-20 17:02 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: Wander Lairson Costa, Jiri Slaby, Andrew Jeffery,
	Maciej W. Rozycki, Johan Hovold, Serge Semin, Pali Rohár,
	open list:SERIAL DRIVERS, open list

On Mon, Dec 20, 2021 at 12:45 PM Greg Kroah-Hartman
<gregkh@linuxfoundation.org> wrote:
>
> On Thu, Nov 04, 2021 at 02:17:31PM -0300, wander@redhat.com wrote:
> > From: Wander Lairson Costa <wander@redhat.com>
> >
> > Note: I am using a small test app + driver located at [0] for the
> > problem description. serco is a driver whose write function dispatches
> > to the serial controller. sertest is a user-mode app that writes n bytes
> > to the serial console using the serco driver.
> >
> > Recently I got a report of a soft lockup while loading a bunch a
> > scsi_debug devices (> 500).
> >
> > While investigating it, I noticed that the serial console throughput
> > (called by the printk code) is way below the configured speed of 115200
> > bps in a HP Proliant DL380 Gen9 server. I was expecting something above
> > 10KB/s, but I got 2.5KB/s. I then built a simple driver [0] to isolate
> > the console from the printk code. Here it is:
> >
> > $ time ./sertest -n 2500 /tmp/serco
> >
> > real    0m0.997s
> > user    0m0.000s
> > sys     0m0.997s
> >
> > With the help of the function tracer, I then noticed the serial
> > controller was taking around 410us seconds to dispatch one single byte:
> >
> > $ trace-cmd record -p function_graph -g serial8250_console_write \
> >    ./sertest -n 1 /tmp/serco
> >
> > $ trace-cmd report
> >
> >             |  serial8250_console_write() {
> >  0.384 us   |    _raw_spin_lock_irqsave();
> >  1.836 us   |    io_serial_in();
> >  1.667 us   |    io_serial_out();
> >             |    uart_console_write() {
> >             |      serial8250_console_putchar() {
> >             |        wait_for_xmitr() {
> >  1.870 us   |          io_serial_in();
> >  2.238 us   |        }
> >  1.737 us   |        io_serial_out();
> >  4.318 us   |      }
> >  4.675 us   |    }
> >             |    wait_for_xmitr() {
> >  1.635 us   |      io_serial_in();
> >             |      __const_udelay() {
> >  1.125 us   |        delay_tsc();
> >  1.429 us   |      }
> > ...
> > ...
> > ...
> >  1.683 us   |      io_serial_in();
> >             |      __const_udelay() {
> >  1.248 us   |        delay_tsc();
> >  1.486 us   |      }
> >  1.671 us   |      io_serial_in();
> >  411.342 us |    }
> >
> > In another machine, I measured a throughput of 11.5KB/s, with the serial
> > controller taking between 80-90us to send each byte. That matches the
> > expected throughput for a configuration of 115200 bps.
> >
> > This patch changes the serial8250_console_write to use the 16550 fifo
> > if available. In my artificial benchmark I could get a throughput
> > increase up to 100% in some cases, but in the real case described at the
> > beginning the gain was of about 25%.
> >
> > [0] https://github.com/walac/serial-console-test
> >
> > Signed-off-by: Wander Lairson Costa <wander@redhat.com>
> > ---
> >  drivers/tty/serial/8250/8250.h      |  3 ++
> >  drivers/tty/serial/8250/8250_port.c | 63 +++++++++++++++++++++++++----
> >  2 files changed, 59 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h
> > index 6473361525d1..c711bf118cc1 100644
> > --- a/drivers/tty/serial/8250/8250.h
> > +++ b/drivers/tty/serial/8250/8250.h
> > @@ -83,6 +83,9 @@ struct serial8250_config {
> >  #define UART_CAP_MINI        BIT(17) /* Mini UART on BCM283X family lacks:
> >                                        * STOP PARITY EPAR SPAR WLEN5 WLEN6
> >                                        */
> > +#define UART_CAP_CWFIFO BIT(18) /* Use the UART Fifo in
> > +                              * serial8250_console_write
> > +                              */
>
> Why do you need a new bit?  Why can't you just do this change for all
> devices that have a fifo?  Why would you _not_ want to do this for all
> devices that have a fifo?
>
The v1 patch [1] didn't have this extra bit. Andy suggested [2] to add
it so we only enabled this new code on tested controllers as a
precaution.
If it doesn't make sense to you, feel free to consider the v1 patch [1].

[1] https://lore.kernel.org/all/20211029201402.428284-1-wander@redhat.com/
[2] https://lore.kernel.org/all/CAHp75Vf6DjNcPWpE4Dh3SuzUMJbFQjq1UNCkrCa60uw35SpqKg@mail.gmail.com/

> thanks,
>
> greg k-h
>


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] tty: serial: Use fifo in 8250 console driver
  2021-12-20 17:02   ` Wander Costa
@ 2021-12-21  8:12     ` Greg Kroah-Hartman
  2021-12-22 11:22       ` Wander Costa
  0 siblings, 1 reply; 5+ messages in thread
From: Greg Kroah-Hartman @ 2021-12-21  8:12 UTC (permalink / raw)
  To: Wander Costa
  Cc: Wander Lairson Costa, Jiri Slaby, Andrew Jeffery,
	Maciej W. Rozycki, Johan Hovold, Serge Semin, Pali Rohár,
	open list:SERIAL DRIVERS, open list

On Mon, Dec 20, 2021 at 02:02:11PM -0300, Wander Costa wrote:
> On Mon, Dec 20, 2021 at 12:45 PM Greg Kroah-Hartman
> <gregkh@linuxfoundation.org> wrote:
> >
> > On Thu, Nov 04, 2021 at 02:17:31PM -0300, wander@redhat.com wrote:
> > > From: Wander Lairson Costa <wander@redhat.com>
> > >
> > > Note: I am using a small test app + driver located at [0] for the
> > > problem description. serco is a driver whose write function dispatches
> > > to the serial controller. sertest is a user-mode app that writes n bytes
> > > to the serial console using the serco driver.
> > >
> > > Recently I got a report of a soft lockup while loading a bunch a
> > > scsi_debug devices (> 500).
> > >
> > > While investigating it, I noticed that the serial console throughput
> > > (called by the printk code) is way below the configured speed of 115200
> > > bps in a HP Proliant DL380 Gen9 server. I was expecting something above
> > > 10KB/s, but I got 2.5KB/s. I then built a simple driver [0] to isolate
> > > the console from the printk code. Here it is:
> > >
> > > $ time ./sertest -n 2500 /tmp/serco
> > >
> > > real    0m0.997s
> > > user    0m0.000s
> > > sys     0m0.997s
> > >
> > > With the help of the function tracer, I then noticed the serial
> > > controller was taking around 410us seconds to dispatch one single byte:
> > >
> > > $ trace-cmd record -p function_graph -g serial8250_console_write \
> > >    ./sertest -n 1 /tmp/serco
> > >
> > > $ trace-cmd report
> > >
> > >             |  serial8250_console_write() {
> > >  0.384 us   |    _raw_spin_lock_irqsave();
> > >  1.836 us   |    io_serial_in();
> > >  1.667 us   |    io_serial_out();
> > >             |    uart_console_write() {
> > >             |      serial8250_console_putchar() {
> > >             |        wait_for_xmitr() {
> > >  1.870 us   |          io_serial_in();
> > >  2.238 us   |        }
> > >  1.737 us   |        io_serial_out();
> > >  4.318 us   |      }
> > >  4.675 us   |    }
> > >             |    wait_for_xmitr() {
> > >  1.635 us   |      io_serial_in();
> > >             |      __const_udelay() {
> > >  1.125 us   |        delay_tsc();
> > >  1.429 us   |      }
> > > ...
> > > ...
> > > ...
> > >  1.683 us   |      io_serial_in();
> > >             |      __const_udelay() {
> > >  1.248 us   |        delay_tsc();
> > >  1.486 us   |      }
> > >  1.671 us   |      io_serial_in();
> > >  411.342 us |    }
> > >
> > > In another machine, I measured a throughput of 11.5KB/s, with the serial
> > > controller taking between 80-90us to send each byte. That matches the
> > > expected throughput for a configuration of 115200 bps.
> > >
> > > This patch changes the serial8250_console_write to use the 16550 fifo
> > > if available. In my artificial benchmark I could get a throughput
> > > increase up to 100% in some cases, but in the real case described at the
> > > beginning the gain was of about 25%.
> > >
> > > [0] https://github.com/walac/serial-console-test
> > >
> > > Signed-off-by: Wander Lairson Costa <wander@redhat.com>
> > > ---
> > >  drivers/tty/serial/8250/8250.h      |  3 ++
> > >  drivers/tty/serial/8250/8250_port.c | 63 +++++++++++++++++++++++++----
> > >  2 files changed, 59 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h
> > > index 6473361525d1..c711bf118cc1 100644
> > > --- a/drivers/tty/serial/8250/8250.h
> > > +++ b/drivers/tty/serial/8250/8250.h
> > > @@ -83,6 +83,9 @@ struct serial8250_config {
> > >  #define UART_CAP_MINI        BIT(17) /* Mini UART on BCM283X family lacks:
> > >                                        * STOP PARITY EPAR SPAR WLEN5 WLEN6
> > >                                        */
> > > +#define UART_CAP_CWFIFO BIT(18) /* Use the UART Fifo in
> > > +                              * serial8250_console_write
> > > +                              */
> >
> > Why do you need a new bit?  Why can't you just do this change for all
> > devices that have a fifo?  Why would you _not_ want to do this for all
> > devices that have a fifo?
> >
> The v1 patch [1] didn't have this extra bit. Andy suggested [2] to add
> it so we only enabled this new code on tested controllers as a
> precaution.
> If it doesn't make sense to you, feel free to consider the v1 patch [1].
> 
> [1] https://lore.kernel.org/all/20211029201402.428284-1-wander@redhat.com/
> [2] https://lore.kernel.org/all/CAHp75Vf6DjNcPWpE4Dh3SuzUMJbFQjq1UNCkrCa60uw35SpqKg@mail.gmail.com/

I do like [1] better, but can you fix up the checkpatch issues and
resend it:

CHECK: Alignment should match open parenthesis
#116: FILE: drivers/tty/serial/8250/8250_port.c:3342:
+static void serial8250_console_fifo_write(struct uart_8250_port *up,
+	        const char *s, unsigned int count)

CHECK: Logical continuations should be on the previous line
#156: FILE: drivers/tty/serial/8250/8250_port.c:3412:
+	use_fifo = (up->capabilities & UART_CAP_FIFO)
+	        && port->fifosize > 1

CHECK: Logical continuations should be on the previous line
#157: FILE: drivers/tty/serial/8250/8250_port.c:3413:
+	        && port->fifosize > 1
+	        && (serial_port_in(port, UART_FCR) & UART_FCR_ENABLE_FIFO)

CHECK: Logical continuations should be on the previous line
#163: FILE: drivers/tty/serial/8250/8250_port.c:3419:
+	         */
+	        && !(up->port.flags & UPF_CONS_FLOW);



thanks,

greg k-h

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2] tty: serial: Use fifo in 8250 console driver
  2021-12-21  8:12     ` Greg Kroah-Hartman
@ 2021-12-22 11:22       ` Wander Costa
  0 siblings, 0 replies; 5+ messages in thread
From: Wander Costa @ 2021-12-22 11:22 UTC (permalink / raw)
  To: Greg Kroah-Hartman
  Cc: Wander Lairson Costa, Jiri Slaby, Andrew Jeffery,
	Maciej W. Rozycki, Johan Hovold, Serge Semin, Pali Rohár,
	open list:SERIAL DRIVERS, open list

On Tue, Dec 21, 2021 at 5:12 AM Greg Kroah-Hartman
<gregkh@linuxfoundation.org> wrote:
>
> On Mon, Dec 20, 2021 at 02:02:11PM -0300, Wander Costa wrote:
> > On Mon, Dec 20, 2021 at 12:45 PM Greg Kroah-Hartman
> > <gregkh@linuxfoundation.org> wrote:
> > >
> > > On Thu, Nov 04, 2021 at 02:17:31PM -0300, wander@redhat.com wrote:
> > > > From: Wander Lairson Costa <wander@redhat.com>
> > > >
> > > > Note: I am using a small test app + driver located at [0] for the
> > > > problem description. serco is a driver whose write function dispatches
> > > > to the serial controller. sertest is a user-mode app that writes n bytes
> > > > to the serial console using the serco driver.
> > > >
> > > > Recently I got a report of a soft lockup while loading a bunch a
> > > > scsi_debug devices (> 500).
> > > >
> > > > While investigating it, I noticed that the serial console throughput
> > > > (called by the printk code) is way below the configured speed of 115200
> > > > bps in a HP Proliant DL380 Gen9 server. I was expecting something above
> > > > 10KB/s, but I got 2.5KB/s. I then built a simple driver [0] to isolate
> > > > the console from the printk code. Here it is:
> > > >
> > > > $ time ./sertest -n 2500 /tmp/serco
> > > >
> > > > real    0m0.997s
> > > > user    0m0.000s
> > > > sys     0m0.997s
> > > >
> > > > With the help of the function tracer, I then noticed the serial
> > > > controller was taking around 410us seconds to dispatch one single byte:
> > > >
> > > > $ trace-cmd record -p function_graph -g serial8250_console_write \
> > > >    ./sertest -n 1 /tmp/serco
> > > >
> > > > $ trace-cmd report
> > > >
> > > >             |  serial8250_console_write() {
> > > >  0.384 us   |    _raw_spin_lock_irqsave();
> > > >  1.836 us   |    io_serial_in();
> > > >  1.667 us   |    io_serial_out();
> > > >             |    uart_console_write() {
> > > >             |      serial8250_console_putchar() {
> > > >             |        wait_for_xmitr() {
> > > >  1.870 us   |          io_serial_in();
> > > >  2.238 us   |        }
> > > >  1.737 us   |        io_serial_out();
> > > >  4.318 us   |      }
> > > >  4.675 us   |    }
> > > >             |    wait_for_xmitr() {
> > > >  1.635 us   |      io_serial_in();
> > > >             |      __const_udelay() {
> > > >  1.125 us   |        delay_tsc();
> > > >  1.429 us   |      }
> > > > ...
> > > > ...
> > > > ...
> > > >  1.683 us   |      io_serial_in();
> > > >             |      __const_udelay() {
> > > >  1.248 us   |        delay_tsc();
> > > >  1.486 us   |      }
> > > >  1.671 us   |      io_serial_in();
> > > >  411.342 us |    }
> > > >
> > > > In another machine, I measured a throughput of 11.5KB/s, with the serial
> > > > controller taking between 80-90us to send each byte. That matches the
> > > > expected throughput for a configuration of 115200 bps.
> > > >
> > > > This patch changes the serial8250_console_write to use the 16550 fifo
> > > > if available. In my artificial benchmark I could get a throughput
> > > > increase up to 100% in some cases, but in the real case described at the
> > > > beginning the gain was of about 25%.
> > > >
> > > > [0] https://github.com/walac/serial-console-test
> > > >
> > > > Signed-off-by: Wander Lairson Costa <wander@redhat.com>
> > > > ---
> > > >  drivers/tty/serial/8250/8250.h      |  3 ++
> > > >  drivers/tty/serial/8250/8250_port.c | 63 +++++++++++++++++++++++++----
> > > >  2 files changed, 59 insertions(+), 7 deletions(-)
> > > >
> > > > diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h
> > > > index 6473361525d1..c711bf118cc1 100644
> > > > --- a/drivers/tty/serial/8250/8250.h
> > > > +++ b/drivers/tty/serial/8250/8250.h
> > > > @@ -83,6 +83,9 @@ struct serial8250_config {
> > > >  #define UART_CAP_MINI        BIT(17) /* Mini UART on BCM283X family lacks:
> > > >                                        * STOP PARITY EPAR SPAR WLEN5 WLEN6
> > > >                                        */
> > > > +#define UART_CAP_CWFIFO BIT(18) /* Use the UART Fifo in
> > > > +                              * serial8250_console_write
> > > > +                              */
> > >
> > > Why do you need a new bit?  Why can't you just do this change for all
> > > devices that have a fifo?  Why would you _not_ want to do this for all
> > > devices that have a fifo?
> > >
> > The v1 patch [1] didn't have this extra bit. Andy suggested [2] to add
> > it so we only enabled this new code on tested controllers as a
> > precaution.
> > If it doesn't make sense to you, feel free to consider the v1 patch [1].
> >
> > [1] https://lore.kernel.org/all/20211029201402.428284-1-wander@redhat.com/
> > [2] https://lore.kernel.org/all/CAHp75Vf6DjNcPWpE4Dh3SuzUMJbFQjq1UNCkrCa60uw35SpqKg@mail.gmail.com/
>
> I do like [1] better, but can you fix up the checkpatch issues and
> resend it:
>
Sure. No problem.

> CHECK: Alignment should match open parenthesis
> #116: FILE: drivers/tty/serial/8250/8250_port.c:3342:
> +static void serial8250_console_fifo_write(struct uart_8250_port *up,
> +               const char *s, unsigned int count)
>
> CHECK: Logical continuations should be on the previous line
> #156: FILE: drivers/tty/serial/8250/8250_port.c:3412:
> +       use_fifo = (up->capabilities & UART_CAP_FIFO)
> +               && port->fifosize > 1
>
> CHECK: Logical continuations should be on the previous line
> #157: FILE: drivers/tty/serial/8250/8250_port.c:3413:
> +               && port->fifosize > 1
> +               && (serial_port_in(port, UART_FCR) & UART_FCR_ENABLE_FIFO)
>
> CHECK: Logical continuations should be on the previous line
> #163: FILE: drivers/tty/serial/8250/8250_port.c:3419:
> +                */
> +               && !(up->port.flags & UPF_CONS_FLOW);
>
>
>
> thanks,
>
> greg k-h
>


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2021-12-22 11:22 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-04 17:17 [PATCH v2] tty: serial: Use fifo in 8250 console driver wander
2021-12-20 15:45 ` Greg Kroah-Hartman
2021-12-20 17:02   ` Wander Costa
2021-12-21  8:12     ` Greg Kroah-Hartman
2021-12-22 11:22       ` Wander Costa

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).