All of lore.kernel.org
 help / color / mirror / Atom feed
From: Vladimir Oltean <olteanv@gmail.com>
To: broonie@kernel.org
Cc: linux-spi@vger.kernel.org, linux-kernel@vger.kernel.org,
	eha@deif.com, angelo@sysam.it, andrew.smirnov@gmail.com,
	gustavo@embeddedor.com, weic@nvidia.com, mhosny@nvidia.com
Subject: [PATCH 09/12] spi: spi-fsl-dspi: Accelerate transfers using larger word size if possible
Date: Thu,  5 Mar 2020 00:00:41 +0200	[thread overview]
Message-ID: <20200304220044.11193-10-olteanv@gmail.com> (raw)
In-Reply-To: <20200304220044.11193-1-olteanv@gmail.com>

From: Vladimir Oltean <vladimir.oltean@nxp.com>

This patch adds logic in the driver to transmit SPI buffers that use
bits_per_word=8 with a higher bits_per_word count (multiple of 8).

Currently the following (most common) modes are implemented:
 - 8 bits_per_word on 32-bit capable controllers
 - 8 bits_per_word on 16-bit capable controllers
 - 16 bits_per_word on 32-bit capable controllers

Transfers which are not accelerated are transferred with a hardware
bits_per_word value equal to the one of the SPI transfer.

The difference from just extending bits_per_word=32 at the spi_device
driver level is that endianness is different - the SPI core wants to
treat bits_per_word=32 buffers as arrays of u32 (i.e. words in host CPU
endianness). So to preserve endianness when clumping 8x4 bits into
32-bit words, one must perform conversion between CPU and standard (big)
endianness.

All appearances (both on the wire as well as in the buffers presented to
the peripheral driver) are preserved, just that accesses to the PUSHR
and POPR registers are now more efficient, since the same number of
reads/writes can now carry more data (2x more data on TX, 4x more data
on RX).

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
---
 drivers/spi/spi-fsl-dspi.c | 160 +++++++++++++++++++++++++++++++------
 1 file changed, 135 insertions(+), 25 deletions(-)

diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index 298c22def165..f5b802070d29 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -228,8 +228,6 @@ struct fsl_dspi {
 	const void				*tx;
 	void					*rx;
 	u16					tx_cmd;
-	u8					bits_per_word;
-	u8					bytes_per_word;
 	const struct fsl_dspi_devtype_data	*devtype_data;
 
 	wait_queue_head_t			waitq;
@@ -237,9 +235,70 @@ struct fsl_dspi {
 
 	struct fsl_dspi_dma			*dma;
 
+	int					oper_word_size;
+	int					oper_bits_per_word;
+
 	int					words_in_flight;
+
+	void (*host_to_dev)(struct fsl_dspi *dspi, u32 *txdata);
+	void (*dev_to_host)(struct fsl_dspi *dspi, u32 rxdata);
 };
 
+static void dspi_native_host_to_dev(struct fsl_dspi *dspi, u32 *txdata)
+{
+	memcpy(txdata, dspi->tx, dspi->oper_word_size);
+	dspi->tx += dspi->oper_word_size;
+}
+
+static void dspi_native_dev_to_host(struct fsl_dspi *dspi, u32 rxdata)
+{
+	memcpy(dspi->rx, &rxdata, dspi->oper_word_size);
+	dspi->rx += dspi->oper_word_size;
+}
+
+static void dspi_8on32_host_to_dev(struct fsl_dspi *dspi, u32 *txdata)
+{
+	*txdata = cpu_to_be32(*(u32 *)dspi->tx);
+	dspi->tx += sizeof(u32);
+}
+
+static void dspi_8on32_dev_to_host(struct fsl_dspi *dspi, u32 rxdata)
+{
+	*(u32 *)dspi->rx = be32_to_cpu(rxdata);
+	dspi->rx += sizeof(u32);
+}
+
+static void dspi_8on16_host_to_dev(struct fsl_dspi *dspi, u32 *txdata)
+{
+	*txdata = cpu_to_be16(*(u16 *)dspi->tx);
+	dspi->tx += sizeof(u16);
+}
+
+static void dspi_8on16_dev_to_host(struct fsl_dspi *dspi, u32 rxdata)
+{
+	*(u16 *)dspi->rx = be16_to_cpu(rxdata);
+	dspi->rx += sizeof(u16);
+}
+
+static void dspi_16on32_host_to_dev(struct fsl_dspi *dspi, u32 *txdata)
+{
+	u16 hi = *(u16 *)dspi->tx;
+	u16 lo = *(u16 *)(dspi->tx + 2);
+
+	*txdata = (u32)hi << 16 | lo;
+	dspi->tx += sizeof(u32);
+}
+
+static void dspi_16on32_dev_to_host(struct fsl_dspi *dspi, u32 rxdata)
+{
+	u16 hi = rxdata & 0xffff;
+	u16 lo = rxdata >> 16;
+
+	*(u16 *)dspi->rx = lo;
+	*(u16 *)(dspi->rx + 2) = hi;
+	dspi->rx += sizeof(u32);
+}
+
 /*
  * Pop one word from the TX buffer for pushing into the
  * PUSHR register (TX FIFO)
@@ -248,11 +307,9 @@ static u32 dspi_pop_tx(struct fsl_dspi *dspi)
 {
 	u32 txdata = 0;
 
-	if (dspi->tx) {
-		memcpy(&txdata, dspi->tx, dspi->bytes_per_word);
-		dspi->tx += dspi->bytes_per_word;
-	}
-	dspi->len -= dspi->bytes_per_word;
+	if (dspi->tx)
+		dspi->host_to_dev(dspi, &txdata);
+	dspi->len -= dspi->oper_word_size;
 	return txdata;
 }
 
@@ -274,9 +331,7 @@ static void dspi_push_rx(struct fsl_dspi *dspi, u32 rxdata)
 {
 	if (!dspi->rx)
 		return;
-
-	memcpy(dspi->rx, &rxdata, dspi->bytes_per_word);
-	dspi->rx += dspi->bytes_per_word;
+	dspi->dev_to_host(dspi, rxdata);
 }
 
 static void dspi_tx_dma_callback(void *arg)
@@ -393,8 +448,8 @@ static int dspi_dma_xfer(struct fsl_dspi *dspi)
 			   dspi->devtype_data->fifo_size;
 	while (curr_remaining_bytes) {
 		/* Check if current transfer fits the DMA buffer */
-		dma->curr_xfer_len = curr_remaining_bytes
-			/ dspi->bytes_per_word;
+		dma->curr_xfer_len = curr_remaining_bytes /
+				     dspi->oper_word_size;
 		if (dma->curr_xfer_len > bytes_per_buffer)
 			dma->curr_xfer_len = bytes_per_buffer;
 
@@ -404,8 +459,8 @@ static int dspi_dma_xfer(struct fsl_dspi *dspi)
 			goto exit;
 
 		} else {
-			const int len =
-				dma->curr_xfer_len * dspi->bytes_per_word;
+			const int len = dma->curr_xfer_len *
+					dspi->oper_word_size;
 			curr_remaining_bytes -= len;
 			message->actual_length += len;
 			if (curr_remaining_bytes < 0)
@@ -615,7 +670,7 @@ static void dspi_pushr_cmd_write(struct fsl_dspi *dspi)
 	 * generate a new PUSHR command with the final word that will have PCS
 	 * deasserted (not continued) here.
 	 */
-	if (dspi->len > dspi->bytes_per_word)
+	if (dspi->len > dspi->oper_word_size)
 		cmd |= SPI_PUSHR_CMD_CONT;
 	regmap_write(dspi->regmap_pushr, PUSHR_CMD, cmd);
 }
@@ -627,8 +682,9 @@ static void dspi_pushr_txdata_write(struct fsl_dspi *dspi, u16 txdata)
 
 static void dspi_xspi_write(struct fsl_dspi *dspi, int cnt)
 {
+	/* Update CTARE */
 	regmap_write(dspi->regmap, SPI_CTARE(0),
-		     SPI_FRAME_EBITS(dspi->bits_per_word) |
+		     SPI_FRAME_EBITS(dspi->oper_bits_per_word) |
 		     SPI_CTARE_DTCP(cnt));
 
 	/*
@@ -642,7 +698,7 @@ static void dspi_xspi_write(struct fsl_dspi *dspi, int cnt)
 		u32 data = dspi_pop_tx(dspi);
 
 		dspi_pushr_txdata_write(dspi, data & 0xFFFF);
-		if (dspi->bits_per_word > 16)
+		if (dspi->oper_bits_per_word > 16)
 			dspi_pushr_txdata_write(dspi, data >> 16);
 	}
 }
@@ -653,15 +709,20 @@ static void dspi_xspi_fifo_write(struct fsl_dspi *dspi)
 	int bytes_in_flight;
 
 	/* In XSPI mode each 32-bit word occupies 2 TX FIFO entries */
-	if (dspi->bits_per_word > 16)
+	if (dspi->oper_word_size == 4)
 		num_fifo_entries /= 2;
 
-	dspi->words_in_flight = dspi->len / dspi->bytes_per_word;
+	/*
+	 * Integer division intentionally trims off odd (or non-multiple of 4)
+	 * numbers of bytes at the end of the buffer, which will be sent next
+	 * time using a smaller oper_word_size.
+	 */
+	dspi->words_in_flight = dspi->len / dspi->oper_word_size;
 
 	if (dspi->words_in_flight > num_fifo_entries)
 		dspi->words_in_flight = num_fifo_entries;
 
-	bytes_in_flight = dspi->words_in_flight * dspi->bytes_per_word;
+	bytes_in_flight = dspi->words_in_flight * dspi->oper_word_size;
 
 	/*
 	 * If the PCS needs to de-assert (i.e. we're at the end of the buffer
@@ -689,7 +750,7 @@ static void dspi_eoq_fifo_write(struct fsl_dspi *dspi)
 	while (dspi->len && num_fifo_entries--) {
 		dspi->tx_cmd = xfer_cmd;
 		/* Request EOQF for last transfer in FIFO */
-		if (dspi->len == dspi->bytes_per_word || num_fifo_entries == 0)
+		if (dspi->len == dspi->oper_word_size || num_fifo_entries == 0)
 			dspi->tx_cmd |= SPI_PUSHR_CMD_EOQ;
 		/* Write combined TX FIFO and CMD FIFO entry */
 		dspi_pushr_write(dspi);
@@ -711,8 +772,56 @@ static void dspi_fifo_read(struct fsl_dspi *dspi)
 		dspi_push_rx(dspi, dspi_popr_read(dspi));
 }
 
+static void dspi_setup_accel(struct fsl_dspi *dspi)
+{
+	struct spi_transfer *xfer = dspi->cur_transfer;
+
+	/* Start off with maximum supported by hardware */
+	if (dspi->devtype_data->trans_mode == DSPI_XSPI_MODE)
+		dspi->oper_bits_per_word = 32;
+	else
+		dspi->oper_bits_per_word = 16;
+
+	/* And go down only if the buffer can't be sent with words this big */
+	do {
+		if (dspi->len >= DIV_ROUND_UP(dspi->oper_bits_per_word, 8))
+			break;
+
+		dspi->oper_bits_per_word /= 2;
+	} while (dspi->oper_bits_per_word > 8);
+
+	if (xfer->bits_per_word == 8 && dspi->oper_bits_per_word == 32) {
+		dspi->dev_to_host = dspi_8on32_dev_to_host;
+		dspi->host_to_dev = dspi_8on32_host_to_dev;
+	} else if (xfer->bits_per_word == 8 && dspi->oper_bits_per_word == 16) {
+		dspi->dev_to_host = dspi_8on16_dev_to_host;
+		dspi->host_to_dev = dspi_8on16_host_to_dev;
+	} else if (xfer->bits_per_word == 16 && dspi->oper_bits_per_word == 32) {
+		dspi->dev_to_host = dspi_16on32_dev_to_host;
+		dspi->host_to_dev = dspi_16on32_host_to_dev;
+	} else {
+		/* No acceleration needed (8<N<=16 on 16, 16<N<=32 on 32) */
+		dspi->dev_to_host = dspi_native_dev_to_host;
+		dspi->host_to_dev = dspi_native_host_to_dev;
+		dspi->oper_bits_per_word = xfer->bits_per_word;
+	}
+
+	dspi->oper_word_size = DIV_ROUND_UP(dspi->oper_bits_per_word, 8);
+
+	/*
+	 * Update CTAR here (code is common for both EOQ and XSPI modes).
+	 * We will update CTARE in the portion specific to XSPI, when we
+	 * also know the preload value (DTCP).
+	 */
+	regmap_write(dspi->regmap, SPI_CTAR(0),
+		     dspi->cur_chip->ctar_val |
+		     SPI_FRAME_BITS(dspi->oper_bits_per_word));
+}
+
 static void dspi_fifo_write(struct fsl_dspi *dspi)
 {
+	dspi_setup_accel(dspi);
+
 	if (dspi->devtype_data->trans_mode == DSPI_EOQ_MODE)
 		dspi_eoq_fifo_write(dspi);
 	else
@@ -726,7 +835,7 @@ static int dspi_rxtx(struct fsl_dspi *dspi)
 	int bytes_sent;
 
 	/* Update total number of bytes that were transferred */
-	bytes_sent = dspi->words_in_flight * dspi->bytes_per_word;
+	bytes_sent = dspi->words_in_flight * dspi->oper_word_size;
 	msg->actual_length += bytes_sent;
 	dspi->progress += bytes_sent / DIV_ROUND_UP(xfer->bits_per_word, 8);
 
@@ -824,13 +933,14 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr,
 		dspi->rx = transfer->rx_buf;
 		dspi->len = transfer->len;
 		dspi->progress = 0;
-		/* Validated transfer specific frame size (defaults applied) */
-		dspi->bits_per_word = transfer->bits_per_word;
-		dspi->bytes_per_word = DIV_ROUND_UP(dspi->bits_per_word, 8);
 
 		regmap_update_bits(dspi->regmap, SPI_MCR,
 				   SPI_MCR_CLR_TXF | SPI_MCR_CLR_RXF,
 				   SPI_MCR_CLR_TXF | SPI_MCR_CLR_RXF);
+		/*
+		 * Static CTAR setup for modes that don't dynamically adjust it
+		 * via dspi_setup_accel (aka for DMA)
+		 */
 		regmap_write(dspi->regmap, SPI_CTAR(0),
 			     dspi->cur_chip->ctar_val |
 			     SPI_FRAME_BITS(transfer->bits_per_word));
-- 
2.17.1


  parent reply	other threads:[~2020-03-04 22:02 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-03-04 22:00 [PATCH 00/12] TCFQ to XSPI migration for NXP DSPI driver Vladimir Oltean
2020-03-04 22:00 ` Vladimir Oltean
2020-03-04 22:00 ` [PATCH 01/12] spi: spi-fsl-dspi: Simplify bytes_per_word gymnastics Vladimir Oltean
2020-03-05 14:38   ` Applied "spi: spi-fsl-dspi: Simplify bytes_per_word gymnastics" to the spi tree Mark Brown
2020-03-05 14:38     ` Mark Brown
2020-03-04 22:00 ` [PATCH 02/12] spi: spi-fsl-dspi: Remove unused chip->void_write_data Vladimir Oltean
2020-03-05 14:37   ` Applied "spi: spi-fsl-dspi: Remove unused chip->void_write_data" to the spi tree Mark Brown
2020-03-05 14:37     ` Mark Brown
2020-03-04 22:00 ` [PATCH 03/12] spi: spi-fsl-dspi: Don't mask off undefined bits Vladimir Oltean
2020-03-05 14:37   ` Applied "spi: spi-fsl-dspi: Don't mask off undefined bits" to the spi tree Mark Brown
2020-03-05 14:37     ` Mark Brown
2020-03-04 22:00 ` [PATCH 04/12] spi: spi-fsl-dspi: Add comments around dspi_pop_tx and dspi_push_rx functions Vladimir Oltean
2020-03-05 14:37   ` Applied "spi: spi-fsl-dspi: Add comments around dspi_pop_tx and dspi_push_rx functions" to the spi tree Mark Brown
2020-03-05 14:37     ` Mark Brown
2020-03-04 22:00 ` [PATCH 05/12] spi: spi-fsl-dspi: Rename fifo_{read,write} and {tx,cmd}_fifo_write Vladimir Oltean
2020-03-05 14:37   ` Applied "spi: spi-fsl-dspi: Rename fifo_{read,write} and {tx,cmd}_fifo_write" to the spi tree Mark Brown
2020-03-05 14:37     ` Mark Brown
2020-03-04 22:00 ` [PATCH 06/12] spi: spi-fsl-dspi: Implement .max_message_size method for EOQ mode Vladimir Oltean
2020-03-05 14:37   ` Applied "spi: spi-fsl-dspi: Implement .max_message_size method for EOQ mode" to the spi tree Mark Brown
2020-03-05 14:37     ` Mark Brown
2020-03-04 22:00 ` [PATCH 07/12] spi: Do spi_take_timestamp_pre for as many times as necessary Vladimir Oltean
2020-03-05 12:12   ` Mark Brown
2020-03-05 12:12     ` Mark Brown
2020-03-05 13:00     ` Vladimir Oltean
2020-03-05 13:04       ` Mark Brown
2020-03-05 13:04         ` Mark Brown
2020-03-05 13:13         ` Vladimir Oltean
2020-03-05 13:13           ` Vladimir Oltean
2020-03-05 13:16           ` Mark Brown
2020-03-05 13:16             ` Mark Brown
2020-03-05 14:37   ` Applied "spi: Do spi_take_timestamp_pre for as many times as necessary" to the spi tree Mark Brown
2020-03-05 14:37     ` Mark Brown
2020-03-04 22:00 ` [PATCH 08/12] spi: spi-fsl-dspi: Convert TCFQ users to XSPI FIFO mode Vladimir Oltean
2020-03-05 14:37   ` Applied "spi: spi-fsl-dspi: Convert TCFQ users to XSPI FIFO mode" to the spi tree Mark Brown
2020-03-05 14:37     ` Mark Brown
2020-03-04 22:00 ` Vladimir Oltean [this message]
2020-03-05 14:37   ` Applied "spi: spi-fsl-dspi: Accelerate transfers using larger word size if possible" " Mark Brown
2020-03-05 14:37     ` Mark Brown
2020-03-04 22:00 ` [PATCH 10/12] spi: spi-fsl-dspi: Optimize dspi_setup_accel for lowest interrupt count Vladimir Oltean
2020-03-05 14:37   ` Applied "spi: spi-fsl-dspi: Optimize dspi_setup_accel for lowest interrupt count" to the spi tree Mark Brown
2020-03-05 14:37     ` Mark Brown
2020-03-04 22:00 ` [PATCH 11/12] spi: spi-fsl-dspi: Use EOQ for last word in buffer even for XSPI mode Vladimir Oltean
2020-03-04 22:00   ` Vladimir Oltean
2020-03-05 14:37   ` Applied "spi: spi-fsl-dspi: Use EOQ for last word in buffer even for XSPI mode" to the spi tree Mark Brown
2020-03-05 14:37     ` Mark Brown
2020-03-04 22:00 ` [PATCH 12/12] spi: spi-fsl-dspi: Take software timestamp in dspi_fifo_write Vladimir Oltean
2020-03-04 22:00   ` Vladimir Oltean
2020-03-05 14:37   ` Applied "spi: spi-fsl-dspi: Take software timestamp in dspi_fifo_write" to the spi tree Mark Brown
2020-03-05 14:37     ` Mark Brown

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200304220044.11193-10-olteanv@gmail.com \
    --to=olteanv@gmail.com \
    --cc=andrew.smirnov@gmail.com \
    --cc=angelo@sysam.it \
    --cc=broonie@kernel.org \
    --cc=eha@deif.com \
    --cc=gustavo@embeddedor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-spi@vger.kernel.org \
    --cc=mhosny@nvidia.com \
    --cc=weic@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.