* [PATCH v2 0/6] Enable DMA for daVinci SPI controller
From: Frode Isaksen @ 2017-02-17 10:38 UTC
  To: nsekhar@ti.com, khilman@baylibre.com, ptitiano@baylibre.com,
	linux-arm-kernel@lists.infradead.org
  Cc: broonie@kernel.org, linux-spi@vger.kernel.org

Enable DMA for the daVinci SPI controller using the SPI framework DMA
mapping function. Add fixes for errors caused by a large number of SG
entries and by the VIVT cache.
Also, add an extra option to the spi-loopback-test module to catch
errors caused by many SG entries.
This has been tested using spidev_test, spi-loopback-test and by
mounting and reading/writing a UBIFS volume on SPI flash.
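
As an illustration, an rx-only transfer of the kind these paths must
handle looks roughly like this ('spi', 'buf' and 'len' are assumed
context; the sketch is illustrative only):

	struct spi_transfer t = {
		.rx_buf	= buf,	/* kmalloc'ed or vmalloc'ed, e.g. from UBIFS */
		.len	= len,
	};
	struct spi_message m;
	int ret;

	spi_message_init_with_transfers(&m, &t, 1);
	ret = spi_sync(spi, &m);	/* the core DMA-maps it if can_dma() agrees */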

v2 changes:
- Use the SPI_MASTER_MUST_RX/SPI_MASTER_MUST_TX flags for dummy buffer
allocation (thanks Mark Brown).
- For rx-only transfers, use the rx buffer as the dummy tx buffer to
avoid errors with more than 20 SG entries.
- Improve comments and use #define for constants.
- Drop the patches that set loopback mode and limit the transfer size
in spi-loopback-test.

Thanks,
Frode

* [PATCH v2 1/6] spi: davinci: Use SPI framework to handle DMA mapping
From: Frode Isaksen @ 2017-02-17 10:38 UTC
  To: nsekhar@ti.com, khilman@baylibre.com, ptitiano@baylibre.com,
	linux-arm-kernel@lists.infradead.org
  Cc: broonie@kernel.org, linux-spi@vger.kernel.org, Fabien Parent,
	Frode Isaksen

From: Fabien Parent <fparent@baylibre.com>

Upper layers like MTD can pass vmalloc'ed buffers to the SPI driver,
and the current implementation fails to map this kind of buffer
(dma_map_single() does not work on vmalloc addresses). The SPI
framework is able to detect the best way to handle and map buffers.
This commit updates the davinci SPI driver to use the SPI framework
for the DMA mapping of buffers coming from an upper layer.
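
The framework side of this contract, as a condensed sketch (an
illustration of the drivers/spi/spi.c behaviour, not the exact core
source; example_can_dma() is a made-up name):

	static bool example_can_dma(struct spi_master *master,
				    struct spi_device *spi,
				    struct spi_transfer *xfer)
	{
		/* called by the core for every transfer it maps */
		return true;		/* driver policy goes here */
	}

	/*
	 * When ->can_dma() returns true, the core builds xfer->tx_sg and
	 * xfer->rx_sg via spi_map_buf(), which walks vmalloc'ed buffers
	 * page by page (vmalloc_to_page()) instead of requiring the
	 * physically contiguous memory that dma_map_single() assumes.
	 */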

Signed-off-by: Fabien Parent <fparent@baylibre.com>
Signed-off-by: Frode Isaksen <fisaksen@baylibre.com>
---
 drivers/spi/spi-davinci.c | 70 +++++++++++++----------------------------------
 1 file changed, 19 insertions(+), 51 deletions(-)

diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c
index 02fb967..5b164e5 100644
--- a/drivers/spi/spi-davinci.c
+++ b/drivers/spi/spi-davinci.c
@@ -467,6 +467,20 @@ static void davinci_spi_cleanup(struct spi_device *spi)
 		kfree(spicfg);
 }
 
+static inline bool __davinci_spi_can_dma(struct spi_device *spi)
+{
+	struct davinci_spi_config *spicfg = spi->controller_data;
+
+	return spicfg ? spicfg->io_type == SPI_IO_TYPE_DMA : false;
+}
+
+static bool davinci_spi_can_dma(struct spi_master *master,
+				struct spi_device *spi,
+				struct spi_transfer *xfer)
+{
+	return __davinci_spi_can_dma(spi);
+}
+
 static int davinci_spi_check_error(struct davinci_spi *dspi, int int_status)
 {
 	struct device *sdev = dspi->bitbang.master->dev.parent;
@@ -581,8 +595,6 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
 	struct davinci_spi_config *spicfg;
 	struct davinci_spi_platform_data *pdata;
 	unsigned uninitialized_var(rx_buf_count);
-	void *dummy_buf = NULL;
-	struct scatterlist sg_rx, sg_tx;
 
 	dspi = spi_master_get_devdata(spi->master);
 	pdata = &dspi->pdata;
@@ -630,51 +642,18 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
 		};
 		struct dma_async_tx_descriptor *rxdesc;
 		struct dma_async_tx_descriptor *txdesc;
-		void *buf;
-
-		dummy_buf = kzalloc(t->len, GFP_KERNEL);
-		if (!dummy_buf)
-			goto err_alloc_dummy_buf;
 
 		dmaengine_slave_config(dspi->dma_rx, &dma_rx_conf);
 		dmaengine_slave_config(dspi->dma_tx, &dma_tx_conf);
 
-		sg_init_table(&sg_rx, 1);
-		if (!t->rx_buf)
-			buf = dummy_buf;
-		else
-			buf = t->rx_buf;
-		t->rx_dma = dma_map_single(&spi->dev, buf,
-				t->len, DMA_FROM_DEVICE);
-		if (dma_mapping_error(&spi->dev, !t->rx_dma)) {
-			ret = -EFAULT;
-			goto err_rx_map;
-		}
-		sg_dma_address(&sg_rx) = t->rx_dma;
-		sg_dma_len(&sg_rx) = t->len;
-
-		sg_init_table(&sg_tx, 1);
-		if (!t->tx_buf)
-			buf = dummy_buf;
-		else
-			buf = (void *)t->tx_buf;
-		t->tx_dma = dma_map_single(&spi->dev, buf,
-				t->len, DMA_TO_DEVICE);
-		if (dma_mapping_error(&spi->dev, t->tx_dma)) {
-			ret = -EFAULT;
-			goto err_tx_map;
-		}
-		sg_dma_address(&sg_tx) = t->tx_dma;
-		sg_dma_len(&sg_tx) = t->len;
-
 		rxdesc = dmaengine_prep_slave_sg(dspi->dma_rx,
-				&sg_rx, 1, DMA_DEV_TO_MEM,
+				t->rx_sg.sgl, t->rx_sg.nents, DMA_DEV_TO_MEM,
 				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 		if (!rxdesc)
 			goto err_desc;
 
 		txdesc = dmaengine_prep_slave_sg(dspi->dma_tx,
-				&sg_tx, 1, DMA_MEM_TO_DEV,
+				t->tx_sg.sgl, t->tx_sg.nents, DMA_MEM_TO_DEV,
 				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
 		if (!txdesc)
 			goto err_desc;
@@ -710,16 +689,9 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
 	}
 
 	clear_io_bits(dspi->base + SPIINT, SPIINT_MASKALL);
-	if (spicfg->io_type == SPI_IO_TYPE_DMA) {
+	if (spicfg->io_type == SPI_IO_TYPE_DMA)
 		clear_io_bits(dspi->base + SPIINT, SPIINT_DMA_REQ_EN);
 
-		dma_unmap_single(&spi->dev, t->rx_dma,
-				t->len, DMA_FROM_DEVICE);
-		dma_unmap_single(&spi->dev, t->tx_dma,
-				t->len, DMA_TO_DEVICE);
-		kfree(dummy_buf);
-	}
-
 	clear_io_bits(dspi->base + SPIGCR1, SPIGCR1_SPIENA_MASK);
 	set_io_bits(dspi->base + SPIGCR1, SPIGCR1_POWERDOWN_MASK);
 
@@ -742,12 +714,6 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
 	return t->len;
 
 err_desc:
-	dma_unmap_single(&spi->dev, t->tx_dma, t->len, DMA_TO_DEVICE);
-err_tx_map:
-	dma_unmap_single(&spi->dev, t->rx_dma, t->len, DMA_FROM_DEVICE);
-err_rx_map:
-	kfree(dummy_buf);
-err_alloc_dummy_buf:
 	return ret;
 }
 
@@ -988,8 +954,10 @@ static int davinci_spi_probe(struct platform_device *pdev)
 	master->bus_num = pdev->id;
 	master->num_chipselect = pdata->num_chipselect;
 	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(2, 16);
+	master->flags = (SPI_MASTER_MUST_RX | SPI_MASTER_MUST_TX);
 	master->setup = davinci_spi_setup;
 	master->cleanup = davinci_spi_cleanup;
+	master->can_dma = davinci_spi_can_dma;
 
 	dspi->bitbang.chipselect = davinci_spi_chipselect;
 	dspi->bitbang.setup_transfer = davinci_spi_setup_transfer;
-- 
2.7.4

* [PATCH v2 2/6] spi: davinci: enable DMA when channels are defined in DT
From: Frode Isaksen @ 2017-02-17 10:38 UTC
  To: nsekhar@ti.com, khilman@baylibre.com, ptitiano@baylibre.com,
	linux-arm-kernel@lists.infradead.org
  Cc: broonie@kernel.org, linux-spi@vger.kernel.org, Fabien Parent,
	Frode Isaksen

From: Fabien Parent <fparent@baylibre.com>

When booting with DT, the SPI driver always uses the SPI_IO_TYPE_INTR
mode to transfer data, even if DMA channels are defined in the DT.

This commit changes the behaviour to select the SPI_IO_TYPE_DMA mode
when DMA channels are defined in the DT, and keeps SPI_IO_TYPE_INTR
when they are not.
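
For reference, a hypothetical DT fragment providing such channels (the
unit address and eDMA cells below are illustrative, not taken from this
series; the binding uses the channel names "rx" and "tx"):

	spi1: spi@30e000 {
		compatible = "ti,da830-spi";
		/* reg, interrupts, ... omitted */
		dmas = <&edma0 18 0>, <&edma0 19 0>;
		dma-names = "rx", "tx";
	};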

Signed-off-by: Fabien Parent <fparent@baylibre.com>
Signed-off-by: Frode Isaksen <fisaksen@baylibre.com>
---
 drivers/spi/spi-davinci.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c
index 5b164e5..9823908 100644
--- a/drivers/spi/spi-davinci.c
+++ b/drivers/spi/spi-davinci.c
@@ -389,6 +389,7 @@ static int davinci_spi_of_setup(struct spi_device *spi)
 {
 	struct davinci_spi_config *spicfg = spi->controller_data;
 	struct device_node *np = spi->dev.of_node;
+	struct davinci_spi *dspi = spi_master_get_devdata(spi->master);
 	u32 prop;
 
 	if (spicfg == NULL && np) {
@@ -400,6 +401,9 @@ static int davinci_spi_of_setup(struct spi_device *spi)
 		if (!of_property_read_u32(np, "ti,spi-wdelay", &prop))
 			spicfg->wdelay = (u8)prop;
 		spi->controller_data = spicfg;
+
+		if (dspi->dma_rx && dspi->dma_tx)
+			spicfg->io_type = SPI_IO_TYPE_DMA;
 	}
 
 	return 0;
-- 
2.7.4

* [PATCH v2 3/6] spi: davinci: use rx buffer as dummy tx buffer
From: Frode Isaksen @ 2017-02-17 10:38 UTC
  To: nsekhar@ti.com, khilman@baylibre.com, ptitiano@baylibre.com,
	linux-arm-kernel@lists.infradead.org
  Cc: broonie@kernel.org, linux-spi@vger.kernel.org, Frode Isaksen

When doing an rx-only transfer, the transfer fails if the number of
SG entries exceeds 20. This happens because the eDMA engine is limited
to 20 SG entries in one transaction (with one SG entry per 4KiB page
of a vmalloc'ed buffer, any such transfer larger than 80KiB hits this
limit), and when the DMA transaction is resumed (which takes > 150us),
rx errors occur because the slave is still transmitting.
Fix this by using the rx buffer as the dummy tx buffer, so that
resuming the rx transaction happens at the same time as resuming the
tx transaction.

Signed-off-by: Frode Isaksen <fisaksen@baylibre.com>
---
 drivers/spi/spi-davinci.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c
index 9823908..2632ae0 100644
--- a/drivers/spi/spi-davinci.c
+++ b/drivers/spi/spi-davinci.c
@@ -656,6 +656,12 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
 		if (!rxdesc)
 			goto err_desc;
 
+		if (!t->tx_buf) {
+			/* use rx buffer as dummy tx buffer */
+			t->tx_sg.sgl = t->rx_sg.sgl;
+			t->tx_sg.nents = t->rx_sg.nents;
+		}
+
 		txdesc = dmaengine_prep_slave_sg(dspi->dma_tx,
 				t->tx_sg.sgl, t->tx_sg.nents, DMA_MEM_TO_DEV,
 				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
@@ -958,7 +964,7 @@ static int davinci_spi_probe(struct platform_device *pdev)
 	master->bus_num = pdev->id;
 	master->num_chipselect = pdata->num_chipselect;
 	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(2, 16);
-	master->flags = (SPI_MASTER_MUST_RX | SPI_MASTER_MUST_TX);
+	master->flags = SPI_MASTER_MUST_RX;
 	master->setup = davinci_spi_setup;
 	master->cleanup = davinci_spi_cleanup;
 	master->can_dma = davinci_spi_can_dma;
-- 
2.7.4

* [PATCH v2 4/6] spi: davinci: flush caches when performing DMA
From: Frode Isaksen @ 2017-02-17 10:38 UTC
  To: nsekhar@ti.com, khilman@baylibre.com, ptitiano@baylibre.com,
	linux-arm-kernel@lists.infradead.org
  Cc: broonie@kernel.org, linux-spi@vger.kernel.org, Frode Isaksen

This CPU has VIVT caches, so the cache must be flushed for vmalloc'ed
buffers, since the address may be aliased (the same physical address
used by multiple virtual addresses).
This fixes errors when mounting and reading/writing UBIFS volumes on
SPI NOR flash.

Signed-off-by: Frode Isaksen <fisaksen@baylibre.com>
---
 drivers/spi/spi-davinci.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c
index 2632ae0..b69a370 100644
--- a/drivers/spi/spi-davinci.c
+++ b/drivers/spi/spi-davinci.c
@@ -29,6 +29,7 @@
 #include <linux/spi/spi.h>
 #include <linux/spi/spi_bitbang.h>
 #include <linux/slab.h>
+#include <asm/cacheflush.h>
 
 #include <linux/platform_data/spi-davinci.h>
 
@@ -650,6 +651,10 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
 		dmaengine_slave_config(dspi->dma_rx, &dma_rx_conf);
 		dmaengine_slave_config(dspi->dma_tx, &dma_tx_conf);
 
+		if (is_vmalloc_addr(t->rx_buf))
+			/* VIVT cache: flush since addr. may be aliased */
+			flush_kernel_vmap_range((void *)t->rx_buf, t->len);
+
 		rxdesc = dmaengine_prep_slave_sg(dspi->dma_rx,
 				t->rx_sg.sgl, t->rx_sg.nents, DMA_DEV_TO_MEM,
 				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
@@ -660,7 +665,9 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
 			/* use rx buffer as dummy tx buffer */
 			t->tx_sg.sgl = t->rx_sg.sgl;
 			t->tx_sg.nents = t->rx_sg.nents;
-		}
+		} else if (is_vmalloc_addr(t->tx_buf))
+			/* VIVT cache: flush since addr. may be aliased */
+			flush_kernel_vmap_range((void *)t->tx_buf, t->len);
 
 		txdesc = dmaengine_prep_slave_sg(dspi->dma_tx,
 				t->tx_sg.sgl, t->tx_sg.nents, DMA_MEM_TO_DEV,
@@ -699,8 +706,12 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
 	}
 
 	clear_io_bits(dspi->base + SPIINT, SPIINT_MASKALL);
-	if (spicfg->io_type == SPI_IO_TYPE_DMA)
+	if (spicfg->io_type == SPI_IO_TYPE_DMA) {
 		clear_io_bits(dspi->base + SPIINT, SPIINT_DMA_REQ_EN);
+		if (is_vmalloc_addr(t->rx_buf))
+			/* VIVT cache: invalidate since addr. may be aliased */
+			invalidate_kernel_vmap_range((void *)t->rx_buf, t->len);
+	}
 
 	clear_io_bits(dspi->base + SPIGCR1, SPIGCR1_SPIENA_MASK);
 	set_io_bits(dspi->base + SPIGCR1, SPIGCR1_POWERDOWN_MASK);
-- 
2.7.4

* [PATCH v2 5/6] spi: davinci: do not use DMA if transfer length is less than 16
From: Frode Isaksen @ 2017-02-17 10:38 UTC
  To: nsekhar@ti.com, khilman@baylibre.com, ptitiano@baylibre.com,
	linux-arm-kernel@lists.infradead.org
  Cc: broonie@kernel.org, linux-spi@vger.kernel.org, Frode Isaksen

Using PIO instead of DMA for short transfers gives a higher bitrate
and a lower CPU load, since the DMA setup and completion overhead
dominates at small transfer lengths. So do not use DMA when the
transfer length is less than 16 bytes.

Signed-off-by: Frode Isaksen <fisaksen@baylibre.com>
---
 drivers/spi/spi-davinci.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c
index b69a370..826bff1 100644
--- a/drivers/spi/spi-davinci.c
+++ b/drivers/spi/spi-davinci.c
@@ -110,6 +110,8 @@
 #define SPIDEF		0x4c
 #define SPIFMT0		0x50
 
+#define DMA_MIN_BYTES	16
+
 /* SPI Controller driver's private data. */
 struct davinci_spi {
 	struct spi_bitbang	bitbang;
@@ -483,7 +485,7 @@ static bool davinci_spi_can_dma(struct spi_master *master,
 				struct spi_device *spi,
 				struct spi_transfer *xfer)
 {
-	return __davinci_spi_can_dma(spi);
+	return __davinci_spi_can_dma(spi) && (xfer->len >= DMA_MIN_BYTES);
 }
 
 static int davinci_spi_check_error(struct davinci_spi *dspi, int int_status)
@@ -622,10 +624,9 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
 
 	reinit_completion(&dspi->done);
 
-	if (spicfg->io_type == SPI_IO_TYPE_INTR)
-		set_io_bits(dspi->base + SPIINT, SPIINT_MASKINT);
-
-	if (spicfg->io_type != SPI_IO_TYPE_DMA) {
+	if (!davinci_spi_can_dma(spi->master, spi, t)) {
+		if (spicfg->io_type != SPI_IO_TYPE_POLL)
+			set_io_bits(dspi->base + SPIINT, SPIINT_MASKINT);
 		/* start the transfer */
 		dspi->wcount--;
 		tx_data = dspi->get_tx(dspi);
@@ -706,7 +707,7 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
 	}
 
 	clear_io_bits(dspi->base + SPIINT, SPIINT_MASKALL);
-	if (spicfg->io_type == SPI_IO_TYPE_DMA) {
+	if (davinci_spi_can_dma(spi->master, spi, t)) {
 		clear_io_bits(dspi->base + SPIINT, SPIINT_DMA_REQ_EN);
 		if (is_vmalloc_addr(t->rx_buf))
 			/* VIVT cache: invalidate since addr. may be aliased */
-- 
2.7.4

* [PATCH v2 6/6] spi: loopback-test: add option to use vmalloc'ed buffers
From: Frode Isaksen @ 2017-02-17 10:38 UTC
  To: nsekhar@ti.com, khilman@baylibre.com, ptitiano@baylibre.com,
	linux-arm-kernel@lists.infradead.org
  Cc: broonie@kernel.org, linux-spi@vger.kernel.org, Frode Isaksen

Using vmalloc'ed buffers will use one SG entry for each page, which
may provoke DMA errors for large transfers. vmalloc'ed buffers may
also cause errors on CPUs with VIVT caches. Add an option to use
vmalloc'ed buffers (e.g. "modprobe spi-loopback-test use_vmalloc=1")
to catch these errors when testing.

Signed-off-by: Frode Isaksen <fisaksen@baylibre.com>
---
 drivers/spi/spi-loopback-test.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/drivers/spi/spi-loopback-test.c b/drivers/spi/spi-loopback-test.c
index 50e620f..86686dd 100644
--- a/drivers/spi/spi-loopback-test.c
+++ b/drivers/spi/spi-loopback-test.c
@@ -55,6 +55,12 @@ module_param(run_only_test, int, 0);
 MODULE_PARM_DESC(run_only_test,
 		 "only run the test with this number (0-based !)");
 
+/* use vmalloc'ed buffers */
+static int use_vmalloc;
+module_param(use_vmalloc, int, 0644);
+MODULE_PARM_DESC(use_vmalloc,
+		 "use vmalloc'ed buffers instead of kmalloc'ed");
+
 /* the actual tests to execute */
 static struct spi_test spi_tests[] = {
 	{
@@ -965,13 +971,19 @@ int spi_test_run_tests(struct spi_device *spi,
 	/* allocate rx/tx buffers of 128kB size without devm
 	 * in the hope that is on a page boundary
 	 */
-	rx = kzalloc(SPI_TEST_MAX_SIZE_PLUS, GFP_KERNEL);
+	if (use_vmalloc)
+		rx = vmalloc(SPI_TEST_MAX_SIZE_PLUS);
+	else
+		rx = kzalloc(SPI_TEST_MAX_SIZE_PLUS, GFP_KERNEL);
 	if (!rx) {
 		ret = -ENOMEM;
 		goto out;
 	}
 
-	tx = kzalloc(SPI_TEST_MAX_SIZE_PLUS, GFP_KERNEL);
+	if (use_vmalloc)
+		tx = vmalloc(SPI_TEST_MAX_SIZE_PLUS);
+	else
+		tx = kzalloc(SPI_TEST_MAX_SIZE_PLUS, GFP_KERNEL);
 	if (!tx) {
 		ret = -ENOMEM;
 		goto out;
@@ -999,8 +1011,13 @@ int spi_test_run_tests(struct spi_device *spi,
 	}
 
 out:
-	kfree(rx);
-	kfree(tx);
+	if (use_vmalloc) {
+		vfree(rx);
+		vfree(tx);
+	} else {
+		kfree(rx);
+		kfree(tx);
+	}
 	return ret;
 }
 EXPORT_SYMBOL_GPL(spi_test_run_tests);
-- 
2.7.4

* Re: [PATCH v2 4/6] spi: davinci: flush caches when performing DMA
From: Vignesh R @ 2017-02-17 10:57 UTC
  To: Frode Isaksen, nsekhar@ti.com, khilman@baylibre.com,
	ptitiano@baylibre.com, linux-arm-kernel@lists.infradead.org
  Cc: broonie@kernel.org, linux-spi@vger.kernel.org



On Friday 17 February 2017 04:08 PM, Frode Isaksen wrote:
> This CPU has VIVT caches, so the cache must be flushed for vmalloc'ed
> buffers, since the address may be aliased (the same physical address
> used by multiple virtual addresses).
> This fixes errors when mounting and reading/writing UBIFS volumes on
> SPI NOR flash.
> 
> Signed-off-by: Frode Isaksen <fisaksen@baylibre.com>
> ---
>  drivers/spi/spi-davinci.c | 15 +++++++++++++--
>  1 file changed, 13 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c
> index 2632ae0..b69a370 100644
> --- a/drivers/spi/spi-davinci.c
> +++ b/drivers/spi/spi-davinci.c
> @@ -29,6 +29,7 @@
>  #include <linux/spi/spi.h>
>  #include <linux/spi/spi_bitbang.h>
>  #include <linux/slab.h>
> +#include <asm/cacheflush.h>
>  
>  #include <linux/platform_data/spi-davinci.h>
>  
> @@ -650,6 +651,10 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
>  		dmaengine_slave_config(dspi->dma_rx, &dma_rx_conf);
>  		dmaengine_slave_config(dspi->dma_tx, &dma_tx_conf);
>  
> +		if (is_vmalloc_addr(t->rx_buf))
> +			/* VIVT cache: flush since addr. may be aliased */
> +			flush_kernel_vmap_range((void *)t->rx_buf, t->len);
> +
>  		rxdesc = dmaengine_prep_slave_sg(dspi->dma_rx,
>  				t->rx_sg.sgl, t->rx_sg.nents, DMA_DEV_TO_MEM,
>  				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
> @@ -660,7 +665,9 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
>  			/* use rx buffer as dummy tx buffer */
>  			t->tx_sg.sgl = t->rx_sg.sgl;
>  			t->tx_sg.nents = t->rx_sg.nents;
> -		}
> +		} else if (is_vmalloc_addr(t->tx_buf))
> +			/* VIVT cache: flush since addr. may be aliased */
> +			flush_kernel_vmap_range((void *)t->tx_buf, t->len);
>  

The SPI core calls dma_unmap_sg(), which is supposed to flush caches.
If the flush_kernel_vmap_range() call is required here to flush the
actual cache lines, then what do the dma_unmap_* calls in the SPI core
end up flushing? Will they flush a different alias of the same physical
address? If so, isn't that undesired?

>  		txdesc = dmaengine_prep_slave_sg(dspi->dma_tx,
>  				t->tx_sg.sgl, t->tx_sg.nents, DMA_MEM_TO_DEV,
> @@ -699,8 +706,12 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
>  	}
>  
>  	clear_io_bits(dspi->base + SPIINT, SPIINT_MASKALL);
> -	if (spicfg->io_type == SPI_IO_TYPE_DMA)
> +	if (spicfg->io_type == SPI_IO_TYPE_DMA) {
>  		clear_io_bits(dspi->base + SPIINT, SPIINT_DMA_REQ_EN);
> +		if (is_vmalloc_addr(t->rx_buf))
> +			/* VIVT cache: invalidate since addr. may be aliased */
> +			invalidate_kernel_vmap_range((void *)t->rx_buf, t->len);
> +	}
>  
>  	clear_io_bits(dspi->base + SPIGCR1, SPIGCR1_SPIENA_MASK);
>  	set_io_bits(dspi->base + SPIGCR1, SPIGCR1_POWERDOWN_MASK);
> 

-- 
Regards
Vignesh

* Re: [PATCH v2 4/6] spi: davinci: flush caches when performing DMA
From: Russell King - ARM Linux @ 2017-02-17 11:22 UTC
  To: Vignesh R
  Cc: Frode Isaksen, nsekhar@ti.com, khilman@baylibre.com,
	ptitiano@baylibre.com, linux-arm-kernel@lists.infradead.org,
	broonie@kernel.org, linux-spi@vger.kernel.org

On Fri, Feb 17, 2017 at 04:27:28PM +0530, Vignesh R wrote:
> On Friday 17 February 2017 04:08 PM, Frode Isaksen wrote:
> > @@ -650,6 +651,10 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
> >  		dmaengine_slave_config(dspi->dma_rx, &dma_rx_conf);
> >  		dmaengine_slave_config(dspi->dma_tx, &dma_tx_conf);
> >  
> > +		if (is_vmalloc_addr(t->rx_buf))
> > +			/* VIVT cache: flush since addr. may be aliased */
> > +			flush_kernel_vmap_range((void *)t->rx_buf, t->len);
> > +
> >  		rxdesc = dmaengine_prep_slave_sg(dspi->dma_rx,
> >  				t->rx_sg.sgl, t->rx_sg.nents, DMA_DEV_TO_MEM,
> >  				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
> > @@ -660,7 +665,9 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
> >  			/* use rx buffer as dummy tx buffer */
> >  			t->tx_sg.sgl = t->rx_sg.sgl;
> >  			t->tx_sg.nents = t->rx_sg.nents;
> > -		}
> > +		} else if (is_vmalloc_addr(t->tx_buf))
> > +			/* VIVT cache: flush since addr. may be aliased */
> > +			flush_kernel_vmap_range((void *)t->tx_buf, t->len);
> >  
> 
> The SPI core calls dma_unmap_sg(), which is supposed to flush caches.
> If the flush_kernel_vmap_range() call is required here to flush the
> actual cache lines, then what do the dma_unmap_* calls in the SPI core
> end up flushing?

The DMA API deals with the _kernel_ lowmem mapping.  It has no knowledge
of any other aliases in the system.  When you have a VIVT cache (as all
old ARM CPUs have) then if you access the memory through a different
alias from the kernel lowmem mapping (iow, vmalloc) then the DMA API
can't help you.

However, the correct place to use flush_kernel_vmap_range() etc is not
in drivers - it's supposed to be done in the callers that know that
the memory is aliased.

For full details on these flushing functions, see cachetlb.txt.  This
does not remove the requirement to also use the DMA API.

=== cachetlb.txt ===

The final category of APIs is for I/O to deliberately aliased address
ranges inside the kernel.  Such aliases are set up by use of the
vmap/vmalloc API.  Since kernel I/O goes via physical pages, the I/O
subsystem assumes that the user mapping and kernel offset mapping are
the only aliases.  This isn't true for vmap aliases, so anything in
the kernel trying to do I/O to vmap areas must manually manage
coherency.  It must do this by flushing the vmap range before doing
I/O and invalidating it after the I/O returns.

  void flush_kernel_vmap_range(void *vaddr, int size)
       flushes the kernel cache for a given virtual address range in
       the vmap area.  This is to make sure that any data the kernel
       modified in the vmap range is made visible to the physical
       page.  The design is to make this area safe to perform I/O on.
       Note that this API does *not* also flush the offset map alias
       of the area.

  void invalidate_kernel_vmap_range(void *vaddr, int size) invalidates
       the cache for a given virtual address range in the vmap area
       which prevents the processor from making the cache stale by
       speculatively reading data while the I/O was occurring to the
       physical pages.  This is only necessary for data reads into the
       vmap area.
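
Expressed as a minimal sketch (is_vmalloc_addr() and the two
*_kernel_vmap_range() helpers are the real kernel APIs; the other
names are illustrative):

	if (is_vmalloc_addr(buf))
		/* make CPU writes through the vmap alias visible */
		flush_kernel_vmap_range(buf, len);

	perform_io(buf, len);		/* hypothetical DMA transfer */

	if (io_reads_into_buf && is_vmalloc_addr(buf))
		/* drop lines speculatively filled during the DMA */
		invalidate_kernel_vmap_range(buf, len);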


-- 
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
according to speedtest.net.

* Re: [PATCH v2 4/6] spi: davinci: flush caches when performing DMA
From: Frode Isaksen @ 2017-02-17 11:36 UTC
  To: Russell King - ARM Linux, Vignesh R
  Cc: nsekhar@ti.com, khilman@baylibre.com, ptitiano@baylibre.com,
	linux-arm-kernel@lists.infradead.org, broonie@kernel.org,
	linux-spi@vger.kernel.org



On 17/02/2017 12:22, Russell King - ARM Linux wrote:
> On Fri, Feb 17, 2017 at 04:27:28PM +0530, Vignesh R wrote:
>> On Friday 17 February 2017 04:08 PM, Frode Isaksen wrote:
>>> @@ -650,6 +651,10 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
>>>  		dmaengine_slave_config(dspi->dma_rx, &dma_rx_conf);
>>>  		dmaengine_slave_config(dspi->dma_tx, &dma_tx_conf);
>>>  
>>> +		if (is_vmalloc_addr(t->rx_buf))
>>> +			/* VIVT cache: flush since addr. may be aliased */
>>> +			flush_kernel_vmap_range((void *)t->rx_buf, t->len);
>>> +
>>>  		rxdesc = dmaengine_prep_slave_sg(dspi->dma_rx,
>>>  				t->rx_sg.sgl, t->rx_sg.nents, DMA_DEV_TO_MEM,
>>>  				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
>>> @@ -660,7 +665,9 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
>>>  			/* use rx buffer as dummy tx buffer */
>>>  			t->tx_sg.sgl = t->rx_sg.sgl;
>>>  			t->tx_sg.nents = t->rx_sg.nents;
>>> -		}
>>> +		} else if (is_vmalloc_addr(t->tx_buf))
>>> +			/* VIVT cache: flush since addr. may be aliased */
>>> +			flush_kernel_vmap_range((void *)t->tx_buf, t->len);
>>>  
>> The SPI core calls dma_unmap_sg(), which is supposed to flush caches.
>> If the flush_kernel_vmap_range() call is required here to flush the
>> actual cache lines, then what do the dma_unmap_* calls in the SPI core
>> end up flushing?
> The DMA API deals with the _kernel_ lowmem mapping.  It has no knowledge
> of any other aliases in the system.  When you have a VIVT cache (as all
> old ARM CPUs have) then if you access the memory through a different
> alias from the kernel lowmem mapping (iow, vmalloc) then the DMA API
> can't help you.
>
> However, the correct place to use flush_kernel_vmap_range() etc is not
> in drivers - it's supposed to be done in the callers that know that
> the memory is aliased.
OK, so this should be done in the UBIFS layer instead? XFS already
does this, but no other fs does.

Thanks,
Frode

>
> For full details on these flushing functions, see cachetlb.txt.  This
> does not remove the requirement to also use the DMA API.
>
> === cachetlb.txt ===
>
> The final category of APIs is for I/O to deliberately aliased address
> ranges inside the kernel.  Such aliases are set up by use of the
> vmap/vmalloc API.  Since kernel I/O goes via physical pages, the I/O
> subsystem assumes that the user mapping and kernel offset mapping are
> the only aliases.  This isn't true for vmap aliases, so anything in
> the kernel trying to do I/O to vmap areas must manually manage
> coherency.  It must do this by flushing the vmap range before doing
> I/O and invalidating it after the I/O returns.
>
>   void flush_kernel_vmap_range(void *vaddr, int size)
>        flushes the kernel cache for a given virtual address range in
>        the vmap area.  This is to make sure that any data the kernel
>        modified in the vmap range is made visible to the physical
>        page.  The design is to make this area safe to perform I/O on.
>        Note that this API does *not* also flush the offset map alias
>        of the area.
>
>   void invalidate_kernel_vmap_range(void *vaddr, int size) invalidates
>        the cache for a given virtual address range in the vmap area
>        which prevents the processor from making the cache stale by
>        speculatively reading data while the I/O was occurring to the
>        physical pages.  This is only necessary for data reads into the
>        vmap area.
>
>

* Re: [PATCH v2 4/6] spi: davinci: flush caches when performing DMA
  2017-02-17 11:36                 ` Frode Isaksen
@ 2017-02-17 12:07                     ` Russell King - ARM Linux
  -1 siblings, 0 replies; 50+ messages in thread
From: Russell King - ARM Linux @ 2017-02-17 12:07 UTC (permalink / raw)
  To: Frode Isaksen
  Cc: Vignesh R, khilman-rdvid1DuHRBWk0Htik3J/w, nsekhar-l0cyMroinI0,
	linux-spi-u79uwXL29TY76Z2rM5mHXA, broonie-DgEjT+Ai2ygdnm+yROfE0A,
	ptitiano-rdvid1DuHRBWk0Htik3J/w,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r

On Fri, Feb 17, 2017 at 12:36:17PM +0100, Frode Isaksen wrote:
> On 17/02/2017 12:22, Russell King - ARM Linux wrote:
> > The DMA API deals with the _kernel_ lowmem mapping.  It has no knowledge
> > of any other aliases in the system.  When you have a VIVT cache (as all
> > old ARM CPUs have) then if you access the memory through a different
> > alias from the kernel lowmem mapping (iow, vmalloc) then the DMA API
> > can't help you.
> >
> > However, the correct place to use flush_kernel_vmap_range() etc is not
> > in drivers - it's supposed to be done in the callers that know that
> > the memory is aliased.
> 
> OK, so this should be done in the ubifs layer instead ? xfs already does
> this, but no other fs.

These APIs were created when XFS was being used on older ARMs and people
experienced corruption.  XFS was the only filesystem driver which wanted
to do this (horrid, imho) DMA to memory that it accessed via a vmalloc
area mapping.

If ubifs is also doing this, it's followed XFS down the same route, but
ignored the need for additional flushing.

The down-side to adding this at the filesystem layer is that you get the
impact whether or not the driver does DMA.  However, for XFS that's
absolutely necessary, as block devices will transfer to the kernel lowmem
mapping, which itself will alias with the vmalloc area mapping.

SPI is rather another special case - rather than SPI following the
established mechanism of passing data references via scatterlists or
similar, it also passes them via virtual addresses, which means SPI
can directly access the vmalloc area when performing PIO.  This
really makes the problem more complex, because it means that if you
do have a SPI driver that does that, it's going to be reading/writing
direct from vmalloc space.

That's not a problem as long as the data is only accessed via vmalloc
space, but it will definitely go totally wrong if the data is
subsequently mapped into userspace.

The other important thing to realise is that the interfaces in
cachetlb.txt assume that it's the lowmem mapping that will be accessed,
and the IO device will push that data out to physical memory (either via
the DMA API, or flush_kernel_dcache_page()).  That's not true of SPI,
as it passes virtual addresses around.

So... overall, I'm not sure that this problem is properly solvable given
SPI's insistence on passing virtual addresses and the differences in
this area between SPI and block.
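(To illustrate the difference: a block driver reaches the data through
the scatterlist, i.e. the lowmem/physical view of the pages, while an
SPI driver's PIO path dereferences whatever virtual address the caller
passed in, which may be a vmalloc alias of those same pages - a
simplified sketch, not actual driver code:)

/* block-style: the page from the scatterlist, hence the lowmem mapping */
static void blk_style_fill(struct scatterlist *sg, void __iomem *fifo)
{
	memcpy_toio(fifo, sg_virt(sg), sg->length);
}

/* SPI-style PIO: the caller's pointer, possibly a vmalloc alias */
static void spi_style_fill(struct spi_transfer *t, void __iomem *fifo)
{
	memcpy_toio(fifo, t->tx_buf, t->len);
}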

What I'm quite sure about is that adding yet more cache flushing
interfaces for legacy cache types really isn't the way forward.

-- 
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
according to speedtest.net.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v2 5/6] spi: davinci: do not use DMA if transfer length is less than 16
  2017-02-17 10:38     ` Frode Isaksen
@ 2017-02-17 16:37         ` Arnd Bergmann
  -1 siblings, 0 replies; 50+ messages in thread
From: Arnd Bergmann @ 2017-02-17 16:37 UTC (permalink / raw)
  To: Frode Isaksen
  Cc: nsekhar-l0cyMroinI0, Kevin Hilman,
	ptitiano-rdvid1DuHRBWk0Htik3J/w, Linux ARM, Mark Brown,
	linux-spi-u79uwXL29TY76Z2rM5mHXA

On Fri, Feb 17, 2017 at 11:38 AM, Frode Isaksen <fisaksen-rdvid1DuHRBWk0Htik3J/w@public.gmane.org> wrote:
> Higher bitrate and lower CPU load if using PIO in this case.
>
> Signed-off-by: Frode Isaksen <fisaksen-rdvid1DuHRBWk0Htik3J/w@public.gmane.org>
> ---
>  drivers/spi/spi-davinci.c | 13 +++++++------
>  1 file changed, 7 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c
> index b69a370..826bff1 100644
> --- a/drivers/spi/spi-davinci.c
> +++ b/drivers/spi/spi-davinci.c
> @@ -110,6 +110,8 @@
>  #define SPIDEF         0x4c
>  #define SPIFMT0                0x50
>
> +#define DMA_MIN_BYTES  16

16 seems low as the cutoff. Have you found this experimentally and
tested that for e.g. 32 bytes there is actually an advantage in using
DMA?
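(Presumably the patch wires the threshold into the can_dma() callback,
along these lines - a sketch, not the actual hunk:)

static bool davinci_spi_can_dma(struct spi_master *master,
				struct spi_device *spi,
				struct spi_transfer *xfer)
{
	/* short transfers go PIO: the DMA setup cost dominates */
	return __davinci_spi_can_dma(spi) && xfer->len >= DMA_MIN_BYTES;
}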

    Arnd

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v2 5/6] spi: davinci: do not use DMA if transfer length is less than 16
  2017-02-17 16:37         ` Arnd Bergmann
@ 2017-02-17 16:43             ` Frode Isaksen
  -1 siblings, 0 replies; 50+ messages in thread
From: Frode Isaksen @ 2017-02-17 16:43 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: nsekhar-l0cyMroinI0, Kevin Hilman,
	ptitiano-rdvid1DuHRBWk0Htik3J/w, Linux ARM, Mark Brown,
	linux-spi-u79uwXL29TY76Z2rM5mHXA



On 17/02/2017 17:37, Arnd Bergmann wrote:
> On Fri, Feb 17, 2017 at 11:38 AM, Frode Isaksen <fisaksen-rdvid1DuHRBWk0Htik3J/w@public.gmane.org> wrote:
>> Higher bitrate and lower CPU load if using PIO in this case.
>>
>> Signed-off-by: Frode Isaksen <fisaksen-rdvid1DuHRBWk0Htik3J/w@public.gmane.org>
>> ---
>>  drivers/spi/spi-davinci.c | 13 +++++++------
>>  1 file changed, 7 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c
>> index b69a370..826bff1 100644
>> --- a/drivers/spi/spi-davinci.c
>> +++ b/drivers/spi/spi-davinci.c
>> @@ -110,6 +110,8 @@
>>  #define SPIDEF         0x4c
>>  #define SPIFMT0                0x50
>>
>> +#define DMA_MIN_BYTES  16
> 16 seems low as the cutoff. Have you found this experimentally and
> tested that for e.g. 32
> bytes there is actually an advantage in using DMA?
Yes, I have tested different sizes. Actually, 32 bytes is a better cutoff @30MHz, but when the bit rate goes down, DMA wins over PIO for smaller transfers too. Do you prefer 32 bytes?
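(For scale, assuming one bit per SPI clock: a 16-byte transfer at 30MHz
occupies the bus for 128 bits / 30MHz ≈ 4.3µs, so a few microseconds of
DMA setup and completion-interrupt overhead can easily dominate, while
at lower clock rates the wire time grows and DMA starts to win for
smaller transfers.)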

Frode
>
>     Arnd


^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v2 5/6] spi: davinci: do not use DMA if transfer length is less than 16
  2017-02-17 16:43             ` Frode Isaksen
@ 2017-02-17 16:55                 ` Arnd Bergmann
  -1 siblings, 0 replies; 50+ messages in thread
From: Arnd Bergmann @ 2017-02-17 16:55 UTC (permalink / raw)
  To: Frode Isaksen
  Cc: nsekhar-l0cyMroinI0, Kevin Hilman,
	ptitiano-rdvid1DuHRBWk0Htik3J/w, Linux ARM, Mark Brown,
	linux-spi-u79uwXL29TY76Z2rM5mHXA

On Fri, Feb 17, 2017 at 5:43 PM, Frode Isaksen <fisaksen-rdvid1DuHRBWk0Htik3J/w@public.gmane.org> wrote:
> On 17/02/2017 17:37, Arnd Bergmann wrote:
>> On Fri, Feb 17, 2017 at 11:38 AM, Frode Isaksen <fisaksen-rdvid1DuHRBWk0Htik3J/w@public.gmane.org> wrote:
>> 16 seems low as the cutoff. Have you found this experimentally and
>> tested that for e.g. 32
>> bytes there is actually an advantage in using DMA?
> Yes, I have tested for different sizes. Actually 32 bytes is better @30MHz, but if the bit rate goes down, DMA wins over PIO. Do you prefer 32 bytes ?

No, I don't care about the specific value, just making sure that you had
actually tested this, as I had expected PIO to be faster here.

     Arnd

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v2 4/6] spi: davinci: flush caches when performing DMA
  2017-02-17 12:07                     ` Russell King - ARM Linux
@ 2017-02-17 17:45                       ` Frode Isaksen
  -1 siblings, 0 replies; 50+ messages in thread
From: Frode Isaksen @ 2017-02-17 17:45 UTC (permalink / raw)
  To: Russell King - ARM Linux
  Cc: Vignesh R, khilman, nsekhar, linux-spi, broonie, ptitiano,
	linux-arm-kernel



On 17/02/2017 13:07, Russell King - ARM Linux wrote:
> On Fri, Feb 17, 2017 at 12:36:17PM +0100, Frode Isaksen wrote:
>> On 17/02/2017 12:22, Russell King - ARM Linux wrote:
>>> The DMA API deals with the _kernel_ lowmem mapping.  It has no knowledge
>>> of any other aliases in the system.  When you have a VIVT cache (as all
>>> old ARM CPUs have) then if you access the memory through a different
>>> alias from the kernel lowmem mapping (iow, vmalloc) then the DMA API
>>> can't help you.
>>>
>>> However, the correct place to use flush_kernel_vmap_range() etc is not
>>> in drivers - it's supposed to be done in the callers that know that
>>> the memory is aliased.
>> OK, so this should be done in the ubifs layer instead ? xfs already does
>> this, but no other fs.
> These APIs were created when XFS was being used on older ARMs and people
> experienced corruption.  XFS was the only filesystem driver which wanted
> to do this (horrid, imho) DMA to memory that it accessed via a vmalloc
> area mapping.
>
> If ubifs is also doing this, it's followed XFS down the same route, but
> ignored the need for additional flushing.
>
> The down-side to adding this at the filesystem layer is that you get the
> impact whether or not the driver does DMA.  However, for XFS that's
> absolutely necessary, as block devices will transfer to the kernel lowmem
> mapping, which itself will alias with the vmalloc area mapping.
>
> SPI is rather another special case - rather than SPI following the
> established mechanism of passing data references via scatterlists or
> similar, it also passes them via virtual addresses, which means SPI
> can directly access the vmalloc area when performing PIO.  This
> really makes the problem more complex, because it means that if you
> do have a SPI driver that does that, it's going to be reading/writing
> direct from vmalloc space.
>
> That's not a problem as long as the data is only accessed via vmalloc
> space, but it will definitely go totally wrong if the data is
> subsequently mapped into userspace.
Thanks a lot for explaining...
It often (2 out of 5 mounts) goes wrong within this simple function, called when mounting the UBIFS filesystem without any prior userspace interaction:
fs/ubifs/lpt.c:
static int read_ltab(struct ubifs_info *c)
{
    int err;
    void *buf;
    int retry = 1;	/* allow one retry after invalidating the vmap alias */

    buf = vmalloc(c->ltab_sz);
    if (!buf)
        return -ENOMEM;
    err = ubifs_leb_read(c, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz, 1);
    if (err)
        goto out;
retry:
    err = unpack_ltab(c, buf);
    if (err && retry--) {
        pr_err("%s: flush cache %p[%d] and retry\n", __func__, buf, c->ltab_sz);
        invalidate_kernel_vmap_range(buf, c->ltab_sz);
        goto retry;
    }
out:
    vfree(buf);
    return err;
}
The retry code was added by me; with it, there is no error when retrying after flushing the cache. As you can see, the vmalloc'ed buffer is allocated and freed within this function. The log confirms it:
read_ltab: flush cache c9543000[11] and retry
OK after this!

Thanks again,
Frode
>
> The other important thing to realise is that the interfaces in
> cachetlb.txt assume that it's the lowmem mapping that will be accessed,
> and the IO device will push that data out to physical memory (either via
> the DMA API, or flush_kernel_dcache_page()).  That's not true of SPI,
> as it passes virtual addresses around.
>
> So... overall, I'm not sure that this problem is properly solvable given
> SPIs insistance on passing virtual addresses and the differences in this
> area between SPI and block.
>
> What I'm quite sure about is that adding yet more cache flushing
> interfaces for legacy cache types really isn't the way forward.
>

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v2 1/6] spi: davinci: Use SPI framework to handle DMA mapping
  2017-02-17 10:38     ` Frode Isaksen
@ 2017-02-19 16:29         ` Mark Brown
  -1 siblings, 0 replies; 50+ messages in thread
From: Mark Brown @ 2017-02-19 16:29 UTC (permalink / raw)
  To: Frode Isaksen
  Cc: nsekhar-l0cyMroinI0, khilman-rdvid1DuHRBWk0Htik3J/w,
	ptitiano-rdvid1DuHRBWk0Htik3J/w,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	linux-spi-u79uwXL29TY76Z2rM5mHXA, Fabien Parent

On Fri, Feb 17, 2017 at 11:38:19AM +0100, Frode Isaksen wrote:

> +static inline bool __davinci_spi_can_dma(struct spi_device *spi)
> +{
> +	struct davinci_spi_config *spicfg = spi->controller_data;
> +
> +	return spicfg ? spicfg->io_type == SPI_IO_TYPE_DMA : false;
> +}
> +
> +static bool davinci_spi_can_dma(struct spi_master *master,
> +				struct spi_device *spi,
> +				struct spi_transfer *xfer)
> +{
> +	return __davinci_spi_can_dma(spi);
> +}

Why the wrapper function?  __davinci_spi_can_dma has exactly one user
immediately below it.  Also please write normal if statements, it really
helps with legibility.
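(Folding the wrapper in and using a plain if statement would give
something like this sketch:)

static bool davinci_spi_can_dma(struct spi_master *master,
				struct spi_device *spi,
				struct spi_transfer *xfer)
{
	struct davinci_spi_config *spicfg = spi->controller_data;

	if (!spicfg)
		return false;

	return spicfg->io_type == SPI_IO_TYPE_DMA;
}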

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v2 2/6] spi: davinci: enable DMA when channels are defined in DT
  2017-02-17 10:38     ` Frode Isaksen
@ 2017-02-19 16:30         ` Mark Brown
  -1 siblings, 0 replies; 50+ messages in thread
From: Mark Brown @ 2017-02-19 16:30 UTC (permalink / raw)
  To: Frode Isaksen
  Cc: nsekhar-l0cyMroinI0, khilman-rdvid1DuHRBWk0Htik3J/w,
	ptitiano-rdvid1DuHRBWk0Htik3J/w,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	linux-spi-u79uwXL29TY76Z2rM5mHXA, Fabien Parent

On Fri, Feb 17, 2017 at 11:38:20AM +0100, Frode Isaksen wrote:
> From: Fabien Parent <fparent-rdvid1DuHRBWk0Htik3J/w@public.gmane.org>
> 
> When booting with DT the SPI driver is always using
> the SPI_IO_TYPE_INTR mode to transfer data even if DMA channels are
> defined in the DT.

This looks good but I'll hold off on applying it without patch 1 in case
it introduces regressions for existing systems due to switching to DMA
but not handling vmalloc() mappings.

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v2 4/6] spi: davinci: flush caches when performing DMA
  2017-02-17 12:07                     ` Russell King - ARM Linux
@ 2017-02-20  6:55                         ` Vignesh R
  -1 siblings, 0 replies; 50+ messages in thread
From: Vignesh R @ 2017-02-20  6:55 UTC (permalink / raw)
  To: broonie-DgEjT+Ai2ygdnm+yROfE0A
  Cc: Russell King - ARM Linux, Frode Isaksen,
	khilman-rdvid1DuHRBWk0Htik3J/w, Nori, Sekhar,
	linux-spi-u79uwXL29TY76Z2rM5mHXA,
	ptitiano-rdvid1DuHRBWk0Htik3J/w,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r



On Friday 17 February 2017 05:37 PM, Russell King - ARM Linux wrote:
[...]
> SPI is rather another special case - rather than SPI following the 
> established mechanism of passing data references via scatterlists or 
> similar, it also passes them via virtual addresses, which means SPI 
> can directly access the vmalloc area when performing PIO.  This 
> really makes the problem more complex, because it means that if you 
> do have a SPI driver that does that, it's going to be
> reading/writing direct from vmalloc space.
> 
> That's not a problem as long as the data is only accessed via
> vmalloc space, but it will definitely go totally wrong if the data
> is subsequently mapped into userspace.
> 
> The other important thing to realise is that the interfaces in 
> cachetlb.txt assume that it's the lowmem mapping that will be
> accessed, and the IO device will push that data out to physical
> memory (either via the DMA API, or flush_kernel_dcache_page()).
> That's not true of SPI, as it passes virtual addresses around.
> 
> So... overall, I'm not sure that this problem is properly solvable
> given SPIs insistance on passing virtual addresses and the
> differences in this area between SPI and block.
> 

I am debugging another issue with UBIFS wherein pages allocated by
vmalloc are in the highmem region, which is not addressable using
32-bit addresses and is backed by LPAE. So a 32-bit DMA cannot access
these buffers at all.
When dma_map_sg() is called by spi_map_buf() to map these pages, the
physical address is simply truncated to 32 bits in pfn_to_dma() (as
part of the dma_map_sg() call). This results in random crashes, as the
DMA starts accessing random memory during SPI reads.

Given the above problem and also the issues surrounding VIVT caches, I
am thinking of maybe using a pre-allocated, fixed-size bounce buffer to
handle buffers that are not in the lowmem mapping.
I have tried using a 64KB pre-allocated buffer on a TI DRA74 EVM with
QSPI running at 76.8MHz and do not see any significant degradation in
performance with UBIFS, mainly because UBIFS seems to use vmalloc'd
buffers only during the initial preparing and mounting phase and not
during file read/write.

So, is it an acceptable solution to modify spi_map_buf() to use a
bounce buffer when the buffer does not belong to the lowmem region?
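(A rough sketch of the bounce-buffer idea in spi_map_buf() - the
'bounce_buf'/'bounce_len' fields are hypothetical, and error handling
and the splitting of oversized transfers are elided:)

	if (is_vmalloc_addr(buf)) {
		if (len > master->bounce_len)
			return -EINVAL;
		if (dir == DMA_TO_DEVICE)
			memcpy(master->bounce_buf, buf, len);
		sg_init_one(sgt->sgl, master->bounce_buf, len);
		/* for DMA_FROM_DEVICE, copy back to buf after the
		 * transfer completes */
	}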

-- 
Regards
Vignesh

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v2 4/6] spi: davinci: flush caches when performing DMA
  2017-02-20  6:55                         ` Vignesh R
@ 2017-02-20  9:26                             ` Frode Isaksen
  -1 siblings, 0 replies; 50+ messages in thread
From: Frode Isaksen @ 2017-02-20  9:26 UTC (permalink / raw)
  To: Vignesh R, broonie-DgEjT+Ai2ygdnm+yROfE0A
  Cc: Russell King - ARM Linux, khilman-rdvid1DuHRBWk0Htik3J/w, Nori,
	Sekhar, linux-spi-u79uwXL29TY76Z2rM5mHXA,
	ptitiano-rdvid1DuHRBWk0Htik3J/w,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r



On 20/02/2017 07:55, Vignesh R wrote:
>
> On Friday 17 February 2017 05:37 PM, Russell King - ARM Linux wrote:
> [...]
>> SPI is rather another special case - rather than SPI following the 
>> established mechanism of passing data references via scatterlists or 
>> similar, it also passes them via virtual addresses, which means SPI 
>> can directly access the vmalloc area when performing PIO.  This 
>> really makes the problem more complex, because it means that if you 
>> do have a SPI driver that does that, it's going to be
>> reading/writing direct from vmalloc space.
>>
>> That's not a problem as long as the data is only accessed via
>> vmalloc space, but it will definitely go totally wrong if the data
>> is subsequently mapped into userspace.
>>
>> The other important thing to realise is that the interfaces in 
>> cachetlb.txt assume that it's the lowmem mapping that will be
>> accessed, and the IO device will push that data out to physical
>> memory (either via the DMA API, or flush_kernel_dcache_page()).
>> That's not true of SPI, as it passes virtual addresses around.
>>
>> So... overall, I'm not sure that this problem is properly solvable
>> given SPIs insistance on passing virtual addresses and the
>> differences in this area between SPI and block.
>>
> I am debugging another issue with UBIFS wherein pages allocated by
> vmalloc are in highmem region that are not addressable using 32 bit
> addresses and is backed by LPAE. So, a 32 bit DMA cannot access these
> buffers at all.
> When dma_map_sg() is called to map these pages by spi_map_buf() the
> physical address is just truncated to 32 bit in pfn_to_dma() (as part of
> dma_map_sg() call). This results in random crashes as DMA starts
> accessing random memory during SPI read.
>
> Given, the above problem and also issue surrounding VIVT caches, I am
> thinking of may be using pre-allocated fixed size bounce buffer to
> handle buffers not in lowmem mapping.
> I have tried using 64KB pre-allocated buffer on TI DRA74 EVM with QSPI
> running at 76.8MHz and do not see any significant degradation in
> performance with UBIFS. Mainly because UBIFS seems to use vmalloc'd
> buffers only during initial preparing and mounting phase and not during
> file read/write.
I am seeing a bug caused by the VIVT cache in the 'read_ltab()' function. In that function, the vmalloc'ed buffer is only 11 bytes. Isn't it better to use kmalloc() in this case?
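(i.e. something like this untested change in read_ltab():)

-    buf = vmalloc(c->ltab_sz);
+    buf = kmalloc(c->ltab_sz, GFP_NOFS);	/* lowmem, no vmap alias */
...
-    vfree(buf);
+    kfree(buf);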

Thanks,
Frode
>
> So, is it an acceptable solution to modify spi_map_buf() to use bounce
> buffer when buffer does not belong to lowmem region?
>


^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v2 4/6] spi: davinci: flush caches when performing DMA
  2017-02-20  9:26                             ` Frode Isaksen
@ 2017-02-20  9:47                                 ` Vignesh R
  -1 siblings, 0 replies; 50+ messages in thread
From: Vignesh R @ 2017-02-20  9:47 UTC (permalink / raw)
  To: Frode Isaksen, broonie-DgEjT+Ai2ygdnm+yROfE0A
  Cc: Russell King - ARM Linux, khilman-rdvid1DuHRBWk0Htik3J/w, Nori,
	Sekhar, linux-spi-u79uwXL29TY76Z2rM5mHXA,
	ptitiano-rdvid1DuHRBWk0Htik3J/w,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r



On Monday 20 February 2017 02:56 PM, Frode Isaksen wrote:
> 
> 
> On 20/02/2017 07:55, Vignesh R wrote:
>>
>> On Friday 17 February 2017 05:37 PM, Russell King - ARM Linux wrote:
>> [...]
>>> SPI is rather another special case - rather than SPI following the 
>>> established mechanism of passing data references via scatterlists or 
>>> similar, it also passes them via virtual addresses, which means SPI 
>>> can directly access the vmalloc area when performing PIO.  This 
>>> really makes the problem more complex, because it means that if you 
>>> do have a SPI driver that does that, it's going to be
>>> reading/writing direct from vmalloc space.
>>>
>>> That's not a problem as long as the data is only accessed via
>>> vmalloc space, but it will definitely go totally wrong if the data
>>> is subsequently mapped into userspace.
>>>
>>> The other important thing to realise is that the interfaces in 
>>> cachetlb.txt assume that it's the lowmem mapping that will be
>>> accessed, and the IO device will push that data out to physical
>>> memory (either via the DMA API, or flush_kernel_dcache_page()).
>>> That's not true of SPI, as it passes virtual addresses around.
>>>
>>> So... overall, I'm not sure that this problem is properly solvable
>>> given SPIs insistance on passing virtual addresses and the
>>> differences in this area between SPI and block.
>>>
>> I am debugging another issue with UBIFS wherein pages allocated by
>> vmalloc are in highmem region that are not addressable using 32 bit
>> addresses and is backed by LPAE. So, a 32 bit DMA cannot access these
>> buffers at all.
>> When dma_map_sg() is called to map these pages by spi_map_buf() the
>> physical address is just truncated to 32 bit in pfn_to_dma() (as part of
>> dma_map_sg() call). This results in random crashes as DMA starts
>> accessing random memory during SPI read.
>>
>> Given, the above problem and also issue surrounding VIVT caches, I am
>> thinking of may be using pre-allocated fixed size bounce buffer to
>> handle buffers not in lowmem mapping.
>> I have tried using 64KB pre-allocated buffer on TI DRA74 EVM with QSPI
>> running at 76.8MHz and do not see any significant degradation in
>> performance with UBIFS. Mainly because UBIFS seems to use vmalloc'd
>> buffers only during initial preparing and mounting phase and not during
>> file read/write.
> I am seeing a bug caused by VIVT cache in 'read_ltab()' function. In this function, the vmalloc'ed buffer is of size 11. Isn't it better to use kmalloc in this case ?

read_ltab() isn't the only place where vmalloc() is used. A quick grep
for vmalloc in fs/ubifs/ shows ~19 occurrences. I guess every
vmalloc() call can potentially allocate memory from highmem and might
cause issues for VIVT and similar aliasing caches.
Fixing just one such case isn't going to help IMHO.


-- 
Regards
Vignesh

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v2 4/6] spi: davinci: flush caches when performing DMA
  2017-02-20  9:47                                 ` Vignesh R
@ 2017-02-20 10:34                                     ` Frode Isaksen
  -1 siblings, 0 replies; 50+ messages in thread
From: Frode Isaksen @ 2017-02-20 10:34 UTC (permalink / raw)
  To: Vignesh R, broonie-DgEjT+Ai2ygdnm+yROfE0A
  Cc: Russell King - ARM Linux, khilman-rdvid1DuHRBWk0Htik3J/w, Nori,
	Sekhar, linux-spi-u79uwXL29TY76Z2rM5mHXA,
	ptitiano-rdvid1DuHRBWk0Htik3J/w,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r



On 20/02/2017 10:47, Vignesh R wrote:
>
> On Monday 20 February 2017 02:56 PM, Frode Isaksen wrote:
>>
>> On 20/02/2017 07:55, Vignesh R wrote:
>>> On Friday 17 February 2017 05:37 PM, Russell King - ARM Linux wrote:
>>> [...]
>>>> SPI is rather another special case - rather than SPI following the 
>>>> established mechanism of passing data references via scatterlists or 
>>>> similar, it also passes them via virtual addresses, which means SPI 
>>>> can directly access the vmalloc area when performing PIO.  This 
>>>> really makes the problem more complex, because it means that if you 
>>>> do have a SPI driver that does that, it's going to be
>>>> reading/writing direct from vmalloc space.
>>>>
>>>> That's not a problem as long as the data is only accessed via
>>>> vmalloc space, but it will definitely go totally wrong if the data
>>>> is subsequently mapped into userspace.
>>>>
>>>> The other important thing to realise is that the interfaces in 
>>>> cachetlb.txt assume that it's the lowmem mapping that will be
>>>> accessed, and the IO device will push that data out to physical
>>>> memory (either via the DMA API, or flush_kernel_dcache_page()).
>>>> That's not true of SPI, as it passes virtual addresses around.
>>>>
>>>> So... overall, I'm not sure that this problem is properly solvable
>>>> given SPIs insistance on passing virtual addresses and the
>>>> differences in this area between SPI and block.
>>>>
>>> I am debugging another issue with UBIFS wherein pages allocated by
>>> vmalloc are in highmem region that are not addressable using 32 bit
>>> addresses and is backed by LPAE. So, a 32 bit DMA cannot access these
>>> buffers at all.
>>> When dma_map_sg() is called to map these pages by spi_map_buf() the
>>> physical address is just truncated to 32 bit in pfn_to_dma() (as part of
>>> dma_map_sg() call). This results in random crashes as DMA starts
>>> accessing random memory during SPI read.
>>>
>>> Given, the above problem and also issue surrounding VIVT caches, I am
>>> thinking of may be using pre-allocated fixed size bounce buffer to
>>> handle buffers not in lowmem mapping.
>>> I have tried using 64KB pre-allocated buffer on TI DRA74 EVM with QSPI
>>> running at 76.8MHz and do not see any significant degradation in
>>> performance with UBIFS. Mainly because UBIFS seems to use vmalloc'd
>>> buffers only during initial preparing and mounting phase and not during
>>> file read/write.
>> I am seeing a bug caused by VIVT cache in 'read_ltab()' function. In this function, the vmalloc'ed buffer is of size 11. Isn't it better to use kmalloc in this case ?
> read_ltab() isn't the only place where vmalloc() is used. A quick grep
> for vmalloc on fs/ubifs/ shows about ~19 occurrence. I guess every
> vmalloc() call can potentially allocate memory from highmem and might
> potentially cause issue for VIVT and such aliasing caches.
> Fixing just one such case isn't going to help IMHO.
Of course, fixing it in only one place is not going to help.
For the moment there are 3 possible solutions to the UBIFS DMA problem:
1) Always use a bounce buffer for vmalloc'ed buffers - impacts everyone.
2) Remove the use of vmalloc'ed buffers in UBIFS - is that possible?
3) Flush the cache in UBIFS when reading/writing vmalloc'ed buffers -
does not fix the highmem case.
I will try to see if option 2) is possible.

Thanks,
Frode
>
>


^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v2 4/6] spi: davinci: flush caches when performing DMA
  2017-02-20 10:34                                     ` Frode Isaksen
@ 2017-02-20 11:27                                         ` Vignesh R
  -1 siblings, 0 replies; 50+ messages in thread
From: Vignesh R @ 2017-02-20 11:27 UTC (permalink / raw)
  To: Frode Isaksen, broonie-DgEjT+Ai2ygdnm+yROfE0A
  Cc: Russell King - ARM Linux, khilman-rdvid1DuHRBWk0Htik3J/w, Nori,
	Sekhar, linux-spi-u79uwXL29TY76Z2rM5mHXA,
	ptitiano-rdvid1DuHRBWk0Htik3J/w,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r



On Monday 20 February 2017 04:04 PM, Frode Isaksen wrote:
> 
> 
> On 20/02/2017 10:47, Vignesh R wrote:
>>
>> On Monday 20 February 2017 02:56 PM, Frode Isaksen wrote:
>>>
>>> On 20/02/2017 07:55, Vignesh R wrote:
>>>> On Friday 17 February 2017 05:37 PM, Russell King - ARM Linux wrote:
[...]
>>>> I am debugging another issue with UBIFS wherein pages allocated by
>>>> vmalloc are in the highmem region, are not addressable using 32-bit
>>>> addresses, and are backed by LPAE. So, a 32-bit DMA cannot access these
>>>> buffers at all.
>>>> When dma_map_sg() is called to map these pages by spi_map_buf(), the
>>>> physical address is just truncated to 32 bits in pfn_to_dma() (as part
>>>> of the dma_map_sg() call). This results in random crashes as the DMA
>>>> starts accessing random memory during SPI reads.
>>>>
>>>> Given the above problem and also the issues surrounding VIVT caches, I
>>>> am thinking of maybe using a pre-allocated fixed-size bounce buffer to
>>>> handle buffers not in the lowmem mapping.
>>>> I have tried using a 64KB pre-allocated buffer on a TI DRA74 EVM with
>>>> QSPI running at 76.8MHz and do not see any significant degradation in
>>>> performance with UBIFS, mainly because UBIFS seems to use vmalloc'd
>>>> buffers only during the initial preparation and mounting phase and not
>>>> during file read/write.
>>> I am seeing a bug caused by the VIVT cache in the 'read_ltab()' function. In this function, the vmalloc'ed buffer is of size 11 bytes. Isn't it better to use kmalloc in this case?
>> read_ltab() isn't the only place where vmalloc() is used. A quick grep
>> for vmalloc in fs/ubifs/ shows about 19 occurrences. I guess every
>> vmalloc() call can potentially allocate memory from highmem and might
>> cause issues for VIVT and similar aliasing caches.
>> Fixing just one such case isn't going to help, IMHO.
> Of course, fixing it in only one place is not going to help.
> For the moment there are 3 solutions to the UBIFS DMA problem:
> 1) Always use a bounce buffer for vmalloc'ed buffers - impacts everyone.
> 2) Remove the use of vmalloc'ed buffers in UBIFS - is it possible?

Maybe. But what about mtdblock with JFFS2 on top of it? mtdblock still
uses vmalloc'd buffers.


-- 
Regards
Vignesh

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v2 4/6] spi: davinci: flush caches when performing DMA
  2017-02-20 11:27                                         ` Vignesh R
@ 2017-02-20 15:49                                             ` Frode Isaksen
  -1 siblings, 0 replies; 50+ messages in thread
From: Frode Isaksen @ 2017-02-20 15:49 UTC (permalink / raw)
  To: Vignesh R, broonie-DgEjT+Ai2ygdnm+yROfE0A
  Cc: Russell King - ARM Linux, khilman-rdvid1DuHRBWk0Htik3J/w, Nori,
	Sekhar, linux-spi-u79uwXL29TY76Z2rM5mHXA,
	ptitiano-rdvid1DuHRBWk0Htik3J/w,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r



On 20/02/2017 12:27, Vignesh R wrote:
>
> On Monday 20 February 2017 04:04 PM, Frode Isaksen wrote:
>>
>> On 20/02/2017 10:47, Vignesh R wrote:
>>> On Monday 20 February 2017 02:56 PM, Frode Isaksen wrote:
>>>> On 20/02/2017 07:55, Vignesh R wrote:
>>>>> On Friday 17 February 2017 05:37 PM, Russell King - ARM Linux wrote:
> [...]
>>>>> I am debugging another issue with UBIFS wherein pages allocated by
>>>>> vmalloc are in the highmem region, are not addressable using 32-bit
>>>>> addresses, and are backed by LPAE. So, a 32-bit DMA cannot access these
>>>>> buffers at all.
>>>>> When dma_map_sg() is called to map these pages by spi_map_buf(), the
>>>>> physical address is just truncated to 32 bits in pfn_to_dma() (as part
>>>>> of the dma_map_sg() call). This results in random crashes as the DMA
>>>>> starts accessing random memory during SPI reads.
>>>>>
>>>>> Given the above problem and also the issues surrounding VIVT caches, I
>>>>> am thinking of maybe using a pre-allocated fixed-size bounce buffer to
>>>>> handle buffers not in the lowmem mapping.
>>>>> I have tried using a 64KB pre-allocated buffer on a TI DRA74 EVM with
>>>>> QSPI running at 76.8MHz and do not see any significant degradation in
>>>>> performance with UBIFS, mainly because UBIFS seems to use vmalloc'd
>>>>> buffers only during the initial preparation and mounting phase and not
>>>>> during file read/write.
>>>> I am seeing a bug caused by the VIVT cache in the 'read_ltab()' function. In this function, the vmalloc'ed buffer is of size 11 bytes. Isn't it better to use kmalloc in this case?
>>> read_ltab() isn't the only place where vmalloc() is used. A quick grep
>>> for vmalloc in fs/ubifs/ shows about 19 occurrences. I guess every
>>> vmalloc() call can potentially allocate memory from highmem and might
>>> cause issues for VIVT and similar aliasing caches.
>>> Fixing just one such case isn't going to help, IMHO.
>> Of course, fixing it in only one place is not going to help.
>> For the moment there are 3 solutions to the UBIFS DMA problem:
>> 1) Always use a bounce buffer for vmalloc'ed buffers - impacts everyone.
>> 2) Remove the use of vmalloc'ed buffers in UBIFS - is it possible?
> Maybe. But what about mtdblock with JFFS2 on top of it? mtdblock still
> uses vmalloc'd buffers.
That's why I put the cache flush in the SPI driver in the first try.
FYI, I replaced the use of vmalloc with kmalloc in a few places in the UBIFS layer and it seems to work. The size of the buffers allocated varies from 11 (?) to 65408 bytes.
Maybe the best place to handle this is in the SPI flash driver(s). These drivers know that they may handle vmalloc'ed buffers and should assume that the underlying SPI driver may use DMA.
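
For reference, cachetlb.txt already defines an interface for exactly
this case (I/O on a vmalloc'ed buffer): flush_kernel_vmap_range() and
invalidate_kernel_vmap_range(). A driver-side flush like my first try
would bracket the transfer roughly as in the sketch below (illustrative
only; the wrapper function is made up, and this handles the VIVT
aliasing problem but, as noted above, not the highmem/LPAE addressing
problem):

#include <linux/highmem.h>

static void spi_vmalloc_dma_example(void *buf, int len)
{
	/* Before DMA: write dirty lines of the vmalloc alias back to RAM. */
	flush_kernel_vmap_range(buf, len);

	/* ... submit the DMA transfer here and wait for completion ... */

	/* After a DMA read: discard now-stale lines of the vmalloc alias. */
	invalidate_kernel_vmap_range(buf, len);
}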

Thanks,
Frode
>
>

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Re: [PATCH v2 4/6] spi: davinci: flush caches when performing DMA
  2017-02-20 15:49                                             ` Frode Isaksen
@ 2017-02-21  5:08                                                 ` Vignesh R
  -1 siblings, 0 replies; 50+ messages in thread
From: Vignesh R @ 2017-02-21  5:08 UTC (permalink / raw)
  To: Frode Isaksen, broonie-DgEjT+Ai2ygdnm+yROfE0A
  Cc: Russell King - ARM Linux, khilman-rdvid1DuHRBWk0Htik3J/w, Nori,
	Sekhar, linux-spi-u79uwXL29TY76Z2rM5mHXA,
	ptitiano-rdvid1DuHRBWk0Htik3J/w,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r



On Monday 20 February 2017 09:19 PM, Frode Isaksen wrote:
> 
> 
> On 20/02/2017 12:27, Vignesh R wrote:
>>
>> On Monday 20 February 2017 04:04 PM, Frode Isaksen wrote:
>>>
>>> On 20/02/2017 10:47, Vignesh R wrote:
>>>> On Monday 20 February 2017 02:56 PM, Frode Isaksen wrote:
>>>>> On 20/02/2017 07:55, Vignesh R wrote:
>>>>>> On Friday 17 February 2017 05:37 PM, Russell King - ARM Linux wrote:
>> [...]
>>>>>> I am debugging another issue with UBIFS wherein pages allocated by
>>>>>> vmalloc are in the highmem region, are not addressable using 32-bit
>>>>>> addresses, and are backed by LPAE. So, a 32-bit DMA cannot access these
>>>>>> buffers at all.
>>>>>> When dma_map_sg() is called to map these pages by spi_map_buf(), the
>>>>>> physical address is just truncated to 32 bits in pfn_to_dma() (as part
>>>>>> of the dma_map_sg() call). This results in random crashes as the DMA
>>>>>> starts accessing random memory during SPI reads.
>>>>>>
>>>>>> Given the above problem and also the issues surrounding VIVT caches, I
>>>>>> am thinking of maybe using a pre-allocated fixed-size bounce buffer to
>>>>>> handle buffers not in the lowmem mapping.
>>>>>> I have tried using a 64KB pre-allocated buffer on a TI DRA74 EVM with
>>>>>> QSPI running at 76.8MHz and do not see any significant degradation in
>>>>>> performance with UBIFS, mainly because UBIFS seems to use vmalloc'd
>>>>>> buffers only during the initial preparation and mounting phase and not
>>>>>> during file read/write.
>>>>> I am seeing a bug caused by the VIVT cache in the 'read_ltab()' function. In this function, the vmalloc'ed buffer is of size 11 bytes. Isn't it better to use kmalloc in this case?
>>>> read_ltab() isn't the only place where vmalloc() is used. A quick grep
>>>> for vmalloc in fs/ubifs/ shows about 19 occurrences. I guess every
>>>> vmalloc() call can potentially allocate memory from highmem and might
>>>> cause issues for VIVT and similar aliasing caches.
>>>> Fixing just one such case isn't going to help, IMHO.
>>> Of course, fixing it in only one place is not going to help.
>>> For the moment there are 3 solutions to the UBIFS DMA problem:
>>> 1) Always use a bounce buffer for vmalloc'ed buffers - impacts everyone.
>>> 2) Remove the use of vmalloc'ed buffers in UBIFS - is it possible?
>> Maybe. But what about mtdblock with JFFS2 on top of it? mtdblock still
>> uses vmalloc'd buffers.
> That's why I put the cache flush in the SPI driver in the first try.
> FYI, I replaced the use of vmalloc with kmalloc in a few places in the UBIFS layer and it seems to work. The size of the buffers allocated varies from 11 (?) to 65408 bytes.

FYI, here is the discussion on DMA + UBIFS:
http://lists.infradead.org/pipermail/linux-mtd/2016-October/069856.html

> Maybe the best place to handle this is in the SPI flash driver(s). These drivers know that they may handle vmalloc'ed buffers and should assume that the underlying SPI driver may use DMA.
> 

I guess so; the NAND core already seems to use a bounce buffer for
vmalloc'd addresses.
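
Something along the lines of the check below, that is (the helper name
is made up for the example, but is_vmalloc_addr() and virt_addr_valid()
are the real predicates a driver can use to decide when a buffer is not
safe to DMA-map directly):

#include <linux/mm.h>

/* True if the buffer cannot safely be DMA-mapped directly and must be
 * copied through a lowmem bounce buffer instead. */
static bool spi_flash_buf_needs_bounce(const void *buf)
{
	return is_vmalloc_addr(buf) || !virt_addr_valid(buf);
}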


-- 
Regards
Vignesh

^ permalink raw reply	[flat|nested] 50+ messages in thread

* Applied "spi: davinci: use rx buffer as dummy tx buffer" to the spi tree
  2017-02-17 10:38     ` Frode Isaksen
@ 2017-03-15 19:37         ` Mark Brown
  -1 siblings, 0 replies; 50+ messages in thread
From: Mark Brown @ 2017-03-15 19:37 UTC (permalink / raw)
  To: Frode Isaksen
  Cc: Mark Brown, nsekhar-l0cyMroinI0, khilman-rdvid1DuHRBWk0Htik3J/w,
	ptitiano-rdvid1DuHRBWk0Htik3J/w,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	broonie-DgEjT+Ai2ygdnm+yROfE0A, linux-spi-u79uwXL29TY76Z2rM5mHXA,
	linux-spi-u79uwXL29TY76Z2rM5mHXA

The patch

   spi: davinci: use rx buffer as dummy tx buffer

has been applied to the spi tree at

   git://git.kernel.org/pub/scm/linux/kernel/git/broonie/spi.git 

All being well this means that it will be integrated into the linux-next
tree (usually sometime in the next 24 hours) and sent to Linus during
the next merge window (or sooner if it is a bug fix), however if
problems are discovered then the patch may be dropped or reverted.  

You may get further e-mails resulting from automated or manual testing
and review of the tree, please engage with people reporting problems and
send followup patches addressing any issues that are reported if needed.

If any updates are required or you are submitting further changes they
should be sent as incremental updates against current git, existing
patches will not be replaced.

Please add any relevant lists and maintainers to the CCs when replying
to this mail.

Thanks,
Mark

From 6b3a631e7f8eca75a987ed760898d28fb3628143 Mon Sep 17 00:00:00 2001
From: Frode Isaksen <fisaksen-rdvid1DuHRBWk0Htik3J/w@public.gmane.org>
Date: Thu, 23 Feb 2017 19:01:58 +0100
Subject: [PATCH] spi: davinci: use rx buffer as dummy tx buffer

When doing an rx-only transfer, the transfer will fail
if the number of SG entries exceeds 20.
This happens because the eDMA DMA engine is limited
to 20 SG entries in one transaction, and when the
DMA transaction is resumed (which takes > 150us),
rx errors occur because the slave is still transmitting.
Fix this by using the rx buffer as the dummy tx buffer,
so that resuming the rx transaction happens at the same
time as resuming the tx transaction.

Signed-off-by: Frode Isaksen <fisaksen-rdvid1DuHRBWk0Htik3J/w@public.gmane.org>
Signed-off-by: Mark Brown <broonie-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
---
 drivers/spi/spi-davinci.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c
index 1e24395a04f2..ca122165a3c6 100644
--- a/drivers/spi/spi-davinci.c
+++ b/drivers/spi/spi-davinci.c
@@ -655,6 +655,12 @@ static int davinci_spi_bufs(struct spi_device *spi, struct spi_transfer *t)
 		if (!rxdesc)
 			goto err_desc;
 
+		if (!t->tx_buf) {
+			/* use rx buffer as dummy tx buffer */
+			t->tx_sg.sgl = t->rx_sg.sgl;
+			t->tx_sg.nents = t->rx_sg.nents;
+		}
+
 		txdesc = dmaengine_prep_slave_sg(dspi->dma_tx,
 				t->tx_sg.sgl, t->tx_sg.nents, DMA_MEM_TO_DEV,
 				DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
@@ -957,7 +963,7 @@ static int davinci_spi_probe(struct platform_device *pdev)
 	master->bus_num = pdev->id;
 	master->num_chipselect = pdata->num_chipselect;
 	master->bits_per_word_mask = SPI_BPW_RANGE_MASK(2, 16);
-	master->flags = (SPI_MASTER_MUST_RX | SPI_MASTER_MUST_TX);
+	master->flags = SPI_MASTER_MUST_RX;
 	master->setup = davinci_spi_setup;
 	master->cleanup = davinci_spi_cleanup;
 	master->can_dma = davinci_spi_can_dma;
-- 
2.11.0

^ permalink raw reply related	[flat|nested] 50+ messages in thread

end of thread, other threads:[~2017-03-15 19:37 UTC | newest]

Thread overview: 50+ messages
2017-02-17 10:38 [PATCH v2 0/6] Enable DMA for daVinci SPI controller Frode Isaksen
2017-02-17 10:38 ` Frode Isaksen
     [not found] ` <1487327904-28311-1-git-send-email-fisaksen-rdvid1DuHRBWk0Htik3J/w@public.gmane.org>
2017-02-17 10:38   ` [PATCH v2 1/6] spi: davinci: Use SPI framework to handle DMA mapping Frode Isaksen
2017-02-17 10:38     ` Frode Isaksen
     [not found]     ` <1487327904-28311-2-git-send-email-fisaksen-rdvid1DuHRBWk0Htik3J/w@public.gmane.org>
2017-02-19 16:29       ` Mark Brown
2017-02-19 16:29         ` Mark Brown
2017-02-17 10:38   ` [PATCH v2 2/6] spi: davinci: enable DMA when channels are defined in DT Frode Isaksen
2017-02-17 10:38     ` Frode Isaksen
     [not found]     ` <1487327904-28311-3-git-send-email-fisaksen-rdvid1DuHRBWk0Htik3J/w@public.gmane.org>
2017-02-19 16:30       ` Mark Brown
2017-02-19 16:30         ` Mark Brown
2017-02-17 10:38   ` [PATCH v2 3/6] spi: davinci: use rx buffer as dummy tx buffer Frode Isaksen
2017-02-17 10:38     ` Frode Isaksen
     [not found]     ` <1487327904-28311-4-git-send-email-fisaksen-rdvid1DuHRBWk0Htik3J/w@public.gmane.org>
2017-03-15 19:37       ` Applied "spi: davinci: use rx buffer as dummy tx buffer" to the spi tree Mark Brown
2017-03-15 19:37         ` Mark Brown
2017-02-17 10:38   ` [PATCH v2 4/6] spi: davinci: flush caches when performing DMA Frode Isaksen
2017-02-17 10:38     ` Frode Isaksen
     [not found]     ` <1487327904-28311-5-git-send-email-fisaksen-rdvid1DuHRBWk0Htik3J/w@public.gmane.org>
2017-02-17 10:57       ` Vignesh R
2017-02-17 10:57         ` Vignesh R
     [not found]         ` <5fab0c33-6e1d-c63a-8758-6672236045a7-l0cyMroinI0@public.gmane.org>
2017-02-17 11:22           ` Russell King - ARM Linux
2017-02-17 11:22             ` Russell King - ARM Linux
     [not found]             ` <20170217112247.GE21222-l+eeeJia6m9URfEZ8mYm6t73F7V6hmMc@public.gmane.org>
2017-02-17 11:36               ` Frode Isaksen
2017-02-17 11:36                 ` Frode Isaksen
     [not found]                 ` <eaf93c6c-52a7-826b-9c53-2b13c0b280ca-rdvid1DuHRBWk0Htik3J/w@public.gmane.org>
2017-02-17 12:07                   ` Russell King - ARM Linux
2017-02-17 12:07                     ` Russell King - ARM Linux
2017-02-17 17:45                     ` Frode Isaksen
2017-02-17 17:45                       ` Frode Isaksen
     [not found]                     ` <20170217120749.GF21222-l+eeeJia6m9URfEZ8mYm6t73F7V6hmMc@public.gmane.org>
2017-02-20  6:55                       ` Vignesh R
2017-02-20  6:55                         ` Vignesh R
     [not found]                         ` <0f3607fd-4542-be92-da2e-b2da6f8ac26f-l0cyMroinI0@public.gmane.org>
2017-02-20  9:26                           ` Frode Isaksen
2017-02-20  9:26                             ` Frode Isaksen
     [not found]                             ` <f0fcc000-6c07-647a-ada2-04b61f0e2e3b-rdvid1DuHRBWk0Htik3J/w@public.gmane.org>
2017-02-20  9:47                               ` Vignesh R
2017-02-20  9:47                                 ` Vignesh R
     [not found]                                 ` <57721a60-52bb-c9d1-d5a7-ae450e7adcc0-l0cyMroinI0@public.gmane.org>
2017-02-20 10:34                                   ` Frode Isaksen
2017-02-20 10:34                                     ` Frode Isaksen
     [not found]                                     ` <2a1954a3-edbd-7ac4-6fde-9b6d046a6560-rdvid1DuHRBWk0Htik3J/w@public.gmane.org>
2017-02-20 11:27                                       ` Vignesh R
2017-02-20 11:27                                         ` Vignesh R
     [not found]                                         ` <954333da-4d15-ce77-4021-268530365edc-l0cyMroinI0@public.gmane.org>
2017-02-20 15:49                                           ` Frode Isaksen
2017-02-20 15:49                                             ` Frode Isaksen
     [not found]                                             ` <69d41f8a-371b-abd9-7e78-f8701652877f-rdvid1DuHRBWk0Htik3J/w@public.gmane.org>
2017-02-21  5:08                                               ` Vignesh R
2017-02-21  5:08                                                 ` Vignesh R
2017-02-17 10:38   ` [PATCH v2 5/6] spi: davinci: do not use DMA if transfer length is less than 16 Frode Isaksen
2017-02-17 10:38     ` Frode Isaksen
     [not found]     ` <1487327904-28311-6-git-send-email-fisaksen-rdvid1DuHRBWk0Htik3J/w@public.gmane.org>
2017-02-17 16:37       ` Arnd Bergmann
2017-02-17 16:37         ` Arnd Bergmann
     [not found]         ` <CAK8P3a24HkxrU-6a727RY5JqwQgyf9SpprPXvurSqf1Q_W_AMQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-02-17 16:43           ` Frode Isaksen
2017-02-17 16:43             ` Frode Isaksen
     [not found]             ` <3358b5f1-c7b6-0955-0c36-6f135a585762-rdvid1DuHRBWk0Htik3J/w@public.gmane.org>
2017-02-17 16:55               ` Arnd Bergmann
2017-02-17 16:55                 ` Arnd Bergmann
2017-02-17 10:38   ` [PATCH v2 6/6] spi: loopback-test: add option to use vmalloc'ed buffers Frode Isaksen
2017-02-17 10:38     ` Frode Isaksen
