* [PATCH] ARM: add PrimeCell generic DMA to MMCI/PL180
@ 2010-10-06  9:42 Linus Walleij
  2010-12-19 16:32 ` Russell King - ARM Linux
  0 siblings, 1 reply; 18+ messages in thread
From: Linus Walleij @ 2010-10-06  9:42 UTC (permalink / raw)
  To: linux-arm-kernel

This extends the MMCI/PL180 driver with generic DMA engine support
using the PrimeCell DMA engine interface.

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
---
Changes to previous version:

- Added burst size compensation for the Ux500 variants: these
  require single-word (32-bit, 4-byte) bursts when the transfer
  size is 16 bytes or less.
---
 drivers/mmc/host/mmci.c   |  270 ++++++++++++++++++++++++++++++++++++++++++---
 drivers/mmc/host/mmci.h   |   13 ++
 include/linux/amba/mmci.h |   16 +++
 3 files changed, 285 insertions(+), 14 deletions(-)

diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 09bcce7..951ac37 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -2,7 +2,7 @@
  *  linux/drivers/mmc/host/mmci.c - ARM PrimeCell MMCI PL180/1 driver
  *
  *  Copyright (C) 2003 Deep Blue Solutions, Ltd, All Rights Reserved.
- *  Copyright (C) 2010 ST-Ericsson AB.
+ *  Copyright (C) 2010 ST-Ericsson SA
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -23,8 +23,10 @@
 #include <linux/clk.h>
 #include <linux/scatterlist.h>
 #include <linux/gpio.h>
-#include <linux/amba/mmci.h>
 #include <linux/regulator/consumer.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/amba/mmci.h>
 
 #include <asm/div64.h>
 #include <asm/io.h>
@@ -40,20 +42,26 @@ static unsigned int fmax = 515633;
  * struct variant_data - MMCI variant-specific quirks
  * @clkreg: default value for MCICLOCK register
  * @clkreg_enable: enable value for MMCICLOCK register
+ * @dmareg_enable: enable value for MMCIDATACTRL register
  * @datalength_bits: number of bits in the MMCIDATALENGTH register
  * @fifosize: number of bytes that can be written when MMCI_TXFIFOEMPTY
  *	      is asserted (likewise for RX)
  * @fifohalfsize: number of bytes that can be written when MCI_TXFIFOHALFEMPTY
  *		  is asserted (likewise for RX)
  * @singleirq: true if only one IRQ line is available
+ * @txsize_threshold: Sets burst size to minimal if transfer size is less
+ *		than or equal to this threshold. This shall be specified in
+ *		number of bytes. Set 0 for no burst compensation.
  */
 struct variant_data {
 	unsigned int		clkreg;
 	unsigned int		clkreg_enable;
+	unsigned int		dmareg_enable;
 	unsigned int		datalength_bits;
 	unsigned int		fifosize;
 	unsigned int		fifohalfsize;
 	bool			singleirq;
+	unsigned int		txsize_threshold;
 };
 
 static struct variant_data variant_arm = {
@@ -74,8 +82,10 @@ static struct variant_data variant_ux500 = {
 	.fifohalfsize		= 8 * 4,
 	.clkreg			= MCI_CLK_ENABLE,
 	.clkreg_enable		= 1 << 14, /* HWFCEN */
+	.dmareg_enable		= 1 << 12, /* DMAREQCTRL */
 	.datalength_bits	= 24,
 	.singleirq		= true,
+	.txsize_threshold	= 16,
 };
 
 /*
@@ -169,6 +179,209 @@ static void mmci_init_sg(struct mmci_host *host, struct mmc_data *data)
 	sg_miter_start(&host->sg_miter, data->sg, data->sg_len, flags);
 }
 
+/*
+ * All the DMA operation mode stuff goes inside this ifdef.
+ * This assumes that you have a generic DMA device interface,
+ * no custom DMA interfaces are supported.
+ */
+#ifdef CONFIG_DMA_ENGINE
+static void __devinit mmci_setup_dma(struct mmci_host *host)
+{
+	struct mmci_platform_data *plat = host->plat;
+	dma_cap_mask_t mask;
+
+	if (!plat || !plat->dma_filter) {
+		dev_err(mmc_dev(host->mmc), "no DMA platform data!\n");
+		return;
+	}
+
+	/* Try to acquire a generic DMA engine slave channel */
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+	/*
+	 * If only an RX channel is specified, the driver will
+	 * attempt to use it bidirectionally, however if it is
+	 * specified but cannot be located, DMA will be disabled.
+	 */
+	host->dma_rx_channel = dma_request_channel(mask,
+						plat->dma_filter,
+						plat->dma_rx_param);
+	/* E.g. if no DMA hardware is present */
+	if (!host->dma_rx_channel) {
+		dev_err(mmc_dev(host->mmc), "no RX DMA channel!\n");
+		return;
+	}
+	if (plat->dma_tx_param) {
+		host->dma_tx_channel = dma_request_channel(mask,
+							   plat->dma_filter,
+							   plat->dma_tx_param);
+		if (!host->dma_tx_channel) {
+			dma_release_channel(host->dma_rx_channel);
+			host->dma_rx_channel = NULL;
+			return;
+		}
+	} else {
+		host->dma_tx_channel = host->dma_rx_channel;
+	}
+	host->dma_enable = true;
+	dev_info(mmc_dev(host->mmc), "use DMA channels DMA RX %s, DMA TX %s\n",
+		 dma_chan_name(host->dma_rx_channel),
+		 dma_chan_name(host->dma_tx_channel));
+}
+
+/*
+ * This is used in __devinit or __devexit so inline it
+ * so it can be discarded.
+ */
+static inline void mmci_disable_dma(struct mmci_host *host)
+{
+	if (host->dma_rx_channel)
+		dma_release_channel(host->dma_rx_channel);
+	if (host->dma_tx_channel)
+		dma_release_channel(host->dma_tx_channel);
+	host->dma_enable = false;
+}
+
+static void mmci_dma_data_end(struct mmci_host *host)
+{
+	struct mmc_data *data = host->data;
+
+	dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
+		     (data->flags & MMC_DATA_WRITE)
+		     ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+}
+
+static void mmci_dma_data_error(struct mmci_host *host)
+{
+	struct mmc_data *data = host->data;
+	struct dma_chan *chan;
+
+	dev_err(mmc_dev(host->mmc), "error during DMA transfer!\n");
+	if (data->flags & MMC_DATA_READ)
+		chan = host->dma_rx_channel;
+	else
+		chan = host->dma_tx_channel;
+	dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
+		     (data->flags & MMC_DATA_WRITE)
+		     ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+	chan->device->device_control(chan, DMA_TERMINATE_ALL, 0);
+}
+
+static int mmci_dma_start_data(struct mmci_host *host, unsigned int datactrl)
+{
+	struct variant_data *variant = host->variant;
+	struct dma_slave_config rx_conf = {
+		.src_addr = host->phybase + MMCIFIFO,
+		.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+		.direction = DMA_FROM_DEVICE,
+		.src_maxburst = variant->fifohalfsize >> 2, /* # of words */
+	};
+	struct dma_slave_config tx_conf = {
+		.dst_addr = host->phybase + MMCIFIFO,
+		.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+		.direction = DMA_TO_DEVICE,
+		.dst_maxburst = variant->fifohalfsize >> 2, /* # of words */
+	};
+	struct mmc_data *data = host->data;
+	enum dma_data_direction direction;
+	struct dma_chan *chan;
+	struct dma_async_tx_descriptor *desc;
+	struct scatterlist *sg;
+	dma_cookie_t cookie;
+	int i;
+
+	datactrl |= MCI_DPSM_DMAENABLE;
+	datactrl |= variant->dmareg_enable;
+
+	if (data->flags & MMC_DATA_READ) {
+		if (host->size <= variant->txsize_threshold)
+			rx_conf.src_maxburst = 1;
+
+		direction = DMA_FROM_DEVICE;
+		chan = host->dma_rx_channel;
+		chan->device->device_control(chan, DMA_SLAVE_CONFIG,
+					     (unsigned long) &rx_conf);
+	} else {
+		if (host->size <= variant->txsize_threshold)
+			tx_conf.dst_maxburst = 1;
+
+		direction = DMA_TO_DEVICE;
+		chan = host->dma_tx_channel;
+		chan->device->device_control(chan, DMA_SLAVE_CONFIG,
+					     (unsigned long) &tx_conf);
+	}
+
+	/* Check for weird stuff in the sg list */
+	for_each_sg(data->sg, sg, data->sg_len, i) {
+		dev_vdbg(mmc_dev(host->mmc), "MMCI SGlist %d dir %d: length: %08x\n",
+			 i, direction, sg->length);
+		if (sg->offset & 3 || sg->length & 3)
+			return -EINVAL;
+	}
+
+	dma_map_sg(mmc_dev(host->mmc), data->sg,
+		   data->sg_len, direction);
+
+	desc = chan->device->device_prep_slave_sg(chan,
+					data->sg, data->sg_len, direction,
+					DMA_CTRL_ACK);
+	if (!desc)
+		goto unmap_exit;
+
+	host->dma_desc = desc;
+	dev_vdbg(mmc_dev(host->mmc), "Submit MMCI DMA job, sglen %d "
+		 "blksz %04x blks %04x flags %08x\n",
+		 data->sg_len, data->blksz, data->blocks, data->flags);
+	cookie = desc->tx_submit(desc);
+
+	/* Here overloaded DMA controllers may fail */
+	if (dma_submit_error(cookie))
+		goto unmap_exit;
+
+	host->dma_on_current_xfer = true;
+	chan->device->device_issue_pending(chan);
+
+	/* Trigger the DMA transfer */
+	writel(datactrl, host->base + MMCIDATACTRL);
+	/*
+	 * Let the MMCI say when the data is ended and it's time
+	 * to fire next DMA request. When that happens, MMCI will
+	 * call mmci_data_end()
+	 */
+	writel(readl(host->base + MMCIMASK0) | MCI_DATAENDMASK,
+	       host->base + MMCIMASK0);
+	return 0;
+
+unmap_exit:
+	chan->device->device_control(chan, DMA_TERMINATE_ALL, 0);
+	dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, direction);
+	host->dma_on_current_xfer = false;
+	return -ENOMEM;
+}
+#else
+/* Blank functions if the DMA engine is not available */
+static inline void mmci_setup_dma(struct mmci_host *host)
+{
+}
+
+static inline void mmci_disable_dma(struct mmci_host *host)
+{
+}
+
+static inline void mmci_dma_data_end(struct mmci_host *host)
+{
+}
+
+static inline void mmci_dma_data_error(struct mmci_host *host)
+{
+}
+
+static inline int mmci_dma_start_data(struct mmci_host *host, unsigned int datactrl)
+{
+	return -ENOSYS;
+}
+#endif
+
 static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
 {
 	struct variant_data *variant = host->variant;
@@ -183,8 +396,8 @@ static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
 	host->data = data;
 	host->size = data->blksz * data->blocks;
 	host->data_xfered = 0;
-
-	mmci_init_sg(host, data);
+	host->blockend = false;
+	host->dataend = false;
 
 	clks = (unsigned long long)data->timeout_ns * host->cclk;
 	do_div(clks, 1000000000UL);
@@ -199,13 +412,32 @@ static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
 	BUG_ON(1 << blksz_bits != data->blksz);
 
 	datactrl = MCI_DPSM_ENABLE | blksz_bits << 4;
-	if (data->flags & MMC_DATA_READ) {
+
+	if (data->flags & MMC_DATA_READ)
 		datactrl |= MCI_DPSM_DIRECTION;
+
+	if (host->dma_enable) {
+		int ret;
+
+		/*
+		 * Attempt to use DMA operation mode, if this
+		 * should fail, fall back to PIO mode
+		 */
+		ret = mmci_dma_start_data(host, datactrl);
+		if (!ret)
+			return;
+	}
+
+	/* IRQ mode, map the SG list for CPU reading/writing */
+	mmci_init_sg(host, data);
+
+	if (data->flags & MMC_DATA_READ) {
 		irqmask = MCI_RXFIFOHALFFULLMASK;
 
 		/*
-		 * If we have less than a FIFOSIZE of bytes to transfer,
-		 * trigger a PIO interrupt as soon as any data is available.
+		 * If we have less than a FIFOSIZE of bytes to
+		 * transfer, trigger a PIO interrupt as soon as any
+		 * data is available.
 		 */
 		if (host->size < variant->fifosize)
 			irqmask |= MCI_RXDATAAVLBLMASK;
@@ -270,9 +502,12 @@ mmci_data_irq(struct mmci_host *host, struct mmc_data *data,
 
 		/*
 		 * We hit an error condition.  Ensure that any data
-		 * partially written to a page is properly coherent.
+		 * partially written to a page is properly coherent,
+		 * unless we're using DMA.
 		 */
-		if (data->flags & MMC_DATA_READ) {
+		if (host->dma_on_current_xfer)
+			mmci_dma_data_error(host);
+		else if (data->flags & MMC_DATA_READ) {
 			struct sg_mapping_iter *sg_miter = &host->sg_miter;
 			unsigned long flags;
 
@@ -308,7 +543,7 @@ mmci_data_irq(struct mmci_host *host, struct mmc_data *data,
 		 * use this progressive update in DMA or single-IRQ
 		 * mode.
 		 */
-		if (!host->variant->singleirq)
+		if (!host->variant->singleirq && !host->dma_on_current_xfer)
 			host->data_xfered += data->blksz;
 		host->blockend = true;
 	}
@@ -321,6 +556,7 @@ mmci_data_irq(struct mmci_host *host, struct mmc_data *data,
 	 * with the blockend signal since they can appear out-of-order.
 	 */
 	if (host->dataend && (host->blockend || host->variant->singleirq)) {
+		mmci_dma_data_end(host);
 		mmci_stop_data(host);
 
 		/* Reset these flags */
@@ -332,7 +568,7 @@ mmci_data_irq(struct mmci_host *host, struct mmc_data *data,
 		 * and in DMA mode this signal need to be synchronized
 		 * with MCI_DATEND
 		 */
-		if (host->variant->singleirq && !data->error)
+		if ((host->variant->singleirq || host->dma_on_current_xfer) && !data->error)
 			host->data_xfered += data->blksz * data->blocks;
 
 		if (!data->stop) {
@@ -761,6 +997,7 @@ static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id)
 		dev_dbg(mmc_dev(mmc), "eventual mclk rate: %u Hz\n",
 			host->mclk);
 	}
+	host->phybase = dev->res.start;
 	host->base = ioremap(dev->res.start, resource_size(&dev->res));
 	if (!host->base) {
 		ret = -ENOMEM;
@@ -871,6 +1108,8 @@ static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id)
 	    && host->gpio_cd_irq < 0)
 		mmc->caps |= MMC_CAP_NEEDS_POLL;
 
+	mmci_setup_dma(host);
+
 	ret = request_irq(dev->irq[0], mmci_irq, IRQF_SHARED, DRIVER_NAME " (cmd)", host);
 	if (ret)
 		goto unmap;
@@ -897,15 +1136,17 @@ static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id)
 
 	mmc_add_host(mmc);
 
-	dev_info(&dev->dev, "%s: MMCI rev %x cfg %02x at 0x%016llx irq %d,%d\n",
-		mmc_hostname(mmc), amba_rev(dev), amba_config(dev),
-		(unsigned long long)dev->res.start, dev->irq[0], dev->irq[1]);
+	dev_info(&dev->dev, "%s: MMCI/PL180 manf %x rev %x cfg %02x at 0x%016llx\n",
+		 mmc_hostname(mmc), amba_manf(dev), amba_rev(dev), amba_config(dev),
+		 (unsigned long long)dev->res.start);
+	dev_info(&dev->dev, "IRQ %d, %d (pio)\n", dev->irq[0], dev->irq[1]);
 
 	return 0;
 
  irq0_free:
 	free_irq(dev->irq[0], host);
  unmap:
+	mmci_disable_dma(host);
 	if (host->gpio_wp != -ENOSYS)
 		gpio_free(host->gpio_wp);
  err_gpio_wp:
@@ -945,6 +1186,7 @@ static int __devexit mmci_remove(struct amba_device *dev)
 		writel(0, host->base + MMCICOMMAND);
 		writel(0, host->base + MMCIDATACTRL);
 
+		mmci_disable_dma(host);
 		free_irq(dev->irq[0], host);
 		if (!variant->singleirq)
 			free_irq(dev->irq[1], host);
diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h
index acac081..9e2a4c6 100644
--- a/drivers/mmc/host/mmci.h
+++ b/drivers/mmc/host/mmci.h
@@ -142,8 +142,11 @@
 
 struct clk;
 struct variant_data;
+struct dma_chan;
+struct dma_async_tx_descriptor;
 
 struct mmci_host {
+	phys_addr_t		phybase;
 	void __iomem		*base;
 	struct mmc_request	*mrq;
 	struct mmc_command	*cmd;
@@ -176,6 +179,16 @@ struct mmci_host {
 	/* pio stuff */
 	struct sg_mapping_iter	sg_miter;
 	unsigned int		size;
+
 	struct regulator	*vcc;
+
+	/* DMA stuff */
+	bool			dma_enable;
+	bool			dma_on_current_xfer;
+#ifdef CONFIG_DMA_ENGINE
+	struct dma_chan		*dma_rx_channel;
+	struct dma_chan		*dma_tx_channel;
+	struct dma_async_tx_descriptor *dma_desc;
+#endif
 };
 
diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h
index f4ee9ac..dbeba68 100644
--- a/include/linux/amba/mmci.h
+++ b/include/linux/amba/mmci.h
@@ -6,6 +6,8 @@
 
 #include <linux/mmc/host.h>
 
+/* Just some dummy forwarding */
+struct dma_chan;
 /**
  * struct mmci_platform_data - platform configuration for the MMCI
  * (also known as PL180) block.
@@ -27,6 +29,17 @@
  * @cd_invert: true if the gpio_cd pin value is active low
  * @capabilities: the capabilities of the block as implemented in
  * this platform, signify anything MMC_CAP_* from mmc/host.h
+ * @dma_filter: function used to select an appropriate RX and TX
+ * DMA channel to be used for DMA, if and only if you're deploying the
+ * generic DMA engine
+ * @dma_rx_param: parameter passed to the DMA allocation
+ * filter in order to select an appropriate RX channel. If
+ * there is a bidirectional RX+TX channel, then just specify
+ * this and leave dma_tx_param set to NULL
+ * @dma_tx_param: parameter passed to the DMA allocation
+ * filter in order to select an appropriate TX channel. If this
+ * is NULL the driver will attempt to use the RX channel as a
+ * bidirectional channel
  */
 struct mmci_platform_data {
 	unsigned int f_max;
@@ -38,6 +51,9 @@ struct mmci_platform_data {
 	int	gpio_cd;
 	bool	cd_invert;
 	unsigned long capabilities;
+	bool (*dma_filter)(struct dma_chan *chan, void *filter_param);
+	void *dma_rx_param;
+	void *dma_tx_param;
 };
 
 #endif
-- 
1.6.3.3


* [PATCH] ARM: add PrimeCell generic DMA to MMCI/PL180
  2010-10-06  9:42 [PATCH] ARM: add PrimeCell generic DMA to MMCI/PL180 Linus Walleij
@ 2010-12-19 16:32 ` Russell King - ARM Linux
  2010-12-19 19:59   ` Russell King - ARM Linux
  0 siblings, 1 reply; 18+ messages in thread
From: Russell King - ARM Linux @ 2010-12-19 16:32 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Oct 06, 2010 at 11:42:39AM +0200, Linus Walleij wrote:
> This extends the MMCI/PL180 driver with generic DMA engine support
> using the PrimeCell DMA engine interface.
> 
> Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>

The latest revision of this doesn't work with non-DMA setups.

You map the DMA scatterlist in mmci_dma_start_data(), called from
mmci_start_data() if host->dma_enable is true (it isn't.)  So the
scatterlist in PIO mode is not mapped.

However, in the IRQ handler, it calls mmci_dma_data_end() irrespective
of whether DMA is being used.  This unconditionally calls dma_unmap_sg(),
which results in a DMA unmap operation happening without a previous
map of the scatterlist.
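
In other words, the unmap in the IRQ path has to be conditional on DMA
actually having been started for this transfer.  A minimal sketch of the
guard, reusing the host->dma_on_current_xfer flag the patch already
tracks (the follow-up patch below does essentially this):

	/* Only undo the DMA mapping if mmci_dma_start_data() set it up */
	if (host->dma_on_current_xfer)
		mmci_dma_data_end(host);
	mmci_stop_data(host);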

mmci-pl18x mb:mmci: no DMA platform data!
mmci-pl18x mb:mmci: mmc0: MMCI/PL180 manf 41 rev 0 cfg 00 at 0x0000000010005000
mmci-pl18x mb:mmci: IRQ 41, 42 (pio)
...
Unable to handle kernel paging request at virtual address bf81d000
pgd = c0004000
[bf81d000] *pgd=00000000
Internal error: Oops: 5 [#1] SMP
last sysfs file:
Modules linked in:
CPU: 0    Not tainted  (2.6.37-rc6+ #410)
PC is at dma_cache_maint_page+0x28/0x11c
LR is at ___dma_page_dev_to_cpu+0x78/0xb8
pc : [<c003dd88>]    lr : [<c003def4>]    psr: 60000193
sp : c03a1e28  ip : c03a1e60  fp : c03a1e5c
r10: c00416a8  r9 : 00000fff  r8 : 00000002
r7 : 00000008  r6 : 00000002  r5 : bf81d000  r4 : 00000000
r3 : 00000002  r2 : 00000008  r1 : 00000000  r0 : bf81d000
Flags: nZCv  IRQs off  FIQs on  Mode SVC_32  ISA ARM  Segment kernel
Control: 10c5387d  Table: 6000404a  DAC: 00000015
Process swapper (pid: 0, stack limit = 0xc03a02f0)
Stack: (0xc03a1e28 to 0xc03a2000)
1e20:                   c0c245c0 a0000193 c03a1e5c 00000000 00000008 00000002
1e40: bf81d000 c03f79a0 00000001 c03b881c c03a1e84 c03a1e60 c003def4 c003dd6c
1e60: c00416a8 2a846e12 e784fe40 00000001 00000002 00000001 c03a1eac c03a1e88
1e80: c003dfcc c003de88 c03a1eac e7ba0200 00000500 00000000 00000500 e784fe00
1ea0: c03a1ee4 c03a1eb0 c0222f8c c003df8c 00000029 e7ba0288 c03a1edc e7b32440
1ec0: 00000000 00000000 00000029 60020474 410fc091 00000000 c03a1f04 c03a1ee8
1ee0: c0085e64 c0222d1c c03a3580 00000029 c03a35c4 c02d696c c03a1f24 c03a1f08
1f00: c00886dc c0085e44 c0059bc0 00000029 00000000 00000029 c03a1f3c c03a1f28
1f20: c0028090 c00885fc ffffffff f8e00100 c03a1f94 c03a1f40 c0034038 c002800c
1f40: 30479e80 c03a6350 00000000 00000000 c03a0000 c03a9db0 c03bf6f4 c02d696c
1f60: 60020474 410fc091 00000000 c03a1f94 c03a1f98 c03a1f88 c0035858 c003585c
1f80: 60000013 ffffffff c03a1fb4 c03a1f98 c0035edc c0035840 c03a6be4 c002199c
1fa0: c03bf640 c03a9da4 c03a1fc4 c03a1fb8 c02ca24c c0035e68 c03a1ff4 c03a1fc8
1fc0: c0008c7c c02ca1f8 c00087a4 00000000 00000000 c002199c 00000000 10c5387d
1fe0: c03a6290 c0021da0 00000000 c03a1ff8 60008038 c0008a48 00000000 00000000
Backtrace:
[<c003dd60>] (dma_cache_maint_page+0x0/0x11c) from [<c003def4>] (___dma_page_dev_to_cpu+0x78/0xb8)
[<c003de7c>] (___dma_page_dev_to_cpu+0x0/0xb8) from [<c003dfcc>] (dma_unmap_sg+0x4c/0x70)
 r7:00000001 r6:00000002 r5:00000001 r4:e784fe40
[<c003df80>] (dma_unmap_sg+0x0/0x70) from [<c0222f8c>] (mmci_irq+0x27c/0x414)
 r8:e784fe00 r7:00000500 r6:00000000 r5:00000500 r4:e7ba0200
[<c0222d10>] (mmci_irq+0x0/0x414) from [<c0085e64>] (handle_IRQ_event+0x2c/0xc8)
[<c0085e38>] (handle_IRQ_event+0x0/0xc8) from [<c00886dc>] (handle_level_irq+0xec/0x180)
 r7:c02d696c r6:c03a35c4 r5:00000029 r4:c03a3580
[<c00885f0>] (handle_level_irq+0x0/0x180) from [<c0028090>] (asm_do_IRQ+0x90/0xcc)
atkbd serio0: keyboard reset failed on mb:kmi0
 r6:00000029 r5:00000000 r4:00000029
[<c0028000>] (asm_do_IRQ+0x0/0xcc) from [<c0034038>] (__irq_svc+0x38/0xc0)
Exception stack(0xc03a1f40 to 0xc03a1f88)
1f40: 30479e80 c03a6350 00000000 00000000 c03a0000 c03a9db0 c03bf6f4 c02d696c
1f60: 60020474 410fc091 00000000 c03a1f94 c03a1f98 c03a1f88 c0035858 c003585c
1f80: 60000013 ffffffff
 r5:f8e00100 r4:ffffffff
[<c0035834>] (default_idle+0x0/0x2c) from [<c0035edc>] (cpu_idle+0x80/0xc4)
[<c0035e5c>] (cpu_idle+0x0/0xc4) from [<c02ca24c>] (rest_init+0x60/0x78)
 r7:c03a9da4 r6:c03bf640 r5:c002199c r4:c03a6be4
[<c02ca1ec>] (rest_init+0x0/0x78) from [<c0008c7c>] (start_kernel+0x240/0x298)
[<c0008a3c>] (start_kernel+0x0/0x298) from [<60008038>] (0x60008038)
 r6:c0021da0 r5:c03a6290 r4:10c5387d
Code: e1a04001 e1a08003 e1a07002 e3009fff (e5953000)
---[ end trace 791035a14e7db7d4 ]---


* [PATCH] ARM: add PrimeCell generic DMA to MMCI/PL180
  2010-12-19 16:32 ` Russell King - ARM Linux
@ 2010-12-19 19:59   ` Russell King - ARM Linux
  2010-12-21 13:54     ` Russell King - ARM Linux
  0 siblings, 1 reply; 18+ messages in thread
From: Russell King - ARM Linux @ 2010-12-19 19:59 UTC (permalink / raw)
  To: linux-arm-kernel

On Sun, Dec 19, 2010 at 04:32:40PM +0000, Russell King - ARM Linux wrote:
> On Wed, Oct 06, 2010 at 11:42:39AM +0200, Linus Walleij wrote:
> > This extends the MMCI/PL180 driver with generic DMA engine support
> > using the PrimeCell DMA engine interface.
> > 
> > Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
> 
> The latest revision of this doesn't work with non-DMA setups.
> 
> You map the DMA scatterlist in mmci_dma_start_data(), called from
> mmci_start_data() if host->dma_enable is true (it isn't.)  So the
> scatterlist in PIO mode is not mapped.
> 
> However, in the IRQ handler, it calls mmci_dma_data_end() irrespective
> of whether DMA is being used.  This unconditionally calls dma_unmap_sg(),
> which results in a DMA unmap operation happening without a previous
> map of the scatterlist.

Here's a patch which fixes this, and a few other minor changes:
- make it optional whether the platform supplies data for the rx and tx
  channels (which controls which DMA channels are allocated).  If we can
  get, e.g., just the rx channel without the tx channel, we're satisfied.

- get rid of host->dma_enable - we can use the presence of
  host->dma_*_channel to indicate whether we can do DMA in the required
  direction.

- get rid of the write-only host->dma_desc.

This then boots on Versatile Express without DMA support, but with DMA
engine support enabled.

diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 0b4a5bf..c8c5315 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -2,7 +2,7 @@
  *  linux/drivers/mmc/host/mmci.c - ARM PrimeCell MMCI PL180/1 driver
  *
  *  Copyright (C) 2003 Deep Blue Solutions, Ltd, All Rights Reserved.
- *  Copyright (C) 2010 ST-Ericsson AB.
+ *  Copyright (C) 2010 ST-Ericsson SA
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -24,8 +24,10 @@
 #include <linux/clk.h>
 #include <linux/scatterlist.h>
 #include <linux/gpio.h>
-#include <linux/amba/mmci.h>
 #include <linux/regulator/consumer.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/amba/mmci.h>
 
 #include <asm/div64.h>
 #include <asm/io.h>
@@ -41,11 +43,15 @@ static unsigned int fmax = 515633;
  * struct variant_data - MMCI variant-specific quirks
  * @clkreg: default value for MCICLOCK register
  * @clkreg_enable: enable value for MMCICLOCK register
+ * @dmareg_enable: enable value for MMCIDATACTRL register
  * @datalength_bits: number of bits in the MMCIDATALENGTH register
  * @fifosize: number of bytes that can be written when MMCI_TXFIFOEMPTY
  *	      is asserted (likewise for RX)
  * @fifohalfsize: number of bytes that can be written when MCI_TXFIFOHALFEMPTY
  *		  is asserted (likewise for RX)
+ * @txsize_threshold: Sets DMA burst size to minimal if transfer size is
+ *		less than or equal to this threshold. This shall be specified in
+ *		number of bytes. Set 0 for no burst compensation
  * @broken_blockend: the MCI_DATABLOCKEND is broken on the hardware
  *		and will not work at all.
  * @broken_blockend_dma: the MCI_DATABLOCKEND is broken on the hardware when
@@ -56,9 +62,11 @@ static unsigned int fmax = 515633;
 struct variant_data {
 	unsigned int		clkreg;
 	unsigned int		clkreg_enable;
+	unsigned int		dmareg_enable;
 	unsigned int		datalength_bits;
 	unsigned int		fifosize;
 	unsigned int		fifohalfsize;
+	unsigned int		txsize_threshold;
 	bool			broken_blockend;
 	bool			broken_blockend_dma;
 	bool			sdio;
@@ -83,8 +91,10 @@ static struct variant_data variant_u300 = {
 static struct variant_data variant_ux500 = {
 	.fifosize		= 30 * 4,
 	.fifohalfsize		= 8 * 4,
+	.txsize_threshold	= 16,
 	.clkreg			= MCI_CLK_ENABLE,
 	.clkreg_enable		= 1 << 14, /* HWFCEN */
+	.dmareg_enable		= 1 << 12, /* DMAREQCTRL */
 	.datalength_bits	= 24,
 	.broken_blockend	= true,
 	.sdio			= true,
@@ -196,6 +206,222 @@ static void mmci_init_sg(struct mmci_host *host, struct mmc_data *data)
 	sg_miter_start(&host->sg_miter, data->sg, data->sg_len, flags);
 }
 
+/*
+ * All the DMA operation mode stuff goes inside this ifdef.
+ * This assumes that you have a generic DMA device interface,
+ * no custom DMA interfaces are supported.
+ */
+#ifdef CONFIG_DMA_ENGINE
+static void __devinit mmci_setup_dma(struct mmci_host *host)
+{
+	struct mmci_platform_data *plat = host->plat;
+	const char *rxname, *txname;
+	dma_cap_mask_t mask;
+
+	if (!plat || !plat->dma_filter) {
+		dev_err(mmc_dev(host->mmc), "no DMA platform data!\n");
+		return;
+	}
+
+	/* Try to acquire a generic DMA engine slave channel */
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	/*
+	 * If only an RX channel is specified, the driver will
+	 * attempt to use it bidirectionally, however if it is
+	 * specified but cannot be located, DMA will be disabled.
+	 */
+	if (plat->dma_rx_param) {
+		host->dma_rx_channel = dma_request_channel(mask,
+							   plat->dma_filter,
+							   plat->dma_rx_param);
+		/* E.g. if no DMA hardware is present */
+		if (!host->dma_rx_channel)
+			dev_err(mmc_dev(host->mmc), "no RX DMA channel!\n");
+	}
+
+	if (plat->dma_tx_param) {
+		host->dma_tx_channel = dma_request_channel(mask,
+							   plat->dma_filter,
+							   plat->dma_tx_param);
+		if (!host->dma_tx_channel)
+			dev_warn(mmc_dev(host->mmc), "no TX DMA channel\n");
+	} else {
+		host->dma_tx_channel = host->dma_rx_channel;
+	}
+
+	if (host->dma_rx_channel)
+		rxname = dma_chan_name(host->dma_rx_channel);
+	else
+		rxname = "none";
+
+	if (host->dma_tx_channel)
+		txname = dma_chan_name(host->dma_tx_channel);
+	else
+		txname = "none";
+
+	dev_info(mmc_dev(host->mmc), "use DMA channels DMA RX %s, DMA TX %s\n",
+		 rxname, txname);
+}
+
+/*
+ * This is used in __devinit or __devexit so inline it
+ * so it can be discarded.
+ */
+static inline void mmci_disable_dma(struct mmci_host *host)
+{
+	if (host->dma_rx_channel)
+		dma_release_channel(host->dma_rx_channel);
+	if (host->dma_tx_channel)
+		dma_release_channel(host->dma_tx_channel);
+}
+
+static void mmci_dma_data_end(struct mmci_host *host)
+{
+	struct mmc_data *data = host->data;
+
+	dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
+		     (data->flags & MMC_DATA_WRITE)
+		     ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+}
+
+static void mmci_dma_data_error(struct mmci_host *host)
+{
+	struct mmc_data *data = host->data;
+	struct dma_chan *chan;
+
+	dev_err(mmc_dev(host->mmc), "error during DMA transfer!\n");
+	if (data->flags & MMC_DATA_READ)
+		chan = host->dma_rx_channel;
+	else
+		chan = host->dma_tx_channel;
+	dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
+		     (data->flags & MMC_DATA_WRITE)
+		     ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+	chan->device->device_control(chan, DMA_TERMINATE_ALL, 0);
+}
+
+static int mmci_dma_start_data(struct mmci_host *host, unsigned int datactrl)
+{
+	struct variant_data *variant = host->variant;
+	struct dma_slave_config rx_conf = {
+		.src_addr = host->phybase + MMCIFIFO,
+		.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+		.direction = DMA_FROM_DEVICE,
+		.src_maxburst = variant->fifohalfsize >> 2, /* # of words */
+	};
+	struct dma_slave_config tx_conf = {
+		.dst_addr = host->phybase + MMCIFIFO,
+		.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+		.direction = DMA_TO_DEVICE,
+		.dst_maxburst = variant->fifohalfsize >> 2, /* # of words */
+	};
+	struct dma_slave_config *conf;
+	struct mmc_data *data = host->data;
+	enum dma_data_direction direction;
+	struct dma_chan *chan;
+	struct dma_async_tx_descriptor *desc;
+	struct scatterlist *sg;
+	dma_cookie_t cookie;
+	int i;
+
+	if (data->flags & MMC_DATA_READ) {
+		if (host->size <= variant->txsize_threshold)
+			rx_conf.src_maxburst = 1;
+
+		direction = DMA_FROM_DEVICE;
+		chan = host->dma_rx_channel;
+		conf = &rx_conf;
+	} else {
+		if (host->size <= variant->txsize_threshold)
+			tx_conf.dst_maxburst = 1;
+
+		direction = DMA_TO_DEVICE;
+		chan = host->dma_tx_channel;
+		conf = &tx_conf;
+	}
+
+	/* If there's no DMA channel, fall back to PIO */
+	if (!chan)
+		return -EINVAL;
+
+	datactrl |= MCI_DPSM_DMAENABLE;
+	datactrl |= variant->dmareg_enable;
+
+	chan->device->device_control(chan, DMA_SLAVE_CONFIG,
+				     (unsigned long) conf);
+
+	/* Check for weird stuff in the sg list */
+	for_each_sg(data->sg, sg, data->sg_len, i) {
+		dev_vdbg(mmc_dev(host->mmc), "MMCI SGlist %d dir %d: length: %08x\n",
+			 i, direction, sg->length);
+		if (sg->offset & 3 || sg->length & 3)
+			return -EINVAL;
+	}
+
+	dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len, direction);
+
+	desc = chan->device->device_prep_slave_sg(chan, data->sg,
+						  data->sg_len, direction,
+						  DMA_CTRL_ACK);
+	if (!desc)
+		goto unmap_exit;
+
+	dev_vdbg(mmc_dev(host->mmc), "Submit MMCI DMA job, sglen %d "
+		 "blksz %04x blks %04x flags %08x\n",
+		 data->sg_len, data->blksz, data->blocks, data->flags);
+	cookie = desc->tx_submit(desc);
+
+	/* Here overloaded DMA controllers may fail */
+	if (dma_submit_error(cookie))
+		goto unmap_exit;
+
+	host->dma_on_current_xfer = true;
+	chan->device->device_issue_pending(chan);
+
+	/* Trigger the DMA transfer */
+	writel(datactrl, host->base + MMCIDATACTRL);
+
+	/*
+	 * Let the MMCI say when the data is ended and it's time
+	 * to fire next DMA request. When that happens, MMCI will
+	 * call mmci_data_end()
+	 */
+	writel(readl(host->base + MMCIMASK0) | MCI_DATAENDMASK,
+	       host->base + MMCIMASK0);
+	return 0;
+
+unmap_exit:
+	chan->device->device_control(chan, DMA_TERMINATE_ALL, 0);
+	dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len, direction);
+	host->dma_on_current_xfer = false;
+	return -ENOMEM;
+}
+#else
+/* Blank functions if the DMA engine is not available */
+static inline void mmci_setup_dma(struct mmci_host *host)
+{
+}
+
+static inline void mmci_disable_dma(struct mmci_host *host)
+{
+}
+
+static inline void mmci_dma_data_end(struct mmci_host *host)
+{
+}
+
+static inline void mmci_dma_data_error(struct mmci_host *host)
+{
+}
+
+static inline int mmci_dma_start_data(struct mmci_host *host, unsigned int datactrl)
+{
+	return -ENOSYS;
+}
+#endif
+
 static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
 {
 	struct variant_data *variant = host->variant;
@@ -213,8 +439,6 @@ static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
 	host->blockend = false;
 	host->dataend = false;
 
-	mmci_init_sg(host, data);
-
 	clks = (unsigned long long)data->timeout_ns * host->cclk;
 	do_div(clks, 1000000000UL);
 
@@ -228,13 +452,27 @@ static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
 	BUG_ON(1 << blksz_bits != data->blksz);
 
 	datactrl = MCI_DPSM_ENABLE | blksz_bits << 4;
-	if (data->flags & MMC_DATA_READ) {
+
+	if (data->flags & MMC_DATA_READ)
 		datactrl |= MCI_DPSM_DIRECTION;
+
+	/*
+	 * Attempt to use DMA operation mode, if this
+	 * should fail, fall back to PIO mode
+	 */
+	if (!mmci_dma_start_data(host, datactrl))
+		return;
+
+	/* IRQ mode, map the SG list for CPU reading/writing */
+	mmci_init_sg(host, data);
+
+	if (data->flags & MMC_DATA_READ) {
 		irqmask = MCI_RXFIFOHALFFULLMASK;
 
 		/*
-		 * If we have less than a FIFOSIZE of bytes to transfer,
-		 * trigger a PIO interrupt as soon as any data is available.
+		 * If we have less than a FIFOSIZE of bytes to
+		 * transfer, trigger a PIO interrupt as soon as any
+		 * data is available.
 		 */
 		if (host->size < variant->fifosize)
 			irqmask |= MCI_RXDATAAVLBLMASK;
@@ -306,9 +544,12 @@ mmci_data_irq(struct mmci_host *host, struct mmc_data *data,
 
 		/*
 		 * We hit an error condition.  Ensure that any data
-		 * partially written to a page is properly coherent.
+		 * partially written to a page is properly coherent,
+		 * unless we're using DMA.
 		 */
-		if (data->flags & MMC_DATA_READ) {
+		if (host->dma_on_current_xfer)
+			mmci_dma_data_error(host);
+		else if (data->flags & MMC_DATA_READ) {
 			struct sg_mapping_iter *sg_miter = &host->sg_miter;
 			unsigned long flags;
 
@@ -351,7 +592,8 @@ mmci_data_irq(struct mmci_host *host, struct mmc_data *data,
 		 * flag is unreliable: since it can stay high between
 		 * IRQs it will corrupt the transfer counter.
 		 */
-		if (!variant->broken_blockend)
+		if (!variant->broken_blockend &&
+		    !(host->dma_on_current_xfer && variant->broken_blockend_dma))
 			host->data_xfered += data->blksz;
 		host->blockend = true;
 	}
@@ -365,6 +607,8 @@ mmci_data_irq(struct mmci_host *host, struct mmc_data *data,
 	 * appear out-of-order.
 	 */
 	if (host->dataend && (host->blockend || variant->broken_blockend)) {
+		if (host->dma_on_current_xfer)
+			mmci_dma_data_end(host);
 		mmci_stop_data(host);
 
 		/* Reset these flags */
@@ -375,7 +619,9 @@ mmci_data_irq(struct mmci_host *host, struct mmc_data *data,
 		 * Variants with broken blockend flags need to handle the
 		 * end of the entire transfer here.
 		 */
-		if (variant->broken_blockend && !data->error)
+		if ((variant->broken_blockend ||
+		     (host->dma_on_current_xfer && variant->broken_blockend_dma))
+		    && !data->error)
 			host->data_xfered += data->blksz * data->blocks;
 
 		if (!data->stop) {
@@ -828,6 +1074,7 @@ static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id)
 		dev_dbg(mmc_dev(mmc), "eventual mclk rate: %u Hz\n",
 			host->mclk);
 	}
+	host->phybase = dev->res.start;
 	host->base = ioremap(dev->res.start, resource_size(&dev->res));
 	if (!host->base) {
 		ret = -ENOMEM;
@@ -938,6 +1185,8 @@ static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id)
 	    && host->gpio_cd_irq < 0)
 		mmc->caps |= MMC_CAP_NEEDS_POLL;
 
+	mmci_setup_dma(host);
+
 	ret = request_irq(dev->irq[0], mmci_irq, IRQF_SHARED, DRIVER_NAME " (cmd)", host);
 	if (ret)
 		goto unmap;
@@ -962,15 +1211,17 @@ static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id)
 
 	mmc_add_host(mmc);
 
-	dev_info(&dev->dev, "%s: MMCI rev %x cfg %02x at 0x%016llx irq %d,%d\n",
-		mmc_hostname(mmc), amba_rev(dev), amba_config(dev),
-		(unsigned long long)dev->res.start, dev->irq[0], dev->irq[1]);
+	dev_info(&dev->dev, "%s: MMCI/PL180 manf %x rev %x cfg %02x at 0x%016llx\n",
+		 mmc_hostname(mmc), amba_manf(dev), amba_rev(dev), amba_config(dev),
+		 (unsigned long long)dev->res.start);
+	dev_info(&dev->dev, "IRQ %d, %d (pio)\n", dev->irq[0], dev->irq[1]);
 
 	return 0;
 
  irq0_free:
 	free_irq(dev->irq[0], host);
  unmap:
+	mmci_disable_dma(host);
 	if (host->gpio_wp != -ENOSYS)
 		gpio_free(host->gpio_wp);
  err_gpio_wp:
@@ -1009,6 +1260,7 @@ static int __devexit mmci_remove(struct amba_device *dev)
 		writel(0, host->base + MMCICOMMAND);
 		writel(0, host->base + MMCIDATACTRL);
 
+		mmci_disable_dma(host);
 		free_irq(dev->irq[0], host);
 		if (!host->singleirq)
 			free_irq(dev->irq[1], host);
diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h
index df06f01..675f810 100644
--- a/drivers/mmc/host/mmci.h
+++ b/drivers/mmc/host/mmci.h
@@ -148,8 +148,11 @@
 
 struct clk;
 struct variant_data;
+struct dma_chan;
+struct dma_async_tx_descriptor;
 
 struct mmci_host {
+	phys_addr_t		phybase;
 	void __iomem		*base;
 	struct mmc_request	*mrq;
 	struct mmc_command	*cmd;
@@ -183,6 +186,14 @@ struct mmci_host {
 	/* pio stuff */
 	struct sg_mapping_iter	sg_miter;
 	unsigned int		size;
+
 	struct regulator	*vcc;
+
+	/* DMA stuff */
+	bool			dma_on_current_xfer;
+#ifdef CONFIG_DMA_ENGINE
+	struct dma_chan		*dma_rx_channel;
+	struct dma_chan		*dma_tx_channel;
+#endif
 };
 
diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h
index f4ee9ac..dbeba68 100644
--- a/include/linux/amba/mmci.h
+++ b/include/linux/amba/mmci.h
@@ -6,6 +6,8 @@
 
 #include <linux/mmc/host.h>
 
+/* Just some dummy forwarding */
+struct dma_chan;
 /**
  * struct mmci_platform_data - platform configuration for the MMCI
  * (also known as PL180) block.
@@ -27,6 +29,17 @@
  * @cd_invert: true if the gpio_cd pin value is active low
  * @capabilities: the capabilities of the block as implemented in
  * this platform, signify anything MMC_CAP_* from mmc/host.h
+ * @dma_filter: function used to select an appropriate RX and TX
+ * DMA channel to be used for DMA, if and only if you're deploying the
+ * generic DMA engine
+ * @dma_rx_param: parameter passed to the DMA allocation
+ * filter in order to select an appropriate RX channel. If
+ * there is a bidirectional RX+TX channel, then just specify
+ * this and leave dma_tx_param set to NULL
+ * @dma_tx_param: parameter passed to the DMA allocation
+ * filter in order to select an appropriate TX channel. If this
+ * is NULL the driver will attempt to use the RX channel as a
+ * bidirectional channel
  */
 struct mmci_platform_data {
 	unsigned int f_max;
@@ -38,6 +51,9 @@ struct mmci_platform_data {
 	int	gpio_cd;
 	bool	cd_invert;
 	unsigned long capabilities;
+	bool (*dma_filter)(struct dma_chan *chan, void *filter_param);
+	void *dma_rx_param;
+	void *dma_tx_param;
 };
 
 #endif


* [PATCH] ARM: add PrimeCell generic DMA to MMCI/PL180
  2010-12-19 19:59   ` Russell King - ARM Linux
@ 2010-12-21 13:54     ` Russell King - ARM Linux
  2010-12-21 15:59       ` Russell King - ARM Linux
  0 siblings, 1 reply; 18+ messages in thread
From: Russell King - ARM Linux @ 2010-12-21 13:54 UTC (permalink / raw)
  To: linux-arm-kernel

On Sun, Dec 19, 2010 at 07:59:59PM +0000, Russell King - ARM Linux wrote:
> +static void mmci_dma_data_end(struct mmci_host *host)
> +{
> +	struct mmc_data *data = host->data;
> +
> +	dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,

Note that when using a DMA agent separate from the targeted device, the
struct device appropriate for performing the mapping is the DMA agent
itself, not the targeted device.

That's because the memory which is addressable by DMA is determined by
the DMA agent rather than the device.  It's much the same as how a USB
bus device submitting buffers needs to map them using the USB _host_
device, rather than its own struct device.

IOW, dma_chan->device->dev, not mmc_dev(host->mmc).

Same goes for other primecell drivers.  I'll update this patch later today
for that.
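
For the unmap path, that means something like the following minimal
sketch (assuming the channel used for the transfer is kept at hand, as
the updated patch below does in host->dma_current):

	struct dma_chan *chan = host->dma_current;

	/* Map/unmap against the DMA engine's device, not the MMC host's */
	dma_unmap_sg(chan->device->dev, data->sg, data->sg_len,
		     (data->flags & MMC_DATA_WRITE)
		     ? DMA_TO_DEVICE : DMA_FROM_DEVICE);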


* [PATCH] ARM: add PrimeCell generic DMA to MMCI/PL180
  2010-12-21 13:54     ` Russell King - ARM Linux
@ 2010-12-21 15:59       ` Russell King - ARM Linux
  2010-12-22 21:55         ` Linus Walleij
  0 siblings, 1 reply; 18+ messages in thread
From: Russell King - ARM Linux @ 2010-12-21 15:59 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Dec 21, 2010 at 01:54:21PM +0000, Russell King - ARM Linux wrote:
> On Sun, Dec 19, 2010 at 07:59:59PM +0000, Russell King - ARM Linux wrote:
> > +static void mmci_dma_data_end(struct mmci_host *host)
> > +{
> > +	struct mmc_data *data = host->data;
> > +
> > +	dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
> 
> Note that when using a DMA agent separate from the targeted device, the
> struct device appropriate for performing the mapping is the DMA agent
> itself, not the targeted device.
> 
> That's because the memory which is addressable by DMA is determined by
> the DMA agent rather than the device.  It's much the same as how a USB
> bus device submitting buffers needs to map them using the USB _host_
> device, rather than its own struct device.
> 
> IOW, dma_chan->device->dev, not mmc_dev(host->mmc).
> 
> Same goes for other primecell drivers.  I'll update this patch later today
> for that.

This also contains a few other tweaks on top of your original patch.
All the DMA conditionals get optimized out when we're not building for
a DMA-engine-enabled platform.

Note that in your original code, mmci_dma_data_error() is always followed
by a call to mmci_dma_data_end(), and that resulted in dma_unmap_sg()
being called twice for the same buffer.

Also, dma_map_sg() returns the number of physical entries mapped, or zero
on failure.  Should we have a DMA mapping implementation which coalesces
entries, the returned number will be less than data->sg_len, and it is the
number of coalesced entries which should be passed to the DMA engine driver.
You have this bug in the PL022 driver too, so I suspect that your other
drivers need fixing for this.
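
A minimal sketch of the pattern (this is what mmci_dma_start_data() in
the patch below now does):

	int nr_sg;

	nr_sg = dma_map_sg(device->dev, data->sg, data->sg_len,
			   conf->direction);
	if (nr_sg == 0)
		return -EINVAL;

	/* Hand the mapped (possibly coalesced) count to the DMA engine */
	desc = device->device_prep_slave_sg(chan, data->sg, nr_sg,
					    conf->direction, DMA_CTRL_ACK);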

diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 0b4a5bf..bd1fa13 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -2,7 +2,7 @@
  *  linux/drivers/mmc/host/mmci.c - ARM PrimeCell MMCI PL180/1 driver
  *
  *  Copyright (C) 2003 Deep Blue Solutions, Ltd, All Rights Reserved.
- *  Copyright (C) 2010 ST-Ericsson AB.
+ *  Copyright (C) 2010 ST-Ericsson SA
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -24,8 +24,10 @@
 #include <linux/clk.h>
 #include <linux/scatterlist.h>
 #include <linux/gpio.h>
-#include <linux/amba/mmci.h>
 #include <linux/regulator/consumer.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/amba/mmci.h>
 
 #include <asm/div64.h>
 #include <asm/io.h>
@@ -41,11 +43,15 @@ static unsigned int fmax = 515633;
  * struct variant_data - MMCI variant-specific quirks
  * @clkreg: default value for MCICLOCK register
  * @clkreg_enable: enable value for MMCICLOCK register
+ * @dmareg_enable: enable value for MMCIDATACTRL register
  * @datalength_bits: number of bits in the MMCIDATALENGTH register
  * @fifosize: number of bytes that can be written when MMCI_TXFIFOEMPTY
  *	      is asserted (likewise for RX)
  * @fifohalfsize: number of bytes that can be written when MCI_TXFIFOHALFEMPTY
  *		  is asserted (likewise for RX)
+ * @dmasize_threshold: Sets DMA burst size to minimal if transfer size is
+ *		less than or equal to this threshold. This shall be specified in
+ *		number of bytes. Set 0 for no burst compensation
  * @broken_blockend: the MCI_DATABLOCKEND is broken on the hardware
  *		and will not work at all.
  * @broken_blockend_dma: the MCI_DATABLOCKEND is broken on the hardware when
@@ -56,9 +62,11 @@ static unsigned int fmax = 515633;
 struct variant_data {
 	unsigned int		clkreg;
 	unsigned int		clkreg_enable;
+	unsigned int		dmareg_enable;
 	unsigned int		datalength_bits;
 	unsigned int		fifosize;
 	unsigned int		fifohalfsize;
+	unsigned int		dmasize_threshold;
 	bool			broken_blockend;
 	bool			broken_blockend_dma;
 	bool			sdio;
@@ -83,8 +91,10 @@ static struct variant_data variant_u300 = {
 static struct variant_data variant_ux500 = {
 	.fifosize		= 30 * 4,
 	.fifohalfsize		= 8 * 4,
+	.dmasize_threshold	= 16,
 	.clkreg			= MCI_CLK_ENABLE,
 	.clkreg_enable		= 1 << 14, /* HWFCEN */
+	.dmareg_enable		= 1 << 12, /* DMAREQCTRL */
 	.datalength_bits	= 24,
 	.broken_blockend	= true,
 	.sdio			= true,
@@ -196,6 +206,224 @@ static void mmci_init_sg(struct mmci_host *host, struct mmc_data *data)
 	sg_miter_start(&host->sg_miter, data->sg, data->sg_len, flags);
 }
 
+/*
+ * All the DMA operation mode stuff goes inside this ifdef.
+ * This assumes that you have a generic DMA device interface,
+ * no custom DMA interfaces are supported.
+ */
+#ifdef CONFIG_DMA_ENGINE
+static void __devinit mmci_dma_setup(struct mmci_host *host)
+{
+	struct mmci_platform_data *plat = host->plat;
+	const char *rxname, *txname;
+	dma_cap_mask_t mask;
+
+	if (!plat || !plat->dma_filter) {
+		dev_err(mmc_dev(host->mmc), "no DMA platform data\n");
+		return;
+	}
+
+	/* Try to acquire a generic DMA engine slave channel */
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	/*
+	 * If only an RX channel is specified, the driver will
+	 * attempt to use it bidirectionally, however if it is
+	 * specified but cannot be located, DMA will be disabled.
+	 */
+	if (plat->dma_rx_param) {
+		host->dma_rx_channel = dma_request_channel(mask,
+							   plat->dma_filter,
+							   plat->dma_rx_param);
+		/* E.g. if no DMA hardware is present */
+		if (!host->dma_rx_channel)
+			dev_err(mmc_dev(host->mmc), "no RX DMA channel\n");
+	}
+
+	if (plat->dma_tx_param) {
+		host->dma_tx_channel = dma_request_channel(mask,
+							   plat->dma_filter,
+							   plat->dma_tx_param);
+		if (!host->dma_tx_channel)
+			dev_warn(mmc_dev(host->mmc), "no TX DMA channel\n");
+	} else {
+		host->dma_tx_channel = host->dma_rx_channel;
+	}
+
+	if (host->dma_rx_channel)
+		rxname = dma_chan_name(host->dma_rx_channel);
+	else
+		rxname = "none";
+
+	if (host->dma_tx_channel)
+		txname = dma_chan_name(host->dma_tx_channel);
+	else
+		txname = "none";
+
+	dev_info(mmc_dev(host->mmc), "DMA channels DMA RX %s, DMA TX %s\n",
+		 rxname, txname);
+}
+
+/*
+ * This is used in __devinit or __devexit so inline it
+ * so it can be discarded.
+ */
+static inline void mmci_dma_release(struct mmci_host *host)
+{
+	struct mmci_platform_data *plat = host->plat;
+
+	if (host->dma_rx_channel)
+		dma_release_channel(host->dma_rx_channel);
+	if (host->dma_tx_channel && plat->dma_tx_param)
+		dma_release_channel(host->dma_tx_channel);
+}
+
+static void mmci_dma_unmap(struct mmci_host *host, struct mmc_data *data)
+{
+	if (dma_inprogress(host)) {
+		struct dma_device *dmadev = host->dma_current->device;
+		enum dma_data_direction dir;
+
+		if (data->flags & MMC_DATA_WRITE) {
+			dir = DMA_TO_DEVICE;
+		} else {
+			dir = DMA_FROM_DEVICE;
+		}
+
+		dma_unmap_sg(dmadev->dev, data->sg, data->sg_len, dir);
+	}
+}
+
+static void mmci_dma_data_error(struct mmci_host *host)
+{
+	struct dma_chan *chan = host->dma_current;
+
+	dev_err(mmc_dev(host->mmc), "error during DMA transfer!\n");
+	chan->device->device_control(chan, DMA_TERMINATE_ALL, 0);
+}
+
+static int mmci_dma_start_data(struct mmci_host *host, unsigned int datactrl)
+{
+	struct variant_data *variant = host->variant;
+	struct dma_slave_config rx_conf = {
+		.src_addr = host->phybase + MMCIFIFO,
+		.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+		.direction = DMA_FROM_DEVICE,
+		.src_maxburst = variant->fifohalfsize >> 2, /* # of words */
+	};
+	struct dma_slave_config tx_conf = {
+		.dst_addr = host->phybase + MMCIFIFO,
+		.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+		.direction = DMA_TO_DEVICE,
+		.dst_maxburst = variant->fifohalfsize >> 2, /* # of words */
+	};
+	struct dma_slave_config *conf;
+	struct mmc_data *data = host->data;
+	struct dma_chan *chan;
+	struct dma_device *device;
+	struct dma_async_tx_descriptor *desc;
+	struct scatterlist *sg;
+	dma_cookie_t cookie;
+	int i, nr_sg;
+
+	host->dma_current = NULL;
+
+	if (data->flags & MMC_DATA_READ) {
+		if (host->size <= variant->dmasize_threshold)
+			rx_conf.src_maxburst = 1;
+
+		chan = host->dma_rx_channel;
+		conf = &rx_conf;
+	} else {
+		if (host->size <= variant->dmasize_threshold)
+			tx_conf.dst_maxburst = 1;
+
+		chan = host->dma_tx_channel;
+		conf = &tx_conf;
+	}
+
+	/* If there's no DMA channel, fall back to PIO */
+	if (!chan)
+		return -EINVAL;
+
+	/* Check for weird stuff in the sg list */
+	for_each_sg(data->sg, sg, data->sg_len, i) {
+		dev_vdbg(mmc_dev(host->mmc), "MMCI SGlist %d dir %d: length: %08x\n",
+			 i, conf->direction, sg->length);
+		if (sg->offset & 3 || sg->length & 3)
+			return -EINVAL;
+	}
+
+	device = chan->device;
+	nr_sg = dma_map_sg(device->dev, data->sg, data->sg_len,
+			   conf->direction);
+	if (nr_sg == 0)
+		return -EINVAL;
+
+	device->device_control(chan, DMA_SLAVE_CONFIG, (unsigned long)conf);
+	desc = device->device_prep_slave_sg(chan, data->sg, nr_sg,
+					    conf->direction, DMA_CTRL_ACK);
+	if (!desc)
+		goto unmap_exit;
+
+	dev_vdbg(mmc_dev(host->mmc),
+		 "Submit MMCI DMA job, sglen %d blksz %04x blks %04x flags %08x\n",
+		 data->sg_len, data->blksz, data->blocks, data->flags);
+	cookie = desc->tx_submit(desc);
+
+	/* Here overloaded DMA controllers may fail */
+	if (dma_submit_error(cookie))
+		goto unmap_exit;
+
+	host->dma_current = chan;
+
+	device->device_issue_pending(chan);
+
+	datactrl |= MCI_DPSM_DMAENABLE;
+	datactrl |= variant->dmareg_enable;
+
+	/* Trigger the DMA transfer */
+	writel(datactrl, host->base + MMCIDATACTRL);
+
+	/*
+	 * Let the MMCI say when the data is ended and it's time
+	 * to fire next DMA request. When that happens, MMCI will
+	 * call mmci_data_end()
+	 */
+	writel(readl(host->base + MMCIMASK0) | MCI_DATAENDMASK,
+	       host->base + MMCIMASK0);
+	return 0;
+
+unmap_exit:
+	device->device_control(chan, DMA_TERMINATE_ALL, 0);
+	dma_unmap_sg(device->dev, data->sg, data->sg_len, conf->direction);
+	return -ENOMEM;
+}
+#else
+/* Blank functions if the DMA engine is not available */
+static inline void mmci_dma_setup(struct mmci_host *host)
+{
+}
+
+static inline void mmci_dma_release(struct mmci_host *host)
+{
+}
+
+static inline void mmci_dma_unmap(struct mmci_host *host, struct mmc_data *data)
+{
+}
+
+static inline void mmci_dma_data_error(struct mmci_host *host)
+{
+}
+
+static inline int mmci_dma_start_data(struct mmci_host *host, unsigned int datactrl)
+{
+	return -ENOSYS;
+}
+#endif
+
 static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
 {
 	struct variant_data *variant = host->variant;
@@ -213,8 +441,6 @@ static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
 	host->blockend = false;
 	host->dataend = false;
 
-	mmci_init_sg(host, data);
-
 	clks = (unsigned long long)data->timeout_ns * host->cclk;
 	do_div(clks, 1000000000UL);
 
@@ -228,13 +454,27 @@ static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
 	BUG_ON(1 << blksz_bits != data->blksz);
 
 	datactrl = MCI_DPSM_ENABLE | blksz_bits << 4;
-	if (data->flags & MMC_DATA_READ) {
+
+	if (data->flags & MMC_DATA_READ)
 		datactrl |= MCI_DPSM_DIRECTION;
+
+	/*
+	 * Attempt to use DMA operation mode, if this
+	 * should fail, fall back to PIO mode
+	 */
+	if (!mmci_dma_start_data(host, datactrl))
+		return;
+
+	/* IRQ mode, map the SG list for CPU reading/writing */
+	mmci_init_sg(host, data);
+
+	if (data->flags & MMC_DATA_READ) {
 		irqmask = MCI_RXFIFOHALFFULLMASK;
 
 		/*
-		 * If we have less than a FIFOSIZE of bytes to transfer,
-		 * trigger a PIO interrupt as soon as any data is available.
+		 * If we have less than a FIFOSIZE of bytes to
+		 * transfer, trigger a PIO interrupt as soon as any
+		 * data is available.
 		 */
 		if (host->size < variant->fifosize)
 			irqmask |= MCI_RXDATAAVLBLMASK;
@@ -306,9 +546,12 @@ mmci_data_irq(struct mmci_host *host, struct mmc_data *data,
 
 		/*
 		 * We hit an error condition.  Ensure that any data
-		 * partially written to a page is properly coherent.
+		 * partially written to a page is properly coherent,
+		 * unless we're using DMA.
 		 */
-		if (data->flags & MMC_DATA_READ) {
+		if (dma_inprogress(host))
+			mmci_dma_data_error(host);
+		else if (data->flags & MMC_DATA_READ) {
 			struct sg_mapping_iter *sg_miter = &host->sg_miter;
 			unsigned long flags;
 
@@ -351,7 +594,8 @@ mmci_data_irq(struct mmci_host *host, struct mmc_data *data,
 		 * flag is unreliable: since it can stay high between
 		 * IRQs it will corrupt the transfer counter.
 		 */
-		if (!variant->broken_blockend)
+		if (!variant->broken_blockend &&
+		    !(dma_inprogress(host) && variant->broken_blockend_dma))
 			host->data_xfered += data->blksz;
 		host->blockend = true;
 	}
@@ -366,6 +610,7 @@ mmci_data_irq(struct mmci_host *host, struct mmc_data *data,
 	 */
 	if (host->dataend && (host->blockend || variant->broken_blockend)) {
 		mmci_stop_data(host);
+		mmci_dma_unmap(host, data);
 
 		/* Reset these flags */
 		host->blockend = false;
@@ -375,7 +620,9 @@ mmci_data_irq(struct mmci_host *host, struct mmc_data *data,
 		 * Variants with broken blockend flags need to handle the
 		 * end of the entire transfer here.
 		 */
-		if (variant->broken_blockend && !data->error)
+		if ((variant->broken_blockend ||
+		     (dma_inprogress(host) && variant->broken_blockend_dma))
+		    && !data->error)
 			host->data_xfered += data->blksz * data->blocks;
 
 		if (!data->stop) {
@@ -828,6 +1075,7 @@ static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id)
 		dev_dbg(mmc_dev(mmc), "eventual mclk rate: %u Hz\n",
 			host->mclk);
 	}
+	host->phybase = dev->res.start;
 	host->base = ioremap(dev->res.start, resource_size(&dev->res));
 	if (!host->base) {
 		ret = -ENOMEM;
@@ -938,6 +1186,8 @@ static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id)
 	    && host->gpio_cd_irq < 0)
 		mmc->caps |= MMC_CAP_NEEDS_POLL;
 
+	mmci_dma_setup(host);
+
 	ret = request_irq(dev->irq[0], mmci_irq, IRQF_SHARED, DRIVER_NAME " (cmd)", host);
 	if (ret)
 		goto unmap;
@@ -962,15 +1212,17 @@ static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id)
 
 	mmc_add_host(mmc);
 
-	dev_info(&dev->dev, "%s: MMCI rev %x cfg %02x at 0x%016llx irq %d,%d\n",
-		mmc_hostname(mmc), amba_rev(dev), amba_config(dev),
-		(unsigned long long)dev->res.start, dev->irq[0], dev->irq[1]);
+	dev_info(&dev->dev, "%s: MMCI/PL180 manf %x rev %x cfg %02x at 0x%016llx\n",
+		 mmc_hostname(mmc), amba_manf(dev), amba_rev(dev), amba_config(dev),
+		 (unsigned long long)dev->res.start);
+	dev_info(&dev->dev, "IRQ %d, %d (pio)\n", dev->irq[0], dev->irq[1]);
 
 	return 0;
 
  irq0_free:
 	free_irq(dev->irq[0], host);
  unmap:
+	mmci_dma_release(host);
 	if (host->gpio_wp != -ENOSYS)
 		gpio_free(host->gpio_wp);
  err_gpio_wp:
@@ -1009,6 +1261,7 @@ static int __devexit mmci_remove(struct amba_device *dev)
 		writel(0, host->base + MMCICOMMAND);
 		writel(0, host->base + MMCIDATACTRL);
 
+		mmci_dma_release(host);
 		free_irq(dev->irq[0], host);
 		if (!host->singleirq)
 			free_irq(dev->irq[1], host);
diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h
index df06f01..659b416 100644
--- a/drivers/mmc/host/mmci.h
+++ b/drivers/mmc/host/mmci.h
@@ -148,8 +148,10 @@
 
 struct clk;
 struct variant_data;
+struct dma_chan;
 
 struct mmci_host {
+	phys_addr_t		phybase;
 	void __iomem		*base;
 	struct mmc_request	*mrq;
 	struct mmc_command	*cmd;
@@ -183,6 +185,18 @@ struct mmci_host {
 	/* pio stuff */
 	struct sg_mapping_iter	sg_miter;
 	unsigned int		size;
+
 	struct regulator	*vcc;
+
+#ifdef CONFIG_DMA_ENGINE
+	/* DMA stuff */
+	struct dma_chan		*dma_current;
+	struct dma_chan		*dma_rx_channel;
+	struct dma_chan		*dma_tx_channel;
+
+#define dma_inprogress(host)	((host)->dma_current)
+#else
+#define dma_inprogress(host)	(0)
+#endif
 };
 
diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h
index f4ee9ac..dbeba68 100644
--- a/include/linux/amba/mmci.h
+++ b/include/linux/amba/mmci.h
@@ -6,6 +6,8 @@
 
 #include <linux/mmc/host.h>
 
+/* Just some dummy forwarding */
+struct dma_chan;
 /**
  * struct mmci_platform_data - platform configuration for the MMCI
  * (also known as PL180) block.
@@ -27,6 +29,17 @@
  * @cd_invert: true if the gpio_cd pin value is active low
  * @capabilities: the capabilities of the block as implemented in
  * this platform, signify anything MMC_CAP_* from mmc/host.h
+ * @dma_filter: function used to select an appropriate RX and TX
+ * DMA channel to be used for DMA, if and only if you're deploying the
+ * generic DMA engine
+ * @dma_rx_param: parameter passed to the DMA allocation
+ * filter in order to select an appropriate RX channel. If
+ * there is a bidirectional RX+TX channel, then just specify
+ * this and leave dma_tx_param set to NULL
+ * @dma_tx_param: parameter passed to the DMA allocation
+ * filter in order to select an appropriate TX channel. If this
+ * is NULL the driver will attempt to use the RX channel as a
+ * bidirectional channel
  */
 struct mmci_platform_data {
 	unsigned int f_max;
@@ -38,6 +51,9 @@ struct mmci_platform_data {
 	int	gpio_cd;
 	bool	cd_invert;
 	unsigned long capabilities;
+	bool (*dma_filter)(struct dma_chan *chan, void *filter_param);
+	void *dma_rx_param;
+	void *dma_tx_param;
 };
 
 #endif
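
For illustration only, board support code might wire up these new fields
roughly as follows. This is a hypothetical sketch: the filter function and
the parameter type are defined entirely by the DMA engine driver used on
the platform, and all names below are made up.

#include <linux/dmaengine.h>
#include <linux/amba/mmci.h>

/* match a channel against a platform-specific request line (made up) */
static bool board_mmci_dma_filter(struct dma_chan *chan, void *param)
{
	return chan->chan_id == (unsigned long)param;
}

static struct mmci_platform_data board_mmci_plat = {
	.f_max		= 50000000,
	.dma_filter	= board_mmci_dma_filter,
	.dma_rx_param	= (void *)1,	/* made-up RX request line */
	.dma_tx_param	= (void *)2,	/* made-up TX request line */
};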

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH] ARM: add PrimeCell generic DMA to MMCI/PL180
  2010-12-21 15:59       ` Russell King - ARM Linux
@ 2010-12-22 21:55         ` Linus Walleij
  2011-01-24 16:01           ` Russell King - ARM Linux
  0 siblings, 1 reply; 18+ messages in thread
From: Linus Walleij @ 2010-12-22 21:55 UTC (permalink / raw)
  To: linux-arm-kernel

2010/12/21 Russell King - ARM Linux <linux@arm.linux.org.uk>:

> This also contains a few other tweaks on top of your original patch too.
> All the DMA conditionals get optimized out when we're not building with
> a DMA engine enabled platform.

Thanks.

> Note that in your original code, mmci_dma_data_error() is always followed
> by a call to mmci_dma_data_end(), and that resulted in dma_unmap_sg()
> being called twice for the same buffer.

Ulf Hansson notified me of this too, a week or so ago.
Sorry for not fixing it in time.

> Also, dma_map_sg() returns the number of physical entries mapped, or zero
> on failure.  Should we have a DMA mapping implementation which coalesces
> entries, the returned number will be less than data->nr_sg, and it is the
> number of coalesced entries which should be passed to the DMA engine driver.

Per Förlin reported this one as well.
Sorry again.
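
For reference, the rebased patch later in this thread handles that by
passing dma_map_sg()'s return value, rather than data->sg_len, to the
prep call. Roughly:

	nr_sg = dma_map_sg(device->dev, data->sg, data->sg_len, conf.direction);
	if (nr_sg == 0)
		return -EINVAL;
	/* use the possibly-coalesced entry count, not data->sg_len */
	desc = device->device_prep_slave_sg(chan, data->sg, nr_sg,
					    conf.direction, DMA_CTRL_ACK);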

> You have this bug in the PL022 driver too, so I suspect that your other
> drivers need fixing for this.

Fixing patches for PL022 now, thanks a lot Russell.

Yours,
Linus Walleij

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH] ARM: add PrimeCell generic DMA to MMCI/PL180
  2010-12-22 21:55         ` Linus Walleij
@ 2011-01-24 16:01           ` Russell King - ARM Linux
  2011-01-24 21:06             ` Linus Walleij
  0 siblings, 1 reply; 18+ messages in thread
From: Russell King - ARM Linux @ 2011-01-24 16:01 UTC (permalink / raw)
  To: linux-arm-kernel

Linus,

Any response to doing this for MMCI:

+static int mmci_dma_start_data(struct mmci_host *host, unsigned int datactrl)
+{
+       struct variant_data *variant = host->variant;
+       struct dma_slave_config conf = {
+               .src_addr = host->phybase + MMCIFIFO,
+               .dst_addr = host->phybase + MMCIFIFO,
+               .src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+               .dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+               .src_maxburst = variant->fifohalfsize >> 2, /* # of words */
+               .dst_maxburst = variant->fifohalfsize >> 2, /* # of words */
+       };
...
+       if (data->flags & MMC_DATA_READ) {
+               conf.direction = DMA_FROM_DEVICE;
+               chan = host->dma_rx_channel;
+       } else {
+               conf.direction = DMA_TO_DEVICE;
+               chan = host->dma_tx_channel;
+       }
...
+       /* shouldn't this be in the DMA engine driver? */
+       if (host->size <= variant->dmasize_threshold) {
+               conf.src_maxburst = 1;
+               conf.dst_maxburst = 1;
+       }
+
+       /* Check for weird stuff in the sg list */
+       /* shouldn't this be in the DMA engine driver? */
+       for_each_sg(data->sg, sg, data->sg_len, i) {
+               dev_vdbg(mmc_dev(host->mmc), "MMCI SGlist %d dir %d: length: %08x\n",
+                        i, conf.direction, sg->length);
+               if (sg->offset & 3 || sg->length & 3)
+                       return -EINVAL;
+       }

Can we move that size threshold and scatterlist checks inside the slave
DMA engine driver?

It is entirely possible that different DMA engines have different alignment
requirements and different bursting requirements, so surely it makes sense
for this kind of stuff to be in their drivers rather than in the DMA engine
users?

After all, the DMA engine users don't have much idea about what kind of
DMA engine will be used with them on PrimeCells.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH] ARM: add PrimeCell generic DMA to MMCI/PL180
  2011-01-24 16:01           ` Russell King - ARM Linux
@ 2011-01-24 21:06             ` Linus Walleij
  2011-01-24 21:22               ` Russell King - ARM Linux
  0 siblings, 1 reply; 18+ messages in thread
From: Linus Walleij @ 2011-01-24 21:06 UTC (permalink / raw)
  To: linux-arm-kernel

2011/1/24 Russell King - ARM Linux <linux@arm.linux.org.uk>:

> Any response to doing this for MMCI:

Sorry, got stalled tinkering with other stuff.

> +static int mmci_dma_start_data(struct mmci_host *host, unsigned int datactrl)
> +{
> +       struct variant_data *variant = host->variant;
> +       struct dma_slave_config conf = {
> +               .src_addr = host->phybase + MMCIFIFO,
> +               .dst_addr = host->phybase + MMCIFIFO,
> +               .src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
> +               .dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
> +               .src_maxburst = variant->fifohalfsize >> 2, /* # of words */
> +               .dst_maxburst = variant->fifohalfsize >> 2, /* # of words */
> +       };
> ...
> +       if (data->flags & MMC_DATA_READ) {
> +               conf.direction = DMA_FROM_DEVICE;
> +               chan = host->dma_rx_channel;
> +       } else {
> +               conf.direction = DMA_TO_DEVICE;
> +               chan = host->dma_tx_channel;
> +       }
> ...
> +       /* shouldn't this be in the DMA engine driver? */
> +       if (host->size <= variant->dmasize_threshold) {
> +               conf.src_maxburst = 1;
> +               conf.dst_maxburst = 1;
> +       }

This was an addition from Per Förlin; IIRC the requirement
not to push more than one word at a time into the FIFO
below a certain threshold is a requirement of the MMCI,
not the DMA engine.

What it does is to emulate single requests below a certain
threshold by requesting one-word bursts. I think this is
primarily for SDIO, not card transfers.

I'd have to ask around for the reasoning behind it. We'd better
understand that, but I note that we added a special hardware
register to the PL011 just to handle this kind of threshold,
so there is something to be said for emulating that threshold
in software.

> +       /* Check for weird stuff in the sg list */
> +       /* shouldn't this be in the DMA engine driver? */
> +       for_each_sg(data->sg, sg, data->sg_len, i) {
> +               dev_vdbg(mmc_dev(host->mmc), "MMCI SGlist %d dir %d: length: %08x\n",
> +                        i, conf.direction, sg->length);
> +               if (sg->offset & 3 || sg->length & 3)
> +                       return -EINVAL;
> +       }

This one is just overcautious, and for SDIO we don't even want to do
such things, so it should be dropped altogether.

The original intent was to avoid writing anything other than 32-bit
words into the register, and with the SDIO-modified versions you can
actually do that.

So I'll drop it for the next iteration.

> Can we move that size threshold and scatterlist checks inside the slave
> DMA engine driver?
>
> It is entirely possible that different DMA engines have different alignment
> requirements and different bursting requirements, so surely it makes sense
> for this kind of stuff to be in their drivers rather than in the DMA engine
> users?
>
> After all, the DMA engine users don't have much idea about what kind of
> DMA engine will be used with themselves with Primecells.

True.

Linus Walleij

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH] ARM: add PrimeCell generic DMA to MMCI/PL180
  2011-01-24 21:06             ` Linus Walleij
@ 2011-01-24 21:22               ` Russell King - ARM Linux
  2011-01-25  8:33                 ` Linus Walleij
                                   ` (2 more replies)
  0 siblings, 3 replies; 18+ messages in thread
From: Russell King - ARM Linux @ 2011-01-24 21:22 UTC (permalink / raw)
  To: linux-arm-kernel

On Mon, Jan 24, 2011 at 10:06:53PM +0100, Linus Walleij wrote:
> This was an addition from Per Förlin; IIRC the requirement
> not to push more than one word at a time into the FIFO
> below a certain threshold is a requirement of the MMCI,
> not the DMA engine.

The MMCI has four DMA request signals:
- BREQ (burst request)
- SREQ (single request)
- LBREQ (last burst request)
- LSREQ (last single request)

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH] ARM: add PrimeCell generic DMA to MMCI/PL180
  2011-01-24 21:22               ` Russell King - ARM Linux
@ 2011-01-25  8:33                 ` Linus Walleij
  2011-01-26  9:24                   ` Russell King - ARM Linux
  2011-01-26  9:50                   ` Russell King - ARM Linux
  2011-01-25  9:36                 ` Linus Walleij
  2011-01-25  9:47                 ` Linus Walleij
  2 siblings, 2 replies; 18+ messages in thread
From: Linus Walleij @ 2011-01-25  8:33 UTC (permalink / raw)
  To: linux-arm-kernel

On 24 January 2011 22:22, Russell King - ARM Linux
<linux@arm.linux.org.uk> wrote:


> On Mon, Jan 24, 2011 at 10:06:53PM +0100, Linus Walleij wrote:
> > This was an addition from Per Förlin; IIRC the requirement
> > not to push more than one word at a time into the FIFO
> > below a certain threshold is a requirement of the MMCI,
> > not the DMA engine.
>
> The MMCI has four DMA request signals:
> - BREQ (burst request)
> - SREQ (single request)
> - LBREQ (last burst request)
> - LSREQ (last single request)
>
> From PL181 TRM (DDI0205B) 2.3.2:
> For SREQ:
>        For receive: Asserted if data counter is
>        zero and receive FIFO contains more than
>        one and fewer than eight words.
>        For transmit: Asserted if fewer than eight
>        and more than one word remain for
>        transfer to FIFO.
>
> For BREQ:
>        For receive: Asserted if FIFO contains
>        eight words and data counter is not zero,
>        or if FIFO contains more than eight words.
>        For transmit: Asserted if more than eight
>        words remain for transfer to FIFO.
>
> For LSREQ:
>        For receive: Asserted if data counter is
>        zero and FIFO contains only one word.
>        For transmit: Asserted if only one word
>        remains for transfer to FIFO.
>
> For LBREQ:
>        For receive: Asserted if data counter is
>        zero and FIFO contains eight words.
>        For transmit: Asserted if only eight words
>        remain for transfer to FIFO.
>
> So, for small transfers (less than half the FIFO depth), SREQ will be
> asserted to transfer a single word at a time, and LSREQ for the last
> word.  There shouldn't be any bursts from the DMA controller.
>
> The second argument is that if you have a burst size of, say, 8 words
> and you program the DMA to transfer 4 words, it should _not_ transfer
> 8 words to the peripheral.
>
> > What it does is to emulate single requests below a certain
> > threshold by requesting one-word bursts. I think this is
> > primarily for SDIO, not card transfers.
>
> This should be handled in hardware, if not it's DMA controller specific
> as it shouldn't burst past the remainder of the transfer.  If your DMA
> controller does burst past the number of bytes in the transfer, surely
> that's something that your DMA controller code needs to work around?
>

Yes, that indeed seems reasonable. With the COH901318 DMA engine
it worked just fine without the burst threshold. I just want to give my
colleagues a chance to have a second opinion on this.


> > > +       /* Check for weird stuff in the sg list */
> > > +       /* shouldn't this be in the DMA engine driver? */
> > > +       for_each_sg(data->sg, sg, data->sg_len, i) {
> > > +               dev_vdbg(mmc_dev(host->mmc), "MMCI SGlist %d dir %d: length: %08x\n",
> > > +                        i, conf.direction, sg->length);
> > > +               if (sg->offset & 3 || sg->length & 3)
> > > +                       return -EINVAL;
> > > +       }
> >
> > This one is just overcautious and for SDIO we don't even want to do
> > such things, so it should be dropped altogether.
> >
> > The original intent was to avoid writing anything other than 32-bit words
> > into the register, and with the SDIO-modified versions you can
> > actually do that.
>
> Surely that's to do with the DMA controller though?
>

Absolutely.


> > So I'll drop it for the next iteration.
>
> Given that I've already heavily modified the code, it's probably better if
> you just let me know the answer... ;)
>

In any case I'm happy if you post/commit the MMCI support
you have; if there are problems we can certainly sort them out
later.

Hm, does this mean you got it working with one of the
reference boards eventually?

Yours,
Linus Walleij

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH] ARM: add PrimeCell generic DMA to MMCI/PL180
  2011-01-24 21:22               ` Russell King - ARM Linux
  2011-01-25  8:33                 ` Linus Walleij
@ 2011-01-25  9:36                 ` Linus Walleij
  2011-01-25  9:47                 ` Linus Walleij
  2 siblings, 0 replies; 18+ messages in thread
From: Linus Walleij @ 2011-01-25  9:36 UTC (permalink / raw)
  To: linux-arm-kernel

On 24 January 2011 22:22, Russell King - ARM Linux
<linux@arm.linux.org.uk> wrote:

> So, for small transfers (less than half the FIFO depth), SREQ will be
> asserted to transfer a single word at a time, and LSREQ for the last
> word.  There shouldn't be any bursts from the DMA controller.
>

Actually, drilling into the datasheet we have this statement for the version
used in U8500:

-------------------
if data to be transferred in not multiple of 8 words, then:
* when Tx mode (DMAreqctl field of DCTRL register should be = 1),
  MCIDMABREQ will be asserted until less 8 words remain. After this
  SDI will assert MCIDMALBREQ for last words transfer to terminate
  the transfer.
* when Rx mode (DMAreqctl field of DCTRL register should be = 0),
  MCIDMABREQ will be asserted until less 8 words remain. After
  this SDI will assert MCIDMASREQ until 1 word remains and then
  it will assert MCIDMALSREQ to terminate the transfer.
-------------------

So the way I read it, the U8500 version of PL180 will do what you
describe for RX transfers (so as not to overwrite any memory)
but for TX it will simply read some extra bytes to make a
complete burst which is faster and then discard the remainder.

This wiring has consequences as we'll see later...

(I don't know if the original PL180 actually works the same, it may
just be some undocumented "feature"...)


> The second argument is that if you have a burst size of, say, 8 words
> and you program the DMA to transfer 4 words, it should _not_ transfer
> 8 words to the peripheral.
>
> > What it does is to emulate single requests below a certain
> > threshold by requesting one-word bursts. I think this is
> > primarily for SDIO, not card transfers.
>
> This should be handled in hardware, if not it's DMA controller specific
> as it shouldn't burst past the remainder of the transfer.  If your DMA
> controller does burst past the number of bytes in the transfer, surely
> that's something that your DMA controller code needs to work around?
>

Here is another snippet:

-------------------
For burst size = 1, if the data transfer is more than 7 words and the
data to be transferred is a multiple of 8 words, a single burst transfer
cannot be done, because when data transfer count reaches 8 words,
MCIDMALBREQ is generated and 1 word will be transferred from
memory to the SDI host FIFO. But as the DMA controller sees
MCIDMALBREQ, it terminates the transfer and the remaining 7
words are not be transferred to the Tx FIFO.
-------------------

So we must use a burst size of 8 for anything exceeding and
including 8 words. Else it will break. (Follows logically from the
first snippet in some way, likely the VHDL code just checks
the higher bits.)

Yet, if it tried to issue a burst request of size 8 for n<8, nothing
would be transferred, and that's due to limitations in the PL180
derivative's way of issuing the request signals, not in the DMA
controller.

So what the driver has to do is issue 1-word requests to avoid
issuing a single burst request when n<8, because of limitations
in the PL180 derivative.

It would require the same treatment no matter what DMA
engine is used on the other side.

So right now I believe this code actually belongs in the mmci
driver, albeit with the addition of a vendor-specific flag, like
.odd_st_bursts, for the ST Micro versions.

We're looking into another parameter here: below some packet
size, say maybe < 512 bytes, DMA may just add overhead, and we may
want to resort to PIO. That would do away with the whole issue
in another way, but it's a separate matter that we need
to back up with measurements.

Yours,
Linus Walleij

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH] ARM: add PrimeCell generic DMA to MMCI/PL180
  2011-01-24 21:22               ` Russell King - ARM Linux
  2011-01-25  8:33                 ` Linus Walleij
  2011-01-25  9:36                 ` Linus Walleij
@ 2011-01-25  9:47                 ` Linus Walleij
  2011-01-25 10:23                   ` Russell King - ARM Linux
  2 siblings, 1 reply; 18+ messages in thread
From: Linus Walleij @ 2011-01-25  9:47 UTC (permalink / raw)
  To: linux-arm-kernel

On 24 January 2011 22:22, Russell King - ARM Linux
<linux@arm.linux.org.uk> wrote:

> So, for small transfers (less than half the FIFO depth), SREQ will be
> asserted to transfer a single word at a time, and LSREQ for the last
> word.  There shouldn't be any bursts from the DMA controller.

Actually, drilling into the datasheet we have this statement for the version
used in U8500:

-------------------
if data to be transferred in not multiple of 8 words, then:
* when Tx mode (DMAreqctl field of DCTRL register should be = 1),
  MCIDMABREQ will be asserted until less 8 words remain. After this
  SDI will assert MCIDMALBREQ for last words transfer to terminate
  the transfer.
* when Rx mode (DMAreqctl field of DCTRL register should be = 0),
  MCIDMABREQ will be asserted until less 8 words remain. After
  this SDI will assert MCIDMASREQ until 1 word remains and then
  it will assert MCIDMALSREQ to terminate the transfer.
-------------------

So the way I read it, the U8500 version of PL180 will do what you
describe for RX transfers (so as not to overwrite any memory)
but for TX it will simply read some extra bytes to make a
complete burst which is faster and then discard the remainder.

This wiring has consequences as we'll see later...

(I don't know if the original PL180 actually works the same, it may
just be some undocumented "feature"...)


> The second argument is that if you have a burst size of, say, 8 words
> and you program the DMA to transfer 4 words, it should _not_ transfer
> 8 words to the peripheral.
>
> > What it does is to emulate single requests below a certain
> > threshold by requesting one-word bursts. I think this is
> > primarily for SDIO, not card transfers.

> This should be handled in hardware, if not it's DMA controller specific
> as it shouldn't burst past the remainder of the transfer.  If your DMA
> controller does burst past the number of bytes in the transfer, surely
> that's something that your DMA controller code needs to work around?

Here is another snippet:

-------------------
For burst size = 1, if the data transfer is more than 7 words and the
data to be transferred is a multiple of 8 words, a single burst transfer
cannot be done, because when data transfer count reaches 8 words,
MCIDMALBREQ is generated and 1 word will be transferred from
memory to the SDI host FIFO. But as the DMA controller sees
MCIDMALBREQ, it terminates the transfer and the remaining 7
words are not be transferred to the Tx FIFO.
-------------------

So we must use a burst size of 8 for anything exceeding and
including 8 words. Else it will break. (Follows logically from the
first snippet in some way, likely the VHDL code just checks
the higher bits.)

Yet, if it tried to issue a burst request of size 8 for n<8, nothing
would be transferred, and that's due to limitations in the PL180
derivative's way of issuing the request signals, not in the DMA
controller.

So what the driver has to do is issue 1-word requests to avoid
issuing a single burst request when n<8, because of limitations
in the PL180 derivative.

It would require the same treatment no matter what DMA
engine is used on the other side.

So right now I believe this code actually belongs in the mmci
driver, albeit with the addition of a vendor-specific flag, like
.odd_st_bursts, for the ST Micro versions.

We're looking into another parameter here: below some packet
size, say maybe < 512 bytes, DMA may just add overhead, and we may
want to resort to PIO. That would do away with the whole issue
in another way, but it's a separate matter that we need
to back up with measurements.
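
A very rough sketch of that idea (the threshold is made up and would have
to be backed by those measurements); returning an error early from
mmci_dma_start_data() already makes mmci_start_data() fall back to the
PIO/IRQ path:

	/* sketch only: small transfers may be cheaper in PIO mode */
	if (host->size < 512)
		return -EINVAL;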

Yours,
Linus Walleij

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH] ARM: add PrimeCell generic DMA to MMCI/PL180
  2011-01-25  9:47                 ` Linus Walleij
@ 2011-01-25 10:23                   ` Russell King - ARM Linux
  2011-01-27 13:07                     ` Linus Walleij
  0 siblings, 1 reply; 18+ messages in thread
From: Russell King - ARM Linux @ 2011-01-25 10:23 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Jan 25, 2011 at 10:47:50AM +0100, Linus Walleij wrote:
> On 24 January 2011 22:22, Russell King - ARM Linux
> <linux@arm.linux.org.uk> wrote:
> 
> > So, for small transfers (less than half the FIFO depth), SREQ will be
> > asserted to transfer a single word at a time, and LSREQ for the last
> > word.  There shouldn't be any bursts from the DMA controller.
> 
> Actually, drilling into the datasheet we have this statement for the version
> used in U8500:
> 
> -------------------
> if data to be transferred in not multiple of 8 words, then:
> * when Tx mode (DMAreqctl field of DCTRL register should be = 1),
>   MCIDMABREQ will be asserted until less 8 words remain. After this
>   SDI will assert MCIDMALBREQ for last words transfer to terminate
>   the transfer.
> * when Rx mode (DMAreqctl field of DCTRL register should be = 0),
>   MCIDMABREQ will be asserted until less 8 words remain. After
>   this SDI will assert MCIDMASREQ until 1 word remains and then
>   it will assert MCIDMALSREQ to terminate the transfer.
> -------------------
> 
> So the way I read it, the U8500 version of PL180 will do what you
> describe for RX transfers (so as not to overwrite any memory)
> but for TX it will simply read some extra bytes to make a
> complete burst which is faster and then discard the remainder.

If the DMA controller is aware of the amount of data to be transferred,
then it should never go past that number of bytes, even if burst_size >=
remainder.  What you quote above looks entirely sensible.

> This wiring has consequences as we'll see later...
> 
> (I don't know if the original PL180 actually works the same, it may
> just be some undocumented "feature"...)

It should be as I quoted, which in practice should mean:
For 31 words:
Signal:	BREQ	BREQ	BREQ	SREQ	SREQ ..	SREQ	SREQ	LSREQ
Counter:31	23	15	7	6	5	2	1
T'ferd:	8	8	8	1	1	1	1	1

For 32 words:
Signal:	BREQ	BREQ	BREQ	LBREQ
Counter:32	24	16	8
T'ferd:	8	8	8	8

And ARM's primecell expects the DMA controller to only burst a full burst
with BREQ and LBREQ, and a single word on SREQ or LSREQ.  So that's
almost the same, except that ST optimized the TX condition so that it
behaves as per the illustrated 32-word case.

> > The second argument is that if you have a burst size of, say, 8 words
> > and you program the DMA to transfer 4 words, it should _not_ transfer
> > 8 words to the peripheral.
> >
> > > What it does is to emulate single requests below a certain
> > > threshold by requesting one-word bursts. I think this is
> > > primarily for SDIO, not card transfers.
> 
> > This should be handled in hardware, if not it's DMA controller specific
> > as it shouldn't burst past the remainder of the transfer.  If your DMA
> > controller does burst past the number of bytes in the transfer, surely
> > that's something that your DMA controller code needs to work around?
> 
> Here is another snippet:
> 
> -------------------
> For burst size = 1, if the data transfer is more than 7 words and the
> data to be transferred is a multiple of 8 words, a single burst transfer
> cannot be done, because when data transfer count reaches 8 words,
> MCIDMALBREQ is generated and 1 word will be transferred from
> memory to the SDI host FIFO. But as the DMA controller sees
> MCIDMALBREQ, it terminates the transfer and the remaining 7
> words are not be transferred to the Tx FIFO.
> -------------------
> 
> So we must use a burst size of 8 for anything exceeding and
> including 8 words. Else it will break. (Follows logically from the
> first snippet in some way, likely the VHDL code just checks
> the higher bits.)

The DMA controller must be programmed with the right burst size for the
peripheral, which in this case would be 8 words, otherwise the 'last'
burst signalling goes wrong.

> Yet, if it tried to issue a burst request of size 8 for n<8, nothing
> would be transferred, and that's due to limitations in the PL180
> derivative's way of issuing the request signals, not in the DMA
> controller.

I don't see that from what you've quoted.

> So what the driver has to do is issue 1-word requests to avoid
> issuing a single burst request when n<8, because of limitations
> in the PL180 derivative.

That seems to fly in the face of your second quote.  If n<8, surely
under the first quote, your MMCI asserts LBREQ.  According to the
second quote, LBREQ is seen by the DMA controller which, as it's
programmed for 1 word requests, transfers one word and then
terminates the transfer.

So I think you should always have the DMA controller programmed for
8 word bursts.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH] ARM: add PrimeCell generic DMA to MMCI/PL180
  2011-01-25  8:33                 ` Linus Walleij
@ 2011-01-26  9:24                   ` Russell King - ARM Linux
  2011-01-26  9:50                   ` Russell King - ARM Linux
  1 sibling, 0 replies; 18+ messages in thread
From: Russell King - ARM Linux @ 2011-01-26  9:24 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Jan 25, 2011 at 09:33:07AM +0100, Linus Walleij wrote:
> In any case I'm happy if you post/commit the MMCI support
> you have, if there are problems we can certainly sort it out later
> in any case.
> 
> Hm, does this mean you got it working with one of the
> reference boards eventually?

Nope.  ARM reference boards all seem to have broken DMA.  I've given up
with the idea of getting DMA working on any of these platforms, and as
yet I've not had a response from the ARM Support request which Catalin
logged for me on this subject.

I don't believe it is possible for MMCI to ever work correctly with DMA
on ARM development platforms.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH] ARM: add PrimeCell generic DMA to MMCI/PL180
  2011-01-25  8:33                 ` Linus Walleij
  2011-01-26  9:24                   ` Russell King - ARM Linux
@ 2011-01-26  9:50                   ` Russell King - ARM Linux
  1 sibling, 0 replies; 18+ messages in thread
From: Russell King - ARM Linux @ 2011-01-26  9:50 UTC (permalink / raw)
  To: linux-arm-kernel

On Tue, Jan 25, 2011 at 09:33:07AM +0100, Linus Walleij wrote:
> On 24 January 2011 22:22, Russell King - ARM Linux
> <linux@arm.linux.org.uk> wrote:
> > On Mon, Jan 24, 2011 at 10:06:53PM +0100, Linus Walleij wrote:
> > > What it does is to emulate single requests below a certain
> > > threshold by requesting one-word bursts. I think this is
> > > primarily for SDIO, not card transfers.
> >
> > This should be handled in hardware, if not it's DMA controller specific
> > as it shouldn't burst past the remainder of the transfer.  If your DMA
> > controller does burst past the number of bytes in the transfer, surely
> > that's something that your DMA controller code needs to work around?
> 
> Yes, that indeed seems reasonable. With the COH901318 DMA engine
> it worked just fine without the burst threshold. I just want to give my
> colleagues a chance to have a second opinion on this.

I've left this in, but noted a todo item in the commit message to remove
this.

I'd like to remove this, and then propose that DMA slave drivers should
store the entire channel configuration (and we drop the direction argument)
rather than picking out the source/destination parts of the structure, so
we can avoid calling the DMA configuration function on every DMA request.

It'd also be less susceptible to mismatches between the configured direction
and the direction passed into the prep_dma_slave_sg() function (which I
think should be responsible for picking the configuration from the
dma_slave_config structure.)
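
From the MMCI side, a rough sketch of how that could look (this is not the
current dmaengine API, just an illustration of the idea): the full slave
config is pushed once when the channel is set up, and each request then
only prepares a descriptor, with the DMA engine driver picking the src_*
or dst_* half of its stored configuration from the direction argument:

	/* once, at channel setup time */
	struct dma_slave_config conf = {
		.src_addr	= host->phybase + MMCIFIFO,
		.dst_addr	= host->phybase + MMCIFIFO,
		.src_addr_width	= DMA_SLAVE_BUSWIDTH_4_BYTES,
		.dst_addr_width	= DMA_SLAVE_BUSWIDTH_4_BYTES,
		.src_maxburst	= variant->fifohalfsize >> 2,
		.dst_maxburst	= variant->fifohalfsize >> 2,
	};
	dmaengine_slave_config(chan, &conf);

	/* per request: no per-transfer config call, just the prep */
	desc = chan->device->device_prep_slave_sg(chan, data->sg, nr_sg,
			data->flags & MMC_DATA_READ ?
				DMA_FROM_DEVICE : DMA_TO_DEVICE,
			DMA_CTRL_ACK);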

> > > The original intent was to avoid writing anything other than 32-bit words
> > > into the register, and with the SDIO-modified versions you can
> > > actually do that.
> >
> > Surely that's to do with the DMA controller though?
> 
> Absolutely.

So shouldn't this be in the DMA engine driver, so that prep_dma_slave_sg()
returns NULL if the scatterlist is unsuitable?

Here's the current patch, rebased on top of the blockend removal stuff.

8<------
From: Russell King - ARM Linux <linux@arm.linux.org.uk>
Subject: [PATCH] ARM: MMCI: add dmaengine-based DMA support

Based on a patch from Linus Walleij.

Add dmaengine based support for DMA to the MMCI driver, using the
Primecell DMA engine interface.  The changes over Linus' driver are:

- rename txsize_threshold to dmasize_threshold, as this reflects the
  purpose more.
- use 'mmci_dma_' as the function prefix rather than 'dma_mmci_'.
- clean up requesting of dma channels.
- don't release a single channel twice when it's shared between tx and rx.
- get rid of 'dma_enable' bool - instead check whether the channel is NULL.
- detect incomplete DMA at the end of a transfer.  Some DMA controllers
  (eg, PL08x) are unable to be configured for scatter DMA and also listen
  to all four DMA request signals [BREQ,SREQ,LBREQ,LSREQ] from the MMCI.
  They can do one or other but not both.  As MMCI uses LBREQ/LSREQ for the
  final burst/words, PL08x does not transfer the last few words.
- map and unmap DMA buffers using the DMA engine struct device, not the
  MMCI struct device - the DMA engine is doing the DMA transfer, not us.
- avoid double-unmapping of the DMA buffers on MMCI data errors.
- don't check for negative values from the dmaengine tx submission
  function - Dan says this must never fail.
- use new dmaengine helper functions rather than using the ugly function
  pointers directly.
- allow DMA code to be fully optimized away using dma_inprogress() which
  is defined to constant 0 if DMA engine support is disabled.
- request maximum segment size from the DMA engine struct device and
  set this appropriately.
- removed checking of buffer alignment - the DMA engine should deal with
  its own restrictions on buffer alignment, not the individual DMA engine
  users.
TODO: remove burst setting

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/mmc/host/mmci.c   |  296 +++++++++++++++++++++++++++++++++++++++++++--
 drivers/mmc/host/mmci.h   |   13 ++
 include/linux/amba/mmci.h |   17 +++
 3 files changed, 316 insertions(+), 10 deletions(-)

diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 2de12fe..3eaeed1 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -2,7 +2,7 @@
  *  linux/drivers/mmc/host/mmci.c - ARM PrimeCell MMCI PL180/1 driver
  *
  *  Copyright (C) 2003 Deep Blue Solutions, Ltd, All Rights Reserved.
- *  Copyright (C) 2010 ST-Ericsson AB.
+ *  Copyright (C) 2010 ST-Ericsson SA
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -24,8 +24,10 @@
 #include <linux/clk.h>
 #include <linux/scatterlist.h>
 #include <linux/gpio.h>
-#include <linux/amba/mmci.h>
 #include <linux/regulator/consumer.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/amba/mmci.h>
 
 #include <asm/div64.h>
 #include <asm/io.h>
@@ -41,20 +43,26 @@ static unsigned int fmax = 515633;
  * struct variant_data - MMCI variant-specific quirks
  * @clkreg: default value for MCICLOCK register
  * @clkreg_enable: enable value for MMCICLOCK register
+ * @dmareg_enable: enable value for MMCIDATACTRL register
  * @datalength_bits: number of bits in the MMCIDATALENGTH register
  * @fifosize: number of bytes that can be written when MMCI_TXFIFOEMPTY
  *	      is asserted (likewise for RX)
  * @fifohalfsize: number of bytes that can be written when MCI_TXFIFOHALFEMPTY
  *		  is asserted (likewise for RX)
+ * @dmasize_threshold: Sets the DMA burst size to minimal if the transfer
+ *		size is less than or equal to this threshold. Specified in
+ *		number of bytes. Set to 0 for no burst compensation
  * @sdio: variant supports SDIO
  * @st_clkdiv: true if using a ST-specific clock divider algorithm
  */
 struct variant_data {
 	unsigned int		clkreg;
 	unsigned int		clkreg_enable;
+	unsigned int		dmareg_enable;
 	unsigned int		datalength_bits;
 	unsigned int		fifosize;
 	unsigned int		fifohalfsize;
+	unsigned int		dmasize_threshold;
 	bool			sdio;
 	bool			st_clkdiv;
 };
@@ -76,8 +84,10 @@ static struct variant_data variant_u300 = {
 static struct variant_data variant_ux500 = {
 	.fifosize		= 30 * 4,
 	.fifohalfsize		= 8 * 4,
+	.dmasize_threshold	= 16,
 	.clkreg			= MCI_CLK_ENABLE,
 	.clkreg_enable		= 1 << 14, /* HWFCEN */
+	.dmareg_enable		= 1 << 12, /* DMAREQCTRL */
 	.datalength_bits	= 24,
 	.sdio			= true,
 	.st_clkdiv		= true,
@@ -188,6 +198,251 @@ static void mmci_init_sg(struct mmci_host *host, struct mmc_data *data)
 	sg_miter_start(&host->sg_miter, data->sg, data->sg_len, flags);
 }
 
+/*
+ * All the DMA operation mode stuff goes inside this ifdef.
+ * This assumes that you have a generic DMA device interface,
+ * no custom DMA interfaces are supported.
+ */
+#ifdef CONFIG_DMA_ENGINE
+static void __devinit mmci_dma_setup(struct mmci_host *host)
+{
+	struct mmci_platform_data *plat = host->plat;
+	const char *rxname, *txname;
+	dma_cap_mask_t mask;
+
+	if (!plat || !plat->dma_filter) {
+		dev_info(mmc_dev(host->mmc), "no DMA platform data\n");
+		return;
+	}
+
+	/* Try to acquire a generic DMA engine slave channel */
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+
+	/*
+	 * If only an RX channel is specified, the driver will
+	 * attempt to use it bidirectionally, however if it is
+	 * specified but cannot be located, DMA will be disabled.
+	 */
+	if (plat->dma_rx_param) {
+		host->dma_rx_channel = dma_request_channel(mask,
+							   plat->dma_filter,
+							   plat->dma_rx_param);
+		/* E.g. if no DMA hardware is present */
+		if (!host->dma_rx_channel)
+			dev_err(mmc_dev(host->mmc), "no RX DMA channel\n");
+	}
+
+	if (plat->dma_tx_param) {
+		host->dma_tx_channel = dma_request_channel(mask,
+							   plat->dma_filter,
+							   plat->dma_tx_param);
+		if (!host->dma_tx_channel)
+			dev_warn(mmc_dev(host->mmc), "no TX DMA channel\n");
+	} else {
+		host->dma_tx_channel = host->dma_rx_channel;
+	}
+
+	if (host->dma_rx_channel)
+		rxname = dma_chan_name(host->dma_rx_channel);
+	else
+		rxname = "none";
+
+	if (host->dma_tx_channel)
+		txname = dma_chan_name(host->dma_tx_channel);
+	else
+		txname = "none";
+
+	dev_info(mmc_dev(host->mmc), "DMA channels RX %s, TX %s\n",
+		 rxname, txname);
+
+	/*
+	 * Limit the maximum segment size in any SG entry according to
+	 * the parameters of the DMA engine device.
+	 */
+	if (host->dma_tx_channel) {
+		struct device *dev = host->dma_tx_channel->device->dev;
+		unsigned int max_seg_size = dma_get_max_seg_size(dev);
+
+		if (max_seg_size < host->mmc->max_seg_size)
+			host->mmc->max_seg_size = max_seg_size;
+	}
+	if (host->dma_rx_channel) {
+		struct device *dev = host->dma_rx_channel->device->dev;
+		unsigned int max_seg_size = dma_get_max_seg_size(dev);
+
+		if (max_seg_size < host->mmc->max_seg_size)
+			host->mmc->max_seg_size = max_seg_size;
+	}
+}
+
+/*
+ * This is used in __devinit or __devexit so inline it
+ * so it can be discarded.
+ */
+static inline void mmci_dma_release(struct mmci_host *host)
+{
+	struct mmci_platform_data *plat = host->plat;
+
+	if (host->dma_rx_channel)
+		dma_release_channel(host->dma_rx_channel);
+	if (host->dma_tx_channel && plat->dma_tx_param)
+		dma_release_channel(host->dma_tx_channel);
+	host->dma_rx_channel = host->dma_tx_channel = NULL;
+}
+
+static void mmci_dma_unmap(struct mmci_host *host, struct mmc_data *data)
+{
+	struct dma_chan *chan = host->dma_current;
+	enum dma_data_direction dir;
+	u32 status;
+	int i;
+
+	/* Wait up to 1ms for the DMA to complete */
+	for (i = 0; ; i++) {
+		status = readl(host->base + MMCISTATUS);
+		if (!(status & MCI_RXDATAAVLBLMASK) || i >= 100)
+			break;
+		udelay(10);
+	}
+
+	/*
+	 * Check to see whether we still have some data left in the FIFO -
+	 * this catches DMA controllers which are unable to monitor the
+	 * DMALBREQ and DMALSREQ signals while allowing us to DMA to non-
+	 * contiguous buffers.  On TX, we'll get a FIFO underrun error.
+	 */
+	if (status & MCI_RXDATAAVLBLMASK) {
+		dmaengine_terminate_all(chan);
+		data->error = -EIO;
+	}
+
+	if (data->flags & MMC_DATA_WRITE) {
+		dir = DMA_TO_DEVICE;
+	} else {
+		dir = DMA_FROM_DEVICE;
+	}
+
+	dma_unmap_sg(chan->device->dev, data->sg, data->sg_len, dir);
+
+	/*
+	 * Use of DMA with scatter-gather is impossible.
+	 * Give up with DMA and switch back to PIO mode.
+	 */
+	if (status & MCI_RXDATAAVLBLMASK) {
+		dev_err(mmc_dev(host->mmc), "buggy DMA detected. Taking evasive action.\n");
+		mmci_dma_release(host);
+	}
+}
+
+static void mmci_dma_data_error(struct mmci_host *host)
+{
+	dev_err(mmc_dev(host->mmc), "error during DMA transfer!\n");
+	dmaengine_terminate_all(host->dma_current);
+}
+
+static int mmci_dma_start_data(struct mmci_host *host, unsigned int datactrl)
+{
+	struct variant_data *variant = host->variant;
+	struct dma_slave_config conf = {
+		.src_addr = host->phybase + MMCIFIFO,
+		.dst_addr = host->phybase + MMCIFIFO,
+		.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+		.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+		.src_maxburst = variant->fifohalfsize >> 2, /* # of words */
+		.dst_maxburst = variant->fifohalfsize >> 2, /* # of words */
+	};
+	struct mmc_data *data = host->data;
+	struct dma_chan *chan;
+	struct dma_device *device;
+	struct dma_async_tx_descriptor *desc;
+	struct scatterlist *sg;
+	int i, nr_sg;
+
+	host->dma_current = NULL;
+
+	if (data->flags & MMC_DATA_READ) {
+		conf.direction = DMA_FROM_DEVICE;
+		chan = host->dma_rx_channel;
+	} else {
+		conf.direction = DMA_TO_DEVICE;
+		chan = host->dma_tx_channel;
+	}
+
+	/* If there's no DMA channel, fall back to PIO */
+	if (!chan)
+		return -EINVAL;
+
+	/* shouldn't this be in the DMA engine driver? */
+	if (host->size <= variant->dmasize_threshold) {
+		conf.src_maxburst = 1;
+		conf.dst_maxburst = 1;
+	}
+
+	device = chan->device;
+	nr_sg = dma_map_sg(device->dev, data->sg, data->sg_len, conf.direction);
+	if (nr_sg == 0)
+		return -EINVAL;
+
+	dmaengine_slave_config(chan, &conf);
+	desc = device->device_prep_slave_sg(chan, data->sg, nr_sg,
+					    conf.direction, DMA_CTRL_ACK);
+	if (!desc)
+		goto unmap_exit;
+
+	/* Okay, go for it. */
+	host->dma_current = chan;
+
+	dev_vdbg(mmc_dev(host->mmc),
+		 "Submit MMCI DMA job, sglen %d blksz %04x blks %04x flags %08x\n",
+		 data->sg_len, data->blksz, data->blocks, data->flags);
+	dmaengine_submit(desc);
+	dma_async_issue_pending(chan);
+
+	datactrl |= MCI_DPSM_DMAENABLE;
+	datactrl |= variant->dmareg_enable;
+
+	/* Trigger the DMA transfer */
+	writel(datactrl, host->base + MMCIDATACTRL);
+
+	/*
+	 * Let the MMCI say when the data is ended and it's time
+	 * to fire next DMA request. When that happens, MMCI will
+	 * call mmci_data_end()
+	 */
+	writel(readl(host->base + MMCIMASK0) | MCI_DATAENDMASK,
+	       host->base + MMCIMASK0);
+	return 0;
+
+unmap_exit:
+	dmaengine_terminate_all(chan);
+	dma_unmap_sg(device->dev, data->sg, data->sg_len, conf.direction);
+	return -ENOMEM;
+}
+#else
+/* Blank functions if the DMA engine is not available */
+static inline void mmci_dma_setup(struct mmci_host *host)
+{
+}
+
+static inline void mmci_dma_release(struct mmci_host *host)
+{
+}
+
+static inline void mmci_dma_unmap(struct mmci_host *host, struct mmc_data *data)
+{
+}
+
+static inline void mmci_dma_data_error(struct mmci_host *host)
+{
+}
+
+static inline int mmci_dma_start_data(struct mmci_host *host, unsigned int datactrl)
+{
+	return -ENOSYS;
+}
+#endif
+
 static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
 {
 	struct variant_data *variant = host->variant;
@@ -203,8 +458,6 @@ static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
 	host->size = data->blksz * data->blocks;
 	host->data_xfered = 0;
 
-	mmci_init_sg(host, data);
-
 	clks = (unsigned long long)data->timeout_ns * host->cclk;
 	do_div(clks, 1000000000UL);
 
@@ -218,8 +471,21 @@ static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
 	BUG_ON(1 << blksz_bits != data->blksz);
 
 	datactrl = MCI_DPSM_ENABLE | blksz_bits << 4;
-	if (data->flags & MMC_DATA_READ) {
+
+	if (data->flags & MMC_DATA_READ)
 		datactrl |= MCI_DPSM_DIRECTION;
+
+	/*
+	 * Attempt to use DMA operation mode; if this
+	 * should fail, fall back to PIO mode
+	 */
+	if (!mmci_dma_start_data(host, datactrl))
+		return;
+
+	/* IRQ mode, map the SG list for CPU reading/writing */
+	mmci_init_sg(host, data);
+
+	if (data->flags & MMC_DATA_READ) {
 		irqmask = MCI_RXFIFOHALFFULLMASK;
 
 		/*
@@ -301,9 +567,12 @@ mmci_data_irq(struct mmci_host *host, struct mmc_data *data,
 
 		/*
 		 * We hit an error condition.  Ensure that any data
-		 * partially written to a page is properly coherent.
+		 * partially written to a page is properly coherent,
+		 * unless we're using DMA.
 		 */
-		if (data->flags & MMC_DATA_READ) {
+		if (dma_inprogress(host))
+			mmci_dma_data_error(host);
+		else if (data->flags & MMC_DATA_READ) {
 			struct sg_mapping_iter *sg_miter = &host->sg_miter;
 			unsigned long flags;
 
@@ -320,6 +589,8 @@ mmci_data_irq(struct mmci_host *host, struct mmc_data *data,
 		dev_err(mmc_dev(host->mmc), "stray MCI_DATABLOCKEND interrupt\n");
 
 	if (status & MCI_DATAEND) {
+		if (dma_inprogress(host))
+			mmci_dma_unmap(host, data);
 		mmci_stop_data(host);
 
 		if (!data->error)
@@ -775,6 +1046,7 @@ static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id)
 		dev_dbg(mmc_dev(mmc), "eventual mclk rate: %u Hz\n",
 			host->mclk);
 	}
+	host->phybase = dev->res.start;
 	host->base = ioremap(dev->res.start, resource_size(&dev->res));
 	if (!host->base) {
 		ret = -ENOMEM;
@@ -902,9 +1174,12 @@ static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id)
 
 	amba_set_drvdata(dev, mmc);
 
-	dev_info(&dev->dev, "%s: PL%03x rev%u at 0x%08llx irq %d,%d\n",
-		mmc_hostname(mmc), amba_part(dev), amba_rev(dev),
-		(unsigned long long)dev->res.start, dev->irq[0], dev->irq[1]);
+	dev_info(&dev->dev, "%s: PL%03x manf %x rev%u at 0x%08llx irq %d,%d (pio)\n",
+		 mmc_hostname(mmc), amba_part(dev), amba_manf(dev),
+		 amba_rev(dev), (unsigned long long)dev->res.start,
+		 dev->irq[0], dev->irq[1]);
+
+	mmci_dma_setup(host);
 
 	mmc_add_host(mmc);
 
@@ -951,6 +1226,7 @@ static int __devexit mmci_remove(struct amba_device *dev)
 		writel(0, host->base + MMCICOMMAND);
 		writel(0, host->base + MMCIDATACTRL);
 
+		mmci_dma_release(host);
 		free_irq(dev->irq[0], host);
 		if (!host->singleirq)
 			free_irq(dev->irq[1], host);
diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h
index c1df7b8..9626f51 100644
--- a/drivers/mmc/host/mmci.h
+++ b/drivers/mmc/host/mmci.h
@@ -148,8 +148,10 @@
 
 struct clk;
 struct variant_data;
+struct dma_chan;
 
 struct mmci_host {
+	phys_addr_t		phybase;
 	void __iomem		*base;
 	struct mmc_request	*mrq;
 	struct mmc_command	*cmd;
@@ -181,5 +183,16 @@ struct mmci_host {
 	struct sg_mapping_iter	sg_miter;
 	unsigned int		size;
 	struct regulator	*vcc;
+
+#ifdef CONFIG_DMA_ENGINE
+	/* DMA stuff */
+	struct dma_chan		*dma_current;
+	struct dma_chan		*dma_rx_channel;
+	struct dma_chan		*dma_tx_channel;
+
+#define dma_inprogress(host)	((host)->dma_current)
+#else
+#define dma_inprogress(host)	(0)
+#endif
 };
 
diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h
index f4ee9ac..f602270 100644
--- a/include/linux/amba/mmci.h
+++ b/include/linux/amba/mmci.h
@@ -6,6 +6,9 @@
 
 #include <linux/mmc/host.h>
 
+/* Just some dummy forwarding */
+struct dma_chan;
+
 /**
  * struct mmci_platform_data - platform configuration for the MMCI
  * (also known as PL180) block.
@@ -27,6 +30,17 @@
  * @cd_invert: true if the gpio_cd pin value is active low
  * @capabilities: the capabilities of the block as implemented in
  * this platform, signify anything MMC_CAP_* from mmc/host.h
+ * @dma_filter: function used to select an appropriate RX and TX
+ * DMA channel to be used for DMA, if and only if you're deploying the
+ * generic DMA engine
+ * @dma_rx_param: parameter passed to the DMA allocation
+ * filter in order to select an appropriate RX channel. If
+ * there is a bidirectional RX+TX channel, then just specify
+ * this and leave dma_tx_param set to NULL
+ * @dma_tx_param: parameter passed to the DMA allocation
+ * filter in order to select an appropriate TX channel. If this
+ * is NULL the driver will attempt to use the RX channel as a
+ * bidirectional channel
  */
 struct mmci_platform_data {
 	unsigned int f_max;
@@ -38,6 +52,9 @@ struct mmci_platform_data {
 	int	gpio_cd;
 	bool	cd_invert;
 	unsigned long capabilities;
+	bool (*dma_filter)(struct dma_chan *chan, void *filter_param);
+	void *dma_rx_param;
+	void *dma_tx_param;
 };
 
 #endif
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH] ARM: add PrimeCell generic DMA to MMCI/PL180
  2011-01-25 10:23                   ` Russell King - ARM Linux
@ 2011-01-27 13:07                     ` Linus Walleij
  2011-02-01 13:30                       ` Russell King - ARM Linux
  0 siblings, 1 reply; 18+ messages in thread
From: Linus Walleij @ 2011-01-27 13:07 UTC (permalink / raw)
  To: linux-arm-kernel

2011/1/25 Russell King - ARM Linux <linux@arm.linux.org.uk>:

> The DMA controller must be programmed with the right burst size for the
> peripheral, which in this case would be 8 words, otherwise the 'last'
> burst signalling goes wrong.

Now I think I found the real bug behind this...

In the Vendor data we set this:
	.dmareg_enable		= 1 << 12, /* DMAREQCTRL */

This actually means:

DMAREQCTL: DMA requests control.
0: SREQ are generated when data to be transferred is
less than 8 words.
1: no SREQ is generated when data to be transferred
is less than 8 words, LBREQ is generated instead.

Which will work for all MMC card transfers I believe, since
these are even 512 byte multiples. No problem...

Then comes an SDIO request and it is 7 bytes so LBREQ
is emitted 7 times instead of 6 * SREQ + 1 LSREQ.

Which works if you set down the burst size to 1 byte,
firing 7 consecutive bursts each terminated with LBREQ
which the DMA controller is perfectly happy with.

So while setting the burst size down to 1 byte works,
since it matches the twisted logic of DMAREQCTL,
flipping the bit in DMAREQCTL to 0 instead when
(size % 8 != 0) makes somewhat more sense.
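
In driver terms, a sketch of that idea (untested; assuming host->size is
in bytes, so 8 words is 32 bytes):

	datactrl |= MCI_DPSM_DMAENABLE;
	/* only use LBREQ-instead-of-SREQ mode for multiples of 8 words */
	if (!(host->size & 31))
		datactrl |= variant->dmareg_enable;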

This needs to be tested to see whether it works or
not, so I'll see what we can do.

Yours,
Linus Walleij

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH] ARM: add PrimeCell generic DMA to MMCI/PL180
  2011-01-27 13:07                     ` Linus Walleij
@ 2011-02-01 13:30                       ` Russell King - ARM Linux
  2011-02-01 14:15                         ` Linus Walleij
  0 siblings, 1 reply; 18+ messages in thread
From: Russell King - ARM Linux @ 2011-02-01 13:30 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Jan 27, 2011 at 02:07:21PM +0100, Linus Walleij wrote:
> 2011/1/25 Russell King - ARM Linux <linux@arm.linux.org.uk>:
> 
> > The DMA controller must be programmed with the right burst size for the
> > peripheral, which in this case would be 8 words, otherwise the 'last'
> > burst signalling goes wrong.
> 
> Now I think I found the real bug behind this...
> 
> In the Vendor data we set this:
> 	.dmareg_enable		= 1 << 12, /* DMAREQCTRL */
> 
> This actually means:
> 
> DMAREQCTL: DMA requests control.
> 0: SREQ are generated when data to be transferred is
> less than 8 words.
> 1: no SREQ is generated when data to be transferred
> is less than 8 words, LBREQ is generated instead.
> 
> Which will work for all MMC card transfers I believe, since
> these are even 512 byte multiples. No problem...
> 
> Then comes an SDIO request and it is 7 bytes so LBREQ
> is emitted 7 times instead of 6 * SREQ + 1 LSREQ.
> 
> Which works if you set down the burst size to 1 byte,
> firing 7 consecutive bursts each terminated with LBREQ
> which the DMA controller is perfectly happy with.
> 
> So while setting the burst size down to 1 byte works,
> since it matches the twisted logic of DMAREQCTL,
> flipping the bit in DMAREQCTL to 0 instead when
> (size % 8 != 0) makes somewhat more sense.
> 
> This needs to be tested to see whether it works or
> not, so I'll see what we can do.

I really can't see this making any difference.  In the case of a 7 word
transfer, you've set up the DMA controller to transfer 7 * 4 = 28 bytes.

1. If DMAREQCTL = 1, the MMCI issues a LBREQ.

   If the DMA controller has a burst size of 32 bytes and it fetches 32
   bytes from the device, it is broken.  It doesn't matter what the
   primecell does - it's the DMA controller misbehaving.  In that case,
   the work-around needs to be in the DMA controller code to reduce the
   bursting so that the DMA controller doesn't transfer more than the
   requested size.

   This is not specific to MMCI as other devices will suffer the same
   problem.  So it's wrong to put it into every primecell driver just
   because someone has a broken DMA controller somewhere.

2. If DMAREQCTL = 0, the MMCI issues six SREQ plus one LSREQ.

   This reflects the behaviour of the ARM primecell, and SREQ should
   result in the DMA controller performing one transfer per request.

So, the solutions as I see it are:

1. If DMAREQCTL = 1, move the burst limiting work-around into the DMA
   engine driver, if it really is a problem.

2. Set DMAREQCTL = 0.

(2) seems to be the most sane solution to me.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH] ARM: add PrimeCell generic DMA to MMCI/PL180
  2011-02-01 13:30                       ` Russell King - ARM Linux
@ 2011-02-01 14:15                         ` Linus Walleij
  0 siblings, 0 replies; 18+ messages in thread
From: Linus Walleij @ 2011-02-01 14:15 UTC (permalink / raw)
  To: linux-arm-kernel

2011/2/1 Russell King - ARM Linux <linux@arm.linux.org.uk>:

> So, the solutions as I see it are:
>
> 1. If DMAREQCTL = 1, move the burst limiting work-around into the DMA
>    engine driver, if it really is a problem.
>
> 2. Set DMAREQCTL = 0.
>
> (2) seems to be the most sane solution to me.

Let's go with that as indicated in the other mail.

However, the DMAREQCTL bit is an odd beast and I just cannot
wrap my head around it. It certainly isn't there to solve any
software problem, so I need to find some use case for it.

Generating SDIO traffic with odd byte chunks isn't that easy,
though.

Linus Walleij

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2011-02-01 14:15 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-10-06  9:42 [PATCH] ARM: add PrimeCell generic DMA to MMCI/PL180 Linus Walleij
2010-12-19 16:32 ` Russell King - ARM Linux
2010-12-19 19:59   ` Russell King - ARM Linux
2010-12-21 13:54     ` Russell King - ARM Linux
2010-12-21 15:59       ` Russell King - ARM Linux
2010-12-22 21:55         ` Linus Walleij
2011-01-24 16:01           ` Russell King - ARM Linux
2011-01-24 21:06             ` Linus Walleij
2011-01-24 21:22               ` Russell King - ARM Linux
2011-01-25  8:33                 ` Linus Walleij
2011-01-26  9:24                   ` Russell King - ARM Linux
2011-01-26  9:50                   ` Russell King - ARM Linux
2011-01-25  9:36                 ` Linus Walleij
2011-01-25  9:47                 ` Linus Walleij
2011-01-25 10:23                   ` Russell King - ARM Linux
2011-01-27 13:07                     ` Linus Walleij
2011-02-01 13:30                       ` Russell King - ARM Linux
2011-02-01 14:15                         ` Linus Walleij
