All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/6] ARM/dma: pl08x: pass reasonable memcpy settings
@ 2017-04-08 12:04 Linus Walleij
       [not found] ` <20170408120457.22750-1-linus.walleij-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
                   ` (5 more replies)
  0 siblings, 6 replies; 14+ messages in thread
From: Linus Walleij @ 2017-04-08 12:04 UTC (permalink / raw)
  To: linux-arm-kernel

We cannot use bits from configuration registers as API between
platforms and driver like this, abstract it out to two enums
and mimic the stuff passed as device tree data.

This is done to make it possible for the driver to generate the
ccfg word on-the-fly so we can support more PL08x derivatives.

Cc: arm at kernel.org
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
ARM SoC folks: I'd like your ACK on this eventually, it needs to
go through the DMA engine tree with the patches following this one.
---
 arch/arm/mach-lpc32xx/phy3250.c |   3 +
 arch/arm/mach-s3c64xx/pl080.c   |  28 +++------
 arch/arm/mach-spear/spear3xx.c  |  14 ++---
 arch/arm/mach-spear/spear6xx.c  |  14 ++---
 drivers/dma/amba-pl08x.c        | 131 ++++++++++++++++++++++++++++------------
 include/linux/amba/pl08x.h      |  30 +++++++--
 6 files changed, 138 insertions(+), 82 deletions(-)

diff --git a/arch/arm/mach-lpc32xx/phy3250.c b/arch/arm/mach-lpc32xx/phy3250.c
index 6c52bd32610e..e48cc06c2aec 100644
--- a/arch/arm/mach-lpc32xx/phy3250.c
+++ b/arch/arm/mach-lpc32xx/phy3250.c
@@ -137,6 +137,9 @@ static void pl08x_put_signal(const struct pl08x_channel_data *cd, int ch)
 }
 
 static struct pl08x_platform_data pl08x_pd = {
+	/* Some reasonable memcpy defaults */
+	.memcpy_burst_size = PL08X_BURST_SZ_256,
+	.memcpy_bus_width = PL08X_BUS_WIDTH_32_BITS,
 	.slave_channels = &pl08x_slave_channels[0],
 	.num_slave_channels = ARRAY_SIZE(pl08x_slave_channels),
 	.get_xfer_signal = pl08x_get_signal,
diff --git a/arch/arm/mach-s3c64xx/pl080.c b/arch/arm/mach-s3c64xx/pl080.c
index 261820a855ec..66fc774b70ec 100644
--- a/arch/arm/mach-s3c64xx/pl080.c
+++ b/arch/arm/mach-s3c64xx/pl080.c
@@ -137,16 +137,10 @@ static const struct dma_slave_map s3c64xx_dma0_slave_map[] = {
 };
 
 struct pl08x_platform_data s3c64xx_dma0_plat_data = {
-	.memcpy_channel = {
-		.bus_id = "memcpy",
-		.cctl_memcpy =
-			(PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT |
-			PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT |
-			PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT |
-			PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT |
-			PL080_CONTROL_PROT_BUFF | PL080_CONTROL_PROT_CACHE |
-			PL080_CONTROL_PROT_SYS),
-	},
+	.memcpy_burst_size = PL08X_BURST_SZ_4,
+	.memcpy_bus_width = PL08X_BUS_WIDTH_32_BITS,
+	.memcpy_prot_buff = true,
+	.memcpy_prot_cache = true,
 	.lli_buses = PL08X_AHB1,
 	.mem_buses = PL08X_AHB1,
 	.get_xfer_signal = pl08x_get_xfer_signal,
@@ -238,16 +232,10 @@ static const struct dma_slave_map s3c64xx_dma1_slave_map[] = {
 };
 
 struct pl08x_platform_data s3c64xx_dma1_plat_data = {
-	.memcpy_channel = {
-		.bus_id = "memcpy",
-		.cctl_memcpy =
-			(PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT |
-			PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT |
-			PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT |
-			PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT |
-			PL080_CONTROL_PROT_BUFF | PL080_CONTROL_PROT_CACHE |
-			PL080_CONTROL_PROT_SYS),
-	},
+	.memcpy_burst_size = PL08X_BURST_SZ_4,
+	.memcpy_bus_width = PL08X_BUS_WIDTH_32_BITS,
+	.memcpy_prot_buff = true,
+	.memcpy_prot_cache = true,
 	.lli_buses = PL08X_AHB1,
 	.mem_buses = PL08X_AHB1,
 	.get_xfer_signal = pl08x_get_xfer_signal,
diff --git a/arch/arm/mach-spear/spear3xx.c b/arch/arm/mach-spear/spear3xx.c
index 23394ac76cf2..8537fcffe5a8 100644
--- a/arch/arm/mach-spear/spear3xx.c
+++ b/arch/arm/mach-spear/spear3xx.c
@@ -44,16 +44,10 @@ struct pl022_ssp_controller pl022_plat_data = {
 
 /* dmac device registration */
 struct pl08x_platform_data pl080_plat_data = {
-	.memcpy_channel = {
-		.bus_id = "memcpy",
-		.cctl_memcpy =
-			(PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT | \
-			PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT | \
-			PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT | \
-			PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT | \
-			PL080_CONTROL_PROT_BUFF | PL080_CONTROL_PROT_CACHE | \
-			PL080_CONTROL_PROT_SYS),
-	},
+	.memcpy_burst_size = PL08X_BURST_SZ_16,
+	.memcpy_bus_width = PL08X_BUS_WIDTH_32_BITS,
+	.memcpy_prot_buff = true,
+	.memcpy_prot_cache = true,
 	.lli_buses = PL08X_AHB1,
 	.mem_buses = PL08X_AHB1,
 	.get_xfer_signal = pl080_get_signal,
diff --git a/arch/arm/mach-spear/spear6xx.c b/arch/arm/mach-spear/spear6xx.c
index ccf3573b831c..c5fc110134ba 100644
--- a/arch/arm/mach-spear/spear6xx.c
+++ b/arch/arm/mach-spear/spear6xx.c
@@ -322,16 +322,10 @@ static struct pl08x_channel_data spear600_dma_info[] = {
 };
 
 static struct pl08x_platform_data spear6xx_pl080_plat_data = {
-	.memcpy_channel = {
-		.bus_id = "memcpy",
-		.cctl_memcpy =
-			(PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT | \
-			PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT | \
-			PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT | \
-			PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT | \
-			PL080_CONTROL_PROT_BUFF | PL080_CONTROL_PROT_CACHE | \
-			PL080_CONTROL_PROT_SYS),
-	},
+	.memcpy_burst_size = PL08X_BURST_SZ_16,
+	.memcpy_bus_width = PL08X_BUS_WIDTH_32_BITS,
+	.memcpy_prot_buff = true,
+	.memcpy_prot_cache = true,
 	.lli_buses = PL08X_AHB1,
 	.mem_buses = PL08X_AHB1,
 	.get_xfer_signal = pl080_get_signal,
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index e7c504f49ca0..11c89df9a958 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -1433,6 +1433,7 @@ static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy(
 	struct pl08x_driver_data *pl08x = plchan->host;
 	struct pl08x_txd *txd;
 	struct pl08x_sg *dsg;
+	u32 cctl = 0;
 	int ret;
 
 	txd = pl08x_get_txd(plchan);
@@ -1455,15 +1456,83 @@ static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy(
 
 	/* Set platform data for m2m */
 	txd->ccfg |= PL080_FLOW_MEM2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT;
-	txd->cctl = pl08x->pd->memcpy_channel.cctl_memcpy &
-			~(PL080_CONTROL_DST_AHB2 | PL080_CONTROL_SRC_AHB2);
+
+	/* Conjure cctl */
+	switch (pl08x->pd->memcpy_burst_size) {
+	default:
+		dev_err(&pl08x->adev->dev,
+			"illegal burst size for memcpy, set to 1\n");
+		/* Fall through */
+	case PL08X_BURST_SZ_1:
+		cctl |= PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT |
+			PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT;
+		break;
+	case PL08X_BURST_SZ_4:
+		cctl |= PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT |
+			PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT;
+		break;
+	case PL08X_BURST_SZ_8:
+		cctl |= PL080_BSIZE_8 << PL080_CONTROL_SB_SIZE_SHIFT |
+			PL080_BSIZE_8 << PL080_CONTROL_DB_SIZE_SHIFT;
+		break;
+	case PL08X_BURST_SZ_16:
+		cctl |= PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT |
+			PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT;
+		break;
+	case PL08X_BURST_SZ_32:
+		cctl |= PL080_BSIZE_32 << PL080_CONTROL_SB_SIZE_SHIFT |
+			PL080_BSIZE_32 << PL080_CONTROL_DB_SIZE_SHIFT;
+		break;
+	case PL08X_BURST_SZ_64:
+		cctl |= PL080_BSIZE_64 << PL080_CONTROL_SB_SIZE_SHIFT |
+			PL080_BSIZE_64 << PL080_CONTROL_DB_SIZE_SHIFT;
+		break;
+	case PL08X_BURST_SZ_128:
+		cctl |= PL080_BSIZE_128 << PL080_CONTROL_SB_SIZE_SHIFT |
+			PL080_BSIZE_128 << PL080_CONTROL_DB_SIZE_SHIFT;
+		break;
+	case PL08X_BURST_SZ_256:
+		cctl |= PL080_BSIZE_256 << PL080_CONTROL_SB_SIZE_SHIFT |
+			PL080_BSIZE_256 << PL080_CONTROL_DB_SIZE_SHIFT;
+		break;
+	}
+
+	switch (pl08x->pd->memcpy_bus_width) {
+	default:
+		dev_err(&pl08x->adev->dev,
+			"illegal bus width for memcpy, set to 8 bits\n");
+		/* Fall through */
+	case PL08X_BUS_WIDTH_8_BITS:
+		cctl |= PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT |
+			PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT;
+		break;
+	case PL08X_BUS_WIDTH_16_BITS:
+		cctl |= PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT |
+			PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT;
+		break;
+	case PL08X_BUS_WIDTH_32_BITS:
+		cctl |= PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT |
+			PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT;
+		break;
+	}
+
+	/* Protection flags */
+	if (pl08x->pd->memcpy_prot_buff)
+		cctl |= PL080_CONTROL_PROT_BUFF;
+	if (pl08x->pd->memcpy_prot_cache)
+		cctl |= PL080_CONTROL_PROT_CACHE;
+
+	/* We are the kernel, so we are in privileged mode */
+	cctl |= PL080_CONTROL_PROT_SYS;
 
 	/* Both to be incremented or the code will break */
-	txd->cctl |= PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR;
+	cctl |= PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR;
 
 	if (pl08x->vd->dualmaster)
-		txd->cctl |= pl08x_select_bus(pl08x->mem_buses,
-					      pl08x->mem_buses);
+		cctl |= pl08x_select_bus(pl08x->mem_buses,
+					 pl08x->mem_buses);
+
+	txd->cctl = cctl;
 
 	ret = pl08x_fill_llis_for_desc(plchan->host, txd);
 	if (!ret) {
@@ -1925,9 +1994,16 @@ static int pl08x_dma_init_virtual_channels(struct pl08x_driver_data *pl08x,
 			chan->signal = i;
 			pl08x_dma_slave_init(chan);
 		} else {
-			chan->cd = &pl08x->pd->memcpy_channel;
+			chan->cd = kzalloc(sizeof(*chan->cd), GFP_KERNEL);
+			if (!chan->cd) {
+				kfree(chan);
+				return -ENOMEM;
+			}
+			chan->cd->bus_id = "memcpy";
+			chan->cd->periph_buses = pl08x->pd->mem_buses;
 			chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i);
 			if (!chan->name) {
+				kfree(chan->cd);
 				kfree(chan);
 				return -ENOMEM;
 			}
@@ -2099,7 +2175,6 @@ static int pl08x_of_probe(struct amba_device *adev,
 {
 	struct pl08x_platform_data *pd;
 	struct pl08x_channel_data *chanp = NULL;
-	u32 cctl_memcpy = 0;
 	u32 val;
 	int ret;
 	int i;
@@ -2139,36 +2214,28 @@ static int pl08x_of_probe(struct amba_device *adev,
 		dev_err(&adev->dev, "illegal burst size for memcpy, set to 1\n");
 		/* Fall through */
 	case 1:
-		cctl_memcpy |= PL080_BSIZE_1 << PL080_CONTROL_SB_SIZE_SHIFT |
-			       PL080_BSIZE_1 << PL080_CONTROL_DB_SIZE_SHIFT;
+		pd->memcpy_burst_size = PL08X_BURST_SZ_1;
 		break;
 	case 4:
-		cctl_memcpy |= PL080_BSIZE_4 << PL080_CONTROL_SB_SIZE_SHIFT |
-			       PL080_BSIZE_4 << PL080_CONTROL_DB_SIZE_SHIFT;
+		pd->memcpy_burst_size = PL08X_BURST_SZ_4;
 		break;
 	case 8:
-		cctl_memcpy |= PL080_BSIZE_8 << PL080_CONTROL_SB_SIZE_SHIFT |
-			       PL080_BSIZE_8 << PL080_CONTROL_DB_SIZE_SHIFT;
+		pd->memcpy_burst_size = PL08X_BURST_SZ_8;
 		break;
 	case 16:
-		cctl_memcpy |= PL080_BSIZE_16 << PL080_CONTROL_SB_SIZE_SHIFT |
-			       PL080_BSIZE_16 << PL080_CONTROL_DB_SIZE_SHIFT;
+		pd->memcpy_burst_size = PL08X_BURST_SZ_16;
 		break;
 	case 32:
-		cctl_memcpy |= PL080_BSIZE_32 << PL080_CONTROL_SB_SIZE_SHIFT |
-			       PL080_BSIZE_32 << PL080_CONTROL_DB_SIZE_SHIFT;
+		pd->memcpy_burst_size = PL08X_BURST_SZ_32;
 		break;
 	case 64:
-		cctl_memcpy |= PL080_BSIZE_64 << PL080_CONTROL_SB_SIZE_SHIFT |
-			       PL080_BSIZE_64 << PL080_CONTROL_DB_SIZE_SHIFT;
+		pd->memcpy_burst_size = PL08X_BURST_SZ_64;
 		break;
 	case 128:
-		cctl_memcpy |= PL080_BSIZE_128 << PL080_CONTROL_SB_SIZE_SHIFT |
-			       PL080_BSIZE_128 << PL080_CONTROL_DB_SIZE_SHIFT;
+		pd->memcpy_burst_size = PL08X_BURST_SZ_128;
 		break;
 	case 256:
-		cctl_memcpy |= PL080_BSIZE_256 << PL080_CONTROL_SB_SIZE_SHIFT |
-			       PL080_BSIZE_256 << PL080_CONTROL_DB_SIZE_SHIFT;
+		pd->memcpy_burst_size = PL08X_BURST_SZ_256;
 		break;
 	}
 
@@ -2182,28 +2249,16 @@ static int pl08x_of_probe(struct amba_device *adev,
 		dev_err(&adev->dev, "illegal bus width for memcpy, set to 8 bits\n");
 		/* Fall through */
 	case 8:
-		cctl_memcpy |= PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT |
-			       PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT;
+		pd->memcpy_bus_width = PL08X_BUS_WIDTH_8_BITS;
 		break;
 	case 16:
-		cctl_memcpy |= PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT |
-			       PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT;
+		pd->memcpy_bus_width = PL08X_BUS_WIDTH_16_BITS;
 		break;
 	case 32:
-		cctl_memcpy |= PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT |
-			       PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT;
+		pd->memcpy_bus_width = PL08X_BUS_WIDTH_32_BITS;
 		break;
 	}
 
-	/* This is currently the only thing making sense */
-	cctl_memcpy |= PL080_CONTROL_PROT_SYS;
-
-	/* Set up memcpy channel */
-	pd->memcpy_channel.bus_id = "memcpy";
-	pd->memcpy_channel.cctl_memcpy = cctl_memcpy;
-	/* Use the buses that can access memory, obviously */
-	pd->memcpy_channel.periph_buses = pd->mem_buses;
-
 	/*
 	 * Allocate channel data for all possible slave channels (one
 	 * for each possible signal), channels will then be allocated
diff --git a/include/linux/amba/pl08x.h b/include/linux/amba/pl08x.h
index 5308eae9ce35..79d1bcee738d 100644
--- a/include/linux/amba/pl08x.h
+++ b/include/linux/amba/pl08x.h
@@ -47,8 +47,6 @@ enum {
  * devices with static assignments
  * @muxval: a number usually used to poke into some mux regiser to
  * mux in the signal to this channel
- * @cctl_memcpy: options for the channel control register for memcpy
- *  *** not used for slave channels ***
  * @addr: source/target address in physical memory for this DMA channel,
  * can be the address of a FIFO register for burst requests for example.
  * This can be left undefined if the PrimeCell API is used for configuring
@@ -63,12 +61,28 @@ struct pl08x_channel_data {
 	int min_signal;
 	int max_signal;
 	u32 muxval;
-	u32 cctl_memcpy;
 	dma_addr_t addr;
 	bool single;
 	u8 periph_buses;
 };
 
+enum pl08x_burst_size {
+	PL08X_BURST_SZ_1,
+	PL08X_BURST_SZ_4,
+	PL08X_BURST_SZ_8,
+	PL08X_BURST_SZ_16,
+	PL08X_BURST_SZ_32,
+	PL08X_BURST_SZ_64,
+	PL08X_BURST_SZ_128,
+	PL08X_BURST_SZ_256,
+};
+
+enum pl08x_bus_width {
+	PL08X_BUS_WIDTH_8_BITS,
+	PL08X_BUS_WIDTH_16_BITS,
+	PL08X_BUS_WIDTH_32_BITS,
+};
+
 /**
  * struct pl08x_platform_data - the platform configuration for the PL08x
  * PrimeCells.
@@ -76,6 +90,11 @@ struct pl08x_channel_data {
  * platform, all inclusive, including multiplexed channels. The available
  * physical channels will be multiplexed around these signals as they are
  * requested, just enumerate all possible channels.
+ * @num_slave_channels: number of elements in the slave channel array
+ * @memcpy_burst_size: the appropriate burst size for memcpy operations
+ * @memcpy_bus_width: memory bus width
+ * @memcpy_prot_buff: whether memcpy DMA is bufferable
+ * @memcpy_prot_cache: whether memcpy DMA is cacheable
  * @get_xfer_signal: request a physical signal to be used for a DMA transfer
  * immediately: if there is some multiplexing or similar blocking the use
  * of the channel the transfer can be denied by returning less than zero,
@@ -90,7 +109,10 @@ struct pl08x_channel_data {
 struct pl08x_platform_data {
 	struct pl08x_channel_data *slave_channels;
 	unsigned int num_slave_channels;
-	struct pl08x_channel_data memcpy_channel;
+	enum pl08x_burst_size memcpy_burst_size;
+	enum pl08x_bus_width memcpy_bus_width;
+	bool memcpy_prot_buff;
+	bool memcpy_prot_cache;
 	int (*get_xfer_signal)(const struct pl08x_channel_data *);
 	void (*put_xfer_signal)(const struct pl08x_channel_data *, int);
 	u8 lli_buses;
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 2/6] dma: pl08x: Add Faraday FTDMAC020 to compatible list
  2017-04-08 12:04 [PATCH 1/6] ARM/dma: pl08x: pass reasonable memcpy settings Linus Walleij
@ 2017-04-08 12:04     ` Linus Walleij
  2017-04-08 12:04 ` [PATCH 3/6] dma: pl08x: Make slave engine optional Linus Walleij
                       ` (4 subsequent siblings)
  5 siblings, 0 replies; 14+ messages in thread
From: Linus Walleij @ 2017-04-08 12:04 UTC (permalink / raw)
  To: Vinod Koul, dmaengine-u79uwXL29TY76Z2rM5mHXA
  Cc: Janos Laube, Paulius Zaleckas,
	openwrt-devel-p3rKhJxN3npAfugRpC6u6w,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	Hans Ulli Kroll, Florian Fainelli, Kuo-Jung Su, Linus Walleij,
	devicetree-u79uwXL29TY76Z2rM5mHXA

This augments the PL08x bindings to include the Faraday Technology
FTDMAC020 DMA engine, as it is clearly a derivative of the PL08x
PrimeCell. Also specify that it needs the special peripheral ID
specified to work properly.

Cc: devicetree-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Signed-off-by: Linus Walleij <linus.walleij-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
---
Seeking a DT maintainer ACK on this.
---
 Documentation/devicetree/bindings/dma/arm-pl08x.txt | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/dma/arm-pl08x.txt b/Documentation/devicetree/bindings/dma/arm-pl08x.txt
index 8a0097a029d3..0ba81f79266f 100644
--- a/Documentation/devicetree/bindings/dma/arm-pl08x.txt
+++ b/Documentation/devicetree/bindings/dma/arm-pl08x.txt
@@ -3,6 +3,11 @@
 Required properties:
 - compatible: "arm,pl080", "arm,primecell";
 	      "arm,pl081", "arm,primecell";
+	      "faraday,ftdmac020", "arm,primecell"
+- arm,primecell-periphid: on the FTDMAC020 the primecell ID is not hard-coded
+  in the hardware and must be specified here as <0x0003b080>. This number
+  follows the PrimeCell standard numbering using the JEP106 vendor code 0x38
+  for Faraday Technology.
 - reg: Address range of the PL08x registers
 - interrupt: The PL08x interrupt number
 - clocks: The clock running the IP core clock
@@ -20,8 +25,8 @@ Optional properties:
 - dma-requests: contains the total number of DMA requests supported by the DMAC
 - memcpy-burst-size: the size of the bursts for memcpy: 1, 4, 8, 16, 32
   64, 128 or 256 bytes are legal values
-- memcpy-bus-width: the bus width used for memcpy: 8, 16 or 32 are legal
-  values
+- memcpy-bus-width: the bus width used for memcpy in bits: 8, 16 or 32 are legal
+  values, the Faraday FTDMAC020 can also accept 64 bits
 
 Clients
 Required properties:
-- 
2.9.3

--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 2/6] dma: pl08x: Add Faraday FTDMAC020 to compatible list
@ 2017-04-08 12:04     ` Linus Walleij
  0 siblings, 0 replies; 14+ messages in thread
From: Linus Walleij @ 2017-04-08 12:04 UTC (permalink / raw)
  To: linux-arm-kernel

This augments the PL08x bindings to include the Faraday Technology
FTDMAC020 DMA engine, as it is clearly a derivative of the PL08x
PrimeCell. Also specify that it needs the special peripheral ID
specified to work properly.

Cc: devicetree at vger.kernel.org
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
Seeking a DT maintainer ACK on this.
---
 Documentation/devicetree/bindings/dma/arm-pl08x.txt | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/dma/arm-pl08x.txt b/Documentation/devicetree/bindings/dma/arm-pl08x.txt
index 8a0097a029d3..0ba81f79266f 100644
--- a/Documentation/devicetree/bindings/dma/arm-pl08x.txt
+++ b/Documentation/devicetree/bindings/dma/arm-pl08x.txt
@@ -3,6 +3,11 @@
 Required properties:
 - compatible: "arm,pl080", "arm,primecell";
 	      "arm,pl081", "arm,primecell";
+	      "faraday,ftdmac020", "arm,primecell"
+- arm,primecell-periphid: on the FTDMAC020 the primecell ID is not hard-coded
+  in the hardware and must be specified here as <0x0003b080>. This number
+  follows the PrimeCell standard numbering using the JEP106 vendor code 0x38
+  for Faraday Technology.
 - reg: Address range of the PL08x registers
 - interrupt: The PL08x interrupt number
 - clocks: The clock running the IP core clock
@@ -20,8 +25,8 @@ Optional properties:
 - dma-requests: contains the total number of DMA requests supported by the DMAC
 - memcpy-burst-size: the size of the bursts for memcpy: 1, 4, 8, 16, 32
   64, 128 or 256 bytes are legal values
-- memcpy-bus-width: the bus width used for memcpy: 8, 16 or 32 are legal
-  values
+- memcpy-bus-width: the bus width used for memcpy in bits: 8, 16 or 32 are legal
+  values, the Faraday FTDMAC020 can also accept 64 bits
 
 Clients
 Required properties:
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 3/6] dma: pl08x: Make slave engine optional
  2017-04-08 12:04 [PATCH 1/6] ARM/dma: pl08x: pass reasonable memcpy settings Linus Walleij
       [not found] ` <20170408120457.22750-1-linus.walleij-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
@ 2017-04-08 12:04 ` Linus Walleij
  2017-04-08 12:04 ` [PATCH 4/6] dma: pl08x: Add support for Faraday Technology FTDMAC020 Linus Walleij
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 14+ messages in thread
From: Linus Walleij @ 2017-04-08 12:04 UTC (permalink / raw)
  To: linux-arm-kernel

If the vendor data does not specify any signals, we do not
have to support slave DMA. Make the registration of the slave
DMA engine optional, so we can use this for the FTDMAC020
in the Gemini that only has memcpy support.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/dma/amba-pl08x.c | 128 +++++++++++++++++++++++++++++------------------
 1 file changed, 78 insertions(+), 50 deletions(-)

diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index 11c89df9a958..06bc0659a8f8 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -253,8 +253,9 @@ struct pl08x_dma_chan {
 
 /**
  * struct pl08x_driver_data - the local state holder for the PL08x
- * @slave: slave engine for this instance
+ * @slave: optional slave engine for this instance
  * @memcpy: memcpy engine for this instance
+ * @has_slave: the PL08x has a slave engine (routed signals)
  * @base: virtual memory base (remapped) for the PL08x
  * @adev: the corresponding AMBA (PrimeCell) bus entry
  * @vd: vendor data for this PL08x variant
@@ -269,6 +270,7 @@ struct pl08x_dma_chan {
 struct pl08x_driver_data {
 	struct dma_device slave;
 	struct dma_device memcpy;
+	bool has_slave;
 	void __iomem *base;
 	struct amba_device *adev;
 	const struct vendor_data *vd;
@@ -705,7 +707,7 @@ static void pl08x_phy_free(struct pl08x_dma_chan *plchan)
 			break;
 		}
 
-	if (!next) {
+	if (!next && pl08x->has_slave) {
 		list_for_each_entry(p, &pl08x->slave.channels, vc.chan.device_node)
 			if (p->state == PL08X_CHAN_WAITING) {
 				next = p;
@@ -2085,12 +2087,15 @@ static int pl08x_debugfs_show(struct seq_file *s, void *data)
 			   pl08x_state_str(chan->state));
 	}
 
-	seq_printf(s, "\nPL08x virtual slave channels:\n");
-	seq_printf(s, "CHANNEL:\tSTATE:\n");
-	seq_printf(s, "--------\t------\n");
-	list_for_each_entry(chan, &pl08x->slave.channels, vc.chan.device_node) {
-		seq_printf(s, "%s\t\t%s\n", chan->name,
-			   pl08x_state_str(chan->state));
+	if (pl08x->has_slave) {
+		seq_printf(s, "\nPL08x virtual slave channels:\n");
+		seq_printf(s, "CHANNEL:\tSTATE:\n");
+		seq_printf(s, "--------\t------\n");
+		list_for_each_entry(chan, &pl08x->slave.channels,
+				    vc.chan.device_node) {
+			seq_printf(s, "%s\t\t%s\n", chan->name,
+				   pl08x_state_str(chan->state));
+		}
 	}
 
 	return 0;
@@ -2128,6 +2133,10 @@ static struct dma_chan *pl08x_find_chan_id(struct pl08x_driver_data *pl08x,
 {
 	struct pl08x_dma_chan *chan;
 
+	/* Trying to get a slave channel from something with no slave support */
+	if (!pl08x->has_slave)
+		return NULL;
+
 	list_for_each_entry(chan, &pl08x->slave.channels, vc.chan.device_node) {
 		if (chan->signal == id)
 			return &chan->vc.chan;
@@ -2265,20 +2274,24 @@ static int pl08x_of_probe(struct amba_device *adev,
 	 * for a device and have it's AHB interfaces set up at
 	 * translation time.
 	 */
-	chanp = devm_kcalloc(&adev->dev,
-			pl08x->vd->signals,
-			sizeof(struct pl08x_channel_data),
-			GFP_KERNEL);
-	if (!chanp)
-		return -ENOMEM;
+	if (pl08x->vd->signals) {
+		chanp = devm_kcalloc(&adev->dev,
+				     pl08x->vd->signals,
+				     sizeof(struct pl08x_channel_data),
+				     GFP_KERNEL);
+		if (!chanp)
+			return -ENOMEM;
 
-	pd->slave_channels = chanp;
-	for (i = 0; i < pl08x->vd->signals; i++) {
-		/* chanp->periph_buses will be assigned at translation */
-		chanp->bus_id = kasprintf(GFP_KERNEL, "slave%d", i);
-		chanp++;
+		pd->slave_channels = chanp;
+		for (i = 0; i < pl08x->vd->signals; i++) {
+			/*
+			 * chanp->periph_buses will be assigned at translation
+			 */
+			chanp->bus_id = kasprintf(GFP_KERNEL, "slave%d", i);
+			chanp++;
+		}
+		pd->num_slave_channels = pl08x->vd->signals;
 	}
-	pd->num_slave_channels = pl08x->vd->signals;
 
 	pl08x->pd = pd;
 
@@ -2340,24 +2353,34 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
 	pl08x->memcpy.directions = BIT(DMA_MEM_TO_MEM);
 	pl08x->memcpy.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
 
-	/* Initialize slave engine */
-	dma_cap_set(DMA_SLAVE, pl08x->slave.cap_mask);
-	dma_cap_set(DMA_CYCLIC, pl08x->slave.cap_mask);
-	pl08x->slave.dev = &adev->dev;
-	pl08x->slave.device_free_chan_resources = pl08x_free_chan_resources;
-	pl08x->slave.device_prep_dma_interrupt = pl08x_prep_dma_interrupt;
-	pl08x->slave.device_tx_status = pl08x_dma_tx_status;
-	pl08x->slave.device_issue_pending = pl08x_issue_pending;
-	pl08x->slave.device_prep_slave_sg = pl08x_prep_slave_sg;
-	pl08x->slave.device_prep_dma_cyclic = pl08x_prep_dma_cyclic;
-	pl08x->slave.device_config = pl08x_config;
-	pl08x->slave.device_pause = pl08x_pause;
-	pl08x->slave.device_resume = pl08x_resume;
-	pl08x->slave.device_terminate_all = pl08x_terminate_all;
-	pl08x->slave.src_addr_widths = PL80X_DMA_BUSWIDTHS;
-	pl08x->slave.dst_addr_widths = PL80X_DMA_BUSWIDTHS;
-	pl08x->slave.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
-	pl08x->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+	/*
+	 * Initialize slave engine, if the block has no signals, that means
+	 * we have no slave support.
+	 */
+	if (vd->signals) {
+		pl08x->has_slave = true;
+		dma_cap_set(DMA_SLAVE, pl08x->slave.cap_mask);
+		dma_cap_set(DMA_CYCLIC, pl08x->slave.cap_mask);
+		pl08x->slave.dev = &adev->dev;
+		pl08x->slave.device_free_chan_resources =
+			pl08x_free_chan_resources;
+		pl08x->slave.device_prep_dma_interrupt =
+			pl08x_prep_dma_interrupt;
+		pl08x->slave.device_tx_status = pl08x_dma_tx_status;
+		pl08x->slave.device_issue_pending = pl08x_issue_pending;
+		pl08x->slave.device_prep_slave_sg = pl08x_prep_slave_sg;
+		pl08x->slave.device_prep_dma_cyclic = pl08x_prep_dma_cyclic;
+		pl08x->slave.device_config = pl08x_config;
+		pl08x->slave.device_pause = pl08x_pause;
+		pl08x->slave.device_resume = pl08x_resume;
+		pl08x->slave.device_terminate_all = pl08x_terminate_all;
+		pl08x->slave.src_addr_widths = PL80X_DMA_BUSWIDTHS;
+		pl08x->slave.dst_addr_widths = PL80X_DMA_BUSWIDTHS;
+		pl08x->slave.directions =
+			BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+		pl08x->slave.residue_granularity =
+			DMA_RESIDUE_GRANULARITY_SEGMENT;
+	}
 
 	/* Get the platform data */
 	pl08x->pd = dev_get_platdata(&adev->dev);
@@ -2465,13 +2488,15 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
 	}
 
 	/* Register slave channels */
-	ret = pl08x_dma_init_virtual_channels(pl08x, &pl08x->slave,
-			pl08x->pd->num_slave_channels, true);
-	if (ret < 0) {
-		dev_warn(&pl08x->adev->dev,
-			"%s failed to enumerate slave channels - %d\n",
-				__func__, ret);
-		goto out_no_slave;
+	if (pl08x->has_slave) {
+		ret = pl08x_dma_init_virtual_channels(pl08x, &pl08x->slave,
+					pl08x->pd->num_slave_channels, true);
+		if (ret < 0) {
+			dev_warn(&pl08x->adev->dev,
+				 "%s failed to enumerate slave channels - %d\n",
+				 __func__, ret);
+			goto out_no_slave;
+		}
 	}
 
 	ret = dma_async_device_register(&pl08x->memcpy);
@@ -2482,12 +2507,14 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
 		goto out_no_memcpy_reg;
 	}
 
-	ret = dma_async_device_register(&pl08x->slave);
-	if (ret) {
-		dev_warn(&pl08x->adev->dev,
+	if (pl08x->has_slave) {
+		ret = dma_async_device_register(&pl08x->slave);
+		if (ret) {
+			dev_warn(&pl08x->adev->dev,
 			"%s failed to register slave as an async device - %d\n",
 			__func__, ret);
-		goto out_no_slave_reg;
+			goto out_no_slave_reg;
+		}
 	}
 
 	amba_set_drvdata(adev, pl08x);
@@ -2501,7 +2528,8 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
 out_no_slave_reg:
 	dma_async_device_unregister(&pl08x->memcpy);
 out_no_memcpy_reg:
-	pl08x_free_virtual_channels(&pl08x->slave);
+	if (pl08x->has_slave)
+		pl08x_free_virtual_channels(&pl08x->slave);
 out_no_slave:
 	pl08x_free_virtual_channels(&pl08x->memcpy);
 out_no_memcpy:
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 4/6] dma: pl08x: Add support for Faraday Technology FTDMAC020
  2017-04-08 12:04 [PATCH 1/6] ARM/dma: pl08x: pass reasonable memcpy settings Linus Walleij
       [not found] ` <20170408120457.22750-1-linus.walleij-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
  2017-04-08 12:04 ` [PATCH 3/6] dma: pl08x: Make slave engine optional Linus Walleij
@ 2017-04-08 12:04 ` Linus Walleij
  2017-05-14 12:34   ` Vinod Koul
  2017-04-08 12:04 ` [PATCH 5/6] ARM: gemini: select ARM_AMBA Linus Walleij
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 14+ messages in thread
From: Linus Walleij @ 2017-04-08 12:04 UTC (permalink / raw)
  To: linux-arm-kernel

After reading the specs for the Faraday Technology FTDMAC020 found
in the Gemini platform, it becomes pretty evident that this is just
another PL08x derivative, and should be handled like such by simply
extending the existing PL08x driver to handle the quirks in this
hardware.

This patch makes memcpy work and has been tested on the Gemini and
also regression-tested on the Nomadik NHK15 using dmatest with
10 threads per channel without a hinch for hours.

I have not implemented slave DMA in those codepaths, because this
device (Gemini) does not use slave DMA, and it seems like devices
using FTDMAC020 for device DMA have a slightly different register
layout so some real hardware is needed to proceed with this. I
left some FIXME etc in the code for this.

I had to do some refactorings of some helper functions, but I have
not split those into separate patches because these refactorings
do not make much sense without the increased complexity of handling
the FTDMAC020.

The DMA test would hang the platform on me on the Gemini after a
few thousand iterations, however after turning of the caches the
problem immediately disappeared and I could run the DMA engine
with 10 threads pers physical channel for days in a row without
a crash. I think there is no problem with the DMA driver: instead
it is something fishy in the FA526 cache handling code that get
pretty heavily exercised by the DMA engine and we need to go and
fix that instead.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/dma/Kconfig        |   6 +-
 drivers/dma/amba-pl08x.c   | 767 ++++++++++++++++++++++++++++++++++++---------
 include/linux/amba/pl080.h |  83 ++++-
 3 files changed, 698 insertions(+), 158 deletions(-)

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index d01d59812cf3..9f9e3f65bd51 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -62,8 +62,10 @@ config AMBA_PL08X
 	select DMA_ENGINE
 	select DMA_VIRTUAL_CHANNELS
 	help
-	  Platform has a PL08x DMAC device
-	  which can provide DMA engine support
+	  Say yes if your platform has a PL08x DMAC device which can
+	  provide DMA engine support. This includes the original ARM
+	  PL080 and PL081, Samsungs PL080 derivative and Faraday
+	  Technology's FTDMAC020 PL080 derivative.
 
 config AMCC_PPC440SPE_ADMA
 	tristate "AMCC PPC440SPe ADMA support"
diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index 06bc0659a8f8..06cb694444e9 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -1,9 +1,10 @@
 /*
  * Copyright (c) 2006 ARM Ltd.
  * Copyright (c) 2010 ST-Ericsson SA
+ * Copyirght (c) 2017 Linaro Ltd.
  *
  * Author: Peter Pearse <peter.pearse@arm.com>
- * Author: Linus Walleij <linus.walleij@stericsson.com>
+ * Author: Linus Walleij <linus.walleij@linaro.org>
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
@@ -109,11 +110,12 @@ struct pl08x_driver_data;
  * @channels: the number of channels available in this variant
  * @signals: the number of request signals available from the hardware
  * @dualmaster: whether this version supports dual AHB masters or not.
- * @nomadik: whether the channels have Nomadik security extension bits
- *	that need to be checked for permission before use and some registers are
- *	missing
- * @pl080s: whether this version is a PL080S, which has separate register and
- *	LLI word for transfer size.
+ * @nomadik: whether this variant is a ST Microelectronics Nomadik, where the
+ *	channels have Nomadik security extension bits that need to be checked
+ *	for permission before use and some registers are missing
+ * @pl080s: whether this variant is a Samsung PL080S, which has separate
+ *	register and LLI word for transfer size.
+ * @ftdmac020: whether this variant is a Faraday Technology FTDMAC020
  * @max_transfer_size: the maximum single element transfer size for this
  *	PL08x variant.
  */
@@ -124,6 +126,7 @@ struct vendor_data {
 	bool dualmaster;
 	bool nomadik;
 	bool pl080s;
+	bool ftdmac020;
 	u32 max_transfer_size;
 };
 
@@ -147,19 +150,34 @@ struct pl08x_bus_data {
  * @id: physical index to this channel
  * @base: memory base address for this physical channel
  * @reg_config: configuration address for this physical channel
+ * @reg_control: control address for this physical channel
+ * @reg_src: transfer source address register
+ * @reg_dst: transfer destination address register
+ * @reg_lli: transfer LLI address register
+ * @reg_busy: if the variant has a special per-channel busy register,
+ * this contains a pointer to it
  * @lock: a lock to use when altering an instance of this struct
  * @serving: the virtual channel currently being served by this physical
  * channel
  * @locked: channel unavailable for the system, e.g. dedicated to secure
  * world
+ * @ftdmac020: channel is on a FTDMAC020
+ * @pl080s: channel is on a PL08s
  */
 struct pl08x_phy_chan {
 	unsigned int id;
 	void __iomem *base;
 	void __iomem *reg_config;
+	void __iomem *reg_control;
+	void __iomem *reg_src;
+	void __iomem *reg_dst;
+	void __iomem *reg_lli;
+	void __iomem *reg_busy;
 	spinlock_t lock;
 	struct pl08x_dma_chan *serving;
 	bool locked;
+	bool ftdmac020;
+	bool pl080s;
 };
 
 /**
@@ -362,10 +380,24 @@ static int pl08x_phy_channel_busy(struct pl08x_phy_chan *ch)
 {
 	unsigned int val;
 
+	/* If we have a special busy register, take a shortcut */
+	if (ch->reg_busy) {
+		val = readl(ch->reg_busy);
+		return !!(val & BIT(ch->id));
+	}
 	val = readl(ch->reg_config);
 	return val & PL080_CONFIG_ACTIVE;
 }
 
+/*
+ * pl08x_write_lli() - Write an LLI into the DMA controller.
+ *
+ * The PL08x derivatives support linked lists, but the first item of the
+ * list containing the source, destination, control word and next LLI is
+ * ignored. Instead the driver has to write those values directly into the
+ * SRC, DST, LLI and control registers. On FTDMAC020 also the SIZE
+ * register need to be set up for the first transfer.
+ */
 static void pl08x_write_lli(struct pl08x_driver_data *pl08x,
 		struct pl08x_phy_chan *phychan, const u32 *lli, u32 ccfg)
 {
@@ -383,11 +415,112 @@ static void pl08x_write_lli(struct pl08x_driver_data *pl08x,
 			phychan->id, lli[PL080_LLI_SRC], lli[PL080_LLI_DST],
 			lli[PL080_LLI_LLI], lli[PL080_LLI_CCTL], ccfg);
 
-	writel_relaxed(lli[PL080_LLI_SRC], phychan->base + PL080_CH_SRC_ADDR);
-	writel_relaxed(lli[PL080_LLI_DST], phychan->base + PL080_CH_DST_ADDR);
-	writel_relaxed(lli[PL080_LLI_LLI], phychan->base + PL080_CH_LLI);
-	writel_relaxed(lli[PL080_LLI_CCTL], phychan->base + PL080_CH_CONTROL);
+	writel_relaxed(lli[PL080_LLI_SRC], phychan->reg_src);
+	writel_relaxed(lli[PL080_LLI_DST], phychan->reg_dst);
+	writel_relaxed(lli[PL080_LLI_LLI], phychan->reg_lli);
 
+	/*
+	 * The FTMAC020 has a different layout in the CCTL word of the LLI
+	 * and the CCTL register which is split in CSR and SIZE registers.
+	 * Convert the LLI item CCTL into the proper values to write into
+	 * the CSR and SIZE registers.
+	 */
+	if (phychan->ftdmac020) {
+		u32 llictl = lli[PL080_LLI_CCTL];
+		u32 val = 0;
+
+		/* Write the transfer size (12 bits) to the size register */
+		writel_relaxed(llictl & FTDMAC020_LLI_TRANSFER_SIZE_MASK,
+			       phychan->base + FTDMAC020_CH_SIZE);
+		/*
+		 * Then write the control bits 28..16 to the control register
+		 * by shuffleing the bits around to where they are in the
+		 * main register. The mapping is as follows:
+		 * Bit 28: TC_MSK - mask on all except last LLI
+		 * Bit 27..25: SRC_WIDTH
+		 * Bit 24..22: DST_WIDTH
+		 * Bit 21..20: SRCAD_CTRL
+		 * Bit 19..17: DSTAD_CTRL
+		 * Bit 17: SRC_SEL
+		 * Bit 16: DST_SEL
+		 */
+		if (llictl & FTDMAC020_LLI_TC_MSK)
+			val |= FTDMAC020_CH_CSR_TC_MSK;
+		val |= ((llictl  & FTDMAC020_LLI_SRC_WIDTH_MSK) >>
+			(FTDMAC020_LLI_SRC_WIDTH_SHIFT -
+			 FTDMAC020_CH_CSR_SRC_WIDTH_SHIFT));
+		val |= ((llictl  & FTDMAC020_LLI_DST_WIDTH_MSK) >>
+			(FTDMAC020_LLI_DST_WIDTH_SHIFT -
+			 FTDMAC020_CH_CSR_DST_WIDTH_SHIFT));
+		val |= ((llictl  & FTDMAC020_LLI_SRCAD_CTL_MSK) >>
+			(FTDMAC020_LLI_SRCAD_CTL_SHIFT -
+			 FTDMAC020_CH_CSR_SRCAD_CTL_SHIFT));
+		val |= ((llictl  & FTDMAC020_LLI_DSTAD_CTL_MSK) >>
+			(FTDMAC020_LLI_DSTAD_CTL_SHIFT -
+			 FTDMAC020_CH_CSR_DSTAD_CTL_SHIFT));
+		if (llictl & FTDMAC020_LLI_SRC_SEL)
+			val |= FTDMAC020_CH_CSR_SRC_SEL;
+		if (llictl & FTDMAC020_LLI_DST_SEL)
+			val |= FTDMAC020_CH_CSR_DST_SEL;
+
+		/*
+		 * Set up the bits that exist in the CSR but are not
+		 * part the LLI, i.e. only gets written to the control
+		 * register right here.
+		 *
+		 * FIXME: do not just handle memcpy, also handle slave DMA.
+		 */
+		switch (pl08x->pd->memcpy_burst_size) {
+		default:
+		case PL08X_BURST_SZ_1:
+			val |= PL080_BSIZE_1 <<
+				FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+			break;
+		case PL08X_BURST_SZ_4:
+			val |= PL080_BSIZE_4 <<
+				FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+			break;
+		case PL08X_BURST_SZ_8:
+			val |= PL080_BSIZE_8 <<
+				FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+			break;
+		case PL08X_BURST_SZ_16:
+			val |= PL080_BSIZE_16 <<
+				FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+			break;
+		case PL08X_BURST_SZ_32:
+			val |= PL080_BSIZE_32 <<
+				FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+			break;
+		case PL08X_BURST_SZ_64:
+			val |= PL080_BSIZE_64 <<
+				FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+			break;
+		case PL08X_BURST_SZ_128:
+			val |= PL080_BSIZE_128 <<
+				FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+			break;
+		case PL08X_BURST_SZ_256:
+			val |= PL080_BSIZE_256 <<
+				FTDMAC020_CH_CSR_SRC_SIZE_SHIFT;
+			break;
+		}
+
+		/* Protection flags */
+		if (pl08x->pd->memcpy_prot_buff)
+			val |= FTDMAC020_CH_CSR_PROT2;
+		if (pl08x->pd->memcpy_prot_cache)
+			val |= FTDMAC020_CH_CSR_PROT3;
+		/* We are the kernel, so we are in privileged mode */
+		val |= FTDMAC020_CH_CSR_PROT1;
+
+		writel_relaxed(val, phychan->reg_control);
+	} else {
+		/* Bits are just identical */
+		writel_relaxed(lli[PL080_LLI_CCTL], phychan->reg_control);
+	}
+
+	/* Second control word on the PL080s */
 	if (pl08x->vd->pl080s)
 		writel_relaxed(lli[PL080S_LLI_CCTL2],
 				phychan->base + PL080S_CH_CONTROL2);
@@ -425,11 +558,25 @@ static void pl08x_start_next_txd(struct pl08x_dma_chan *plchan)
 		cpu_relax();
 
 	/* Do not access config register until channel shows as inactive */
-	val = readl(phychan->reg_config);
-	while ((val & PL080_CONFIG_ACTIVE) || (val & PL080_CONFIG_ENABLE))
+	if (phychan->ftdmac020) {
+		val = readl(phychan->reg_config);
+		while (val & FTDMAC020_CH_CFG_BUSY)
+			val = readl(phychan->reg_config);
+
+		val = readl(phychan->reg_control);
+		while (val & FTDMAC020_CH_CSR_EN)
+			val = readl(phychan->reg_control);
+
+		writel(val | FTDMAC020_CH_CSR_EN,
+		       phychan->reg_control);
+	} else {
 		val = readl(phychan->reg_config);
+		while ((val & PL080_CONFIG_ACTIVE) ||
+		       (val & PL080_CONFIG_ENABLE))
+			val = readl(phychan->reg_config);
 
-	writel(val | PL080_CONFIG_ENABLE, phychan->reg_config);
+		writel(val | PL080_CONFIG_ENABLE, phychan->reg_config);
+	}
 }
 
 /*
@@ -447,6 +594,14 @@ static void pl08x_pause_phy_chan(struct pl08x_phy_chan *ch)
 	u32 val;
 	int timeout;
 
+	if (ch->ftdmac020) {
+		/* Use the enable bit on the FTDMAC020 */
+		val = readl(ch->reg_control);
+		val &= ~FTDMAC020_CH_CSR_EN;
+		writel(val, ch->reg_control);
+		return;
+	}
+
 	/* Set the HALT bit and wait for the FIFO to drain */
 	val = readl(ch->reg_config);
 	val |= PL080_CONFIG_HALT;
@@ -466,6 +621,14 @@ static void pl08x_resume_phy_chan(struct pl08x_phy_chan *ch)
 {
 	u32 val;
 
+	/* Use the enable bit on the FTDMAC020 */
+	if (ch->ftdmac020) {
+		val = readl(ch->reg_control);
+		val |= FTDMAC020_CH_CSR_EN;
+		writel(val, ch->reg_control);
+		return;
+	}
+
 	/* Clear the HALT bit */
 	val = readl(ch->reg_config);
 	val &= ~PL080_CONFIG_HALT;
@@ -481,25 +644,68 @@ static void pl08x_resume_phy_chan(struct pl08x_phy_chan *ch)
 static void pl08x_terminate_phy_chan(struct pl08x_driver_data *pl08x,
 	struct pl08x_phy_chan *ch)
 {
-	u32 val = readl(ch->reg_config);
+	u32 val;
+
+	/* The layout for the FTDMAC020 is different */
+	if (ch->ftdmac020) {
+		/* Disable all interrupts */
+		val = readl(ch->reg_config);
+		val |= (FTDMAC020_CH_CFG_INT_ABT_MASK |
+			FTDMAC020_CH_CFG_INT_ERR_MASK |
+			FTDMAC020_CH_CFG_INT_TC_MASK);
+		writel(val, ch->reg_config);
+
+		/* Abort and disable channel */
+		val = readl(ch->reg_control);
+		val &= ~FTDMAC020_CH_CSR_EN;
+		val |= FTDMAC020_CH_CSR_ABT;
+		writel(val, ch->reg_control);
+
+		/* Clear ABT and ERR interrupt flags */
+		writel(BIT(ch->id) | BIT(ch->id + 16),
+		       pl08x->base + PL080_ERR_CLEAR);
+		writel(BIT(ch->id), pl08x->base + PL080_TC_CLEAR);
 
+		return;
+	}
+
+	val = readl(ch->reg_config);
 	val &= ~(PL080_CONFIG_ENABLE | PL080_CONFIG_ERR_IRQ_MASK |
 		 PL080_CONFIG_TC_IRQ_MASK);
-
 	writel(val, ch->reg_config);
 
 	writel(BIT(ch->id), pl08x->base + PL080_ERR_CLEAR);
 	writel(BIT(ch->id), pl08x->base + PL080_TC_CLEAR);
 }
 
-static inline u32 get_bytes_in_cctl(u32 cctl)
+static u32 get_bytes_in_phy_channel(struct pl08x_phy_chan *ch)
 {
-	/* The source width defines the number of bytes */
-	u32 bytes = cctl & PL080_CONTROL_TRANSFER_SIZE_MASK;
+	u32 val;
+	u32 bytes;
+
+	if (ch->ftdmac020) {
+		bytes = readl(ch->base + FTDMAC020_CH_SIZE);
+
+		val = readl(ch->reg_control);
+		val &= FTDMAC020_CH_CSR_SRC_WIDTH_MSK;
+		val >>= FTDMAC020_CH_CSR_SRC_WIDTH_SHIFT;
+	} else if (ch->pl080s) {
+		val = readl(ch->base + PL080S_CH_CONTROL2);
+		bytes = val & PL080S_CONTROL_TRANSFER_SIZE_MASK;
 
-	cctl &= PL080_CONTROL_SWIDTH_MASK;
+		val = readl(ch->reg_control);
+		val &= PL080_CONTROL_SWIDTH_MASK;
+		val >>= PL080_CONTROL_SWIDTH_SHIFT;
+	} else {
+		/* Plain PL08x */
+		val = readl(ch->reg_control);
+		bytes = val & PL080_CONTROL_TRANSFER_SIZE_MASK;
+
+		val &= PL080_CONTROL_SWIDTH_MASK;
+		val >>= PL080_CONTROL_SWIDTH_SHIFT;
+	}
 
-	switch (cctl >> PL080_CONTROL_SWIDTH_SHIFT) {
+	switch (val) {
 	case PL080_WIDTH_8BIT:
 		break;
 	case PL080_WIDTH_16BIT:
@@ -512,14 +718,35 @@ static inline u32 get_bytes_in_cctl(u32 cctl)
 	return bytes;
 }
 
-static inline u32 get_bytes_in_cctl_pl080s(u32 cctl, u32 cctl1)
+static u32 get_bytes_in_lli(struct pl08x_phy_chan *ch, const u32 *llis_va)
 {
-	/* The source width defines the number of bytes */
-	u32 bytes = cctl1 & PL080S_CONTROL_TRANSFER_SIZE_MASK;
+	u32 val;
+	u32 bytes;
+
+	if (ch->ftdmac020) {
+		val = llis_va[PL080_LLI_CCTL];
+		bytes = val & FTDMAC020_LLI_TRANSFER_SIZE_MASK;
+
+		val = llis_va[PL080_LLI_CCTL];
+		val &= FTDMAC020_LLI_SRC_WIDTH_MSK;
+		val >>= FTDMAC020_LLI_SRC_WIDTH_SHIFT;
+	} else if (ch->pl080s) {
+		val = llis_va[PL080S_LLI_CCTL2];
+		bytes = val & PL080S_CONTROL_TRANSFER_SIZE_MASK;
+
+		val = llis_va[PL080_LLI_CCTL];
+		val &= PL080_CONTROL_SWIDTH_MASK;
+		val >>= PL080_CONTROL_SWIDTH_SHIFT;
+	} else {
+		/* Plain PL08x */
+		val = llis_va[PL080_LLI_CCTL];
+		bytes = val & PL080_CONTROL_TRANSFER_SIZE_MASK;
 
-	cctl &= PL080_CONTROL_SWIDTH_MASK;
+		val &= PL080_CONTROL_SWIDTH_MASK;
+		val >>= PL080_CONTROL_SWIDTH_SHIFT;
+	}
 
-	switch (cctl >> PL080_CONTROL_SWIDTH_SHIFT) {
+	switch (val) {
 	case PL080_WIDTH_8BIT:
 		break;
 	case PL080_WIDTH_16BIT:
@@ -554,15 +781,10 @@ static u32 pl08x_getbytes_chan(struct pl08x_dma_chan *plchan)
 	 * Follow the LLIs to get the number of remaining
 	 * bytes in the currently active transaction.
 	 */
-	clli = readl(ch->base + PL080_CH_LLI) & ~PL080_LLI_LM_AHB2;
+	clli = readl(ch->reg_lli) & ~PL080_LLI_LM_AHB2;
 
 	/* First get the remaining bytes in the active transfer */
-	if (pl08x->vd->pl080s)
-		bytes = get_bytes_in_cctl_pl080s(
-				readl(ch->base + PL080_CH_CONTROL),
-				readl(ch->base + PL080S_CH_CONTROL2));
-	else
-		bytes = get_bytes_in_cctl(readl(ch->base + PL080_CH_CONTROL));
+	bytes = get_bytes_in_phy_channel(ch);
 
 	if (!clli)
 		return bytes;
@@ -583,12 +805,7 @@ static u32 pl08x_getbytes_chan(struct pl08x_dma_chan *plchan)
 	llis_va_limit = llis_va + llis_max_words;
 
 	for (; llis_va < llis_va_limit; llis_va += pl08x->lli_words) {
-		if (pl08x->vd->pl080s)
-			bytes += get_bytes_in_cctl_pl080s(
-						llis_va[PL080_LLI_CCTL],
-						llis_va[PL080S_LLI_CCTL2]);
-		else
-			bytes += get_bytes_in_cctl(llis_va[PL080_LLI_CCTL]);
+		bytes += get_bytes_in_lli(ch, llis_va);
 
 		/*
 		 * A LLI pointer going backward terminates the LLI list
@@ -748,9 +965,30 @@ static void pl08x_phy_free(struct pl08x_dma_chan *plchan)
  * LLI handling
  */
 
-static inline unsigned int pl08x_get_bytes_for_cctl(unsigned int coded)
+static inline unsigned int
+pl08x_get_bytes_for_lli(struct pl08x_driver_data *pl08x,
+			u32 cctl,
+			bool source)
 {
-	switch (coded) {
+	u32 val;
+
+	if (pl08x->vd->ftdmac020) {
+		if (source)
+			val = (cctl & FTDMAC020_LLI_SRC_WIDTH_MSK) >>
+				FTDMAC020_LLI_SRC_WIDTH_SHIFT;
+		else
+			val = (cctl & FTDMAC020_LLI_DST_WIDTH_MSK) >>
+				FTDMAC020_LLI_DST_WIDTH_SHIFT;
+	} else {
+		if (source)
+			val = (cctl & PL080_CONTROL_SWIDTH_MASK) >>
+				PL080_CONTROL_SWIDTH_SHIFT;
+		else
+			val = (cctl & PL080_CONTROL_DWIDTH_MASK) >>
+				PL080_CONTROL_DWIDTH_SHIFT;
+	}
+
+	switch (val) {
 	case PL080_WIDTH_8BIT:
 		return 1;
 	case PL080_WIDTH_16BIT:
@@ -764,49 +1002,106 @@ static inline unsigned int pl08x_get_bytes_for_cctl(unsigned int coded)
 	return 0;
 }
 
-static inline u32 pl08x_cctl_bits(u32 cctl, u8 srcwidth, u8 dstwidth,
-				  size_t tsize)
+static inline u32 pl08x_lli_control_bits(struct pl08x_driver_data *pl08x,
+					 u32 cctl,
+					 u8 srcwidth, u8 dstwidth,
+					 size_t tsize)
 {
 	u32 retbits = cctl;
 
-	/* Remove all src, dst and transfer size bits */
-	retbits &= ~PL080_CONTROL_DWIDTH_MASK;
-	retbits &= ~PL080_CONTROL_SWIDTH_MASK;
-	retbits &= ~PL080_CONTROL_TRANSFER_SIZE_MASK;
+	/*
+	 * Remove all src, dst and transfer size bits, then set the
+	 * width and size according to the parameters. The bit offsets
+	 * are different in the FTDMAC020 so we need to accound for this.
+	 */
+	if (pl08x->vd->ftdmac020) {
+		retbits &= ~FTDMAC020_LLI_DST_WIDTH_MSK;
+		retbits &= ~FTDMAC020_LLI_SRC_WIDTH_MSK;
+		retbits &= ~FTDMAC020_LLI_TRANSFER_SIZE_MASK;
+
+		switch (srcwidth) {
+		case 1:
+			retbits |= PL080_WIDTH_8BIT <<
+				FTDMAC020_LLI_SRC_WIDTH_SHIFT;
+			break;
+		case 2:
+			retbits |= PL080_WIDTH_16BIT <<
+				FTDMAC020_LLI_SRC_WIDTH_SHIFT;
+			break;
+		case 4:
+			retbits |= PL080_WIDTH_32BIT <<
+				FTDMAC020_LLI_SRC_WIDTH_SHIFT;
+			break;
+		default:
+			BUG();
+			break;
+		}
 
-	/* Then set the bits according to the parameters */
-	switch (srcwidth) {
-	case 1:
-		retbits |= PL080_WIDTH_8BIT << PL080_CONTROL_SWIDTH_SHIFT;
-		break;
-	case 2:
-		retbits |= PL080_WIDTH_16BIT << PL080_CONTROL_SWIDTH_SHIFT;
-		break;
-	case 4:
-		retbits |= PL080_WIDTH_32BIT << PL080_CONTROL_SWIDTH_SHIFT;
-		break;
-	default:
-		BUG();
-		break;
-	}
+		switch (dstwidth) {
+		case 1:
+			retbits |= PL080_WIDTH_8BIT <<
+				FTDMAC020_LLI_DST_WIDTH_SHIFT;
+			break;
+		case 2:
+			retbits |= PL080_WIDTH_16BIT <<
+				FTDMAC020_LLI_DST_WIDTH_SHIFT;
+			break;
+		case 4:
+			retbits |= PL080_WIDTH_32BIT <<
+				FTDMAC020_LLI_DST_WIDTH_SHIFT;
+			break;
+		default:
+			BUG();
+			break;
+		}
 
-	switch (dstwidth) {
-	case 1:
-		retbits |= PL080_WIDTH_8BIT << PL080_CONTROL_DWIDTH_SHIFT;
-		break;
-	case 2:
-		retbits |= PL080_WIDTH_16BIT << PL080_CONTROL_DWIDTH_SHIFT;
-		break;
-	case 4:
-		retbits |= PL080_WIDTH_32BIT << PL080_CONTROL_DWIDTH_SHIFT;
-		break;
-	default:
-		BUG();
-		break;
+		tsize &= FTDMAC020_LLI_TRANSFER_SIZE_MASK;
+		retbits |= tsize << FTDMAC020_LLI_TRANSFER_SIZE_SHIFT;
+	} else {
+		retbits &= ~PL080_CONTROL_DWIDTH_MASK;
+		retbits &= ~PL080_CONTROL_SWIDTH_MASK;
+		retbits &= ~PL080_CONTROL_TRANSFER_SIZE_MASK;
+
+		switch (srcwidth) {
+		case 1:
+			retbits |= PL080_WIDTH_8BIT <<
+				PL080_CONTROL_SWIDTH_SHIFT;
+			break;
+		case 2:
+			retbits |= PL080_WIDTH_16BIT <<
+				PL080_CONTROL_SWIDTH_SHIFT;
+			break;
+		case 4:
+			retbits |= PL080_WIDTH_32BIT <<
+				PL080_CONTROL_SWIDTH_SHIFT;
+			break;
+		default:
+			BUG();
+			break;
+		}
+
+		switch (dstwidth) {
+		case 1:
+			retbits |= PL080_WIDTH_8BIT <<
+				PL080_CONTROL_DWIDTH_SHIFT;
+			break;
+		case 2:
+			retbits |= PL080_WIDTH_16BIT <<
+				PL080_CONTROL_DWIDTH_SHIFT;
+			break;
+		case 4:
+			retbits |= PL080_WIDTH_32BIT <<
+				PL080_CONTROL_DWIDTH_SHIFT;
+			break;
+		default:
+			BUG();
+			break;
+		}
+
+		tsize &= PL080_CONTROL_TRANSFER_SIZE_MASK;
+		retbits |= tsize << PL080_CONTROL_TRANSFER_SIZE_SHIFT;
 	}
 
-	tsize &= PL080_CONTROL_TRANSFER_SIZE_MASK;
-	retbits |= tsize << PL080_CONTROL_TRANSFER_SIZE_SHIFT;
 	return retbits;
 }
 
@@ -827,13 +1122,35 @@ struct pl08x_lli_build_data {
  * - prefers the destination bus if both available
  * - prefers bus with fixed address (i.e. peripheral)
  */
-static void pl08x_choose_master_bus(struct pl08x_lli_build_data *bd,
-	struct pl08x_bus_data **mbus, struct pl08x_bus_data **sbus, u32 cctl)
+static void pl08x_choose_master_bus(struct pl08x_driver_data *pl08x,
+				    struct pl08x_lli_build_data *bd,
+				    struct pl08x_bus_data **mbus,
+				    struct pl08x_bus_data **sbus,
+				    u32 cctl)
 {
-	if (!(cctl & PL080_CONTROL_DST_INCR)) {
+	bool dst_incr;
+	bool src_incr;
+
+	/*
+	 * The FTDMAC020 only supports memory-to-memory transfer, so
+	 * source and destination always increase.
+	 */
+	if (pl08x->vd->ftdmac020) {
+		dst_incr = true;
+		src_incr = true;
+	} else {
+		dst_incr = !!(cctl & PL080_CONTROL_DST_INCR);
+		src_incr = !!(cctl & PL080_CONTROL_SRC_INCR);
+	}
+
+	/*
+	 * If either bus is not advancing, i.e. it is a peripheral, that
+	 * one becomes master
+	 */
+	if (!dst_incr) {
 		*mbus = &bd->dstbus;
 		*sbus = &bd->srcbus;
-	} else if (!(cctl & PL080_CONTROL_SRC_INCR)) {
+	} else if (!src_incr) {
 		*mbus = &bd->srcbus;
 		*sbus = &bd->dstbus;
 	} else {
@@ -871,10 +1188,16 @@ static void pl08x_fill_lli_for_desc(struct pl08x_driver_data *pl08x,
 	if (pl08x->vd->pl080s)
 		llis_va[PL080S_LLI_CCTL2] = cctl2;
 
-	if (cctl & PL080_CONTROL_SRC_INCR)
+	if (pl08x->vd->ftdmac020) {
+		/* FIXME: only memcpy so far so both increase */
 		bd->srcbus.addr += len;
-	if (cctl & PL080_CONTROL_DST_INCR)
 		bd->dstbus.addr += len;
+	} else {
+		if (cctl & PL080_CONTROL_SRC_INCR)
+			bd->srcbus.addr += len;
+		if (cctl & PL080_CONTROL_DST_INCR)
+			bd->dstbus.addr += len;
+	}
 
 	BUG_ON(bd->remainder < len);
 
@@ -885,12 +1208,12 @@ static inline void prep_byte_width_lli(struct pl08x_driver_data *pl08x,
 			struct pl08x_lli_build_data *bd, u32 *cctl, u32 len,
 			int num_llis, size_t *total_bytes)
 {
-	*cctl = pl08x_cctl_bits(*cctl, 1, 1, len);
+	*cctl = pl08x_lli_control_bits(pl08x, *cctl, 1, 1, len);
 	pl08x_fill_lli_for_desc(pl08x, bd, num_llis, len, *cctl, len);
 	(*total_bytes) += len;
 }
 
-#ifdef VERBOSE_DEBUG
+#if 1
 static void pl08x_dump_lli(struct pl08x_driver_data *pl08x,
 			   const u32 *llis_va, int num_llis)
 {
@@ -955,14 +1278,10 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 	cctl = txd->cctl;
 
 	/* Find maximum width of the source bus */
-	bd.srcbus.maxwidth =
-		pl08x_get_bytes_for_cctl((cctl & PL080_CONTROL_SWIDTH_MASK) >>
-				       PL080_CONTROL_SWIDTH_SHIFT);
+	bd.srcbus.maxwidth = pl08x_get_bytes_for_lli(pl08x, cctl, true);
 
 	/* Find maximum width of the destination bus */
-	bd.dstbus.maxwidth =
-		pl08x_get_bytes_for_cctl((cctl & PL080_CONTROL_DWIDTH_MASK) >>
-				       PL080_CONTROL_DWIDTH_SHIFT);
+	bd.dstbus.maxwidth = pl08x_get_bytes_for_lli(pl08x, cctl, false);
 
 	list_for_each_entry(dsg, &txd->dsg_list, node) {
 		total_bytes = 0;
@@ -974,7 +1293,7 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 		bd.srcbus.buswidth = bd.srcbus.maxwidth;
 		bd.dstbus.buswidth = bd.dstbus.maxwidth;
 
-		pl08x_choose_master_bus(&bd, &mbus, &sbus, cctl);
+		pl08x_choose_master_bus(pl08x, &bd, &mbus, &sbus, cctl);
 
 		dev_vdbg(&pl08x->adev->dev,
 			"src=0x%08llx%s/%u dst=0x%08llx%s/%u len=%zu\n",
@@ -1011,8 +1330,14 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 		 *   supported. Thus, we can't have scattered addresses.
 		 */
 		if (!bd.remainder) {
-			u32 fc = (txd->ccfg & PL080_CONFIG_FLOW_CONTROL_MASK) >>
-				PL080_CONFIG_FLOW_CONTROL_SHIFT;
+			u32 fc;
+
+			/* FTDMAC020 only does memory-to-memory */
+			if (pl08x->vd->ftdmac020)
+				fc = PL080_FLOW_MEM2MEM;
+			else
+				fc = (txd->ccfg & PL080_CONFIG_FLOW_CONTROL_MASK) >>
+					PL080_CONFIG_FLOW_CONTROL_SHIFT;
 			if (!((fc >= PL080_FLOW_SRC2DST_DST) &&
 					(fc <= PL080_FLOW_SRC2DST_SRC))) {
 				dev_err(&pl08x->adev->dev, "%s sg len can't be zero",
@@ -1029,8 +1354,9 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 				return 0;
 			}
 
-			cctl = pl08x_cctl_bits(cctl, bd.srcbus.buswidth,
-					bd.dstbus.buswidth, 0);
+			cctl = pl08x_lli_control_bits(pl08x, cctl,
+					bd.srcbus.buswidth, bd.dstbus.buswidth,
+					0);
 			pl08x_fill_lli_for_desc(pl08x, &bd, num_llis++,
 					0, cctl, 0);
 			break;
@@ -1109,8 +1435,9 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 					"size 0x%08zx (remainder 0x%08zx)\n",
 					__func__, lli_len, bd.remainder);
 
-				cctl = pl08x_cctl_bits(cctl, bd.srcbus.buswidth,
-					bd.dstbus.buswidth, tsize);
+				cctl = pl08x_lli_control_bits(pl08x, cctl,
+					bd.srcbus.buswidth, bd.dstbus.buswidth,
+					tsize);
 				pl08x_fill_lli_for_desc(pl08x, &bd, num_llis++,
 						lli_len, cctl, tsize);
 				total_bytes += lli_len;
@@ -1153,7 +1480,10 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 		/* The final LLI terminates the LLI. */
 		last_lli[PL080_LLI_LLI] = 0;
 		/* The final LLI element shall also fire an interrupt. */
-		last_lli[PL080_LLI_CCTL] |= PL080_CONTROL_TC_IRQ_EN;
+		if (pl08x->vd->ftdmac020)
+			last_lli[PL080_LLI_CCTL] &= ~FTDMAC020_LLI_TC_MSK;
+		else
+			last_lli[PL080_LLI_CCTL] |= PL080_CONTROL_TC_IRQ_EN;
 	}
 
 	pl08x_dump_lli(pl08x, llis_va, num_llis);
@@ -1319,14 +1649,25 @@ static const struct burst_table burst_sizes[] = {
  * will be routed to each port.  We try to have source and destination
  * on separate ports, but always respect the allowable settings.
  */
-static u32 pl08x_select_bus(u8 src, u8 dst)
+static u32 pl08x_select_bus(bool ftdmac020, u8 src, u8 dst)
 {
 	u32 cctl = 0;
+	u32 dst_ahb2;
+	u32 src_ahb2;
+
+	/* The FTDMAC020 use different bits to indicate src/dst bus */
+	if (ftdmac020) {
+		dst_ahb2 = FTDMAC020_LLI_DST_SEL;
+		src_ahb2 = FTDMAC020_LLI_SRC_SEL;
+	} else {
+		dst_ahb2 = PL080_CONTROL_DST_AHB2;
+		src_ahb2 = PL080_CONTROL_SRC_AHB2;
+	}
 
 	if (!(dst & PL08X_AHB1) || ((dst & PL08X_AHB2) && (src & PL08X_AHB1)))
-		cctl |= PL080_CONTROL_DST_AHB2;
+		cctl |= dst_ahb2;
 	if (!(src & PL08X_AHB1) || ((src & PL08X_AHB2) && !(dst & PL08X_AHB2)))
-		cctl |= PL080_CONTROL_SRC_AHB2;
+		cctl |= src_ahb2;
 
 	return cctl;
 }
@@ -1414,50 +1755,14 @@ static struct pl08x_txd *pl08x_get_txd(struct pl08x_dma_chan *plchan)
 {
 	struct pl08x_txd *txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
 
-	if (txd) {
+	if (txd)
 		INIT_LIST_HEAD(&txd->dsg_list);
-
-		/* Always enable error and terminal interrupts */
-		txd->ccfg = PL080_CONFIG_ERR_IRQ_MASK |
-			    PL080_CONFIG_TC_IRQ_MASK;
-	}
 	return txd;
 }
 
-/*
- * Initialize a descriptor to be used by memcpy submit
- */
-static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy(
-		struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
-		size_t len, unsigned long flags)
+static u32 pl08x_memcpy_cctl(struct pl08x_driver_data *pl08x)
 {
-	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
-	struct pl08x_driver_data *pl08x = plchan->host;
-	struct pl08x_txd *txd;
-	struct pl08x_sg *dsg;
 	u32 cctl = 0;
-	int ret;
-
-	txd = pl08x_get_txd(plchan);
-	if (!txd) {
-		dev_err(&pl08x->adev->dev,
-			"%s no memory for descriptor\n", __func__);
-		return NULL;
-	}
-
-	dsg = kzalloc(sizeof(struct pl08x_sg), GFP_NOWAIT);
-	if (!dsg) {
-		pl08x_free_txd(pl08x, txd);
-		return NULL;
-	}
-	list_add_tail(&dsg->node, &txd->dsg_list);
-
-	dsg->src_addr = src;
-	dsg->dst_addr = dest;
-	dsg->len = len;
-
-	/* Set platform data for m2m */
-	txd->ccfg |= PL080_FLOW_MEM2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT;
 
 	/* Conjure cctl */
 	switch (pl08x->pd->memcpy_burst_size) {
@@ -1531,10 +1836,95 @@ static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy(
 	cctl |= PL080_CONTROL_SRC_INCR | PL080_CONTROL_DST_INCR;
 
 	if (pl08x->vd->dualmaster)
-		cctl |= pl08x_select_bus(pl08x->mem_buses,
+		cctl |= pl08x_select_bus(false,
+					 pl08x->mem_buses,
+					 pl08x->mem_buses);
+
+	return cctl;
+}
+
+static u32 pl08x_ftdmac020_memcpy_cctl(struct pl08x_driver_data *pl08x)
+{
+	u32 cctl = 0;
+
+	/* Conjure cctl */
+	switch (pl08x->pd->memcpy_bus_width) {
+	default:
+		dev_err(&pl08x->adev->dev,
+			"illegal bus width for memcpy, set to 8 bits\n");
+		/* Fall through */
+	case PL08X_BUS_WIDTH_8_BITS:
+		cctl |= PL080_WIDTH_8BIT << FTDMAC020_LLI_SRC_WIDTH_SHIFT |
+			PL080_WIDTH_8BIT << FTDMAC020_LLI_DST_WIDTH_SHIFT;
+		break;
+	case PL08X_BUS_WIDTH_16_BITS:
+		cctl |= PL080_WIDTH_16BIT << FTDMAC020_LLI_SRC_WIDTH_SHIFT |
+			PL080_WIDTH_16BIT << FTDMAC020_LLI_DST_WIDTH_SHIFT;
+		break;
+	case PL08X_BUS_WIDTH_32_BITS:
+		cctl |= PL080_WIDTH_32BIT << FTDMAC020_LLI_SRC_WIDTH_SHIFT |
+			PL080_WIDTH_32BIT << FTDMAC020_LLI_DST_WIDTH_SHIFT;
+		break;
+	}
+
+	/*
+	 * By default mask the TC IRQ on all LLIs, it will be unmasked on
+	 * the last LLI item by other code.
+	 */
+	cctl |= FTDMAC020_LLI_TC_MSK;
+
+	/*
+	 * Both to be incremented so leave bits FTDMAC020_LLI_SRCAD_CTL
+	 * and FTDMAC020_LLI_DSTAD_CTL as zero
+	 */
+	if (pl08x->vd->dualmaster)
+		cctl |= pl08x_select_bus(true,
+					 pl08x->mem_buses,
 					 pl08x->mem_buses);
 
-	txd->cctl = cctl;
+	return cctl;
+}
+
+/*
+ * Initialize a descriptor to be used by memcpy submit
+ */
+static struct dma_async_tx_descriptor *pl08x_prep_dma_memcpy(
+		struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned long flags)
+{
+	struct pl08x_dma_chan *plchan = to_pl08x_chan(chan);
+	struct pl08x_driver_data *pl08x = plchan->host;
+	struct pl08x_txd *txd;
+	struct pl08x_sg *dsg;
+	int ret;
+
+	txd = pl08x_get_txd(plchan);
+	if (!txd) {
+		dev_err(&pl08x->adev->dev,
+			"%s no memory for descriptor\n", __func__);
+		return NULL;
+	}
+
+	dsg = kzalloc(sizeof(struct pl08x_sg), GFP_NOWAIT);
+	if (!dsg) {
+		pl08x_free_txd(pl08x, txd);
+		return NULL;
+	}
+	list_add_tail(&dsg->node, &txd->dsg_list);
+
+	dsg->src_addr = src;
+	dsg->dst_addr = dest;
+	dsg->len = len;
+	if (pl08x->vd->ftdmac020) {
+		/* Writing CCFG zero ENABLES all interrupts */
+		txd->ccfg = 0;
+		txd->cctl = pl08x_ftdmac020_memcpy_cctl(pl08x);
+	} else {
+		txd->ccfg = PL080_CONFIG_ERR_IRQ_MASK |
+			PL080_CONFIG_TC_IRQ_MASK |
+			PL080_FLOW_MEM2MEM << PL080_CONFIG_FLOW_CONTROL_SHIFT;
+		txd->cctl = pl08x_memcpy_cctl(pl08x);
+	}
 
 	ret = pl08x_fill_llis_for_desc(plchan->host, txd);
 	if (!ret) {
@@ -1598,7 +1988,7 @@ static struct pl08x_txd *pl08x_init_txd(
 		return NULL;
 	}
 
-	txd->cctl = cctl | pl08x_select_bus(src_buses, dst_buses);
+	txd->cctl = cctl | pl08x_select_bus(false, src_buses, dst_buses);
 
 	if (plchan->cfg.device_fc)
 		tmp = (direction == DMA_MEM_TO_DEV) ? PL080_FLOW_MEM2PER_PER :
@@ -1607,7 +1997,9 @@ static struct pl08x_txd *pl08x_init_txd(
 		tmp = (direction == DMA_MEM_TO_DEV) ? PL080_FLOW_MEM2PER :
 			PL080_FLOW_PER2MEM;
 
-	txd->ccfg |= tmp << PL080_CONFIG_FLOW_CONTROL_SHIFT;
+	txd->ccfg = PL080_CONFIG_ERR_IRQ_MASK |
+		PL080_CONFIG_TC_IRQ_MASK |
+		tmp << PL080_CONFIG_FLOW_CONTROL_SHIFT;
 
 	ret = pl08x_request_mux(plchan);
 	if (ret < 0) {
@@ -1884,6 +2276,11 @@ static void pl08x_ensure_on(struct pl08x_driver_data *pl08x)
 	/* The Nomadik variant does not have the config register */
 	if (pl08x->vd->nomadik)
 		return;
+	/* The FTDMAC020 variant does this in another register */
+	if (pl08x->vd->ftdmac020) {
+		writel(PL080_CONFIG_ENABLE, pl08x->base + FTDMAC020_CSR);
+		return;
+	}
 	writel(PL080_CONFIG_ENABLE, pl08x->base + PL080_CONFIG);
 }
 
@@ -2310,7 +2707,7 @@ static inline int pl08x_of_probe(struct amba_device *adev,
 static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
 {
 	struct pl08x_driver_data *pl08x;
-	const struct vendor_data *vd = id->data;
+	struct vendor_data *vd = id->data;
 	struct device_node *np = adev->dev.of_node;
 	u32 tsfr_size;
 	int ret = 0;
@@ -2336,6 +2733,34 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
 	pl08x->adev = adev;
 	pl08x->vd = vd;
 
+	pl08x->base = ioremap(adev->res.start, resource_size(&adev->res));
+	if (!pl08x->base) {
+		ret = -ENOMEM;
+		goto out_no_ioremap;
+	}
+
+	if (vd->ftdmac020) {
+		u32 val;
+
+		val = readl(pl08x->base + FTDMAC020_REVISION);
+		dev_info(&pl08x->adev->dev, "FTDMAC020 %d.%d rel %d\n",
+			 (val >> 16) & 0xff, (val >> 8) & 0xff, val & 0xff);
+		val = readl(pl08x->base + FTDMAC020_FEATURE);
+		dev_info(&pl08x->adev->dev, "FTDMAC020 %d channels, "
+			 "%s built-in bridge, %s, %s linked lists\n",
+			 (val >> 12) & 0x0f,
+			 (val & BIT(10)) ? "no" : "has",
+			 (val & BIT(9)) ? "AHB0 and AHB1" : "AHB0",
+			 (val & BIT(8)) ? "supports" : "does not support");
+
+		/* Vendor data from feature register */
+		if (!(val & BIT(8)))
+			dev_warn(&pl08x->adev->dev,
+				 "linked lists not supported, required\n");
+		vd->channels = (val >> 12) & 0x0f;
+		vd->dualmaster = !!(val & BIT(9));
+	}
+
 	/* Initialize memcpy engine */
 	dma_cap_set(DMA_MEMCPY, pl08x->memcpy.cap_mask);
 	pl08x->memcpy.dev = &adev->dev;
@@ -2352,6 +2777,9 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
 	pl08x->memcpy.dst_addr_widths = PL80X_DMA_BUSWIDTHS;
 	pl08x->memcpy.directions = BIT(DMA_MEM_TO_MEM);
 	pl08x->memcpy.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
+	if (vd->ftdmac020)
+		pl08x->memcpy.copy_align = DMAENGINE_ALIGN_4_BYTES;
+
 
 	/*
 	 * Initialize slave engine, if the block has no signals, that means
@@ -2422,19 +2850,18 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
 		goto out_no_lli_pool;
 	}
 
-	pl08x->base = ioremap(adev->res.start, resource_size(&adev->res));
-	if (!pl08x->base) {
-		ret = -ENOMEM;
-		goto out_no_ioremap;
-	}
-
 	/* Turn on the PL08x */
 	pl08x_ensure_on(pl08x);
 
-	/* Attach the interrupt handler */
-	writel(0x000000FF, pl08x->base + PL080_ERR_CLEAR);
+	/* Clear any pending interrupts */
+	if (vd->ftdmac020)
+		/* This variant has error IRQs in bits 16-19 */
+		writel(0x0000FFFF, pl08x->base + PL080_ERR_CLEAR);
+	else
+		writel(0x000000FF, pl08x->base + PL080_ERR_CLEAR);
 	writel(0x000000FF, pl08x->base + PL080_TC_CLEAR);
 
+	/* Attach the interrupt handler */
 	ret = request_irq(adev->irq[0], pl08x_irq, 0, DRIVER_NAME, pl08x);
 	if (ret) {
 		dev_err(&adev->dev, "%s failed to request interrupt %d\n",
@@ -2455,7 +2882,25 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
 
 		ch->id = i;
 		ch->base = pl08x->base + PL080_Cx_BASE(i);
-		ch->reg_config = ch->base + vd->config_offset;
+		if (vd->ftdmac020) {
+			/* FTDMA020 has a special channel busy register */
+			ch->reg_busy = ch->base + FTDMAC020_CH_BUSY;
+			ch->reg_config = ch->base + FTDMAC020_CH_CFG;
+			ch->reg_control = ch->base + FTDMAC020_CH_CSR;
+			ch->reg_src = ch->base + FTDMAC020_CH_SRC_ADDR;
+			ch->reg_dst = ch->base + FTDMAC020_CH_DST_ADDR;
+			ch->reg_lli = ch->base + FTDMAC020_CH_LLP;
+			ch->ftdmac020 = true;
+		} else {
+			ch->reg_config = ch->base + vd->config_offset;
+			ch->reg_control = ch->base + PL080_CH_CONTROL;
+			ch->reg_src = ch->base + PL080_CH_SRC_ADDR;
+			ch->reg_dst = ch->base + PL080_CH_DST_ADDR;
+			ch->reg_lli = ch->base + PL080_CH_LLI;
+		}
+		if (vd->pl080s)
+			ch->pl080s = true;
+
 		spin_lock_init(&ch->lock);
 
 		/*
@@ -2537,11 +2982,11 @@ static int pl08x_probe(struct amba_device *adev, const struct amba_id *id)
 out_no_phychans:
 	free_irq(adev->irq[0], pl08x);
 out_no_irq:
-	iounmap(pl08x->base);
-out_no_ioremap:
 	dma_pool_destroy(pl08x->pool);
 out_no_lli_pool:
 out_no_platdata:
+	iounmap(pl08x->base);
+out_no_ioremap:
 	kfree(pl08x);
 out_no_pl08x:
 	amba_release_regions(adev);
@@ -2582,6 +3027,12 @@ static struct vendor_data vendor_pl081 = {
 	.max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK,
 };
 
+static struct vendor_data vendor_ftdmac020 = {
+	.config_offset = PL080_CH_CONFIG,
+	.ftdmac020 = true,
+	.max_transfer_size = PL080_CONTROL_TRANSFER_SIZE_MASK,
+};
+
 static struct amba_id pl08x_ids[] = {
 	/* Samsung PL080S variant */
 	{
@@ -2607,6 +3058,12 @@ static struct amba_id pl08x_ids[] = {
 		.mask	= 0x00ffffff,
 		.data	= &vendor_nomadik,
 	},
+	/* Faraday Technology FTDMAC020 */
+	{
+		.id	= 0x0003b080,
+		.mask	= 0x000fffff,
+		.data	= &vendor_ftdmac020,
+	},
 	{ 0, 0 },
 };
 
diff --git a/include/linux/amba/pl080.h b/include/linux/amba/pl080.h
index 580b5323a717..10124c9f9db5 100644
--- a/include/linux/amba/pl080.h
+++ b/include/linux/amba/pl080.h
@@ -44,7 +44,14 @@
 
 #define PL080_SYNC				(0x34)
 
-/* Per channel configuration registers */
+/* The Faraday Technology FTDMAC020 variant registers */
+#define FTDMAC020_CH_BUSY			(0x20)
+/* Identical to PL080_CONFIG */
+#define FTDMAC020_CSR				(0x24)
+/* Identical to PL080_SYNC */
+#define FTDMAC020_SYNC				(0x2C)
+#define FTDMAC020_REVISION			(0x30)
+#define FTDMAC020_FEATURE			(0x34)
 
 /* Per channel configuration registers */
 #define PL080_Cx_BASE(x)			((0x100 + (x * 0x20)))
@@ -55,6 +62,13 @@
 #define PL080_CH_CONFIG				(0x10)
 #define PL080S_CH_CONTROL2			(0x10)
 #define PL080S_CH_CONFIG			(0x14)
+/* The Faraday FTDMAC020 derivative shuffles the registers around */
+#define FTDMAC020_CH_CSR			(0x00)
+#define FTDMAC020_CH_CFG			(0x04)
+#define FTDMAC020_CH_SRC_ADDR			(0x08)
+#define FTDMAC020_CH_DST_ADDR			(0x0C)
+#define FTDMAC020_CH_LLP			(0x10)
+#define FTDMAC020_CH_SIZE			(0x14)
 
 #define PL080_LLI_ADDR_MASK			(0x3fffffff << 2)
 #define PL080_LLI_ADDR_SHIFT			(2)
@@ -119,6 +133,73 @@
 #define PL080_FLOW_PER2MEM_PER			(0x6)
 #define PL080_FLOW_SRC2DST_SRC			(0x7)
 
+#define FTDMAC020_CH_CSR_TC_MSK			BIT(31)
+/* Later versions have a threshold in bits 24..26,  */
+#define FTDMAC020_CH_CSR_FIFOTH_MSK		(0x7 << 24)
+#define FTDMAC020_CH_CSR_FIFOTH_SHIFT		(24)
+#define FTDMAC020_CH_CSR_CHPR1_MSK		(0x3 << 22)
+#define FTDMAC020_CH_CSR_PROT3			BIT(21)
+#define FTDMAC020_CH_CSR_PROT2			BIT(20)
+#define FTDMAC020_CH_CSR_PROT1			BIT(19)
+#define FTDMAC020_CH_CSR_SRC_SIZE_MSK		(0x7 << 16)
+#define FTDMAC020_CH_CSR_SRC_SIZE_SHIFT		(16)
+#define FTDMAC020_CH_CSR_ABT			BIT(15)
+#define FTDMAC020_CH_CSR_SRC_WIDTH_MSK		(0x7 << 11)
+#define FTDMAC020_CH_CSR_SRC_WIDTH_SHIFT	(11)
+#define FTDMAC020_CH_CSR_DST_WIDTH_MSK		(0x7 << 8)
+#define FTDMAC020_CH_CSR_DST_WIDTH_SHIFT	(8)
+#define FTDMAC020_CH_CSR_MODE			BIT(7)
+/* 00 = increase, 01 = decrease, 10 = fix */
+#define FTDMAC020_CH_CSR_SRCAD_CTL_MSK		(0x3 << 5)
+#define FTDMAC020_CH_CSR_SRCAD_CTL_SHIFT	(5)
+#define FTDMAC020_CH_CSR_DSTAD_CTL_MSK		(0x3 << 3)
+#define FTDMAC020_CH_CSR_DSTAD_CTL_SHIFT	(3)
+#define FTDMAC020_CH_CSR_SRC_SEL		BIT(2)
+#define FTDMAC020_CH_CSR_DST_SEL		BIT(1)
+#define FTDMAC020_CH_CSR_EN			BIT(0)
+
+/* FIFO threshold setting */
+#define FTDMAC020_CH_CSR_FIFOTH_1		(0x0)
+#define FTDMAC020_CH_CSR_FIFOTH_2		(0x1)
+#define FTDMAC020_CH_CSR_FIFOTH_4		(0x2)
+#define FTDMAC020_CH_CSR_FIFOTH_8		(0x3)
+#define FTDMAC020_CH_CSR_FIFOTH_16		(0x4)
+/* The FTDMAC020 supports 64bit wide transfers */
+#define FTDMAC020_WIDTH_64BIT			(0x3)
+/* Address can be increased, decreased or fixed */
+#define FTDMAC020_CH_CSR_SRCAD_CTL_INC		(0x0)
+#define FTDMAC020_CH_CSR_SRCAD_CTL_DEC		(0x1)
+#define FTDMAC020_CH_CSR_SRCAD_CTL_FIXED	(0x2)
+
+#define FTDMAC020_CH_CFG_LLP_CNT_MASK		(0xf << 16)
+#define FTDMAC020_CH_CFG_LLP_CNT_SHIFT		(16)
+#define FTDMAC020_CH_CFG_BUSY			BIT(8)
+#define FTDMAC020_CH_CFG_INT_ABT_MASK		BIT(2)
+#define FTDMAC020_CH_CFG_INT_ERR_MASK		BIT(1)
+#define FTDMAC020_CH_CFG_INT_TC_MASK		BIT(0)
+
+/* Inside the LLIs, the applicable CSR fields are mapped differently */
+#define FTDMAC020_LLI_TC_MSK			BIT(28)
+#define FTDMAC020_LLI_SRC_WIDTH_MSK		(0x7 << 25)
+#define FTDMAC020_LLI_SRC_WIDTH_SHIFT		(25)
+#define FTDMAC020_LLI_DST_WIDTH_MSK		(0x7 << 22)
+#define FTDMAC020_LLI_DST_WIDTH_SHIFT		(22)
+#define FTDMAC020_LLI_SRCAD_CTL_MSK		(0x3 << 20)
+#define FTDMAC020_LLI_SRCAD_CTL_SHIFT		(20)
+#define FTDMAC020_LLI_DSTAD_CTL_MSK		(0x3 << 18)
+#define FTDMAC020_LLI_DSTAD_CTL_SHIFT		(18)
+#define FTDMAC020_LLI_SRC_SEL			BIT(17)
+#define FTDMAC020_LLI_DST_SEL			BIT(16)
+#define FTDMAC020_LLI_TRANSFER_SIZE_MASK	(0xfff << 0)
+#define FTDMAC020_LLI_TRANSFER_SIZE_SHIFT	(0)
+
+#define FTDMAC020_CFG_LLP_CNT_MASK		(0x0f << 16)
+#define FTDMAC020_CFG_LLP_CNT_SHIFT		(16)
+#define FTDMAC020_CFG_BUSY			BIT(8)
+#define FTDMAC020_CFG_INT_ABT_MSK		BIT(2)
+#define FTDMAC020_CFG_INT_ERR_MSK		BIT(1)
+#define FTDMAC020_CFG_INT_TC_MSK		BIT(0)
+
 /* DMA linked list chain structure */
 
 struct pl080_lli {
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 5/6] ARM: gemini: select ARM_AMBA
  2017-04-08 12:04 [PATCH 1/6] ARM/dma: pl08x: pass reasonable memcpy settings Linus Walleij
                   ` (2 preceding siblings ...)
  2017-04-08 12:04 ` [PATCH 4/6] dma: pl08x: Add support for Faraday Technology FTDMAC020 Linus Walleij
@ 2017-04-08 12:04 ` Linus Walleij
  2017-04-08 12:04 ` [PATCH 6/6] ARM: dts: Add Gemini DMA controller Linus Walleij
  2017-04-19 13:58 ` [PATCH 1/6] ARM/dma: pl08x: pass reasonable memcpy settings Olof Johansson
  5 siblings, 0 replies; 14+ messages in thread
From: Linus Walleij @ 2017-04-08 12:04 UTC (permalink / raw)
  To: linux-arm-kernel

This selects the ARM_AMBA PrimeCell bus for the Gemini so we can
use the PL08x DMA engine derivative FTDMAC020 through the
combined PL08x driver.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
Vinod: this is just for context, this patch will be directed through
the ARM SoC tree once the rest is done.
---
 arch/arm/mach-gemini/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/mach-gemini/Kconfig b/arch/arm/mach-gemini/Kconfig
index eaae82637da3..2c5bc692bc59 100644
--- a/arch/arm/mach-gemini/Kconfig
+++ b/arch/arm/mach-gemini/Kconfig
@@ -1,6 +1,7 @@
 menuconfig ARCH_GEMINI
 	bool "Cortina Systems Gemini"
 	depends on ARCH_MULTI_V4
+	select ARM_AMBA
 	select ARM_APPENDED_DTB # Old Redboot bootloaders deployed
 	select FARADAY_FTINTC010
 	select FTTMR010_TIMER
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 6/6] ARM: dts: Add Gemini DMA controller
  2017-04-08 12:04 [PATCH 1/6] ARM/dma: pl08x: pass reasonable memcpy settings Linus Walleij
                   ` (3 preceding siblings ...)
  2017-04-08 12:04 ` [PATCH 5/6] ARM: gemini: select ARM_AMBA Linus Walleij
@ 2017-04-08 12:04 ` Linus Walleij
  2017-04-19 13:58 ` [PATCH 1/6] ARM/dma: pl08x: pass reasonable memcpy settings Olof Johansson
  5 siblings, 0 replies; 14+ messages in thread
From: Linus Walleij @ 2017-04-08 12:04 UTC (permalink / raw)
  To: linux-arm-kernel

This adds the Faraday Technology FTDMAC020 DMA controller to
the Gemini SoC DTSI file. It is only used for memcpy work so
we can activate it for all users of the chipset.

To make the PrimeCell bus work properly we need to add a summy
PCLK clock, as the generic clock framework always gets compiled
in on multiplatform builds.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
Vinod: this is just for context, this patch will be directed through
the ARM SoC tree once the rest is done.
---
 arch/arm/boot/dts/gemini.dtsi | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/arch/arm/boot/dts/gemini.dtsi b/arch/arm/boot/dts/gemini.dtsi
index e3fedcb69e2f..468ebdf9e594 100644
--- a/arch/arm/boot/dts/gemini.dtsi
+++ b/arch/arm/boot/dts/gemini.dtsi
@@ -8,6 +8,12 @@
 #include <dt-bindings/gpio/gpio.h>
 
 / {
+	pclk: pclk at 0 {
+		#clock-cells = <0>;
+		compatible = "fixed-clock";
+		clock-frequency = <0>;
+	};
+
 	soc {
 		#address-cells = <1>;
 		#size-cells = <1>;
@@ -186,5 +192,21 @@
 			syscon = <&syscon>;
 			status = "disabled";
 		};
+
+		dma-controller at 67000000 {
+			compatible = "faraday,ftdma020", "arm,pl080", "arm,primecell";
+			/* Faraday Technology FTDMAC020 variant */
+			arm,primecell-periphid = <0x0003b080>;
+			reg = <0x67000000 0x1000>;
+			interrupts = <9 IRQ_TYPE_EDGE_RISING>;
+			clocks = <&pclk>;
+			clock-names = "apb_pclk";
+			/* Bus interface AHB1 (AHB0) is totally tilted */
+			lli-bus-interface-ahb2;
+			mem-bus-interface-ahb2;
+			memcpy-burst-size = <256>;
+			memcpy-bus-width = <32>;
+			#dma-cells = <2>;
+		};
 	};
 };
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/6] dma: pl08x: Add Faraday FTDMAC020 to compatible list
  2017-04-08 12:04     ` Linus Walleij
@ 2017-04-13 20:03         ` Rob Herring
  -1 siblings, 0 replies; 14+ messages in thread
From: Rob Herring @ 2017-04-13 20:03 UTC (permalink / raw)
  To: Linus Walleij
  Cc: Vinod Koul, dmaengine-u79uwXL29TY76Z2rM5mHXA, Janos Laube,
	Paulius Zaleckas, openwrt-devel-p3rKhJxN3npAfugRpC6u6w,
	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	Hans Ulli Kroll, Florian Fainelli, Kuo-Jung Su,
	devicetree-u79uwXL29TY76Z2rM5mHXA

On Sat, Apr 08, 2017 at 02:04:53PM +0200, Linus Walleij wrote:
> This augments the PL08x bindings to include the Faraday Technology
> FTDMAC020 DMA engine, as it is clearly a derivative of the PL08x
> PrimeCell. Also specify that it needs the special peripheral ID
> specified to work properly.
> 
> Cc: devicetree-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> Signed-off-by: Linus Walleij <linus.walleij-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
> ---
> Seeking a DT maintainer ACK on this.
> ---
>  Documentation/devicetree/bindings/dma/arm-pl08x.txt | 9 +++++++--
>  1 file changed, 7 insertions(+), 2 deletions(-)

Acked-by: Rob Herring <robh-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 2/6] dma: pl08x: Add Faraday FTDMAC020 to compatible list
@ 2017-04-13 20:03         ` Rob Herring
  0 siblings, 0 replies; 14+ messages in thread
From: Rob Herring @ 2017-04-13 20:03 UTC (permalink / raw)
  To: linux-arm-kernel

On Sat, Apr 08, 2017 at 02:04:53PM +0200, Linus Walleij wrote:
> This augments the PL08x bindings to include the Faraday Technology
> FTDMAC020 DMA engine, as it is clearly a derivative of the PL08x
> PrimeCell. Also specify that it needs the special peripheral ID
> specified to work properly.
> 
> Cc: devicetree at vger.kernel.org
> Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
> ---
> Seeking a DT maintainer ACK on this.
> ---
>  Documentation/devicetree/bindings/dma/arm-pl08x.txt | 9 +++++++--
>  1 file changed, 7 insertions(+), 2 deletions(-)

Acked-by: Rob Herring <robh@kernel.org>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 1/6] ARM/dma: pl08x: pass reasonable memcpy settings
  2017-04-08 12:04 [PATCH 1/6] ARM/dma: pl08x: pass reasonable memcpy settings Linus Walleij
                   ` (4 preceding siblings ...)
  2017-04-08 12:04 ` [PATCH 6/6] ARM: dts: Add Gemini DMA controller Linus Walleij
@ 2017-04-19 13:58 ` Olof Johansson
  2017-04-19 19:43   ` Arnd Bergmann
  5 siblings, 1 reply; 14+ messages in thread
From: Olof Johansson @ 2017-04-19 13:58 UTC (permalink / raw)
  To: linux-arm-kernel

On Sat, Apr 08, 2017 at 02:04:52PM +0200, Linus Walleij wrote:
> We cannot use bits from configuration registers as API between
> platforms and driver like this, abstract it out to two enums
> and mimic the stuff passed as device tree data.
> 
> This is done to make it possible for the driver to generate the
> ccfg word on-the-fly so we can support more PL08x derivatives.
> 
> Cc: arm at kernel.org
> Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
> ---
> ARM SoC folks: I'd like your ACK on this eventually, it needs to
> go through the DMA engine tree with the patches following this one.

Acked-by: Olof Johansson <olof@lixom.net>


-Olof

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 1/6] ARM/dma: pl08x: pass reasonable memcpy settings
  2017-04-19 13:58 ` [PATCH 1/6] ARM/dma: pl08x: pass reasonable memcpy settings Olof Johansson
@ 2017-04-19 19:43   ` Arnd Bergmann
  0 siblings, 0 replies; 14+ messages in thread
From: Arnd Bergmann @ 2017-04-19 19:43 UTC (permalink / raw)
  To: linux-arm-kernel

On Wed, Apr 19, 2017 at 3:58 PM, Olof Johansson <olof@lixom.net> wrote:
> On Sat, Apr 08, 2017 at 02:04:52PM +0200, Linus Walleij wrote:
>> We cannot use bits from configuration registers as API between
>> platforms and driver like this, abstract it out to two enums
>> and mimic the stuff passed as device tree data.
>>
>> This is done to make it possible for the driver to generate the
>> ccfg word on-the-fly so we can support more PL08x derivatives.
>>
>> Cc: arm at kernel.org
>> Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
>> ---
>> ARM SoC folks: I'd like your ACK on this eventually, it needs to
>> go through the DMA engine tree with the patches following this one.
>
> Acked-by: Olof Johansson <olof@lixom.net>

Very nice cleanup!

Acked-by: Arnd Bergmann <arnd@arndb.de>

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 4/6] dma: pl08x: Add support for Faraday Technology FTDMAC020
  2017-04-08 12:04 ` [PATCH 4/6] dma: pl08x: Add support for Faraday Technology FTDMAC020 Linus Walleij
@ 2017-05-14 12:34   ` Vinod Koul
  2017-05-20 17:56     ` Linus Walleij
  0 siblings, 1 reply; 14+ messages in thread
From: Vinod Koul @ 2017-05-14 12:34 UTC (permalink / raw)
  To: linux-arm-kernel

On Sat, Apr 08, 2017 at 02:04:55PM +0200, Linus Walleij wrote:

> +#define FTDMAC020_CH_CSR_FIFOTH_MSK		(0x7 << 24)

IIUC we can have a GENMASK(27, 25), won't that be a bit better here and
other places?

> +#define FTDMAC020_CH_CSR_FIFOTH_SHIFT		(24)

and you may use ffs(FTDMAC020_CH_CSR_FIFOTH_MSK) or keep a shift define

> +#define FTDMAC020_CH_CSR_CHPR1_MSK		(0x3 << 22)
> +#define FTDMAC020_CH_CSR_PROT3			BIT(21)
> +#define FTDMAC020_CH_CSR_PROT2			BIT(20)
> +#define FTDMAC020_CH_CSR_PROT1			BIT(19)
> +#define FTDMAC020_CH_CSR_SRC_SIZE_MSK		(0x7 << 16)
> +#define FTDMAC020_CH_CSR_SRC_SIZE_SHIFT		(16)
> +#define FTDMAC020_CH_CSR_ABT			BIT(15)
> +#define FTDMAC020_CH_CSR_SRC_WIDTH_MSK		(0x7 << 11)
> +#define FTDMAC020_CH_CSR_SRC_WIDTH_SHIFT	(11)
> +#define FTDMAC020_CH_CSR_DST_WIDTH_MSK		(0x7 << 8)
> +#define FTDMAC020_CH_CSR_DST_WIDTH_SHIFT	(8)
> +#define FTDMAC020_CH_CSR_MODE			BIT(7)
> +/* 00 = increase, 01 = decrease, 10 = fix */
> +#define FTDMAC020_CH_CSR_SRCAD_CTL_MSK		(0x3 << 5)
> +#define FTDMAC020_CH_CSR_SRCAD_CTL_SHIFT	(5)
> +#define FTDMAC020_CH_CSR_DSTAD_CTL_MSK		(0x3 << 3)
> +#define FTDMAC020_CH_CSR_DSTAD_CTL_SHIFT	(3)
> +#define FTDMAC020_CH_CSR_SRC_SEL		BIT(2)
> +#define FTDMAC020_CH_CSR_DST_SEL		BIT(1)
> +#define FTDMAC020_CH_CSR_EN			BIT(0)
> +
> +/* FIFO threshold setting */
> +#define FTDMAC020_CH_CSR_FIFOTH_1		(0x0)
> +#define FTDMAC020_CH_CSR_FIFOTH_2		(0x1)
> +#define FTDMAC020_CH_CSR_FIFOTH_4		(0x2)
> +#define FTDMAC020_CH_CSR_FIFOTH_8		(0x3)
> +#define FTDMAC020_CH_CSR_FIFOTH_16		(0x4)
> +/* The FTDMAC020 supports 64bit wide transfers */
> +#define FTDMAC020_WIDTH_64BIT			(0x3)
> +/* Address can be increased, decreased or fixed */
> +#define FTDMAC020_CH_CSR_SRCAD_CTL_INC		(0x0)
> +#define FTDMAC020_CH_CSR_SRCAD_CTL_DEC		(0x1)
> +#define FTDMAC020_CH_CSR_SRCAD_CTL_FIXED	(0x2)
> +
> +#define FTDMAC020_CH_CFG_LLP_CNT_MASK		(0xf << 16)
> +#define FTDMAC020_CH_CFG_LLP_CNT_SHIFT		(16)
> +#define FTDMAC020_CH_CFG_BUSY			BIT(8)
> +#define FTDMAC020_CH_CFG_INT_ABT_MASK		BIT(2)
> +#define FTDMAC020_CH_CFG_INT_ERR_MASK		BIT(1)
> +#define FTDMAC020_CH_CFG_INT_TC_MASK		BIT(0)
> +
> +/* Inside the LLIs, the applicable CSR fields are mapped differently */
> +#define FTDMAC020_LLI_TC_MSK			BIT(28)
> +#define FTDMAC020_LLI_SRC_WIDTH_MSK		(0x7 << 25)
> +#define FTDMAC020_LLI_SRC_WIDTH_SHIFT		(25)
> +#define FTDMAC020_LLI_DST_WIDTH_MSK		(0x7 << 22)
> +#define FTDMAC020_LLI_DST_WIDTH_SHIFT		(22)
> +#define FTDMAC020_LLI_SRCAD_CTL_MSK		(0x3 << 20)
> +#define FTDMAC020_LLI_SRCAD_CTL_SHIFT		(20)
> +#define FTDMAC020_LLI_DSTAD_CTL_MSK		(0x3 << 18)
> +#define FTDMAC020_LLI_DSTAD_CTL_SHIFT		(18)
> +#define FTDMAC020_LLI_SRC_SEL			BIT(17)
> +#define FTDMAC020_LLI_DST_SEL			BIT(16)
> +#define FTDMAC020_LLI_TRANSFER_SIZE_MASK	(0xfff << 0)
> +#define FTDMAC020_LLI_TRANSFER_SIZE_SHIFT	(0)
> +
> +#define FTDMAC020_CFG_LLP_CNT_MASK		(0x0f << 16)
> +#define FTDMAC020_CFG_LLP_CNT_SHIFT		(16)
> +#define FTDMAC020_CFG_BUSY			BIT(8)
> +#define FTDMAC020_CFG_INT_ABT_MSK		BIT(2)
> +#define FTDMAC020_CFG_INT_ERR_MSK		BIT(1)
> +#define FTDMAC020_CFG_INT_TC_MSK		BIT(0)
> +
>  /* DMA linked list chain structure */
>  
>  struct pl080_lli {
> -- 
> 2.9.3
> 

-- 
~Vinod

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 4/6] dma: pl08x: Add support for Faraday Technology FTDMAC020
  2017-05-14 12:34   ` Vinod Koul
@ 2017-05-20 17:56     ` Linus Walleij
  2017-05-24  4:11       ` Vinod Koul
  0 siblings, 1 reply; 14+ messages in thread
From: Linus Walleij @ 2017-05-20 17:56 UTC (permalink / raw)
  To: linux-arm-kernel

On Sun, May 14, 2017 at 2:34 PM, Vinod Koul <vinod.koul@intel.com> wrote:
> On Sat, Apr 08, 2017 at 02:04:55PM +0200, Linus Walleij wrote:
>
>> +#define FTDMAC020_CH_CSR_FIFOTH_MSK          (0x7 << 24)
>
> IIUC we can have a GENMASK(27, 25), won't that be a bit better here and
> other places?

I will send an additional patch at the end of the series switching *all*
of these to use GENMASK() so we keep it to one technical step
per patch, OK?

>> +#define FTDMAC020_CH_CSR_FIFOTH_SHIFT                (24)
>
> and you may use ffs(FTDMAC020_CH_CSR_FIFOTH_MSK) or keep a shift define

I'm more convenient with the shift at least here.

ffs(DEFINE) makes me thin ffs() is a function invoked all the
time even if I know very well the compiler will mangle it into
a constant... just confusing for my perception.

Yours,
Linus Walleij

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH 4/6] dma: pl08x: Add support for Faraday Technology FTDMAC020
  2017-05-20 17:56     ` Linus Walleij
@ 2017-05-24  4:11       ` Vinod Koul
  0 siblings, 0 replies; 14+ messages in thread
From: Vinod Koul @ 2017-05-24  4:11 UTC (permalink / raw)
  To: linux-arm-kernel

On Sat, May 20, 2017 at 07:56:34PM +0200, Linus Walleij wrote:
> On Sun, May 14, 2017 at 2:34 PM, Vinod Koul <vinod.koul@intel.com> wrote:
> > On Sat, Apr 08, 2017 at 02:04:55PM +0200, Linus Walleij wrote:
> >
> >> +#define FTDMAC020_CH_CSR_FIFOTH_MSK          (0x7 << 24)
> >
> > IIUC we can have a GENMASK(27, 25), won't that be a bit better here and
> > other places?
> 
> I will send an additional patch at the end of the series switching *all*
> of these to use GENMASK() so we keep it to one technical step
> per patch, OK?

Sounds good to me

> 
> >> +#define FTDMAC020_CH_CSR_FIFOTH_SHIFT                (24)
> >
> > and you may use ffs(FTDMAC020_CH_CSR_FIFOTH_MSK) or keep a shift define
> 
> I'm more convenient with the shift at least here.
> 
> ffs(DEFINE) makes me thin ffs() is a function invoked all the
> time even if I know very well the compiler will mangle it into
> a constant... just confusing for my perception.

Perhaps we should have a SHIFT_MACRO(x) which would give us the shifted value
from a mask value :)

-- 
~Vinod

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2017-05-24  4:11 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-08 12:04 [PATCH 1/6] ARM/dma: pl08x: pass reasonable memcpy settings Linus Walleij
     [not found] ` <20170408120457.22750-1-linus.walleij-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
2017-04-08 12:04   ` [PATCH 2/6] dma: pl08x: Add Faraday FTDMAC020 to compatible list Linus Walleij
2017-04-08 12:04     ` Linus Walleij
     [not found]     ` <20170408120457.22750-2-linus.walleij-QSEj5FYQhm4dnm+yROfE0A@public.gmane.org>
2017-04-13 20:03       ` Rob Herring
2017-04-13 20:03         ` Rob Herring
2017-04-08 12:04 ` [PATCH 3/6] dma: pl08x: Make slave engine optional Linus Walleij
2017-04-08 12:04 ` [PATCH 4/6] dma: pl08x: Add support for Faraday Technology FTDMAC020 Linus Walleij
2017-05-14 12:34   ` Vinod Koul
2017-05-20 17:56     ` Linus Walleij
2017-05-24  4:11       ` Vinod Koul
2017-04-08 12:04 ` [PATCH 5/6] ARM: gemini: select ARM_AMBA Linus Walleij
2017-04-08 12:04 ` [PATCH 6/6] ARM: dts: Add Gemini DMA controller Linus Walleij
2017-04-19 13:58 ` [PATCH 1/6] ARM/dma: pl08x: pass reasonable memcpy settings Olof Johansson
2017-04-19 19:43   ` Arnd Bergmann

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.