linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] PL330: Add PL330 DMA controller driver
@ 2010-03-25  3:17 Joonyoung Shim
  2010-03-25  5:34 ` jassi brar
                   ` (3 more replies)
  0 siblings, 4 replies; 29+ messages in thread
From: Joonyoung Shim @ 2010-03-25  3:17 UTC (permalink / raw)
  To: dan.j.williams
  Cc: linus.ml.walleij, kyungmin.park, linux-arm-kernel, linux-kernel

The PL330 is the DMA controller currently used in the S5PC1XX ARM SoC.
This driver supports DMA_MEMCPY and DMA_SLAVE.

The datasheet for the PL330 can be found at the URL below:
http://infocenter.arm.com/help/topic/com.arm.doc.ddi0424a/DDI0424A_dmac_pl330_r0p0_trm.pdf

Signed-off-by: Joonyoung Shim <jy0922.shim@samsung.com>
---
Change log:

v2: Convert into an amba_device driver.
    Code cleanup and updates from v1 patch review.

 drivers/dma/Kconfig        |    7 +
 drivers/dma/Makefile       |    1 +
 drivers/dma/pl330_dmac.c   |  900 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/dma/pl330_dmac.h   |  175 +++++++++
 include/linux/amba/pl330.h |   64 ++++
 5 files changed, 1147 insertions(+), 0 deletions(-)
 create mode 100644 drivers/dma/pl330_dmac.c
 create mode 100644 drivers/dma/pl330_dmac.h
 create mode 100644 include/linux/amba/pl330.h

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index c27f80e..5989b6e 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -149,6 +149,13 @@ config AMCC_PPC440SPE_ADMA
 	help
 	  Enable support for the AMCC PPC440SPe RAID engines.
 
+config PL330_DMAC
+	bool "PrimeCell DMA Controller(PL330) support"
+	depends on ARCH_S5PC1XX
+	select DMA_ENGINE
+	help
+	  Enable support for the PrimeCell PL330 DMA Controller.
+
 config ARCH_HAS_ASYNC_TX_FIND_CHANNEL
 	bool
 
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 22bba3d..d98be12 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -20,3 +20,4 @@ obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o
 obj-$(CONFIG_SH_DMAE) += shdma.o
 obj-$(CONFIG_COH901318) += coh901318.o coh901318_lli.o
 obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/
+obj-$(CONFIG_PL330_DMAC) += pl330_dmac.o
diff --git a/drivers/dma/pl330_dmac.c b/drivers/dma/pl330_dmac.c
new file mode 100644
index 0000000..4427110
--- /dev/null
+++ b/drivers/dma/pl330_dmac.c
@@ -0,0 +1,900 @@
+/*
+ * pl330_dmac.c  --  Driver for PL330 DMA Controller
+ *
+ * Copyright (C) 2009 Samsung Electronics Co.Ltd
+ * Author: Joonyoung Shim <jy0922.shim@samsung.com>
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/amba/bus.h>
+#include <linux/amba/pl330.h>
+
+#include "pl330_dmac.h"
+
+/*
+ * Helpers that map an embedded member (dma_chan, list node, async tx
+ * descriptor) back to the PL330 structure that contains it.
+ */
+#define to_pl330_chan(chan)	container_of(chan, struct pl330_chan, common)
+#define to_pl330_desc(node)	container_of(node, struct pl330_desc, desc_node)
+#define tx_to_pl330_desc(tx)	container_of(tx, struct pl330_desc, async_tx)
+
+/* instruction set functions */
+/*
+ * Encode DMAADDH at @desc_pool_virt: one opcode byte carrying @ra in bit 1,
+ * followed by the 16-bit immediate @imm. Returns the encoded length (3).
+ */
+static inline int pl330_dmaaddh(u8 *desc_pool_virt, u16 imm, bool ra)
+{
+	writeb(DMAADDH | (ra << 1), &desc_pool_virt[0]);
+	writew(imm, &desc_pool_virt[1]);
+	return 3;
+}
+
+/* Encode the one-byte DMAEND opcode; returns the encoded length (1). */
+static inline int pl330_dmaend(u8 *desc_pool_virt)
+{
+	writeb(DMAEND, desc_pool_virt);
+	return 1;
+}
+
+/*
+ * Encode DMAFLUSHP: opcode byte followed by @periph shifted into bits [7:3]
+ * of the second byte. Returns the encoded length (2).
+ */
+static inline int pl330_dmaflushp(u8 *desc_pool_virt, u8 periph)
+{
+	writeb(DMAFLUSHHP, &desc_pool_virt[0]);
+	writeb(periph << 3, &desc_pool_virt[1]);
+	return 2;
+}
+
+/* Encode the one-byte DMALD opcode; returns the encoded length (1). */
+static inline int pl330_dmald(u8 *desc_pool_virt)
+{
+	writeb(DMALD, desc_pool_virt);
+	return 1;
+}
+
+/* Encode the one-byte DMALDS opcode; returns the encoded length (1). */
+static inline int pl330_dmalds(u8 *desc_pool_virt)
+{
+	writeb(DMALDS, desc_pool_virt);
+	return 1;
+}
+
+/* Encode the one-byte DMALDB opcode; returns the encoded length (1). */
+static inline int pl330_dmaldb(u8 *desc_pool_virt)
+{
+	writeb(DMALDB, desc_pool_virt);
+	return 1;
+}
+
+/*
+ * Encode DMALDPS: opcode byte followed by @periph shifted into bits [7:3]
+ * of the second byte. Returns the encoded length (2).
+ */
+static inline int pl330_dmaldps(u8 *desc_pool_virt, u8 periph)
+{
+	writeb(DMALDPS, &desc_pool_virt[0]);
+	writeb(periph << 3, &desc_pool_virt[1]);
+	return 2;
+}
+
+/*
+ * Encode DMALDPB: opcode byte followed by @periph shifted into bits [7:3]
+ * of the second byte. Returns the encoded length (2).
+ */
+static inline int pl330_dmaldpb(u8 *desc_pool_virt, u8 periph)
+{
+	writeb(DMALDPB, &desc_pool_virt[0]);
+	writeb(periph << 3, &desc_pool_virt[1]);
+	return 2;
+}
+
+/*
+ * Encode DMALP: opcode byte carrying the loop-counter selector @lc in bit 1,
+ * followed by the @iter byte. Returns the encoded length (2).
+ */
+static inline int pl330_dmalp(u8 *desc_pool_virt, u8 iter, bool lc)
+{
+	writeb(DMALP | (lc << 1), &desc_pool_virt[0]);
+	writeb(iter, &desc_pool_virt[1]);
+	return 2;
+}
+
+/*
+ * Encode DMALPEND: opcode byte carrying the loop-counter selector @lc in
+ * bit 2, followed by the @backwards_jump byte. Returns the encoded length (2).
+ */
+static inline int pl330_dmalpend(u8 *desc_pool_virt, u8 backwards_jump, bool lc)
+{
+	writeb(DMALPEND | (lc << 2), &desc_pool_virt[0]);
+	writeb(backwards_jump, &desc_pool_virt[1]);
+	return 2;
+}
+
+/*
+ * Encode DMALPENDS: opcode byte carrying the loop-counter selector @lc in
+ * bit 2, followed by the @backwards_jump byte. Returns the encoded length (2).
+ */
+static inline int pl330_dmalpends(u8 *desc_pool_virt, u8 backwards_jump,
+		bool lc)
+{
+	writeb(DMALPENDS | (lc << 2), &desc_pool_virt[0]);
+	writeb(backwards_jump, &desc_pool_virt[1]);
+	return 2;
+}
+
+/*
+ * Encode DMALPENDB: opcode byte carrying the loop-counter selector @lc in
+ * bit 2, followed by the @backwards_jump byte. Returns the encoded length (2).
+ */
+static inline int pl330_dmalpendb(u8 *desc_pool_virt, u8 backwards_jump,
+		bool lc)
+{
+	writeb(DMALPENDB | (lc << 2), &desc_pool_virt[0]);
+	writeb(backwards_jump, &desc_pool_virt[1]);
+	return 2;
+}
+
+/*
+ * Encode DMALPFE: opcode byte with @lc placed in bit 2, followed by the
+ * @backwards_jump byte. Returns the encoded length (2).
+ */
+static inline int pl330_dmalpfe(u8 *desc_pool_virt, u8 backwards_jump, bool lc)
+{
+	writeb(DMALPFE | (lc << 2), &desc_pool_virt[0]);
+	writeb(backwards_jump, &desc_pool_virt[1]);
+	return 2;
+}
+
+/* Encode the one-byte DMAKILL opcode; returns the encoded length (1). */
+static inline int pl330_dmakill(u8 *desc_pool_virt)
+{
+	writeb(DMAKILL, desc_pool_virt);
+	return 1;
+}
+
+/*
+ * Encode DMAMOV: opcode byte, destination-register selector @rd, then the
+ * 32-bit immediate @imm starting at byte offset 2. Returns the encoded
+ * length (6).
+ */
+static inline int pl330_dmamov(u8 *desc_pool_virt, u8 rd, u32 imm)
+{
+	writeb(DMAMOV, &desc_pool_virt[0]);
+	writeb(rd, &desc_pool_virt[1]);
+	writel(imm, &desc_pool_virt[2]);
+	return 6;
+}
+
+/* Encode the one-byte DMANOP opcode; returns the encoded length (1). */
+static inline int pl330_dmanop(u8 *desc_pool_virt)
+{
+	writeb(DMANOP, desc_pool_virt);
+	return 1;
+}
+
+/* Encode the one-byte DMARMB opcode; returns the encoded length (1). */
+static inline int pl330_dmarmb(u8 *desc_pool_virt)
+{
+	writeb(DMARMB, desc_pool_virt);
+	return 1;
+}
+
+/*
+ * Encode DMASEV: opcode byte followed by @event_num shifted into bits [7:3]
+ * of the second byte. Returns the encoded length (2).
+ */
+static inline int pl330_dmasev(u8 *desc_pool_virt, u8 event_num)
+{
+	writeb(DMASEV, &desc_pool_virt[0]);
+	writeb(event_num << 3, &desc_pool_virt[1]);
+	return 2;
+}
+
+/* Encode the one-byte DMAST opcode; returns the encoded length (1). */
+static inline int pl330_dmast(u8 *desc_pool_virt)
+{
+	writeb(DMAST, desc_pool_virt);
+	return 1;
+}
+
+/* Encode the one-byte DMASTS opcode; returns the encoded length (1). */
+static inline int pl330_dmasts(u8 *desc_pool_virt)
+{
+	writeb(DMASTS, desc_pool_virt);
+	return 1;
+}
+
+/* Encode the one-byte DMASTB opcode; returns the encoded length (1). */
+static inline int pl330_dmastb(u8 *desc_pool_virt)
+{
+	writeb(DMASTB, desc_pool_virt);
+	return 1;
+}
+
+/*
+ * Encode DMASTPS: opcode byte followed by @periph shifted into bits [7:3]
+ * of the second byte. Returns the encoded length (2).
+ */
+static inline int pl330_dmastps(u8 *desc_pool_virt, u8 periph)
+{
+	writeb(DMASTPS, &desc_pool_virt[0]);
+	writeb(periph << 3, &desc_pool_virt[1]);
+	return 2;
+}
+
+/*
+ * Encode DMASTPB: opcode byte followed by @periph shifted into bits [7:3]
+ * of the second byte. Returns the encoded length (2).
+ */
+static inline int pl330_dmastpb(u8 *desc_pool_virt, u8 periph)
+{
+	writeb(DMASTPB, &desc_pool_virt[0]);
+	writeb(periph << 3, &desc_pool_virt[1]);
+	return 2;
+}
+
+/* Encode the one-byte DMASTZ opcode; returns the encoded length (1). */
+static inline int pl330_dmastz(u8 *desc_pool_virt)
+{
+	writeb(DMASTZ, desc_pool_virt);
+	return 1;
+}
+
+/*
+ * Encode DMAWFE: opcode byte followed by a byte holding @event_num in
+ * bits [7:3] and the @invalid flag in bit 1. Returns the encoded length (2).
+ */
+static inline int pl330_dmawfe(u8 *desc_pool_virt, u8 event_num, bool invalid)
+{
+	writeb(DMAWFE, &desc_pool_virt[0]);
+	writeb((event_num << 3) | (invalid << 1), &desc_pool_virt[1]);
+	return 2;
+}
+
+/*
+ * Encode DMAWFPS: opcode byte followed by @periph shifted into bits [7:3]
+ * of the second byte. Returns the encoded length (2).
+ */
+static inline int pl330_dmawfps(u8 *desc_pool_virt, u8 periph)
+{
+	writeb(DMAWFPS, &desc_pool_virt[0]);
+	writeb(periph << 3, &desc_pool_virt[1]);
+	return 2;
+}
+
+/*
+ * Encode DMAWFPB: opcode byte followed by @periph shifted into bits [7:3]
+ * of the second byte. Returns the encoded length (2).
+ */
+static inline int pl330_dmawfpb(u8 *desc_pool_virt, u8 periph)
+{
+	writeb(DMAWFPB, &desc_pool_virt[0]);
+	writeb(periph << 3, &desc_pool_virt[1]);
+	return 2;
+}
+
+/*
+ * Encode DMAWFPP: opcode byte followed by @periph shifted into bits [7:3]
+ * of the second byte. Returns the encoded length (2).
+ */
+static inline int pl330_dmawfpp(u8 *desc_pool_virt, u8 periph)
+{
+	writeb(DMAWFPP, &desc_pool_virt[0]);
+	writeb(periph << 3, &desc_pool_virt[1]);
+	return 2;
+}
+
+/* Encode the one-byte DMAWMB opcode; returns the encoded length (1). */
+static inline int pl330_dmawmb(u8 *desc_pool_virt)
+{
+	writeb(DMAWMB, desc_pool_virt);
+	return 1;
+}
+
+/*
+ * Start @desc on channel @pl330_ch through the debug registers: DBGINST0
+ * receives the DMAGO opcode (with @ns in bit 1) together with the channel id,
+ * DBGINST1 receives the bus address of the descriptor's instruction pool, and
+ * a write of 0 to DBGCMD issues the instruction.
+ */
+static void pl330_dmago(struct pl330_chan *pl330_ch, struct pl330_desc *desc,
+		bool ns)
+{
+	void __iomem *regs = pl330_ch->pl330_dev->reg_base;
+	u8 go = DMAGO | (ns << 1);
+	unsigned int inst;
+
+	inst = (pl330_ch->id << 24) | (go << 16) | (pl330_ch->id << 8);
+	writel(inst, regs + PL330_DBGINST0);
+
+	inst = desc->async_tx.phys;
+	writel(inst, regs + PL330_DBGINST1);
+
+	writel(0, regs + PL330_DBGCMD);
+}
+
+/*
+ * Move every descriptor on the channel's complete and queue lists back onto
+ * the free list, leaving both source lists empty. Only the software lists are
+ * touched; no register is written here.
+ */
+static void __pl330_terminate_all(struct pl330_chan *pl330_ch)
+{
+	list_splice_init(&pl330_ch->complete_desc, &pl330_ch->free_desc);
+	list_splice_init(&pl330_ch->queue_desc, &pl330_ch->free_desc);
+}
+
+/*
+ * dmaengine terminate_all hook: recycle all queued and in-flight descriptors
+ * of @chan. Nothing is done when the channel has no descriptors allocated.
+ */
+static void pl330_terminate_all(struct dma_chan *chan)
+{
+	struct pl330_chan *pl330_ch = to_pl330_chan(chan);
+
+	/* Only touch the lists once descriptors have been allocated. */
+	if (pl330_ch->desc_num != 0)
+		__pl330_terminate_all(pl330_ch);
+}
+
+/*
+ * tx_submit hook: assign the next cookie to @tx and append its descriptor to
+ * the channel's queue list, all under the channel lock. Cookies wrap back to
+ * 1 when the increment turns negative. Returns the assigned cookie.
+ */
+static dma_cookie_t pl330_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct pl330_chan *pl330_ch = to_pl330_chan(tx->chan);
+	struct pl330_desc *desc = tx_to_pl330_desc(tx);
+	unsigned long irq_flags;
+	dma_cookie_t next;
+
+	spin_lock_irqsave(&pl330_ch->lock, irq_flags);
+
+	next = pl330_ch->common.cookie;
+	next++;
+	if (next < 0)
+		next = 1;
+
+	pl330_ch->common.cookie = next;
+	desc->async_tx.cookie = next;
+	list_add_tail(&desc->desc_node, &pl330_ch->queue_desc);
+
+	spin_unlock_irqrestore(&pl330_ch->lock, irq_flags);
+
+	return next;
+}
+
+/*
+ * Allocate one descriptor for @pl330_ch together with its PL330_POOL_SIZE
+ * byte coherent instruction pool, using allocation flags @flags. The pool's
+ * bus address is stored in async_tx.phys. Returns NULL if either allocation
+ * fails.
+ */
+static struct pl330_desc *
+pl330_alloc_descriptor(struct pl330_chan *pl330_ch, gfp_t flags)
+{
+	struct device *dev = pl330_ch->pl330_dev->common.dev;
+	struct pl330_desc *new_desc;
+	dma_addr_t pool_phys;
+
+	new_desc = kzalloc(sizeof(*new_desc), flags);
+	if (!new_desc)
+		return NULL;
+
+	new_desc->desc_pool_virt = dma_alloc_coherent(dev, PL330_POOL_SIZE,
+			&pool_phys, flags);
+	if (!new_desc->desc_pool_virt)
+		goto err_free_desc;
+
+	dma_async_tx_descriptor_init(&new_desc->async_tx, &pl330_ch->common);
+	new_desc->async_tx.tx_submit = pl330_tx_submit;
+	new_desc->async_tx.phys = pool_phys;
+
+	return new_desc;
+
+err_free_desc:
+	kfree(new_desc);
+	return NULL;
+}
+
+/*
+ * Take a descriptor for a new transfer: the first entry of the free list if
+ * there is one, otherwise a freshly allocated descriptor (GFP_ATOMIC).
+ * Returns NULL, after logging an error, when the allocation fails.
+ */
+static struct pl330_desc *pl330_get_descriptor(struct pl330_chan *pl330_ch)
+{
+	struct device *dev = pl330_ch->pl330_dev->common.dev;
+	struct pl330_desc *desc;
+
+	if (list_empty(&pl330_ch->free_desc)) {
+		/* free list exhausted, try to get another desc */
+		desc = pl330_alloc_descriptor(pl330_ch, GFP_ATOMIC);
+		if (!desc)
+			dev_err(dev, "descriptor alloc failed\n");
+		return desc;
+	}
+
+	desc = to_pl330_desc(pl330_ch->free_desc.next);
+	list_del(&desc->desc_node);
+
+	return desc;
+}
+
+/*
+ * dmaengine alloc_chan_resources hook: pre-allocate up to PL330_DESC_NUM
+ * descriptors for @chan and reset its cookie state.
+ *
+ * Returns the number of descriptors now owned by the channel, or -ENOMEM
+ * when not a single descriptor could be allocated. Reporting 0 as success
+ * would leave desc_num at 0, and pl330_free_chan_resources() then skips the
+ * channel, so descriptors allocated later on demand would never be freed.
+ */
+static int pl330_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct pl330_chan *pl330_ch = to_pl330_chan(chan);
+	struct device *dev = pl330_ch->pl330_dev->common.dev;
+	struct pl330_desc *desc;
+	int i;
+	LIST_HEAD(tmp_list);
+
+	/* have we already been set up? */
+	if (!list_empty(&pl330_ch->free_desc))
+		return pl330_ch->desc_num;
+
+	for (i = 0; i < PL330_DESC_NUM; i++) {
+		desc = pl330_alloc_descriptor(pl330_ch, GFP_KERNEL);
+		if (!desc) {
+			dev_err(dev, "Only %d initial descriptors\n", i);
+			break;
+		}
+		list_add_tail(&desc->desc_node, &tmp_list);
+	}
+
+	/* A channel without any descriptor is unusable: fail the request. */
+	if (i == 0)
+		return -ENOMEM;
+
+	pl330_ch->completed = chan->cookie = 1;
+	pl330_ch->desc_num = i;
+	list_splice(&tmp_list, &pl330_ch->free_desc);
+
+	return pl330_ch->desc_num;
+}
+
+/*
+ * dmaengine free_chan_resources hook: recycle every outstanding descriptor
+ * of @chan onto the free list, then release each descriptor and its coherent
+ * instruction pool. desc_num is reset afterwards so that the "nothing
+ * allocated" guards here and in pl330_terminate_all() stay accurate.
+ */
+static void pl330_free_chan_resources(struct dma_chan *chan)
+{
+	struct pl330_chan *pl330_ch = to_pl330_chan(chan);
+	struct device *dev = pl330_ch->pl330_dev->common.dev;
+	struct pl330_desc *desc, *_desc;
+
+	/* Before freeing channel resources first check
+	 * if they have been previously allocated for this channel.
+	 */
+	if (pl330_ch->desc_num == 0)
+		return;
+
+	__pl330_terminate_all(pl330_ch);
+
+	list_for_each_entry_safe(desc, _desc, &pl330_ch->free_desc,
+			desc_node) {
+		list_del(&desc->desc_node);
+		dma_free_coherent(dev, PL330_POOL_SIZE, desc->desc_pool_virt,
+				desc->async_tx.phys);
+		kfree(desc);
+	}
+
+	/* The channel no longer owns any descriptor. */
+	pl330_ch->desc_num = 0;
+}
+
+/*
+ * dmaengine is_tx_complete hook: report the status of @cookie on @chan.
+ *
+ * @done and @used are optional out-parameters; when non-NULL they receive
+ * the channel's last completed cookie and last issued cookie respectively.
+ * The original code ignored both, leaving callers with uninitialised values.
+ *
+ * Returns the result of dma_async_is_complete() for those two cookies.
+ */
+static enum dma_status pl330_is_tx_complete(struct dma_chan *chan,
+		dma_cookie_t cookie, dma_cookie_t *done, dma_cookie_t *used)
+{
+	struct pl330_chan *pl330_ch = to_pl330_chan(chan);
+	dma_cookie_t last_used;
+	dma_cookie_t last_complete;
+
+	last_complete = pl330_ch->completed;
+	last_used = chan->cookie;
+
+	if (done)
+		*done = last_complete;
+	if (used)
+		*used = last_used;
+
+	return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+/*
+ * dmaengine issue_pending hook: if a descriptor is queued and DBGSTATUS does
+ * not read back as busy, move the head of the queue list to the tail of the
+ * complete list and start it with DMAGO in non-secure mode.
+ */
+static void pl330_issue_pending(struct dma_chan *chan)
+{
+	struct pl330_chan *pl330_ch = to_pl330_chan(chan);
+	struct pl330_desc *next;
+
+	if (list_empty(&pl330_ch->queue_desc))
+		return;
+
+	if (readl(pl330_ch->pl330_dev->reg_base + PL330_DBGSTATUS) ==
+			PL330_DBG_BUSY)
+		return;
+
+	next = to_pl330_desc(pl330_ch->queue_desc.next);
+	list_move_tail(&next->desc_node, &pl330_ch->complete_desc);
+
+	pl330_dmago(pl330_ch, next, NS_NONSECURE);
+}
+
+static unsigned int pl330_make_instructions(struct pl330_chan *pl330_ch,
+		struct pl330_desc *desc, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned int inst_size,
+		enum dma_data_direction direction)
+{
+	struct device *dev = pl330_ch->pl330_dev->common.dev;
+	void *buf = desc->desc_pool_virt;
+	u32 control = *(u32 *)&pl330_ch->pl330_reg_cc;
+	unsigned int loop_size;
+	unsigned int loop_size_rest;
+	unsigned int loop_count0;
+	unsigned int loop_count1 = 0;
+	unsigned int loop_count0_rest = 0;
+	unsigned int loop_start0 = 0;
+	unsigned int loop_start1 = 0;
+
+	dev_dbg(dev, "desc_pool_phys: 0x%x\n", desc->async_tx.phys);
+	dev_dbg(dev, "control: 0x%x\n", control);
+	dev_dbg(dev, "dest: 0x%x\n", dest);
+	dev_dbg(dev, "src: 0x%x\n", src);
+	dev_dbg(dev, "len: 0x%x\n", len);
+
+	/* calculate loop count */
+	loop_size = (pl330_ch->pl330_reg_cc.src_burst_len + 1) *
+		(1 << pl330_ch->pl330_reg_cc.src_burst_size);
+	loop_count0 = (len / loop_size) - 1;
+	loop_size_rest = len % loop_size;
+
+	dev_dbg(dev, "loop_size: 0x%x\n", loop_size);
+	dev_dbg(dev, "loop_count0: 0x%x\n", loop_count0);
+	dev_dbg(dev, "loop_size_rest: 0x%x\n", loop_size_rest);
+
+	if (loop_size_rest) {
+		dev_err(dev, "Transfer length must be aligned to loop_size\n");
+		return -EINVAL;
+	}
+
+	if (loop_count0 >= PL330_MAX_LOOPS) {
+		loop_count1 = (loop_count0 / PL330_MAX_LOOPS) - 1;
+		loop_count0_rest = (loop_count0 % PL330_MAX_LOOPS) + 1;
+		loop_count0 = PL330_MAX_LOOPS - 1;
+		dev_dbg(dev, "loop_count0: 0x%x\n", loop_count0);
+		dev_dbg(dev, "loop_count0_rest: 0x%x\n", loop_count0_rest);
+		dev_dbg(dev, "loop_count1: 0x%x\n", loop_count1);
+
+		if (loop_count1 >= PL330_MAX_LOOPS)
+			dev_dbg(dev, "loop_count1 overflow\n");
+	}
+
+	/* write instruction sets on buffer */
+	inst_size += pl330_dmamov(buf + inst_size, RD_DAR, dest);
+	inst_size += pl330_dmamov(buf + inst_size, RD_SAR, src);
+	inst_size += pl330_dmamov(buf + inst_size, RD_CCR, control);
+
+	if (loop_count1) {
+		inst_size += pl330_dmalp(buf + inst_size, loop_count1, LC_1);
+		loop_start1 = inst_size;
+	}
+
+	if (loop_count0) {
+		inst_size += pl330_dmalp(buf + inst_size, loop_count0, LC_0);
+		loop_start0 = inst_size;
+	}
+
+	if (direction == DMA_TO_DEVICE) {
+		struct pl330_dma_slave *dma_slave = pl330_ch->common.private;
+		u8 periph = dma_slave->peri_num;
+		inst_size += pl330_dmawfps(buf + inst_size, periph);
+		inst_size += pl330_dmald(buf + inst_size);
+		inst_size += pl330_dmastps(buf + inst_size, periph);
+		inst_size += pl330_dmaflushp(buf + inst_size, periph);
+	} else if (direction == DMA_FROM_DEVICE) {
+		struct pl330_dma_slave *dma_slave = pl330_ch->common.private;
+		u8 periph = dma_slave->peri_num;
+		inst_size += pl330_dmawfps(buf + inst_size, periph);
+		inst_size += pl330_dmaldps(buf + inst_size, periph);
+		inst_size += pl330_dmast(buf + inst_size);
+		inst_size += pl330_dmaflushp(buf + inst_size, periph);
+	} else {
+		inst_size += pl330_dmald(buf + inst_size);
+		inst_size += pl330_dmarmb(buf + inst_size);
+		inst_size += pl330_dmast(buf + inst_size);
+		inst_size += pl330_dmawmb(buf + inst_size);
+	}
+
+	if (loop_count0)
+		inst_size += pl330_dmalpend(buf + inst_size,
+				inst_size - loop_start0, LC_0);
+
+	if (loop_count1)
+		inst_size += pl330_dmalpend(buf + inst_size,
+				inst_size - loop_start1, LC_1);
+
+	if (loop_count0_rest) {
+		inst_size += pl330_dmalp(buf + inst_size, loop_count0_rest - 1,
+				LC_0);
+		loop_start0 = inst_size;
+
+		if (direction == DMA_TO_DEVICE) {
+			struct pl330_dma_slave *dma_slave =
+				pl330_ch->common.private;
+			u8 periph = dma_slave->peri_num;
+			inst_size += pl330_dmawfps(buf + inst_size, periph);
+			inst_size += pl330_dmald(buf + inst_size);
+			inst_size += pl330_dmastps(buf + inst_size, periph);
+			inst_size += pl330_dmaflushp(buf + inst_size, periph);
+		} else if (direction == DMA_FROM_DEVICE) {
+			struct pl330_dma_slave *dma_slave =
+				pl330_ch->common.private;
+			u8 periph = dma_slave->peri_num;
+			inst_size += pl330_dmawfps(buf + inst_size, periph);
+			inst_size += pl330_dmaldps(buf + inst_size, periph);
+			inst_size += pl330_dmast(buf + inst_size);
+			inst_size += pl330_dmaflushp(buf + inst_size, periph);
+		} else {
+			inst_size += pl330_dmald(buf + inst_size);
+			inst_size += pl330_dmarmb(buf + inst_size);
+			inst_size += pl330_dmast(buf + inst_size);
+			inst_size += pl330_dmawmb(buf + inst_size);
+		}
+
+		inst_size += pl330_dmalpend(buf + inst_size,
+				inst_size - loop_start0, LC_0);
+	}
+
+	inst_size += pl330_dmasev(buf + inst_size, pl330_ch->id);
+	inst_size += pl330_dmaend(buf + inst_size);
+
+	return inst_size;
+}
+
+/*
+ * dmaengine prep_dma_memcpy hook: build a descriptor that copies @len bytes
+ * from @src to @dest on @chan.
+ *
+ * Returns the prepared async tx descriptor, or NULL when @chan is NULL,
+ * @len is zero, no descriptor is available, or the instruction program
+ * cannot be generated. On a generation failure the descriptor is put back on
+ * the channel's free list instead of being leaked.
+ */
+static struct dma_async_tx_descriptor *
+pl330_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned long flags)
+{
+	struct pl330_chan *pl330_ch;
+	struct device *dev;
+	struct pl330_desc *desc;
+	int inst_size;
+
+	/* Validate before following any pointer derived from chan. */
+	if (!chan || !len)
+		return NULL;
+
+	pl330_ch = to_pl330_chan(chan);
+	dev = pl330_ch->pl330_dev->common.dev;
+
+	desc = pl330_get_descriptor(pl330_ch);
+	if (!desc)
+		return NULL;
+
+	inst_size = pl330_make_instructions(pl330_ch, desc, dest, src, len, 0,
+			DMA_NONE);
+	if (inst_size < 0) {
+		dev_err(dev, "Failed to make instructions for memcpy\n");
+		/* hand the unused descriptor back for later reuse */
+		list_add_tail(&desc->desc_node, &pl330_ch->free_desc);
+		return NULL;
+	}
+
+	desc->async_tx.flags = flags;
+
+	return &desc->async_tx;
+}
+
+static struct dma_async_tx_descriptor *
+pl330_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned long flags)
+{
+	struct pl330_chan *pl330_ch = to_pl330_chan(chan);
+	struct device *dev = pl330_ch->pl330_dev->common.dev;
+	struct pl330_register_cc *pl330_reg_cc = &pl330_ch->pl330_reg_cc;
+	struct pl330_dma_slave *dma_slave = chan->private;
+	struct pl330_desc *desc;
+	struct scatterlist *sg;
+	int inst_size = 0;
+	int i;
+
+	BUG_ON(!dma_slave);
+
+	if (!dma_slave->tx_reg)
+		BUG_ON(direction == DMA_TO_DEVICE);
+
+	if (!dma_slave->rx_reg)
+		BUG_ON(direction == DMA_FROM_DEVICE);
+
+	if (unlikely(!sg_len))
+		return NULL;
+
+	desc = pl330_get_descriptor(pl330_ch);
+	if (!desc)
+		return NULL;
+
+	for_each_sg(sgl, sg, sg_len, i) {
+		dma_addr_t dest;
+		dma_addr_t src;
+		unsigned int len = sg_dma_len(sg);
+
+		if (direction == DMA_TO_DEVICE) {
+			dest = dma_slave->tx_reg;
+			src = sg_dma_address(sg);
+			pl330_reg_cc->dst_inc = 0;
+		} else {
+			dest = sg_dma_address(sg);
+			src = dma_slave->rx_reg;
+			pl330_reg_cc->src_inc = 0;
+		}
+		pl330_reg_cc->src_burst_size = dma_slave->reg_width;
+		pl330_reg_cc->dst_burst_size = dma_slave->reg_width;
+
+		inst_size = pl330_make_instructions(pl330_ch, desc, dest, src,
+				len, inst_size, direction);
+		if (inst_size < 0) {
+			dev_err(dev, "Failed to make instructions for slave\n");
+			return NULL;
+		}
+	}
+
+	desc->async_tx.flags = flags;
+
+	return &desc->async_tx;
+}
+
+/*
+ * Completion handling for one finished transfer on @pl330_ch: first start
+ * the next queued descriptor, then retire the head of the complete list to
+ * the free list, record its cookie as completed and invoke the client's
+ * callback if one is set.
+ */
+static void pl330_xfer_complete(struct pl330_chan *pl330_ch)
+{
+	struct pl330_desc *finished;
+	dma_async_tx_callback notify;
+	void *notify_arg;
+
+	/* execute next desc */
+	pl330_issue_pending(&pl330_ch->common);
+
+	if (list_empty(&pl330_ch->complete_desc))
+		return;
+
+	finished = to_pl330_desc(pl330_ch->complete_desc.next);
+	list_move_tail(&finished->desc_node, &pl330_ch->free_desc);
+	pl330_ch->completed = finished->async_tx.cookie;
+
+	notify = finished->async_tx.callback;
+	notify_arg = finished->async_tx.callback_param;
+	if (!notify)
+		return;
+
+	notify(notify_arg);
+}
+
+/*
+ * Per-channel tasklet: @data is the struct pl330_chan pointer. Runs the
+ * completion handling and then sets the channel's bit in INTEN again, which
+ * the interrupt handler cleared before scheduling the tasklet.
+ */
+static void pl330_ch_tasklet(unsigned long data)
+{
+	struct pl330_chan *pl330_ch = (struct pl330_chan *)data;
+	unsigned int inten;
+
+	pl330_xfer_complete(pl330_ch);
+
+	/* enable channel interrupt */
+	inten = readl(pl330_ch->pl330_dev->reg_base + PL330_INTEN);
+	inten |= (1 << pl330_ch->id);
+	writel(inten, pl330_ch->pl330_dev->reg_base + PL330_INTEN);
+}
+
+/*
+ * Interrupt handler for the controller; @data is the struct pl330_device.
+ *
+ * For every channel whose bit is set in INTSTATUS the interrupt is cleared
+ * through INTCLR, the channel's bit in INTEN is cleared, and the channel
+ * tasklet is scheduled (the tasklet sets the INTEN bit again).
+ *
+ * Returns IRQ_NONE when no status bit is set, so that the IRQ core can
+ * account for spurious interrupts, and IRQ_HANDLED otherwise.
+ */
+static irqreturn_t pl330_irq_handler(int irq, void *data)
+{
+	struct pl330_device *pl330_dev = data;
+	struct pl330_chan *pl330_ch;
+	unsigned int intstatus;
+	unsigned int inten;
+	int i;
+
+	intstatus = readl(pl330_dev->reg_base + PL330_INTSTATUS);
+
+	/* Nothing pending from this controller: not our interrupt. */
+	if (intstatus == 0)
+		return IRQ_NONE;
+
+	inten = readl(pl330_dev->reg_base + PL330_INTEN);
+	for (i = 0; i < PL330_MAX_CHANS; i++) {
+		if (intstatus & (1 << i)) {
+			pl330_ch = &pl330_dev->pl330_ch[i];
+			writel(1 << i, pl330_dev->reg_base + PL330_INTCLR);
+
+			/* disable channel interrupt */
+			inten &= ~(1 << i);
+			writel(inten, pl330_dev->reg_base + PL330_INTEN);
+
+			tasklet_schedule(&pl330_ch->tasklet);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Load the default channel-control image into @pl330_reg_cc: incrementing
+ * source and destination addresses, burst size and burst length fields of 0,
+ * protection control 0x2, cache control 0 and no endian swap.
+ */
+static void pl330_reg_cc_init(struct pl330_register_cc *pl330_reg_cc)
+{
+	*pl330_reg_cc = (struct pl330_register_cc) {
+		.src_inc		= 0x1,
+		.src_burst_size		= 0,
+		.src_burst_len		= 0,
+		.src_prot_ctrl		= 0x2,
+		.src_cache_ctrl		= 0,
+		.dst_inc		= 0x1,
+		.dst_burst_size		= 0,
+		.dst_burst_len		= 0,
+		.dst_prot_ctrl		= 0x2,
+		.dst_cache_ctrl		= 0,
+		.endian_swqp_size	= 0,
+	};
+}
+
+/*
+ * AMBA probe: map the controller, initialise all PL330_MAX_CHANS channels
+ * and register the device with the dmaengine core.
+ *
+ * Platform data is mandatory; its cap_mask selects which prep routines are
+ * installed. The interrupt is requested only after every channel (and its
+ * tasklet) has been initialised, and the channel interrupts are enabled in
+ * INTEN after that, so the handler never sees a half-initialised channel.
+ *
+ * Returns 0 on success or a negative errno. The AMBA regions are released
+ * on every failure path after they were requested; devm-managed resources
+ * are released by the driver core.
+ */
+static int __devinit pl330_probe(struct amba_device *adev, struct amba_id *id)
+{
+	struct pl330_device *pl330_dev;
+	struct dma_device *dma_dev;
+	struct pl330_platform_data *pdata = adev->dev.platform_data;
+	const unsigned int chan_mask = (1 << PL330_MAX_CHANS) - 1;
+	unsigned int val;
+	int err;
+	int i;
+
+	if (!pdata) {
+		dev_err(&adev->dev, "platform data is required!\n");
+		return -EINVAL;
+	}
+
+	pl330_dev = devm_kzalloc(&adev->dev, sizeof(*pl330_dev), GFP_KERNEL);
+	if (!pl330_dev)
+		return -ENOMEM;
+
+	err = amba_request_regions(adev, NULL);
+	if (err)
+		return err;
+
+	pl330_dev->reg_base = devm_ioremap(&adev->dev, adev->res.start,
+			resource_size(&adev->res));
+	if (!pl330_dev->reg_base) {
+		err = -EBUSY;
+		goto err_release_regions;
+	}
+
+	dma_dev = &pl330_dev->common;
+	INIT_LIST_HEAD(&dma_dev->channels);
+
+	/* set base routines */
+	dma_dev->device_alloc_chan_resources = pl330_alloc_chan_resources;
+	dma_dev->device_free_chan_resources = pl330_free_chan_resources;
+	dma_dev->device_is_tx_complete = pl330_is_tx_complete;
+	dma_dev->device_issue_pending = pl330_issue_pending;
+	dma_dev->device_terminate_all = pl330_terminate_all;
+	dma_dev->dev = &adev->dev;
+	dma_dev->cap_mask = pdata->cap_mask;
+
+	/* set prep routines based on capability */
+	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
+		dma_dev->device_prep_dma_memcpy = pl330_prep_dma_memcpy;
+	if (dma_has_cap(DMA_SLAVE, dma_dev->cap_mask))
+		dma_dev->device_prep_slave_sg = pl330_prep_slave_sg;
+
+	for (i = 0; i < PL330_MAX_CHANS; i++) {
+		struct pl330_chan *pl330_ch = &pl330_dev->pl330_ch[i];
+
+		spin_lock_init(&pl330_ch->lock);
+		pl330_ch->id = i;
+		pl330_ch->pl330_dev = pl330_dev;
+		pl330_ch->common.device = dma_dev;
+		tasklet_init(&pl330_ch->tasklet, pl330_ch_tasklet,
+				(unsigned long)pl330_ch);
+		INIT_LIST_HEAD(&pl330_ch->free_desc);
+		INIT_LIST_HEAD(&pl330_ch->queue_desc);
+		INIT_LIST_HEAD(&pl330_ch->complete_desc);
+		list_add_tail(&pl330_ch->common.device_node,
+				&dma_dev->channels);
+		dma_dev->chancnt++;
+		pl330_reg_cc_init(&pl330_ch->pl330_reg_cc);
+	}
+
+	/* Channels are ready: the handler may now schedule their tasklets. */
+	err = devm_request_irq(&adev->dev, adev->irq[0], pl330_irq_handler, 0,
+			dev_name(&adev->dev), pl330_dev);
+	if (err)
+		goto err_release_regions;
+
+	/* enable the interrupt of every channel */
+	val = readl(pl330_dev->reg_base + PL330_INTEN);
+	val |= chan_mask;
+	writel(val, pl330_dev->reg_base + PL330_INTEN);
+
+	amba_set_drvdata(adev, pl330_dev);
+
+	err = dma_async_device_register(dma_dev);
+	if (err) {
+		dev_err(&adev->dev, "failed to register dma device\n");
+		goto err_disable_irq;
+	}
+
+	dev_info(&adev->dev, "PL330 DMA Controller: ( %s%s)\n",
+		dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "memcpy " : "",
+		dma_has_cap(DMA_SLAVE, dma_dev->cap_mask) ? "slave " : "");
+
+	return 0;
+
+err_disable_irq:
+	val = readl(pl330_dev->reg_base + PL330_INTEN);
+	val &= ~chan_mask;
+	writel(val, pl330_dev->reg_base + PL330_INTEN);
+	amba_set_drvdata(adev, NULL);
+err_release_regions:
+	amba_release_regions(adev);
+	return err;
+}
+
+/*
+ * AMBA remove: unregister from the dmaengine core and quiesce the device.
+ *
+ * The interrupt is released before the tasklets are killed so that the
+ * handler cannot schedule a tasklet again afterwards. INTEN is cleared last
+ * because a tasklet that was still running sets its channel's bit again.
+ * The AMBA regions requested in probe are released here; the original code
+ * never released them.
+ */
+static int __devexit pl330_remove(struct amba_device *adev)
+{
+	struct pl330_device *pl330_dev = amba_get_drvdata(adev);
+	struct pl330_chan *pl330_ch, *_pl330_ch;
+
+	dma_async_device_unregister(&pl330_dev->common);
+
+	/* Stop the handler first so no tasklet can be scheduled anymore. */
+	devm_free_irq(&adev->dev, adev->irq[0], pl330_dev);
+
+	list_for_each_entry_safe(pl330_ch, _pl330_ch,
+			&pl330_dev->common.channels, common.device_node) {
+		list_del(&pl330_ch->common.device_node);
+		tasklet_kill(&pl330_ch->tasklet);
+	}
+
+	/* leave the controller with all channel interrupts disabled */
+	writel(0, pl330_dev->reg_base + PL330_INTEN);
+
+	amba_set_drvdata(adev, NULL);
+	amba_release_regions(adev);
+
+	return 0;
+}
+
+/*
+ * AMBA ID table used as the driver's id_table: one id/mask pair for the
+ * PL330, followed by an all-zero terminating entry.
+ */
+static struct amba_id pl330_ids[] = {
+	{
+		.id	= 0x00041330,
+		.mask	= 0x000fffff,
+	},
+	{ 0, 0 },
+};
+
+/* AMBA driver glue: binds pl330_probe/pl330_remove to the IDs in pl330_ids. */
+static struct amba_driver pl330_driver = {
+	.drv = {
+		.owner	= THIS_MODULE,
+		.name	= "pl330",
+	},
+	.probe		= pl330_probe,
+	.remove		= __devexit_p(pl330_remove),
+	.id_table	= pl330_ids,
+};
+
+/*
+ * Register the AMBA driver. Hooked up with subsys_initcall() rather than
+ * module_init() — presumably so the controller is available before its
+ * client drivers initialise; confirm against the platform's init order.
+ */
+static int __init pl330_init(void)
+{
+	return amba_driver_register(&pl330_driver);
+}
+subsys_initcall(pl330_init);
+
+/* Module exit: unregister the AMBA driver registered by pl330_init(). */
+static void __exit pl330_exit(void)
+{
+	amba_driver_unregister(&pl330_driver);
+}
+
+module_exit(pl330_exit);
+
+MODULE_DESCRIPTION("Driver for PL330 DMA Controller");
+MODULE_AUTHOR("Joonyoung Shim <jy0922.shim@samsung.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/pl330_dmac.h b/drivers/dma/pl330_dmac.h
new file mode 100644
index 0000000..d2cbd4e
--- /dev/null
+++ b/drivers/dma/pl330_dmac.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright (C) 2009 Samsung Electronics Co.Ltd
+ * Author: Joonyoung Shim <jy0922.shim@samsung.com>
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#ifndef __PL330_DMAC_H
+#define __PL330_DMAC_H
+
+/* Number of channels the driver sets up per controller. */
+#define PL330_MAX_CHANS		8
+/* Iteration count at which a transfer is split into nested loops. */
+#define PL330_MAX_LOOPS		256
+/* Size in bytes of each descriptor's coherent instruction pool. */
+#define PL330_POOL_SIZE		SZ_256
+/* Descriptors pre-allocated per channel in alloc_chan_resources. */
+#define PL330_DESC_NUM		8
+
+/*
+ * Register offsets from the controller base. The per-channel macros take the
+ * channel number; their argument is parenthesised so that an expression such
+ * as PL330_CS(i + 1) expands correctly.
+ */
+#define PL330_DS		0x00
+#define PL330_DPC		0x04
+#define PL330_INTEN		0x20			/* R/W */
+#define PL330_ES		0x24
+#define PL330_INTSTATUS		0x28
+#define PL330_INTCLR		0x2c			/* W/O */
+#define PL330_FSM		0x30
+#define PL330_FSC		0x34
+#define PL330_FTM		0x38
+#define PL330_FTC(ch)		(0x40 + ((ch) << 2))
+#define PL330_CS(ch)		(0x100 + ((ch) << 3))
+#define PL330_CPC(ch)		(0x104 + ((ch) << 3))
+#define PL330_SA(ch)		(0x400 + ((ch) << 5))
+#define PL330_DA(ch)		(0x404 + ((ch) << 5))
+#define PL330_CC(ch)		(0x408 + ((ch) << 5))
+#define PL330_LC0(ch)		(0x40c + ((ch) << 5))
+#define PL330_LC1(ch)		(0x410 + ((ch) << 5))
+#define PL330_DBGSTATUS		0xd00
+#define PL330_DBGCMD		0xd04			/* W/O */
+#define PL330_DBGINST0		0xd08			/* W/O */
+#define PL330_DBGINST1		0xd0c			/* W/O */
+#define PL330_CR0		0xe00
+#define PL330_CR1		0xe04
+#define PL330_CR2		0xe08
+#define PL330_CR3		0xe0c
+#define PL330_CR4		0xe10
+#define PL330_CRDN		0xe14
+#define PL330_PERIPH_ID0	0xfe0
+#define PL330_PERIPH_ID1	0xfe4
+#define PL330_PERIPH_ID2	0xfe8
+#define PL330_PERIPH_ID3	0xfec
+#define PL330_PCELL_ID0		0xff0
+#define PL330_PCELL_ID1		0xff4
+#define PL330_PCELL_ID2		0xff8
+#define PL330_PCELL_ID3		0xffc
+
+/*
+ * PL330_CC field values, matching the layout of struct pl330_register_cc:
+ * source increment at bit 0, source burst size at bits [3:1], source burst
+ * length at bits [7:4], destination increment at bit 14, destination burst
+ * size at bits [17:15] and destination burst length at bits [21:18].
+ *
+ * The burst size field holds log2 of the size in bytes (0 = 1 byte ...
+ * 7 = 128 bytes) and the burst length field holds "transfers - 1". The
+ * previous definitions were off by one for the 1/2/4-byte sizes, had no
+ * 8-byte size, and PL330_DEST_BLEN swapped its offset and shift.
+ */
+#define PL330_SRC_INC			(1 << 0)
+#define PL330_SRC_BSIZE_1BYTE		(0 << 1)
+#define PL330_SRC_BSIZE_2BYTE		(1 << 1)
+#define PL330_SRC_BSIZE_4BYTE		(2 << 1)
+#define PL330_SRC_BSIZE_8BYTE		(3 << 1)
+#define PL330_SRC_BSIZE_16BYTE		(4 << 1)
+#define PL330_SRC_BSIZE_32BYTE		(5 << 1)
+#define PL330_SRC_BSIZE_64BYTE		(6 << 1)
+#define PL330_SRC_BSIZE_128BYTE		(7 << 1)
+#define PL330_SRC_BLEN(n)		(((n) - 1) << 4)
+#define PL330_DEST_INC			(1 << 14)
+#define PL330_DEST_BSIZE_1BYTE		(0 << 15)
+#define PL330_DEST_BSIZE_2BYTE		(1 << 15)
+#define PL330_DEST_BSIZE_4BYTE		(2 << 15)
+#define PL330_DEST_BSIZE_8BYTE		(3 << 15)
+#define PL330_DEST_BSIZE_16BYTE		(4 << 15)
+#define PL330_DEST_BSIZE_32BYTE		(5 << 15)
+#define PL330_DEST_BSIZE_64BYTE		(6 << 15)
+#define PL330_DEST_BSIZE_128BYTE	(7 << 15)
+#define PL330_DEST_BLEN(n)		(((n) - 1) << 18)
+
+/* PL330_DBGSTATUS values; pl330_issue_pending() defers while it reads busy */
+#define PL330_DBG_IDLE		0
+#define PL330_DBG_BUSY		1
+
+/*
+ * Instruction set opcodes (first byte of each instruction, before any
+ * operand bits are OR-ed in by the encoder helpers).
+ */
+#define DMAADDH			(0x54)
+#define DMAEND			(0x00)
+#define DMAFLUSHHP		(0x35)
+#define DMAGO			(0xa0)
+#define DMALD			(0x04)
+#define DMALDS			(0x05)
+#define DMALDB			(0x07)
+#define DMALDPS			(0x25)
+#define DMALDPB			(0x27)
+#define DMALP			(0x20)
+#define DMALPEND		(0x38)
+#define DMALPENDS		(0x39)
+#define DMALPENDB		(0x3b)
+#define DMALPFE			(0x28)
+#define DMAKILL			(0x01)
+#define DMAMOV			(0xbc)
+/* DMANOP is 0x18; it was previously a copy of the DMAMOV opcode (0xbc) */
+#define DMANOP			(0x18)
+#define DMARMB			(0x12)
+#define DMASEV			(0x34)
+#define DMAST			(0x08)
+#define DMASTS			(0x09)
+#define DMASTB			(0x0b)
+#define DMASTPS			(0x29)
+#define DMASTPB			(0x2b)
+#define DMASTZ			(0x0c)
+#define DMAWFE			(0x36)
+#define DMAWFPS			(0x30)
+#define DMAWFPB			(0x32)
+#define DMAWFPP			(0x31)
+#define DMAWMB			(0x13)
+
+/* "ra" operand of DMAADDH (passed to pl330_dmaaddh) */
+#define RA_SA			0
+#define RA_DA			1
+
+/* "ns" operand of DMAGO (passed to pl330_dmago) */
+#define NS_SECURE		0
+#define NS_NONSECURE		1
+
+/* "lc" loop-counter selector of the DMALP and DMALPEND family */
+#define LC_0			0
+#define LC_1			1
+
+/* "rd" destination-register selector of DMAMOV */
+#define RD_SAR			0
+#define RD_CCR			1
+#define RD_DAR			2
+
+/* "invalid" flag of DMAWFE */
+#define INVALID_OFF		0
+#define INVALID_ON		1
+
+/*
+ * Bitfield image of the PL330_CC (channel control) value. The driver reads
+ * the whole struct back as a u32 and loads it with DMAMOV into RD_CCR, so
+ * the fields must add up to exactly 32 bits.
+ * NOTE(review): this relies on the compiler allocating bitfields starting
+ * from the least significant bit — confirm for every supported ABI.
+ */
+struct pl330_register_cc {
+	unsigned int src_inc:1;
+	unsigned int src_burst_size:3;
+	unsigned int src_burst_len:4;
+	unsigned int src_prot_ctrl:3;
+	unsigned int src_cache_ctrl:3;
+	unsigned int dst_inc:1;
+	unsigned int dst_burst_size:3;
+	unsigned int dst_burst_len:4;
+	unsigned int dst_prot_ctrl:3;
+	unsigned int dst_cache_ctrl:3;
+	unsigned int endian_swqp_size:4;
+};
+
+/*
+ * One transfer descriptor.
+ * async_tx:       dmaengine descriptor; its phys field holds the bus address
+ *                 of the instruction pool
+ * desc_node:      links the descriptor into one of the channel's lists
+ * desc_pool_virt: CPU address of the PL330_POOL_SIZE byte coherent buffer
+ *                 that holds the generated instructions
+ */
+struct pl330_desc {
+	struct dma_async_tx_descriptor	async_tx;
+	struct list_head		desc_node;
+	void				*desc_pool_virt;
+};
+
+/*
+ * Per-channel driver state.
+ * pl330_dev:     owning controller
+ * pl330_reg_cc:  channel control image loaded into CCR for each transfer
+ * common:        embedded dmaengine channel
+ * tasklet:       completion tasklet scheduled from the interrupt handler
+ * free_desc:     descriptors available for reuse
+ * queue_desc:    submitted descriptors waiting to be started
+ * complete_desc: descriptors that have been started and await completion
+ * lock:          taken in tx_submit around cookie and queue updates
+ * completed:     cookie of the last completed descriptor
+ * id:            channel number, also used as its event/interrupt number
+ * desc_num:      number of descriptors pre-allocated for the channel
+ */
+struct pl330_chan {
+	struct pl330_device		*pl330_dev;
+	struct pl330_register_cc	pl330_reg_cc;
+	struct dma_chan			common;
+	struct tasklet_struct		tasklet;
+	struct list_head		free_desc;
+	struct list_head		queue_desc;
+	struct list_head		complete_desc;
+	spinlock_t			lock;
+	dma_cookie_t			completed;
+	unsigned int			id;
+	unsigned int			desc_num;
+};
+
+/*
+ * Per-controller driver state.
+ * reg_base: ioremapped base of the controller registers
+ * pl330_ch: state of each of the PL330_MAX_CHANS channels
+ * common:   embedded dmaengine device registered with the core
+ */
+struct pl330_device {
+	void __iomem		*reg_base;
+	struct pl330_chan	pl330_ch[PL330_MAX_CHANS];
+	struct dma_device	common;
+};
+
+#endif
diff --git a/include/linux/amba/pl330.h b/include/linux/amba/pl330.h
new file mode 100644
index 0000000..566b441
--- /dev/null
+++ b/include/linux/amba/pl330.h
@@ -0,0 +1,64 @@
+/*
+ * include/linux/amba/pl330.h
+ *
+ * Copyright (C) 2009 Samsung Electronics Co.Ltd
+ *	Joonyoung Shim <jy0922.shim@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#ifndef __AMBA_PL330_H
+#define __AMBA_PL330_H
+
+#include <linux/dmaengine.h>
+
+/**
+ * struct pl330_platform_data - Platform device data for PL330 DMAC
+ * @cap_mask: one or more dma_capability flags
+ */
+struct pl330_platform_data {
+	dma_cap_mask_t		cap_mask;
+};
+
+/**
+ * enum pl330_dma_slave_width - DMA slave register access width.
+ * @PL330_DMA_SLAVE_WIDTH_1BYTE: Do 1-byte slave register accesses
+ * @PL330_DMA_SLAVE_WIDTH_2BYTE: Do 2-byte slave register accesses
+ * @PL330_DMA_SLAVE_WIDTH_4BYTE: Do 4-byte slave register accesses
+ * @PL330_DMA_SLAVE_WIDTH_8BYTE: Do 8-byte slave register accesses
+ * @PL330_DMA_SLAVE_WIDTH_16BYTE: Do 16-byte slave register accesses
+ * @PL330_DMA_SLAVE_WIDTH_32BYTE: Do 32-byte slave register accesses
+ * @PL330_DMA_SLAVE_WIDTH_64BYTE: Do 64-byte slave register accesses
+ * @PL330_DMA_SLAVE_WIDTH_128BYTE: Do 128-byte slave register accesses
+ */
+enum pl330_dma_slave_width {
+	PL330_DMA_SLAVE_WIDTH_1BYTE = 0,
+	PL330_DMA_SLAVE_WIDTH_2BYTE,
+	PL330_DMA_SLAVE_WIDTH_4BYTE,
+	PL330_DMA_SLAVE_WIDTH_8BYTE,
+	PL330_DMA_SLAVE_WIDTH_16BYTE,
+	PL330_DMA_SLAVE_WIDTH_32BYTE,
+	PL330_DMA_SLAVE_WIDTH_64BYTE,
+	PL330_DMA_SLAVE_WIDTH_128BYTE,
+};
+
+/**
+ * struct pl330_dma_slave - Controller-specific information about a slave
+ * @tx_reg: physical address of data register used for
+ *	memory-to-peripheral transfers
+ * @rx_reg: physical address of data register used for
+ *	peripheral-to-memory transfers
+ * @reg_width: peripheral register width
+ * @peri_num: peripheral number
+ */
+struct pl330_dma_slave {
+	dma_addr_t			tx_reg;
+	dma_addr_t			rx_reg;
+	enum pl330_dma_slave_width	reg_width;
+	unsigned int			peri_num;
+};
+
+#endif /* __AMBA_PL330_H */
-- 
1.6.3.3

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] PL330: Add PL330 DMA controller driver
  2010-03-25  3:17 [PATCH v2] PL330: Add PL330 DMA controller driver Joonyoung Shim
@ 2010-03-25  5:34 ` jassi brar
  2010-03-25  8:30   ` Linus Walleij
  2010-03-25  5:44 ` Marc Zyngier
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 29+ messages in thread
From: jassi brar @ 2010-03-25  5:34 UTC (permalink / raw)
  To: Joonyoung Shim
  Cc: dan.j.williams, linus.ml.walleij, kyungmin.park, linux-kernel,
	linux-arm-kernel, Russell King - ARM Linux, Ben Dooks

On Thu, Mar 25, 2010 at 12:17 PM, Joonyoung Shim
<jy0922.shim@samsung.com> wrote:
> The PL330 is currently the dma controller using at the S5PC1XX arm SoC.
> This supports DMA_MEMCPY and DMA_SLAVE.
>
> The datasheet for the PL330 can find below url:
> http://infocenter.arm.com/help/topic/com.arm.doc.ddi0424a/DDI0424A_dmac_pl330_r0p0_trm.pdf
>
> Signed-off-by: Joonyoung Shim <jy0922.shim@samsung.com>
> ---
> Change log:
>
> v2: Convert into an amba_device driver.
>    Code clean and update from v1 patch review.

[CC'ing Russell and Ben as stakeholders]

Dear Maintainers

I too have been writing a driver for PL330 after taking into account the
suggestions of Russell, Ben and other participants of the thread
http://lists.infradead.org/pipermail/linux-arm-kernel/2010-February/009856.html

If you don't think this driver conflicts with the theme of the thread,
may I ask you
to please put this driver on hold until you checkout my implementation
of solution
to the issue... which should be soon.

Regards,
Jaswinder Singh
Solution-1 Group
Linux Kernel Team
System LSI Division
Samsung Electronics Co., Ltd.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] PL330: Add PL330 DMA controller driver
  2010-03-25  3:17 [PATCH v2] PL330: Add PL330 DMA controller driver Joonyoung Shim
  2010-03-25  5:34 ` jassi brar
@ 2010-03-25  5:44 ` Marc Zyngier
  2010-03-25  9:01   ` Joonyoung Shim
  2010-03-25  8:26 ` Linus Walleij
  2010-03-26  2:08 ` jassi brar
  3 siblings, 1 reply; 29+ messages in thread
From: Marc Zyngier @ 2010-03-25  5:44 UTC (permalink / raw)
  To: Joonyoung Shim
  Cc: dan.j.williams, linus.ml.walleij, kyungmin.park, linux-kernel,
	linux-arm-kernel

On Thu, 25 Mar 2010 12:17:15 +0900
Joonyoung Shim <jy0922.shim@samsung.com> wrote:

> +/* instruction set functions */
> +static inline int pl330_dmaaddh(u8 *desc_pool_virt, u16 imm, bool ra)
> +{
> +	u8 opcode = DMAADDH | (ra << 1);
> +
> +	writeb(opcode, desc_pool_virt++);

desc_pool_virt is a virtual address (from dma_alloc_coherent). In such
case, write[bwl] seems to be the wrong interface. I suggest the
following code:

	*desc_pool_virt++ = opcode;

> +	writew(imm, desc_pool_virt);

Does anything ensure that this won't generate an unaligned access?

> +	return 3;
> +}

	M.
-- 
I'm the slime oozin' out from your TV set...

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] PL330: Add PL330 DMA controller driver
  2010-03-25  3:17 [PATCH v2] PL330: Add PL330 DMA controller driver Joonyoung Shim
  2010-03-25  5:34 ` jassi brar
  2010-03-25  5:44 ` Marc Zyngier
@ 2010-03-25  8:26 ` Linus Walleij
  2010-03-26  2:08 ` jassi brar
  3 siblings, 0 replies; 29+ messages in thread
From: Linus Walleij @ 2010-03-25  8:26 UTC (permalink / raw)
  To: Joonyoung Shim
  Cc: dan.j.williams, kyungmin.park, linux-arm-kernel, linux-kernel

2010/3/25 Joonyoung Shim <jy0922.shim@samsung.com>:

> The PL330 is currently the dma controller using at the S5PC1XX arm SoC.
> This supports DMA_MEMCPY and DMA_SLAVE.

Looks good to me now so:
Acked-by: Linus Walleij <linus.walleij@stericsson.com>

Linus Walleij

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] PL330: Add PL330 DMA controller driver
  2010-03-25  5:34 ` jassi brar
@ 2010-03-25  8:30   ` Linus Walleij
  2010-03-25 12:17     ` jassi brar
  0 siblings, 1 reply; 29+ messages in thread
From: Linus Walleij @ 2010-03-25  8:30 UTC (permalink / raw)
  To: jassi brar
  Cc: Joonyoung Shim, dan.j.williams, kyungmin.park, linux-kernel,
	linux-arm-kernel, Russell King - ARM Linux, Ben Dooks

2010/3/25 jassi brar <jassisinghbrar@gmail.com>:

> I too have been writing a driver for PL330 after taking into account the
> suggestions of Russell, Ben and other participants of the thread
> http://lists.infradead.org/pipermail/linux-arm-kernel/2010-February/009856.html
>
> If you don't think this driver conflicts with the theme of the thread,
> may I ask you to please put this driver on hold until you checkout my implementation
> of solution to the issue... which should be soon.

Please post the code as it looks today even if it's not compiling
instead of asking others
to hold their patches back. It will be obvious from what you have if
there is some special
use you're covering. Perhaps Joonyoung can simply port over the stuff
you need to
this driver if you show your code.

Yours,
Linus Walleij

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] PL330: Add PL330 DMA controller driver
  2010-03-25  5:44 ` Marc Zyngier
@ 2010-03-25  9:01   ` Joonyoung Shim
  2010-03-25  9:32     ` Marc Zyngier
  0 siblings, 1 reply; 29+ messages in thread
From: Joonyoung Shim @ 2010-03-25  9:01 UTC (permalink / raw)
  To: Marc Zyngier
  Cc: dan.j.williams, linus.ml.walleij, kyungmin.park, linux-kernel,
	linux-arm-kernel

On 3/25/2010 2:44 PM, Marc Zyngier wrote:
> On Thu, 25 Mar 2010 12:17:15 +0900
> Joonyoung Shim <jy0922.shim@samsung.com> wrote:
> 
>> +/* instruction set functions */
>> +static inline int pl330_dmaaddh(u8 *desc_pool_virt, u16 imm, bool ra)
>> +{
>> +	u8 opcode = DMAADDH | (ra << 1);
>> +
>> +	writeb(opcode, desc_pool_virt++);
> 
> desc_pool_virt is a virtual address (from dma_alloc_coherent). In such
> case, write[bwl] seems to be the wrong interface. I suggest the
> following code:
> 
> 	*desc_pool_virt++ = opcode;
> 
>> +	writew(imm, desc_pool_virt);
> 

Right. The write[bwl] is api for address ioremapped of io device. I will
change these.

> Does anything ensure that this won't generate an unaligned access?
> 

PL330 DMA controller fetches variable length instructions that consist of
one to six bytes, so i think unaligned access is no problem.

>> +	return 3;
>> +}
> 
> 	M.


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] PL330: Add PL330 DMA controller driver
  2010-03-25  9:01   ` Joonyoung Shim
@ 2010-03-25  9:32     ` Marc Zyngier
  2010-03-25 10:05       ` Joonyoung Shim
  0 siblings, 1 reply; 29+ messages in thread
From: Marc Zyngier @ 2010-03-25  9:32 UTC (permalink / raw)
  To: Joonyoung Shim
  Cc: dan.j.williams, linus.ml.walleij, kyungmin.park, linux-kernel,
	linux-arm-kernel


On Thu, 25 Mar 2010 18:01:00 +0900, Joonyoung Shim
<jy0922.shim@samsung.com> wrote:
> On 3/25/2010 2:44 PM, Marc Zyngier wrote:
>> On Thu, 25 Mar 2010 12:17:15 +0900
>> Joonyoung Shim <jy0922.shim@samsung.com> wrote:
>> 
>>> +	writew(imm, desc_pool_virt);
>> 
> 
> Right. The write[bwl] is api for address ioremapped of io device. I will
> change these.
> 
>> Does anything ensure that this won't generate an unaligned access?
>> 
> 
> PL330 DMA controller fetches variable length instructions that consist of
> one to six bytes, so i think unaligned access is no problem.

I'm not too concerned about the device side of things. I'm more worried
about the CPU access when writing the 'imm' value to memory.

Consider desc_pool_virt 16bit aligned when entering the function. Writing
the opcode makes it unaligned and then writing the 'imm' value will result
as an unaligned access.

        M.
-- 
Who you jivin' with that Cosmik Debris?

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] PL330: Add PL330 DMA controller driver
  2010-03-25  9:32     ` Marc Zyngier
@ 2010-03-25 10:05       ` Joonyoung Shim
  2010-03-25 10:32         ` Marc Zyngier
  0 siblings, 1 reply; 29+ messages in thread
From: Joonyoung Shim @ 2010-03-25 10:05 UTC (permalink / raw)
  To: Marc Zyngier
  Cc: dan.j.williams, linus.ml.walleij, kyungmin.park, linux-kernel,
	linux-arm-kernel

On 3/25/2010 6:32 PM, Marc Zyngier wrote:
> On Thu, 25 Mar 2010 18:01:00 +0900, Joonyoung Shim
> <jy0922.shim@samsung.com> wrote:
>> On 3/25/2010 2:44 PM, Marc Zyngier wrote:
>>> On Thu, 25 Mar 2010 12:17:15 +0900
>>> Joonyoung Shim <jy0922.shim@samsung.com> wrote:
>>>
>>>> +	writew(imm, desc_pool_virt);
>> Right. The write[bwl] is api for address ioremapped of io device. I will
>> change these.
>>
>>> Does anything ensure that this won't generate an unaligned access?
>>>
>> PL330 DMA controller fetches variable length instructions that consist of
>> one to six bytes, so i think unaligned access is no problem.
> 
> I'm not too concerned about the device side of things. I'm more worried
> about the CPU access when writing the 'imm' value to memory.
> 
> Consider desc_pool_virt 16bit aligned when entering the function. Writing
> the opcode makes it unaligned and then writing the 'imm' value will result
> as an unaligned access.
> 

Why desc_pool_virt should be aligned more than 16bit?

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] PL330: Add PL330 DMA controller driver
  2010-03-25 10:05       ` Joonyoung Shim
@ 2010-03-25 10:32         ` Marc Zyngier
  2010-03-25 11:48           ` Joonyoung Shim
  0 siblings, 1 reply; 29+ messages in thread
From: Marc Zyngier @ 2010-03-25 10:32 UTC (permalink / raw)
  To: Joonyoung Shim
  Cc: dan.j.williams, linus.ml.walleij, kyungmin.park, linux-kernel,
	linux-arm-kernel


On Thu, 25 Mar 2010 19:05:47 +0900, Joonyoung Shim
<jy0922.shim@samsung.com> wrote:
> On 3/25/2010 6:32 PM, Marc Zyngier wrote:
>> On Thu, 25 Mar 2010 18:01:00 +0900, Joonyoung Shim
>> <jy0922.shim@samsung.com> wrote:
>>> On 3/25/2010 2:44 PM, Marc Zyngier wrote:
>>>> On Thu, 25 Mar 2010 12:17:15 +0900
>>>> Joonyoung Shim <jy0922.shim@samsung.com> wrote:
>>>>
>>>>> +	writew(imm, desc_pool_virt);
>>> Right. The write[bwl] is api for address ioremapped of io device. I will
>>> change these.
>>>
>>>> Does anything ensure that this won't generate an unaligned access?
>>>>
>>> PL330 DMA controller fetches variable length instructions that consist of
>>> one to six bytes, so i think unaligned access is no problem.
>> 
>> I'm not too concerned about the device side of things. I'm more worried
>> about the CPU access when writing the 'imm' value to memory.
>> 
>> Consider desc_pool_virt 16bit aligned when entering the function. Writing
>> the opcode makes it unaligned and then writing the 'imm' value will result
>> as an unaligned access.
>> 
> 
> Why desc_pool_virt should be aligned more than 16bit?

There is no reason for desc_pool_virt to be 16bit aligned. It's just that you
have 50% chance that it will be.
In such case, you will write 'imm' to a non 16bit-aligned address. In my
book, that's bad.

Same for pl330_dmamov(), which tries to write a 32bit value without
checking the proper alignment.
In such case, please use the put_unaligned macro to handle the possible
unaligned access.

        M.
-- 
Who you jivin' with that Cosmik Debris?

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] PL330: Add PL330 DMA controller driver
  2010-03-25 10:32         ` Marc Zyngier
@ 2010-03-25 11:48           ` Joonyoung Shim
  0 siblings, 0 replies; 29+ messages in thread
From: Joonyoung Shim @ 2010-03-25 11:48 UTC (permalink / raw)
  To: Marc Zyngier
  Cc: linus.ml.walleij, dan.j.williams, kyungmin.park,
	linux-arm-kernel, linux-kernel

On 3/25/2010 7:32 PM, Marc Zyngier wrote:
> On Thu, 25 Mar 2010 19:05:47 +0900, Joonyoung Shim
> <jy0922.shim@samsung.com> wrote:
>> On 3/25/2010 6:32 PM, Marc Zyngier wrote:
>>> On Thu, 25 Mar 2010 18:01:00 +0900, Joonyoung Shim
>>> <jy0922.shim@samsung.com> wrote:
>>>> On 3/25/2010 2:44 PM, Marc Zyngier wrote:
>>>>> On Thu, 25 Mar 2010 12:17:15 +0900
>>>>> Joonyoung Shim <jy0922.shim@samsung.com> wrote:
>>>>>
>>>>>> +	writew(imm, desc_pool_virt);
>>>> Right. The write[bwl] is api for address ioremapped of io device. I will
>>>> change these.
>>>>
>>>>> Does anything ensure that this won't generate an unaligned access?
>>>>>
>>>> PL330 DMA controller fetches variable length instructions that consist of
>>>> one to six bytes, so i think unaligned access is no problem.
>>> I'm not too concerned about the device side of things. I'm more worried
>>> about the CPU access when writing the 'imm' value to memory.
>>>
>>> Consider desc_pool_virt 16bit aligned when entering the function. Writing
>>> the opcode makes it unaligned and then writing the 'imm' value will result
>>> as an unaligned access.
>>>
>> Why desc_pool_virt should be aligned more than 16bit?
> 
> There is no reason for desc_pool_virt to be 16bit aligned. It's just that you
> have 50% chance that it will be.
> In such case, you will write 'imm' to a non 16bit-aligned address. In my
> book, that's bad.
> 
> Same for pl330_dmamov(), which tries to write a 32bit value without
> checking the proper alignment.
> In such case, please use the put_unaligned macro to handle the possible
> unaligned access.
> 

OK. i will use put_unaligned.
Thanks.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] PL330: Add PL330 DMA controller driver
  2010-03-25  8:30   ` Linus Walleij
@ 2010-03-25 12:17     ` jassi brar
  2010-03-25 15:13       ` Dan Williams
  2010-03-25 15:20       ` Linus Walleij
  0 siblings, 2 replies; 29+ messages in thread
From: jassi brar @ 2010-03-25 12:17 UTC (permalink / raw)
  To: Linus Walleij
  Cc: Joonyoung Shim, dan.j.williams, kyungmin.park, linux-kernel,
	linux-arm-kernel, Russell King - ARM Linux, Ben Dooks

On Thu, Mar 25, 2010 at 5:30 PM, Linus Walleij
<linus.ml.walleij@gmail.com> wrote:
> 2010/3/25 jassi brar <jassisinghbrar@gmail.com>:
>
>> I too have been writing a driver for PL330 after taking into account the
>> suggestions of Russell, Ben and other participants of the thread
>> http://lists.infradead.org/pipermail/linux-arm-kernel/2010-February/009856.html
>>
>> If you don't think this driver conflicts with the theme of the thread,
>> may I ask you to please put this driver on hold until you checkout my implementation
>> of solution to the issue... which should be soon.
>
> Please post the code as it looks today even if it's not compiling
> instead of asking others
> to hold their patches back. It will be obvious from what you have if
> there is some special
> use you're covering.
My approach is to write a separate PL330 core driver as the backend which
can be reused by any DMA API implementer driver. That will avoid
having two copies
of the PL330 driver, among other benefits. And if this patch is accepted, there
_will_ exist two copies of the PL330 driver -- one in drivers/dma/pl330_dmac.c
and another in arch/arm/plat-samsung/. Only the former will be lying unused
until some other SoC vendor decided to use PL330, because S3C has come too
long a way to change its drivers to driver/dma/ API and modify DMA
drivers for every SoC.

I plan something like, arch/arm/common/pl330-core.c implementing the specs in
http://infocenter.arm.com/help/topic/com.arm.doc.ddi0424a/DDI0424A_dmac_pl330_r0p0_trm.pdf
and drivers/dma/pl330.c implement DMA API for SoCs that chose to use it...
and arch/arm/plat-samsung/dma-pl330.c implementing regular S3C DMA API.

I don't claim to have a silver bullet, nobody has atm, but my approach
is at least
more aligned with what maintainers want.

I have the pl330-core part almost ready, but i need time to implement
some _testable_
implementation of the scheme. If maintainers want to see structure of
my code, I can
share it too, but I think I pretty much made it clear.

> Perhaps Joonyoung can simply port over the stuff
> you need to this driver if you show your code.
Having worked on Samsung SoCs(with PL330 DMAC) based products, I would be
_very_ surprised if any user found this implementation useful.
Let alone testing, this implementation can't even explain usability
for fast peripherals
with shallow FIFOs. I didn't give feedback for this patch because I am
not sure if this
is the right way to go at all.

regards.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] PL330: Add PL330 DMA controller driver
  2010-03-25 12:17     ` jassi brar
@ 2010-03-25 15:13       ` Dan Williams
  2010-03-25 22:27         ` jassi brar
  2010-03-25 15:20       ` Linus Walleij
  1 sibling, 1 reply; 29+ messages in thread
From: Dan Williams @ 2010-03-25 15:13 UTC (permalink / raw)
  To: jassi brar
  Cc: Linus Walleij, Joonyoung Shim, kyungmin.park, linux-kernel,
	linux-arm-kernel, Russell King - ARM Linux, Ben Dooks

jassi brar wrote:
>> Perhaps Joonyoung can simply port over the stuff
>> you need to this driver if you show your code.
> Having worked on Samsung SoCs(with PL330 DMAC) based products, I would be
> _very_ surprised if any user found this implementation useful.
> Let alone testing, this implementation can't even explain usability
> for fast peripherals
> with shallow FIFOs. I didn't give feedback for this patch because I am
> not sure if this
> is the right way to go at all.

This is the wrong attitude.  If it were not for a simple oversight 
Joonyoung's driver would already be upstream for the past two kernel 
releases.  So you need to work together to improve that driver to 
incorporate what you need.

It sounds like you just need to add an extension for the arch specific 
dma api.  At first glance this can mimic the approach taken by 
Nobuhiro-san with the shdma driver.

--
Dan


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] PL330: Add PL330 DMA controller driver
  2010-03-25 12:17     ` jassi brar
  2010-03-25 15:13       ` Dan Williams
@ 2010-03-25 15:20       ` Linus Walleij
  2010-03-25 22:36         ` jassi brar
  2010-04-01  5:34         ` jassi brar
  1 sibling, 2 replies; 29+ messages in thread
From: Linus Walleij @ 2010-03-25 15:20 UTC (permalink / raw)
  To: jassi brar
  Cc: Joonyoung Shim, dan.j.williams, kyungmin.park, linux-kernel,
	linux-arm-kernel, Russell King - ARM Linux, Ben Dooks

2010/3/25 jassi brar <jassisinghbrar@gmail.com>:

> My approach is to write a separate PL330 core driver as the backend which
> can be reused by any DMA API implementer driver. That will avoid
> having two copies of the PL330 driver, among other benefits.

Seems like a rather good approach.

> And if this patch is accepted, there
> _will_ exist two copies of the PL330 driver -- one in drivers/dma/pl330_dmac.c
> and another in arch/arm/plat-samsung/. Only the former will be lying unused
> until some other SoC vendor decided to use PL330, because S3C has come too
> long a way to change its drivers to driver/dma/ API and modify DMA
> drivers for every SoC.

What's wrong with merging them later then? Refactoring FTW.

> I have the pl330-core part almost ready, but i need time to implement
> some _testable_
> implementation of the scheme. If maintainers want to see structure of
> my code, I can
> share it too, but I think I pretty much made it clear.

Why not just post it on the list? I'm curious! Since I'm working on a PrimeCell
DMA API I would love to look at PrimeCell DMA engine drivers.

Yours,
Linus Walleij

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] PL330: Add PL330 DMA controller driver
  2010-03-25 15:13       ` Dan Williams
@ 2010-03-25 22:27         ` jassi brar
  2010-03-25 23:12           ` Dan Williams
  2010-03-26  0:54           ` Joonyoung Shim
  0 siblings, 2 replies; 29+ messages in thread
From: jassi brar @ 2010-03-25 22:27 UTC (permalink / raw)
  To: Dan Williams
  Cc: Linus Walleij, Joonyoung Shim, kyungmin.park, linux-kernel,
	linux-arm-kernel, Russell King - ARM Linux, Ben Dooks

On Fri, Mar 26, 2010 at 12:13 AM, Dan Williams <dan.j.williams@intel.com> wrote:
> jassi brar wrote:
>>>
>>> Perhaps Joonyoung can simply port over the stuff
>>> you need to this driver if you show your code.
>>
>> Having worked on Samsung SoCs(with PL330 DMAC) based products, I would be
>> _very_ surprised if any user found this implementation useful.
>> Let alone testing, this implementation can't even explain usability
>> for fast peripherals
>> with shallow FIFOs. I didn't give feedback for this patch because I am
>> not sure if this
>> is the right way to go at all.
>
> This is the wrong attitude.  If it were not for a simple oversight
> Joonyoung's driver would already be upstream for the past two kernel
> releases.  So you need to work together to improve that driver to
> incorporate what you need.
Nothing wrong in attitude here.
Giving feedback on the code only comes after one is convinced with the
overall approach taken. The last time I raised the PL330 driver issue,
most people were not enthusiastic with this drivers/dma/ approach.
I wasn't active in mainline discussions when the driver was originally
submitted a few months ago.
And now my replies are not very 'polite' because there's a lot going on
in the background that people in public threads don't know about.


> It sounds like you just need to add an extension for the arch specific dma
> api.
I actually plan more than that.
Apart from inefficient design, JoonYoung's driver has made some fatal
assumptions
about PL330, which will result in DMA aborts if used with SoCs that implement
configuration of PL330 that is very different from Samsung SoCs'
Of course, I address all such issues that I can think of, in my implementation.

regards.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] PL330: Add PL330 DMA controller driver
  2010-03-25 15:20       ` Linus Walleij
@ 2010-03-25 22:36         ` jassi brar
  2010-04-01  5:34         ` jassi brar
  1 sibling, 0 replies; 29+ messages in thread
From: jassi brar @ 2010-03-25 22:36 UTC (permalink / raw)
  To: Linus Walleij
  Cc: Joonyoung Shim, dan.j.williams, kyungmin.park, linux-kernel,
	linux-arm-kernel, Russell King - ARM Linux, Ben Dooks

On Fri, Mar 26, 2010 at 12:20 AM, Linus Walleij
<linus.ml.walleij@gmail.com> wrote:
> 2010/3/25 jassi brar <jassisinghbrar@gmail.com>:
>> And if this patch is accepted, there
>> _will_ exist two copies of the PL330 driver -- one in drivers/dma/pl330_dmac.c
>> and another in arch/arm/plat-samsung/. Only the former will be lying unused
>> until some other SoC vendor decided to use PL330, because S3C has come too
>> long a way to change its drivers to driver/dma/ API and modify DMA
>> drivers for every SoC.
>
> What's wrong with merging them later then? Refactoring FTW.
The amount of code that will be modified or taken out of drivers/dma/pl330_dmac
will be so much that I will be left only with constrained data structures in
the file to do tricks to make it work with the PL330 engine driver.
I am not very keen on authoring the driver/dma/ driver but neither am I
interested in having to cleanup someone else' code.

>> I have the pl330-core part almost ready, but i need time to implement
>> some _testable_
>> implementation of the scheme. If maintainers want to see structure of
>> my code, I can
>> share it too, but I think I pretty much made it clear.
>
> Why not just post it on the list? I'm curious! Since I'm working on a PrimeCell
> DMA API I would love to look at PrimeCell DMA engine drivers.
I'll post in a day or two when the PL330 core driver takes some shape
closer to what it is supposed to look like.
That will help me getting suggestions for improvement, i hope.

regards.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] PL330: Add PL330 DMA controller driver
  2010-03-25 22:27         ` jassi brar
@ 2010-03-25 23:12           ` Dan Williams
  2010-03-25 23:59             ` jassi brar
  2010-03-26  0:54           ` Joonyoung Shim
  1 sibling, 1 reply; 29+ messages in thread
From: Dan Williams @ 2010-03-25 23:12 UTC (permalink / raw)
  To: jassi brar
  Cc: Linus Walleij, Joonyoung Shim, kyungmin.park, linux-kernel,
	linux-arm-kernel, Russell King - ARM Linux, Ben Dooks

On Thu, Mar 25, 2010 at 3:27 PM, jassi brar <jassisinghbrar@gmail.com> wrote:
> On Fri, Mar 26, 2010 at 12:13 AM, Dan Williams <dan.j.williams@intel.com> wrote:
>> jassi brar wrote:
>>>>
>>>> Perhaps Joonyoung can simply port over the stuff
>>>> you need to this driver if you show your code.
>>>
>>> Having worked on Samsung SoCs(with PL330 DMAC) based products, I would be
>>> _very_ surprised if any user found this implementation useful.
>>> Let alone testing, this implementation can't even explain usability
>>> for fast peripherals
>>> with shallow FIFOs. I didn't give feedback for this patch because I am
>>> not sure if this
>>> is the right way to go at all.
>>
>> This is the wrong attitude.  If it were not for a simple oversight
>> Joonyoung's driver would already be upstream for the past two kernel
>> releases.  So you need to work together to improve that driver to
>> incorporate what you need.
> Nothing wrong in attitude here.
> Giving feedback on the code only comes after one is convinced with the
> overall approach taken. The last time I raised the PL330 driver issue,
> most people were not enthusiastic with this drivers/dma/ approach.
> I wasn't active in mainline discussions when the driver was originally
> submitted a few months ago.
> And now my replies are not very 'polite' because there's a lot going on
> in the background that people in public threads don't know about.

Thanks for clarifying.

>
>
>> It sounds like you just need to add an extension for the arch specific dma
>> api.
> I actually plan more than that.
> Apart from inefficient design, JoonYoung's driver has made some fatal
> assumptions
> about PL330, which will result in DMA aborts if used with SoCs that implement
> configuration of PL330 that is very different from Samsung SoCs'
> Of course, I address all such issues that I can think of, in my implementation.

Ok, I'll rely on acked-by's from the interested parties once your
driver is out as I do not have a vested interest in this hardware,
just the dmaengine framework issues.

--
Dan

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] PL330: Add PL330 DMA controller driver
  2010-03-25 23:12           ` Dan Williams
@ 2010-03-25 23:59             ` jassi brar
  2010-03-26  0:29               ` Kyungmin Park
  0 siblings, 1 reply; 29+ messages in thread
From: jassi brar @ 2010-03-25 23:59 UTC (permalink / raw)
  To: Dan Williams
  Cc: Linus Walleij, Joonyoung Shim, kyungmin.park, linux-kernel,
	linux-arm-kernel, Russell King - ARM Linux, Ben Dooks

On Fri, Mar 26, 2010 at 8:12 AM, Dan Williams <dan.j.williams@intel.com> wrote:
> On Thu, Mar 25, 2010 at 3:27 PM, jassi brar <jassisinghbrar@gmail.com> wrote:
>> On Fri, Mar 26, 2010 at 12:13 AM, Dan Williams <dan.j.williams@intel.com> wrote:
>>> It sounds like you just need to add an extension for the arch specific dma
>>> api.
>> I actually plan more than that.
>> Apart from inefficient design, JoonYoung's driver has made some fatal
>> assumptions
>> about PL330, which will result in DMA aborts if used with SoCs that implement
>> configuration of PL330 that is very different from Samsung SoCs'
>> Of course, I address all such issues that I can think of, in my implementation.
>
> Ok, I'll rely on acked-by's from the interested parties once your
> driver is out as I do not have a vested interest in this hardware,
> just the dmaengine framework issues.
Fair enough. Thank you.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] PL330: Add PL330 DMA controller driver
  2010-03-25 23:59             ` jassi brar
@ 2010-03-26  0:29               ` Kyungmin Park
  2010-03-26  0:48                 ` jassi brar
  0 siblings, 1 reply; 29+ messages in thread
From: Kyungmin Park @ 2010-03-26  0:29 UTC (permalink / raw)
  To: jassi brar
  Cc: Dan Williams, Russell King - ARM Linux, Joonyoung Shim,
	Linus Walleij, linux-kernel, Ben Dooks, linux-arm-kernel

On Fri, Mar 26, 2010 at 8:59 AM, jassi brar <jassisinghbrar@gmail.com> wrote:
> On Fri, Mar 26, 2010 at 8:12 AM, Dan Williams <dan.j.williams@intel.com> wrote:
>> On Thu, Mar 25, 2010 at 3:27 PM, jassi brar <jassisinghbrar@gmail.com> wrote:
>>> On Fri, Mar 26, 2010 at 12:13 AM, Dan Williams <dan.j.williams@intel.com> wrote:
>>>> It sounds like you just need to add an extension for the arch specific dma
>>>> api.
>>> I actually plan more than that.
>>> Apart from inefficient design, JoonYoung's driver has made some fatal
>>> assumptions
>>> about PL330, which will result in DMA aborts if used with SoCs that implement
>>> configuration of PL330 that is very different from Samsung SoCs'
Exactly what? We are already tested it our board and play the music well.
What's condition DMA aborts are happen?
>>> Of course, I address all such issues that I can think of, in my implementation.
>>
>> Ok, I'll rely on acked-by's from the interested parties once your
>> driver is out as I do not have a vested interest in this hardware,
>> just the dmaengine framework issues.
> Fair enough. Thank you.

As your previous mail, we can wait until this weekend. but If you
don't post the your codes until this weekend.
We assume that your works are not yet done so merge this patch first
and then fix it if your words(DMA aborts issue) are right.

Thank you,
Kyungmin Park

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] PL330: Add PL330 DMA controller driver
  2010-03-26  0:29               ` Kyungmin Park
@ 2010-03-26  0:48                 ` jassi brar
  0 siblings, 0 replies; 29+ messages in thread
From: jassi brar @ 2010-03-26  0:48 UTC (permalink / raw)
  To: Kyungmin Park
  Cc: Dan Williams, Russell King - ARM Linux, Joonyoung Shim,
	Linus Walleij, linux-kernel, Ben Dooks, linux-arm-kernel

On Fri, Mar 26, 2010 at 9:29 AM, Kyungmin Park
<kyungmin.park@samsung.com> wrote:
> On Fri, Mar 26, 2010 at 8:59 AM, jassi brar <jassisinghbrar@gmail.com> wrote:
>> On Fri, Mar 26, 2010 at 8:12 AM, Dan Williams <dan.j.williams@intel.com> wrote:
>>> On Thu, Mar 25, 2010 at 3:27 PM, jassi brar <jassisinghbrar@gmail.com> wrote:
>>>> On Fri, Mar 26, 2010 at 12:13 AM, Dan Williams <dan.j.williams@intel.com> wrote:
>>>>> It sounds like you just need to add an extension for the arch specific dma
>>>>> api.
>>>> I actually plan more than that.
>>>> Apart from inefficient design, JoonYoung's driver has made some fatal
>>>> assumptions
>>>> about PL330, which will result in DMA aborts if used with SoCs that implement
>>>> configuration of PL330 that is very different from Samsung SoCs'
> Exactly what? We are already tested it our board and play the music well.
> What's condition DMA aborts are happen?
I didn't want to get into gory details, but it seems I need to explain .....

Please see my quick review to the original patch in this thread.
We take the discussion there.

>>>> Of course, I address all such issues that I can think of, in my implementation.
>>>
>>> Ok, I'll rely on acked-by's from the interested parties once your
>>> driver is out as I do not have a vested interest in this hardware,
>>> just the dmaengine framework issues.
>> Fair enough. Thank you.
>
> As your previous mail, we can wait until this weekend. but If you
> don't post the your codes until this weekend.
> We assume that your works are not yet done so merge this patch first
> and then fix it if your words(DMA aborts issue) are right.
It's not up to you to give me any deadline. Not the "Until this weekend" one !!!
I think only the word of the maintainers count.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] PL330: Add PL330 DMA controller driver
  2010-03-25 22:27         ` jassi brar
  2010-03-25 23:12           ` Dan Williams
@ 2010-03-26  0:54           ` Joonyoung Shim
  2010-03-26  1:01             ` jassi brar
  1 sibling, 1 reply; 29+ messages in thread
From: Joonyoung Shim @ 2010-03-26  0:54 UTC (permalink / raw)
  To: jassi brar
  Cc: Dan Williams, Linus Walleij, kyungmin.park, linux-kernel,
	linux-arm-kernel, Russell King - ARM Linux, Ben Dooks

On 3/26/2010 7:27 AM, jassi brar wrote:
> On Fri, Mar 26, 2010 at 12:13 AM, Dan Williams <dan.j.williams@intel.com> wrote:
>> jassi brar wrote:
>>>> Perhaps Joonyoung can simply port over the stuff
>>>> you need to this driver if you show your code.
>>> Having worked on Samsung SoCs(with PL330 DMAC) based products, I would be
>>> _very_ surprised if any user found this implementation useful.
>>> Let alone testing, this implementation can't even explain usability
>>> for fast peripherals
>>> with shallow FIFOs. I didn't give feedback for this patch because I am
>>> not sure if this
>>> is the right way to go at all.
>> This is the wrong attitude.  If it were not for a simple oversight
>> Joonyoung's driver would already be upstream for the past two kernel
>> releases.  So you need to work together to improve that driver to
>> incorporate what you need.
> Nothing wrong in attitude here.
> Giving feedback on the code only comes after one is convinced with the
> overall approach taken. The last time I raised the PL330 driver issue,
> most people were not enthusiastic with this drivers/dma/ approach.
> I wasn't active mainline discussions when the driver was originally
> submitted a few months ago.
> And now my replies are not very 'polite' because theres a lot going on
> in the background that people in public threads don't know about.
> 
> 
>> It sounds like you just need to add an extension for the arch specific dma
>> api.
> I actually plan more than that.
> Apart from inefficient design, JoonYoung's driver has made some fatal
> assumptions
> about PL330, which will result in DMA aborts if used with SoCs that implement
> configuration of PL330 that is very different from Samsung SoCs'
> Of course, I address all such issues that I can think of, in my implementation.
> 

I can wait your implementation and wonder what is the issue also.

I welcome you try other design and want better driver is committed at 
mainline kernel too.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] PL330: Add PL330 DMA controller driver
  2010-03-26  0:54           ` Joonyoung Shim
@ 2010-03-26  1:01             ` jassi brar
  0 siblings, 0 replies; 29+ messages in thread
From: jassi brar @ 2010-03-26  1:01 UTC (permalink / raw)
  To: Joonyoung Shim
  Cc: Dan Williams, Linus Walleij, kyungmin.park, linux-kernel,
	linux-arm-kernel, Russell King - ARM Linux, Ben Dooks

On Fri, Mar 26, 2010 at 9:54 AM, Joonyoung Shim <jy0922.shim@samsung.com> wrote:
> On 3/26/2010 7:27 AM, jassi brar wrote:
>> On Fri, Mar 26, 2010 at 12:13 AM, Dan Williams <dan.j.williams@intel.com> wrote:
>>> jassi brar wrote:
>>>>> Perhaps Joonyoung can simply port over the stuff
>>>>> you need to this driver if you show your code.
>>>> Having worked on Samsung SoCs(with PL330 DMAC) based products, I would be
>>>> _very_ surprised if any user found this implementation useful.
>>>> Let alone testing, this implementation can't even explain usability
>>>> for fast peripherals
>>>> with shallow FIFOs. I didn't give feedback for this patch because I am
>>>> not sure if this
>>>> is the right way to go at all.
>>> This is the wrong attitude.  If it were not for a simple oversight
>>> Joonyoung's driver would already be upstream for the past two kernel
>>> releases.  So you need to work together to improve that driver to
>>> incorporate what you need.
>> Nothing wrong in attitude here.
>> Giving feedback on the code only comes after one is convinced with the
>> overall approach taken. The last time I raised the PL330 driver issue,
>> most people were not enthusiastic with this drivers/dma/ approach.
>> I wasn't active mainline discussions when the driver was originally
>> submitted a few months ago.
>> And now my replies are not very 'polite' because theres a lot going on
>> in the background that people in public threads don't know about.
>>
>>
>>> It sounds like you just need to add an extension for the arch specific dma
>>> api.
>> I actually plan more than that.
>> Apart from inefficient design, JoonYoung's driver has made some fatal
>> assumptions
>> about PL330, which will result in DMA aborts if used with SoCs that implement
>> configuration of PL330 that is very different from Samsung SoCs'
>> Of course, I address all such issues that I can think of, in my implementation.
>>
>
> I can wait your implementation and wonder what is the issue also.
>
> I welcome you try other design and want better driver is committed at
> mainline kernel too.
Good to have you at last reply to my posts

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] PL330: Add PL330 DMA controller driver
  2010-03-25  3:17 [PATCH v2] PL330: Add PL330 DMA controller driver Joonyoung Shim
                   ` (2 preceding siblings ...)
  2010-03-25  8:26 ` Linus Walleij
@ 2010-03-26  2:08 ` jassi brar
  2010-03-31  1:07   ` Ben Dooks
  3 siblings, 1 reply; 29+ messages in thread
From: jassi brar @ 2010-03-26  2:08 UTC (permalink / raw)
  To: Joonyoung Shim
  Cc: dan.j.williams, linus.ml.walleij, kyungmin.park, linux-kernel,
	linux-arm-kernel

On Thu, Mar 25, 2010 at 12:17 PM, Joonyoung Shim
<jy0922.shim@samsung.com> wrote:
> The PL330 is currently the dma controller using at the S5PC1XX arm SoC.
> This supports DMA_MEMCPY and DMA_SLAVE.
>
> The datasheet for the PL330 can find below url:
> http://infocenter.arm.com/help/topic/com.arm.doc.ddi0424a/DDI0424A_dmac_pl330_r0p0_trm.pdf
>
> Signed-off-by: Joonyoung Shim <jy0922.shim@samsung.com>
> ---
> Change log:
>
> v2: Convert into an amba_device driver.
>    Code clean and update from v1 patch review.

Here goes some quick technical feedback of the code.
Please remember that these issues are only secondary.
The primary drawback is the approach that this patch adopts,
as already explained in other posts.

[snip]

> +/* instruction set functions */
> +static inline int pl330_dmaaddh(u8 *desc_pool_virt, u16 imm, bool ra)
> +{
> +       u8 opcode = DMAADDH | (ra << 1);
> +
> +       writeb(opcode, desc_pool_virt++);
> +       writew(imm, desc_pool_virt);
> +       return 3;
> +}
> +
> +static inline int pl330_dmaend(u8 *desc_pool_virt)
> +{
> +       u8 opcode = DMAEND;
> +
> +       writeb(opcode, desc_pool_virt);
> +       return 1;
> +}
> +
> +static inline int pl330_dmaflushp(u8 *desc_pool_virt, u8 periph)
> +{
> +       u8 opcode = DMAFLUSHHP;
> +
> +       writeb(opcode, desc_pool_virt++);
> +       writeb(periph << 3, desc_pool_virt);
> +       return 2;
> +}
> +
> +static inline int pl330_dmald(u8 *desc_pool_virt)
> +{
> +       u8 opcode = DMALD;
> +
> +       writeb(opcode, desc_pool_virt);
> +       return 1;
> +}
> +
> +static inline int pl330_dmalds(u8 *desc_pool_virt)
> +{
> +       u8 opcode = DMALDS;
> +
> +       writeb(opcode, desc_pool_virt);
> +       return 1;
> +}
> +
> +static inline int pl330_dmaldb(u8 *desc_pool_virt)
> +{
> +       u8 opcode = DMALDB;
> +
> +       writeb(opcode, desc_pool_virt);
> +       return 1;
> +}
> +
> +static inline int pl330_dmaldps(u8 *desc_pool_virt, u8 periph)
> +{
> +       u8 opcode = DMALDPS;
> +
> +       writeb(opcode, desc_pool_virt++);
> +       writeb(periph << 3, desc_pool_virt);
> +       return 2;
> +}
> +
> +static inline int pl330_dmaldpb(u8 *desc_pool_virt, u8 periph)
> +{
> +       u8 opcode = DMALDPB;
> +
> +       writeb(opcode, desc_pool_virt++);
> +       writeb(periph << 3, desc_pool_virt);
> +       return 2;
> +}
> +
> +static inline int pl330_dmalp(u8 *desc_pool_virt, u8 iter, bool lc)
> +{
> +       u8 opcode = DMALP | (lc << 1);
> +
> +       writeb(opcode, desc_pool_virt++);
> +       writeb(iter, desc_pool_virt);
> +       return 2;
> +}
> +
> +static inline int pl330_dmalpend(u8 *desc_pool_virt, u8 backwards_jump, bool lc)
> +{
> +       u8 opcode = DMALPEND | (lc << 2);
> +
> +       writeb(opcode, desc_pool_virt++);
> +       writeb(backwards_jump, desc_pool_virt);
> +       return 2;
> +}
> +
> +static inline int pl330_dmalpends(u8 *desc_pool_virt, u8 backwards_jump,
> +               bool lc)
> +{
> +       u8 opcode = DMALPENDS | (lc << 2);
> +
> +       writeb(opcode, desc_pool_virt++);
> +       writeb(backwards_jump, desc_pool_virt);
> +       return 2;
> +}
> +
> +static inline int pl330_dmalpendb(u8 *desc_pool_virt, u8 backwards_jump,
> +               bool lc)
> +{
> +       u8 opcode = DMALPENDB | (lc << 2);
> +
> +       writeb(opcode, desc_pool_virt++);
> +       writeb(backwards_jump, desc_pool_virt);
> +       return 2;
> +}
> +
> +static inline int pl330_dmalpfe(u8 *desc_pool_virt, u8 backwards_jump, bool lc)
> +{
> +       u8 opcode = DMALPFE | (lc << 2);
> +
> +       writeb(opcode, desc_pool_virt++);
> +       writeb(backwards_jump, desc_pool_virt);
> +       return 2;
> +}
> +
> +static inline int pl330_dmakill(u8 *desc_pool_virt)
> +{
> +       u8 opcode = DMAKILL;
> +
> +       writeb(opcode, desc_pool_virt);
> +       return 1;
> +}
> +
> +static inline int pl330_dmamov(u8 *desc_pool_virt, u8 rd, u32 imm)
> +{
> +       u8 opcode = DMAMOV;
> +
> +       writeb(opcode, desc_pool_virt++);
> +       writeb(rd, desc_pool_virt++);
> +       writel(imm, desc_pool_virt);
> +       return 6;
> +}
> +
> +static inline int pl330_dmanop(u8 *desc_pool_virt)
> +{
> +       u8 opcode = DMANOP;
> +
> +       writeb(opcode, desc_pool_virt);
> +       return 1;
> +}
> +
> +static inline int pl330_dmarmb(u8 *desc_pool_virt)
> +{
> +       u8 opcode = DMARMB;
> +
> +       writeb(opcode, desc_pool_virt);
> +       return 1;
> +}
> +
> +static inline int pl330_dmasev(u8 *desc_pool_virt, u8 event_num)
> +{
> +       u8 opcode = DMASEV;
> +
> +       writeb(opcode, desc_pool_virt++);
> +       writeb(event_num << 3, desc_pool_virt);
> +       return 2;
> +}
> +
> +static inline int pl330_dmast(u8 *desc_pool_virt)
> +{
> +       u8 opcode = DMAST;
> +
> +       writeb(opcode, desc_pool_virt);
> +       return 1;
> +}
> +
> +static inline int pl330_dmasts(u8 *desc_pool_virt)
> +{
> +       u8 opcode = DMASTS;
> +
> +       writeb(opcode, desc_pool_virt);
> +       return 1;
> +}
> +
> +static inline int pl330_dmastb(u8 *desc_pool_virt)
> +{
> +       u8 opcode = DMASTB;
> +
> +       writeb(opcode, desc_pool_virt);
> +       return 1;
> +}
> +
> +static inline int pl330_dmastps(u8 *desc_pool_virt, u8 periph)
> +{
> +       u8 opcode = DMASTPS;
> +
> +       writeb(opcode, desc_pool_virt++);
> +       writeb(periph << 3, desc_pool_virt);
> +       return 2;
> +}
> +
> +static inline int pl330_dmastpb(u8 *desc_pool_virt, u8 periph)
> +{
> +       u8 opcode = DMASTPB;
> +
> +       writeb(opcode, desc_pool_virt++);
> +       writeb(periph << 3, desc_pool_virt);
> +       return 2;
> +}
> +
> +static inline int pl330_dmastz(u8 *desc_pool_virt)
> +{
> +       u8 opcode = DMASTZ;
> +
> +       writeb(opcode, desc_pool_virt);
> +       return 1;
> +}
> +
> +static inline int pl330_dmawfe(u8 *desc_pool_virt, u8 event_num, bool invalid)
> +{
> +       u8 opcode = DMAWFE;
> +
> +       writeb(opcode, desc_pool_virt++);
> +       writeb((event_num << 3) | (invalid << 1), desc_pool_virt);
> +       return 2;
> +}
> +
> +static inline int pl330_dmawfps(u8 *desc_pool_virt, u8 periph)
> +{
> +       u8 opcode = DMAWFPS;
> +
> +       writeb(opcode, desc_pool_virt++);
> +       writeb(periph << 3, desc_pool_virt);
> +       return 2;
> +}
> +
> +static inline int pl330_dmawfpb(u8 *desc_pool_virt, u8 periph)
> +{
> +       u8 opcode = DMAWFPB;
> +
> +       writeb(opcode, desc_pool_virt++);
> +       writeb(periph << 3, desc_pool_virt);
> +       return 2;
> +}
> +
> +static inline int pl330_dmawfpp(u8 *desc_pool_virt, u8 periph)
> +{
> +       u8 opcode = DMAWFPP;
> +
> +       writeb(opcode, desc_pool_virt++);
> +       writeb(periph << 3, desc_pool_virt);
> +       return 2;
> +}
> +
> +static inline int pl330_dmawmb(u8 *desc_pool_virt)
> +{
> +       u8 opcode = DMAWMB;
> +
> +       writeb(opcode, desc_pool_virt);
> +       return 1;
> +}
> +
> +static void pl330_dmago(struct pl330_chan *pl330_ch, struct pl330_desc *desc,
> +               bool ns)
> +{
> +       unsigned int val;
> +       u8 opcode = DMAGO | (ns << 1);
> +
> +       val = (pl330_ch->id << 24) | (opcode << 16) | (pl330_ch->id << 8);
> +       writel(val, pl330_ch->pl330_dev->reg_base + PL330_DBGINST0);
> +
> +       val = desc->async_tx.phys;
> +       writel(val, pl330_ch->pl330_dev->reg_base + PL330_DBGINST1);
> +
> +       writel(0, pl330_ch->pl330_dev->reg_base + PL330_DBGCMD);
> +}
As already mentioned by Marc, it doesn't have to be read/write.
PL330 specifies the microcode buffers to be on system memory and that
need not be treated like ioports.

[snip]

> +static struct pl330_desc *
> +pl330_alloc_descriptor(struct pl330_chan *pl330_ch, gfp_t flags)
> +{
> +       struct device *dev = pl330_ch->pl330_dev->common.dev;
> +       struct pl330_desc *desc;
> +       dma_addr_t phys;
> +
> +       desc = kzalloc(sizeof(*desc), flags);
> +       if (!desc)
> +               return NULL;
> +
> +       desc->desc_pool_virt = dma_alloc_coherent(dev, PL330_POOL_SIZE, &phys,
> +                       flags);
These allocations are inefficient and don't need to be done so often.
My implementation allocates a pool of such buffers (size specified by
the DMA API driver) and manages them by simple pointer manipulation.
Though the xfer requests for DMA API has to be managed in the DMA API driver.

> +       if (!desc->desc_pool_virt) {
> +               kfree(desc);
> +               return NULL;
> +       }
> +
> +       dma_async_tx_descriptor_init(&desc->async_tx, &pl330_ch->common);
> +       desc->async_tx.tx_submit = pl330_tx_submit;
> +       desc->async_tx.phys = phys;
> +
> +       return desc;
> +}
> +
> +static struct pl330_desc *pl330_get_descriptor(struct pl330_chan *pl330_ch)
> +{
> +       struct device *dev = pl330_ch->pl330_dev->common.dev;
> +       struct pl330_desc *desc;
> +
> +       if (!list_empty(&pl330_ch->free_desc)) {
> +               desc = to_pl330_desc(pl330_ch->free_desc.next);
> +               list_del(&desc->desc_node);
> +       } else {
> +               /* try to get another desc */
> +               desc = pl330_alloc_descriptor(pl330_ch, GFP_ATOMIC);
> +               if (!desc) {
> +                       dev_err(dev, "descriptor alloc failed\n");
> +                       return NULL;
> +               }
> +       }
> +
> +       return desc;
> +}
> +
> +static int pl330_alloc_chan_resources(struct dma_chan *chan)
> +{
> +       struct pl330_chan *pl330_ch = to_pl330_chan(chan);
> +       struct device *dev = pl330_ch->pl330_dev->common.dev;
> +       struct pl330_desc *desc;
> +       int i;
> +       LIST_HEAD(tmp_list);
> +
> +       /* have we already been set up? */
> +       if (!list_empty(&pl330_ch->free_desc))
> +               return pl330_ch->desc_num;
> +
> +       for (i = 0; i < PL330_DESC_NUM; i++) {
> +               desc = pl330_alloc_descriptor(pl330_ch, GFP_KERNEL);
> +               if (!desc) {
> +                       dev_err(dev, "Only %d initial descriptors\n", i);
> +                       break;
> +               }
> +               list_add_tail(&desc->desc_node, &tmp_list);
> +       }
> +
> +       pl330_ch->completed = chan->cookie = 1;
> +       pl330_ch->desc_num = i;
> +       list_splice(&tmp_list, &pl330_ch->free_desc);
> +
> +       return pl330_ch->desc_num;
> +}
> +

[snip]

> +static unsigned int pl330_make_instructions(struct pl330_chan *pl330_ch,
> +               struct pl330_desc *desc, dma_addr_t dest, dma_addr_t src,
> +               size_t len, unsigned int inst_size,
> +               enum dma_data_direction direction)
> +{
> +       struct device *dev = pl330_ch->pl330_dev->common.dev;
> +       void *buf = desc->desc_pool_virt;
> +       u32 control = *(u32 *)&pl330_ch->pl330_reg_cc;
> +       unsigned int loop_size;
> +       unsigned int loop_size_rest;
> +       unsigned int loop_count0;
> +       unsigned int loop_count1 = 0;
> +       unsigned int loop_count0_rest = 0;
> +       unsigned int loop_start0 = 0;
> +       unsigned int loop_start1 = 0;
> +
> +       dev_dbg(dev, "desc_pool_phys: 0x%x\n", desc->async_tx.phys);
> +       dev_dbg(dev, "control: 0x%x\n", control);
> +       dev_dbg(dev, "dest: 0x%x\n", dest);
> +       dev_dbg(dev, "src: 0x%x\n", src);
> +       dev_dbg(dev, "len: 0x%x\n", len);
> +
> +       /* calculate loop count */
> +       loop_size = (pl330_ch->pl330_reg_cc.src_burst_len + 1) *
> +               (1 << pl330_ch->pl330_reg_cc.src_burst_size);
> +       loop_count0 = (len / loop_size) - 1;
> +       loop_size_rest = len % loop_size;
> +
> +       dev_dbg(dev, "loop_size: 0x%x\n", loop_size);
> +       dev_dbg(dev, "loop_count0: 0x%x\n", loop_count0);
> +       dev_dbg(dev, "loop_size_rest: 0x%x\n", loop_size_rest);
> +
> +       if (loop_size_rest) {
> +               dev_err(dev, "Transfer length must be aligned to loop_size\n");
> +               return -EINVAL;
> +       }
This limit, though not serious, is unconditionally imposed by your design.
There are ways to get around this situation by smarter generation of
microcode.

> +       if (loop_count0 >= PL330_MAX_LOOPS) {
> +               loop_count1 = (loop_count0 / PL330_MAX_LOOPS) - 1;
> +               loop_count0_rest = (loop_count0 % PL330_MAX_LOOPS) + 1;
> +               loop_count0 = PL330_MAX_LOOPS - 1;
> +               dev_dbg(dev, "loop_count0: 0x%x\n", loop_count0);
> +               dev_dbg(dev, "loop_count0_rest: 0x%x\n", loop_count0_rest);
> +               dev_dbg(dev, "loop_count1: 0x%x\n", loop_count1);
> +
> +               if (loop_count1 >= PL330_MAX_LOOPS)
> +                       dev_dbg(dev, "loop_count1 overflow\n");
Again, the DMA API drivers will suffer just because someone didn't care
to generate microcode efficiently.
The microcode template for xfer takes only about 50 bytes and despite
having PL330_POOL_SIZE buffer, you have to drop xfer requests just because
the template is not properly designed.
My implementation is limited only by the microcode buffer size, which in turn
can be specified at startup by the DMA API driver.

> +       }
> +
> +       /* write instruction sets on buffer */
> +       inst_size += pl330_dmamov(buf + inst_size, RD_DAR, dest);
> +       inst_size += pl330_dmamov(buf + inst_size, RD_SAR, src);
> +       inst_size += pl330_dmamov(buf + inst_size, RD_CCR, control);
> +
> +       if (loop_count1) {
> +               inst_size += pl330_dmalp(buf + inst_size, loop_count1, LC_1);
> +               loop_start1 = inst_size;
> +       }
> +
> +       if (loop_count0) {
> +               inst_size += pl330_dmalp(buf + inst_size, loop_count0, LC_0);
> +               loop_start0 = inst_size;
> +       }
> +
> +       if (direction == DMA_TO_DEVICE) {
> +               struct pl330_dma_slave *dma_slave = pl330_ch->common.private;
> +               u8 periph = dma_slave->peri_num;
> +               inst_size += pl330_dmawfps(buf + inst_size, periph);
> +               inst_size += pl330_dmald(buf + inst_size);
> +               inst_size += pl330_dmastps(buf + inst_size, periph);
> +               inst_size += pl330_dmaflushp(buf + inst_size, periph);
> +       } else if (direction == DMA_FROM_DEVICE) {
> +               struct pl330_dma_slave *dma_slave = pl330_ch->common.private;
> +               u8 periph = dma_slave->peri_num;
> +               inst_size += pl330_dmawfps(buf + inst_size, periph);
> +               inst_size += pl330_dmaldps(buf + inst_size, periph);
> +               inst_size += pl330_dmast(buf + inst_size);
> +               inst_size += pl330_dmaflushp(buf + inst_size, periph);
> +       } else {
> +               inst_size += pl330_dmald(buf + inst_size);
> +               inst_size += pl330_dmarmb(buf + inst_size);
> +               inst_size += pl330_dmast(buf + inst_size);
> +               inst_size += pl330_dmawmb(buf + inst_size);
> +       }
> +
> +       if (loop_count0)
> +               inst_size += pl330_dmalpend(buf + inst_size,
> +                               inst_size - loop_start0, LC_0);
> +
> +       if (loop_count1)
> +               inst_size += pl330_dmalpend(buf + inst_size,
> +                               inst_size - loop_start1, LC_1);
> +
> +       if (loop_count0_rest) {
> +               inst_size += pl330_dmalp(buf + inst_size, loop_count0_rest - 1,
> +                               LC_0);
> +               loop_start0 = inst_size;
> +
> +               if (direction == DMA_TO_DEVICE) {
> +                       struct pl330_dma_slave *dma_slave =
> +                               pl330_ch->common.private;
> +                       u8 periph = dma_slave->peri_num;
> +                       inst_size += pl330_dmawfps(buf + inst_size, periph);
> +                       inst_size += pl330_dmald(buf + inst_size);
> +                       inst_size += pl330_dmastps(buf + inst_size, periph);
> +                       inst_size += pl330_dmaflushp(buf + inst_size, periph);
> +               } else if (direction == DMA_FROM_DEVICE) {
> +                       struct pl330_dma_slave *dma_slave =
> +                               pl330_ch->common.private;
> +                       u8 periph = dma_slave->peri_num;
> +                       inst_size += pl330_dmawfps(buf + inst_size, periph);
> +                       inst_size += pl330_dmaldps(buf + inst_size, periph);
> +                       inst_size += pl330_dmast(buf + inst_size);
> +                       inst_size += pl330_dmaflushp(buf + inst_size, periph);
> +               } else {
> +                       inst_size += pl330_dmald(buf + inst_size);
> +                       inst_size += pl330_dmarmb(buf + inst_size);
> +                       inst_size += pl330_dmast(buf + inst_size);
> +                       inst_size += pl330_dmawmb(buf + inst_size);
> +               }
> +
> +               inst_size += pl330_dmalpend(buf + inst_size,
> +                               inst_size - loop_start0, LC_0);
> +       }
> +
> +       inst_size += pl330_dmasev(buf + inst_size, pl330_ch->id);
> +       inst_size += pl330_dmaend(buf + inst_size);
> +
> +       return inst_size;
> +}
This instruction generation leaves no scope for Security permissions for xfers,
that is a feature of PL330.

[snip]

> +static void pl330_xfer_complete(struct pl330_chan *pl330_ch)
> +{
> +       struct pl330_desc *desc;
> +       dma_async_tx_callback callback;
> +       void *callback_param;
> +
> +       /* execute next desc */
> +       pl330_issue_pending(&pl330_ch->common);
> +
> +       if (list_empty(&pl330_ch->complete_desc))
> +               return;
> +
> +       desc = to_pl330_desc(pl330_ch->complete_desc.next);
> +       list_move_tail(&desc->desc_node, &pl330_ch->free_desc);
> +
> +       pl330_ch->completed = desc->async_tx.cookie;
> +
> +       callback = desc->async_tx.callback;
> +       callback_param = desc->async_tx.callback_param;
> +       if (callback)
> +               callback(callback_param);
> +}
> +
> +static void pl330_ch_tasklet(unsigned long data)
> +{
> +       struct pl330_chan *pl330_ch = (struct pl330_chan *)data;
> +       unsigned int val;
> +
> +       pl330_xfer_complete(pl330_ch);
> +
> +       /* enable channel interrupt */
> +       val = readl(pl330_ch->pl330_dev->reg_base + PL330_INTEN);
> +       val |= (1 << pl330_ch->id);
> +       writel(val, pl330_ch->pl330_dev->reg_base + PL330_INTEN);
> +}
> +
> +static irqreturn_t pl330_irq_handler(int irq, void *data)
> +{
> +       struct pl330_device *pl330_dev = data;
> +       struct pl330_chan *pl330_ch;
> +       unsigned int intstatus;
> +       unsigned int inten;
> +       int i;
> +
> +       intstatus = readl(pl330_dev->reg_base + PL330_INTSTATUS);
> +
> +       if (intstatus == 0)
> +               return IRQ_HANDLED;
> +
> +       inten = readl(pl330_dev->reg_base + PL330_INTEN);
> +       for (i = 0; i < PL330_MAX_CHANS; i++) {
> +               if (intstatus & (1 << i)) {
> +                       pl330_ch = &pl330_dev->pl330_ch[i];
> +                       writel(1 << i, pl330_dev->reg_base + PL330_INTCLR);
> +
> +                       /* disable channel interrupt */
> +                       inten &= ~(1 << i);
> +                       writel(inten, pl330_dev->reg_base + PL330_INTEN);
> +
> +                       tasklet_schedule(&pl330_ch->tasklet);
I think the DMA API already prohibits doing non-irq-context things(like enqueue)
in the callbacks, so why implement tasklets here?
This may still get you "audio working fine" with Samsung I2S controller,
but is likely to cause problems with more demanding peripherals like SPDIF
if they operate at best QOS(even 24bits/sample Stereo at 96000Hz) and has
shallow FIFO(8 samples deep and hence 84 usecs acceptable latency).
Remember that SPDIF usually goes with other system load like HDMI HD
playback which only increases the interrupt latency.

Not to forget, the overall throughput hit taken by other dma clients,
like MMC over SPI that use 256/512 bytes DMA xfers, due to delayed
DMA-DONE notifications.

Also, using tasklet here may break any protocol that involves _time-bound_ ACK
via some register after the xfer has been done.

If some client needs to do sleepable-context stuff after DMA-Xfer-Done,
let that driver implement a tasklet in its callback rather than have every
client pay the price.

> +               }
> +       }
> +
> +       return IRQ_HANDLED;
> +}
> +

  [snip]

> +
> +static int __devinit pl330_probe(struct amba_device *adev, struct amba_id *id)
> +{
> +       struct pl330_device *pl330_dev;

  [snip]

> +
> +       for (i = 0; i < PL330_MAX_CHANS; i++) {
This whole code is designed around the assumption of every DMAC having
PL330_MAX_CHANS channels. That is dangerous, since PL330 is highly
configurable and some implementation may choose to implement less than
PL330_MAX_CHANS(i.e 8) channels.
As the PL330 spec says, most operations for non-existing channel result in
DMA Abort. Further, the IRQ handler assumes utopia and doesn't even
care to check such conditions; as a result, on non-s3c-like
implementations there are many chances the system will just hang looping
in the DMA Abort irq, or get no irq at all, depending upon the cause.
Not to mention the unnecessary allocation for MAX possible resources, though
not very serious.

regards

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] PL330: Add PL330 DMA controller driver
  2010-03-26  2:08 ` jassi brar
@ 2010-03-31  1:07   ` Ben Dooks
  2010-03-31  1:40     ` jassi brar
  0 siblings, 1 reply; 29+ messages in thread
From: Ben Dooks @ 2010-03-31  1:07 UTC (permalink / raw)
  To: jassi brar
  Cc: Joonyoung Shim, linus.ml.walleij, dan.j.williams, kyungmin.park,
	linux-arm-kernel, linux-kernel

On Fri, Mar 26, 2010 at 11:08:06AM +0900, jassi brar wrote:
> On Thu, Mar 25, 2010 at 12:17 PM, Joonyoung Shim
> <jy0922.shim@samsung.com> wrote:
> > The PL330 is currently the dma controller using at the S5PC1XX arm SoC.
> > This supports DMA_MEMCPY and DMA_SLAVE.
> >
> > The datasheet for the PL330 can find below url:
> > http://infocenter.arm.com/help/topic/com.arm.doc.ddi0424a/DDI0424A_dmac_pl330_r0p0_trm.pdf
> >
> > Signed-off-by: Joonyoung Shim <jy0922.shim@samsung.com>
> > ---
> > Change log:
> >
> > v2: Convert into an amba_device driver.
> >    Code clean and update from v1 patch review.
> 
> Here goes some quick technical feedback of the code.
> Please remember that these issues are only secondary.
> The primary drawback is the approach that this patch adopts,
> as already explained in other posts.
> 
> [snip]
> 
> > +/* instruction set functions */
> > +static inline int pl330_dmaaddh(u8 *desc_pool_virt, u16 imm, bool ra)
> > +{
> > +       u8 opcode = DMAADDH | (ra << 1);
> > +
> > +       writeb(opcode, desc_pool_virt++);
> > +       writew(imm, desc_pool_virt);
> > +       return 3;
> > +}
> > +
> > +static inline int pl330_dmaend(u8 *desc_pool_virt)
> > +{
> > +       u8 opcode = DMAEND;
> > +
> > +       writeb(opcode, desc_pool_virt);
> > +       return 1;
> > +}
> > +
> > +static inline int pl330_dmaflushp(u8 *desc_pool_virt, u8 periph)
> > +{
> > +       u8 opcode = DMAFLUSHHP;
> > +
> > +       writeb(opcode, desc_pool_virt++);
> > +       writeb(periph << 3, desc_pool_virt);
> > +       return 2;
> > +}
> > +
> > +static inline int pl330_dmald(u8 *desc_pool_virt)
> > +{
> > +       u8 opcode = DMALD;
> > +
> > +       writeb(opcode, desc_pool_virt);
> > +       return 1;
> > +}
> > +
> > +static inline int pl330_dmalds(u8 *desc_pool_virt)
> > +{
> > +       u8 opcode = DMALDS;
> > +
> > +       writeb(opcode, desc_pool_virt);
> > +       return 1;
> > +}
> > +
> > +static inline int pl330_dmaldb(u8 *desc_pool_virt)
> > +{
> > +       u8 opcode = DMALDB;
> > +
> > +       writeb(opcode, desc_pool_virt);
> > +       return 1;
> > +}
> > +
> > +static inline int pl330_dmaldps(u8 *desc_pool_virt, u8 periph)
> > +{
> > +       u8 opcode = DMALDPS;
> > +
> > +       writeb(opcode, desc_pool_virt++);
> > +       writeb(periph << 3, desc_pool_virt);
> > +       return 2;
> > +}
> > +
> > +static inline int pl330_dmaldpb(u8 *desc_pool_virt, u8 periph)
> > +{
> > +       u8 opcode = DMALDPB;
> > +
> > +       writeb(opcode, desc_pool_virt++);
> > +       writeb(periph << 3, desc_pool_virt);
> > +       return 2;
> > +}
> > +
> > +static inline int pl330_dmalp(u8 *desc_pool_virt, u8 iter, bool lc)
> > +{
> > +       u8 opcode = DMALP | (lc << 1);
> > +
> > +       writeb(opcode, desc_pool_virt++);
> > +       writeb(iter, desc_pool_virt);
> > +       return 2;
> > +}
> > +
> > +static inline int pl330_dmalpend(u8 *desc_pool_virt, u8 backwards_jump, bool lc)
> > +{
> > +       u8 opcode = DMALPEND | (lc << 2);
> > +
> > +       writeb(opcode, desc_pool_virt++);
> > +       writeb(backwards_jump, desc_pool_virt);
> > +       return 2;
> > +}
> > +
> > +static inline int pl330_dmalpends(u8 *desc_pool_virt, u8 backwards_jump,
> > +               bool lc)
> > +{
> > +       u8 opcode = DMALPENDS | (lc << 2);
> > +
> > +       writeb(opcode, desc_pool_virt++);
> > +       writeb(backwards_jump, desc_pool_virt);
> > +       return 2;
> > +}
> > +
> > +static inline int pl330_dmalpendb(u8 *desc_pool_virt, u8 backwards_jump,
> > +               bool lc)
> > +{
> > +       u8 opcode = DMALPENDB | (lc << 2);
> > +
> > +       writeb(opcode, desc_pool_virt++);
> > +       writeb(backwards_jump, desc_pool_virt);
> > +       return 2;
> > +}
> > +
> > +static inline int pl330_dmalpfe(u8 *desc_pool_virt, u8 backwards_jump, bool lc)
> > +{
> > +       u8 opcode = DMALPFE | (lc << 2);
> > +
> > +       writeb(opcode, desc_pool_virt++);
> > +       writeb(backwards_jump, desc_pool_virt);
> > +       return 2;
> > +}
> > +
> > +static inline int pl330_dmakill(u8 *desc_pool_virt)
> > +{
> > +       u8 opcode = DMAKILL;
> > +
> > +       writeb(opcode, desc_pool_virt);
> > +       return 1;
> > +}
> > +
> > +static inline int pl330_dmamov(u8 *desc_pool_virt, u8 rd, u32 imm)
> > +{
> > +       u8 opcode = DMAMOV;
> > +
> > +       writeb(opcode, desc_pool_virt++);
> > +       writeb(rd, desc_pool_virt++);
> > +       writel(imm, desc_pool_virt);
> > +       return 6;
> > +}
> > +
> > +static inline int pl330_dmanop(u8 *desc_pool_virt)
> > +{
> > +       u8 opcode = DMANOP;
> > +
> > +       writeb(opcode, desc_pool_virt);
> > +       return 1;
> > +}
> > +
> > +static inline int pl330_dmarmb(u8 *desc_pool_virt)
> > +{
> > +       u8 opcode = DMARMB;
> > +
> > +       writeb(opcode, desc_pool_virt);
> > +       return 1;
> > +}
> > +
> > +static inline int pl330_dmasev(u8 *desc_pool_virt, u8 event_num)
> > +{
> > +       u8 opcode = DMASEV;
> > +
> > +       writeb(opcode, desc_pool_virt++);
> > +       writeb(event_num << 3, desc_pool_virt);
> > +       return 2;
> > +}
> > +
> > +static inline int pl330_dmast(u8 *desc_pool_virt)
> > +{
> > +       u8 opcode = DMAST;
> > +
> > +       writeb(opcode, desc_pool_virt);
> > +       return 1;
> > +}
> > +
> > +static inline int pl330_dmasts(u8 *desc_pool_virt)
> > +{
> > +       u8 opcode = DMASTS;
> > +
> > +       writeb(opcode, desc_pool_virt);
> > +       return 1;
> > +}
> > +
> > +static inline int pl330_dmastb(u8 *desc_pool_virt)
> > +{
> > +       u8 opcode = DMASTB;
> > +
> > +       writeb(opcode, desc_pool_virt);
> > +       return 1;
> > +}
> > +
> > +static inline int pl330_dmastps(u8 *desc_pool_virt, u8 periph)
> > +{
> > +       u8 opcode = DMASTPS;
> > +
> > +       writeb(opcode, desc_pool_virt++);
> > +       writeb(periph << 3, desc_pool_virt);
> > +       return 2;
> > +}
> > +
> > +static inline int pl330_dmastpb(u8 *desc_pool_virt, u8 periph)
> > +{
> > +       u8 opcode = DMASTPB;
> > +
> > +       writeb(opcode, desc_pool_virt++);
> > +       writeb(periph << 3, desc_pool_virt);
> > +       return 2;
> > +}
> > +
> > +static inline int pl330_dmastz(u8 *desc_pool_virt)
> > +{
> > +       u8 opcode = DMASTZ;
> > +
> > +       writeb(opcode, desc_pool_virt);
> > +       return 1;
> > +}
> > +
> > +static inline int pl330_dmawfe(u8 *desc_pool_virt, u8 event_num, bool invalid)
> > +{
> > +       u8 opcode = DMAWFE;
> > +
> > +       writeb(opcode, desc_pool_virt++);
> > +       writeb((event_num << 3) | (invalid << 1), desc_pool_virt);
> > +       return 2;

writeb() is for peripherals. You can do 'desc_pool_virt[0] = ' here.

> > +
> > +static inline int pl330_dmawfps(u8 *desc_pool_virt, u8 periph)
> > +{
> > +       u8 opcode = DMAWFPS;
> > +
> > +       writeb(opcode, desc_pool_virt++);
> > +       writeb(periph << 3, desc_pool_virt);
> > +       return 2;
> > +}
> > +
> > +static inline int pl330_dmawfpb(u8 *desc_pool_virt, u8 periph)
> > +{
> > +       u8 opcode = DMAWFPB;
> > +
> > +       writeb(opcode, desc_pool_virt++);
> > +       writeb(periph << 3, desc_pool_virt);
> > +       return 2;
> > +}
> > +
> > +static inline int pl330_dmawfpp(u8 *desc_pool_virt, u8 periph)
> > +{
> > +       u8 opcode = DMAWFPP;
> > +
> > +       writeb(opcode, desc_pool_virt++);
> > +       writeb(periph << 3, desc_pool_virt);
> > +       return 2;
> > +}
> > +
> > +static inline int pl330_dmawmb(u8 *desc_pool_virt)
> > +{
> > +       u8 opcode = DMAWMB;
> > +
> > +       writeb(opcode, desc_pool_virt);
> > +       return 1;
> > +}
> > +
> > +static void pl330_dmago(struct pl330_chan *pl330_ch, struct pl330_desc *desc,
> > +               bool ns)
> > +{
> > +       unsigned int val;
> > +       u8 opcode = DMAGO | (ns << 1);
> > +
> > +       val = (pl330_ch->id << 24) | (opcode << 16) | (pl330_ch->id << 8);
> > +       writel(val, pl330_ch->pl330_dev->reg_base + PL330_DBGINST0);
> > +
> > +       val = desc->async_tx.phys;
> > +       writel(val, pl330_ch->pl330_dev->reg_base + PL330_DBGINST1);
> > +
> > +       writel(0, pl330_ch->pl330_dev->reg_base + PL330_DBGCMD);
> > +}
> As already mentioned by Marc, it doesn't have to be read/write.
> PL330 specifies the microcode buffers to be on system memory and that
> need not be treated like ioports.
> 
> [snip]
> 
> > +static struct pl330_desc *
> > +pl330_alloc_descriptor(struct pl330_chan *pl330_ch, gfp_t flags)
> > +{
> > +       struct device *dev = pl330_ch->pl330_dev->common.dev;
> > +       struct pl330_desc *desc;
> > +       dma_addr_t phys;
> > +
> > +       desc = kzalloc(sizeof(*desc), flags);
> > +       if (!desc)
> > +               return NULL;
> > +
> > +       desc->desc_pool_virt = dma_alloc_coherent(dev, PL330_POOL_SIZE, &phys,
> > +                       flags);
> These allocations are inefficient and don't need to be done so often.
> My implementation allocates a pool of such buffers(size specified by
> DMA API driver)
> and manage them by simple pointer manipulation.
> Though the xfer requests for DMA API has to be managed in the DMA API driver.

There's a dma pool implementation too in the kernel.

> 
> > +       if (!desc->desc_pool_virt) {
> > +               kfree(desc);
> > +               return NULL;
> > +       }
> > +
> > +       dma_async_tx_descriptor_init(&desc->async_tx, &pl330_ch->common);
> > +       desc->async_tx.tx_submit = pl330_tx_submit;
> > +       desc->async_tx.phys = phys;
> > +
> > +       return desc;
> > +}
> > +
> > +static struct pl330_desc *pl330_get_descriptor(struct pl330_chan *pl330_ch)
> > +{
> > +       struct device *dev = pl330_ch->pl330_dev->common.dev;
> > +       struct pl330_desc *desc;
> > +
> > +       if (!list_empty(&pl330_ch->free_desc)) {
> > +               desc = to_pl330_desc(pl330_ch->free_desc.next);
> > +               list_del(&desc->desc_node);
> > +       } else {
> > +               /* try to get another desc */
> > +               desc = pl330_alloc_descriptor(pl330_ch, GFP_ATOMIC);
> > +               if (!desc) {
> > +                       dev_err(dev, "descriptor alloc failed\n");
> > +                       return NULL;
> > +               }
> > +       }
> > +
> > +       return desc;
> > +}
> > +
> > +static int pl330_alloc_chan_resources(struct dma_chan *chan)
> > +{
> > +       struct pl330_chan *pl330_ch = to_pl330_chan(chan);
> > +       struct device *dev = pl330_ch->pl330_dev->common.dev;
> > +       struct pl330_desc *desc;
> > +       int i;
> > +       LIST_HEAD(tmp_list);
> > +
> > +       /* have we already been set up? */
> > +       if (!list_empty(&pl330_ch->free_desc))
> > +               return pl330_ch->desc_num;
> > +
> > +       for (i = 0; i < PL330_DESC_NUM; i++) {
> > +               desc = pl330_alloc_descriptor(pl330_ch, GFP_KERNEL);
> > +               if (!desc) {
> > +                       dev_err(dev, "Only %d initial descriptors\n", i);
> > +                       break;
> > +               }
> > +               list_add_tail(&desc->desc_node, &tmp_list);
> > +       }
> > +
> > +       pl330_ch->completed = chan->cookie = 1;
> > +       pl330_ch->desc_num = i;
> > +       list_splice(&tmp_list, &pl330_ch->free_desc);
> > +
> > +       return pl330_ch->desc_num;
> > +}
> > +
> 
> [snip]
> 
> > +static unsigned int pl330_make_instructions(struct pl330_chan *pl330_ch,
> > +               struct pl330_desc *desc, dma_addr_t dest, dma_addr_t src,
> > +               size_t len, unsigned int inst_size,
> > +               enum dma_data_direction direction)
> > +{
> > +       struct device *dev = pl330_ch->pl330_dev->common.dev;
> > +       void *buf = desc->desc_pool_virt;
> > +       u32 control = *(u32 *)&pl330_ch->pl330_reg_cc;
> > +       unsigned int loop_size;
> > +       unsigned int loop_size_rest;
> > +       unsigned int loop_count0;
> > +       unsigned int loop_count1 = 0;
> > +       unsigned int loop_count0_rest = 0;
> > +       unsigned int loop_start0 = 0;
> > +       unsigned int loop_start1 = 0;
> > +
> > +       dev_dbg(dev, "desc_pool_phys: 0x%x\n", desc->async_tx.phys);
> > +       dev_dbg(dev, "control: 0x%x\n", control);
> > +       dev_dbg(dev, "dest: 0x%x\n", dest);
> > +       dev_dbg(dev, "src: 0x%x\n", src);
> > +       dev_dbg(dev, "len: 0x%x\n", len);
> > +
> > +       /* calculate loop count */
> > +       loop_size = (pl330_ch->pl330_reg_cc.src_burst_len + 1) *
> > +               (1 << pl330_ch->pl330_reg_cc.src_burst_size);
> > +       loop_count0 = (len / loop_size) - 1;
> > +       loop_size_rest = len % loop_size;
> > +
> > +       dev_dbg(dev, "loop_size: 0x%x\n", loop_size);
> > +       dev_dbg(dev, "loop_count0: 0x%x\n", loop_count0);
> > +       dev_dbg(dev, "loop_size_rest: 0x%x\n", loop_size_rest);
> > +
> > +       if (loop_size_rest) {
> > +               dev_err(dev, "Transfer length must be aligned to loop_size\n");
> > +               return -EINVAL;
> > +       }
> This limit, though not serious, is unconditionally imposed by your design.
> There are ways to get around this situation by smarter generation of
> microcode.
> 
> > +       if (loop_count0 >= PL330_MAX_LOOPS) {
> > +               loop_count1 = (loop_count0 / PL330_MAX_LOOPS) - 1;
> > +               loop_count0_rest = (loop_count0 % PL330_MAX_LOOPS) + 1;
> > +               loop_count0 = PL330_MAX_LOOPS - 1;
> > +               dev_dbg(dev, "loop_count0: 0x%x\n", loop_count0);
> > +               dev_dbg(dev, "loop_count0_rest: 0x%x\n", loop_count0_rest);
> > +               dev_dbg(dev, "loop_count1: 0x%x\n", loop_count1);
> > +
> > +               if (loop_count1 >= PL330_MAX_LOOPS)
> > +                       dev_dbg(dev, "loop_count1 overflow\n");
> Again, the DMA API drivers will suffer just because someone didn't care
> to generate microcode efficiently.
> The microcode template for xfer takes only about 50 bytes and despite
> having PL330_POOL_SIZE buffer, you have to drop xfer requests just because
> the template is not properly designed.
> My implementation is limited only by the microcode buffer size, which in turn
> can be specified at startup by the DMA API driver.
> 
> > +       }
> > +
> > +       /* write instruction sets on buffer */
> > +       inst_size += pl330_dmamov(buf + inst_size, RD_DAR, dest);
> > +       inst_size += pl330_dmamov(buf + inst_size, RD_SAR, src);
> > +       inst_size += pl330_dmamov(buf + inst_size, RD_CCR, control);
> > +
> > +       if (loop_count1) {
> > +               inst_size += pl330_dmalp(buf + inst_size, loop_count1, LC_1);
> > +               loop_start1 = inst_size;
> > +       }
> > +
> > +       if (loop_count0) {
> > +               inst_size += pl330_dmalp(buf + inst_size, loop_count0, LC_0);
> > +               loop_start0 = inst_size;
> > +       }
> > +
> > +       if (direction == DMA_TO_DEVICE) {
> > +               struct pl330_dma_slave *dma_slave = pl330_ch->common.private;
> > +               u8 periph = dma_slave->peri_num;
> > +               inst_size += pl330_dmawfps(buf + inst_size, periph);
> > +               inst_size += pl330_dmald(buf + inst_size);
> > +               inst_size += pl330_dmastps(buf + inst_size, periph);
> > +               inst_size += pl330_dmaflushp(buf + inst_size, periph);
> > +       } else if (direction == DMA_FROM_DEVICE) {
> > +               struct pl330_dma_slave *dma_slave = pl330_ch->common.private;
> > +               u8 periph = dma_slave->peri_num;
> > +               inst_size += pl330_dmawfps(buf + inst_size, periph);
> > +               inst_size += pl330_dmaldps(buf + inst_size, periph);
> > +               inst_size += pl330_dmast(buf + inst_size);
> > +               inst_size += pl330_dmaflushp(buf + inst_size, periph);
> > +       } else {
> > +               inst_size += pl330_dmald(buf + inst_size);
> > +               inst_size += pl330_dmarmb(buf + inst_size);
> > +               inst_size += pl330_dmast(buf + inst_size);
> > +               inst_size += pl330_dmawmb(buf + inst_size);
> > +       }
> > +
> > +       if (loop_count0)
> > +               inst_size += pl330_dmalpend(buf + inst_size,
> > +                               inst_size - loop_start0, LC_0);
> > +
> > +       if (loop_count1)
> > +               inst_size += pl330_dmalpend(buf + inst_size,
> > +                               inst_size - loop_start1, LC_1);
> > +
> > +       if (loop_count0_rest) {
> > +               inst_size += pl330_dmalp(buf + inst_size, loop_count0_rest - 1,
> > +                               LC_0);
> > +               loop_start0 = inst_size;
> > +
> > +               if (direction == DMA_TO_DEVICE) {
> > +                       struct pl330_dma_slave *dma_slave =
> > +                               pl330_ch->common.private;
> > +                       u8 periph = dma_slave->peri_num;
> > +                       inst_size += pl330_dmawfps(buf + inst_size, periph);
> > +                       inst_size += pl330_dmald(buf + inst_size);
> > +                       inst_size += pl330_dmastps(buf + inst_size, periph);
> > +                       inst_size += pl330_dmaflushp(buf + inst_size, periph);
> > +               } else if (direction == DMA_FROM_DEVICE) {
> > +                       struct pl330_dma_slave *dma_slave =
> > +                               pl330_ch->common.private;
> > +                       u8 periph = dma_slave->peri_num;
> > +                       inst_size += pl330_dmawfps(buf + inst_size, periph);
> > +                       inst_size += pl330_dmaldps(buf + inst_size, periph);
> > +                       inst_size += pl330_dmast(buf + inst_size);
> > +                       inst_size += pl330_dmaflushp(buf + inst_size, periph);
> > +               } else {
> > +                       inst_size += pl330_dmald(buf + inst_size);
> > +                       inst_size += pl330_dmarmb(buf + inst_size);
> > +                       inst_size += pl330_dmast(buf + inst_size);
> > +                       inst_size += pl330_dmawmb(buf + inst_size);
> > +               }
> > +
> > +               inst_size += pl330_dmalpend(buf + inst_size,
> > +                               inst_size - loop_start0, LC_0);
> > +       }
> > +
> > +       inst_size += pl330_dmasev(buf + inst_size, pl330_ch->id);
> > +       inst_size += pl330_dmaend(buf + inst_size);
> > +
> > +       return inst_size;
> > +}
> This instruction generation leaves no scope for Security permissions for xfers,
> that is a feature of PL330.
> 
> [snip]
> 
> > +static void pl330_xfer_complete(struct pl330_chan *pl330_ch)
> > +{
> > +       struct pl330_desc *desc;
> > +       dma_async_tx_callback callback;
> > +       void *callback_param;
> > +
> > +       /* execute next desc */
> > +       pl330_issue_pending(&pl330_ch->common);
> > +
> > +       if (list_empty(&pl330_ch->complete_desc))
> > +               return;
> > +
> > +       desc = to_pl330_desc(pl330_ch->complete_desc.next);
> > +       list_move_tail(&desc->desc_node, &pl330_ch->free_desc);
> > +
> > +       pl330_ch->completed = desc->async_tx.cookie;
> > +
> > +       callback = desc->async_tx.callback;
> > +       callback_param = desc->async_tx.callback_param;
> > +       if (callback)
> > +               callback(callback_param);
> > +}
> > +
> > +static void pl330_ch_tasklet(unsigned long data)
> > +{
> > +       struct pl330_chan *pl330_ch = (struct pl330_chan *)data;
> > +       unsigned int val;
> > +
> > +       pl330_xfer_complete(pl330_ch);
> > +
> > +       /* enable channel interrupt */
> > +       val = readl(pl330_ch->pl330_dev->reg_base + PL330_INTEN);
> > +       val |= (1 << pl330_ch->id);
> > +       writel(val, pl330_ch->pl330_dev->reg_base + PL330_INTEN);
> > +}
> > +
> > +static irqreturn_t pl330_irq_handler(int irq, void *data)
> > +{
> > +       struct pl330_device *pl330_dev = data;
> > +       struct pl330_chan *pl330_ch;
> > +       unsigned int intstatus;
> > +       unsigned int inten;
> > +       int i;
> > +
> > +       intstatus = readl(pl330_dev->reg_base + PL330_INTSTATUS);
> > +
> > +       if (intstatus == 0)
> > +               return IRQ_HANDLED;
> > +
> > +       inten = readl(pl330_dev->reg_base + PL330_INTEN);
> > +       for (i = 0; i < PL330_MAX_CHANS; i++) {
> > +               if (intstatus & (1 << i)) {
> > +                       pl330_ch = &pl330_dev->pl330_ch[i];
> > +                       writel(1 << i, pl330_dev->reg_base + PL330_INTCLR);
> > +
> > +                       /* disable channel interrupt */
> > +                       inten &= ~(1 << i);
> > +                       writel(inten, pl330_dev->reg_base + PL330_INTEN);
> > +
> > +                       tasklet_schedule(&pl330_ch->tasklet);
> I think the DMA API already prohibits doing non-irq-context things(like enqueue)
> in the callbacks, so why implement tasklets here?
> This may still get you "audio working fine" with Samsung I2S controller,
> but is likely to cause problems with more demanding peripherals like SPDIF
> if they operate at best QOS(even 24bits/sample Stereo at 96000Hz) and has
> shallow FIFO(8 samples deep and hence 84 usecs acceptable latency).
> Remember that SPDIF usually goes with other system load like HDMI HD
> playaback which only increases the interrupt latency.
> 
> Not to forget, the overall throughput hit taken by other dma clients,
> like MMC over SPI that use 256/512 bytes DMA xfers, due to delayed
> DMA-DONE notifications.
> 
> Also, using tasklet here may break any protocol that involves _time-bound_ ACK
> via some register after the xfer has been done.
> 
> If some client needs to do sleepable-context stuff after DMA-Xfer-Done,
> let that driver implement tasklet in it's callback rather than have every
> client pay the price.
> 
> > +               }
> > +       }
> > +
> > +       return IRQ_HANDLED;
> > +}
> > +
> 
>   [snip]
> 
> > +
> > +static int __devinit pl330_probe(struct amba_device *adev, struct amba_id *id)
> > +{
> > +       struct pl330_device *pl330_dev;
> 
>   [snip]
> 
> > +
> > +       for (i = 0; i < PL330_MAX_CHANS; i++) {
> This whole code is designed around the assumption of every DMAC having
> PL330_MAX_CHANS channels. That is dangerous, since PL330 is highly
> configurable and some implementation may choose to implement less than
> PL330_MAX_CHANS(i.e 8) channels.
> As the PL330 spec says, most operations for non-existing channel result in
> DMA Abort. Further, the IRQ handler assumes utopia and doesn't even
> care to check
> such conditions, as a result on non-s3c like implementations there are many
> chances the system will just hang looping in DMA Abort irq or no irq at all
> depending upon the cause.
> Not to mention the unnecessary allocation for MAX possible resources, though
> not very serious.

ditto.
 
> regards
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

-- 
-- 
Ben

Q:      What's a light-year?
A:      One-third less calories than a regular year.


^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] PL330: Add PL330 DMA controller driver
  2010-03-31  1:07   ` Ben Dooks
@ 2010-03-31  1:40     ` jassi brar
  0 siblings, 0 replies; 29+ messages in thread
From: jassi brar @ 2010-03-31  1:40 UTC (permalink / raw)
  To: Ben Dooks
  Cc: Joonyoung Shim, linus.ml.walleij, dan.j.williams, kyungmin.park,
	linux-arm-kernel, linux-kernel

On Wed, Mar 31, 2010 at 10:07 AM, Ben Dooks <ben-linux@fluff.org> wrote:
> On Fri, Mar 26, 2010 at 11:08:06AM +0900, jassi brar wrote:
>> On Thu, Mar 25, 2010 at 12:17 PM, Joonyoung Shim
>> <jy0922.shim@samsung.com> wrote:
>> > +static struct pl330_desc *
>> > +pl330_alloc_descriptor(struct pl330_chan *pl330_ch, gfp_t flags)
>> > +{
>> > +       struct device *dev = pl330_ch->pl330_dev->common.dev;
>> > +       struct pl330_desc *desc;
>> > +       dma_addr_t phys;
>> > +
>> > +       desc = kzalloc(sizeof(*desc), flags);
>> > +       if (!desc)
>> > +               return NULL;
>> > +
>> > +       desc->desc_pool_virt = dma_alloc_coherent(dev, PL330_POOL_SIZE, &phys,
>> > +                       flags);
>> These allocations are inefficient and don't need to be done so often.
>> My implementation allocates a pool of such buffers(size specified by
>> DMA API driver)
>> and manage them by simple pointer manipulation.
>> Though the xfer requests for DMA API has to be managed in the DMA API driver.
>
> There's a dma pool implementation too in the kernel.
I meant that during 'probe' of the DMAC a chunk of dma consistent memory is
allocated for MicroCode for each channel.
We use the same chunk during xfers, since we can generate MC in a way that 256
bytes are enough to do an xfer of 2.5MB at a burst size of 1 byte, and for
bulkier requests the DMA API driver can either break up the bigger request or
allocate a bigger chunk for the channels.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] PL330: Add PL330 DMA controller driver
  2010-03-25 15:20       ` Linus Walleij
  2010-03-25 22:36         ` jassi brar
@ 2010-04-01  5:34         ` jassi brar
  2010-04-01 23:23           ` Linus Walleij
  1 sibling, 1 reply; 29+ messages in thread
From: jassi brar @ 2010-04-01  5:34 UTC (permalink / raw)
  To: Linus Walleij
  Cc: Joonyoung Shim, dan.j.williams, kyungmin.park, linux-kernel,
	linux-arm-kernel, Russell King - ARM Linux, Ben Dooks

On Fri, Mar 26, 2010 at 12:20 AM, Linus Walleij
<linus.ml.walleij@gmail.com> wrote:
> Why not just post it on the list? I'm curious! Since I'm working on a PrimeCell
> DMA API I would love to look at PrimeCell DMA engine drivers.

Here is an untested (compile-tested only) PL330 engine driver, with the following
features/bugs/limitations ...

o  The DMA API driver submits 'request' to PL330 engine.
     A request is a sequence of DMA 'xfers' to be done before the DMA API
     driver wants to be notified.
     A 'xfer' is the finest grain of DMA transaction that specifies how many
     _bytes_ are to be moved from 'source address' to 'destination address'.
     A req can be a scatter-Gather-List.

o  PL330 engine accepts requests from DMA API drivers in ping-pong manner,
    i.e, at any time maximum two reqs can be queued. Other reqs have
    to be buffered by DMA API drivers and enqueued whenever a req-finish
    callback is made.

o  Only necessary resources for a DMAC are allocated according to the
configuration
    read from DMAC during startup.
    Since MicroCode buffers are a channel's resource, they are
allocated just once
    during startup and reused for every other xfer. A channel has its
MC buff divided into
    two equal parts - one for each ping-pong request. The DMA API driver can
    specify the size of MC buffer that each channel(hence req) has,
which decides the
    maximum possible total xfer size that the channel can do in one request.

o  It is possible to do DMA request at different channel configuration
every time; this feature can
    be used by DMA API drivers for implementing 'server' channels that
are not dedicated
    to a particular client but accept requests from any client. Of
course, such channels won't
    be able to guarantee any QOS.

o  Secure, Privilege and InsnAccess modes can be specified for each request.

o  TODO: Desirable is to implement true LLI using MicroCode
modification during each
    request enqueue, so that the xfer continues even while IRQ is
handled and callbacks made.
    To me, there doesn't seem to be a way to flush ICACHE of a channel
without halting it, so we
    can't modify MicroCode in runtime. Using two channels per client
to achieve true LLI is the last resort.
    So currently, cpu intervention is required to trigger each xfer,
hence interrupt latency might play
    some role.
o  TODO: PAUSE/RESUME support. Currently the DMA API driver has to emulate it.


I haven't yet tested it with any DMA API driver, so I may still make some
further changes.

Suggestions welcome.

----------8<-------------

Basic PL330 engine driver

Signed-off-by: Jassi Brar <jassi.brar@samsung.com>
---
 arch/arm/common/Kconfig               |    3 +
 arch/arm/common/Makefile              |    1 +
 arch/arm/common/pl330.c               | 1891 +++++++++++++++++++++++++++++++++
 arch/arm/include/asm/hardware/pl330.h |  197 ++++
 4 files changed, 2092 insertions(+), 0 deletions(-)
 create mode 100644 arch/arm/common/pl330.c
 create mode 100644 arch/arm/include/asm/hardware/pl330.h

diff --git a/arch/arm/common/Kconfig b/arch/arm/common/Kconfig
index a2cd9be..19573a5 100644
--- a/arch/arm/common/Kconfig
+++ b/arch/arm/common/Kconfig
@@ -36,3 +36,6 @@ config SHARP_SCOOP

 config COMMON_CLKDEV
 	bool
+
+config PL330
+	bool
diff --git a/arch/arm/common/Makefile b/arch/arm/common/Makefile
index 7cb7961..ad50420 100644
--- a/arch/arm/common/Makefile
+++ b/arch/arm/common/Makefile
@@ -18,3 +18,4 @@ obj-$(CONFIG_ARCH_IXP2000)	+= uengine.o
 obj-$(CONFIG_ARCH_IXP23XX)	+= uengine.o
 obj-$(CONFIG_PCI_HOST_ITE8152)  += it8152.o
 obj-$(CONFIG_COMMON_CLKDEV)	+= clkdev.o
+obj-$(CONFIG_PL330)		+= pl330.o
diff --git a/arch/arm/common/pl330.c b/arch/arm/common/pl330.c
new file mode 100644
index 0000000..20a0690
--- /dev/null
+++ b/arch/arm/common/pl330.c
@@ -0,0 +1,1891 @@
+/* linux/arch/arm/common/pl330.c
+ *
+ * Copyright (C) 2010 Samsung Electronics Co Ltd.
+ *	Jaswinder Singh <jassi.brar@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/hardware/pl330.h>
+
+/* Register and Bit field Definitions */
+#define DS		0x0
+#define DS_ST_STOP	0x0
+#define DS_ST_EXEC	0x1
+#define DS_ST_CMISS	0x2
+#define DS_ST_UPDTPC	0x3
+#define DS_ST_WFE	0x4
+#define DS_ST_ATBRR	0x5
+#define DS_ST_QBUSY	0x6
+#define DS_ST_WFP	0x7
+#define DS_ST_KILL	0x8
+#define DS_ST_CMPLT	0x9
+#define DS_ST_FLTCMP	0xe
+#define DS_ST_FAULT	0xf
+
+#define DPC		0x4
+#define INTEN		0x20
+#define ES		0x24
+#define INTSTATUS	0x28
+#define INTCLR		0x2c
+#define FSM		0x30
+#define FSC		0x34
+#define FTM		0x38
+
+#define _FTC		0x40
+#define FTC(n)		(_FTC + (n)*0x4)
+
+#define _CS		0x100
+#define CS(n)		(_CS + (n)*0x8)
+#define CS_CNS		(1 << 21)
+
+#define _CPC		0x104
+#define CPC(n)		(_CPC + (n)*0x8)
+
+#define _SA		0x400
+#define SA(n)		(_SA + (n)*0x20)
+
+#define _DA		0x404
+#define DA(n)		(_DA + (n)*0x20)
+
+#define _CC		0x408
+#define CC(n)		(_CC + (n)*0x20)
+
+#define CC_SRCINC	(1 << 0)
+#define CC_DSTINC	(1 << 14)
+#define CC_SRCPRI	(1 << 8)
+#define CC_DSTPRI	(1 << 22)
+#define CC_SRCNS	(1 << 9)
+#define CC_DSTNS	(1 << 23)
+#define CC_SRCIA	(1 << 10)
+#define CC_DSTIA	(1 << 24)
+#define CC_SRCBRSTLEN_SHFT	4
+#define CC_DSTBRSTLEN_SHFT	18
+#define CC_SRCBRSTSIZE_SHFT	1
+#define CC_DSTBRSTSIZE_SHFT	15
+#define CC_SRCCCTRL_SHFT	11
+#define CC_SRCCCTRL_MASK	0x7
+#define CC_DSTCCTRL_SHFT	25
+#define CC_DRCCCTRL_MASK	0x7
+#define CC_SWAP_SHFT	28
+
+#define _LC0		0x40c
+#define LC0(n)		(_LC0 + (n)*0x20)
+
+#define _LC1		0x410
+#define LC1(n)		(_LC1 + (n)*0x20)
+
+#define DBGSTATUS	0xd00
+#define DBG_BUSY	(1 << 0)
+
+#define DBGCMD		0xd04
+#define DBGINST0	0xd08
+#define DBGINST1	0xd0c
+
+#define CR0		0xe00
+#define CR1		0xe04
+#define CR2		0xe08
+#define CR3		0xe0c
+#define CR4		0xe10
+#define CRD		0xe14
+
+#define PERIPH_ID	0xfe0
+#define PCELL_ID	0xff0
+
+#define CR0_PERIPH_REQ_SET	(1 << 0)
+#define CR0_BOOT_EN_SET		(1 << 1)
+#define CR0_BOOT_MAN_NS		(1 << 2)
+#define CR0_NUM_CHANS_SHIFT	4
+#define CR0_NUM_CHANS_MASK	0x7
+#define CR0_NUM_PERIPH_SHIFT	12
+#define CR0_NUM_PERIPH_MASK	0x1f
+#define CR0_NUM_EVENTS_SHIFT	17
+#define CR0_NUM_EVENTS_MASK	0x1f
+
+#define CR1_ICACHE_LEN_SHIFT	0
+#define CR1_ICACHE_LEN_MASK	0x7
+#define CR1_NUM_ICACHELINES_SHIFT	4
+#define CR1_NUM_ICACHELINES_MASK	0xf
+
+#define CRD_DATA_WIDTH_SHIFT	0
+#define CRD_DATA_WIDTH_MASK	0x7
+#define CRD_WR_CAP_SHIFT	4
+#define CRD_WR_CAP_MASK		0x7
+#define CRD_WR_Q_DEP_SHIFT	8
+#define CRD_WR_Q_DEP_MASK	0xf
+#define CRD_RD_CAP_SHIFT	12
+#define CRD_RD_CAP_MASK		0x7
+#define CRD_RD_Q_DEP_SHIFT	16
+#define CRD_RD_Q_DEP_MASK	0xf
+#define CRD_DATA_BUFF_SHIFT	20
+#define CRD_DATA_BUFF_MASK	0x3ff
+
+#define	PART		0x330
+#define DESIGNER	0x41
+#define REVISION	0x0
+#define INTEG_CFG	0x0
+#define PERIPH_ID_VAL	((PART << 0) | (DESIGNER << 12) \
+			  | (REVISION << 20) | (INTEG_CFG << 24))
+
+#define PCELL_ID_VAL	0xb105f00d
+
+#define PL330_STATE_STOPPED		(1 << 0)
+#define PL330_STATE_EXECUTING		(1 << 1)
+#define PL330_STATE_WFE			(1 << 2)
+#define PL330_STATE_FAULTING		(1 << 3)
+#define PL330_STATE_COMPLETING		(1 << 4)
+#define PL330_STATE_WFP			(1 << 5) /* TOUT for exit? */
+#define PL330_STATE_KILLING		(1 << 6)
+#define PL330_STATE_FAULT_COMPLETING	(1 << 7)
+#define PL330_STATE_CACHEMISS		(1 << 8)
+#define PL330_STATE_UPDTPC		(1 << 9)
+#define PL330_STATE_ATBARRIER		(1 << 10) /* TOUT for exit? */
+#define PL330_STATE_QUEUEBUSY		(1 << 11) /* TOUT for exit? */
+#define PL330_STATE_INVALID		(1 << 15) /* To catch error */
+
+#define PL330_STABLE_STATES (PL330_STATE_STOPPED | PL330_STATE_EXECUTING \
+				| PL330_STATE_WFE | PL330_STATE_FAULTING)
+
+#define CMD_DMAADDH	0x54
+#define CMD_DMAEND	0x00
+#define CMD_DMAFLUSHP	0x35
+#define CMD_DMAGO	0xa0
+#define CMD_DMALD	0x04
+#define CMD_DMALDP	0x25
+#define CMD_DMALP	0x20
+#define CMD_DMALPEND	0x28
+#define CMD_DMAKILL	0x01
+#define CMD_DMAMOV	0xbc
+#define CMD_DMANOP	0x18
+#define CMD_DMARMB	0x12
+#define CMD_DMASEV	0x34
+#define CMD_DMAST	0x08
+#define CMD_DMASTP	0x29
+#define CMD_DMASTZ	0x0c
+#define CMD_DMAWFE	0x36
+#define CMD_DMAWFP	0x30
+#define CMD_DMAWMB	0x13
+
+#define SZ_DMAADDH	3
+#define SZ_DMAEND	1
+#define SZ_DMAFLUSHP	2
+#define SZ_DMALD	1
+#define SZ_DMALDP	2
+#define SZ_DMALP	2
+#define SZ_DMALPEND	2
+#define SZ_DMAKILL	1
+#define SZ_DMAMOV	6
+#define SZ_DMANOP	1
+#define SZ_DMARMB	1
+#define SZ_DMASEV	2
+#define SZ_DMAST	1
+#define SZ_DMASTP	2
+#define SZ_DMASTZ	1
+#define SZ_DMAWFE	2
+#define SZ_DMAWFP	2
+#define SZ_DMAWMB	1
+#define SZ_DMAGO	6
+
+/* Beats per burst and bytes per beat, decoded from the source fields of a CCR value */
+#define BRST_LEN(ccr)	((((ccr) >> CC_SRCBRSTLEN_SHFT) & 0xf) + 1)
+#define BRST_SIZE(ccr)	(1 << (((ccr) >> CC_SRCBRSTSIZE_SHFT) & 0x7))
+
+/*
+ * One burst (a single DMALD/DMAST) moves BRST_LEN beats of BRST_SIZE
+ * bytes each, so both factors take part in the bytes <-> bursts conversion.
+ */
+#define BYTE_TO_BURST(b, ccr)		((b) / BRST_SIZE(ccr) / BRST_LEN(ccr))
+#define BURST_TO_BYTE(c, ccr)		((c) * BRST_SIZE(ccr) * BRST_LEN(ccr))
+
+/* With 256 bytes, we can do more than 2.5MB and 5MB xfers per req
+ * at 1byte/burst for P<->M and M<->M respectively.
+ * For typical scenario, at 1word/burst, 10MB and 20MB xfers per req
+ * should be enough for P<->M and M<->M respectively.
+ */
+#define MCODE_BUFF_PER_REQ	256
+
+/* If program counter 'pc' is at req 'r' (whose microcode spans 'sz' bytes) */
+#define PC_AT_REQ(r, sz, pc)	(((pc) >= (r)->mc_bus) && \
+				((pc) < ((r)->mc_bus + (sz))))
+
+/* Number of busy-wait iterations for 't' milliseconds, from loops_per_jiffy */
+#define msecs_to_loops(t) (loops_per_jiffy / 1000 * HZ * (t))
+
+/* Working set handed down through the microcode generators */
+struct _xfer_spec {
+	u32 ccr;		/* value loaded into CCR by the leading DMAMOV */
+	struct pl330_req *r;	/* client request being encoded */
+	struct pl330_xfer *x;	/* xfer unit of 'r' currently being encoded */
+};
+
+/* Destination register selector, stored as the second byte of DMAMOV */
+enum dmamov_dst {
+	SAR = 0,
+	CCR,
+	DAR,
+};
+
+/* Address register selector, shifted into bit 1 of the DMAADDH opcode */
+enum pl330_dst {
+	SRC = 0,
+	DST,
+};
+
+/* Condition variant requested from the load/store/wait/loop-end emitters */
+enum pl330_cond {
+	SINGLE,
+	BURST,
+	ALWAYS,
+};
+
+/* One of the two request slots owned by a channel thread */
+struct _pl330_req {
+	/* BUS address of this slot's MicroCode buffer */
+	u32 mc_bus;
+	/* CPU address of this slot's MicroCode buffer */
+	void *mc_cpu;
+	/* client request occupying the slot, NULL when the slot is free */
+	struct pl330_req *r;
+	/* hook to attach to DMAC's list of reqs with callbacks due */
+	struct list_head rqd;
+};
+
+/* Pending reset work, filled in by pl330_update() and consumed by the tasklet */
+struct _pl330_tbd {
+	/* DMAC needs to be reset */
+	unsigned reset_dmac:1;
+	/* manager needs to be reset */
+	unsigned reset_mngr:1;
+	/* which thread needs to be reset (one bit per channel id) */
+	unsigned reset_chan:8;
+};
+
+struct pl330_thread { /* Each DMA Channel */
+	/* Channel number; the manager thread uses num_chan (see is_manager()) */
+	u8 id;
+	/* Event number signalled by DMASEV when a request of this thread ends */
+	int ev;
+	/* If the channel is not yet acquired by any client */
+	bool free;
+	/* 0 for inactive, index of active request + 1, otherwise */
+	unsigned active;
+	/* Serialises access to the request slots of this thread */
+	struct mutex mtx;
+	/* Only two at a time */
+	struct _pl330_req req[2];
+	/* parent DMAC */
+	struct pl330_dmac *dmac;
+};
+
+/* Life-cycle of a DMAC; once DYING, submit/update/ctrl requests are refused */
+enum pl330_dmac_state {
+	UNINIT,
+	INIT,
+	DYING,
+};
+
+/* Each DMA Controller */
+struct pl330_dmac {
+	/* Reset work queued for the tasklet */
+	struct _pl330_tbd	dmac_tbd;
+	spinlock_t		lock;
+	/* hook to attach to global list of DMACs */
+	struct list_head	node;
+	/* Holds list of reqs with due callbacks */
+	struct list_head	req_done;
+	struct device		*dev;
+	/* Platform supplied description: register base, configuration, ... */
+	struct pl330_info	pinfo;
+	/* Maximum possible events/irqs; each entry holds the owning
+	 * thread id, or -1 while the event is unallocated
+	 */
+	int			events[32];
+	/* BUS address of buffer allocated for MicroCode for all Channels */
+	u32			mcode_bus;
+	/* CPU address of buffer allocated for MicroCode for all Channels*/
+	void			*mcode_cpu;
+	struct pl330_thread	*channels;
+	/* MANAGER thread is _always_ the last one */
+	struct pl330_thread	*manager;
+	/* Tasklet running pl330_dotask() to carry out pending resets */
+	struct tasklet_struct	tasks;
+	enum pl330_dmac_state	state;
+};
+
+/* All PL-330 DMACs are added to this list */
+static LIST_HEAD(pl330_list);
+/* Protection mutex while list manipulation */
+static DEFINE_MUTEX(pl330_mutex);
+
+/* Run the client's completion hook for 'r', if there is a request with one */
+static inline void _callback(struct pl330_req *r, int err)
+{
+	if (!r || !r->xfer_cb)
+		return;
+
+	r->xfer_cb(r->token, err);
+}
+
+/* True when neither request slot of the thread is occupied */
+static inline bool _queue_empty(struct pl330_thread *thrd)
+{
+	return !thrd->req[0].r && !thrd->req[1].r;
+}
+
+/* True when both request slots of the thread are occupied */
+static inline bool _queue_full(struct pl330_thread *thrd)
+{
+	bool first = thrd->req[0].r != NULL;
+	bool second = thrd->req[1].r != NULL;
+
+	return first && second;
+}
+
+/* MANAGER is indexed at the end, i.e. its id equals the channel count */
+static inline bool is_manager(struct pl330_thread *thrd)
+{
+	return thrd->id == thrd->dmac->pinfo.pcfg.num_chan;
+}
+
+/* Whether the DMAC owning 'thrd' has its manager in Non-Secure mode */
+static inline bool _manager_ns(struct pl330_thread *thrd)
+{
+	return !!(thrd->dmac->pinfo.pcfg.mode & DMAC_MODE_NS);
+}
+
+/* Assemble a 32-bit ID from four byte-wide registers placed 4 bytes apart
+ * starting at offset 'off'; the first register supplies the lowest byte.
+ */
+static inline u32 get_id(struct pl330_dmac *pl330, u32 off)
+{
+	void __iomem *r = pl330->pinfo.base;
+	u32 id = 0;
+	int i;
+
+	/* Widen before shifting: a plain readb() << 24 is an int shift that
+	 * can run into the sign bit.
+	 */
+	for (i = 0; i < 4; i++)
+		id |= (u32)readb(r + off + i * 0x4) << (i * 8);
+
+	return id;
+}
+
+/* Encode DMAADDH: add the 16-bit immediate 'val' to the register picked by
+ * 'da'. With 'dry_run' set nothing is written to 'buf'.
+ * Returns the size of the instruction in bytes.
+ */
+static inline u32 _emit_ADDH(unsigned dry_run, u8 buf[],
+		enum pl330_dst da, u16 val)
+{
+	if (dry_run)
+		return SZ_DMAADDH;
+
+	buf[0] = CMD_DMAADDH;
+	buf[0] |= (da << 1);
+	/* The immediate sits at an odd offset; store it bytewise, low
+	 * byte first, instead of through a misaligned u16 pointer.
+	 */
+	buf[1] = val & 0xff;
+	buf[2] = val >> 8;
+
+	return SZ_DMAADDH;
+}
+
+/* Encode DMAEND; a dry run only reports the instruction size */
+static inline u32 _emit_END(unsigned dry_run, u8 buf[])
+{
+	if (!dry_run)
+		buf[0] = CMD_DMAEND;
+
+	return SZ_DMAEND;
+}
+
+/* Encode DMAFLUSHP; the peripheral number goes to bits [7:3] of byte 1 */
+static inline u32 _emit_FLUSHP(unsigned dry_run, u8 buf[], u8 peri)
+{
+	if (!dry_run) {
+		buf[0] = CMD_DMAFLUSHP;
+		buf[1] = (peri & 0x1f) << 3;
+	}
+
+	return SZ_DMAFLUSHP;
+}
+
+/* Encode DMALD; 'cond' selects the single, burst or unconditional form */
+static inline u32 _emit_LD(unsigned dry_run, u8 buf[], enum pl330_cond cond)
+{
+	u8 opcode = CMD_DMALD;
+
+	if (dry_run)
+		return SZ_DMALD;
+
+	switch (cond) {
+	case SINGLE:
+		opcode |= (0 << 1) | (1 << 0);
+		break;
+	case BURST:
+		opcode |= (1 << 1) | (1 << 0);
+		break;
+	default:
+		/* ALWAYS: plain opcode */
+		break;
+	}
+
+	buf[0] = opcode;
+
+	return SZ_DMALD;
+}
+
+/* Encode DMALDP for peripheral 'peri'; bit 1 of the opcode marks a burst */
+static inline u32 _emit_LDP(unsigned dry_run, u8 buf[],
+		enum pl330_cond cond, u8 peri)
+{
+	if (!dry_run) {
+		buf[0] = CMD_DMALDP | ((cond == BURST) ? (1 << 1) : 0);
+		buf[1] = (peri & 0x1f) << 3;
+	}
+
+	return SZ_DMALDP;
+}
+
+/* Encode DMALP: start loop counter 'loop' (0 or 1) for 'cnt' iterations.
+ * Callers pass 1..256; 256 arrives here truncated to 0 by the u8 parameter.
+ * With 'dry_run' set nothing is written to 'buf'.
+ * Returns the size of the instruction in bytes.
+ */
+static inline u32 _emit_LP(unsigned dry_run, u8 buf[],
+		unsigned loop, u8 cnt)
+{
+	if (dry_run)
+		return SZ_DMALP;
+
+	buf[0] = CMD_DMALP;
+
+	if (loop)
+		buf[0] |= (1 << 1);
+
+	/* The instruction holds "iterations - 1" in its 8-bit field, so
+	 * 1 -> 0 and the truncated 256 (0) wraps to 255.
+	 */
+	buf[1] = (u8)(cnt - 1);
+
+	return SZ_DMALP;
+}
+
+/* Arguments of _emit_LPEND() */
+struct _arg_LPEND {
+	/* single/burst/unconditional form of the instruction */
+	enum pl330_cond cond;
+	/* the loop being closed has no iteration count */
+	bool forever;
+	/* which loop counter (0 or 1) the loop uses */
+	unsigned loop;
+	/* distance in bytes to jump back, stored as the second byte */
+	u8 bjump;
+};
+
+/* Encode DMALPEND as described by 'arg'.
+ * With 'dry_run' set nothing is written to 'buf'.
+ * Returns the size of the instruction in bytes.
+ */
+static inline u32 _emit_LPEND(unsigned dry_run, u8 buf[],
+		const struct _arg_LPEND *arg)
+{
+	enum pl330_cond cond = arg->cond;
+	bool forever = arg->forever;
+	unsigned loop = arg->loop;
+	u8 bjump = arg->bjump;
+
+	if (dry_run)
+		return SZ_DMALPEND;
+
+	buf[0] = CMD_DMALPEND;
+
+	if (loop)
+		buf[0] |= (1 << 2);
+
+	/* Bit 4 is 'nf' (not forever): it must be set for a counted loop
+	 * opened by DMALP and left clear only for a loop-forever.
+	 */
+	if (!forever)
+		buf[0] |= (1 << 4);
+
+	if (cond == SINGLE)
+		buf[0] |= (0 << 1) | (1 << 0);
+	else if (cond == BURST)
+		buf[0] |= (1 << 1) | (1 << 0);
+
+	buf[1] = bjump;
+
+	return SZ_DMALPEND;
+}
+
+/* Encode DMAKILL; a dry run only reports the instruction size */
+static inline u32 _emit_KILL(unsigned dry_run, u8 buf[])
+{
+	if (!dry_run)
+		buf[0] = CMD_DMAKILL;
+
+	return SZ_DMAKILL;
+}
+
+/* Encode DMAMOV: load the 32-bit immediate 'val' into register 'dst'.
+ * With 'dry_run' set nothing is written to 'buf'.
+ * Returns the size of the instruction in bytes.
+ */
+static inline u32 _emit_MOV(unsigned dry_run, u8 buf[],
+		enum dmamov_dst dst, u32 val)
+{
+	if (dry_run)
+		return SZ_DMAMOV;
+
+	buf[0] = CMD_DMAMOV;
+	buf[1] = dst;
+	/* The immediate starts 2 bytes into the instruction and is rarely
+	 * word aligned; store it bytewise, least significant byte first,
+	 * instead of through a cast u32 pointer.
+	 */
+	buf[2] = val & 0xff;
+	buf[3] = (val >> 8) & 0xff;
+	buf[4] = (val >> 16) & 0xff;
+	buf[5] = (val >> 24) & 0xff;
+
+	return SZ_DMAMOV;
+}
+
+/* Encode DMANOP; a dry run only reports the instruction size */
+static inline u32 _emit_NOP(unsigned dry_run, u8 buf[])
+{
+	if (!dry_run)
+		buf[0] = CMD_DMANOP;
+
+	return SZ_DMANOP;
+}
+
+/* Encode DMARMB; a dry run only reports the instruction size */
+static inline u32 _emit_RMB(unsigned dry_run, u8 buf[])
+{
+	if (!dry_run)
+		buf[0] = CMD_DMARMB;
+
+	return SZ_DMARMB;
+}
+
+/* Encode DMASEV; the event number goes to bits [7:3] of byte 1 */
+static inline u32 _emit_SEV(unsigned dry_run, u8 buf[], u8 ev)
+{
+	if (!dry_run) {
+		buf[0] = CMD_DMASEV;
+		buf[1] = (ev & 0x1f) << 3;
+	}
+
+	return SZ_DMASEV;
+}
+
+/* Encode DMAST; 'cond' selects the single, burst or unconditional form */
+static inline u32 _emit_ST(unsigned dry_run, u8 buf[], enum pl330_cond cond)
+{
+	u8 opcode = CMD_DMAST;
+
+	if (dry_run)
+		return SZ_DMAST;
+
+	switch (cond) {
+	case SINGLE:
+		opcode |= (0 << 1) | (1 << 0);
+		break;
+	case BURST:
+		opcode |= (1 << 1) | (1 << 0);
+		break;
+	default:
+		/* ALWAYS: plain opcode */
+		break;
+	}
+
+	buf[0] = opcode;
+
+	return SZ_DMAST;
+}
+
+/* Encode DMASTP for peripheral 'peri'; bit 1 of the opcode marks a burst */
+static inline u32 _emit_STP(unsigned dry_run, u8 buf[],
+		enum pl330_cond cond, u8 peri)
+{
+	if (!dry_run) {
+		buf[0] = CMD_DMASTP | ((cond == BURST) ? (1 << 1) : 0);
+		buf[1] = (peri & 0x1f) << 3;
+	}
+
+	return SZ_DMASTP;
+}
+
+/* Encode DMASTZ; a dry run only reports the instruction size */
+static inline u32 _emit_STZ(unsigned dry_run, u8 buf[])
+{
+	if (!dry_run)
+		buf[0] = CMD_DMASTZ;
+
+	return SZ_DMASTZ;
+}
+
+/* Encode DMAWFE: event number in bits [7:3] of byte 1, bit 1 set when
+ * 'invalidate' is requested.
+ */
+static inline u32 _emit_WFE(unsigned dry_run, u8 buf[], u8 ev,
+		unsigned invalidate)
+{
+	u8 operand;
+
+	if (dry_run)
+		return SZ_DMAWFE;
+
+	operand = (ev & 0x1f) << 3;
+	if (invalidate)
+		operand |= (1 << 1);
+
+	buf[0] = CMD_DMAWFE;
+	buf[1] = operand;
+
+	return SZ_DMAWFE;
+}
+
+/* Encode DMAWFP for peripheral 'peri'; the two low opcode bits select the
+ * single, burst or (for any other 'cond') peripheral-driven form.
+ */
+static inline u32 _emit_WFP(unsigned dry_run, u8 buf[],
+		enum pl330_cond cond, u8 peri)
+{
+	u8 opcode = CMD_DMAWFP;
+
+	if (dry_run)
+		return SZ_DMAWFP;
+
+	switch (cond) {
+	case SINGLE:
+		opcode |= (0 << 1) | (0 << 0);
+		break;
+	case BURST:
+		opcode |= (1 << 1) | (0 << 0);
+		break;
+	default:
+		opcode |= (0 << 1) | (1 << 0);
+		break;
+	}
+
+	buf[0] = opcode;
+	buf[1] = (peri & 0x1f) << 3;
+
+	return SZ_DMAWFP;
+}
+
+/* Encode DMAWMB; a dry run only reports the instruction size */
+static inline u32 _emit_WMB(unsigned dry_run, u8 buf[])
+{
+	if (!dry_run)
+		buf[0] = CMD_DMAWMB;
+
+	return SZ_DMAWMB;
+}
+
+/* Arguments of _emit_GO() */
+struct _arg_GO {
+	/* channel to start */
+	u8 chan;
+	/* bus address of the microcode the channel should execute */
+	u32 addr;
+	/* start the channel in the Non-Secure state */
+	unsigned ns:1;
+};
+
+/* Encode DMAGO as described by 'arg'.
+ * With 'dry_run' set nothing is written to 'buf'.
+ * Returns the size of the instruction in bytes.
+ */
+static inline u32 _emit_GO(unsigned dry_run, u8 buf[],
+		const struct _arg_GO *arg)
+{
+	u8 chan = arg->chan;
+	u32 addr = arg->addr;
+	unsigned ns = arg->ns;
+
+	if (dry_run)
+		return SZ_DMAGO;
+
+	buf[0] = CMD_DMAGO;
+	if (ns)
+		buf[0] |= (ns << 1);
+
+	buf[1] = chan & 0x7;
+
+	/* Same bytes as a direct u32 store, but without dereferencing a
+	 * misaligned pointer (buf + 2 is usually not word aligned).
+	 */
+	memcpy(&buf[2], &addr, sizeof(addr));
+
+	return SZ_DMAGO;
+}
+
+/* Run the instruction in 'insn' (up to 6 bytes) through the debug
+ * registers, either as the manager or as channel thread 'thrd'.
+ */
+static inline void _execute_DBGINSN(struct pl330_thread *thrd,
+		u8 insn[], bool as_manager)
+{
+	void __iomem *regs = thrd->dmac->pinfo.base;
+	u32 val;
+
+	/* Widen first so that the shift by 24 cannot hit the sign bit of int */
+	val = ((u32)insn[0] << 16) | ((u32)insn[1] << 24);
+	if (!as_manager) {
+		val |= (1 << 0);
+		val |= (thrd->id << 8); /* Channel Number */
+	}
+	writel(val, regs + DBGINST0);
+
+	/* insn + 2 is not word aligned; copy instead of casting to u32 * */
+	memcpy(&val, &insn[2], sizeof(val));
+	writel(val, regs + DBGINST1);
+
+	/* Get going: loading DBGINST0/1 alone does not run anything, the
+	 * DMAC executes the instruction once 0 is written to DBGCMD.
+	 */
+	writel(0, regs + DBGCMD);
+}
+
+/* Poll DBGSTATUS until the busy flag clears or the loop budget for
+ * 5 msecs runs out.
+ * Returns Time-Out
+ */
+static bool _until_dmac_idle(struct pl330_thread *thrd)
+{
+	void __iomem *regs = thrd->dmac->pinfo.base;
+	unsigned long left = msecs_to_loops(5);
+
+	for (;;) {
+		/* Until Manager is Idle */
+		if (!(readl(regs + DBGSTATUS) & DBG_BUSY))
+			break;
+
+		cpu_relax();
+
+		if (!--left)
+			break;
+	}
+
+	return left == 0;
+}
+
+/* Read the status field of 'thrd' (DS for the manager, CS(id) for a
+ * channel) and translate it into the matching PL330_STATE_* flag.
+ */
+static inline u32 _state(struct pl330_thread *thrd)
+{
+	void __iomem *regs = thrd->dmac->pinfo.base;
+	bool mngr = is_manager(thrd);
+	u32 st;
+
+	if (mngr)
+		st = readl(regs + DS) & 0xf;
+	else
+		st = readl(regs + CS(thrd->id)) & 0xf;
+
+	/* States reported for the manager as well as for channels */
+	switch (st) {
+	case DS_ST_STOP:
+		return PL330_STATE_STOPPED;
+	case DS_ST_EXEC:
+		return PL330_STATE_EXECUTING;
+	case DS_ST_CMISS:
+		return PL330_STATE_CACHEMISS;
+	case DS_ST_UPDTPC:
+		return PL330_STATE_UPDTPC;
+	case DS_ST_WFE:
+		return PL330_STATE_WFE;
+	case DS_ST_FAULT:
+		return PL330_STATE_FAULTING;
+	default:
+		break;
+	}
+
+	/* Anything else is treated as invalid for the manager */
+	if (mngr)
+		return PL330_STATE_INVALID;
+
+	switch (st) {
+	case DS_ST_ATBRR:
+		return PL330_STATE_ATBARRIER;
+	case DS_ST_QBUSY:
+		return PL330_STATE_QUEUEBUSY;
+	case DS_ST_WFP:
+		return PL330_STATE_WFP;
+	case DS_ST_KILL:
+		return PL330_STATE_KILLING;
+	case DS_ST_CMPLT:
+		return PL330_STATE_COMPLETING;
+	case DS_ST_FLTCMP:
+		return PL330_STATE_FAULT_COMPLETING;
+	default:
+		return PL330_STATE_INVALID;
+	}
+}
+
+/* Use this _only_ to wait on transient states.
+ * Spins, without any timeout, until thread 't' reports one of the states
+ * in mask 's'. NOTE: the expansion already ends in ';' and some callers
+ * rely on that by omitting their own.
+ */
+#define UNTIL(t, s)	while (!(_state(t) & (s))) cpu_relax();
+
+/* Start doing req 'idx' of thread 'thrd'.
+ * Returns false only if the debug interface stayed busy (timed out);
+ * returns true if the thread is not STOPPED, if the slot holds no
+ * request, or once DMAGO has been issued for it.
+ */
+static bool _trigger(struct pl330_thread *thrd, unsigned idx)
+{
+	void __iomem *regs = thrd->dmac->pinfo.base;
+	struct _pl330_req *req = &thrd->req[idx];
+	struct pl330_req *r = req->r;
+	struct _arg_GO go;
+	unsigned ns;
+	u8 insn[6] = {0, 0, 0, 0, 0, 0};
+
+	/* Return if already ACTIVE */
+	if (_state(thrd) != PL330_STATE_STOPPED)
+		return true;
+
+	/* Return if no request */
+	if (!r)
+		return true;
+
+	/* If timed out due to halted state-machine */
+	if (_until_dmac_idle(thrd))
+		return false;
+
+	/* Without a config, inherit the security state the channel has now */
+	if (r->cfg)
+		ns = r->cfg->nonsecure ? 1 : 0;
+	else if (readl(regs + CS(thrd->id)) & CS_CNS)
+		ns = 1;
+	else
+		ns = 0;
+
+	/* See 'Abort Sources' point-4 at Page 2-25 */
+	if (_manager_ns(thrd) && !ns)
+		printk(KERN_INFO "%s:%d Recipe for ABORT!\n",
+			__func__, __LINE__);
+
+	go.chan = thrd->id;
+	go.addr = req->mc_bus;
+	go.ns = ns;
+	_emit_GO(0, insn, &go);
+
+	/* Set to generate interrupts for SEV */
+	writel(readl(regs + INTEN) | (1 << thrd->ev), regs + INTEN);
+
+	/* Only manager can execute GO */
+	_execute_DBGINSN(thrd, insn, true);
+
+	/* Record which slot is running ("index + 1" convention). The
+	 * completion, abort and status paths all key off thrd->active and
+	 * no other visible code ever sets it to a non-zero value.
+	 */
+	thrd->active = idx + 1;
+
+	return true;
+}
+
+/* Ask a thread that is not already STOPPED to stop: DMAEND is issued
+ * for the manager, DMAKILL for a channel.
+ */
+static void _stop(struct pl330_thread *thrd)
+{
+	u8 insn[6] = {0, 0, 0, 0, 0, 0};
+	bool mngr;
+
+	/* Nothing to do if already STOPPED */
+	if (_state(thrd) == PL330_STATE_STOPPED)
+		return;
+
+	mngr = is_manager(thrd);
+
+	if (mngr)
+		_emit_END(0, insn);
+	else
+		_emit_KILL(0, insn);
+
+	_execute_DBGINSN(thrd, insn, mngr);
+}
+
+/* Bring 'thrd' to the STOPPED state if it is faulting or on its way
+ * down, then trigger its first occupied request slot.
+ * Returns true if the thread is (or already was) running, false if it
+ * sits in WFE or an unexpected state, or if triggering timed out.
+ * The cases below deliberately fall through from one step of the
+ * shutdown sequence to the next.
+ */
+static bool _start(struct pl330_thread *thrd)
+{
+	switch (_state(thrd)) {
+	case PL330_STATE_FAULT_COMPLETING:
+		UNTIL(thrd, PL330_STATE_FAULTING | PL330_STATE_KILLING);
+
+		if (_state(thrd) == PL330_STATE_KILLING)
+			UNTIL(thrd, PL330_STATE_STOPPED)
+
+		/* fall through */
+	case PL330_STATE_FAULTING:
+		_stop(thrd);
+
+		/* fall through */
+	case PL330_STATE_KILLING:
+	case PL330_STATE_COMPLETING:
+		UNTIL(thrd, PL330_STATE_STOPPED)
+
+		/* fall through */
+	case PL330_STATE_STOPPED:
+		/* Slot 0 is preferred whenever it holds a request */
+		return _trigger(thrd, thrd->req[0].r ? 0 : 1);
+
+	/* Already busy with a request: nothing to do */
+	case PL330_STATE_WFP:
+	case PL330_STATE_QUEUEBUSY:
+	case PL330_STATE_ATBARRIER:
+	case PL330_STATE_UPDTPC:
+	case PL330_STATE_CACHEMISS:
+	case PL330_STATE_EXECUTING:
+		return true;
+
+	case PL330_STATE_WFE: /* for PAUSE - nothing yet */
+	default: /* Shouldn't reach here with some transient state */
+		return false;
+	}
+}
+
+/* Build the Channel Control Register value for the request config 'rqc'.
+ * Burst length and size are applied identically to the source and the
+ * destination side; each cache control setting goes to its own side.
+ */
+static inline u32 _prepare_ccr(struct pl330_reqcfg *rqc)
+{
+	u32 ccr = 0;
+
+	if (rqc->src_inc)
+		ccr |= CC_SRCINC;
+
+	if (rqc->dst_inc)
+		ccr |= CC_DSTINC;
+
+	/* We set same protection levels for Src and DST for now */
+	if (rqc->privileged)
+		ccr |= CC_SRCPRI | CC_DSTPRI;
+	if (rqc->nonsecure)
+		ccr |= CC_SRCNS | CC_DSTNS;
+	if (rqc->insnaccess)
+		ccr |= CC_SRCIA | CC_DSTIA;
+
+	/* The register holds "burst length - 1" in a 4-bit field */
+	ccr |= (((rqc->brst_len - 1) & 0xf) << CC_SRCBRSTLEN_SHFT);
+	ccr |= (((rqc->brst_len - 1) & 0xf) << CC_DSTBRSTLEN_SHFT);
+
+	ccr |= (rqc->brst_size << CC_SRCBRSTSIZE_SHFT);
+	ccr |= (rqc->brst_size << CC_DSTBRSTSIZE_SHFT);
+
+	/* Source cache control into the SRC field, destination into the
+	 * DST field; _is_valid() decodes them from exactly these positions.
+	 */
+	ccr |= (rqc->scctl << CC_SRCCCTRL_SHFT);
+	ccr |= (rqc->dcctl << CC_DSTCCTRL_SHFT);
+
+	ccr |= (rqc->swap << CC_SWAP_SHFT);
+
+	return ccr;
+}
+
+/* Reject a CCR value whose source or destination cache control field
+ * holds one of the invalid encodings.
+ */
+static inline bool _is_valid(u32 ccr)
+{
+	enum pl330_dstcachectrl dst_cc;
+	enum pl330_srccachectrl src_cc;
+
+	dst_cc = (ccr >> CC_DSTCCTRL_SHFT) & CC_DRCCCTRL_MASK;
+	src_cc = (ccr >> CC_SRCCCTRL_SHFT) & CC_SRCCCTRL_MASK;
+
+	if (dst_cc == DINVALID1 || dst_cc == DINVALID2)
+		return false;
+
+	if (src_cc == SINVALID1 || src_cc == SINVALID2)
+		return false;
+
+	return true;
+}
+
+/* Emit 'cyc' unrolled load/store pairs, each followed by its barrier,
+ * for a memory to memory xfer. Returns the bytes of microcode used.
+ */
+static inline int _ldst_memtomem(unsigned dry_run, u8 buf[],
+		const struct _xfer_spec *pxs, int cyc)
+{
+	int used = 0;
+
+	for (; cyc; cyc--) {
+		/* Do we need RMB/WMB for each load/store? REVISIT XXX */
+		used += _emit_LD(dry_run, &buf[used], ALWAYS);
+		used += _emit_RMB(dry_run, &buf[used]);
+		used += _emit_ST(dry_run, &buf[used], ALWAYS);
+		used += _emit_WMB(dry_run, &buf[used]);
+	}
+
+	return used;
+}
+
+/* Emit 'cyc' unrolled wait/load-from-peripheral/store/flush sequences
+ * for a device to memory xfer. Returns the bytes of microcode used.
+ */
+static inline int _ldst_devtomem(unsigned dry_run, u8 buf[],
+		const struct _xfer_spec *pxs, int cyc)
+{
+	int used = 0;
+
+	for (; cyc; cyc--) {
+		/* Do we need WFP for every cycle? REVISIT XXX */
+		used += _emit_WFP(dry_run, &buf[used], SINGLE, pxs->r->peri);
+		used += _emit_LDP(dry_run, &buf[used], SINGLE, pxs->r->peri);
+		used += _emit_ST(dry_run, &buf[used], ALWAYS);
+		/* Do we need FLUSHP for every cycle? REVISIT XXX */
+		used += _emit_FLUSHP(dry_run, &buf[used], pxs->r->peri);
+	}
+
+	return used;
+}
+
+/* Emit 'cyc' unrolled wait/load/store-to-peripheral/flush sequences
+ * for a memory to device xfer. Returns the bytes of microcode used.
+ */
+static inline int _ldst_memtodev(unsigned dry_run, u8 buf[],
+		const struct _xfer_spec *pxs, int cyc)
+{
+	int used = 0;
+
+	for (; cyc; cyc--) {
+		/* Do we need WFP for every cycle? REVISIT XXX */
+		used += _emit_WFP(dry_run, &buf[used], SINGLE, pxs->r->peri);
+		used += _emit_LD(dry_run, &buf[used], ALWAYS);
+		used += _emit_STP(dry_run, &buf[used], SINGLE, pxs->r->peri);
+		/* Do we need FLUSHP for every cycle? REVISIT XXX */
+		used += _emit_FLUSHP(dry_run, &buf[used], pxs->r->peri);
+	}
+
+	return used;
+}
+
+/* Emit 'cyc' unrolled bursts for the request type of pxs->r.
+ * Returns the bytes of microcode used; an unknown request type yields
+ * a huge size instead.
+ */
+static int _bursts(unsigned dry_run, u8 buf[],
+		const struct _xfer_spec *pxs, int cyc)
+{
+	switch (pxs->r->rqtype) {
+	case MEMTODEV:
+		return _ldst_memtodev(dry_run, buf, pxs, cyc);
+
+	case DEVTOMEM:
+		return _ldst_devtomem(dry_run, buf, pxs, cyc);
+
+	case MEMTOMEM:
+		return _ldst_memtomem(dry_run, buf, pxs, cyc);
+
+	default:
+		return 0x40000000; /* Scare off the Client */
+	}
+}
+
+/* Emit one (possibly nested) DMALP ... DMALPEND construct covering as many
+ * of the requested '*bursts' as fit into it.
+ * On return '*bursts' holds the number of bursts the construct performs.
+ * Returns bytes consumed and updates bursts
+ */
+static inline int _loop(unsigned dry_run, u8 buf[],
+		unsigned long *bursts, const struct _xfer_spec *pxs)
+{
+	int cyc, cycmax, szlp, szlpend, szbrst, off;
+	unsigned lcnt0, lcnt1, ljmp0, ljmp1;
+	struct _arg_LPEND lpend;
+
+	/* Max iterations possible in DMALP is 256 */
+	if (*bursts >= 256*256) {
+		lcnt1 = 256;
+		lcnt0 = 256;
+		cyc = *bursts / lcnt1 / lcnt0;
+	} else if (*bursts > 256) {
+		lcnt1 = 256;
+		lcnt0 = *bursts / lcnt1;
+		cyc = 1;
+	} else {
+		lcnt1 = *bursts;
+		lcnt0 = 0;
+		cyc = 1;
+	}
+
+	/* Dry runs to learn the size of each building block */
+	szlp = _emit_LP(1, buf, 0, 0);
+	szbrst = _bursts(1, buf, pxs, 1);
+
+	lpend.cond = ALWAYS;
+	lpend.forever = false;
+	lpend.loop = 0;
+	lpend.bjump = 0;
+	szlpend = _emit_LPEND(1, buf, &lpend);
+
+	/* Two nested loops need two DMALP and two DMALPEND */
+	if (lcnt0) {
+		szlp *= 2;
+		szlpend *= 2;
+	}
+
+	/** Do not mess with the construct **/
+
+	/* Max bursts that we can unroll due to limit on the
+	 * size of backward jump that can be encoded in DMALPEND
+	 * which is 8-bits and hence 255
+	 */
+	cycmax = (255 - (szlp + szlpend)) / szbrst;
+
+	cyc = (cycmax < cyc) ? cycmax : cyc;
+
+	off = 0;
+
+	/* ljmp0/ljmp1 must mark the first instruction INSIDE each loop,
+	 * i.e. the one following its DMALP: DMALPEND jumps back by 'bjump'
+	 * bytes and landing on the DMALP itself would reload the loop
+	 * counter on every pass.
+	 */
+	if (lcnt0)
+		off += _emit_LP(dry_run, &buf[off], 0, lcnt0);
+	ljmp0 = off;
+
+	off += _emit_LP(dry_run, &buf[off], 1, lcnt1);
+	ljmp1 = off;
+
+	off += _bursts(dry_run, &buf[off], pxs, cyc);
+
+	lpend.cond = ALWAYS;
+	lpend.forever = false;
+	lpend.loop = 1;
+	lpend.bjump = off - ljmp1;
+	off += _emit_LPEND(dry_run, &buf[off], &lpend);
+
+	if (lcnt0) {
+		lpend.cond = ALWAYS;
+		lpend.forever = false;
+		lpend.loop = 0;
+		lpend.bjump = off - ljmp0;
+		off += _emit_LPEND(dry_run, &buf[off], &lpend);
+	}
+	/***********************************/
+
+	*bursts = lcnt1 * cyc;
+	if (lcnt0)
+		*bursts *= lcnt0;
+
+	return off;
+}
+
+/* Emit as many loop constructs as it takes to cover every burst of the
+ * xfer unit pxs->x. Returns the bytes of microcode used.
+ */
+static inline int _setup_loops(unsigned dry_run, u8 buf[],
+		const struct _xfer_spec *pxs)
+{
+	struct pl330_xfer *x = pxs->x;
+	u32 ccr = pxs->ccr;
+	unsigned long c, bursts = BYTE_TO_BURST(x->bytes, ccr);
+	int off = 0;
+
+	while (bursts) {
+		c = bursts;
+		off += _loop(dry_run, &buf[off], &c, pxs);
+
+		/* _loop() covers no burst at all when the request type is
+		 * unknown (see the size _bursts() reports then). Stop here
+		 * instead of spinning forever; the oversized 'off' already
+		 * makes the caller reject the request.
+		 */
+		if (!c)
+			break;
+
+		bursts -= c;
+	}
+
+	return off;
+}
+
+/* Emit the microcode of one xfer unit: load the source and destination
+ * address registers, then the loops that move the data.
+ * Returns the bytes of microcode used.
+ */
+static inline int _setup_xfer(unsigned dry_run, u8 buf[],
+		const struct _xfer_spec *pxs)
+{
+	const struct pl330_xfer *unit = pxs->x;
+	int used;
+
+	/* DMAMOV SAR, x->src_addr */
+	used = _emit_MOV(dry_run, buf, SAR, unit->src_addr);
+
+	/* DMAMOV DAR, x->dst_addr */
+	used += _emit_MOV(dry_run, &buf[used], DAR, unit->dst_addr);
+
+	/* Setup Loop(s) */
+	used += _setup_loops(dry_run, &buf[used], pxs);
+
+	return used;
+}
+
+/* A req is a sequence of one or more xfer units.
+ * Writes (or, with 'dry_run' set, only measures) the microcode of
+ * request pxs->r for slot 'index' of 'thrd'.
+ * Returns the number of bytes taken to setup the MC
+ * for the req, or -EINVAL if an xfer unit is not a whole number of bursts.
+ */
+static int _setup_req(unsigned dry_run, struct pl330_thread *thrd,
+		unsigned index, struct _xfer_spec *pxs)
+{
+	struct _pl330_req *req = &thrd->req[index];
+	struct pl330_xfer *x;
+	u8 *buf = req->mc_cpu;
+	int off = 0;
+
+	/* DMAMOV CCR, ccr */
+	off += _emit_MOV(dry_run, &buf[off], CCR, pxs->ccr);
+
+	x = pxs->r->x;
+	do {
+		/* Error if xfer length is not aligned at burst size.
+		 * A burst moves BRST_LEN beats of BRST_SIZE bytes, so the
+		 * length has to be a multiple of their product.
+		 */
+		if (x->bytes % (BRST_SIZE(pxs->ccr) * BRST_LEN(pxs->ccr)))
+			return -EINVAL;
+
+		pxs->x = x;
+		off += _setup_xfer(dry_run, &buf[off], pxs);
+
+		x = x->next;
+	} while (x);
+
+	/* DMAFLUSHP peripheral */
+	off += _emit_FLUSHP(dry_run, &buf[off], pxs->r->peri);
+	/* DMASEV peripheral/event */
+	off += _emit_SEV(dry_run, &buf[off], thrd->ev);
+	/* DMAEND */
+	off += _emit_END(dry_run, &buf[off]);
+
+	return off;
+}
+
+/* Submit a list of xfers after which the client wants notification.
+ * Client is not notified after each xfer unit, just once after all
+ * xfer units are done or some error occurs.
+ * The actual xfer on bus starts automatically
+ *
+ * Returns 0 on success, -EINVAL for a bad channel, request or xfer
+ * settings, -EBUSY if both request slots are taken, -ENOMEM if the
+ * microcode would not fit in half of the channel's buffer and -EIO if
+ * the channel could not be started (the request is not kept then).
+ */
+int pl330_submit_req(void *ch_id, struct pl330_req *r)
+{
+	struct pl330_thread *thrd = ch_id;
+	struct pl330_info *pi;
+	struct _xfer_spec xs;
+	void __iomem *regs;
+	u32 ccr;
+	unsigned idx;
+	int ret = 0;
+
+	/* No Req or Unacquired Channel or DMAC stopping */
+	if (!r || !thrd || thrd->free || thrd->dmac->state == DYING)
+		return -EINVAL;
+
+	/* A request without any xfer unit cannot be encoded */
+	if (!r->x)
+		return -EINVAL;
+
+	pi = &thrd->dmac->pinfo;
+	regs = pi->base;
+
+	/* If request for non-existing peripheral */
+	if (r->peri >= pi->pcfg.num_peri)
+		return -EINVAL;
+
+	mutex_lock(&thrd->mtx);
+
+	if (_queue_full(thrd)) {
+		ret = -EBUSY;
+		goto xfer_exit;
+	}
+
+	/* Use last settings, if not provided */
+	if (r->cfg)
+		ccr = _prepare_ccr(r->cfg);
+	else
+		ccr = readl(regs + CC(thrd->id));
+
+	/* If this req doesn't have valid xfer settings */
+	if (!_is_valid(ccr)) {
+		ret = -EINVAL;
+		goto xfer_exit;
+	}
+
+	/* The queue is not full, so at least one of the slots is free */
+	idx = thrd->req[0].r ? 1 : 0;
+
+	xs.ccr = ccr;
+	xs.r = r;
+
+	/* First dry run to check if req is acceptable */
+	ret = _setup_req(1, thrd, idx, &xs);
+	if (ret < 0)
+		goto xfer_exit;
+
+	/* Each slot owns one half of the channel's microcode buffer */
+	if (ret > pi->mcbufsz / 2) {
+		ret = -ENOMEM;
+		goto xfer_exit;
+	}
+
+	ret = 0;
+
+	/* Hook the request */
+	_setup_req(0, thrd, idx, &xs);
+	thrd->req[idx].r = r;
+
+	if (!_start(thrd)) { /* Could not start */
+		/* The caller is told the submission failed, so do not keep
+		 * a reference to its request in the queue.
+		 */
+		thrd->req[idx].r = NULL;
+		ret = -EIO;
+		goto xfer_exit;
+	}
+
+xfer_exit:
+	mutex_unlock(&thrd->mtx);
+	return ret;
+}
+EXPORT_SYMBOL(pl330_submit_req);
+
+/* Tasklet body: carry out the resets recorded in pl330->dmac_tbd.
+ * 'data' is the struct pl330_dmac the tasklet belongs to.
+ * For every channel marked for reset, the requests of an active channel
+ * are dropped and their clients notified with an error, then the
+ * channel is stopped. All pending reset flags are cleared at the end.
+ */
+static void pl330_dotask(unsigned long data)
+{
+	struct pl330_dmac *pl330 = (struct pl330_dmac *) data;
+	struct pl330_info *pi = &pl330->pinfo;
+	struct pl330_thread *thrd;
+	int i;
+
+	/* The DMAC itself gone nuts */
+	if (pl330->dmac_tbd.reset_dmac) {
+		/* From now on submit/update/ctrl calls are refused */
+		pl330->state = DYING;
+
+		for (i = 0; i < pi->pcfg.num_chan; i++) {
+			thrd = &pl330->channels[i];
+
+			/* Mark thread as infected */
+			pl330->dmac_tbd.reset_chan |= (1 << thrd->id);
+		}
+
+		pl330->dmac_tbd.reset_mngr = 1;
+	}
+
+	if (pl330->dmac_tbd.reset_mngr)
+		_stop(pl330->manager);
+
+	for (i = 0; i < pi->pcfg.num_chan; i++) {
+		thrd = &pl330->channels[i];
+
+		if (pl330->dmac_tbd.reset_chan & (1 << thrd->id)) {
+			if (thrd->active) {
+				struct pl330_req *r1, *r2;
+				enum pl330_op_err err;
+				void __iomem *regs = pi->base;
+				unsigned active;
+
+				active = thrd->active - 1;
+
+				/* r1 is the running request, r2 the queued one */
+				r1 = thrd->req[active].r;
+				r2 = thrd->req[1 - active].r;
+
+				/* Empty the queue before the clients hear of it */
+				thrd->req[active].r = NULL;
+				thrd->req[1 - active].r = NULL;
+				thrd->active = 0;
+
+				/* FAIL if this channel is flagged in FSC, ABORT otherwise */
+				if (readl(regs + FSC) & (1 << thrd->id))
+					err = PL330_ERR_FAIL;
+				else
+					err = PL330_ERR_ABORT;
+
+				_callback(r1, err);
+				_callback(r2, err);
+			}
+
+			_stop(thrd);
+		}
+	}
+
+	/* Clear all errors */
+	pl330->dmac_tbd.reset_dmac = 0;
+	pl330->dmac_tbd.reset_mngr = 0;
+	pl330->dmac_tbd.reset_chan = 0;
+
+	return;
+}
+
+/* Service the DMAC 'pi': latch the fault status of the manager and the
+ * channels, retire the active request of every thread whose event is
+ * set, restart those threads and finally run the clients' callbacks.
+ * Detected faults are handed to the tasklet.
+ * Returns 1 if state was updated, 0 otherwise
+ */
+int pl330_update(struct pl330_info *pi)
+{
+	/* Requests retired in this pass, callbacks still due.
+	 * Sized like pl330->events[]: at most one request per event.
+	 */
+	struct pl330_req *done[32];
+	struct pl330_dmac *pl330;
+	void __iomem *regs;
+	unsigned ndone = 0, i;
+	u32 val;
+	int id, ev, ret = 0;
+
+	if (!pi)
+		return 0;
+
+	pl330 = container_of(pi, struct pl330_dmac, pinfo);
+
+	if (pl330->state == DYING)
+		return 0;
+
+	regs = pi->base;
+
+	val = readl(regs + FSM) & 0x1;
+	pl330->dmac_tbd.reset_mngr |= val;
+
+	val = readl(regs + FSC) & ((1 << pi->pcfg.num_chan) - 1);
+	pl330->dmac_tbd.reset_chan |= val;
+
+	/* Check which event happened i.e, thread notified */
+	val = readl(regs + ES);
+	if (pi->pcfg.num_events < 32
+			&& val & ~((1 << pi->pcfg.num_events) - 1)) {
+		pl330->dmac_tbd.reset_dmac = 1;
+		printk(KERN_INFO "%s:%d Unexpected!\n", __func__, __LINE__);
+		ret = 1;
+		goto updt_exit;
+	}
+
+	/* Callbacks are collected in done[]; the list is only kept
+	 * initialised and empty, the way every call used to leave it.
+	 */
+	INIT_LIST_HEAD(&pl330->req_done);
+
+	for (ev = 0; ev < pi->pcfg.num_events; ev++) {
+		struct pl330_thread *thrd;
+		int active;
+
+		if (!(val & (1 << ev)))
+			continue;
+
+		/* Event occurred */
+		ret = 1;
+
+		id = pl330->events[ev];
+
+		/* Event not owned by any channel (unallocated slots hold -1) */
+		if (id < 0 || id >= pi->pcfg.num_chan) {
+			printk(KERN_INFO "%s:%d Unexpected!\n",
+				__func__, __LINE__);
+			continue;
+		}
+
+		thrd = &pl330->channels[id];
+
+		mutex_lock(&thrd->mtx);
+
+		if (!thrd->active) {
+			/* No request to retire: leave the slots alone and
+			 * let the tasklet reset the channel.
+			 */
+			pl330->dmac_tbd.reset_chan |= (1 << id);
+			printk(KERN_INFO "%s:%d Unexpected!\n",
+				__func__, __LINE__);
+			mutex_unlock(&thrd->mtx);
+			continue;
+		}
+
+		active = thrd->active - 1;
+
+		/* Remember the client request before the slot is freed,
+		 * otherwise there is nothing left to call back.
+		 */
+		if (ndone < ARRAY_SIZE(done))
+			done[ndone++] = thrd->req[active].r;
+		thrd->req[active].r = NULL;
+
+		/* The other slot, if occupied, becomes the active one */
+		if (thrd->req[1 - active].r)
+			thrd->active = 2 - active;
+		else
+			thrd->active = 0;
+
+		/* Get going again ASAP */
+		_start(thrd);
+
+		mutex_unlock(&thrd->mtx);
+	}
+
+	/* Clear all event interrupts */
+	writel(val, regs + INTCLR);
+
+	/* Now that we are in no hurry, do the callbacks */
+	for (i = 0; i < ndone; i++)
+		_callback(done[i], PL330_ERR_NONE);
+
+updt_exit:
+
+	if (pl330->dmac_tbd.reset_dmac
+			|| pl330->dmac_tbd.reset_mngr
+			|| pl330->dmac_tbd.reset_chan) {
+		ret = 1;
+		tasklet_schedule(&pl330->tasks);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(pl330_update);
+
+/* This must be atomic. Since the DMA client calls this,
+ * there is no need to do callbacks. Otherwise, this may not be atomic.
+ *
+ * Applies operation 'op' to the channel 'ch_id'.
+ * Returns 0 on success or when the channel has no request queued (in
+ * which case 'op' is not even looked at), -EINVAL for a bad channel, a
+ * dying DMAC or an unknown 'op', and -EIO if the channel could not be
+ * (re)started.
+ */
+int pl330_chan_ctrl(void *ch_id, enum pl330_chan_op op)
+{
+	struct pl330_thread *thrd = ch_id;
+	int ret = 0;
+
+	if (!thrd || thrd->free || thrd->dmac->state == DYING)
+		return -EINVAL;
+
+	mutex_lock(&thrd->mtx);
+
+	if (_queue_empty(thrd))
+		goto ctrl_exit;
+
+	switch (op) {
+	case PL330_OP_FLUSH:
+		/* Stop the channel and drop both requests */
+		_stop(thrd);
+		thrd->req[0].r = NULL;
+		thrd->req[1].r = NULL;
+		thrd->active = 0;
+		break;
+
+	case PL330_OP_ABORT:
+		_stop(thrd);
+
+		/* ABORT is only for the active req */
+		if (!thrd->active)
+			break;
+
+		thrd->req[thrd->active - 1].r = NULL;
+
+		if (_queue_empty(thrd)) {
+			thrd->active = 0;
+			break;
+		}
+
+		/* fall through - restart with the request still queued */
+	case PL330_OP_START: /* Should be un-necessary */
+		if (!_queue_empty(thrd) && !_start(thrd))
+			ret = -EIO;
+
+		break;
+
+	default:
+		ret = -EINVAL;
+	}
+
+ctrl_exit:
+	mutex_unlock(&thrd->mtx);
+	return ret;
+}
+EXPORT_SYMBOL(pl330_chan_ctrl);
+
+/* Fill 'pstatus' with a snapshot of channel 'ch_id': whether the DMAC is
+ * halted, whether the channel is faulting, the active and the enqueued
+ * request (found by comparing the channel's program counter with the two
+ * microcode buffers) and the current source/destination addresses.
+ * Returns 0 on success, -EINVAL for a missing 'pstatus' or a bad channel.
+ */
+int pl330_chan_status(void *ch_id, struct pl330_chanstatus *pstatus)
+{
+	struct pl330_thread *thrd = ch_id;
+	struct pl330_dmac *pl330;
+	struct pl330_info *pi;
+	void __iomem *regs;
+	int i;
+	u32 val;
+
+	if (!pstatus || !thrd || thrd->free)
+		return -EINVAL;
+
+	mutex_lock(&thrd->mtx);
+
+	pl330 = thrd->dmac;
+	pi = &pl330->pinfo;
+	regs = pi->base;
+
+	/* The client should remove the DMAC and add again */
+	pstatus->dmac_halted = (pl330->state == DYING);
+
+	val = readl(regs + FSC);
+	pstatus->faulting = (val & (1 << thrd->id)) ? true : false;
+
+	/* i: 0 if the PC is in neither slot's buffer, else slot index + 1 */
+	val = readl(regs + CPC(thrd->id));
+	if (PC_AT_REQ(&thrd->req[0], pi->mcbufsz / 2, val))
+		i = 1;
+	else if (PC_AT_REQ(&thrd->req[1], pi->mcbufsz / 2, val))
+		i = 2;
+	else
+		i = 0;
+
+	/* If channel inactive while req in queue */
+	if ((thrd->active != i) || (!_queue_empty(thrd) && !i))
+		printk(KERN_INFO "%s:%d DBG: Invalid state!\n",
+			__func__, __LINE__);
+
+	if (i) {
+		i--;
+		pstatus->act_req = thrd->req[i].r;
+		pstatus->enq_req = thrd->req[1-i].r;
+	} else {
+		pstatus->act_req = NULL;
+		pstatus->enq_req = NULL;
+	}
+
+	pstatus->src_addr = readl(regs + SA(thrd->id));
+	pstatus->dst_addr = readl(regs + DA(thrd->id));
+
+	mutex_unlock(&thrd->mtx);
+
+	return 0;
+}
+EXPORT_SYMBOL(pl330_chan_status);
+
+/* Clear both request slots of a channel and point them at the channel's
+ * share of the DMAC-wide MicroCode buffer: slot 0 starts at offset
+ * (id * mcbufsz), slot 1 starts mcbufsz / 2 after slot 0. Both the CPU
+ * and the bus address of each slot are set.
+ */
+static inline void _reset_thread(struct pl330_thread *thrd)
+{
+	struct pl330_dmac *pl330 = thrd->dmac;
+	struct pl330_info *pi = &pl330->pinfo;
+
+	thrd->req[0].r = NULL;
+	thrd->req[0].mc_cpu = pl330->mcode_cpu
+				+ (thrd->id * pi->mcbufsz);
+	thrd->req[0].mc_bus = pl330->mcode_bus
+				+ (thrd->id * pi->mcbufsz);
+
+	thrd->req[1].r = NULL;
+	thrd->req[1].mc_cpu = thrd->req[0].mc_cpu
+				+ pi->mcbufsz / 2;
+	thrd->req[1].mc_bus = thrd->req[0].mc_bus
+				+ pi->mcbufsz / 2;
+}
+
+/* Reserve an event: claim the lowest-numbered entry of pl330->events[]
+ * that is still -1 (free) by storing this thread's id in it.
+ * Returns the index of the claimed event, or -1 if none of the
+ * num_events entries is free.
+ */
+static inline int _alloc_event(struct pl330_thread *thrd)
+{
+	struct pl330_dmac *pl330 = thrd->dmac;
+	struct pl330_info *pi = &pl330->pinfo;
+	int ev;
+
+	for (ev = 0; ev < pi->pcfg.num_events; ev++) {
+		if (pl330->events[ev] == -1) {
+			pl330->events[ev] = thrd->id;
+			return ev;
+		}
+	}
+
+	return -1;
+}
+
+/* Release an event: mark 'ev' free (-1) again, but only if it is in
+ * range and currently recorded as owned by this thread. Any other
+ * 'ev' (including the -1 returned by a failed _alloc_event) is ignored.
+ */
+static inline void _free_event(struct pl330_thread *thrd, int ev)
+{
+	struct pl330_dmac *pl330 = thrd->dmac;
+	struct pl330_info *pi = &pl330->pinfo;
+
+	if (ev >= 0 && ev < pi->pcfg.num_events
+			&& pl330->events[ev] == thrd->id)
+		pl330->events[ev] = -1;
+}
+
+/* Hand out a free channel of the DMAC behind 'pi'.
+ *
+ * Returns an opaque handle (the channel's struct pl330_thread) with an
+ * event reserved for it, or NULL if pi is NULL, the DMAC is DYING, or
+ * no free channel could be paired with a free event.
+ */
+void *pl330_request_channel(struct pl330_info *pi)
+{
+	struct pl330_dmac *pl330;
+	struct pl330_thread *thrd;
+	unsigned long flags;
+	int chans, i;
+
+	if (!pi)
+		return NULL;
+
+	pl330 = container_of(pi, struct pl330_dmac, pinfo);
+
+	if (pl330->state == DYING)
+		return NULL;
+
+	chans = pi->pcfg.num_chan;
+
+	/* The channel 'free' flags and the event table are updated
+	 * under the DMAC spinlock.
+	 */
+	spin_lock_irqsave(&pl330->lock, flags);
+
+	thrd = NULL;
+	for (i = 0; i < chans; i++) {
+		if (pl330->channels[i].free) {
+			thrd = &pl330->channels[i];
+			_reset_thread(thrd);
+			thrd->ev = _alloc_event(thrd);
+			if (thrd->ev >= 0) {
+				thrd->free = false;
+				break;
+			}
+			/* No event available: leave the channel free */
+			thrd = NULL;
+		}
+	}
+
+	spin_unlock_irqrestore(&pl330->lock, flags);
+
+	return thrd;
+}
+EXPORT_SYMBOL(pl330_request_channel);
+
+/* Give a channel obtained from pl330_request_channel() back to the DMAC.
+ *
+ * The channel is stopped, the callbacks of any requests still held in
+ * its two slots are invoked with PL330_ERR_ABORT (active request
+ * first), and the channel and its event are marked free again.
+ *
+ * Does nothing if ch_id is NULL, the channel is already free, or the
+ * DMAC is DYING.
+ */
+void pl330_release_channel(void *ch_id)
+{
+	struct pl330_thread *thrd = ch_id;
+	struct pl330_dmac *pl330;
+	struct pl330_req *r1, *r2;
+	unsigned long flags;
+
+	if (!thrd || thrd->free || thrd->dmac->state == DYING)
+		return;
+
+	pl330 = thrd->dmac;
+
+	mutex_lock(&thrd->mtx);
+
+	/* Snapshot the pending requests while holding the channel mutex:
+	 * pl330_chan_ctrl() changes 'active' and the slots under the
+	 * same mutex, so reading them unlocked could pair a stale
+	 * 'active' with already-cleared slots.
+	 */
+	if (thrd->active == 1) {
+		r1 = thrd->req[0].r;
+		r2 = thrd->req[1].r;
+	} else {
+		r1 = thrd->req[1].r;
+		r2 = thrd->req[0].r;
+	}
+
+	_stop(thrd);
+
+	mutex_unlock(&thrd->mtx);
+
+	/* Callbacks are made outside the channel mutex */
+	_callback(r1, PL330_ERR_ABORT);
+	_callback(r2, PL330_ERR_ABORT);
+
+	spin_lock_irqsave(&pl330->lock, flags);
+	_reset_thread(thrd);
+	_free_event(thrd, thrd->ev);
+	thrd->free = true;
+	spin_unlock_irqrestore(&pl330->lock, flags);
+}
+EXPORT_SYMBOL(pl330_release_channel);
+
+/* Allocate and initialise the thread array of a DMAC: 'num_chan'
+ * channel threads followed by one manager thread.
+ * Returns 0 on success or -ENOMEM if the array cannot be allocated.
+ */
+static int dmac_alloc_threads(struct pl330_dmac *pl330)
+{
+	struct pl330_info *pi = &pl330->pinfo;
+	int chans = pi->pcfg.num_chan;
+	struct pl330_thread *thrd;
+	int i;
+
+	/* Allocate 1 Manager and 'chans' Channel threads */
+	pl330->channels = kzalloc((1 + chans) * sizeof(*thrd),
+					GFP_KERNEL);
+	if (!pl330->channels)
+		return -ENOMEM;
+
+	/* Init Channel threads */
+	for (i = 0; i < chans; i++) {
+		thrd = &pl330->channels[i];
+		thrd->id = i;
+		thrd->dmac = pl330;
+		mutex_init(&thrd->mtx);
+		/* Needs id and dmac set: it derives the slot addresses */
+		_reset_thread(thrd);
+		thrd->free = true;
+	}
+
+	/* MANAGER is indexed at the end */
+	thrd = &pl330->channels[chans];
+	thrd->id = chans;
+	thrd->dmac = pl330;
+	thrd->free = false; /* Manager can't do xfer */
+	mutex_init(&thrd->mtx);
+	pl330->manager = thrd;
+
+	return 0;
+}
+
+/* Release every channel thread of a DMAC and free the thread array
+ * allocated by dmac_alloc_threads(). Always returns 0.
+ */
+static int dmac_free_threads(struct pl330_dmac *pl330)
+{
+	struct pl330_info *pi = &pl330->pinfo;
+	int chans = pi->pcfg.num_chan;
+	struct pl330_thread *thrd;
+	int i;
+
+	/* Release Channel threads */
+	for (i = 0; i < chans; i++) {
+		thrd = &pl330->channels[i];
+		/* Returns at once for channels that are already free */
+		pl330_release_channel((void *)thrd);
+	}
+
+	/* Free memory */
+	kfree(pl330->channels);
+
+	return 0;
+}
+
+/* Allocate the MicroCode buffer and the thread array of a DMAC.
+ * Must be called after pl330_info has been initialized, because the
+ * sizes depend on pcfg.num_chan and mcbufsz.
+ * Returns 0 on success or a negative errno; on failure nothing stays
+ * allocated.
+ */
+static int dmac_alloc_resources(struct pl330_dmac *pl330)
+{
+	struct pl330_info *pi = &pl330->pinfo;
+	int chans = pi->pcfg.num_chan;
+	int ret;
+
+	/* Alloc MicroCode buffer for 'chans' Channel threads.
+	 * A channel's buffer offset is (Channel_Id * MCODE_BUFF_PERCHAN)
+	 */
+	pl330->mcode_cpu = dma_alloc_coherent(pl330->dev,
+				chans * pi->mcbufsz,
+				&pl330->mcode_bus, GFP_KERNEL);
+	if (!pl330->mcode_cpu) {
+		printk(KERN_INFO "Unable to allocate MCODE buffer\n");
+		return -ENOMEM;
+	}
+
+	ret = dmac_alloc_threads(pl330);
+	if (ret) {
+		printk(KERN_INFO "Unable to create channels for DMAC\n");
+		/* Undo the MicroCode buffer allocation made above */
+		dma_free_coherent(pl330->dev,
+				chans * pi->mcbufsz,
+				pl330->mcode_cpu, pl330->mcode_bus);
+		return ret;
+	}
+
+	return 0;
+}
+
+/* Undo dmac_alloc_resources(): release the channel threads first, then
+ * free the MicroCode buffer.
+ */
+static void dmac_free_resources(struct pl330_dmac *pl330)
+{
+	struct pl330_info *pi = &pl330->pinfo;
+	int chans = pi->pcfg.num_chan;
+
+	dmac_free_threads(pl330);
+
+	dma_free_coherent(pl330->dev, chans * pi->mcbufsz,
+				pl330->mcode_cpu, pl330->mcode_bus);
+}
+/* Initialize the structure for PL330 configuration, that can be used
+ * by the client driver to make best use of the DMAC.
+ *
+ * Everything is read from the DMAC's own registers (CRD, CR0, CR3, CR4
+ * and the ID registers), so pi->base must already be valid.
+ */
+static void read_dmac_config(struct pl330_dmac *pl330)
+{
+	struct pl330_info *pi = &pl330->pinfo;
+	void __iomem *regs = pi->base;
+	u32 val;
+
+	/* Stored in bits: 8 * 2^(CRD data width field) */
+	val = readl(regs + CRD) >> CRD_DATA_WIDTH_SHIFT;
+	val &= CRD_DATA_WIDTH_MASK;
+	pi->pcfg.data_bus_width = 8 * (1 << val);
+
+	/* The CR0 field holds the channel count minus one */
+	val = readl(regs + CR0) >> CR0_NUM_CHANS_SHIFT;
+	val &= CR0_NUM_CHANS_MASK;
+	val += 1;
+	pi->pcfg.num_chan = val;
+
+	/* Peripheral count and CR4 are only read when the peripheral
+	 * request bit is set in CR0; otherwise num_peri is 0.
+	 */
+	val = readl(regs + CR0);
+	if (val & CR0_PERIPH_REQ_SET) {
+		val = (val >> CR0_NUM_PERIPH_SHIFT) & CR0_NUM_PERIPH_MASK;
+		val += 1;
+		pi->pcfg.num_peri = val;
+		pi->pcfg.peri_ns = readl(regs + CR4);
+	} else {
+		pi->pcfg.num_peri = 0;
+	}
+
+	/* Mirror the CR0 manager non-secure bit into pcfg.mode */
+	val = readl(regs + CR0);
+	if (val & CR0_BOOT_MAN_NS)
+		pi->pcfg.mode |= DMAC_MODE_NS;
+	else
+		pi->pcfg.mode &= ~DMAC_MODE_NS;
+
+	/* The CR0 field holds the event count minus one */
+	val = readl(regs + CR0) >> CR0_NUM_EVENTS_SHIFT;
+	val &= CR0_NUM_EVENTS_MASK;
+	val += 1;
+	pi->pcfg.num_events = val;
+
+	pi->pcfg.irq_ns = readl(regs + CR3);
+
+	pi->pcfg.periph_id = get_id(pl330, PERIPH_ID);
+	pi->pcfg.pcell_id = get_id(pl330, PCELL_ID);
+}
+
+/* Register a DMAC with the PL330 core.
+ *
+ * After pl330_alloc, initialize pl330_info.base
+ * before calling pl330_add
+ *
+ * Resets the DMAC if the client supplied a reset hook, verifies the
+ * peripheral and PrimeCell IDs, reads the hardware configuration,
+ * allocates the MicroCode buffer and channel threads, links the DMAC
+ * into the global list and moves it to the INIT state.
+ *
+ * Returns 0 on success or if the DMAC was already added, -EINVAL for a
+ * NULL pi, an unexpected ID or a DMAC without events, or the error
+ * returned by dmac_alloc_resources().
+ */
+int pl330_add(struct pl330_info *pi)
+{
+	struct pl330_dmac *pl330, *pt;
+	u32 periph_id, pcell_id;
+	int i;
+
+	if (!pi)
+		return -EINVAL;
+
+	pl330 = container_of(pi, struct pl330_dmac, pinfo);
+
+	/* If the SoC can perform reset on the DMAC, then do it
+	 * before reading its configuration.
+	 */
+	if (pi->dmac_reset)
+		pi->dmac_reset(pi);
+
+	/* Check if we can handle this DMAC. Report the same assembled
+	 * IDs that were compared, not a raw read of the first ID
+	 * register.
+	 */
+	periph_id = get_id(pl330, PERIPH_ID);
+	pcell_id = get_id(pl330, PCELL_ID);
+	if (periph_id != PERIPH_ID_VAL || pcell_id != PCELL_ID_VAL) {
+		printk(KERN_INFO "PERIPH_ID 0x%x, PCELL_ID 0x%x !\n",
+			periph_id, pcell_id);
+		return -EINVAL;
+	}
+
+	/* Make sure it isn't already added */
+	list_for_each_entry(pt, &pl330_list, node)
+		if (pt == pl330)
+			return 0;
+
+	/* Read the configuration of the DMAC */
+	read_dmac_config(pl330);
+
+	if (pi->pcfg.num_events == 0) {
+		printk(KERN_INFO "%s:%d Can't work without events!\n",
+			__func__, __LINE__);
+		return -EINVAL;
+	}
+
+	/* Use default MC buffer size if not provided */
+	if (!pi->mcbufsz)
+		pi->mcbufsz = MCODE_BUFF_PER_REQ * 2;
+
+	/* Mark all events as free */
+	for (i = 0; i < pi->pcfg.num_events; i++)
+		pl330->events[i] = -1;
+
+	/* Allocate resources needed by the DMAC */
+	i = dmac_alloc_resources(pl330);
+	if (i) {
+		printk(KERN_INFO "Unable to create channels for DMAC\n");
+		return i;
+	}
+
+	mutex_lock(&pl330_mutex);
+	list_add_tail(&pl330->node, &pl330_list);
+	mutex_unlock(&pl330_mutex);
+
+	tasklet_init(&pl330->tasks, pl330_dotask,
+				(unsigned long) pl330);
+
+	pl330->state = INIT;
+
+	return 0;
+}
+EXPORT_SYMBOL(pl330_add);
+
+/* Drop DMAC from the list
+ *
+ * Marks the DMAC UNINIT and, if it is found on the global list, kills
+ * its tasklet, unlinks it and frees its MicroCode buffer and threads.
+ * Does nothing further if pi is NULL or the DMAC was never added.
+ *
+ * NOTE(review): the list walk below runs without pl330_mutex, unlike
+ * the list_del() further down - confirm that callers serialize
+ * pl330_add()/pl330_del().
+ */
+void pl330_del(struct pl330_info *pi)
+{
+	struct pl330_dmac *pl330, *pt;
+	int found;
+
+	if (!pi)
+		return;
+
+	pl330 = container_of(pi, struct pl330_dmac, pinfo);
+
+	pl330->state = UNINIT;
+
+	/* Make sure it is already added */
+	found = 0;
+	list_for_each_entry(pt, &pl330_list, node)
+		if (pt == pl330)
+			found = 1;
+
+	if (!found)
+		return;
+
+	tasklet_kill(&pl330->tasks);
+
+	mutex_lock(&pl330_mutex);
+	list_del(&pl330->node);
+	mutex_unlock(&pl330_mutex);
+
+	/* Free DMAC resources */
+	dmac_free_resources(pl330);
+}
+EXPORT_SYMBOL(pl330_del);
+
+/* Allocate a zeroed DMAC descriptor for 'dev' and initialise its
+ * spinlock. Returns the embedded pl330_info handle, or NULL if the
+ * allocation fails. The handle is released with pl330_free().
+ */
+struct pl330_info *pl330_alloc(struct device *dev)
+{
+	struct pl330_dmac *pl330;
+
+	pl330 = kzalloc(sizeof(*pl330), GFP_KERNEL);
+	if (!pl330)
+		return NULL;
+
+	spin_lock_init(&pl330->lock);
+
+	pl330->dev = dev;
+
+	return &pl330->pinfo;
+}
+EXPORT_SYMBOL(pl330_alloc);
+
+/* Release a handle obtained from pl330_alloc(): remove the DMAC from
+ * the core via pl330_del(), then free the descriptor itself.
+ * A NULL pi is ignored.
+ */
+void pl330_free(struct pl330_info *pi)
+{
+	struct pl330_dmac *pl330;
+
+	if (!pi)
+		return;
+
+	pl330_del(pi);
+
+	pl330 = container_of(pi, struct pl330_dmac, pinfo);
+
+	kfree(pl330);
+}
+EXPORT_SYMBOL(pl330_free);
diff --git a/arch/arm/include/asm/hardware/pl330.h b/arch/arm/include/asm/hardware/pl330.h
new file mode 100644
index 0000000..4e907ad
--- /dev/null
+++ b/arch/arm/include/asm/hardware/pl330.h
@@ -0,0 +1,197 @@
+/* linux/include/asm/hardware/pl330.h
+ *
+ * Copyright (C) 2010 Samsung Electronics Co Ltd.
+ *	Jaswinder Singh <jassi.brar@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __PL330_CORE_H
+#define __PL330_CORE_H
+
+/* Cache control setting for the source side of a transfer
+ * (see pl330_reqcfg.scctl). Values run 0..7 in declaration order.
+ */
+enum pl330_srccachectrl {
+	SCCTRL0 = 0, /* Noncacheable and nonbufferable */
+	SCCTRL1, /* Bufferable only */
+	SCCTRL2, /* Cacheable, but do not allocate */
+	SCCTRL3, /* Cacheable and bufferable, but do not allocate */
+	SINVALID1,
+	SINVALID2,
+	SCCTRL6, /* Cacheable write-through, allocate on reads only */
+	SCCTRL7, /* Cacheable write-back, allocate on reads only */
+};
+
+/* Cache control setting for the destination side of a transfer
+ * (see pl330_reqcfg.dcctl).
+ */
+enum pl330_dstcachectrl {
+	DCCTRL0 = 0, /* Noncacheable and nonbufferable */
+	DCCTRL1, /* Bufferable only */
+	DCCTRL2, /* Cacheable, but do not allocate */
+	DCCTRL3, /* Cacheable and bufferable, but do not allocate */
+	/* NOTE(review): the explicit 8 makes DINVALID2, DCCTRL6 and
+	 * DCCTRL7 evaluate to 9, 10 and 11, whereas the source-side
+	 * enum gives SCCTRL6/SCCTRL7 the values 6/7 - confirm this is
+	 * what the code programming the register expects.
+	 */
+	DINVALID1 = 8,
+	DINVALID2,
+	DCCTRL6, /* Cacheable write-through, allocate on writes only */
+	DCCTRL7, /* Cacheable write-back, allocate on writes only */
+};
+
+/* Populated by the PL330 core driver for DMA API driver's info */
+struct pl330_config {
+	/* IDs assembled from the PERIPH_ID and PCELL_ID registers */
+	u32	periph_id;
+	u32	pcell_id;
+#define DMAC_MODE_NS	(1 << 0)
+	/* DMAC_MODE_NS is set when CR0 reports the manager non-secure */
+	unsigned int	mode;
+	unsigned int	data_bus_width:10; /* In number of bits */
+	/* Number of channels, peripherals and events, taken from CR0 */
+	unsigned int	num_chan:4;
+	unsigned int	num_peri:6;
+	/* Raw value of the CR4 register */
+	u32		peri_ns;
+	unsigned int	num_events:6;
+	/* Raw value of the CR3 register */
+	u32		irq_ns;
+};
+
+/* Handle to the DMAC provided by PL330 engine */
+struct pl330_info {
+	/* Size of MicroCode buffers for each channel.
+	 * If left 0, pl330_add picks MCODE_BUFF_PER_REQ * 2.
+	 */
+	unsigned mcbufsz;
+	/* ioremap'ed address of PL330 registers */
+	void __iomem	*base;
+	/* Client can freely use it */
+	void	*private_data;
+	/* Populated by the PL330 core driver during pl330_add */
+	struct pl330_config	pcfg;
+	/* If the DMAC has some reset mechanism, then the client
+	 * may want to provide a pointer to the relevant function.
+	 * When set, pl330_add calls it before reading the DMAC
+	 * configuration.
+	 */
+	void (*dmac_reset)(struct pl330_info *pi);
+};
+
+/* Byte-swap setting of a request (see pl330_reqcfg.swap).
+ * NOTE(review): the numeric suffix presumably names the swap width -
+ * confirm the unit against the PL330 TRM.
+ */
+enum pl330_byteswap {
+	SWAP_NO = 0,
+	SWAP_2,
+	SWAP_4,
+	SWAP_8,
+	SWAP_16,
+};
+
+/* Kind of transfer a request performs (see pl330_req.rqtype).
+ * The names presumably give source and destination as memory (MEM) or
+ * peripheral device (DEV) - TODO confirm.
+ */
+enum pl330_reqtype {
+	MEMTOMEM,
+	MEMTODEV,
+	DEVTOMEM,
+	DEVTODEV,
+};
+
+/* Request Configuration.
+ * The PL330 core uses the last working configuration if the
+ * request doesn't provide any.
+ *
+ * The Client may want to provide this info only for the
+ * first request and a request with new settings.
+ */
+struct pl330_reqcfg {
+	/* Implies Incrementing address */
+	unsigned dst_inc:1;
+	unsigned src_inc:1;
+
+	/* For now, the SRC & DST protection levels
+	 * and burst size/length are assumed same
+	 */
+	unsigned nonsecure:1;
+	unsigned privileged:1;
+	unsigned insnaccess:1;
+	unsigned brst_len:5;
+	unsigned brst_size:3; /* power of 2 */
+
+	/* Cache control for destination and source, and byte swapping */
+	enum pl330_dstcachectrl dcctl;
+	enum pl330_srccachectrl scctl;
+	enum pl330_byteswap swap;
+};
+
+/* One cycle of DMAC operation.
+ * There may be more than one xfer in a request; they are chained
+ * through 'next' into a singly linked list.
+ */
+struct pl330_xfer {
+	/* Source and destination addresses of this xfer */
+	u32 src_addr;
+	u32 dst_addr;
+	/* Number of total _bytes_ to xfer */
+	u32 bytes;
+	/* Pointer to next xfer in the list.
+	 * The last xfer in the req must point to NULL
+	 */
+	struct pl330_xfer *next;
+};
+
+/* A request defining Scatter-Gather List ending with NULL xfer */
+struct pl330_req {
+	/* Kind of transfer, see enum pl330_reqtype */
+	enum pl330_reqtype rqtype;
+	/* Index of peripheral for the xfer */
+	unsigned peri:5;
+	/* Unique token for this xfer, set by the DMA engine */
+	void *token;
+	/* Callback to be called after xfer; may be NULL.
+	 * It receives 'token' and the result of the request.
+	 */
+	void (*xfer_cb)(void *token, int result);
+	/* If NULL, req will be done at last set parameters */
+	struct pl330_reqcfg *cfg;
+	/* Pointer to first xfer in the List */
+	struct pl330_xfer *x;
+};
+
+/* To know the status of the channel and DMAC, the client
+ * provides a pointer to this structure. The PL330 core
+ * fills it with current information (see pl330_chan_status).
+ */
+struct pl330_chanstatus {
+	/* If the DMAC engine halted due to some error,
+	 * the client should remove-add DMAC */
+	bool dmac_halted;
+	/* If channel is halted due to some error,
+	 * the client may ABORT or FLUSH the channel */
+	bool faulting;
+	/* Location of last load */
+	u32 src_addr;
+	/* Location of last store */
+	u32 dst_addr;
+	/* Pointer to the active req, NULL if none is running */
+	struct pl330_req *act_req;
+	/* Pointer to req waiting in the queue, NULL if there is none */
+	struct pl330_req *enq_req;
+};
+
+/* The callbacks are made with one of these arguments */
+enum pl330_op_err {
+	/* All xfers in the request succeeded */
+	PL330_ERR_NONE,
+	/* If req aborted due to global error */
+	PL330_ERR_ABORT,
+	/* If req failed due to problem with Channel */
+	PL330_ERR_FAIL,
+};
+
+/* Operations a client can request through pl330_chan_ctrl */
+enum pl330_chan_op {
+	/* Start the channel */
+	PL330_OP_START,
+	/* Abort the active xfer */
+	PL330_OP_ABORT,
+	/* Stop xfer and flush queue */
+	PL330_OP_FLUSH,
+};
+
+/* Entry points exported by the PL330 core.
+ * A DMAC handle is obtained with pl330_alloc, registered with pl330_add
+ * and finally released with pl330_free (which calls pl330_del itself).
+ * Channel handles ('ch_id') come from pl330_request_channel.
+ */
+extern struct pl330_info *pl330_alloc(struct device *);
+extern int pl330_add(struct pl330_info *);
+extern void pl330_del(struct pl330_info *pi);
+extern int pl330_update(struct pl330_info *pi);
+extern void pl330_release_channel(void *ch_id);
+extern void *pl330_request_channel(struct pl330_info *pi);
+extern int pl330_chan_status(void *ch_id, struct pl330_chanstatus *pstatus);
+extern int pl330_chan_ctrl(void *ch_id, enum pl330_chan_op op);
+extern int pl330_submit_req(void *ch_id, struct pl330_req *r);
+extern void pl330_free(struct pl330_info *pi);
+
+#endif	/* __PL330_CORE_H */
-- 
1.6.2.5

^ permalink raw reply related	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] PL330: Add PL330 DMA controller driver
  2010-04-01  5:34         ` jassi brar
@ 2010-04-01 23:23           ` Linus Walleij
  2010-04-02  1:38             ` jassi brar
  0 siblings, 1 reply; 29+ messages in thread
From: Linus Walleij @ 2010-04-01 23:23 UTC (permalink / raw)
  To: jassi brar
  Cc: Joonyoung Shim, dan.j.williams, kyungmin.park, linux-kernel,
	linux-arm-kernel, Russell King - ARM Linux, Ben Dooks

Hi Jassi,

this is looking good.

The only advantage of the other driver by Joonyoung is that it is finished and
ready for integration. If you finalize the DMA devices/engine API and post
this in time for the next merge window I would easily vote for including this
one rather than the other one. (Whatever that means for the world.)
Simply for technical merits.

It's sad that you two have done duplicate work but such is life..

I understand it that as this is the core engine so you intend to keep the core
in arch/arm/common/* and then a separate interface to the DMAdevices
implementing <linux/dmaengine.h> in drivers/dma/ and this is what the
"DMA API" referenced below refers to?

In that case I really like this clear separation between hardware driver
and DMA devices/engine API. And I see that the DMA API is not far
away. If you implement it you will be able to exercise this with the
DMA engine memcpy test to assure it's working.

There is nothing wrong with moving this entire thing except the header
file into drivers/dma it will be more comfortable there, with the other
DMA drivers. Whether the header should be in include/linux/amba
or include/linux/dma is however a good question for the philosophers,
but I would stick it into linux/amba with the rest of the PrimeCells.
But perhaps you have better ideas.

2010/4/1 jassi brar <jassisinghbrar@gmail.com>:

> o  The DMA API driver submits 'request' to PL330 engine.
>     A request is a sequence of DMA 'xfers' to be done before the DMA
> API driver wants to be notified.

This hints that there is some other patch to provide that API
<linux/dmaengine.h> that is not part of this patch, right?

>     A req can be a scatter-Gather-List.

This is great, do you also plan to support that for M<->M xfers like we
added for the DMA40? Then we might want to lift that into the generic
DMA engine.

> o  PL330 engine accepts requests from DMA API drivers in ping-pong manner,
>    i.e, at any time maximum two reqs can be queued. Other reqs have
>    to be buffered by DMA API drivers and enqueued whenever a req-finish
>    callback is made.

Nice!

> o  TODO: Desirable is to implement true LLI using MicroCode
> modification during each
>    request enqueue, so that the xfer continues even while IRQ is
> handled and callbacks made.
>    To me, there doesn't seem to be a way to flush ICACHE of a channel
> without halting it, so we
>    can't modify MicroCode in runtime. Using two channels per client
> to achieve true LLI is the last resort.

True, not as elegant as being able to do it with microcode but
still quite elegant.

>    So currently, cpu intervention is required to trigger each xfer,
> hence interrupt latency might play
>    some role.

From the DMA API level in the PrimeCell drivers the crucial driver that
needs something like this is the AMBA PL011 UART driver, RX part,
where data comes in from the outside and we have no control over
the data flow. I trigger one transfer to a buffer here, then wait for it
to complete or be interrupted. If it completes, I immediately trigger
another transfer to the second buffer before I start processing the just
received buffer (like front/back buffers).

I just hope that this will always be fast enough, queueing two transfers
after each other at the same time first would perhaps be better if the
hardware can handle it, now we have no hardware that can actually
queue that up so we can work it over the day we see something like
that...

(I don't know if I'm making myself clear, the PL011 patch may
speak for itself rather.)

> o  TODO: PAUSE/RESUME support. Currently the DMA API driver has to emulate it.

The only PrimeCell that needs this is currently again the PL011.
It needs to PAUSE then get the number of pending bytes and then
terminate the transfer. This is done when we timeout transfers e.g.
for UART consoles. So being able to pause and retrieve the number
of bytes left and then cancel is the most advanced sequence that
will be used by a PrimeCell currently.

I've seen sample PCM/I2S drivers that want PAUSE/RESUME though.

(...)
> Basic PL330 engine driver
>
> Signed-off-by: Jassi Brar <jassi.brar@samsung.com>
> ---
>  arch/arm/common/Kconfig               |    3 +
>  arch/arm/common/Makefile              |    1 +
>  arch/arm/common/pl330.c               | 1891 +++++++++++++++++++++++++++++++++
>  arch/arm/include/asm/hardware/pl330.h |  197 ++++
>  4 files changed, 2092 insertions(+), 0 deletions(-)
>  create mode 100644 arch/arm/common/pl330.c
>  create mode 100644 arch/arm/include/asm/hardware/pl330.h

Contemplate moving all but the header file to drivers/dma (not that I
have any strong feelings about it, just feels right).

(...)
> +/* Register and Bit field Definitions */
> +#define DS             0x0
> +#define DS_ST_STOP     0x0
> +#define DS_ST_EXEC     0x1
> +#define DS_ST_CMISS    0x2
> +#define DS_ST_UPDTPC   0x3
> +#define DS_ST_WFE      0x4
> +#define DS_ST_ATBRR    0x5
> +#define DS_ST_QBUSY    0x6
> +#define DS_ST_WFP      0x7
> +#define DS_ST_KILL     0x8
> +#define DS_ST_CMPLT    0x9
> +#define DS_ST_FLTCMP   0xe
> +#define DS_ST_FAULT    0xf
> +
> +#define DPC            0x4
> +#define INTEN          0x20
> +#define ES             0x24
> +#define INTSTATUS      0x28
> +#define INTCLR         0x2c
> +#define FSM            0x30
> +#define FSC            0x34
> +#define FTM            0x38
> +
> +#define _FTC           0x40
> +#define FTC(n)         (_FTC + (n)*0x4)
> +
> +#define _CS            0x100
> +#define CS(n)          (_CS + (n)*0x8)
> +#define CS_CNS         (1 << 21)
> +
> +#define _CPC           0x104
> +#define CPC(n)         (_CPC + (n)*0x8)
> +
> +#define _SA            0x400
> +#define SA(n)          (_SA + (n)*0x20)
> +
> +#define _DA            0x404
> +#define DA(n)          (_DA + (n)*0x20)
> +
> +#define _CC            0x408
> +#define CC(n)          (_CC + (n)*0x20)
> +
> +#define CC_SRCINC      (1 << 0)
> +#define CC_DSTINC      (1 << 14)
> +#define CC_SRCPRI      (1 << 8)
> +#define CC_DSTPRI      (1 << 22)
> +#define CC_SRCNS       (1 << 9)
> +#define CC_DSTNS       (1 << 23)
> +#define CC_SRCIA       (1 << 10)
> +#define CC_DSTIA       (1 << 24)
> +#define CC_SRCBRSTLEN_SHFT     4
> +#define CC_DSTBRSTLEN_SHFT     18
> +#define CC_SRCBRSTSIZE_SHFT    1
> +#define CC_DSTBRSTSIZE_SHFT    15
> +#define CC_SRCCCTRL_SHFT       11
> +#define CC_SRCCCTRL_MASK       0x7
> +#define CC_DSTCCTRL_SHFT       25
> +#define CC_DRCCCTRL_MASK       0x7
> +#define CC_SWAP_SHFT   28
> +
> +#define _LC0           0x40c
> +#define LC0(n)         (_LC0 + (n)*0x20)
> +
> +#define _LC1           0x410
> +#define LC1(n)         (_LC1 + (n)*0x20)
> +
> +#define DBGSTATUS      0xd00
> +#define DBG_BUSY       (1 << 0)
> +
> +#define DBGCMD         0xd04
> +#define DBGINST0       0xd08
> +#define DBGINST1       0xd0c
> +
> +#define CR0            0xe00
> +#define CR1            0xe04
> +#define CR2            0xe08
> +#define CR3            0xe0c
> +#define CR4            0xe10
> +#define CRD            0xe14
> +
> +#define PERIPH_ID      0xfe0
> +#define PCELL_ID       0xff0
> +
> +#define CR0_PERIPH_REQ_SET     (1 << 0)
> +#define CR0_BOOT_EN_SET                (1 << 1)
> +#define CR0_BOOT_MAN_NS                (1 << 2)
> +#define CR0_NUM_CHANS_SHIFT    4
> +#define CR0_NUM_CHANS_MASK     0x7
> +#define CR0_NUM_PERIPH_SHIFT   12
> +#define CR0_NUM_PERIPH_MASK    0x1f
> +#define CR0_NUM_EVENTS_SHIFT   17
> +#define CR0_NUM_EVENTS_MASK    0x1f
> +
> +#define CR1_ICACHE_LEN_SHIFT   0
> +#define CR1_ICACHE_LEN_MASK    0x7
> +#define CR1_NUM_ICACHELINES_SHIFT      4
> +#define CR1_NUM_ICACHELINES_MASK       0xf
> +
> +#define CRD_DATA_WIDTH_SHIFT   0
> +#define CRD_DATA_WIDTH_MASK    0x7
> +#define CRD_WR_CAP_SHIFT       4
> +#define CRD_WR_CAP_MASK                0x7
> +#define CRD_WR_Q_DEP_SHIFT     8
> +#define CRD_WR_Q_DEP_MASK      0xf
> +#define CRD_RD_CAP_SHIFT       12
> +#define CRD_RD_CAP_MASK                0x7
> +#define CRD_RD_Q_DEP_SHIFT     16
> +#define CRD_RD_Q_DEP_MASK      0xf
> +#define CRD_DATA_BUFF_SHIFT    20
> +#define CRD_DATA_BUFF_MASK     0x3ff
> +
> +#define        PART            0x330
> +#define DESIGNER       0x41
> +#define REVISION       0x0
> +#define INTEG_CFG      0x0
> +#define PERIPH_ID_VAL  ((PART << 0) | (DESIGNER << 12) \
> +                         | (REVISION << 20) | (INTEG_CFG << 24))
> +
> +#define PCELL_ID_VAL   0xb105f00d
> +
> +#define PL330_STATE_STOPPED            (1 << 0)
> +#define PL330_STATE_EXECUTING          (1 << 1)
> +#define PL330_STATE_WFE                        (1 << 2)
> +#define PL330_STATE_FAULTING           (1 << 3)
> +#define PL330_STATE_COMPLETING         (1 << 4)
> +#define PL330_STATE_WFP                        (1 << 5) /* TOUT for exit? */
> +#define PL330_STATE_KILLING            (1 << 6)
> +#define PL330_STATE_FAULT_COMPLETING   (1 << 7)
> +#define PL330_STATE_CACHEMISS          (1 << 8)
> +#define PL330_STATE_UPDTPC             (1 << 9)
> +#define PL330_STATE_ATBARRIER          (1 << 10) /* TOUT for exit? */
> +#define PL330_STATE_QUEUEBUSY          (1 << 11) /* TOUT for exit? */
> +#define PL330_STATE_INVALID            (1 << 15) /* To catch error */
> +
> +#define PL330_STABLE_STATES (PL330_STATE_STOPPED | PL330_STATE_EXECUTING \
> +                               | PL330_STATE_WFE | PL330_STATE_FAULTING)
> +
> +#define CMD_DMAADDH    0x54
> +#define CMD_DMAEND     0x00
> +#define CMD_DMAFLUSHP  0x35
> +#define CMD_DMAGO      0xa0
> +#define CMD_DMALD      0x04
> +#define CMD_DMALDP     0x25
> +#define CMD_DMALP      0x20
> +#define CMD_DMALPEND   0x28
> +#define CMD_DMAKILL    0x01
> +#define CMD_DMAMOV     0xbc
> +#define CMD_DMANOP     0x18
> +#define CMD_DMARMB     0x12
> +#define CMD_DMASEV     0x34
> +#define CMD_DMAST      0x08
> +#define CMD_DMASTP     0x29
> +#define CMD_DMASTZ     0x0c
> +#define CMD_DMAWFE     0x36
> +#define CMD_DMAWFP     0x30
> +#define CMD_DMAWMB     0x13
> +
> +#define SZ_DMAADDH     3
> +#define SZ_DMAEND      1
> +#define SZ_DMAFLUSHP   2
> +#define SZ_DMALD       1
> +#define SZ_DMALDP      2
> +#define SZ_DMALP       2
> +#define SZ_DMALPEND    2
> +#define SZ_DMAKILL     1
> +#define SZ_DMAMOV      6
> +#define SZ_DMANOP      1
> +#define SZ_DMARMB      1
> +#define SZ_DMASEV      2
> +#define SZ_DMAST       1
> +#define SZ_DMASTP      2
> +#define SZ_DMASTZ      1
> +#define SZ_DMAWFE      2
> +#define SZ_DMAWFP      2
> +#define SZ_DMAWMB      1
> +#define SZ_DMAGO       6
> +
> +#define BRST_LEN(ccr)  ((((ccr) >> CC_SRCBRSTLEN_SHFT) & 0xf) + 1)
> +#define BRST_SIZE(ccr) (1 << (((ccr) >> CC_SRCBRSTSIZE_SHFT) & 0x7))
> +
> +#define BYTE_TO_BURST(b, ccr)          ((b) / BRST_SIZE(ccr))
> +#define BURST_TO_BYTE(c, ccr)          ((c) * BRST_SIZE(ccr))
> +
> +/* With 256 bytes, we can do more than 2.5MB and 5MB xfers per req
> + * at 1byte/burst for P<->M and M<->M respectively.
> + * For typical scenario, at 1word/burst, 10MB and 20MB xfers per req
> + * should be enough for P<->M and M<->M respectively.
> + */


I like multiline comments like this, notice blank first line:

/*
 * Foo
 */

(Yeah I know it's picky. Applies to entire file.)

> +#define MCODE_BUFF_PER_REQ     256
> +
> +/* If program counter 'pc' is at req 'r' */
> +#define PC_AT_REQ(r, sz, pc)   (((pc) >= (r)->mc_bus) && \
> +                               ((pc) < ((r)->mc_bus + sz)))
> +
> +#define msecs_to_loops(t) (loops_per_jiffy / 1000 * HZ * t)
> +
> +struct _xfer_spec {
> +       u32 ccr;
> +       struct pl330_req *r;
> +       struct pl330_xfer *x;
> +};
> +
> +enum dmamov_dst {
> +       SAR = 0,
> +       CCR,
> +       DAR,
> +};
> +
> +enum pl330_dst {
> +       SRC = 0,
> +       DST,
> +};
> +
> +enum pl330_cond {
> +       SINGLE,
> +       BURST,
> +       ALWAYS,
> +};
> +
> +struct _pl330_req {
> +       u32 mc_bus;
> +       void *mc_cpu;
> +       struct pl330_req *r;
> +       /* hook to attach to DMAC's list of reqs with callbacks due */
> +       struct list_head rqd;
> +};
> +
> +struct _pl330_tbd {
> +       /* DMAC needs to be reset */
> +       unsigned reset_dmac:1;
> +       /* manager needs to be reset */
> +       unsigned reset_mngr:1;

Contemplate using bool for these two members.

> +       /* which thread needs to be reset */
> +       unsigned reset_chan:8;

Why not use:
u8 reset_chan;

> +};
> +
> +struct pl330_thread { /* Each DMA Channel */
> +       u8 id;
> +       int ev;
> +       /* If the channel is not yet acquired by any client */
> +       bool free;
> +       /* 0 for inactive, index of active request + 1, otherwise */
> +       unsigned active;
> +       struct mutex mtx;
> +       /* Only two at a time */
> +       struct _pl330_req req[2];
> +       /* parent DMAC */
> +       struct pl330_dmac *dmac;
> +};
> +
> +enum pl330_dmac_state {
> +       UNINIT,
> +       INIT,
> +       DYING,
> +};
> +
> +/* Each DMA Controller */
> +struct pl330_dmac {
> +       struct _pl330_tbd       dmac_tbd;
> +       spinlock_t              lock;
> +       /* hook to attach to global list of DMACs */
> +       struct list_head        node;
> +       /* Holds list of reqs with due callbacks */
> +       struct list_head        req_done;
> +       struct device           *dev;
> +       struct pl330_info       pinfo;
> +       /* Maximum possible events/irqs */
> +       int                     events[32];
> +       /* BUS address of buffer allocated for MicroCode for all Channels */
> +       u32                     mcode_bus;
> +       /* CPU address of buffer allocated for MicroCode for all Channels*/
> +       void                    *mcode_cpu;
> +       struct pl330_thread     *channels;
> +       /* MANAGER thread is _always_ the last one */
> +       struct pl330_thread     *manager;
> +       struct tasklet_struct   tasks;
> +       enum pl330_dmac_state   state;
> +};
> +
> +/* All PL-330 DMACs are added to this list */
> +static LIST_HEAD(pl330_list);
> +/* Protection mutex while list manipulation */
> +static DEFINE_MUTEX(pl330_mutex);
> +
> +static inline void _callback(struct pl330_req *r, int err)
> +{
> +       if (r && r->xfer_cb)
> +               r->xfer_cb(r->token, err);
> +}
> +
> +static inline bool _queue_empty(struct pl330_thread *thrd)
> +{
> +       return (thrd->req[0].r || thrd->req[1].r) ? false : true;
> +}
> +
> +static inline bool _queue_full(struct pl330_thread *thrd)
> +{
> +       return (thrd->req[0].r && thrd->req[1].r) ? true : false;
> +}
> +
> +static inline bool is_manager(struct pl330_thread *thrd)
> +{
> +       struct pl330_dmac *pl330 = thrd->dmac;
> +
> +       /* MANAGER is indexed at the end */
> +       if (thrd->id == pl330->pinfo.pcfg.num_chan)
> +               return true;
> +       else
> +               return false;
> +}
> +
> +/* If manager of the thread is in Non-Secure mode */
> +static inline bool _manager_ns(struct pl330_thread *thrd)
> +{
> +       struct pl330_dmac *pl330 = thrd->dmac;
> +
> +       return (pl330->pinfo.pcfg.mode & DMAC_MODE_NS) ? true : false;
> +}
> +
> +static inline u32 get_id(struct pl330_dmac *pl330, u32 off)
> +{
> +       void __iomem *r = pl330->pinfo.base;
> +       u32 id = 0;
> +
> +       id |= (readb(r + off + 0x0) << 0);
> +       id |= (readb(r + off + 0x4) << 8);
> +       id |= (readb(r + off + 0x8) << 16);
> +       id |= (readb(r + off + 0xc) << 24);
> +
> +       return id;
> +}
> +
> +static inline u32 _emit_ADDH(unsigned dry_run, u8 buf[],
> +               enum pl330_dst da, u16 val)
> +{
> +       if (dry_run)
> +               return SZ_DMAADDH;
> +
> +       buf[0] = CMD_DMAADDH;
> +       buf[0] |= (da << 1);
> +       *((u16 *)&buf[1]) = val;
> +
> +       return SZ_DMAADDH;
> +}
> +
> +/* Emit DMAEND unless this is a dry run; returns the instruction size. */
> +static inline u32 _emit_END(unsigned dry_run, u8 buf[])
> +{
> +       if (!dry_run)
> +               buf[0] = CMD_DMAEND;
> +
> +       return SZ_DMAEND;
> +}
> +
> +/*
> + * Emit DMAFLUSHP for peripheral 'peri' (low 5 bits, placed in bits 7:3
> + * of the second byte).  Returns the instruction size.
> + */
> +static inline u32 _emit_FLUSHP(unsigned dry_run, u8 buf[], u8 peri)
> +{
> +       if (!dry_run) {
> +               buf[0] = CMD_DMAFLUSHP;
> +               buf[1] = (peri & 0x1f) << 3;
> +       }
> +
> +       return SZ_DMAFLUSHP;
> +}
> +
> +/*
> + * Emit DMALD.  SINGLE sets bit 0, BURST sets bits 1 and 0; any other
> + * condition leaves the opcode unmodified.  Returns the instruction size.
> + */
> +static inline u32 _emit_LD(unsigned dry_run, u8 buf[], enum pl330_cond cond)
> +{
> +       if (dry_run)
> +               return SZ_DMALD;
> +
> +       buf[0] = CMD_DMALD;
> +
> +       switch (cond) {
> +       case SINGLE:
> +               buf[0] |= (1 << 0);
> +               break;
> +       case BURST:
> +               buf[0] |= (1 << 1) | (1 << 0);
> +               break;
> +       default:
> +               break;
> +       }
> +
> +       return SZ_DMALD;
> +}
> +
> +/*
> + * Emit DMALDP for peripheral 'peri'; bit 1 of the opcode is set for
> + * BURST.  Returns the instruction size.
> + */
> +static inline u32 _emit_LDP(unsigned dry_run, u8 buf[],
> +               enum pl330_cond cond, u8 peri)
> +{
> +       if (!dry_run) {
> +               buf[0] = CMD_DMALDP;
> +               if (cond == BURST)
> +                       buf[0] |= (1 << 1);
> +
> +               buf[1] = (peri & 0x1f) << 3;
> +       }
> +
> +       return SZ_DMALDP;
> +}
> +
> +/*
> + * Emit DMALP: start a loop of 'cnt' iterations on loop counter 0, or on
> + * loop counter 1 when 'loop' is non-zero.  A requested count of 256
> + * arrives here as 0 because 'cnt' is a u8.  Returns the instruction size.
> + */
> +static inline u32 _emit_LP(unsigned dry_run, u8 buf[],
> +               unsigned loop, u8 cnt)
> +{
> +       if (dry_run)
> +               return SZ_DMALP;
> +
> +       buf[0] = CMD_DMALP;
> +
> +       if (loop)
> +               buf[0] |= (1 << 1);
> +
> +       /*
> +        * The instruction encodes "iterations - 1" (the DMAC adds one
> +        * internally), so 1..255 map to 0..254 and a truncated 256
> +        * (cnt == 0) wraps to 255.
> +        */
> +       buf[1] = (u8)(cnt - 1);
> +
> +       return SZ_DMALP;
> +}
> +
> +/* Arguments consumed by _emit_LPEND() */
> +struct _arg_LPEND {
> +       /* SINGLE/BURST select the conditional forms of the opcode */
> +       enum pl330_cond cond;
> +       /* Whether the loop being closed is a loop-forever */
> +       bool forever;
> +       /* Non-zero sets bit 2 of the opcode (loop counter select) */
> +       unsigned loop;
> +       /* Backward jump distance, stored as the second instruction byte */
> +       u8 bjump;
> +};
> +
> +/*
> + * Emit DMALPEND, closing the loop described by 'arg'.  On a dry run
> + * nothing is written.  Returns the instruction size.
> + */
> +static inline u32 _emit_LPEND(unsigned dry_run, u8 buf[],
> +               const struct _arg_LPEND *arg)
> +{
> +       enum pl330_cond cond = arg->cond;
> +       bool forever = arg->forever;
> +       unsigned loop = arg->loop;
> +       u8 bjump = arg->bjump;
> +
> +       if (dry_run)
> +               return SZ_DMALPEND;
> +
> +       buf[0] = CMD_DMALPEND;
> +
> +       if (loop)
> +               buf[0] |= (1 << 2);
> +
> +       /*
> +        * Bit 4 is the "not forever" flag: it must be set when the loop
> +        * was opened by DMALP (counted) and clear for a loop-forever.
> +        */
> +       if (!forever)
> +               buf[0] |= (1 << 4);
> +
> +       if (cond == SINGLE)
> +               buf[0] |= (0 << 1) | (1 << 0);
> +       else if (cond == BURST)
> +               buf[0] |= (1 << 1) | (1 << 0);
> +
> +       buf[1] = bjump;
> +
> +       return SZ_DMALPEND;
> +}
> +
> +/* Emit DMAKILL unless this is a dry run; returns the instruction size. */
> +static inline u32 _emit_KILL(unsigned dry_run, u8 buf[])
> +{
> +       if (!dry_run)
> +               buf[0] = CMD_DMAKILL;
> +
> +       return SZ_DMAKILL;
> +}
> +
> +/*
> + * Emit DMAMOV, loading the 32-bit immediate 'val' into register 'dst'.
> + * On a dry run nothing is written.  Returns the instruction size.
> + */
> +static inline u32 _emit_MOV(unsigned dry_run, u8 buf[],
> +               enum dmamov_dst dst, u32 val)
> +{
> +       if (dry_run)
> +               return SZ_DMAMOV;
> +
> +       buf[0] = CMD_DMAMOV;
> +       buf[1] = dst;
> +       /*
> +        * Store the immediate one byte at a time, low byte first:
> +        * &buf[2] is not guaranteed to be 32-bit aligned, and a u32
> +        * store would depend on the CPU's byte order.
> +        */
> +       buf[2] = val & 0xff;
> +       buf[3] = (val >> 8) & 0xff;
> +       buf[4] = (val >> 16) & 0xff;
> +       buf[5] = (val >> 24) & 0xff;
> +
> +       return SZ_DMAMOV;
> +}
> +
> +/* Emit DMANOP unless this is a dry run; returns the instruction size. */
> +static inline u32 _emit_NOP(unsigned dry_run, u8 buf[])
> +{
> +       if (!dry_run)
> +               buf[0] = CMD_DMANOP;
> +
> +       return SZ_DMANOP;
> +}
> +
> +/* Emit DMARMB unless this is a dry run; returns the instruction size. */
> +static inline u32 _emit_RMB(unsigned dry_run, u8 buf[])
> +{
> +       if (!dry_run)
> +               buf[0] = CMD_DMARMB;
> +
> +       return SZ_DMARMB;
> +}
> +
> +/*
> + * Emit DMASEV for event 'ev' (low 5 bits, placed in bits 7:3 of the
> + * second byte).  Returns the instruction size.
> + */
> +static inline u32 _emit_SEV(unsigned dry_run, u8 buf[], u8 ev)
> +{
> +       if (!dry_run) {
> +               buf[0] = CMD_DMASEV;
> +               buf[1] = (ev & 0x1f) << 3;
> +       }
> +
> +       return SZ_DMASEV;
> +}
> +
> +/*
> + * Emit DMAST.  SINGLE sets bit 0, BURST sets bits 1 and 0; any other
> + * condition leaves the opcode unmodified.  Returns the instruction size.
> + */
> +static inline u32 _emit_ST(unsigned dry_run, u8 buf[], enum pl330_cond cond)
> +{
> +       if (dry_run)
> +               return SZ_DMAST;
> +
> +       buf[0] = CMD_DMAST;
> +
> +       switch (cond) {
> +       case SINGLE:
> +               buf[0] |= (1 << 0);
> +               break;
> +       case BURST:
> +               buf[0] |= (1 << 1) | (1 << 0);
> +               break;
> +       default:
> +               break;
> +       }
> +
> +       return SZ_DMAST;
> +}
> +
> +/*
> + * Emit DMASTP for peripheral 'peri'; bit 1 of the opcode is set for
> + * BURST.  Returns the instruction size.
> + */
> +static inline u32 _emit_STP(unsigned dry_run, u8 buf[],
> +               enum pl330_cond cond, u8 peri)
> +{
> +       if (!dry_run) {
> +               buf[0] = CMD_DMASTP;
> +               if (cond == BURST)
> +                       buf[0] |= (1 << 1);
> +
> +               buf[1] = (peri & 0x1f) << 3;
> +       }
> +
> +       return SZ_DMASTP;
> +}
> +
> +/* Emit DMASTZ unless this is a dry run; returns the instruction size. */
> +static inline u32 _emit_STZ(unsigned dry_run, u8 buf[])
> +{
> +       if (!dry_run)
> +               buf[0] = CMD_DMASTZ;
> +
> +       return SZ_DMASTZ;
> +}
> +
> +/*
> + * Emit DMAWFE for event 'ev' (low 5 bits, placed in bits 7:3 of the
> + * second byte); a non-zero 'invalidate' additionally sets bit 1 of that
> + * byte.  Returns the instruction size.
> + */
> +static inline u32 _emit_WFE(unsigned dry_run, u8 buf[], u8 ev,
> +               unsigned invalidate)
> +{
> +       if (!dry_run) {
> +               buf[0] = CMD_DMAWFE;
> +               buf[1] = (ev & 0x1f) << 3;
> +
> +               if (invalidate)
> +                       buf[1] |= (1 << 1);
> +       }
> +
> +       return SZ_DMAWFE;
> +}
> +
> +/*
> + * Emit DMAWFP for peripheral 'peri'.  The two low opcode bits are 00 for
> + * SINGLE, 10 for BURST and 01 for any other condition.  Returns the
> + * instruction size.
> + */
> +static inline u32 _emit_WFP(unsigned dry_run, u8 buf[],
> +               enum pl330_cond cond, u8 peri)
> +{
> +       if (dry_run)
> +               return SZ_DMAWFP;
> +
> +       buf[0] = CMD_DMAWFP;
> +
> +       switch (cond) {
> +       case SINGLE:
> +               break;
> +       case BURST:
> +               buf[0] |= (1 << 1);
> +               break;
> +       default:
> +               buf[0] |= (1 << 0);
> +               break;
> +       }
> +
> +       buf[1] = (peri & 0x1f) << 3;
> +
> +       return SZ_DMAWFP;
> +}
> +
> +/* Emit DMAWMB unless this is a dry run; returns the instruction size. */
> +static inline u32 _emit_WMB(unsigned dry_run, u8 buf[])
> +{
> +       if (!dry_run)
> +               buf[0] = CMD_DMAWMB;
> +
> +       return SZ_DMAWMB;
> +}
> +
> +/* Arguments consumed by _emit_GO() */
> +struct _arg_GO {
> +       /* Channel to start; only the low 3 bits are encoded */
> +       u8 chan;
> +       /* 32-bit immediate of the instruction (bytes 2..5) */
> +       u32 addr;
> +       /* When set, bit 1 of the opcode is set (non-secure) */
> +       unsigned ns:1;
> +};
> +
> +/*
> + * Emit DMAGO for channel arg->chan with the 32-bit immediate arg->addr;
> + * arg->ns selects the non-secure form.  On a dry run nothing is written.
> + * Returns the instruction size.
> + */
> +static inline u32 _emit_GO(unsigned dry_run, u8 buf[],
> +               const struct _arg_GO *arg)
> +{
> +       u8 chan = arg->chan;
> +       u32 addr = arg->addr;
> +       unsigned ns = arg->ns;
> +
> +       if (dry_run)
> +               return SZ_DMAGO;
> +
> +       buf[0] = CMD_DMAGO;
> +       if (ns)
> +               buf[0] |= (ns << 1);
> +
> +       buf[1] = chan & 0x7;
> +
> +       /*
> +        * Store the immediate one byte at a time, low byte first:
> +        * &buf[2] is not guaranteed to be 32-bit aligned, and a u32
> +        * store would depend on the CPU's byte order.
> +        */
> +       buf[2] = addr & 0xff;
> +       buf[3] = (addr >> 8) & 0xff;
> +       buf[4] = (addr >> 16) & 0xff;
> +       buf[5] = (addr >> 24) & 0xff;
> +
> +       return SZ_DMAGO;
> +}

With all these emit_* functions you have half a microcode compiler in the
driver, but I really, really like it! It's the right foundation for
hackers who want to have fun with the microcode generation later on.

> +/*
> + * Load the instruction in 'insn' into the debug instruction registers:
> + * bytes 0 and 1 go into DBGINST0 (together with the channel number
> + * unless 'as_manager' is set), bytes 2..5 go into DBGINST1.
> + */
> +static inline void _execute_DBGINSN(struct pl330_thread *thrd,
> +               u8 insn[], bool as_manager)
> +{
> +       void __iomem *regs = thrd->dmac->pinfo.base;
> +       u32 val;
> +
> +       val = (insn[0] << 16) | (insn[1] << 24);
> +       if (!as_manager) {
> +               val |= (1 << 0);
> +               val |= (thrd->id << 8); /* Channel Number */
> +       }
> +       writel(val, regs + DBGINST0);
> +
> +       /*
> +        * Gather bytes 2..5 one at a time, low byte first: &insn[2]
> +        * is not guaranteed to be 32-bit aligned, and a u32 load would
> +        * depend on the CPU's byte order.
> +        */
> +       val = insn[2] | (insn[3] << 8) | (insn[4] << 16)
> +                       | ((u32)insn[5] << 24);
> +       writel(val, regs + DBGINST1);
> +}
> +
> +/*
> + * Poll DBGSTATUS until DBG_BUSY clears.
> + * Returns true on time-out, false once the busy bit is seen clear.
> + */
> +static bool _until_dmac_idle(struct pl330_thread *thrd)
> +{
> +       void __iomem *regs = thrd->dmac->pinfo.base;
> +       unsigned long loops = msecs_to_loops(5);
> +
> +       do {
> +               /* Until Manager is Idle */
> +               if (!(readl(regs + DBGSTATUS) & DBG_BUSY))
> +                       return false;
> +
> +               cpu_relax();
> +       } while (--loops);
> +
> +       return true;
> +}
> +
> +/*
> + * Read the thread's status register (DS for the manager, CS(id) for a
> + * channel) and translate its low 4 bits into a PL330_STATE_* value.
> + * States that exist only for channel threads are reported as
> + * PL330_STATE_INVALID when read from the manager.
> + */
> +static inline u32 _state(struct pl330_thread *thrd)
> +{
> +       void __iomem *regs = thrd->dmac->pinfo.base;
> +       bool mngr = is_manager(thrd);
> +       u32 val;
> +
> +       if (mngr)
> +               val = readl(regs + DS) & 0xf;
> +       else
> +               val = readl(regs + CS(thrd->id)) & 0xf;
> +
> +       /* States shared by manager and channel threads */
> +       switch (val) {
> +       case DS_ST_STOP:
> +               return PL330_STATE_STOPPED;
> +       case DS_ST_EXEC:
> +               return PL330_STATE_EXECUTING;
> +       case DS_ST_CMISS:
> +               return PL330_STATE_CACHEMISS;
> +       case DS_ST_UPDTPC:
> +               return PL330_STATE_UPDTPC;
> +       case DS_ST_WFE:
> +               return PL330_STATE_WFE;
> +       case DS_ST_FAULT:
> +               return PL330_STATE_FAULTING;
> +       default:
> +               break;
> +       }
> +
> +       /* Everything below is valid for channel threads only */
> +       if (mngr)
> +               return PL330_STATE_INVALID;
> +
> +       switch (val) {
> +       case DS_ST_ATBRR:
> +               return PL330_STATE_ATBARRIER;
> +       case DS_ST_QBUSY:
> +               return PL330_STATE_QUEUEBUSY;
> +       case DS_ST_WFP:
> +               return PL330_STATE_WFP;
> +       case DS_ST_KILL:
> +               return PL330_STATE_KILLING;
> +       case DS_ST_CMPLT:
> +               return PL330_STATE_COMPLETING;
> +       case DS_ST_FLTCMP:
> +               return PL330_STATE_FAULT_COMPLETING;
> +       default:
> +               return PL330_STATE_INVALID;
> +       }
> +}
> +
> +/*
> + * Use this _only_ to wait on transient states.
> + * Busy-waits with cpu_relax() until _state(t) has a bit of mask 's' set;
> + * there is no time-out.  The expansion already ends in ';' and some
> + * callers invoke it without one of their own.
> + */
> +#define UNTIL(t, s)    while (!(_state(t) & (s))) cpu_relax();
> +
> +/* Start doing req 'idx' of thread 'thrd' */
> +static bool _trigger(struct pl330_thread *thrd, unsigned idx)
> +{
> +       void __iomem *regs = thrd->dmac->pinfo.base;
> +       struct _pl330_req *req = &thrd->req[idx];
> +       struct pl330_req *r = req->r;
> +       struct _arg_GO go;
> +       unsigned ns;

bool

> +       u8 insn[6] = {0, 0, 0, 0, 0, 0};
> +
> +       /* Return if already ACTIVE */
> +       if (_state(thrd) != PL330_STATE_STOPPED)
> +               return true;
> +
> +       /* Return if no request */
> +       if (!r)
> +               return true;
> +
> +       /* If timed out due to halted state-machine */
> +       if (_until_dmac_idle(thrd))
> +               return false;
> +
> +       if (r->cfg)
> +               ns = r->cfg->nonsecure ? 1 : 0;

Since you defined nonsecure as :1 you could just assign it.
But please make both ns and cfg->nonsecure bool.

> +       else if (readl(regs + CS(thrd->id)) & CS_CNS)
> +               ns = 1;
> +       else
> +               ns = 0;
> +
> +       /* See 'Abort Sources' point-4 at Page 2-25 */
> +       if (_manager_ns(thrd) && !ns)
> +               printk(KERN_INFO "%s:%d Recipe for ABORT!\n",
> +                       __func__, __LINE__);


dev_info(thrd->dmac->dev, "....");

> +
> +       go.chan = thrd->id;
> +       go.addr = req->mc_bus;
> +       go.ns = ns;
> +       _emit_GO(0, insn, &go);
> +
> +       /* Set to generate interrupts for SEV */
> +       writel(readl(regs + INTEN) | (1 << thrd->ev), regs + INTEN);
> +
> +       /* Only manager can execute GO */
> +       _execute_DBGINSN(thrd, insn, true);
> +
> +       return true;
> +}
> +
> +/*
> + * Makes sure the thread is in STOPPED state: the manager is sent DMAEND,
> + * a channel thread DMAKILL.  Does nothing if already stopped.
> + */
> +static void _stop(struct pl330_thread *thrd)
> +{
> +       u8 insn[6] = {0, 0, 0, 0, 0, 0};
> +       bool mngr;
> +
> +       if (_state(thrd) == PL330_STATE_STOPPED)
> +               return;
> +
> +       mngr = is_manager(thrd);
> +       if (mngr)
> +               _emit_END(0, insn);
> +       else
> +               _emit_KILL(0, insn);
> +
> +       _execute_DBGINSN(thrd, insn, mngr);
> +}
> +
> +/*
> + * Bring the thread to a point where a queued request can run.
> + * Faulting/killing/completing threads are driven to STOPPED and then
> + * triggered; threads that are already busy are left alone.
> + * Returns false if the thread is in WFE or an unexpected state, or if
> + * _trigger() fails; true otherwise.
> + * The first five cases deliberately fall through to the next one.
> + */
> +static bool _start(struct pl330_thread *thrd)
> +{
> +       switch (_state(thrd)) {
> +       case PL330_STATE_FAULT_COMPLETING:
> +               UNTIL(thrd, PL330_STATE_FAULTING | PL330_STATE_KILLING);
> +
> +               if (_state(thrd) == PL330_STATE_KILLING)
> +                       UNTIL(thrd, PL330_STATE_STOPPED)
> +
> +               /* fall through */
> +       case PL330_STATE_FAULTING:
> +               _stop(thrd);
> +
> +               /* fall through */
> +       case PL330_STATE_KILLING:
> +       case PL330_STATE_COMPLETING:
> +               UNTIL(thrd, PL330_STATE_STOPPED)
> +
> +               /* fall through */
> +       case PL330_STATE_STOPPED:
> +               /* Prefer slot 0 when it holds a request, else slot 1 */
> +               return _trigger(thrd, thrd->req[0].r ? 0 : 1);
> +
> +       case PL330_STATE_WFP:
> +       case PL330_STATE_QUEUEBUSY:
> +       case PL330_STATE_ATBARRIER:
> +       case PL330_STATE_UPDTPC:
> +       case PL330_STATE_CACHEMISS:
> +       case PL330_STATE_EXECUTING:
> +               return true;
> +
> +       case PL330_STATE_WFE: /* for PAUSE - nothing yet */
> +       default: /* Shouldn't reach here with some transient state */
> +               return false;
> +       }
> +}
> +
> +/*
> + * Build the channel control value from the request configuration 'rqc':
> + * address increment, protection bits, burst length and size, cache
> + * control and swap.  Source and destination get identical protection
> + * and burst settings.
> + */
> +static inline u32 _prepare_ccr(struct pl330_reqcfg *rqc)
> +{
> +       u32 ccr = 0;
> +
> +       if (rqc->src_inc)
> +               ccr |= CC_SRCINC;
> +
> +       if (rqc->dst_inc)
> +               ccr |= CC_DSTINC;
> +
> +       /* We set same protection levels for Src and DST for now */
> +       if (rqc->privileged)
> +               ccr |= CC_SRCPRI | CC_DSTPRI;
> +       if (rqc->nonsecure)
> +               ccr |= CC_SRCNS | CC_DSTNS;
> +       if (rqc->insnaccess)
> +               ccr |= CC_SRCIA | CC_DSTIA;
> +
> +       ccr |= (((rqc->brst_len - 1) & 0xf) << CC_SRCBRSTLEN_SHFT);
> +       ccr |= (((rqc->brst_len - 1) & 0xf) << CC_DSTBRSTLEN_SHFT);
> +
> +       ccr |= (rqc->brst_size << CC_SRCBRSTSIZE_SHFT);
> +       ccr |= (rqc->brst_size << CC_DSTBRSTSIZE_SHFT);
> +
> +       /*
> +        * Source cache control goes into the source field and
> +        * destination cache control into the destination field, which
> +        * is how _is_valid() reads them back.
> +        */
> +       ccr |= (rqc->scctl << CC_SRCCCTRL_SHFT);
> +       ccr |= (rqc->dcctl << CC_DSTCCTRL_SHFT);
> +
> +       ccr |= (rqc->swap << CC_SWAP_SHFT);
> +
> +       return ccr;
> +}
> +
> +/*
> + * A channel control value is acceptable only if neither its destination
> + * nor its source cache-control field holds one of the INVALID encodings.
> + */
> +static inline bool _is_valid(u32 ccr)
> +{
> +       enum pl330_dstcachectrl dcctl;
> +       enum pl330_srccachectrl scctl;
> +
> +       dcctl = (ccr >> CC_DSTCCTRL_SHFT) & CC_DRCCCTRL_MASK;
> +       scctl = (ccr >> CC_SRCCCTRL_SHFT) & CC_SRCCCTRL_MASK;
> +
> +       return dcctl != DINVALID1 && dcctl != DINVALID2
> +                       && scctl != SINVALID1 && scctl != SINVALID2;
> +}
> +
> +/*
> + * Emit 'cyc' unrolled memory-to-memory cycles (LD, RMB, ST, WMB each).
> + * Returns the number of bytes emitted.
> + */
> +static inline int _ldst_memtomem(unsigned dry_run, u8 buf[],
> +               const struct _xfer_spec *pxs, int cyc)
> +{
> +       int off = 0;
> +
> +       for (; cyc; cyc--) {
> +               /* Do we need RMB/WMB for each load/store? REVISIT XXX */
> +               off += _emit_LD(dry_run, buf + off, ALWAYS);
> +               off += _emit_RMB(dry_run, buf + off);
> +               off += _emit_ST(dry_run, buf + off, ALWAYS);
> +               off += _emit_WMB(dry_run, buf + off);
> +       }
> +
> +       return off;
> +}
> +
> +/*
> + * Emit 'cyc' unrolled device-to-memory cycles (WFP, LDP, ST, FLUSHP
> + * each) for the request's peripheral.  Returns the bytes emitted.
> + */
> +static inline int _ldst_devtomem(unsigned dry_run, u8 buf[],
> +               const struct _xfer_spec *pxs, int cyc)
> +{
> +       int off = 0;
> +
> +       for (; cyc; cyc--) {
> +               /* Do we need WFP for every cycle? REVISIT XXX */
> +               off += _emit_WFP(dry_run, buf + off, SINGLE, pxs->r->peri);
> +               off += _emit_LDP(dry_run, buf + off, SINGLE, pxs->r->peri);
> +               off += _emit_ST(dry_run, buf + off, ALWAYS);
> +               /* Do we need FLUSHP for every cycle? REVISIT XXX */
> +               off += _emit_FLUSHP(dry_run, buf + off, pxs->r->peri);
> +       }
> +
> +       return off;
> +}
> +
> +/*
> + * Emit 'cyc' unrolled memory-to-device cycles (WFP, LD, STP, FLUSHP
> + * each) for the request's peripheral.  Returns the bytes emitted.
> + */
> +static inline int _ldst_memtodev(unsigned dry_run, u8 buf[],
> +               const struct _xfer_spec *pxs, int cyc)
> +{
> +       int off = 0;
> +
> +       for (; cyc; cyc--) {
> +               /* Do we need WFP for every cycle? REVISIT XXX */
> +               off += _emit_WFP(dry_run, buf + off, SINGLE, pxs->r->peri);
> +               off += _emit_LD(dry_run, buf + off, ALWAYS);
> +               off += _emit_STP(dry_run, buf + off, SINGLE, pxs->r->peri);
> +               /* Do we need FLUSHP for every cycle? REVISIT XXX */
> +               off += _emit_FLUSHP(dry_run, buf + off, pxs->r->peri);
> +       }
> +
> +       return off;
> +}
> +
> +/*
> + * Emit 'cyc' load/store cycles of the kind matching the request type.
> + * Returns the bytes emitted; an unknown request type yields a huge
> + * value so that the caller rejects the request.
> + */
> +static int _bursts(unsigned dry_run, u8 buf[],
> +               const struct _xfer_spec *pxs, int cyc)
> +{
> +       switch (pxs->r->rqtype) {
> +       case MEMTODEV:
> +               return _ldst_memtodev(dry_run, buf, pxs, cyc);
> +
> +       case DEVTOMEM:
> +               return _ldst_devtomem(dry_run, buf, pxs, cyc);
> +
> +       case MEMTOMEM:
> +               return _ldst_memtomem(dry_run, buf, pxs, cyc);
> +
> +       default:
> +               return 0x40000000; /* Scare off the Client */
> +       }
> +}
> +
> +/* Returns bytes consumed and updates bursts */
> +static inline int _loop(unsigned dry_run, u8 buf[],
> +               unsigned long *bursts, const struct _xfer_spec *pxs)
> +{
> +       int cyc, cycmax, szlp, szlpend, szbrst, off;
> +       unsigned lcnt0, lcnt1, ljmp0, ljmp1;
> +       struct _arg_LPEND lpend;
> +
> +       /* Max iterations possibile in DMALP is 256 */
> +       if (*bursts >= 256*256) {
> +               lcnt1 = 256;
> +               lcnt0 = 256;
> +               cyc = *bursts / lcnt1 / lcnt0;
> +       } else if (*bursts > 256) {
> +               lcnt1 = 256;
> +               lcnt0 = *bursts / lcnt1;
> +               cyc = 1;
> +       } else {
> +               lcnt1 = *bursts;
> +               lcnt0 = 0;
> +               cyc = 1;
> +       }
> +
> +       szlp = _emit_LP(1, buf, 0, 0);
> +       szbrst = _bursts(1, buf, pxs, 1);
> +
> +       lpend.cond = ALWAYS;
> +       lpend.forever = false;
> +       lpend.loop = 0;
> +       lpend.bjump = 0;
> +       szlpend = _emit_LPEND(1, buf, &lpend);
> +
> +       if (lcnt0) {
> +               szlp *= 2;
> +               szlpend *= 2;
> +       }
> +
> +       /** Do not mess with the construct **/

Which means? Hackers like to mess with stuff... Note to self?
Usually comments like that are a sign of questionable design,
so if the design is solid, remove the comments, because then it
will be obvious that you don't want to mess with the construct.

> +
> +       /* Max bursts that we can unroll due to limit on the
> +        * size of backward jump that can be encoded in DMALPEND
> +        * which is 8-bits and hence 255
> +        */
> +       cycmax = (255 - (szlp + szlpend)) / szbrst;
> +
> +       cyc = (cycmax < cyc) ? cycmax : cyc;
> +
> +       off = 0;
> +
> +       ljmp0 = off;
> +       if (lcnt0)
> +               off += _emit_LP(dry_run, &buf[off], 0, lcnt0);
> +
> +       ljmp1 = off;
> +       off += _emit_LP(dry_run, &buf[off], 1, lcnt1);
> +
> +       off += _bursts(dry_run, &buf[off], pxs, cyc);
> +
> +       lpend.cond = ALWAYS;
> +       lpend.forever = false;
> +       lpend.loop = 1;
> +       lpend.bjump = off - ljmp1;
> +       off += _emit_LPEND(dry_run, &buf[off], &lpend);
> +
> +       if (lcnt0) {
> +               lpend.cond = ALWAYS;
> +               lpend.forever = false;
> +               lpend.loop = 0;
> +               lpend.bjump = off - ljmp0;
> +               off += _emit_LPEND(dry_run, &buf[off], &lpend);
> +       }
> +       /***********************************/
> +
> +       *bursts = lcnt1 * cyc;
> +       if (lcnt0)
> +               *bursts *= lcnt0;
> +
> +       return off;
> +}
> +
> +/*
> + * Emit as many _loop() constructs as it takes to cover every burst of
> + * the current xfer.  Each _loop() call reports how many bursts it
> + * covered.  Returns the bytes emitted.
> + */
> +static inline int _setup_loops(unsigned dry_run, u8 buf[],
> +               const struct _xfer_spec *pxs)
> +{
> +       struct pl330_xfer *xfer = pxs->x;
> +       u32 ccr = pxs->ccr;
> +       unsigned long done, left;
> +       int off = 0;
> +
> +       for (left = BYTE_TO_BURST(xfer->bytes, ccr); left; left -= done) {
> +               done = left;
> +               off += _loop(dry_run, &buf[off], &done, pxs);
> +       }
> +
> +       return off;
> +}
> +
> +/*
> + * Emit the microcode for one xfer unit: load SAR and DAR with the
> + * xfer's addresses, then the loop(s) moving the data.
> + * Returns the bytes emitted.
> + */
> +static inline int _setup_xfer(unsigned dry_run, u8 buf[],
> +               const struct _xfer_spec *pxs)
> +{
> +       struct pl330_xfer *xfer = pxs->x;
> +       int off;
> +
> +       /* DMAMOV SAR, x->src_addr */
> +       off = _emit_MOV(dry_run, buf, SAR, xfer->src_addr);
> +       /* DMAMOV DAR, x->dst_addr */
> +       off += _emit_MOV(dry_run, buf + off, DAR, xfer->dst_addr);
> +       /* Setup Loop(s) */
> +       off += _setup_loops(dry_run, buf + off, pxs);
> +
> +       return off;
> +}
> +
> +/*
> + * A req is a sequence of one or more xfer units.
> + * Generates the microcode for the whole req into slot 'index' of the
> + * thread: CCR setup, every xfer unit, then FLUSHP, SEV and END.
> + * Returns the number of bytes taken to setup the MC for the req, or
> + * -EINVAL if an xfer length is not a multiple of the burst size.
> + */
> +static int _setup_req(unsigned dry_run, struct pl330_thread *thrd,
> +               unsigned index, struct _xfer_spec *pxs)
> +{
> +       u8 *mc = thrd->req[index].mc_cpu;
> +       struct pl330_xfer *xfer = pxs->r->x;
> +       int off;
> +
> +       /* DMAMOV CCR, ccr */
> +       off = _emit_MOV(dry_run, mc, CCR, pxs->ccr);
> +
> +       do {
> +               /* Error if xfer length is not aligned at burst size */
> +               if (xfer->bytes % BRST_SIZE(pxs->ccr))
> +                       return -EINVAL;
> +
> +               pxs->x = xfer;
> +               off += _setup_xfer(dry_run, mc + off, pxs);
> +
> +               xfer = xfer->next;
> +       } while (xfer);
> +
> +       /* DMAFLUSHP peripheral */
> +       off += _emit_FLUSHP(dry_run, mc + off, pxs->r->peri);
> +       /* DMASEV peripheral/event */
> +       off += _emit_SEV(dry_run, mc + off, thrd->ev);
> +       /* DMAEND */
> +       off += _emit_END(dry_run, mc + off);
> +
> +       return off;
> +}
> +
> +/* Submit a list of xfers after which the client wants notification.
> + * Client is not notified after each xfer unit, just once after all
> + * xfer units are done or some error occurs.
> + * The actual xfer on bus starts automatically
> + *
> + * ch_id: the channel thread, passed as an opaque pointer.
> + * r:     the request to queue.
> + *
> + * Returns 0 on success, -EINVAL for a bad argument, an unacquired
> + * channel, a dying DMAC, an unknown peripheral or invalid settings,
> + * -EBUSY if both request slots are taken, -ENOMEM if the microcode does
> + * not fit into half of the channel's buffer, and -EIO if the thread
> + * could not be started.
> + */
> +int pl330_submit_req(void *ch_id, struct pl330_req *r)
> +{
> +       struct pl330_thread *thrd = ch_id;
> +       struct pl330_info *pi;
> +       struct _xfer_spec xs;
> +       void __iomem *regs;
> +       u32 ccr;
> +       unsigned idx;
> +       int ret = 0;
> +
> +       /* No Req or Unacquired Channel or DMAC stopping */
> +       if (!r || !thrd || thrd->free || thrd->dmac->state == DYING)
> +               return -EINVAL;
> +
> +       pi = &thrd->dmac->pinfo;
> +       regs = pi->base;
> +
> +       /* If request for non-existing peripheral */
> +       if (r->peri >= pi->pcfg.num_peri)
> +               return -EINVAL;
> +
> +       mutex_lock(&thrd->mtx);
> +
> +       if (_queue_full(thrd)) {
> +               ret = -EBUSY;
> +               goto xfer_exit;
> +       }
> +
> +       /* Use last settings, if not provided */
> +       if (r->cfg)
> +               ccr = _prepare_ccr(r->cfg);
> +       else
> +               ccr = readl(regs + CC(thrd->id));
> +
> +       /* If this req doesn't have valid xfer settings */
> +       if (!_is_valid(ccr)) {
> +               ret = -EINVAL;
> +               goto xfer_exit;
> +       }
> +
> +       /* Take slot 1 when slot 0 is occupied, slot 0 otherwise */
> +       idx = thrd->req[0].r ? 1 : 0;
> +
> +       xs.ccr = ccr;
> +       xs.r = r;
> +
> +       /* First dry run to check if req is acceptable */
> +       ret = _setup_req(1, thrd, idx, &xs);
> +       if (ret < 0)
> +               goto xfer_exit;
> +
> +       /* Each slot owns one half of the channel's microcode buffer */
> +       if (ret > pi->mcbufsz / 2) {
> +               ret = -ENOMEM;
> +               goto xfer_exit;
> +       }
> +
> +       ret = 0;
> +
> +       /* Hook the request */
> +       _setup_req(0, thrd, idx, &xs);
> +       thrd->req[idx].r = r;
> +
> +       /* NOTE(review): on failure the req stays hooked in its slot */
> +       if (!_start(thrd)) { /* Could not start */
> +               ret = -EIO;
> +               goto xfer_exit;
> +       }
> +
> +xfer_exit:
> +       mutex_unlock(&thrd->mtx);
> +       return ret;
> +}
> +EXPORT_SYMBOL(pl330_submit_req);

For all exported symbols: I have a hard time seeing anyone compiling the
DMA engine driver or anything else using this as a module and making use
of all these exports. But maybe for testing, what do I know...

> +/*
> + * Carry out the pending resets recorded in pl330->dmac_tbd.
> + * 'data' is the struct pl330_dmac pointer cast to unsigned long;
> + * presumably this is the handler of the pl330->tasks tasklet - confirm
> + * against the tasklet_init() call.
> + *
> + * A DMAC reset marks the controller DYING and implies a reset of the
> + * manager and of every channel.  For each channel to reset, both
> + * requests of an active thread are detached and their callbacks
> + * invoked with an error, then the thread is stopped.
> + */
> +static void pl330_dotask(unsigned long data)
> +{
> +       struct pl330_dmac *pl330 = (struct pl330_dmac *) data;
> +       struct pl330_info *pi = &pl330->pinfo;
> +       struct pl330_thread *thrd;
> +       int i;
> +
> +       /* The DMAC itself gone nuts */
> +       if (pl330->dmac_tbd.reset_dmac) {
> +               pl330->state = DYING;
> +
> +               for (i = 0; i < pi->pcfg.num_chan; i++) {
> +                       thrd = &pl330->channels[i];
> +
> +                       /* Mark thread as infected */
> +                       pl330->dmac_tbd.reset_chan |= (1 << thrd->id);
> +               }
> +
> +               pl330->dmac_tbd.reset_mngr = 1;
> +       }
> +
> +       if (pl330->dmac_tbd.reset_mngr)
> +               _stop(pl330->manager);
> +
> +       for (i = 0; i < pi->pcfg.num_chan; i++) {
> +               thrd = &pl330->channels[i];
> +
> +               if (pl330->dmac_tbd.reset_chan & (1 << thrd->id)) {
> +                       if (thrd->active) {
> +                               struct pl330_req *r1, *r2;
> +                               enum pl330_op_err err;
> +                               void __iomem *regs = pi->base;
> +                               unsigned active;
> +
> +                               /* thrd->active is the slot index plus one */
> +                               active = thrd->active - 1;
> +
> +                               r1 = thrd->req[active].r;
> +                               r2 = thrd->req[1 - active].r;
> +
> +                               /* Detach both reqs before calling back */
> +                               thrd->req[active].r = NULL;
> +                               thrd->req[1 - active].r = NULL;
> +                               thrd->active = 0;
> +
> +                               /* FAIL if FSC flags this channel, else ABORT */
> +                               if (readl(regs + FSC) & (1 << thrd->id))
> +                                       err = PL330_ERR_FAIL;
> +                               else
> +                                       err = PL330_ERR_ABORT;
> +
> +                               _callback(r1, err);
> +                               _callback(r2, err);
> +                       }
> +
> +                       _stop(thrd);
> +               }
> +       }
> +
> +       /* Clear all errors */
> +       pl330->dmac_tbd.reset_dmac = 0;
> +       pl330->dmac_tbd.reset_mngr = 0;
> +       pl330->dmac_tbd.reset_chan = 0;
> +
> +       return;
> +}
> +
> +/* Returns 1 if state was updated, 0 otherwise */
> +int pl330_update(struct pl330_info *pi)
> +{
> +       struct pl330_dmac *pl330;
> +       void __iomem *regs;
> +       u32 val;
> +       int id, ev, ret = 0;
> +
> +       if (!pi)
> +               return 0;
> +
> +       pl330 = container_of(pi, struct pl330_dmac, pinfo);
> +
> +       if (pl330->state == DYING)
> +               return 0;
> +
> +       regs = pi->base;
> +
> +       val = readl(regs + FSM) & 0x1;
> +       pl330->dmac_tbd.reset_mngr |= val;
> +
> +       val = readl(regs + FSC) & ((1 << pi->pcfg.num_chan) - 1);
> +       pl330->dmac_tbd.reset_chan |= val;
> +
> +       /* Check which event happened i.e, thread notified */
> +       val = readl(regs + ES);
> +       if (pi->pcfg.num_events < 32
> +                       && val & ~((1 << pi->pcfg.num_events) - 1)) {
> +               pl330->dmac_tbd.reset_dmac = 1;
> +               printk(KERN_INFO "%s:%d Unexpected!\n", __func__, __LINE__);

dev_info(pl330->dev, "...");

> +               ret = 1;
> +               goto updt_exit;
> +       }
> +
> +       INIT_LIST_HEAD(&pl330->req_done);
> +
> +       for (ev = 0; ev < pi->pcfg.num_events; ev++) {
> +
> +               struct _pl330_req *rqdone;
> +               struct pl330_thread *thrd;
> +               int active;
> +
> +               if (val & (1 << ev)) { /* Event occured */
> +
> +                       id = pl330->events[ev];
> +
> +                       thrd = &pl330->channels[id];
> +
> +                       mutex_lock(&thrd->mtx);
> +
> +                       if (!thrd->active) {
> +                               pl330->dmac_tbd.reset_chan |= (1 << id);
> +                               printk(KERN_INFO "%s:%d Unexpected!\n",
> +                                       __func__, __LINE__);

dev_info(pl330->dev, "....");

> +                       }
> +
> +                       active = thrd->active - 1;
> +                       rqdone = &thrd->req[active];
> +                       rqdone->r = NULL;
> +
> +                       if (thrd->req[1 - active].r)
> +                               thrd->active = 2 - active;
> +                       else
> +                               thrd->active = 0;
> +
> +                       /* Get going again ASAP */
> +                       _start(thrd);
> +
> +                       /* For now, just make a list of callbacks to be done */
> +                       list_add_tail(&rqdone->rqd, &pl330->req_done);
> +
> +                       mutex_unlock(&thrd->mtx);
> +
> +                       ret = 1;
> +               }
> +       }
> +
> +       /* Clear all event interrupts */
> +       writel(val, regs + INTCLR);
> +
> +       /* Now that we are in no hurry, do the callbacks */
> +       while (!list_empty(&pl330->req_done)) {
> +               struct _pl330_req *rqdone =
> +                               container_of(pl330->req_done.next,
> +                                       struct _pl330_req, rqd);
> +
> +               list_del_init(&rqdone->rqd);
> +
> +               _callback(rqdone->r, PL330_ERR_NONE);
> +       }
> +
> +updt_exit:
> +
> +       if (pl330->dmac_tbd.reset_dmac
> +                       || pl330->dmac_tbd.reset_mngr
> +                       || pl330->dmac_tbd.reset_chan) {
> +               ret = 1;
> +               tasklet_schedule(&pl330->tasks);
> +       }
> +
> +       return ret;
> +}
> +EXPORT_SYMBOL(pl330_update);
> +
> +/* This must be atomic. Since the DMA client calls this,
> + * there is no need to do callbacks. Otherwise, this may not be atomic.
> + *
> + * NOTE(review): the body takes thrd->mtx, a mutex, which may sleep;
> + * confirm how that fits the atomicity requirement above.
> + *
> + * Applies 'op' to the channel 'ch_id':
> + *   PL330_OP_FLUSH stops the thread and drops both queued requests;
> + *   PL330_OP_ABORT stops the thread, drops the active request and,
> + *     if another request is still queued, restarts the thread;
> + *   PL330_OP_START restarts the thread if a request is queued.
> + * Nothing is done when the queue is empty.
> + *
> + * Returns 0 on success, -EINVAL for a bad channel, a dying DMAC or an
> + * unknown op, and -EIO if the thread could not be started.
> + */
> +int pl330_chan_ctrl(void *ch_id, enum pl330_chan_op op)
> +{
> +       struct pl330_thread *thrd = ch_id;
> +       int ret = 0;
> +
> +       if (!thrd || thrd->free || thrd->dmac->state == DYING)
> +               return -EINVAL;
> +
> +       mutex_lock(&thrd->mtx);
> +
> +       if (_queue_empty(thrd))
> +               goto ctrl_exit;
> +
> +       switch (op) {
> +       case PL330_OP_FLUSH:
> +               _stop(thrd);
> +               thrd->req[0].r = NULL;
> +               thrd->req[1].r = NULL;
> +               thrd->active = 0;
> +               break;
> +
> +       case PL330_OP_ABORT:
> +               _stop(thrd);
> +
> +               /* ABORT is only for the active req */
> +               if (!thrd->active)
> +                       break;
> +
> +               thrd->req[thrd->active - 1].r = NULL;
> +
> +               if (_queue_empty(thrd)) {
> +                       thrd->active = 0;
> +                       break;
> +               }
> +
> +               /* fall through - start the request that is still queued */
> +       case PL330_OP_START: /* Should be un-necessary */
> +               if (!_queue_empty(thrd) && !_start(thrd))
> +                       ret = -EIO;
> +
> +               break;
> +
> +       default:
> +               ret = -EINVAL;
> +       }
> +
> +ctrl_exit:
> +       mutex_unlock(&thrd->mtx);
> +       return ret;
> +}
> +EXPORT_SYMBOL(pl330_chan_ctrl);
> +
> +int pl330_chan_status(void *ch_id, struct pl330_chanstatus *pstatus)
> +{
> +       struct pl330_thread *thrd = ch_id;
> +       struct pl330_dmac *pl330;
> +       struct pl330_info *pi;
> +       void __iomem *regs;
> +       int i;
> +       u32 val;
> +
> +       if (!pstatus || !thrd || thrd->free)
> +               return -EINVAL;
> +
> +       mutex_lock(&thrd->mtx);
> +
> +       pl330 = thrd->dmac;
> +       pi = &pl330->pinfo;
> +       regs = pi->base;
> +
> +       /* The client should remove the DMAC and add again */
> +       if (pl330->state == DYING)
> +               pstatus->dmac_halted = true;
> +       else
> +               pstatus->dmac_halted = false;
> +
> +       val = readl(regs + FSC);
> +       if (val & (1 << thrd->id))
> +               pstatus->faulting = true;
> +       else
> +               pstatus->faulting = false;
> +
> +       val = readl(regs + CPC(thrd->id));
> +       if (PC_AT_REQ(&thrd->req[0], pi->mcbufsz / 2, val))
> +               i = 1;
> +       else if (PC_AT_REQ(&thrd->req[1], pi->mcbufsz / 2, val))
> +               i = 2;
> +       else
> +               i = 0;
> +
> +       /* If channel inactive while req in queue */
> +       if ((thrd->active != i) || (!_queue_empty(thrd) && !i))
> +               printk(KERN_INFO "%s:%d DBG: Invalid state!",
> +                       __func__, __LINE__);

dev_err(pl330->dev, "....");

Notice err! Not info.

> +
> +       if (i) {
> +               i--;
> +               pstatus->act_req = thrd->req[i].r;
> +               pstatus->enq_req = thrd->req[1-i].r;
> +       } else {
> +               pstatus->act_req = NULL;
> +               pstatus->enq_req = NULL;
> +       }
> +
> +       pstatus->src_addr = readl(regs + SA(thrd->id));
> +       pstatus->dst_addr = readl(regs + DA(thrd->id));
> +
> +       mutex_unlock(&thrd->mtx);
> +
> +       return 0;
> +}
> +EXPORT_SYMBOL(pl330_chan_status);
> +
> +static inline void _reset_thread(struct pl330_thread *thrd)
> +{
> +       struct pl330_dmac *pl330 = thrd->dmac;
> +       struct pl330_info *pi = &pl330->pinfo;
> +
> +       thrd->req[0].r = NULL;
> +       thrd->req[0].mc_cpu = pl330->mcode_cpu
> +                               + (thrd->id * pi->mcbufsz);
> +       thrd->req[0].mc_bus = pl330->mcode_bus
> +                               + (thrd->id * pi->mcbufsz);
> +
> +       thrd->req[1].r = NULL;
> +       thrd->req[1].mc_cpu = thrd->req[0].mc_cpu
> +                               + pi->mcbufsz / 2;
> +       thrd->req[1].mc_bus = thrd->req[0].mc_bus
> +                               + pi->mcbufsz / 2;
> +}
> +
> +/* Reserve an event */
> +static inline int _alloc_event(struct pl330_thread *thrd)
> +{
> +       struct pl330_dmac *pl330 = thrd->dmac;
> +       struct pl330_info *pi = &pl330->pinfo;
> +       int ev;
> +
> +       for (ev = 0; ev < pi->pcfg.num_events; ev++) {
> +               if (pl330->events[ev] == -1) {
> +                       pl330->events[ev] = thrd->id;
> +                       return ev;
> +               }
> +       }
> +
> +       return -1;
> +}
> +
> +/* Release an event */
> +static inline void _free_event(struct pl330_thread *thrd, int ev)
> +{
> +       struct pl330_dmac *pl330 = thrd->dmac;
> +       struct pl330_info *pi = &pl330->pinfo;
> +
> +       if (ev >= 0 && ev < pi->pcfg.num_events
> +                       && pl330->events[ev] == thrd->id)
> +               pl330->events[ev] = -1;
> +}
> +
> +void *pl330_request_channel(struct pl330_info *pi)
> +{
> +       struct pl330_dmac *pl330;
> +       struct pl330_thread *thrd;
> +       unsigned long flags;
> +       int chans, i;
> +
> +       if (!pi)
> +               return NULL;
> +
> +       pl330 = container_of(pi, struct pl330_dmac, pinfo);
> +
> +       if (pl330->state == DYING)
> +               return NULL;
> +
> +       chans = pi->pcfg.num_chan;
> +
> +       spin_lock_irqsave(&pl330->lock, flags);
> +
> +       thrd = NULL;
> +       for (i = 0; i < chans; i++) {
> +               if (pl330->channels[i].free) {
> +                       thrd = &pl330->channels[i];
> +                       _reset_thread(thrd);
> +                       thrd->ev = _alloc_event(thrd);
> +                       if (thrd->ev >= 0) {
> +                               thrd->free = false;
> +                               break;
> +                       }
> +                       thrd = NULL;
> +               }
> +       }
> +
> +       spin_unlock_irqrestore(&pl330->lock, flags);
> +
> +       return thrd;
> +}
> +EXPORT_SYMBOL(pl330_request_channel);
> +
> +void pl330_release_channel(void *ch_id)
> +{
> +       struct pl330_thread *thrd = ch_id;
> +       struct pl330_dmac *pl330;
> +       struct pl330_req *r1, *r2;
> +       unsigned long flags;
> +
> +       if (!thrd || thrd->free || thrd->dmac->state == DYING)
> +               return;
> +
> +       pl330 = thrd->dmac;
> +
> +       if (thrd->active == 1) {
> +               r1 = thrd->req[0].r;
> +               r2 = thrd->req[1].r;
> +       } else {
> +               r1 = thrd->req[1].r;
> +               r2 = thrd->req[0].r;
> +       }
> +
> +       mutex_lock(&thrd->mtx);
> +
> +       _stop(thrd);
> +
> +       mutex_unlock(&thrd->mtx);
> +
> +       _callback(r1, PL330_ERR_ABORT);
> +       _callback(r2, PL330_ERR_ABORT);
> +
> +       spin_lock_irqsave(&pl330->lock, flags);
> +       _reset_thread(thrd);
> +       _free_event(thrd, thrd->ev);
> +       thrd->free = true;
> +       spin_unlock_irqrestore(&pl330->lock, flags);
> +}
> +EXPORT_SYMBOL(pl330_release_channel);
> +
> +static int dmac_alloc_threads(struct pl330_dmac *pl330)
> +{
> +       struct pl330_info *pi = &pl330->pinfo;
> +       int chans = pi->pcfg.num_chan;
> +       struct pl330_thread *thrd;
> +       int i;
> +
> +       /* Allocate 1 Manager and 'chans' Channel threads */
> +       pl330->channels = kzalloc((1 + chans) * sizeof(*thrd),
> +                                       GFP_KERNEL);
> +       if (!pl330->channels)
> +               return -ENOMEM;
> +
> +       /* Init Channel threads */
> +       for (i = 0; i < chans; i++) {
> +               thrd = &pl330->channels[i];
> +               thrd->id = i;
> +               thrd->dmac = pl330;
> +               mutex_init(&thrd->mtx);
> +               _reset_thread(thrd);
> +               thrd->free = true;
> +       }
> +
> +       /* MANAGER is indexed at the end */
> +       thrd = &pl330->channels[chans];
> +       thrd->id = chans;
> +       thrd->dmac = pl330;
> +       thrd->free = false; /* Manager can't do xfer */
> +       mutex_init(&thrd->mtx);
> +       pl330->manager = thrd;
> +
> +       return 0;
> +}
> +
> +static int dmac_free_threads(struct pl330_dmac *pl330)
> +{
> +       struct pl330_info *pi = &pl330->pinfo;
> +       int chans = pi->pcfg.num_chan;
> +       struct pl330_thread *thrd;
> +       int i;
> +
> +       /* Release Channel threads */
> +       for (i = 0; i < chans; i++) {
> +               thrd = &pl330->channels[i];
> +               pl330_release_channel((void *)thrd);
> +       }
> +
> +       /* Free memory */
> +       kfree(pl330->channels);
> +
> +       return 0;
> +}
> +
> +/* Must be called after pl330_info has been initialized */
> +static int dmac_alloc_resources(struct pl330_dmac *pl330)
> +{
> +       struct pl330_info *pi = &pl330->pinfo;
> +       int chans = pi->pcfg.num_chan;
> +       int ret;
> +
> +       /* Alloc MicroCode buffer for 'chans' Channel threads.
> +        * A channel's buffer offset is (Channel_Id * MCODE_BUFF_PERCHAN)
> +        */
> +       pl330->mcode_cpu = dma_alloc_coherent(pl330->dev,
> +                               chans * pi->mcbufsz,
> +                               &pl330->mcode_bus, GFP_KERNEL);
> +       if (!pl330->mcode_cpu) {
> +               printk(KERN_INFO "Unable to allocate MCODE buffer\n");

dev_err(pl330->dev, "....");
ERR!

> +               return -ENOMEM;
> +       }
> +
> +       ret = dmac_alloc_threads(pl330);
> +       if (ret) {
> +               printk(KERN_INFO "Unable to create channels for DMAC\n");


dev_err(pl330->dev, "....");
ERR!

> +               dma_free_coherent(pl330->dev,
> +                               chans * pi->mcbufsz,
> +                               pl330->mcode_cpu, pl330->mcode_bus);
> +               return ret;
> +       }
> +
> +       return 0;
> +}
> +
> +static void dmac_free_resources(struct pl330_dmac *pl330)
> +{
> +       struct pl330_info *pi = &pl330->pinfo;
> +       int chans = pi->pcfg.num_chan;
> +
> +       dmac_free_threads(pl330);
> +
> +       dma_free_coherent(pl330->dev, chans * pi->mcbufsz,
> +                               pl330->mcode_cpu, pl330->mcode_bus);
> +}
> +/* Initialize the structure for PL330 configuration, that can be used
> + * by the client driver the make best use of the DMAC
> + */
> +static void read_dmac_config(struct pl330_dmac *pl330)
> +{
> +       struct pl330_info *pi = &pl330->pinfo;
> +       void __iomem *regs = pi->base;
> +       u32 val;
> +
> +       val = readl(regs + CRD) >> CRD_DATA_WIDTH_SHIFT;
> +       val &= CRD_DATA_WIDTH_MASK;
> +       pi->pcfg.data_bus_width = 8 * (1 << val);
> +
> +       val = readl(regs + CR0) >> CR0_NUM_CHANS_SHIFT;
> +       val &= CR0_NUM_CHANS_MASK;
> +       val += 1;
> +       pi->pcfg.num_chan = val;
> +
> +       val = readl(regs + CR0);
> +       if (val & CR0_PERIPH_REQ_SET) {
> +               val = (val >> CR0_NUM_PERIPH_SHIFT) & CR0_NUM_PERIPH_MASK;
> +               val += 1;
> +               pi->pcfg.num_peri = val;
> +               pi->pcfg.peri_ns = readl(regs + CR4);
> +       } else {
> +               pi->pcfg.num_peri = 0;
> +       }
> +
> +       val = readl(regs + CR0);
> +       if (val & CR0_BOOT_MAN_NS)
> +               pi->pcfg.mode |= DMAC_MODE_NS;
> +       else
> +               pi->pcfg.mode &= ~DMAC_MODE_NS;
> +
> +       val = readl(regs + CR0) >> CR0_NUM_EVENTS_SHIFT;
> +       val &= CR0_NUM_EVENTS_MASK;
> +       val += 1;
> +       pi->pcfg.num_events = val;
> +
> +       pi->pcfg.irq_ns = readl(regs + CR3);
> +
> +       pi->pcfg.periph_id = get_id(pl330, PERIPH_ID);
> +       pi->pcfg.pcell_id = get_id(pl330, PCELL_ID);
> +}
> +
> +/* After pl330_alloc, initialize pl330_info.base
> + * before calling pl330_add
> + */
> +int pl330_add(struct pl330_info *pi)
> +{
> +       struct pl330_dmac *pl330, *pt;
> +       void __iomem *regs;
> +       int i;
> +
> +       if (!pi)
> +               return -EINVAL;
> +
> +       pl330 = container_of(pi, struct pl330_dmac, pinfo);
> +
> +       regs = pi->base;
> +
> +       /* If the SoC can perform reset on the DMAC, then do it
> +        * before reading its configuration.
> +        */
> +       if (pi->dmac_reset)
> +               pi->dmac_reset(pi);
> +
> +       /* Check if we can handle this DMAC */
> +       if (get_id(pl330, PERIPH_ID) != PERIPH_ID_VAL
> +          || get_id(pl330, PCELL_ID) != PCELL_ID_VAL) {
> +               printk(KERN_INFO "PERIPH_ID 0x%x, PCELL_ID 0x%x !\n",
> +                       readl(regs + PERIPH_ID), readl(regs + PCELL_ID));

dev_info(pl330->dev, ...)

> +               return -EINVAL;
> +       }

If the parent device (IMO a DMAdevices/DMAengine) is a struct amba_device,
I don't think this ID check is necessary; there is already PrimeCell
matching code in
<linux/amba/bus.h>

> +
> +       /* Make sure it isn't already added */
> +       list_for_each_entry(pt, &pl330_list, node)
> +               if (pt == pl330)

Perhaps print some warning here. Doesn't seem sound that this
would happen.

> +                       return 0;
> +
> +       /* Read the configuration of the DMAC */
> +       read_dmac_config(pl330);
> +
> +       if (pi->pcfg.num_events == 0) {
> +               printk(KERN_INFO "%s:%d Can't work without events!\n",
> +                       __func__, __LINE__);

dev_info(pl330->dev, "....");

> +               return -EINVAL;
> +       }
> +
> +       /* Use default MC buffer size if not provided */
> +       if (!pi->mcbufsz)
> +               pi->mcbufsz = MCODE_BUFF_PER_REQ * 2;
> +
> +       /* Mark all events as free */
> +       for (i = 0; i < pi->pcfg.num_events; i++)
> +               pl330->events[i] = -1;
> +
> +       /* Allocate resources needed by the DMAC */
> +       i = dmac_alloc_resources(pl330);
> +       if (i) {
> +               printk(KERN_INFO "Unable to create channels for DMAC\n");

dev_info(pl330->dev, "....");

> +               return i;
> +       }
> +
> +       mutex_lock(&pl330_mutex);
> +       list_add_tail(&pl330->node, &pl330_list);
> +       mutex_unlock(&pl330_mutex);
> +
> +       tasklet_init(&pl330->tasks, pl330_dotask,
> +                               (unsigned long) pl330);
> +
> +       pl330->state = INIT;
> +
> +       return 0;
> +}
> +EXPORT_SYMBOL(pl330_add);
> +
> +/* Drop DMAC from the list
> + */
> +void pl330_del(struct pl330_info *pi)
> +{
> +       struct pl330_dmac *pl330, *pt;
> +       int found;
> +
> +       if (!pi)
> +               return;
> +
> +       pl330 = container_of(pi, struct pl330_dmac, pinfo);
> +
> +       pl330->state = UNINIT;
> +
> +       /* Make sure it is already added */
> +       found = 0;
> +       list_for_each_entry(pt, &pl330_list, node)
> +               if (pt == pl330)
> +                       found = 1;
> +
> +       if (!found)
> +               return;
> +
> +       tasklet_kill(&pl330->tasks);
> +
> +       mutex_lock(&pl330_mutex);
> +       list_del(&pl330->node);
> +       mutex_unlock(&pl330_mutex);
> +
> +       /* Free DMAC resources */
> +       dmac_free_resources(pl330);
> +}
> +EXPORT_SYMBOL(pl330_del);
> +
> +struct pl330_info *pl330_alloc(struct device *dev)
> +{
> +       struct pl330_dmac *pl330;
> +
> +       pl330 = kzalloc(sizeof(*pl330), GFP_KERNEL);
> +       if (!pl330)
> +               return NULL;
> +
> +       spin_lock_init(&pl330->lock);
> +
> +       pl330->dev = dev;
> +
> +       return &pl330->pinfo;
> +}
> +EXPORT_SYMBOL(pl330_alloc);
> +
> +void pl330_free(struct pl330_info *pi)
> +{
> +       struct pl330_dmac *pl330;
> +
> +       if (!pi)
> +               return;
> +
> +       pl330_del(pi);
> +
> +       pl330 = container_of(pi, struct pl330_dmac, pinfo);
> +
> +       kfree(pl330);
> +}
> +EXPORT_SYMBOL(pl330_free);
> diff --git a/arch/arm/include/asm/hardware/pl330.h
> b/arch/arm/include/asm/hardware/pl330.h
> new file mode 100644
> index 0000000..4e907ad
> --- /dev/null
> +++ b/arch/arm/include/asm/hardware/pl330.h
> @@ -0,0 +1,197 @@
> +/* linux/include/asm/hardware/pl330.h
> + *
> + * Copyright (C) 2010 Samsung Electronics Co Ltd.
> + *     Jaswinder Singh <jassi.brar@samsung.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
> + */
> +
> +#ifndef __PL330_CORE_H
> +#define __PL330_CORE_H
> +
> +enum pl330_srccachectrl {
> +       SCCTRL0 = 0, /* Noncacheable and nonbufferable */
> +       SCCTRL1, /* Bufferable only */
> +       SCCTRL2, /* Cacheable, but do not allocate */
> +       SCCTRL3, /* Cacheable and bufferable, but do not allocate */
> +       SINVALID1,
> +       SINVALID2,
> +       SCCTRL6, /* Cacheable write-through, allocate on reads only */
> +       SCCTRL7, /* Cacheable write-back, allocate on reads only */
> +};
> +
> +enum pl330_dstcachectrl {
> +       DCCTRL0 = 0, /* Noncacheable and nonbufferable */
> +       DCCTRL1, /* Bufferable only */
> +       DCCTRL2, /* Cacheable, but do not allocate */
> +       DCCTRL3, /* Cacheable and bufferable, but do not allocate */
> +       DINVALID1 = 8,
> +       DINVALID2,
> +       DCCTRL6, /* Cacheable write-through, allocate on writes only */
> +       DCCTRL7, /* Cacheable write-back, allocate on writes only */
> +};
> +
> +/* Populated by the PL330 core driver for DMA API driver's info */
> +struct pl330_config {
> +       u32     periph_id;
> +       u32     pcell_id;
> +#define DMAC_MODE_NS   (1 << 0)
> +       unsigned int    mode;
> +       unsigned int    data_bus_width:10; /* In number of bits */
> +       unsigned int    num_chan:4;
> +       unsigned int    num_peri:6;
> +       u32             peri_ns;
> +       unsigned int    num_events:6;
> +       u32             irq_ns;
> +};
> +
> +/* Handle to the DMAC provided by PL330 engine */
> +struct pl330_info {

Contemplate adding:
      /* Owning device */
      struct device *dev;

> +       /* Size of MicroCode buffers for each channel */
> +       unsigned mcbufsz;
> +       /* ioremap'ed address of PL330 registers */
> +       void __iomem    *base;
> +       /* Client can freely use it */
> +       void    *private_data;
> +       /* Populated by the PL330 core driver during pl330_add */
> +       struct pl330_config     pcfg;
> +       /* If the DMAC has some reset mechanism, then the client
> +        * may want to provide pointer to the relevent function.
> +        */
> +       void (*dmac_reset)(struct pl330_info *pi);
> +};
> +
> +enum pl330_byteswap {
> +       SWAP_NO = 0,
> +       SWAP_2,
> +       SWAP_4,
> +       SWAP_8,
> +       SWAP_16,
> +};
> +
> +enum pl330_reqtype {
> +       MEMTOMEM,
> +       MEMTODEV,
> +       DEVTOMEM,
> +       DEVTODEV,
> +};
> +
> +/* Request Configuration.
> + * The PL330 core uses the last working configuration if the
> + * request doesn't provide any.
> + *
> + * The Client may want to provide this info only for the
> + * first request and a request with new settings.
> + */
> +struct pl330_reqcfg {
> +       /* Implies Incrementing address */
> +       unsigned dst_inc:1;
> +       unsigned src_inc:1;
> +
> +       /* For now, the SRC & DST protection levels
> +        * and burst size/length are assumed same
> +        */
> +       unsigned nonsecure:1;
> +       unsigned privileged:1;
> +       unsigned insnaccess:1;

For all of these things using just one bit, contemplate
turning them into bool instead, because that's what they are.

> +       unsigned brst_len:5;
> +       unsigned brst_size:3; /* power of 2 */
> +
> +       enum pl330_dstcachectrl dcctl;
> +       enum pl330_srccachectrl scctl;
> +       enum pl330_byteswap swap;
> +};
> +
> +/* One cycle of DMAC operation.
> + * There may be more than one xfer in a request.
> + */
> +struct pl330_xfer {
> +       u32 src_addr;
> +       u32 dst_addr;
> +       /* Number of total _bytes_ to xfer */
> +       u32 bytes;
> +       /* Pointer to next xfer in the list.
> +        * The last xfer in the req must point to NULL
> +        */
> +       struct pl330_xfer *next;
> +};
> +
> +/* A request defining Scatter-Gather List ending with NULL xfer */
> +struct pl330_req {
> +       enum pl330_reqtype rqtype;
> +       /* Index of peripheral for the xfer */
> +       unsigned peri:5;
> +       /* Unique token for this xfer, set by the DMA engine */
> +       void *token;
> +       /* Callback to be called after xfer */
> +       void (*xfer_cb)(void *token, int result);
> +       /* If NULL, req will be done at last set parameters */
> +       struct pl330_reqcfg *cfg;
> +       /* Pointer to first xfer in the List */
> +       struct pl330_xfer *x;
> +};
> +
> +/* To know the status of the channel and DMAC, the client
> + * provides a pointer to this structure. The PL330 core
> + * fills it with current information
> + */
> +struct pl330_chanstatus {
> +       /* If the DMAC engine halted due to some error,
> +        * the client should remove-add DMAC */
> +       bool dmac_halted;
> +       /* If channel is halted due to some error,
> +        * the client may ABORT or FLUSH the channel */
> +       bool faulting;
> +       /* Location of last load */
> +       u32 src_addr;
> +       /* Location of last store */
> +       u32 dst_addr;
> +       /* Pointer to the active req */
> +       struct pl330_req *act_req;
> +       /* Pointer to req waiting in the queue */
> +       struct pl330_req *enq_req;
> +};
> +
> +/* The callbacks are made with one of these arguments */
> +enum pl330_op_err {
> +       /* The all xfers in the request were success */
> +       PL330_ERR_NONE,
> +       /* If req aborted due to global error */
> +       PL330_ERR_ABORT,
> +       /* If req failed due to problem with Channel */
> +       PL330_ERR_FAIL,
> +};
> +
> +enum pl330_chan_op {
> +       /* Start the channel */
> +       PL330_OP_START,
> +       /* Abort the active xfer */
> +       PL330_OP_ABORT,
> +       /* Stop xfer and flush queue */
> +       PL330_OP_FLUSH,
> +};
> +
> +extern struct pl330_info *pl330_alloc(struct device *);
> +extern int pl330_add(struct pl330_info *);
> +extern void pl330_del(struct pl330_info *pi);
> +extern int pl330_update(struct pl330_info *pi);
> +extern void pl330_release_channel(void *ch_id);
> +extern void *pl330_request_channel(struct pl330_info *pi);
> +extern int pl330_chan_status(void *ch_id, struct pl330_chanstatus *pstatus);
> +extern int pl330_chan_ctrl(void *ch_id, enum pl330_chan_op op);
> +extern int pl330_submit_req(void *ch_id, struct pl330_req *r);
> +extern void pl330_free(struct pl330_info *pi);


Do you really need both pairs:

pl330_alloc() + pl330_add() and
pl330_del() + pl330_free()

to be public and exposed in the interface and exported? I would suggest
removing two of them unless there is something I don't get here.
IMO:

int pl330_add(struct device *, struct pl330_info *);
Should be enough; pl330_info will be filled in if the call returns successfully.
You could also

struct pl330_info *pl330_add(struct device *);

If you prefer to use macros like IS_ERR() etc on the returned pointer.

> +#endif /* __PL330_CORE_H */
> --
> 1.6.2.5
>

Yours,
Linus Walleij

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] PL330: Add PL330 DMA controller driver
  2010-04-01 23:23           ` Linus Walleij
@ 2010-04-02  1:38             ` jassi brar
  2010-04-17  7:06               ` Kyungmin Park
  0 siblings, 1 reply; 29+ messages in thread
From: jassi brar @ 2010-04-02  1:38 UTC (permalink / raw)
  To: Linus Walleij
  Cc: Joonyoung Shim, dan.j.williams, kyungmin.park, linux-kernel,
	linux-arm-kernel, Russell King - ARM Linux, Ben Dooks

On Fri, Apr 2, 2010 at 8:23 AM, Linus Walleij
<linus.ml.walleij@gmail.com> wrote:
> Hi Jassi,
> The only advantage of the other driver by Joonyoung is that it is finished and
> ready for integration. If you finalize the DMA devices/engine API and post
> this in time for the next merge window I would easily vote for including this
> one rather than the other one. (Whatever that means for the world.)
> Simply for technical merits.
Thank you. I am working on it full time. This submission was solely as
a preview, the
code needs to be optimized, polished and tested, though I don't anticipate many
changes in the API. By when should I submit the final version?

........

> I understand it that as this is the core engine so you intend to keep the core
> in arch/arm/common/* and then a separate interface to the DMAdevices
> implementing <linux/dmaengine.h> in drivers/dma/ and this is what the
> "DMA API" referenced below refers to?
Yes, drivers/dma/pl330.c could be one DMA API driver.
Another may implement S3C DMA API and reside in arch/arm/plat-samsung/ or
wherever the S3C PL080 driver is atm.
So, DMA API driver is the frontend that makes use of the
arch/arm/common/pl330.c
backend driver.

> In that case I really like this clear separation between hardware driver
> and DMA devices/engine API. And I see that the DMA API is not far
> away. If you implement it you will be able to excersise this with the
> DMA engine memcpy test to assure it's working.
Yes, implementing PL330 core was supposed to be the major challenge.
DMA API drivers should be easy, though I plan to first test the pl330.c with
S3C DMA API because that way I'll have a benchmark to compare the stability
and performance of this new driver.
Of course, I'd like to see a drivers/dma driver as well, but maybe JoonYoung wants
to implement that part, if he doesn't show interest then I will.

> There is nothing wrong with moving this entire thing except the header
> file into drivers/dma it will be more comfortable there, with the other
> DMA drivers. Whether the header should be in include/linux/amba
> or include/linux/dma is however a good question for the philosophers,
> but I would stick it into linux/amba with the rest of the PrimeCells.
> But perhaps you have better ideas.
For platforms that choose to implement their own DMA API (how good or bad is a
different subject), arch/arm/common/ seems to be more appropriate for this pl330.c
And all drivers in drivers/dma/ implement the include/linux/dmaengine.h API

> 2010/4/1 jassi brar <jassisinghbrar@gmail.com>:
>
>> o  The DMA API driver submits 'request' to PL330 engine.
>>     A request is a sequence of DMA 'xfers' to be done before the DMA
>> API driver wants to be notified.
>
> This hints that there is some other patch to provide that API
> <linux/dmaengine.h> that is not part of this patch, right?
Right, and that is what I call Client/DMA-API driver. Though the patch is not
ready yet.

>>     A req can be a scatter-Gather-List.
>
> This is great, do you also plan to support that for M<->M xfers like we
> added for the DMA40? Then we might want to lift that into the generic
> DMA engine.
It should already be working with this driver as such.

>> o  TODO: Desirable is to implement true LLI using MicroCode
>> modification during each
>>    request enqueue, so that the xfer continues even while IRQ is
>> handled and callbacks made.
>>    To me, there doesn't seem to be a way to flush ICACHE of a channel
>> without halting it, so we
>>    can't modify MicroCode in runtime. Using two channels per client
>> to achieve true LLI is the last resort.
>
> True, not as elegant as being able to do it with microcode but
> still quite elegant.
I hope the driver is efficient/fast enough for some tough test cases I have,
otherwise I might have to modify or add to the API to implement this
two-channels per user situation.

>>    So currently, cpu intervention is required to trigger each xfer,
>> hence interrupt latency might play
>>    some role.
>
> From the DMA API level in the PrimeCell drivers the crucial driver that
> need something like this is the AMBA PL011 UART driver, RX part,
> where data comes in from the outside and we have no control over
> the data flow. I trigger one transfer to a buffer here, then wait for it
> to complete or be interrupted. If it completes, I immediately trigger
> another transfer to the second buffer before I start processing the just
> recieved buffer (like front/back buffers).
If I get it right, that is a common issue with any 'slave-receiver' type device
and it might do good to have timeout option support in DMA API for such receive
requests. That is provide whatever data is collected within some
amount of time,
provide that to upper layers and queue request again.

>> o  TODO: PAUSE/RESUME support. Currently the DMA API driver has to emulate it.
>
> The only PrimeCell that needs this is currently again the PL011.
> It needs to PAUSE then get the number of pending bytes and then
> terminate the transfer. This is done when we timeout transfers e.g.
> for UART consoles. So being able to pause and retrieve the number
> of bytes left and then cancel is the most advanced sequence that
> will be used by a PrimeCell currently.
Even with this implementation, for PAUSE, the DMA API driver can call
pl330_chan_status, saving remaining requests locally and calling
PL330_OP_ABORT. During RESUME it simply submits the remaining requests again.

>> +/* Returns bytes consumed and updates bursts */
>> +static inline int _loop(unsigned dry_run, u8 buf[],
>> +               unsigned long *bursts, const struct _xfer_spec *pxs)
>> +{
>> +       int cyc, cycmax, szlp, szlpend, szbrst, off;
>> +       unsigned lcnt0, lcnt1, ljmp0, ljmp1;
>> +       struct _arg_LPEND lpend;
>> +
>> +       /* Max iterations possibile in DMALP is 256 */
>> +       if (*bursts >= 256*256) {
>> +               lcnt1 = 256;
>> +               lcnt0 = 256;
>> +               cyc = *bursts / lcnt1 / lcnt0;
>> +       } else if (*bursts > 256) {
>> +               lcnt1 = 256;
>> +               lcnt0 = *bursts / lcnt1;
>> +               cyc = 1;
>> +       } else {
>> +               lcnt1 = *bursts;
>> +               lcnt0 = 0;
>> +               cyc = 1;
>> +       }
>> +
>> +       szlp = _emit_LP(1, buf, 0, 0);
>> +       szbrst = _bursts(1, buf, pxs, 1);
>> +
>> +       lpend.cond = ALWAYS;
>> +       lpend.forever = false;
>> +       lpend.loop = 0;
>> +       lpend.bjump = 0;
>> +       szlpend = _emit_LPEND(1, buf, &lpend);
>> +
>> +       if (lcnt0) {
>> +               szlp *= 2;
>> +               szlpend *= 2;
>> +       }
>> +
>> +       /** Do not mess with the construct **/
>
> Which means? Hackers like to mess with stuff... Note to self?
> Usually comments like that is a trace of questionable design
> so if the design is solid, remove the comments because then it
> will be obvious that you don't want to mess with the construct.
That is mostly to self. It just means that every variable in the block
must be analyzed before making any change there.

......

>> +xfer_exit:
>> +       mutex_unlock(&thrd->mtx);
>> +       return ret;
>> +}
>> +EXPORT_SYMBOL(pl330_submit_req);
>
> For all exported symbols: I have a hard time seeing anyone compiling the
> DMA engine driver or anything else using this as a module and making use
> of all these exports. But maybe for testing, what do I know...
I think it is considered good practice to export every symbol of an API.

......

>> +
>> +       /* Check if we can handle this DMAC */
>> +       if (get_id(pl330, PERIPH_ID) != PERIPH_ID_VAL
>> +          || get_id(pl330, PCELL_ID) != PCELL_ID_VAL) {
>> +               printk(KERN_INFO "PERIPH_ID 0x%x, PCELL_ID 0x%x !\n",
>> +                       readl(regs + PERIPH_ID), readl(regs + PCELL_ID));
>
> dev_info(pl330->dev, ...)
>
>> +               return -EINVAL;
>> +       }
>
> If the parent device (IMO a DMAdevices/DMAengine) is an struct amba_device
> I don't think this ID check is necessary, there is already PrimeCell
> matching code in
> <linux/amba/bus.h>
As I said, this driver is designed to be usable with any DMA API, and
some, like S3C,
do not see the DMAC as some amba device.

>> +
>> +       /* Make sure it isn't already added */
>> +       list_for_each_entry(pt, &pl330_list, node)
>> +               if (pt == pl330)
>
> Perhaps print some warning here. Doesn't seem sound that this
> would happen.
The check is there just for some robustness.

......

>> +extern struct pl330_info *pl330_alloc(struct device *);
>> +extern int pl330_add(struct pl330_info *);
>> +extern void pl330_del(struct pl330_info *pi);
>> +extern int pl330_update(struct pl330_info *pi);
>> +extern void pl330_release_channel(void *ch_id);
>> +extern void *pl330_request_channel(struct pl330_info *pi);
>> +extern int pl330_chan_status(void *ch_id, struct pl330_chanstatus *pstatus);
>> +extern int pl330_chan_ctrl(void *ch_id, enum pl330_chan_op op);
>> +extern int pl330_submit_req(void *ch_id, struct pl330_req *r);
>> +extern void pl330_free(struct pl330_info *pi);
>
>
> Do you really need both pairs:
>
> pl330_alloc() + pl330_add() and
> pl330_del() + pl330_free()
yes I was already thinking on similar lines. I'll merge them to one.

As I said, this code was just for preview. It needs to undergo at
least one cycle of
optimizing->polishing->testing before I finally submit for inclusion.
comments, prints, types etc will be modified to match other code in
the directory
it will be aimed to be put in. Of course, I have taken every feedback you gave.

Thanks.

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] PL330: Add PL330 DMA controller driver
  2010-04-02  1:38             ` jassi brar
@ 2010-04-17  7:06               ` Kyungmin Park
  2010-04-19  1:14                 ` jassi brar
  0 siblings, 1 reply; 29+ messages in thread
From: Kyungmin Park @ 2010-04-17  7:06 UTC (permalink / raw)
  To: jassi brar, Linus Walleij
  Cc: Russell King - ARM Linux, Joonyoung Shim, linux-kernel,
	Ben Dooks, dan.j.williams, linux-arm-kernel

Hi,

If there are no more comments, it's time to select one to merge.
I can't say which is better, so I hope the community selects one.
The only consideration should be which is proper for Linux and future usage.

To Linus Walleij.
I hope it will merge your patch series together.

Thank you,
Kyungmin Park

On Fri, Apr 2, 2010 at 10:38 AM, jassi brar <jassisinghbrar@gmail.com> wrote:
> On Fri, Apr 2, 2010 at 8:23 AM, Linus Walleij
> <linus.ml.walleij@gmail.com> wrote:
>> Hi Jassi,
>> The only advantage of the other driver by Joonyoung is that it is finished and
>> ready for integration. If you finalize the DMA devices/engine API and post
>> this in time for the next merge window I would easily vote for including this
>> one rather than the other one. (Whatever that means for the world.)
>> Simply for technical merits.
> Thank you. I am working on it full time. This submission was solely as
> a preview, the
> code needs to be optimized, polished and tested, though I don't anticipate much
> changes in the API. By when should I submit the final version?
>
> ........
>
>> I understand it that as this is the core engine so you intend to keep the core
>> in arch/arm/common/* and then a separate interface to the DMAdevices
>> implementing <linux/dmaengine.h> in drivers/dma/ and this is what the
>> "DMA API" referenced below refers to?
> Yes, drivers/dma/pl330.c could be one DMA API driver.
> Another may implement S3C DMA API and reside in arch/arm/plat-samsung/ or
> wherever the S3C PL080 driver is atm.
> So, DMA API driver is the frontend that makes use of the
> arch/arm/common/pl330.c
> backend driver.
>
>> In that case I really like this clear separation between hardware driver
>> and DMA devices/engine API. And I see that the DMA API is not far
>> away. If you implement it you will be able to excersise this with the
>> DMA engine memcpy test to assure it's working.
> Yes, implementing PL330 core was supposed to be the major challenge.
> DMA API drivers should be easy, though I plan to first test the pl330.c with
> S3C DMA API because that way I'll have a benchmark to compare the stability
> and performance of this new driver.
> Of course, I'll like to see driver/dma driver as well, but maybe JoonYoung wants
> to implement that part, if he doesn't show interest then I will.
>
>> There is nothing wrong with moving this entire thing except the header
>> file into drivers/dma it will be more comfortable there, with the other
>> DMA drivers. Whether the header should be in include/linux/amba
>> or include/linux/dma is however a good question for the philosophers,
>> but I would stick it into linux/amba with the rest of the PrimeCells.
>> But perhaps you have better ideas.
> For platforms that choose to implement their own DMA API (how good or bad is a
> different subject), arch/arm/common/ seems be more appropriate for this pl330.c
> And all drivers in drivers/dma/ implement the include/linux/dmaengine.h API
>
>> 2010/4/1 jassi brar <jassisinghbrar@gmail.com>:
>>
>>> o  The DMA API driver submits 'request' to PL330 engine.
>>>     A request is a sequence of DMA 'xfers' to be done before the DMA
>>> API driver wants to be notified.
>>
>> This hints that there is some other patch to provide that API
>> <linux/dmaengine.h> that is not part of this patch, right?
> Right, and that is what I call Client/DMA-API driver. Though the patch is not
> ready yet.
>
>>>     A req can be a scatter-Gather-List.
>>
>> This is great, do you also plan to support that for M<->M xfers like we
>> added for the DMA40? Then we might want to lift that into the generic
>> DMA engine.
> It should already be working with this driver as such.
>
>>> o  TODO: Desirable is to implement true LLI using MicroCode
>>> modification during each
>>>    request enqueue, so that the xfer continues even while IRQ is
>>> handled and callbacks made.
>>>    To me, there doesn't seem to be a way to flush ICACHE of a channel
>>> without halting it, so we
>>>    can't modify MicroCode in runtime. Using two channels per client
>>> to achieve true LLI is the last resort.
>>
>> True, not as elegant as being able to do it with microcode but
>> still quite elegant.
> I hope the driver is efficient/fast enough for some tough test cases I have,
> otherwise I might have to modify or add to the API to implement this
> two-channels per user situation.
>
>>>    So currently, cpu intervention is required to trigger each xfer,
>>> hence interrupt latency might play
>>>    some role.
>>
>> From the DMA API level in the PrimeCell drivers the crucial driver that
>> need something like this is the AMBA PL011 UART driver, RX part,
>> where data comes in from the outside and we have no control over
>> the data flow. I trigger one transfer to a buffer here, then wait for it
>> to complete or be interrupted. If it completes, I immediately trigger
>> another transfer to the second buffer before I start processing the just
>> recieved buffer (like front/back buffers).
> If I get it right, that is common issue with any 'slave-receiver' type device
> and it might do good to have timeout option support in DMA API for such receive
> requests. That is provide whatever data is collected within some
> amount of time,
> provide that to upper layers and queue request again.
>
>>> o  TODO: PAUSE/RESUME support. Currently the DMA API driver has to emulate it.
>>
>> The only PrimeCell that needs this is currently again the PL011.
>> It needs to PAUSE then get the number of pending bytes and then
>> terminate the transfer. This is done when we timeout transfers e.g.
>> for UART consoles. So being able to pause and retrieve the number
>> of bytes left and then cancel is the most advanced sequence that
>> will be used by a PrimeCell currently.
> Even with this implementation, for PAUSE, the DMA API driver can call
> pl330_chan_status, saving remaining requests locally and calling
> PL330_OP_ABORT. During RESUME it simply submit remaining requests again.
>
>>> +/* Returns bytes consumed and updates bursts */
>>> +static inline int _loop(unsigned dry_run, u8 buf[],
>>> +               unsigned long *bursts, const struct _xfer_spec *pxs)
>>> +{
>>> +       int cyc, cycmax, szlp, szlpend, szbrst, off;
>>> +       unsigned lcnt0, lcnt1, ljmp0, ljmp1;
>>> +       struct _arg_LPEND lpend;
>>> +
>>> +       /* Max iterations possibile in DMALP is 256 */
>>> +       if (*bursts >= 256*256) {
>>> +               lcnt1 = 256;
>>> +               lcnt0 = 256;
>>> +               cyc = *bursts / lcnt1 / lcnt0;
>>> +       } else if (*bursts > 256) {
>>> +               lcnt1 = 256;
>>> +               lcnt0 = *bursts / lcnt1;
>>> +               cyc = 1;
>>> +       } else {
>>> +               lcnt1 = *bursts;
>>> +               lcnt0 = 0;
>>> +               cyc = 1;
>>> +       }
>>> +
>>> +       szlp = _emit_LP(1, buf, 0, 0);
>>> +       szbrst = _bursts(1, buf, pxs, 1);
>>> +
>>> +       lpend.cond = ALWAYS;
>>> +       lpend.forever = false;
>>> +       lpend.loop = 0;
>>> +       lpend.bjump = 0;
>>> +       szlpend = _emit_LPEND(1, buf, &lpend);
>>> +
>>> +       if (lcnt0) {
>>> +               szlp *= 2;
>>> +               szlpend *= 2;
>>> +       }
>>> +
>>> +       /** Do not mess with the construct **/
>>
>> Which means? Hackers like to mess with stuff... Note to self?
>> Usually comments like that is a trace of questionable design
>> so if the design is solid, remove the comments because then it
>> will be obvious that you don't want to mess with the construct.
> That is mostly to self. It just means that every variable in the block
> must be analyzed before making any change there.
>
> ......
>
>>> +xfer_exit:
>>> +       mutex_unlock(&thrd->mtx);
>>> +       return ret;
>>> +}
>>> +EXPORT_SYMBOL(pl330_submit_req);
>>
>> For all exported symbols: I have a hard time seeing anyone compiling the
>> DMA engine driver or anything else using this as a module and making use
>> of all these exports. But maybe for testing, what do I know...
> I think it is considered good practice to export every symbol of an API.
>
> ......
>
>>> +
>>> +       /* Check if we can handle this DMAC */
>>> +       if (get_id(pl330, PERIPH_ID) != PERIPH_ID_VAL
>>> +          || get_id(pl330, PCELL_ID) != PCELL_ID_VAL) {
>>> +               printk(KERN_INFO "PERIPH_ID 0x%x, PCELL_ID 0x%x !\n",
>>> +                       readl(regs + PERIPH_ID), readl(regs + PCELL_ID));
>>
>> dev_info(pl330->dev, ...)
>>
>>> +               return -EINVAL;
>>> +       }
>>
>> If the parent device (IMO a DMAdevices/DMAengine) is an struct amba_device
>> I don't think this ID check is necessary, there is already PrimeCell
>> matching code in
>> <linux/amba/bus.h>
> As I said, this driver is designed to be usable with any DMA API, and
> some, like S3C,
> do not see the DMAC as some amba device.
>
>>> +
>>> +       /* Make sure it isn't already added */
>>> +       list_for_each_entry(pt, &pl330_list, node)
>>> +               if (pt == pl330)
>>
>> Perhaps print some warning here. Doesn't seem sound that this
>> would happen.
> The check is there just for some robustness.
>
> ......
>
>>> +extern struct pl330_info *pl330_alloc(struct device *);
>>> +extern int pl330_add(struct pl330_info *);
>>> +extern void pl330_del(struct pl330_info *pi);
>>> +extern int pl330_update(struct pl330_info *pi);
>>> +extern void pl330_release_channel(void *ch_id);
>>> +extern void *pl330_request_channel(struct pl330_info *pi);
>>> +extern int pl330_chan_status(void *ch_id, struct pl330_chanstatus *pstatus);
>>> +extern int pl330_chan_ctrl(void *ch_id, enum pl330_chan_op op);
>>> +extern int pl330_submit_req(void *ch_id, struct pl330_req *r);
>>> +extern void pl330_free(struct pl330_info *pi);
>>
>>
>> Do you really need both pairs:
>>
>> pl330_alloc() + pl330_add() and
>> pl330_del() + pl330_free()
> yes I was already thinking on similar lines. I'll merge them to one.
>
> As I said, this code was just for preview. It needs to undergo at
> least one cycle of
> optimizing->polishing->testing before I finally submit for inclusion.
> comments, prints, types etc will be modified to match other code in
> the directory
> it will be aimed to be put in. Of course, I have taken every feedback you gave.
>
> Thanks.
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
>

^ permalink raw reply	[flat|nested] 29+ messages in thread

* Re: [PATCH v2] PL330: Add PL330 DMA controller driver
  2010-04-17  7:06               ` Kyungmin Park
@ 2010-04-19  1:14                 ` jassi brar
  0 siblings, 0 replies; 29+ messages in thread
From: jassi brar @ 2010-04-19  1:14 UTC (permalink / raw)
  To: Kyungmin Park
  Cc: Linus Walleij, Russell King - ARM Linux, Joonyoung Shim,
	linux-kernel, Ben Dooks, dan.j.williams, linux-arm-kernel

On Sat, Apr 17, 2010 at 4:06 PM, Kyungmin Park <kmpark@infradead.org> wrote:
> If there's no comments. it's time to select to merge it.
> I think it can't which is better. so I hope community select any one
> Only consideration which is proper for linux and future usage
As I said, I am working full time on it.
The PL330 core and the S3C DMA API driver are done, and it'll be a day or
two before I am done testing them.
So, around this Wednesday I plan to submit patches for the PL330 core driver
and the S3C-DMA-API driver for S5P-6442, C1XX and V210.
Once the PL330 core is accepted, we can freeze its API and start work on
the DMA engine API driver for it.

^ permalink raw reply	[flat|nested] 29+ messages in thread

end of thread, other threads:[~2010-04-19  1:14 UTC | newest]

Thread overview: 29+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-03-25  3:17 [PATCH v2] PL330: Add PL330 DMA controller driver Joonyoung Shim
2010-03-25  5:34 ` jassi brar
2010-03-25  8:30   ` Linus Walleij
2010-03-25 12:17     ` jassi brar
2010-03-25 15:13       ` Dan Williams
2010-03-25 22:27         ` jassi brar
2010-03-25 23:12           ` Dan Williams
2010-03-25 23:59             ` jassi brar
2010-03-26  0:29               ` Kyungmin Park
2010-03-26  0:48                 ` jassi brar
2010-03-26  0:54           ` Joonyoung Shim
2010-03-26  1:01             ` jassi brar
2010-03-25 15:20       ` Linus Walleij
2010-03-25 22:36         ` jassi brar
2010-04-01  5:34         ` jassi brar
2010-04-01 23:23           ` Linus Walleij
2010-04-02  1:38             ` jassi brar
2010-04-17  7:06               ` Kyungmin Park
2010-04-19  1:14                 ` jassi brar
2010-03-25  5:44 ` Marc Zyngier
2010-03-25  9:01   ` Joonyoung Shim
2010-03-25  9:32     ` Marc Zyngier
2010-03-25 10:05       ` Joonyoung Shim
2010-03-25 10:32         ` Marc Zyngier
2010-03-25 11:48           ` Joonyoung Shim
2010-03-25  8:26 ` Linus Walleij
2010-03-26  2:08 ` jassi brar
2010-03-31  1:07   ` Ben Dooks
2010-03-31  1:40     ` jassi brar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).