linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver
@ 2015-10-30  3:08 Sinan Kaya
  2015-10-30  3:08 ` [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver Sinan Kaya
                   ` (4 more replies)
  0 siblings, 5 replies; 51+ messages in thread
From: Sinan Kaya @ 2015-10-30  3:08 UTC (permalink / raw)
  To: dmaengine, timur, cov, jcm
  Cc: Sinan Kaya, Rob Herring, Pawel Moll, Mark Rutland, Ian Campbell,
	Kumar Gala, Vinod Koul, Dan Williams, devicetree, linux-kernel

The Qualcomm Technologies HIDMA device has been designed
to support virtualization technology. The driver has been
divided into two to follow the hardware design. The management
driver is executed in hypervisor context and is the main
managment for all channels provided by the device. The
channel driver is exected in the guest OS context.

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
---
 .../devicetree/bindings/dma/qcom_hidma_mgmt.txt    |  42 +
 drivers/dma/Kconfig                                |  11 +
 drivers/dma/Makefile                               |   1 +
 drivers/dma/qcom_hidma_mgmt.c                      | 868 +++++++++++++++++++++
 4 files changed, 922 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt
 create mode 100644 drivers/dma/qcom_hidma_mgmt.c

diff --git a/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt b/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt
new file mode 100644
index 0000000..81674ab
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt
@@ -0,0 +1,42 @@
+Qualcomm Technologies HIDMA Management interface
+
+Required properties:
+- compatible: must contain "qcom,hidma_mgmt"
+- reg: Address range for DMA device
+- interrupts: Should contain the one interrupt shared by all channels
+- nr-channels: Number of channels supported by this DMA controller.
+- max-write: Maximum write burst in bytes. A memcpy requested is
+  fragmented to multiples of this amount.
+- max-read: Maximum read burst in bytes. A memcpy request is
+  fragmented to multiples of this amount.
+- max-wxactions: Maximum write transactions to perform in a burst
+- max-rdactions: Maximum read transactions to perform in a burst
+- max-memset-limit: Maximum memset limit
+- ch-priority-#n: Priority of the channel
+- ch-weight-#n: Round robin weight of the channel
+Example:
+
+	hidma-mgmt@f9984000 = {
+		compatible = "qcom,hidma_mgmt";
+		reg = <0xf9984000 0x15000>;
+		interrupts = <0 94 0>;
+		nr-channels = 6;
+		max-write = 1024;
+		max-read = 1024;
+		max-wxactions = 31;
+		max-rdactions = 31;
+		max-memset-limit = 8;
+		ch-priority-0 = 0;
+		ch-priority-1 = 1;
+		ch-priority-2 = 1;
+		ch-priority-3 = 0;
+		ch-priority-4 = 0;
+		ch-priority-5 = 0;
+		ch-weight-0 = 1;
+		ch-weight-1 = 13;
+		ch-weight-2 = 10;
+		ch-weight-3 = 3;
+		ch-weight-4 = 4;
+		ch-weight-5 = 5;
+	};
+
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index b458475..76a5a5e 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -501,6 +501,17 @@ config XGENE_DMA
 	help
 	  Enable support for the APM X-Gene SoC DMA engine.
 
+config QCOM_HIDMA_MGMT
+	bool "Qualcomm Technologies HIDMA Managment support"
+	select DMA_ENGINE
+	help
+	  Enable support for the Qualcomm Technologies HIDMA Management.
+	  Each DMA device requires one management interface driver
+	  for basic initialization before QCOM_HIDMA driver can start
+	  managing the channels. In a virtualized environment, the guest
+	  OS would run QCOM_HIDMA driver and the hypervisor would run
+	  the QCOM_HIDMA_MGMT driver.
+
 config XILINX_VDMA
 	tristate "Xilinx AXI VDMA Engine"
 	depends on (ARCH_ZYNQ || MICROBLAZE)
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 7711a71..3d25ffd 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -53,6 +53,7 @@ obj-$(CONFIG_PL330_DMA) += pl330.o
 obj-$(CONFIG_PPC_BESTCOMM) += bestcomm/
 obj-$(CONFIG_PXA_DMA) += pxa_dma.o
 obj-$(CONFIG_QCOM_BAM_DMA) += qcom_bam_dma.o
+obj-$(CONFIG_QCOM_HIDMA_MGMT) += qcom_hidma_mgmt.o
 obj-$(CONFIG_RENESAS_DMA) += sh/
 obj-$(CONFIG_SIRF_DMA) += sirf-dma.o
 obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o
diff --git a/drivers/dma/qcom_hidma_mgmt.c b/drivers/dma/qcom_hidma_mgmt.c
new file mode 100644
index 0000000..8fcad4d
--- /dev/null
+++ b/drivers/dma/qcom_hidma_mgmt.c
@@ -0,0 +1,868 @@
+/*
+ * Qualcomm Technologies HIDMA DMA engine Management interface
+ *
+ * Copyright (c) 2014, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/acpi.h>
+#include <linux/of.h>
+#include <linux/property.h>
+#include <linux/debugfs.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+#include <linux/pm_runtime.h>
+#include "qcom_hidma.h"
+
+#define MHICFG_OFFSET			0x10
+#define QOS_N_OFFSET			0x300
+#define CFG_OFFSET			0x400
+#define HW_PARAM_OFFSET		0x408
+#define MAX_BUS_REQ_LEN_OFFSET		0x41C
+#define MAX_XACTIONS_OFFSET		0x420
+#define SW_VERSION_OFFSET		0x424
+#define CHRESET_TIMEOUUT_OFFSET	0x500
+#define MEMSET_LIMIT_OFFSET		0x600
+#define MHID_BUS_ERR0_OFFSET		0x1020
+#define MHID_BUS_ERR1_OFFSET		0x1024
+#define MHID_BUS_ERR_CLR_OFFSET	0x102C
+#define EVT_BUS_ERR0_OFFSET		0x1030
+#define EVT_BUS_ERR1_OFFSET		0x1034
+#define EVT_BUS_ERR_CLR_OFFSET		0x103C
+#define IDE_BUS_ERR0_OFFSET		0x1040
+#define IDE_BUS_ERR1_OFFSET		0x1044
+#define IDE_BUS_ERR2_OFFSET		0x1048
+#define IDE_BUS_ERR_CLR_OFFSET		0x104C
+#define ODE_BUS_ERR0_OFFSET		0x1050
+#define ODE_BUS_ERR1_OFFSET		0x1054
+#define ODE_BUS_ERR2_OFFSET		0x1058
+#define ODE_BUS_ERR_CLR_OFFSET		0x105C
+#define MSI_BUS_ERR0_OFFSET		0x1060
+#define MSI_BUS_ERR_CLR_OFFSET		0x106C
+#define TRE_ERR0_OFFSET		0x1070
+#define TRE_ERR_CLR_OFFSET		0x107C
+#define HW_EVENTS_CFG_OFFSET		0x1080
+
+#define HW_EVENTS_CFG_MASK		0xFF
+#define TRE_ERR_TRCHID_MASK		0xF
+#define TRE_ERR_EVRIDX_MASK		0xFF
+#define TRE_ERR_TYPE_MASK		0xFF
+#define MSI_ERR_RESP_MASK		0xFF
+#define MSI_ERR_TRCHID_MASK		0xFF
+#define ODE_ERR_REQLEN_MASK		0xFFFF
+#define ODE_ERR_RESP_MASK		0xFF
+#define ODE_ERR_TRCHID_MASK		0xFF
+#define IDE_ERR_REQLEN_MASK		0xFFFF
+#define IDE_ERR_RESP_MASK		0xFF
+#define IDE_ERR_TRCHID_MASK		0xFF
+#define EVT_ERR_RESP_MASK		0xFF
+#define EVT_ERR_TRCHID_MASK		0xFF
+#define MHID_ERR_RESP_MASK		0xFF
+#define MHID_ERR_TRCHID_MASK		0xFF
+#define MEMSET_LIMIT_MASK		0x1F
+#define MAX_WR_XACTIONS_MASK		0x1F
+#define MAX_RD_XACTIONS_MASK		0x1F
+#define MAX_JOBSIZE_MASK		0xFF
+#define MAX_COIDX_MASK			0xFF
+#define TREQ_CAPACITY_MASK		0xFF
+#define WEIGHT_MASK			0x7F
+#define TREQ_LIMIT_MASK		0x1FF
+#define NR_CHANNEL_MASK		0xFFFF
+#define MAX_BUS_REQ_LEN_MASK		0xFFFF
+#define CHRESET_TIMEOUUT_MASK		0xFFFFF
+
+#define TRE_ERR_TRCHID_BIT_POS		28
+#define TRE_ERR_IEOB_BIT_POS		16
+#define TRE_ERR_EVRIDX_BIT_POS		8
+#define MSI_ERR_RESP_BIT_POS		8
+#define ODE_ERR_REQLEN_BIT_POS		16
+#define ODE_ERR_RESP_BIT_POS		8
+#define IDE_ERR_REQLEN_BIT_POS		16
+#define IDE_ERR_RESP_BIT_POS		8
+#define EVT_ERR_RESP_BIT_POS		8
+#define MHID_ERR_RESP_BIT_POS		8
+#define MAX_WR_XACTIONS_BIT_POS	16
+#define TREQ_CAPACITY_BIT_POS		8
+#define MAX_JOB_SIZE_BIT_POS		16
+#define NR_EV_CHANNEL_BIT_POS		16
+#define MAX_BUS_WR_REQ_BIT_POS		16
+#define WRR_BIT_POS			8
+#define PRIORITY_BIT_POS		15
+#define TREQ_LIMIT_BIT_POS		16
+#define TREQ_LIMIT_EN_BIT_POS		23
+#define STOP_BIT_POS			24
+
+#define MODULE_NAME			"hidma-mgmt"
+#define PREFIX				MODULE_NAME ": "
+#define AUTOSUSPEND_TIMEOUT		2000
+
+#define HIDMA_RUNTIME_GET(dmadev)				\
+do {								\
+	atomic_inc(&(dmadev)->pm_counter);			\
+	TRC_PM(&(dmadev)->pdev->dev,				\
+		"%s:%d pm_runtime_get %d\n", __func__, __LINE__,\
+		atomic_read(&(dmadev)->pm_counter));		\
+	pm_runtime_get_sync(&(dmadev)->pdev->dev);		\
+} while (0)
+
+#define HIDMA_RUNTIME_SET(dmadev)				\
+do {								\
+	atomic_dec(&(dmadev)->pm_counter);			\
+	TRC_PM(&(dmadev)->pdev->dev,				\
+		"%s:%d pm_runtime_put_autosuspend:%d\n",	\
+		__func__, __LINE__,				\
+		atomic_read(&(dmadev)->pm_counter));		\
+	pm_runtime_mark_last_busy(&(dmadev)->pdev->dev);	\
+	pm_runtime_put_autosuspend(&(dmadev)->pdev->dev);	\
+} while (0)
+
+struct qcom_hidma_mgmt_dev {
+	u8 max_wr_xactions;
+	u8 max_rd_xactions;
+	u8 max_memset_limit;
+	u16 max_write_request;
+	u16 max_read_request;
+	u16 nr_channels;
+	u32 chreset_timeout;
+	u32 sw_version;
+	u8 *priority;
+	u8 *weight;
+
+	atomic_t	pm_counter;
+	/* Hardware device constants */
+	dma_addr_t dev_physaddr;
+	void __iomem *dev_virtaddr;
+	resource_size_t dev_addrsize;
+
+	struct dentry	*debugfs;
+	struct dentry	*info;
+	struct dentry	*err;
+	struct dentry	*mhid_errclr;
+	struct dentry	*evt_errclr;
+	struct dentry	*ide_errclr;
+	struct dentry	*ode_errclr;
+	struct dentry	*msi_errclr;
+	struct dentry	*tre_errclr;
+	struct dentry	*evt_ena;
+	struct platform_device *pdev;
+};
+
+static unsigned int debug_pm;
+module_param(debug_pm, uint, 0644);
+MODULE_PARM_DESC(debug_pm,
+		 "debug runtime power management transitions (default: 0)");
+
+#define TRC_PM(...) do {			\
+		if (debug_pm)			\
+			dev_info(__VA_ARGS__);	\
+	} while (0)
+
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+
+#define HIDMA_SHOW(dma, name) \
+		seq_printf(s, #name "=0x%x\n", dma->name)
+
+#define HIDMA_READ_SHOW(dma, name, offset) \
+	do { \
+		u32 val; \
+		val = readl(dma->dev_virtaddr + offset); \
+		seq_printf(s, name "=0x%x\n", val); \
+	} while (0)
+
+/**
+ * qcom_hidma_mgmt_info: display HIDMA device info
+ *
+ * Display the info for the current HIDMA device.
+ */
+static int qcom_hidma_mgmt_info(struct seq_file *s, void *unused)
+{
+	struct qcom_hidma_mgmt_dev *mgmtdev = s->private;
+	u32 val;
+	int i;
+
+	HIDMA_RUNTIME_GET(mgmtdev);
+	HIDMA_SHOW(mgmtdev, sw_version);
+
+	val = readl(mgmtdev->dev_virtaddr + CFG_OFFSET);
+	seq_printf(s, "ENABLE=%d\n", val & 0x1);
+
+	val = readl(mgmtdev->dev_virtaddr + CHRESET_TIMEOUUT_OFFSET);
+	seq_printf(s, "reset_timeout=%d\n", val & CHRESET_TIMEOUUT_MASK);
+
+	val = readl(mgmtdev->dev_virtaddr + MHICFG_OFFSET);
+	seq_printf(s, "nr_event_channel=%d\n",
+		(val >> NR_EV_CHANNEL_BIT_POS) & NR_CHANNEL_MASK);
+	seq_printf(s, "nr_tr_channel=%d\n", (val & NR_CHANNEL_MASK));
+	seq_printf(s, "nr_virt_tr_channel=%d\n", mgmtdev->nr_channels);
+	seq_printf(s, "dev_virtaddr=%p\n", &mgmtdev->dev_virtaddr);
+	seq_printf(s, "dev_physaddr=%pap\n", &mgmtdev->dev_physaddr);
+	seq_printf(s, "dev_addrsize=%pap\n", &mgmtdev->dev_addrsize);
+
+	val = readl(mgmtdev->dev_virtaddr + MEMSET_LIMIT_OFFSET);
+	seq_printf(s, "MEMSET_LIMIT_OFFSET=%d\n", val & MEMSET_LIMIT_MASK);
+
+	val = readl(mgmtdev->dev_virtaddr + HW_PARAM_OFFSET);
+	seq_printf(s, "MAX_JOB_SIZE=%d\n",
+		(val >> MAX_JOB_SIZE_BIT_POS) & MAX_JOBSIZE_MASK);
+	seq_printf(s, "TREQ_CAPACITY=%d\n",
+		(val >> TREQ_CAPACITY_BIT_POS) & TREQ_CAPACITY_MASK);
+	seq_printf(s, "MAX_COIDX_DEPTH=%d\n", val & MAX_COIDX_MASK);
+
+	val = readl(mgmtdev->dev_virtaddr + MAX_BUS_REQ_LEN_OFFSET);
+	seq_printf(s, "MAX_BUS_WR_REQ_LEN=%d\n",
+		(val >> MAX_BUS_WR_REQ_BIT_POS) & MAX_BUS_REQ_LEN_MASK);
+	seq_printf(s, "MAX_BUS_RD_REQ_LEN=%d\n", val & MAX_BUS_REQ_LEN_MASK);
+
+	val = readl(mgmtdev->dev_virtaddr + MAX_XACTIONS_OFFSET);
+	seq_printf(s, "MAX_WR_XACTIONS=%d\n",
+		(val >> MAX_WR_XACTIONS_BIT_POS) & MAX_WR_XACTIONS_MASK);
+	seq_printf(s, "MAX_RD_XACTIONS=%d\n", val & MAX_RD_XACTIONS_MASK);
+
+	for (i = 0; i < mgmtdev->nr_channels; i++) {
+		void __iomem *offset;
+
+		offset = mgmtdev->dev_virtaddr + QOS_N_OFFSET + (4 * i);
+		val = readl(offset);
+
+		seq_printf(s, "CH#%d STOP=%d\n",
+			i, (val & (1 << STOP_BIT_POS)) ? 1 : 0);
+		seq_printf(s, "CH#%d TREQ LIMIT EN=%d\n", i,
+			(val & (1 << TREQ_LIMIT_EN_BIT_POS)) ? 1 : 0);
+		seq_printf(s, "CH#%d TREQ LIMIT=%d\n",
+			i, (val >> TREQ_LIMIT_BIT_POS) & TREQ_LIMIT_MASK);
+		seq_printf(s, "CH#%d priority=%d\n", i,
+			(val & (1 << PRIORITY_BIT_POS)) ? 1 : 0);
+		seq_printf(s, "CH#%d WRR=%d\n", i,
+			(val >> WRR_BIT_POS) & WEIGHT_MASK);
+		seq_printf(s, "CH#%d USE_DLA=%d\n", i, (val & 1) ? 1 : 0);
+	}
+	HIDMA_RUNTIME_SET(mgmtdev);
+
+	return 0;
+}
+
+static int qcom_hidma_mgmt_info_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, qcom_hidma_mgmt_info, inode->i_private);
+}
+
+static const struct file_operations qcom_hidma_mgmt_fops = {
+	.open = qcom_hidma_mgmt_info_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/**
+ * qcom_hidma_mgmt_err: display HIDMA error info
+ *
+ * Display the error info for the current HIDMA device.
+ */
+static int qcom_hidma_mgmt_err(struct seq_file *s, void *unused)
+{
+	u32 val;
+	struct qcom_hidma_mgmt_dev *mgmtdev = s->private;
+
+	HIDMA_RUNTIME_GET(mgmtdev);
+	val = readl(mgmtdev->dev_virtaddr + MHID_BUS_ERR0_OFFSET);
+	seq_printf(s, "MHID TR_CHID=%d\n", val & MHID_ERR_TRCHID_MASK);
+	seq_printf(s, "MHID RESP_ERROR=%d\n",
+		(val >> MHID_ERR_RESP_BIT_POS) & MHID_ERR_RESP_MASK);
+	HIDMA_READ_SHOW(mgmtdev, "MHID READ_PTR", MHID_BUS_ERR1_OFFSET);
+
+	val = readl(mgmtdev->dev_virtaddr + EVT_BUS_ERR0_OFFSET);
+	seq_printf(s, "EVT TR_CHID=%d\n", val & EVT_ERR_TRCHID_MASK);
+	seq_printf(s, "EVT RESP_ERROR=%d\n",
+		(val >> EVT_ERR_RESP_BIT_POS) & EVT_ERR_RESP_MASK);
+	HIDMA_READ_SHOW(mgmtdev, "EVT WRITE_PTR", EVT_BUS_ERR1_OFFSET);
+
+	val = readl(mgmtdev->dev_virtaddr + IDE_BUS_ERR0_OFFSET);
+	seq_printf(s, "IDE TR_CHID=%d\n", val & IDE_ERR_TRCHID_MASK);
+	seq_printf(s, "IDE RESP_ERROR=%d\n",
+		(val >> IDE_ERR_RESP_BIT_POS) & IDE_ERR_RESP_MASK);
+	seq_printf(s, "IDE REQ_LENGTH=%d\n",
+		(val >> IDE_ERR_REQLEN_BIT_POS) & IDE_ERR_REQLEN_MASK);
+	HIDMA_READ_SHOW(mgmtdev, "IDE ADDR_LSB", IDE_BUS_ERR1_OFFSET);
+	HIDMA_READ_SHOW(mgmtdev, "IDE ADDR_MSB", IDE_BUS_ERR2_OFFSET);
+
+	val = readl(mgmtdev->dev_virtaddr + ODE_BUS_ERR0_OFFSET);
+	seq_printf(s, "ODE TR_CHID=%d\n", val & ODE_ERR_TRCHID_MASK);
+	seq_printf(s, "ODE RESP_ERROR=%d\n",
+		(val >> ODE_ERR_RESP_BIT_POS) & ODE_ERR_RESP_MASK);
+	seq_printf(s, "ODE REQ_LENGTH=%d\n",
+		(val >> ODE_ERR_REQLEN_BIT_POS) & ODE_ERR_REQLEN_MASK);
+	HIDMA_READ_SHOW(mgmtdev, "ODE ADDR_LSB", ODE_BUS_ERR1_OFFSET);
+	HIDMA_READ_SHOW(mgmtdev, "ODE ADDR_MSB", ODE_BUS_ERR2_OFFSET);
+
+	val = readl(mgmtdev->dev_virtaddr + MSI_BUS_ERR0_OFFSET);
+	seq_printf(s, "MSI TR_CHID=%d\n", val & MSI_ERR_TRCHID_MASK);
+	seq_printf(s, "MSI RESP_ERROR=%d\n",
+		(val >> MSI_ERR_RESP_BIT_POS) & MSI_ERR_RESP_MASK);
+
+	val = readl(mgmtdev->dev_virtaddr + TRE_ERR0_OFFSET);
+	seq_printf(s, "TRE TRE_TYPE=%d\n", val & TRE_ERR_TYPE_MASK);
+	seq_printf(s, "TRE TRE_EVRIDX=%d\n",
+		(val >> TRE_ERR_EVRIDX_BIT_POS) & TRE_ERR_EVRIDX_MASK);
+	seq_printf(s, "TRE TRE_IEOB=%d\n",
+		(val >> TRE_ERR_IEOB_BIT_POS) & 1);
+	seq_printf(s, "TRE TRCHID=%d\n",
+		(val >> TRE_ERR_TRCHID_BIT_POS) & TRE_ERR_TRCHID_MASK);
+
+	HIDMA_READ_SHOW(mgmtdev, "HW_EVENTS_CFG_OFFSET",
+			HW_EVENTS_CFG_OFFSET);
+
+	HIDMA_RUNTIME_SET(mgmtdev);
+	return 0;
+}
+
+static int qcom_hidma_mgmt_err_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, qcom_hidma_mgmt_err, inode->i_private);
+}
+
+static const struct file_operations qcom_hidma_mgmt_err_fops = {
+	.open = qcom_hidma_mgmt_err_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static ssize_t qcom_hidma_mgmt_mhiderr_clr(struct file *file,
+	const char __user *user_buf, size_t count, loff_t *ppos)
+{
+	struct qcom_hidma_mgmt_dev *mgmtdev = file->f_inode->i_private;
+
+	HIDMA_RUNTIME_GET(mgmtdev);
+	writel(1, mgmtdev->dev_virtaddr + MHID_BUS_ERR_CLR_OFFSET);
+	HIDMA_RUNTIME_SET(mgmtdev);
+	return count;
+}
+
+static const struct file_operations qcom_hidma_mgmt_mhiderr_clrfops = {
+	.write = qcom_hidma_mgmt_mhiderr_clr,
+};
+
+static ssize_t qcom_hidma_mgmt_evterr_clr(struct file *file,
+	const char __user *user_buf, size_t count, loff_t *ppos)
+{
+	struct qcom_hidma_mgmt_dev *mgmtdev = file->f_inode->i_private;
+
+	HIDMA_RUNTIME_GET(mgmtdev);
+	writel(1, mgmtdev->dev_virtaddr + EVT_BUS_ERR_CLR_OFFSET);
+	HIDMA_RUNTIME_SET(mgmtdev);
+	return count;
+}
+
+static const struct file_operations qcom_hidma_mgmt_evterr_clrfops = {
+	.write = qcom_hidma_mgmt_evterr_clr,
+};
+
+static ssize_t qcom_hidma_mgmt_ideerr_clr(struct file *file,
+	const char __user *user_buf, size_t count, loff_t *ppos)
+{
+	struct qcom_hidma_mgmt_dev *mgmtdev = file->f_inode->i_private;
+
+	HIDMA_RUNTIME_GET(mgmtdev);
+	writel(1, mgmtdev->dev_virtaddr + IDE_BUS_ERR_CLR_OFFSET);
+	HIDMA_RUNTIME_SET(mgmtdev);
+	return count;
+}
+
+static const struct file_operations qcom_hidma_mgmt_ideerr_clrfops = {
+	.write = qcom_hidma_mgmt_ideerr_clr,
+};
+
+static ssize_t qcom_hidma_mgmt_odeerr_clr(struct file *file,
+	const char __user *user_buf, size_t count, loff_t *ppos)
+{
+	struct qcom_hidma_mgmt_dev *mgmtdev = file->f_inode->i_private;
+
+	HIDMA_RUNTIME_GET(mgmtdev);
+	writel(1, mgmtdev->dev_virtaddr + ODE_BUS_ERR_CLR_OFFSET);
+	HIDMA_RUNTIME_SET(mgmtdev);
+	return count;
+}
+
+static const struct file_operations qcom_hidma_mgmt_odeerr_clrfops = {
+	.write = qcom_hidma_mgmt_odeerr_clr,
+};
+
+static ssize_t qcom_hidma_mgmt_msierr_clr(struct file *file,
+	const char __user *user_buf, size_t count, loff_t *ppos)
+{
+	struct qcom_hidma_mgmt_dev *mgmtdev = file->f_inode->i_private;
+
+	HIDMA_RUNTIME_GET(mgmtdev);
+	writel(1, mgmtdev->dev_virtaddr + MSI_BUS_ERR_CLR_OFFSET);
+	HIDMA_RUNTIME_SET(mgmtdev);
+	return count;
+}
+
+static const struct file_operations qcom_hidma_mgmt_msierr_clrfops = {
+	.write = qcom_hidma_mgmt_msierr_clr,
+};
+
+static ssize_t qcom_hidma_mgmt_treerr_clr(struct file *file,
+	const char __user *user_buf, size_t count, loff_t *ppos)
+{
+	struct qcom_hidma_mgmt_dev *mgmtdev = file->f_inode->i_private;
+
+	HIDMA_RUNTIME_GET(mgmtdev);
+	writel(1, mgmtdev->dev_virtaddr + TRE_ERR_CLR_OFFSET);
+	HIDMA_RUNTIME_SET(mgmtdev);
+	return count;
+}
+
+static const struct file_operations qcom_hidma_mgmt_treerr_clrfops = {
+	.write = qcom_hidma_mgmt_treerr_clr,
+};
+
+static ssize_t qcom_hidma_mgmt_evtena(struct file *file,
+	const char __user *user_buf, size_t count, loff_t *ppos)
+{
+	char temp_buf[16+1];
+	struct qcom_hidma_mgmt_dev *mgmtdev = file->f_inode->i_private;
+	u32 event;
+	ssize_t ret;
+	unsigned long val;
+
+	temp_buf[16] = '\0';
+	if (copy_from_user(temp_buf, user_buf, min_t(int, count, 16)))
+		goto out;
+
+	ret = kstrtoul(temp_buf, 16, &val);
+	if (ret) {
+		pr_warn(PREFIX "unknown event\n");
+		goto out;
+	}
+
+	event = (u32)val & HW_EVENTS_CFG_MASK;
+
+	HIDMA_RUNTIME_GET(mgmtdev);
+	writel(event, mgmtdev->dev_virtaddr + HW_EVENTS_CFG_OFFSET);
+	HIDMA_RUNTIME_SET(mgmtdev);
+out:
+	return count;
+}
+
+static const struct file_operations qcom_hidma_mgmt_evtena_fops = {
+	.write = qcom_hidma_mgmt_evtena,
+};
+
+static void qcom_hidma_mgmt_debug_uninit(struct qcom_hidma_mgmt_dev *mgmtdev)
+{
+	debugfs_remove(mgmtdev->evt_ena);
+	debugfs_remove(mgmtdev->tre_errclr);
+	debugfs_remove(mgmtdev->msi_errclr);
+	debugfs_remove(mgmtdev->ode_errclr);
+	debugfs_remove(mgmtdev->ide_errclr);
+	debugfs_remove(mgmtdev->evt_errclr);
+	debugfs_remove(mgmtdev->mhid_errclr);
+	debugfs_remove(mgmtdev->err);
+	debugfs_remove(mgmtdev->info);
+	debugfs_remove(mgmtdev->debugfs);
+}
+
+static int qcom_hidma_mgmt_debug_init(struct qcom_hidma_mgmt_dev *mgmtdev)
+{
+	int rc = 0;
+
+	mgmtdev->debugfs = debugfs_create_dir(dev_name(&mgmtdev->pdev->dev),
+						NULL);
+	if (!mgmtdev->debugfs) {
+		rc = -ENODEV;
+		return rc;
+	}
+
+	mgmtdev->info = debugfs_create_file("info", S_IRUGO,
+			mgmtdev->debugfs, mgmtdev, &qcom_hidma_mgmt_fops);
+	if (!mgmtdev->info) {
+		rc = -ENOMEM;
+		goto cleanup;
+	}
+
+	mgmtdev->err = debugfs_create_file("err", S_IRUGO,
+			mgmtdev->debugfs, mgmtdev,
+			&qcom_hidma_mgmt_err_fops);
+	if (!mgmtdev->err) {
+		rc = -ENOMEM;
+		goto cleanup;
+	}
+
+	mgmtdev->mhid_errclr = debugfs_create_file("mhiderrclr", S_IWUSR,
+			mgmtdev->debugfs, mgmtdev,
+			&qcom_hidma_mgmt_mhiderr_clrfops);
+	if (!mgmtdev->mhid_errclr) {
+		rc = -ENOMEM;
+		goto cleanup;
+	}
+
+	mgmtdev->evt_errclr = debugfs_create_file("evterrclr", S_IWUSR,
+			mgmtdev->debugfs, mgmtdev,
+			&qcom_hidma_mgmt_evterr_clrfops);
+	if (!mgmtdev->evt_errclr) {
+		rc = -ENOMEM;
+		goto cleanup;
+	}
+
+	mgmtdev->ide_errclr = debugfs_create_file("ideerrclr", S_IWUSR,
+			mgmtdev->debugfs, mgmtdev,
+			&qcom_hidma_mgmt_ideerr_clrfops);
+	if (!mgmtdev->ide_errclr) {
+		rc = -ENOMEM;
+		goto cleanup;
+	}
+
+	mgmtdev->ode_errclr = debugfs_create_file("odeerrclr", S_IWUSR,
+			mgmtdev->debugfs, mgmtdev,
+			&qcom_hidma_mgmt_odeerr_clrfops);
+	if (!mgmtdev->ode_errclr) {
+		rc = -ENOMEM;
+		goto cleanup;
+	}
+
+	mgmtdev->msi_errclr = debugfs_create_file("msierrclr", S_IWUSR,
+			mgmtdev->debugfs, mgmtdev,
+			&qcom_hidma_mgmt_msierr_clrfops);
+	if (!mgmtdev->msi_errclr) {
+		rc = -ENOMEM;
+		goto cleanup;
+	}
+
+	mgmtdev->tre_errclr = debugfs_create_file("treerrclr", S_IWUSR,
+			mgmtdev->debugfs, mgmtdev,
+			&qcom_hidma_mgmt_treerr_clrfops);
+	if (!mgmtdev->tre_errclr) {
+		rc = -ENOMEM;
+		goto cleanup;
+	}
+
+	mgmtdev->evt_ena = debugfs_create_file("evtena", S_IWUSR,
+			mgmtdev->debugfs, mgmtdev,
+			&qcom_hidma_mgmt_evtena_fops);
+	if (!mgmtdev->evt_ena) {
+		rc = -ENOMEM;
+		goto cleanup;
+	}
+
+	return 0;
+cleanup:
+	qcom_hidma_mgmt_debug_uninit(mgmtdev);
+	return rc;
+}
+#else
+static void qcom_hidma_mgmt_debug_uninit(struct qcom_hidma_mgmt_dev *mgmtdev)
+{
+}
+static int qcom_hidma_mgmt_debug_init(struct qcom_hidma_mgmt_dev *mgmtdev)
+{
+	return 0;
+}
+#endif
+
+static irqreturn_t qcom_hidma_mgmt_irq_handler(int irq, void *arg)
+{
+	/* TODO: handle irq here */
+	return IRQ_HANDLED;
+}
+
+static int qcom_hidma_mgmt_setup(struct qcom_hidma_mgmt_dev *mgmtdev)
+{
+	u32 val;
+	int i;
+
+	val = readl(mgmtdev->dev_virtaddr + MAX_BUS_REQ_LEN_OFFSET);
+
+	if (mgmtdev->max_write_request) {
+		val = val &
+			~(MAX_BUS_REQ_LEN_MASK << MAX_BUS_WR_REQ_BIT_POS);
+		val = val |
+			(mgmtdev->max_write_request << MAX_BUS_WR_REQ_BIT_POS);
+	}
+	if (mgmtdev->max_read_request) {
+		val = val & ~(MAX_BUS_REQ_LEN_MASK);
+		val = val | (mgmtdev->max_read_request);
+	}
+	writel(val, mgmtdev->dev_virtaddr + MAX_BUS_REQ_LEN_OFFSET);
+
+	val = readl(mgmtdev->dev_virtaddr + MAX_XACTIONS_OFFSET);
+	if (mgmtdev->max_wr_xactions) {
+		val = val &
+			~(MAX_WR_XACTIONS_MASK << MAX_WR_XACTIONS_BIT_POS);
+		val = val |
+			(mgmtdev->max_wr_xactions << MAX_WR_XACTIONS_BIT_POS);
+	}
+	if (mgmtdev->max_rd_xactions) {
+		val = val & ~(MAX_RD_XACTIONS_MASK);
+		val = val | (mgmtdev->max_rd_xactions);
+	}
+	writel(val, mgmtdev->dev_virtaddr + MAX_XACTIONS_OFFSET);
+
+	val = readl(mgmtdev->dev_virtaddr + MAX_BUS_REQ_LEN_OFFSET);
+	mgmtdev->max_write_request =
+		(val >> MAX_BUS_WR_REQ_BIT_POS) & MAX_BUS_REQ_LEN_MASK;
+	mgmtdev->max_read_request = val & MAX_BUS_REQ_LEN_MASK;
+
+	val = readl(mgmtdev->dev_virtaddr + MAX_XACTIONS_OFFSET);
+	mgmtdev->max_wr_xactions =
+		(val >> MAX_WR_XACTIONS_BIT_POS) & MAX_WR_XACTIONS_MASK;
+	mgmtdev->max_rd_xactions = val & MAX_RD_XACTIONS_MASK;
+
+	mgmtdev->sw_version = readl(mgmtdev->dev_virtaddr + SW_VERSION_OFFSET);
+
+	for (i = 0; i < mgmtdev->nr_channels; i++) {
+		val = readl(mgmtdev->dev_virtaddr + QOS_N_OFFSET + (4 * i));
+		val = val & ~(1 << PRIORITY_BIT_POS);
+		val = val |
+			((mgmtdev->priority[i] & 0x1) << PRIORITY_BIT_POS);
+		val = val & ~(WEIGHT_MASK << WRR_BIT_POS);
+		val = val
+			| ((mgmtdev->weight[i] & WEIGHT_MASK) << WRR_BIT_POS);
+		writel(val, mgmtdev->dev_virtaddr + QOS_N_OFFSET + (4 * i));
+	}
+
+	if (mgmtdev->chreset_timeout > 0) {
+		val = readl(mgmtdev->dev_virtaddr + CHRESET_TIMEOUUT_OFFSET);
+		val = val & ~CHRESET_TIMEOUUT_MASK;
+		val = val | (mgmtdev->chreset_timeout & CHRESET_TIMEOUUT_MASK);
+		writel(val, mgmtdev->dev_virtaddr + CHRESET_TIMEOUUT_OFFSET);
+	}
+	val = readl(mgmtdev->dev_virtaddr + CHRESET_TIMEOUUT_OFFSET);
+	mgmtdev->chreset_timeout = val & CHRESET_TIMEOUUT_MASK;
+
+	if (mgmtdev->max_memset_limit > 0) {
+		val = readl(mgmtdev->dev_virtaddr + MEMSET_LIMIT_OFFSET);
+		val = val & ~MEMSET_LIMIT_MASK;
+		val = val | (mgmtdev->max_memset_limit & MEMSET_LIMIT_MASK);
+		writel(val, mgmtdev->dev_virtaddr + MEMSET_LIMIT_OFFSET);
+	}
+	val = readl(mgmtdev->dev_virtaddr + MEMSET_LIMIT_OFFSET);
+	mgmtdev->max_memset_limit = val & MEMSET_LIMIT_MASK;
+
+	val = readl(mgmtdev->dev_virtaddr + CFG_OFFSET);
+	val = val | 1;
+	writel(val, mgmtdev->dev_virtaddr + CFG_OFFSET);
+
+	return 0;
+}
+
+static int qcom_hidma_mgmt_probe(struct platform_device *pdev)
+{
+	struct resource *dma_resource;
+	int irq;
+	int rc, i;
+	struct qcom_hidma_mgmt_dev *mgmtdev;
+
+	pm_runtime_set_autosuspend_delay(&pdev->dev, AUTOSUSPEND_TIMEOUT);
+	pm_runtime_use_autosuspend(&pdev->dev);
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+	dma_resource = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!dma_resource) {
+		dev_err(&pdev->dev, "No memory resources found\n");
+		rc = -ENODEV;
+		goto out;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(&pdev->dev, "irq resources not found\n");
+		rc = -ENODEV;
+		goto out;
+	}
+
+	mgmtdev = devm_kzalloc(&pdev->dev, sizeof(*mgmtdev), GFP_KERNEL);
+	if (!mgmtdev) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	mgmtdev->pdev = pdev;
+	HIDMA_RUNTIME_GET(mgmtdev);
+
+	rc = devm_request_irq(&pdev->dev, irq, qcom_hidma_mgmt_irq_handler,
+		IRQF_SHARED, "qcom-hidmamgmt", mgmtdev);
+	if (rc) {
+		dev_err(&pdev->dev, "irq registration failed: %d\n", irq);
+		goto out;
+	}
+
+	mgmtdev->dev_physaddr = dma_resource->start;
+	mgmtdev->dev_addrsize = resource_size(dma_resource);
+
+	dev_dbg(&pdev->dev, "dev_physaddr:%pa\n", &mgmtdev->dev_physaddr);
+	dev_dbg(&pdev->dev, "dev_addrsize:%pa\n", &mgmtdev->dev_addrsize);
+
+	mgmtdev->dev_virtaddr = devm_ioremap_resource(&pdev->dev,
+							dma_resource);
+	if (IS_ERR(mgmtdev->dev_virtaddr)) {
+		dev_err(&pdev->dev, "can't map i/o memory at %pa\n",
+			&mgmtdev->dev_physaddr);
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	if (device_property_read_u16(&pdev->dev, "nr-channels",
+		&mgmtdev->nr_channels)) {
+		dev_err(&pdev->dev, "number of channels missing\n");
+		rc = -EINVAL;
+		goto out;
+	}
+
+	device_property_read_u16(&pdev->dev, "max-write",
+		&mgmtdev->max_write_request);
+	if ((mgmtdev->max_write_request != 128) &&
+		(mgmtdev->max_write_request != 256) &&
+		(mgmtdev->max_write_request != 512) &&
+		(mgmtdev->max_write_request != 1024)) {
+		dev_err(&pdev->dev, "invalid write request %d\n",
+			mgmtdev->max_write_request);
+		rc = -EINVAL;
+		goto out;
+	}
+
+	device_property_read_u16(&pdev->dev, "max-read",
+		&mgmtdev->max_read_request);
+	if ((mgmtdev->max_read_request != 128) &&
+		(mgmtdev->max_read_request != 256) &&
+		(mgmtdev->max_read_request != 512) &&
+		(mgmtdev->max_read_request != 1024)) {
+		dev_err(&pdev->dev, "invalid read request %d\n",
+			mgmtdev->max_read_request);
+		rc = -EINVAL;
+		goto out;
+	}
+
+	device_property_read_u8(&pdev->dev, "max-wxactions",
+		&mgmtdev->max_wr_xactions);
+
+	device_property_read_u8(&pdev->dev, "max-rdactions",
+		&mgmtdev->max_rd_xactions);
+
+	device_property_read_u8(&pdev->dev, "max-memset-limit",
+		&mgmtdev->max_memset_limit);
+
+	/* needs to be at least one */
+	if (mgmtdev->max_memset_limit == 0)
+		mgmtdev->max_memset_limit = 1;
+
+	mgmtdev->priority = devm_kcalloc(&pdev->dev,
+		mgmtdev->nr_channels, sizeof(*mgmtdev->priority), GFP_KERNEL);
+	if (!mgmtdev->priority) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	mgmtdev->weight = devm_kcalloc(&pdev->dev,
+		mgmtdev->nr_channels, sizeof(*mgmtdev->weight), GFP_KERNEL);
+	if (!mgmtdev->weight) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	for (i = 0; i < mgmtdev->nr_channels; i++) {
+		char name[30];
+
+		sprintf(name, "ch-priority-%d", i);
+		device_property_read_u8(&pdev->dev, name,
+			&mgmtdev->priority[i]);
+
+		sprintf(name, "ch-weight-%d", i);
+		device_property_read_u8(&pdev->dev, name,
+			&mgmtdev->weight[i]);
+
+		if (mgmtdev->weight[i] > 15) {
+			dev_err(&pdev->dev, "max value of weight can be 15.\n");
+			rc = -EINVAL;
+			goto out;
+		}
+
+		/* weight needs to be at least one */
+		if (mgmtdev->weight[i] == 0)
+			mgmtdev->weight[i] = 1;
+	}
+
+	rc = qcom_hidma_mgmt_setup(mgmtdev);
+	if (rc) {
+		dev_err(&pdev->dev, "setup failed\n");
+		goto out;
+	}
+
+	rc = qcom_hidma_mgmt_debug_init(mgmtdev);
+	if (rc) {
+		dev_err(&pdev->dev, "debugfs init failed\n");
+		goto out;
+	}
+
+	dev_info(&pdev->dev,
+		"HI-DMA engine management driver registration complete\n");
+	platform_set_drvdata(pdev, mgmtdev);
+	HIDMA_RUNTIME_SET(mgmtdev);
+	return 0;
+out:
+	pm_runtime_disable(&pdev->dev);
+	pm_runtime_put_sync_suspend(&pdev->dev);
+	return rc;
+}
+
+static int qcom_hidma_mgmt_remove(struct platform_device *pdev)
+{
+	struct qcom_hidma_mgmt_dev *mgmtdev = platform_get_drvdata(pdev);
+
+	HIDMA_RUNTIME_GET(mgmtdev);
+	qcom_hidma_mgmt_debug_uninit(mgmtdev);
+	pm_runtime_put_sync_suspend(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+
+	dev_info(&pdev->dev, "HI-DMA engine management driver removed\n");
+	return 0;
+}
+
+#if IS_ENABLED(CONFIG_ACPI)
+static const struct acpi_device_id qcom_hidma_mgmt_acpi_ids[] = {
+	{"QCOM8060"},
+	{},
+};
+#endif
+
+static const struct of_device_id qcom_hidma_mgmt_match[] = {
+	{ .compatible = "qcom,hidma_mgmt", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, qcom_hidma_mgmt_match);
+
+static struct platform_driver qcom_hidma_mgmt_driver = {
+	.probe = qcom_hidma_mgmt_probe,
+	.remove = qcom_hidma_mgmt_remove,
+	.driver = {
+		.name = MODULE_NAME,
+		.owner = THIS_MODULE,
+		.of_match_table = of_match_ptr(qcom_hidma_mgmt_match),
+		.acpi_match_table = ACPI_PTR(qcom_hidma_mgmt_acpi_ids),
+	},
+};
+
+static int __init qcom_hidma_mgmt_init(void)
+{
+	return platform_driver_register(&qcom_hidma_mgmt_driver);
+}
+device_initcall(qcom_hidma_mgmt_init);
+
+static void __exit qcom_hidma_mgmt_exit(void)
+{
+	platform_driver_unregister(&qcom_hidma_mgmt_driver);
+}
+module_exit(qcom_hidma_mgmt_exit);
-- 
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver
  2015-10-30  3:08 [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver Sinan Kaya
@ 2015-10-30  3:08 ` Sinan Kaya
  2015-10-30  8:01   ` kbuild test robot
                     ` (3 more replies)
  2015-10-30  4:34 ` [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver kbuild test robot
                   ` (3 subsequent siblings)
  4 siblings, 4 replies; 51+ messages in thread
From: Sinan Kaya @ 2015-10-30  3:08 UTC (permalink / raw)
  To: dmaengine, timur, cov, jcm
  Cc: Sinan Kaya, Rob Herring, Pawel Moll, Mark Rutland, Ian Campbell,
	Kumar Gala, Vinod Koul, Dan Williams, devicetree, linux-kernel

This patch adds support for hidma engine. The driver
consists of two logical blocks. The DMA engine interface
and the low-level interface. This version of the driver
does not support virtualization on this release and only
memcpy interface support is included.

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
---
 .../devicetree/bindings/dma/qcom_hidma.txt         |   18 +
 drivers/dma/Kconfig                                |   11 +
 drivers/dma/Makefile                               |    4 +
 drivers/dma/qcom_hidma.c                           | 1717 ++++++++++++++++++++
 drivers/dma/qcom_hidma.h                           |   44 +
 drivers/dma/qcom_hidma_ll.c                        | 1132 +++++++++++++
 6 files changed, 2926 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/dma/qcom_hidma.txt
 create mode 100644 drivers/dma/qcom_hidma.c
 create mode 100644 drivers/dma/qcom_hidma.h
 create mode 100644 drivers/dma/qcom_hidma_ll.c

diff --git a/Documentation/devicetree/bindings/dma/qcom_hidma.txt b/Documentation/devicetree/bindings/dma/qcom_hidma.txt
new file mode 100644
index 0000000..9a01635
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/qcom_hidma.txt
@@ -0,0 +1,18 @@
+Qualcomm Technologies HIDMA Channel driver
+
+Required properties:
+- compatible: must contain "qcom,hidma"
+- reg: Addresses for the transfer and event channel
+- interrupts: Should contain the event interrupt
+- desc-count: Number of asynchronous requests this channel can handle
+- event-channel: The HW event channel completions will be delivered.
+Example:
+
+	hidma_24: hidma@0x5c050000 {
+		compatible = "qcom,hidma";
+		reg = <0 0x5c050000 0x0 0x1000>,
+		      <0 0x5c0b0000 0x0 0x1000>;
+		interrupts = <0 389 0>;
+		desc-count = <10>;
+		event-channel = /bits/ 8 <4>;
+	};
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 76a5a5e..2645185 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -512,6 +512,17 @@ config QCOM_HIDMA_MGMT
 	  OS would run QCOM_HIDMA driver and the hypervisor would run
 	  the QCOM_HIDMA_MGMT driver.
 
+config QCOM_HIDMA
+	tristate "Qualcomm Technologies HIDMA support"
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Enable support for the Qualcomm Technologies HIDMA controller.
+	  The HIDMA controller supports optimized buffer copies
+	  (user to kernel, kernel to kernel, etc.).  It only supports
+	  memcpy/memset interfaces. The core is not intended for general
+	  purpose slave DMA.
+
 config XILINX_VDMA
 	tristate "Xilinx AXI VDMA Engine"
 	depends on (ARCH_ZYNQ || MICROBLAZE)
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 3d25ffd..5665df2 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -53,6 +53,10 @@ obj-$(CONFIG_PL330_DMA) += pl330.o
 obj-$(CONFIG_PPC_BESTCOMM) += bestcomm/
 obj-$(CONFIG_PXA_DMA) += pxa_dma.o
 obj-$(CONFIG_QCOM_BAM_DMA) += qcom_bam_dma.o
+obj-$(CONFIG_QCOM_HIDMA) +=  qcom_hdma.o
+qcom_hdma-objs        := qcom_hidma_ll.o qcom_hidma.o
+
+
 obj-$(CONFIG_QCOM_HIDMA_MGMT) += qcom_hidma_mgmt.o
 obj-$(CONFIG_RENESAS_DMA) += sh/
 obj-$(CONFIG_SIRF_DMA) += sirf-dma.o
diff --git a/drivers/dma/qcom_hidma.c b/drivers/dma/qcom_hidma.c
new file mode 100644
index 0000000..2a92305
--- /dev/null
+++ b/drivers/dma/qcom_hidma.c
@@ -0,0 +1,1717 @@
+/*
+ * Qualcomm Technologies HIDMA DMA engine interface
+ *
+ * Copyright (c) 2014, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * Copyright (C) Freescale Semicondutor, Inc. 2007, 2008.
+ * Copyright (C) Semihalf 2009
+ * Copyright (C) Ilya Yanok, Emcraft Systems 2010
+ * Copyright (C) Alexander Popov, Promcontroller 2014
+ *
+ * Written by Piotr Ziecik <kosmo@semihalf.com>. Hardware description
+ * (defines, structures and comments) was taken from MPC5121 DMA driver
+ * written by Hongjun Chen <hong-jun.chen@freescale.com>.
+ *
+ * Approved as OSADL project by a majority of OSADL members and funded
+ * by OSADL membership fees in 2009;  for details see www.osadl.org.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/* Linux Foundation elects GPLv2 license only.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <asm/dma.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/of_dma.h>
+#include <linux/property.h>
+#include <linux/delay.h>
+#include <linux/highmem.h>
+#include <linux/io.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/acpi.h>
+#include <linux/irq.h>
+#include <linux/debugfs.h>
+#include <linux/atomic.h>
+#include <linux/pm_runtime.h>
+#include "dmaengine.h"
+#include "qcom_hidma.h"
+
+/* Default idle time is 2 seconds. This parameter can
+ * be overridden by changing the following
+ * /sys/bus/platform/devices/QCOM8061:<xy>/power/autosuspend_delay_ms
+ * during kernel boot.
+ */
+#define AUTOSUSPEND_TIMEOUT		2000
+#define HIDMA_DEFAULT_DESCRIPTOR_COUNT	16
+#define MODULE_NAME			"hidma"
+
+#define HIDMA_RUNTIME_GET(dmadev)				\
+do {								\
+	atomic_inc(&(dmadev)->pm_counter);			\
+	TRC_PM((dmadev)->ddev.dev,				\
+		"%s:%d pm_runtime_get %d\n", __func__, __LINE__,\
+		atomic_read(&(dmadev)->pm_counter));		\
+	pm_runtime_get_sync((dmadev)->ddev.dev);		\
+} while (0)
+
+#define HIDMA_RUNTIME_SET(dmadev)				\
+do {								\
+	atomic_dec(&(dmadev)->pm_counter);			\
+	TRC_PM((dmadev)->ddev.dev,				\
+		"%s:%d pm_runtime_put_autosuspend:%d\n",	\
+		__func__, __LINE__,				\
+		atomic_read(&(dmadev)->pm_counter));		\
+	pm_runtime_mark_last_busy((dmadev)->ddev.dev);		\
+	pm_runtime_put_autosuspend((dmadev)->ddev.dev);		\
+} while (0)
+
+struct hidma_test_sync {
+	atomic_t			counter;
+	wait_queue_head_t		wq;
+};
+
+struct hidma_dev {
+	u8				evridx;
+	u32				nr_descriptors;
+
+	void				*lldev;
+	void				__iomem *dev_trca;
+	void				__iomem *dev_evca;
+	int (*self_test)(struct hidma_dev *device);
+	struct dentry			*debugfs;
+	struct dentry			*stats;
+
+	/* used to protect the pending channel list*/
+	spinlock_t			lock;
+	dma_addr_t			dev_trca_phys;
+	struct dma_device		ddev;
+	struct tasklet_struct		tasklet;
+
+	resource_size_t			dev_trca_size;
+	dma_addr_t			dev_evca_phys;
+	resource_size_t			dev_evca_size;
+
+	struct hidma_test_sync		test_result;
+	atomic_t			pm_counter;
+};
+
+struct hidma_chan {
+	bool				paused;
+	bool				allocated;
+	char				name[16];
+	u32				dma_sig;
+
+	/*
+	 * active descriptor on this channel
+	 * It is used by the DMA complete notification to
+	 * locate the descriptor that initiated the transfer.
+	 */
+	struct dentry			*debugfs;
+	struct dentry			*stats;
+	struct hidma_dev		*dmadev;
+
+	struct dma_chan			chan;
+	struct list_head		free;
+	struct list_head		prepared;
+	struct list_head		active;
+	struct list_head		completed;
+
+	/* Lock for this structure */
+	spinlock_t			lock;
+};
+
+struct hidma_desc {
+	struct dma_async_tx_descriptor	desc;
+	/* link list node for this channel*/
+	struct list_head		node;
+	u32				tre_ch;
+};
+
+static inline
+struct hidma_dev *to_hidma_dev(struct dma_device *dmadev)
+{
+	return container_of(dmadev, struct hidma_dev, ddev);
+}
+
+static inline
+struct hidma_dev *to_hidma_dev_from_lldev(void *_lldev)
+{
+	return container_of(_lldev, struct hidma_dev, lldev);
+}
+
+static inline
+struct hidma_chan *to_hidma_chan(struct dma_chan *dmach)
+{
+	return container_of(dmach, struct hidma_chan, chan);
+}
+
+static inline struct hidma_desc *
+to_hidma_desc(struct dma_async_tx_descriptor *t)
+{
+	return container_of(t, struct hidma_desc, desc);
+}
+
+static void hidma_free(struct hidma_dev *dmadev)
+{
+	dev_dbg(dmadev->ddev.dev, "free dmadev\n");
+	INIT_LIST_HEAD(&dmadev->ddev.channels);
+}
+
+static unsigned int debug_pm;
+module_param(debug_pm, uint, 0644);
+MODULE_PARM_DESC(debug_pm,
+		 "debug runtime power management transitions (default: 0)");
+
+#define TRC_PM(...) do {			\
+		if (debug_pm)			\
+			dev_info(__VA_ARGS__);	\
+	} while (0)
+
+/* process completed descriptors */
+static void hidma_process_completed(struct hidma_dev *mdma)
+{
+	dma_cookie_t last_cookie = 0;
+	struct hidma_chan *mchan;
+	struct hidma_desc *mdesc;
+	struct dma_async_tx_descriptor *desc;
+	unsigned long irqflags;
+	LIST_HEAD(list);
+	struct dma_chan *dmach = NULL;
+
+	list_for_each_entry(dmach, &mdma->ddev.channels,
+			device_node) {
+		mchan = to_hidma_chan(dmach);
+
+		/* Get all completed descriptors */
+		spin_lock_irqsave(&mchan->lock, irqflags);
+		if (!list_empty(&mchan->completed))
+			list_splice_tail_init(&mchan->completed, &list);
+		spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+		if (list_empty(&list))
+			continue;
+
+		/* Execute callbacks and run dependencies */
+		list_for_each_entry(mdesc, &list, node) {
+			desc = &mdesc->desc;
+
+			spin_lock_irqsave(&mchan->lock, irqflags);
+			dma_cookie_complete(desc);
+			spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+			if (desc->callback &&
+				(hidma_ll_status(mdma->lldev, mdesc->tre_ch)
+				== DMA_COMPLETE))
+				desc->callback(desc->callback_param);
+
+			last_cookie = desc->cookie;
+			dma_run_dependencies(desc);
+		}
+
+		/* Free descriptors */
+		spin_lock_irqsave(&mchan->lock, irqflags);
+		list_splice_tail_init(&list, &mchan->free);
+		spin_unlock_irqrestore(&mchan->lock, irqflags);
+	}
+}
+
+/*
+ * Execute all queued DMA descriptors.
+ * This function is called either on the first transfer attempt in tx_submit
+ * or from the callback routine when one transfer is finished. It can only be
+ * called from a single location since both of places check active list to be
+ * empty and will immediately fill the active list while lock is held.
+ *
+ * Following requirements must be met while calling hidma_execute():
+ *	a) mchan->lock is locked,
+ *	b) mchan->active list contains multiple entries.
+ *	c) pm protected
+ */
+static int hidma_execute(struct hidma_chan *mchan)
+{
+	struct hidma_dev *mdma = mchan->dmadev;
+	int rc;
+
+	if (!hidma_ll_isenabled(mdma->lldev))
+		return -ENODEV;
+
+	/* Start the transfer */
+	if (!list_empty(&mchan->active))
+		rc = hidma_ll_start(mdma->lldev);
+
+	return 0;
+}
+
+/*
+ * Called once for each submitted descriptor.
+ * PM is locked once for each descriptor that is currently
+ * in execution.
+ */
+static void hidma_callback(void *data)
+{
+	struct hidma_desc *mdesc = data;
+	struct hidma_chan *mchan = to_hidma_chan(mdesc->desc.chan);
+	unsigned long irqflags;
+	struct dma_device *ddev = mchan->chan.device;
+	struct hidma_dev *dmadev = to_hidma_dev(ddev);
+	bool queued = false;
+
+	dev_dbg(dmadev->ddev.dev, "callback: data:0x%p\n", data);
+
+	spin_lock_irqsave(&mchan->lock, irqflags);
+
+	if (mdesc->node.next) {
+		/* Delete from the active list, add to completed list */
+		list_move_tail(&mdesc->node, &mchan->completed);
+		queued = true;
+	}
+	spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+	hidma_process_completed(dmadev);
+
+	if (queued)
+		HIDMA_RUNTIME_SET(dmadev);
+}
+
+static int hidma_chan_init(struct hidma_dev *dmadev, u32 dma_sig)
+{
+	struct hidma_chan *mchan;
+	struct dma_device *ddev;
+
+	mchan = devm_kzalloc(dmadev->ddev.dev, sizeof(*mchan), GFP_KERNEL);
+	if (!mchan) {
+		dev_err(dmadev->ddev.dev, "chaninit: out of memory\n");
+		return -ENOMEM;
+	}
+
+	ddev = &dmadev->ddev;
+	mchan->dma_sig = dma_sig;
+	mchan->dmadev = dmadev;
+	mchan->chan.device = ddev;
+	dma_cookie_init(&mchan->chan);
+
+	INIT_LIST_HEAD(&mchan->free);
+	INIT_LIST_HEAD(&mchan->prepared);
+	INIT_LIST_HEAD(&mchan->active);
+	INIT_LIST_HEAD(&mchan->completed);
+
+	spin_lock_init(&mchan->lock);
+	list_add_tail(&mchan->chan.device_node, &ddev->channels);
+	dmadev->ddev.chancnt++;
+	return 0;
+}
+
+static void hidma_issue_pending(struct dma_chan *dmach)
+{
+}
+
+static enum dma_status hidma_tx_status(struct dma_chan *dmach,
+					dma_cookie_t cookie,
+					struct dma_tx_state *txstate)
+{
+	enum dma_status ret;
+	unsigned long irqflags;
+	struct hidma_chan *mchan = to_hidma_chan(dmach);
+
+	spin_lock_irqsave(&mchan->lock, irqflags);
+	if (mchan->paused)
+		ret = DMA_PAUSED;
+	else
+		ret = dma_cookie_status(dmach, cookie, txstate);
+	spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+	return ret;
+}
+
+/*
+ * Submit descriptor to hardware.
+ * Lock the PM for each descriptor we are sending.
+ */
+static dma_cookie_t hidma_tx_submit(struct dma_async_tx_descriptor *txd)
+{
+	struct hidma_chan *mchan = to_hidma_chan(txd->chan);
+	struct hidma_dev *dmadev = mchan->dmadev;
+	struct hidma_desc *mdesc;
+	unsigned long irqflags;
+	dma_cookie_t cookie;
+
+	if (!hidma_ll_isenabled(dmadev->lldev))
+		return -ENODEV;
+
+	HIDMA_RUNTIME_GET(dmadev);
+	mdesc = container_of(txd, struct hidma_desc, desc);
+	spin_lock_irqsave(&mchan->lock, irqflags);
+
+	/* Move descriptor to active */
+	list_move_tail(&mdesc->node, &mchan->active);
+
+	/* Update cookie */
+	cookie = dma_cookie_assign(txd);
+
+	hidma_ll_queue_request(dmadev->lldev, mdesc->tre_ch);
+	hidma_execute(mchan);
+
+	spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+	return cookie;
+}
+
+static int hidma_alloc_chan_resources(struct dma_chan *dmach)
+{
+	struct hidma_chan *mchan = to_hidma_chan(dmach);
+	struct hidma_dev *dmadev = mchan->dmadev;
+	int rc = 0;
+	struct hidma_desc *mdesc, *tmp;
+	unsigned long irqflags;
+	LIST_HEAD(descs);
+	u32 i;
+
+	if (mchan->allocated)
+		return 0;
+
+	/* Alloc descriptors for this channel */
+	for (i = 0; i < dmadev->nr_descriptors; i++) {
+		mdesc = kzalloc(sizeof(struct hidma_desc), GFP_KERNEL);
+		if (!mdesc) {
+			dev_err(dmadev->ddev.dev, "Memory allocation error. ");
+			rc = -ENOMEM;
+			break;
+		}
+		dma_async_tx_descriptor_init(&mdesc->desc, dmach);
+		mdesc->desc.flags = DMA_CTRL_ACK;
+		mdesc->desc.tx_submit = hidma_tx_submit;
+
+		rc = hidma_ll_request(dmadev->lldev,
+				mchan->dma_sig, "DMA engine", hidma_callback,
+				mdesc, &mdesc->tre_ch);
+		if (rc != 1) {
+			dev_err(dmach->device->dev,
+				"channel alloc failed at %u\n", i);
+			kfree(mdesc);
+			break;
+		}
+		list_add_tail(&mdesc->node, &descs);
+	}
+
+	if (rc != 1) {
+		/* return the allocated descriptors */
+		list_for_each_entry_safe(mdesc, tmp, &descs, node) {
+			hidma_ll_free(dmadev->lldev, mdesc->tre_ch);
+			kfree(mdesc);
+		}
+		return rc;
+	}
+
+	spin_lock_irqsave(&mchan->lock, irqflags);
+	list_splice_tail_init(&descs, &mchan->free);
+	mchan->allocated = true;
+	spin_unlock_irqrestore(&mchan->lock, irqflags);
+	dev_dbg(dmadev->ddev.dev,
+		"allocated channel for %u\n", mchan->dma_sig);
+	return rc;
+}
+
+static void hidma_free_chan_resources(struct dma_chan *dmach)
+{
+	struct hidma_chan *mchan = to_hidma_chan(dmach);
+	struct hidma_dev *mdma = mchan->dmadev;
+	struct hidma_desc *mdesc, *tmp;
+	unsigned long irqflags;
+	LIST_HEAD(descs);
+
+	if (!list_empty(&mchan->prepared) ||
+		!list_empty(&mchan->active) ||
+		!list_empty(&mchan->completed)) {
+		/* We have unfinished requests waiting.
+		 * Terminate the request from the hardware.
+		 */
+		hidma_cleanup_pending_tre(mdma->lldev, 0x77, 0x77);
+
+		/* Give enough time for completions to be called. */
+		msleep(100);
+	}
+
+	spin_lock_irqsave(&mchan->lock, irqflags);
+	/* Channel must be idle */
+	BUG_ON(!list_empty(&mchan->prepared));
+	BUG_ON(!list_empty(&mchan->active));
+	BUG_ON(!list_empty(&mchan->completed));
+
+	/* Move data */
+	list_splice_tail_init(&mchan->free, &descs);
+
+	/* Free descriptors */
+	list_for_each_entry_safe(mdesc, tmp, &descs, node) {
+		hidma_ll_free(mdma->lldev, mdesc->tre_ch);
+		list_del(&mdesc->node);
+		kfree(mdesc);
+	}
+
+	mchan->allocated = 0;
+	spin_unlock_irqrestore(&mchan->lock, irqflags);
+	dev_dbg(mdma->ddev.dev, "freed channel for %u\n", mchan->dma_sig);
+}
+
+
+static struct dma_async_tx_descriptor *
+hidma_prep_dma_memcpy(struct dma_chan *dmach, dma_addr_t dma_dest,
+			dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+	struct hidma_chan *mchan = to_hidma_chan(dmach);
+	struct hidma_desc *mdesc = NULL;
+	struct hidma_dev *mdma = mchan->dmadev;
+	unsigned long irqflags;
+
+	dev_dbg(mdma->ddev.dev,
+		"memcpy: chan:%p dest:%pad src:%pad len:%zu\n", mchan,
+		&dma_dest, &dma_src, len);
+
+	/* Get free descriptor */
+	spin_lock_irqsave(&mchan->lock, irqflags);
+	if (!list_empty(&mchan->free)) {
+		mdesc = list_first_entry(&mchan->free, struct hidma_desc, node);
+		list_del(&mdesc->node);
+	}
+	spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+	if (!mdesc)
+		return NULL;
+
+	hidma_ll_set_transfer_params(mdma->lldev, mdesc->tre_ch,
+			dma_src, dma_dest, len, flags);
+
+	/* Place descriptor in prepared list */
+	spin_lock_irqsave(&mchan->lock, irqflags);
+	list_add_tail(&mdesc->node, &mchan->prepared);
+	spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+	return &mdesc->desc;
+}
+
+static int hidma_terminate_all(struct dma_chan *chan)
+{
+	struct hidma_dev *dmadev;
+	LIST_HEAD(head);
+	unsigned long irqflags;
+	LIST_HEAD(list);
+	struct hidma_desc *tmp, *mdesc = NULL;
+	int rc = 0;
+	struct hidma_chan *mchan;
+
+	mchan = to_hidma_chan(chan);
+	dmadev = to_hidma_dev(mchan->chan.device);
+	dev_dbg(dmadev->ddev.dev, "terminateall: chan:0x%p\n", mchan);
+
+	HIDMA_RUNTIME_GET(dmadev);
+	/* give completed requests a chance to finish */
+	hidma_process_completed(dmadev);
+
+	spin_lock_irqsave(&mchan->lock, irqflags);
+	list_splice_init(&mchan->active, &list);
+	list_splice_init(&mchan->prepared, &list);
+	list_splice_init(&mchan->completed, &list);
+	spin_unlock_irqrestore(&mchan->lock, irqflags);
+
+	/* this suspends the existing transfer */
+	rc = hidma_ll_pause(dmadev->lldev);
+	if (rc) {
+		dev_err(dmadev->ddev.dev, "channel did not pause\n");
+		goto out;
+	}
+
+	/* return all user requests */
+	list_for_each_entry_safe(mdesc, tmp, &list, node) {
+		struct dma_async_tx_descriptor	*txd = &mdesc->desc;
+		dma_async_tx_callback callback = mdesc->desc.callback;
+		void *param = mdesc->desc.callback_param;
+		enum dma_status status;
+
+		dma_descriptor_unmap(txd);
+
+		status = hidma_ll_status(dmadev->lldev, mdesc->tre_ch);
+		/*
+		 * The API requires that no submissions are done from a
+		 * callback, so we don't need to drop the lock here
+		 */
+		if (callback && (status == DMA_COMPLETE))
+			callback(param);
+
+		dma_run_dependencies(txd);
+
+		/* move myself to free_list */
+		list_move(&mdesc->node, &mchan->free);
+	}
+
+	/* reinitialize the hardware */
+	rc = hidma_ll_setup(dmadev->lldev);
+
+out:
+	HIDMA_RUNTIME_SET(dmadev);
+	return rc;
+}
+
+static int hidma_pause(struct dma_chan *chan)
+{
+	struct hidma_chan *mchan;
+	struct hidma_dev *dmadev;
+
+	mchan = to_hidma_chan(chan);
+	dmadev = to_hidma_dev(mchan->chan.device);
+	dev_dbg(dmadev->ddev.dev, "pause: chan:0x%p\n", mchan);
+
+	HIDMA_RUNTIME_GET(dmadev);
+	if (!mchan->paused) {
+		if (hidma_ll_pause(dmadev->lldev))
+			dev_warn(dmadev->ddev.dev, "channel did not stop\n");
+		mchan->paused = true;
+	}
+	HIDMA_RUNTIME_SET(dmadev);
+	return 0;
+}
+
+static int hidma_resume(struct dma_chan *chan)
+{
+	struct hidma_chan *mchan;
+	struct hidma_dev *dmadev;
+	int rc = 0;
+
+	mchan = to_hidma_chan(chan);
+	dmadev = to_hidma_dev(mchan->chan.device);
+	dev_dbg(dmadev->ddev.dev, "resume: chan:0x%p\n", mchan);
+
+	HIDMA_RUNTIME_GET(dmadev);
+	if (mchan->paused) {
+		rc = hidma_ll_resume(dmadev->lldev);
+		if (!rc)
+			mchan->paused = false;
+		else
+			dev_err(dmadev->ddev.dev,
+					"failed to resume the channel");
+	}
+	HIDMA_RUNTIME_SET(dmadev);
+	return rc;
+}
+
+static void hidma_selftest_complete(void *arg)
+{
+	struct hidma_dev *dmadev = arg;
+
+	atomic_inc(&dmadev->test_result.counter);
+	wake_up_interruptible(&dmadev->test_result.wq);
+	dev_dbg(dmadev->ddev.dev, "self test transfer complete :%d\n",
+		atomic_read(&dmadev->test_result.counter));
+}
+
+/*
+ * Perform a transaction to verify the HW works.
+ */
+static int hidma_selftest_sg(struct hidma_dev *dmadev,
+			struct dma_chan *dma_chanptr, u64 size,
+			unsigned long flags)
+{
+	dma_addr_t src_dma, dest_dma, dest_dma_it;
+	u8 *dest_buf;
+	u32 i, j = 0;
+	dma_cookie_t cookie;
+	struct dma_async_tx_descriptor *tx;
+	int err = 0;
+	int ret;
+	struct hidma_chan *hidma_chan;
+	struct sg_table sg_table;
+	struct scatterlist	*sg;
+	int nents = 10, count;
+	bool free_channel = 1;
+	u8 *src_buf;
+	int map_count;
+
+	atomic_set(&dmadev->test_result.counter, 0);
+
+	if (!dma_chanptr)
+		return -ENOMEM;
+
+	if (hidma_alloc_chan_resources(dma_chanptr) < 1)
+		return -ENODEV;
+
+	if (!dma_chanptr->device || !dmadev->ddev.dev) {
+		hidma_free_chan_resources(dma_chanptr);
+		return -ENODEV;
+	}
+
+	ret = sg_alloc_table(&sg_table, nents, GFP_KERNEL);
+	if (ret) {
+		err = ret;
+		goto sg_table_alloc_failed;
+	}
+
+	for_each_sg(sg_table.sgl, sg, nents, i) {
+		int alloc_sz = round_up(size, nents) / nents;
+		void *cpu_addr = kmalloc(alloc_sz, GFP_KERNEL);
+
+		if (!cpu_addr) {
+			err = -ENOMEM;
+			goto sg_buf_alloc_failed;
+		}
+
+		dev_dbg(dmadev->ddev.dev, "set sg buf[%d] :%p\n", i, cpu_addr);
+		sg_set_buf(sg, cpu_addr, alloc_sz);
+	}
+
+	dest_buf = kmalloc(round_up(size, nents), GFP_KERNEL);
+	if (!dest_buf) {
+		err = -ENOMEM;
+		goto dst_alloc_failed;
+	}
+	dev_dbg(dmadev->ddev.dev, "dest:%p\n", dest_buf);
+
+	/* Fill in src buffer */
+	count = 0;
+	for_each_sg(sg_table.sgl, sg, nents, i) {
+		src_buf = sg_virt(sg);
+		dev_dbg(dmadev->ddev.dev,
+			"set src[%d, %d, %p] = %d\n", i, j, src_buf, count);
+
+		for (j = 0; j < sg_dma_len(sg); j++)
+			src_buf[j] = count++;
+	}
+
+	/* dma_map_sg cleans and invalidates the cache in arm64 when
+	 * DMA_TO_DEVICE is selected for src. That's why, we need to do
+	 * the mapping after the data is copied.
+	 */
+	map_count = dma_map_sg(dmadev->ddev.dev, sg_table.sgl, nents,
+				DMA_TO_DEVICE);
+	if (!map_count) {
+		err =  -EINVAL;
+		goto src_map_failed;
+	}
+
+	dest_dma = dma_map_single(dmadev->ddev.dev, dest_buf,
+				size, DMA_FROM_DEVICE);
+
+	err = dma_mapping_error(dmadev->ddev.dev, dest_dma);
+	if (err)
+		goto dest_map_failed;
+
+	/* check scatter gather list contents */
+	for_each_sg(sg_table.sgl, sg, map_count, i)
+		dev_dbg(dmadev->ddev.dev,
+			"[%d/%d] src va=%p, iova = %pa len:%d\n",
+			i, map_count, sg_virt(sg), &sg_dma_address(sg),
+			sg_dma_len(sg));
+
+	dest_dma_it = dest_dma;
+	for_each_sg(sg_table.sgl, sg, map_count, i) {
+		src_buf = sg_virt(sg);
+		src_dma = sg_dma_address(sg);
+		dev_dbg(dmadev->ddev.dev, "src_dma: %pad dest_dma:%pad\n",
+			&src_dma, &dest_dma_it);
+
+		tx = hidma_prep_dma_memcpy(dma_chanptr, dest_dma_it, src_dma,
+					sg_dma_len(sg), flags);
+		if (!tx) {
+			dev_err(dmadev->ddev.dev,
+				"Self-test prep_dma_memcpy failed, disabling\n");
+			err = -ENODEV;
+			goto prep_memcpy_failed;
+		}
+
+		tx->callback_param = dmadev;
+		tx->callback = hidma_selftest_complete;
+		cookie = tx->tx_submit(tx);
+		dest_dma_it += sg_dma_len(sg);
+	}
+
+	hidma_issue_pending(dma_chanptr);
+
+	/*
+	 * It is assumed that the hardware can move the data within 1s
+	 * and signal the OS of the completion
+	 */
+	ret = wait_event_interruptible_timeout(dmadev->test_result.wq,
+		atomic_read(&dmadev->test_result.counter) == (map_count),
+				msecs_to_jiffies(10000));
+
+	if (ret <= 0) {
+		dev_err(dmadev->ddev.dev,
+			"Self-test sg copy timed out, disabling\n");
+		err = -ENODEV;
+		goto tx_status;
+	}
+	dev_dbg(dmadev->ddev.dev,
+		"Self-test complete signal received\n");
+
+	if (hidma_tx_status(dma_chanptr, cookie, NULL) !=
+				DMA_COMPLETE) {
+		dev_err(dmadev->ddev.dev,
+			"Self-test sg status not complete, disabling\n");
+		err = -ENODEV;
+		goto tx_status;
+	}
+
+	dma_sync_single_for_cpu(dmadev->ddev.dev, dest_dma, size,
+				DMA_FROM_DEVICE);
+
+	hidma_chan = to_hidma_chan(dma_chanptr);
+	count = 0;
+	for_each_sg(sg_table.sgl, sg, map_count, i) {
+		src_buf = sg_virt(sg);
+		if (memcmp(src_buf, &dest_buf[count], sg_dma_len(sg)) == 0) {
+			count += sg_dma_len(sg);
+			continue;
+		}
+
+		for (j = 0; j < sg_dma_len(sg); j++) {
+			if (src_buf[j] != dest_buf[count]) {
+				dev_dbg(dmadev->ddev.dev,
+				"[%d, %d] (%p) src :%x dest (%p):%x cnt:%d\n",
+					i, j, &src_buf[j], src_buf[j],
+					&dest_buf[count], dest_buf[count],
+					count);
+				dev_err(dmadev->ddev.dev,
+				 "Self-test copy failed compare, disabling\n");
+				err = -EFAULT;
+				return err;
+				goto compare_failed;
+			}
+			count++;
+		}
+	}
+
+	/*
+	 * do not release the channel
+	 * we want to consume all the channels on self test
+	 */
+	free_channel = 0;
+
+compare_failed:
+tx_status:
+prep_memcpy_failed:
+	dma_unmap_single(dmadev->ddev.dev, dest_dma, size,
+			 DMA_FROM_DEVICE);
+dest_map_failed:
+	dma_unmap_sg(dmadev->ddev.dev, sg_table.sgl, nents,
+			DMA_TO_DEVICE);
+
+src_map_failed:
+	kfree(dest_buf);
+
+dst_alloc_failed:
+sg_buf_alloc_failed:
+	for_each_sg(sg_table.sgl, sg, nents, i) {
+		if (sg_virt(sg))
+			kfree(sg_virt(sg));
+	}
+	sg_free_table(&sg_table);
+sg_table_alloc_failed:
+	if (free_channel)
+		hidma_free_chan_resources(dma_chanptr);
+
+	return err;
+}
+
+/*
+ * Perform a streaming transaction to verify the HW works.
+ */
+static int hidma_selftest_streaming(struct hidma_dev *dmadev,
+			struct dma_chan *dma_chanptr, u64 size,
+			unsigned long flags)
+{
+	dma_addr_t src_dma, dest_dma;
+	u8 *dest_buf, *src_buf;
+	u32 i;
+	dma_cookie_t cookie;
+	struct dma_async_tx_descriptor *tx;
+	int err = 0;
+	int ret;
+	struct hidma_chan *hidma_chan;
+	bool free_channel = 1;
+
+	atomic_set(&dmadev->test_result.counter, 0);
+
+	if (!dma_chanptr)
+		return -ENOMEM;
+
+	if (hidma_alloc_chan_resources(dma_chanptr) < 1)
+		return -ENODEV;
+
+	if (!dma_chanptr->device || !dmadev->ddev.dev) {
+		hidma_free_chan_resources(dma_chanptr);
+		return -ENODEV;
+	}
+
+	src_buf = kmalloc(size, GFP_KERNEL);
+	if (!src_buf) {
+		err = -ENOMEM;
+		goto src_alloc_failed;
+	}
+
+	dest_buf = kmalloc(size, GFP_KERNEL);
+	if (!dest_buf) {
+		err = -ENOMEM;
+		goto dst_alloc_failed;
+	}
+
+	dev_dbg(dmadev->ddev.dev, "src: %p dest:%p\n", src_buf, dest_buf);
+
+	/* Fill in src buffer */
+	for (i = 0; i < size; i++)
+		src_buf[i] = (u8)i;
+
+	/* dma_map_single cleans and invalidates the cache in arm64 when
+	 * DMA_TO_DEVICE is selected for src. That's why, we need to do
+	 * the mapping after the data is copied.
+	 */
+	src_dma = dma_map_single(dmadev->ddev.dev, src_buf,
+				 size, DMA_TO_DEVICE);
+
+	err = dma_mapping_error(dmadev->ddev.dev, src_dma);
+	if (err)
+		goto src_map_failed;
+
+	dest_dma = dma_map_single(dmadev->ddev.dev, dest_buf,
+				size, DMA_FROM_DEVICE);
+
+	err = dma_mapping_error(dmadev->ddev.dev, dest_dma);
+	if (err)
+		goto dest_map_failed;
+	dev_dbg(dmadev->ddev.dev, "src_dma: %pad dest_dma:%pad\n", &src_dma,
+		&dest_dma);
+	tx = hidma_prep_dma_memcpy(dma_chanptr, dest_dma, src_dma,
+					size,
+					flags);
+	if (!tx) {
+		dev_err(dmadev->ddev.dev,
+			"Self-test prep_dma_memcpy failed, disabling\n");
+		err = -ENODEV;
+		goto prep_memcpy_failed;
+	}
+
+	tx->callback_param = dmadev;
+	tx->callback = hidma_selftest_complete;
+	cookie = tx->tx_submit(tx);
+	hidma_issue_pending(dma_chanptr);
+
+	/*
+	 * It is assumed that the hardware can move the data within 1s
+	 * and signal the OS of the completion
+	 */
+	ret = wait_event_interruptible_timeout(dmadev->test_result.wq,
+				atomic_read(&dmadev->test_result.counter) == 1,
+				msecs_to_jiffies(10000));
+
+	if (ret <= 0) {
+		dev_err(dmadev->ddev.dev,
+			"Self-test copy timed out, disabling\n");
+		err = -ENODEV;
+		goto tx_status;
+	}
+	dev_dbg(dmadev->ddev.dev, "Self-test complete signal received\n");
+
+	if (hidma_tx_status(dma_chanptr, cookie, NULL) !=
+				DMA_COMPLETE) {
+		dev_err(dmadev->ddev.dev,
+			"Self-test copy timed out, disabling\n");
+		err = -ENODEV;
+		goto tx_status;
+	}
+
+	dma_sync_single_for_cpu(dmadev->ddev.dev, dest_dma, size,
+				DMA_FROM_DEVICE);
+
+	hidma_chan = to_hidma_chan(dma_chanptr);
+	if (memcmp(src_buf, dest_buf, size)) {
+		for (i = 0; i < size/4; i++) {
+			if (((u32 *)src_buf)[i] != ((u32 *)(dest_buf))[i]) {
+				dev_dbg(dmadev->ddev.dev,
+					"[%d] src data:%x dest data:%x\n",
+					i, ((u32 *)src_buf)[i],
+					((u32 *)(dest_buf))[i]);
+				break;
+			}
+		}
+		dev_err(dmadev->ddev.dev,
+			"Self-test copy failed compare, disabling\n");
+		err = -EFAULT;
+		goto compare_failed;
+	}
+
+	/*
+	 * do not release the channel
+	 * we want to consume all the channels on self test
+	 */
+	free_channel = 0;
+
+compare_failed:
+tx_status:
+prep_memcpy_failed:
+	dma_unmap_single(dmadev->ddev.dev, dest_dma, size,
+			 DMA_FROM_DEVICE);
+dest_map_failed:
+	dma_unmap_single(dmadev->ddev.dev, src_dma, size,
+			DMA_TO_DEVICE);
+
+src_map_failed:
+	kfree(dest_buf);
+
+dst_alloc_failed:
+	kfree(src_buf);
+
+src_alloc_failed:
+	if (free_channel)
+		hidma_free_chan_resources(dma_chanptr);
+
+	return err;
+}
+
+/*
+ * Perform a coherent transaction to verify the HW works.
+ */
+static int hidma_selftest_one_coherent(struct hidma_dev *dmadev,
+			struct dma_chan *dma_chanptr, u64 size,
+			unsigned long flags)
+{
+	dma_addr_t src_dma, dest_dma;
+	u8 *dest_buf, *src_buf;
+	u32 i;
+	dma_cookie_t cookie;
+	struct dma_async_tx_descriptor *tx;
+	int err = 0;
+	int ret;
+	struct hidma_chan *hidma_chan;
+	bool free_channel = true;
+
+	atomic_set(&dmadev->test_result.counter, 0);
+
+	if (!dma_chanptr)
+		return -ENOMEM;
+
+	if (hidma_alloc_chan_resources(dma_chanptr) < 1)
+		return -ENODEV;
+
+	if (!dma_chanptr->device || !dmadev->ddev.dev) {
+		hidma_free_chan_resources(dma_chanptr);
+		return -ENODEV;
+	}
+
+	src_buf = dma_alloc_coherent(dmadev->ddev.dev, size,
+				&src_dma, GFP_KERNEL);
+	if (!src_buf) {
+		err = -ENOMEM;
+		goto src_alloc_failed;
+	}
+
+	dest_buf = dma_alloc_coherent(dmadev->ddev.dev, size,
+				&dest_dma, GFP_KERNEL);
+	if (!dest_buf) {
+		err = -ENOMEM;
+		goto dst_alloc_failed;
+	}
+
+	dev_dbg(dmadev->ddev.dev, "src: %p dest:%p\n", src_buf, dest_buf);
+
+	/* Fill in src buffer */
+	for (i = 0; i < size; i++)
+		src_buf[i] = (u8)i;
+
+	dev_dbg(dmadev->ddev.dev, "src_dma: %pad dest_dma:%pad\n", &src_dma,
+		&dest_dma);
+	tx = hidma_prep_dma_memcpy(dma_chanptr, dest_dma, src_dma,
+					size,
+					flags);
+	if (!tx) {
+		dev_err(dmadev->ddev.dev,
+			"Self-test prep_dma_memcpy failed, disabling\n");
+		err = -ENODEV;
+		goto prep_memcpy_failed;
+	}
+
+	tx->callback_param = dmadev;
+	tx->callback = hidma_selftest_complete;
+	cookie = tx->tx_submit(tx);
+	hidma_issue_pending(dma_chanptr);
+
+	/*
+	 * It is assumed that the hardware can move the data within 1s
+	 * and signal the OS of the completion
+	 */
+	ret = wait_event_interruptible_timeout(dmadev->test_result.wq,
+				atomic_read(&dmadev->test_result.counter) == 1,
+				msecs_to_jiffies(10000));
+
+	if (ret <= 0) {
+		dev_err(dmadev->ddev.dev,
+			"Self-test copy timed out, disabling\n");
+		err = -ENODEV;
+		goto tx_status;
+	}
+	dev_dbg(dmadev->ddev.dev, "Self-test complete signal received\n");
+
+	if (hidma_tx_status(dma_chanptr, cookie, NULL) !=
+				DMA_COMPLETE) {
+		dev_err(dmadev->ddev.dev,
+			"Self-test copy timed out, disabling\n");
+		err = -ENODEV;
+		goto tx_status;
+	}
+
+	hidma_chan = to_hidma_chan(dma_chanptr);
+	if (memcmp(src_buf, dest_buf, size)) {
+		for (i = 0; i < size/4; i++) {
+			if (((u32 *)src_buf)[i] != ((u32 *)(dest_buf))[i]) {
+				dev_dbg(dmadev->ddev.dev,
+					"[%d] src data:%x dest data:%x\n",
+					i, ((u32 *)src_buf)[i],
+					((u32 *)(dest_buf))[i]);
+				break;
+			}
+		}
+		dev_err(dmadev->ddev.dev,
+			"Self-test copy failed compare, disabling\n");
+		err = -EFAULT;
+		goto compare_failed;
+	}
+
+	/*
+	 * do not release the channel
+	 * we want to consume all the channels on self test
+	 */
+	free_channel = 0;
+
+compare_failed:
+tx_status:
+prep_memcpy_failed:
+	dma_free_coherent(dmadev->ddev.dev, size, dest_buf, dest_dma);
+
+dst_alloc_failed:
+	dma_free_coherent(dmadev->ddev.dev, size, src_buf, src_dma);
+
+src_alloc_failed:
+	if (free_channel)
+		hidma_free_chan_resources(dma_chanptr);
+
+	return err;
+}
+
+static int hidma_selftest_all(struct hidma_dev *dmadev,
+				bool req_coherent, bool req_sg)
+{
+	int rc = -ENODEV, i = 0;
+	struct dma_chan **dmach_ptr = NULL;
+	u32 max_channels = 0;
+	u64 sizes[] = {PAGE_SIZE - 1, PAGE_SIZE, PAGE_SIZE + 1, 2801, 13295};
+	int count = 0;
+	u32 j;
+	u64 size;
+	int failed = 0;
+	struct dma_chan *dmach = NULL;
+
+	list_for_each_entry(dmach, &dmadev->ddev.channels,
+			device_node) {
+		max_channels++;
+	}
+
+	dmach_ptr = kcalloc(max_channels, sizeof(*dmach_ptr), GFP_KERNEL);
+	if (!dmach_ptr) {
+		rc = -ENOMEM;
+		goto failed_exit;
+	}
+
+	for (j = 0; j < sizeof(sizes)/sizeof(sizes[0]); j++) {
+		size = sizes[j];
+		count = 0;
+		dev_dbg(dmadev->ddev.dev, "test start for size:%llx\n", size);
+		list_for_each_entry(dmach, &dmadev->ddev.channels,
+				device_node) {
+			dmach_ptr[count] = dmach;
+			if (req_coherent)
+				rc = hidma_selftest_one_coherent(dmadev,
+					dmach, size,
+					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+			else if (req_sg)
+				rc = hidma_selftest_sg(dmadev,
+					dmach, size,
+					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+			else
+				rc = hidma_selftest_streaming(dmadev,
+					dmach, size,
+					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+			if (rc) {
+				failed = 1;
+				break;
+			}
+			dev_dbg(dmadev->ddev.dev,
+				"self test passed for ch:%d\n", count);
+			count++;
+		}
+
+		/*
+		 * free the channels where the test passed
+		 * Channel resources are freed for a test that fails.
+		 */
+		for (i = 0; i < count; i++)
+			hidma_free_chan_resources(dmach_ptr[i]);
+
+		if (failed)
+			break;
+	}
+
+failed_exit:
+	kfree(dmach_ptr);
+
+	return rc;
+}
+
+static int hidma_test_mapsingle(struct device *dev)
+{
+	u32 buf_size = 256;
+	char *src;
+	int ret = -ENOMEM;
+	dma_addr_t dma_src;
+
+	src = kmalloc(buf_size, GFP_KERNEL);
+	if (!src) {
+		dev_err(dev, "mapsingle: kmalloc failed ret:%d\n", ret);
+		return -ENOMEM;
+	}
+	strcpy(src, "hello world");
+
+	dma_src = dma_map_single(dev, src, buf_size, DMA_TO_DEVICE);
+	dev_dbg(dev, "mapsingle: src:%p src_dma:%pad\n", src, &dma_src);
+
+	ret = dma_mapping_error(dev, dma_src);
+	if (ret) {
+		dev_err(dev, "dma_mapping_error with ret:%d\n", ret);
+		ret = -ENOMEM;
+	} else {
+		phys_addr_t phys;
+
+		phys = dma_to_phys(dev, dma_src);
+		if (strcmp(__va(phys), "hello world") != 0) {
+			dev_err(dev, "memory content mismatch\n");
+			ret = -EINVAL;
+		} else {
+			dev_dbg(dev, "mapsingle:dma_map_single works\n");
+		}
+		dma_unmap_single(dev, dma_src, buf_size, DMA_TO_DEVICE);
+	}
+	kfree(src);
+	return ret;
+}
+
+/*
+ * Self test all DMA channels.
+ */
+static int hidma_memcpy_self_test(struct hidma_dev *device)
+{
+	int rc;
+
+	hidma_test_mapsingle(device->ddev.dev);
+
+	/* streaming test */
+	rc = hidma_selftest_all(device, false, false);
+	if (rc)
+		return rc;
+	dev_dbg(device->ddev.dev, "streaming self test passed\n");
+
+	/* coherent test */
+	rc = hidma_selftest_all(device, true, false);
+	if (rc)
+		return rc;
+
+	dev_dbg(device->ddev.dev, "coherent self test passed\n");
+
+	/* scatter gather test */
+	rc = hidma_selftest_all(device, false, true);
+	if (rc)
+		return rc;
+
+	dev_dbg(device->ddev.dev, "scatter gather self test passed\n");
+	return 0;
+}
+
+static irqreturn_t hidma_chirq_handler(int chirq, void *arg)
+{
+	void **lldev_ptr = arg;
+	irqreturn_t ret;
+	struct hidma_dev *dmadev = to_hidma_dev_from_lldev(lldev_ptr);
+
+	HIDMA_RUNTIME_GET(dmadev);
+	ret = hidma_ll_inthandler(chirq, *lldev_ptr);
+	HIDMA_RUNTIME_SET(dmadev);
+
+	return ret;
+}
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+
+#define SIER_CHAN_SHOW(chan, name) \
+		seq_printf(s, #name "=%u\n", chan->name)
+
+/**
+ * hidma_chan_stats: display HIDMA channel statistics
+ *
+ * Display the statistics for the current HIDMA virtual channel device.
+ */
+static int hidma_chan_stats(struct seq_file *s, void *unused)
+{
+	struct hidma_chan *mchan = s->private;
+	struct hidma_desc *mdesc;
+	struct hidma_dev *dmadev = mchan->dmadev;
+
+	HIDMA_RUNTIME_GET(dmadev);
+	SIER_CHAN_SHOW(mchan, paused);
+	SIER_CHAN_SHOW(mchan, dma_sig);
+	seq_puts(s, "prepared\n");
+	list_for_each_entry(mdesc, &mchan->prepared, node)
+		hidma_ll_chstats(s, mchan->dmadev->lldev, mdesc->tre_ch);
+
+	seq_puts(s, "active\n");
+		list_for_each_entry(mdesc, &mchan->active, node)
+			hidma_ll_chstats(s, mchan->dmadev->lldev,
+				mdesc->tre_ch);
+
+	seq_puts(s, "completed\n");
+		list_for_each_entry(mdesc, &mchan->completed, node)
+			hidma_ll_chstats(s, mchan->dmadev->lldev,
+				mdesc->tre_ch);
+
+	hidma_ll_devstats(s, mchan->dmadev->lldev);
+	HIDMA_RUNTIME_SET(dmadev);
+	return 0;
+}
+
+/**
+ * hidma_dma_info: display HIDMA device info
+ *
+ * Display the info for the current HIDMA device.
+ */
+static int hidma_dma_info(struct seq_file *s, void *unused)
+{
+	struct hidma_dev *dmadev = s->private;
+	struct dma_device *dma = &dmadev->ddev;
+
+	seq_printf(s, "nr_descriptors=%d\n", dmadev->nr_descriptors);
+	seq_printf(s, "dev_trca=%p\n", &dmadev->dev_trca);
+	seq_printf(s, "dev_trca_phys=%pa\n", &dmadev->dev_trca_phys);
+	seq_printf(s, "dev_trca_size=%pa\n", &dmadev->dev_trca_size);
+	seq_printf(s, "dev_evca=%p\n", &dmadev->dev_evca);
+	seq_printf(s, "dev_evca_phys=%pa\n", &dmadev->dev_evca_phys);
+	seq_printf(s, "dev_evca_size=%pa\n", &dmadev->dev_evca_size);
+	seq_printf(s, "self_test=%u\n",
+		atomic_read(&dmadev->test_result.counter));
+
+	seq_printf(s, "copy%s%s%s%s%s%s%s%s%s%s%s\n",
+		dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "",
+		dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "",
+		dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "",
+		dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "",
+		dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : "",
+		dma_has_cap(DMA_SG, dma->cap_mask) ? " sg" : "",
+		dma_has_cap(DMA_ASYNC_TX, dma->cap_mask) ? " async" : "",
+		dma_has_cap(DMA_SLAVE, dma->cap_mask) ? " slave" : "",
+		dma_has_cap(DMA_CYCLIC, dma->cap_mask) ? " cyclic" : "",
+		dma_has_cap(DMA_INTERLEAVE, dma->cap_mask) ? " intl" : "",
+		dma_has_cap(DMA_MEMCPY, dma->cap_mask) ? " memcpy" : "");
+
+	return 0;
+}
+
+
+static int hidma_chan_stats_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, hidma_chan_stats, inode->i_private);
+}
+
+static int hidma_dma_info_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, hidma_dma_info, inode->i_private);
+}
+
+static const struct file_operations hidma_chan_fops = {
+	.open = hidma_chan_stats_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static const struct file_operations hidma_dma_fops = {
+	.open = hidma_dma_info_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+
+static void hidma_debug_uninit(struct hidma_dev *dmadev)
+{
+	struct list_head *position = NULL;
+
+	/* walk through the virtual channel list */
+	list_for_each(position, &dmadev->ddev.channels) {
+		struct hidma_chan *chan;
+
+		chan = list_entry(position, struct hidma_chan,
+				chan.device_node);
+		debugfs_remove(chan->stats);
+		debugfs_remove(chan->debugfs);
+	}
+
+	debugfs_remove(dmadev->stats);
+	debugfs_remove(dmadev->debugfs);
+}
+
+static int hidma_debug_init(struct hidma_dev *dmadev)
+{
+	int rc = 0;
+	int chidx = 0;
+	struct list_head *position = NULL;
+
+	dmadev->debugfs = debugfs_create_dir(dev_name(dmadev->ddev.dev), NULL);
+	if (!dmadev->debugfs) {
+		rc = -ENODEV;
+		return rc;
+	}
+
+	/* walk through the virtual channel list */
+	list_for_each(position, &dmadev->ddev.channels) {
+		struct hidma_chan *chan;
+
+		chan = list_entry(position, struct hidma_chan,
+				chan.device_node);
+		sprintf(chan->name, "chan%d", chidx);
+		chan->debugfs = debugfs_create_dir(chan->name,
+						dmadev->debugfs);
+		if (!chan->debugfs) {
+			rc = -ENOMEM;
+			goto cleanup;
+		}
+		chan->stats = debugfs_create_file("stats", S_IRUGO,
+				chan->debugfs, chan,
+				&hidma_chan_fops);
+		if (!chan->stats) {
+			rc = -ENOMEM;
+			goto cleanup;
+		}
+		chidx++;
+	}
+
+	dmadev->stats = debugfs_create_file("stats", S_IRUGO,
+			dmadev->debugfs, dmadev,
+			&hidma_dma_fops);
+	if (!dmadev->stats) {
+		rc = -ENOMEM;
+		goto cleanup;
+	}
+
+	return 0;
+cleanup:
+	hidma_debug_uninit(dmadev);
+	return rc;
+}
+#else
+static void hidma_debug_uninit(struct hidma_dev *dmadev)
+{
+}
+static int hidma_debug_init(struct hidma_dev *dmadev)
+{
+	return 0;
+}
+#endif
+
+static int hidma_probe(struct platform_device *pdev)
+{
+	struct hidma_dev *dmadev;
+	int rc = 0, i;
+	struct resource *trca_resource;
+	struct resource *evca_resource;
+	int chirq;
+
+	pm_runtime_set_autosuspend_delay(&pdev->dev, AUTOSUSPEND_TIMEOUT);
+	pm_runtime_use_autosuspend(&pdev->dev);
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+
+	trca_resource = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!trca_resource) {
+		dev_err(&pdev->dev, "TRCA mem resource not found\n");
+		rc = -ENODEV;
+		goto resource_get_failed;
+	}
+
+	evca_resource = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!evca_resource) {
+		dev_err(&pdev->dev, "EVCA mem resource not found\n");
+		rc = -ENODEV;
+		goto resource_get_failed;
+	}
+
+	/* This driver only handles the channel IRQs.
+	 * Common IRQ is handled by the management driver.
+	 */
+	chirq = platform_get_irq(pdev, 0);
+	if (chirq < 0) {
+		dev_err(&pdev->dev, "chirq resources not found\n");
+		rc = -ENODEV;
+		goto chirq_get_failed;
+	}
+
+	dev_dbg(&pdev->dev, "probe: starting\n");
+	dev_dbg(&pdev->dev, "We have %d resources\n", pdev->num_resources);
+
+	for (i = 0; i < pdev->num_resources; i++) {
+		dev_dbg(&pdev->dev, "[%d] resource: %pR\n", i,
+			&pdev->resource[i]);
+	}
+
+	dmadev = devm_kzalloc(&pdev->dev, sizeof(*dmadev), GFP_KERNEL);
+	if (!dmadev) {
+		dev_err(&pdev->dev, "probe: kzalloc failed\n");
+		rc = -ENOMEM;
+		goto device_alloc_failed;
+	}
+
+	INIT_LIST_HEAD(&dmadev->ddev.channels);
+	spin_lock_init(&dmadev->lock);
+	dmadev->ddev.dev = &pdev->dev;
+	HIDMA_RUNTIME_GET(dmadev);
+
+	dma_cap_set(DMA_MEMCPY, dmadev->ddev.cap_mask);
+	/* Apply default dma_mask if needed */
+	if (!pdev->dev.dma_mask) {
+		pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask;
+		pdev->dev.coherent_dma_mask = DMA_BIT_MASK(64);
+	}
+
+	dmadev->dev_evca_phys = evca_resource->start;
+	dmadev->dev_evca_size = resource_size(evca_resource);
+
+	dev_dbg(&pdev->dev, "dev_evca_phys:%pa\n", &dmadev->dev_evca_phys);
+	dev_dbg(&pdev->dev, "dev_evca_size:%pa\n", &dmadev->dev_evca_size);
+
+	dmadev->dev_evca = devm_ioremap_resource(&pdev->dev,
+						evca_resource);
+	if (IS_ERR(dmadev->dev_evca)) {
+		dev_err(&pdev->dev, "can't map i/o memory at %pa\n",
+			&dmadev->dev_evca_phys);
+		rc = -ENOMEM;
+		goto remap_evca_failed;
+	}
+	dev_dbg(&pdev->dev, "qcom_hidma: mapped EVCA %pa to %p\n",
+		&dmadev->dev_evca_phys, dmadev->dev_evca);
+
+	dmadev->dev_trca_phys = trca_resource->start;
+	dmadev->dev_trca_size = resource_size(trca_resource);
+
+	dev_dbg(&pdev->dev, "dev_trca_phys:%pa\n", &dmadev->dev_trca_phys);
+	dev_dbg(&pdev->dev, "dev_trca_size:%pa\n", &dmadev->dev_trca_size);
+
+	dmadev->dev_trca = devm_ioremap_resource(&pdev->dev,
+						trca_resource);
+	if (IS_ERR(dmadev->dev_trca)) {
+		dev_err(&pdev->dev, "can't map i/o memory at %pa\n",
+			&dmadev->dev_trca_phys);
+		rc = -ENOMEM;
+		goto remap_trca_failed;
+	}
+	dev_dbg(&pdev->dev, "qcom_hidma: mapped TRCA %pa to %p\n",
+		&dmadev->dev_trca_phys, dmadev->dev_trca);
+
+	init_waitqueue_head(&dmadev->test_result.wq);
+	dmadev->self_test = hidma_memcpy_self_test;
+	dmadev->ddev.device_prep_dma_memcpy = hidma_prep_dma_memcpy;
+	dmadev->ddev.device_alloc_chan_resources =
+		hidma_alloc_chan_resources;
+	dmadev->ddev.device_free_chan_resources = hidma_free_chan_resources;
+	dmadev->ddev.device_tx_status = hidma_tx_status;
+	dmadev->ddev.device_issue_pending = hidma_issue_pending;
+	dmadev->ddev.device_pause = hidma_pause;
+	dmadev->ddev.device_resume = hidma_resume;
+	dmadev->ddev.device_terminate_all = hidma_terminate_all;
+	dmadev->ddev.copy_align = 8;
+	dmadev->nr_descriptors = HIDMA_DEFAULT_DESCRIPTOR_COUNT;
+
+	device_property_read_u32(&pdev->dev, "desc-count",
+				&dmadev->nr_descriptors);
+
+	if (device_property_read_u8(&pdev->dev, "event-channel",
+				&dmadev->evridx)) {
+		dev_err(&pdev->dev, "probe:can't find the event channel id\n");
+		goto evridx_failed;
+	}
+
+	/* Set DMA mask to 64 bits. */
+	rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+	if (rc) {
+		dev_warn(&pdev->dev, "unable to set coherent mask to 64");
+		rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+	}
+	if (rc)
+		dev_warn(&pdev->dev, "unable to set coherent mask to 32");
+
+	rc = hidma_ll_init(&dmadev->lldev, dmadev->ddev.dev,
+				dmadev->nr_descriptors, dmadev->dev_trca,
+				dmadev->dev_evca, dmadev->evridx);
+	if (rc) {
+		dev_err(&pdev->dev, "probe:channel core init failed\n");
+		goto ll_init_failed;
+	}
+
+	rc = devm_request_irq(&pdev->dev, chirq, hidma_chirq_handler, 0,
+			      "qcom-hidma", &dmadev->lldev);
+	if (rc) {
+		dev_err(&pdev->dev, "chirq registration failed: %d\n", chirq);
+		goto chirq_request_failed;
+	}
+
+	dev_dbg(&pdev->dev, "initializing DMA channels\n");
+	INIT_LIST_HEAD(&dmadev->ddev.channels);
+	rc = hidma_chan_init(dmadev, 0);
+	if (rc) {
+		dev_err(&pdev->dev, "probe:channel init failed\n");
+		goto channel_init_failed;
+	}
+	dev_dbg(&pdev->dev, "HI-DMA engine driver starting self test\n");
+	rc = dmadev->self_test(dmadev);
+	if (rc) {
+		dev_err(&pdev->dev, "probe: self test failed: %d\n", rc);
+		goto self_test_failed;
+	}
+	dev_info(&pdev->dev, "probe: self test succeeded.\n");
+
+	dev_dbg(&pdev->dev, "calling dma_async_device_register\n");
+	rc = dma_async_device_register(&dmadev->ddev);
+	if (rc) {
+		dev_err(&pdev->dev,
+			"probe: failed to register slave DMA: %d\n", rc);
+		goto device_register_failed;
+	}
+	dev_dbg(&pdev->dev, "probe: dma_async_device_register done\n");
+
+	rc = hidma_debug_init(dmadev);
+	if (rc) {
+		dev_err(&pdev->dev,
+			"probe: failed to init debugfs: %d\n", rc);
+		goto debug_init_failed;
+	}
+
+	dev_info(&pdev->dev, "HI-DMA engine driver registration complete\n");
+	platform_set_drvdata(pdev, dmadev);
+	HIDMA_RUNTIME_SET(dmadev);
+	return 0;
+
+debug_init_failed:
+device_register_failed:
+self_test_failed:
+channel_init_failed:
+chirq_request_failed:
+	hidma_ll_uninit(dmadev->lldev);
+ll_init_failed:
+evridx_failed:
+remap_trca_failed:
+remap_evca_failed:
+	if (dmadev)
+		hidma_free(dmadev);
+device_alloc_failed:
+chirq_get_failed:
+resource_get_failed:
+	pm_runtime_disable(&pdev->dev);
+	pm_runtime_put_sync_suspend(&pdev->dev);
+	TRC_PM(&pdev->dev,
+		"%s:%d pm_runtime_put_autosuspend\n", __func__, __LINE__);
+	return rc;
+}
+
+static int hidma_remove(struct platform_device *pdev)
+{
+	struct hidma_dev *dmadev = platform_get_drvdata(pdev);
+
+	dev_dbg(&pdev->dev, "removing\n");
+	HIDMA_RUNTIME_GET(dmadev);
+	hidma_debug_uninit(dmadev);
+
+	dma_async_device_unregister(&dmadev->ddev);
+	hidma_ll_uninit(dmadev->lldev);
+	hidma_free(dmadev);
+
+	dev_info(&pdev->dev, "HI-DMA engine removed\n");
+	pm_runtime_put_sync_suspend(&pdev->dev);
+	TRC_PM(&pdev->dev,
+		"%s:%d pm_runtime_put_sync_suspend\n", __func__, __LINE__);
+	pm_runtime_disable(&pdev->dev);
+
+	return 0;
+}
+
+#if IS_ENABLED(CONFIG_ACPI)
+static const struct acpi_device_id hidma_acpi_ids[] = {
+	{"QCOM8061"},
+	{},
+};
+#endif
+
+static const struct of_device_id hidma_match[] = {
+	{ .compatible = "qcom,hidma", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, hidma_match);
+
+static struct platform_driver hidma_driver = {
+	.probe = hidma_probe,
+	.remove = hidma_remove,
+	.driver = {
+		.name = MODULE_NAME,
+		.owner = THIS_MODULE,
+		.of_match_table = of_match_ptr(hidma_match),
+		.acpi_match_table = ACPI_PTR(hidma_acpi_ids),
+	},
+};
+
+static int __init hidma_init(void)
+{
+	return platform_driver_register(&hidma_driver);
+}
+late_initcall(hidma_init);
+
+static void __exit hidma_exit(void)
+{
+	platform_driver_unregister(&hidma_driver);
+}
+module_exit(hidma_exit);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/qcom_hidma.h b/drivers/dma/qcom_hidma.h
new file mode 100644
index 0000000..0ab8314
--- /dev/null
+++ b/drivers/dma/qcom_hidma.h
@@ -0,0 +1,44 @@
+/*
+ * Qualcomm Technologies HIDMA data structures
+ *
+ * Copyright (c) 2014, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef QCOM_HIDMA_H
+#define QCOM_HIDMA_H
+
+struct hidma_lldev;
+struct hidma_llchan;
+struct seq_file;
+
+int hidma_ll_request(void *llhndl, u32 dev_id, const char *dev_name,
+			void (*callback)(void *data), void *data, u32 *tre_ch);
+
+void hidma_ll_free(void *llhndl, u32 tre_ch);
+enum dma_status hidma_ll_status(void *llhndl, u32 tre_ch);
+bool hidma_ll_isenabled(void *llhndl);
+int hidma_ll_queue_request(void *llhndl, u32 tre_ch);
+int hidma_ll_start(void *llhndl);
+int hidma_ll_pause(void *llhndl);
+int hidma_ll_resume(void *llhndl);
+void hidma_ll_set_transfer_params(void *llhndl, u32 tre_ch,
+	dma_addr_t src, dma_addr_t dest, u32 len, u32 flags);
+int hidma_ll_setup(struct hidma_lldev *lldev);
+int hidma_ll_init(void **llhndl, struct device *dev, u32 max_channels,
+			void __iomem *trca, void __iomem *evca,
+			u8 evridx);
+int hidma_ll_uninit(void *llhndl);
+irqreturn_t hidma_ll_inthandler(int irq, void *arg);
+void hidma_ll_chstats(struct seq_file *s, void *llhndl, u32 tre_ch);
+void hidma_ll_devstats(struct seq_file *s, void *llhndl);
+void hidma_cleanup_pending_tre(void *llhndl, u8 err_info, u8 err_code);
+#endif
diff --git a/drivers/dma/qcom_hidma_ll.c b/drivers/dma/qcom_hidma_ll.c
new file mode 100644
index 0000000..0188afd
--- /dev/null
+++ b/drivers/dma/qcom_hidma_ll.c
@@ -0,0 +1,1132 @@
+/*
+ * Qualcomm Technologies HIDMA DMA engine low level code
+ *
+ * Copyright (c) 2014, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/debugfs.h>
+#include <linux/atomic.h>
+#include "qcom_hidma.h"
+
+#define TRE_SIZE			32 /* each TRE is 32 bytes  */
+#define EVRE_SIZE			16 /* each EVRE is 16 bytes */
+
+#define TRCA_CTRLSTS_OFFSET		0x0
+#define TRCA_RING_LOW_OFFSET		0x8
+#define TRCA_RING_HIGH_OFFSET		0xC
+#define TRCA_RING_LEN_OFFSET		0x10
+#define TRCA_READ_PTR_OFFSET		0x18
+#define TRCA_WRITE_PTR_OFFSET		0x20
+#define TRCA_DOORBELL_OFFSET		0x400
+
+#define EVCA_CTRLSTS_OFFSET		0x0
+#define EVCA_INTCTRL_OFFSET		0x4
+#define EVCA_RING_LOW_OFFSET		0x8
+#define EVCA_RING_HIGH_OFFSET		0xC
+#define EVCA_RING_LEN_OFFSET		0x10
+#define EVCA_READ_PTR_OFFSET		0x18
+#define EVCA_WRITE_PTR_OFFSET		0x20
+#define EVCA_DOORBELL_OFFSET		0x400
+
+#define EVCA_IRQ_STAT_OFFSET		0x100
+#define EVCA_IRQ_CLR_OFFSET		0x108
+#define EVCA_IRQ_EN_OFFSET		0x110
+
+#define TRE_CFG_IDX			0
+#define TRE_LEN_IDX			1
+#define TRE_SRC_LOW_IDX		2
+#define TRE_SRC_HI_IDX			3
+#define TRE_DEST_LOW_IDX		4
+#define TRE_DEST_HI_IDX		5
+
+#define EVRE_CFG_IDX			0
+#define EVRE_LEN_IDX			1
+#define EVRE_DEST_LOW_IDX		2
+#define EVRE_DEST_HI_IDX		3
+
+#define EVRE_ERRINFO_BIT_POS		24
+#define EVRE_CODE_BIT_POS		28
+
+#define EVRE_ERRINFO_MASK		0xF
+#define EVRE_CODE_MASK			0xF
+
+#define CH_CONTROL_MASK		0xFF
+#define CH_STATE_MASK			0xFF
+#define CH_STATE_BIT_POS		0x8
+
+#define MAKE64(high, low) (((u64)(high) << 32) | (low))
+
+#define IRQ_EV_CH_EOB_IRQ_BIT_POS	0
+#define IRQ_EV_CH_WR_RESP_BIT_POS	1
+#define IRQ_EV_CH_RESET_ERR_BIT_POS	5
+#define IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS 9
+#define IRQ_TR_CH_DATA_RD_ER_BIT_POS	10
+#define IRQ_TR_CH_DATA_WR_ER_BIT_POS	11
+#define IRQ_TR_CH_RESET_ERROR		13
+#define IRQ_TR_CH_INVALID_TRE_BIT_POS	14
+
+#define	ENABLE_IRQS (BIT(IRQ_EV_CH_EOB_IRQ_BIT_POS) | \
+		BIT(IRQ_EV_CH_WR_RESP_BIT_POS) | \
+		BIT(IRQ_EV_CH_RESET_ERR_BIT_POS) |		 \
+		BIT(IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS) |	 \
+		BIT(IRQ_TR_CH_DATA_RD_ER_BIT_POS) |		 \
+		BIT(IRQ_TR_CH_DATA_WR_ER_BIT_POS) |		 \
+		BIT(IRQ_TR_CH_RESET_ERROR)	|		\
+		BIT(IRQ_TR_CH_INVALID_TRE_BIT_POS))
+
+enum ch_command {
+	CH_DISABLE = 0,
+	CH_ENABLE = 1,
+	CH_SUSPEND = 2,
+	CH_RESET = 9,
+};
+
+enum ch_state {
+	CH_DISABLED = 0,
+	CH_ENABLED = 1,
+	CH_RUNNING = 2,
+	CH_SUSPENDED = 3,
+	CH_STOPPED = 4,
+	CH_ERROR = 5,
+	CH_IN_RESET = 9,
+};
+
+enum tre_type {
+	TRE_MEMCPY = 3,
+	TRE_MEMSET = 4,
+};
+
+enum evre_type {
+	EVRE_DMA_COMPLETE = 0x23,
+	EVRE_IMM_DATA = 0x24,
+};
+
+enum err_code {
+	EVRE_STATUS_COMPLETE = 1,
+	EVRE_STATUS_ERROR = 4,
+};
+
+struct hidma_tx_status {
+	u8 err_info;			/* error record in this transfer    */
+	u8 err_code;			/* completion code		    */
+};
+
+struct hidma_lldev {
+	bool initialized;		/* initialized flag               */
+	u8 trch_state;			/* trch_state of the device	  */
+	u8 evch_state;			/* evch_state of the device	  */
+	u8 evridx;			/* event channel to notify	  */
+	u32 nr_tres;			/* max number of configs          */
+	spinlock_t lock;		/* reentrancy                     */
+	struct hidma_tre *trepool;	/* trepool of user configs */
+	struct device *dev;		/* device			  */
+	void __iomem *trca;		/* Transfer Channel address       */
+	void __iomem *evca;		/* Event Channel address          */
+	struct hidma_tre
+		**pending_tre_list;	/* Pointers to pending TREs	  */
+	struct hidma_tx_status
+		*tx_status_list;	/* Pointers to pending TREs status*/
+	s32 pending_tre_count;		/* Number of TREs pending	  */
+
+	void *tre_ring;		/* TRE ring			  */
+	dma_addr_t tre_ring_handle;	/* TRE ring to be shared with HW  */
+	u32 tre_ring_size;		/* Byte size of the ring	  */
+	u32 tre_processed_off;		/* last processed TRE		   */
+
+	void *evre_ring;		/* EVRE ring			   */
+	dma_addr_t evre_ring_handle;	/* EVRE ring to be shared with HW  */
+	u32 evre_ring_size;		/* Byte size of the ring	  */
+	u32 evre_processed_off;	/* last processed EVRE		   */
+
+	u32 tre_write_offset;           /* TRE write location              */
+};
+
+struct hidma_tre {
+	atomic_t allocated;		/* if this channel is allocated	    */
+	bool queued;			/* flag whether this is pending     */
+	u16 status;			/* status			    */
+	u32 chidx;			/* index of the tre	    */
+	u32 dma_sig;			/* signature of the tre	    */
+	const char *dev_name;		/* name of the device		    */
+	void (*callback)(void *data);	/* requester callback		    */
+	void *data;			/* Data associated with this channel*/
+	struct hidma_lldev *lldev;	/* lldma device pointer		    */
+	u32 tre_local[TRE_SIZE / sizeof(u32) + 1]; /* TRE local copy        */
+	struct tasklet_struct task;	/* task delivering notifications    */
+	u32 tre_index;			/* the offset where this was written*/
+	u32 int_flags;			/* interrupt flags*/
+};
+
+void hidma_ll_free(void *llhndl, u32 tre_ch)
+{
+	struct hidma_lldev *lldev = llhndl;
+	struct hidma_tre *tre;
+
+	if (unlikely(tre_ch >= lldev->nr_tres)) {
+		dev_err(lldev->dev, "invalid TRE number in free:%d", tre_ch);
+		return;
+	}
+
+	tre = &lldev->trepool[tre_ch];
+	if (unlikely(atomic_read(&tre->allocated) != true)) {
+		dev_err(lldev->dev, "trying to free an unused TRE:%d",
+			tre_ch);
+		return;
+	}
+
+	atomic_set(&tre->allocated, 0);
+	dev_dbg(lldev->dev, "free_dma: allocated:%d tre_ch:%d\n",
+		atomic_read(&tre->allocated), tre_ch);
+}
+EXPORT_SYMBOL_GPL(hidma_ll_free);
+
+int hidma_ll_request(void *llhndl, u32 dma_sig, const char *dev_name,
+			void (*callback)(void *data), void *data, u32 *tre_ch)
+{
+	u32 i;
+	struct hidma_lldev *lldev = llhndl;
+	struct hidma_tre *tre = NULL;
+	u32 *tre_local;
+
+	if (unlikely(!tre_ch) || unlikely(!lldev))
+		return -EINVAL;
+
+	/* need to have at least one empty spot in the queue */
+	for (i = 0; i < lldev->nr_tres - 1; i++) {
+		if (atomic_add_unless(&lldev->trepool[i].allocated, 1, 1))
+			break;
+	}
+
+	if (i == (lldev->nr_tres - 1))
+		return -ENOMEM;
+
+	tre = &lldev->trepool[i];
+	tre->dma_sig = dma_sig;
+	tre->dev_name = dev_name;
+	tre->callback = callback;
+	tre->data = data;
+	tre->chidx = i;
+	tre->status = 0;
+	tre->queued = 0;
+	lldev->tx_status_list[i].err_code = 0;
+	tre->lldev = lldev;
+	tre_local = &tre->tre_local[0];
+	tre_local[TRE_CFG_IDX] = TRE_MEMCPY;
+	tre_local[TRE_CFG_IDX] |= ((lldev->evridx & 0xFF) << 8);
+	tre_local[TRE_CFG_IDX] |= BIT(16);	/* set IEOB */
+	*tre_ch = i;
+	if (callback)
+		callback(data);
+	return 1;
+}
+EXPORT_SYMBOL_GPL(hidma_ll_request);
+
+/*
+ * Multiple TREs may be queued and waiting in the
+ * pending queue.
+ */
+static void hidma_ll_tre_complete(unsigned long arg)
+{
+	struct hidma_tre *tre = (struct hidma_tre *)arg;
+
+	/* call the user if it has been read by the hardware*/
+	if (tre->callback)
+		tre->callback(tre->data);
+}
+
+/*
+ * Called to handle the interrupt for the channel.
+ * Return a positive number if TRE or EVRE were consumed on this run.
+ * Return a positive number if there are pending TREs or EVREs.
+ * Return 0 if there is nothing to consume or no pending TREs/EVREs found.
+ */
+static int hidma_handle_tre_completion(struct hidma_lldev *lldev)
+{
+	struct hidma_tre *tre;
+	u32 evre_write_off;
+	u32 evre_ring_size = lldev->evre_ring_size;
+	u32 tre_ring_size = lldev->tre_ring_size;
+	u32 num_completed = 0, tre_iterator, evre_iterator;
+	unsigned long flags;
+
+	evre_write_off = readl_relaxed(lldev->evca + EVCA_WRITE_PTR_OFFSET);
+	tre_iterator = lldev->tre_processed_off;
+	evre_iterator = lldev->evre_processed_off;
+
+	if ((evre_write_off > evre_ring_size) ||
+		((evre_write_off % EVRE_SIZE) != 0)) {
+		dev_err(lldev->dev, "HW reports invalid EVRE write offset\n");
+		return 0;
+	}
+
+	/* By the time control reaches here the number of EVREs and TREs
+	 * may not match. Only consume the ones that hardware told us.
+	 */
+	while ((evre_iterator != evre_write_off)) {
+		u32 *current_evre = lldev->evre_ring + evre_iterator;
+		u32 cfg;
+		u8 err_info;
+
+		spin_lock_irqsave(&lldev->lock, flags);
+		tre = lldev->pending_tre_list[tre_iterator / TRE_SIZE];
+		if (!tre) {
+			spin_unlock_irqrestore(&lldev->lock, flags);
+			dev_warn(lldev->dev,
+				"tre_index [%d] and tre out of sync\n",
+				tre_iterator / TRE_SIZE);
+			tre_iterator += TRE_SIZE;
+			if (tre_iterator >= tre_ring_size)
+				tre_iterator -= tre_ring_size;
+			evre_iterator += EVRE_SIZE;
+			if (evre_iterator >= evre_ring_size)
+				evre_iterator -= evre_ring_size;
+
+			continue;
+		}
+		lldev->pending_tre_list[tre->tre_index] = NULL;
+
+		/* Keep track of pending TREs that SW is expecting to receive
+		 * from HW. We got one now. Decrement our counter.
+		 */
+		lldev->pending_tre_count--;
+		if (lldev->pending_tre_count < 0) {
+			dev_warn(lldev->dev,
+				"tre count mismatch on completion");
+			lldev->pending_tre_count = 0;
+		}
+
+		spin_unlock_irqrestore(&lldev->lock, flags);
+
+		cfg = current_evre[EVRE_CFG_IDX];
+		err_info = (cfg >> EVRE_ERRINFO_BIT_POS);
+		err_info = err_info & EVRE_ERRINFO_MASK;
+		lldev->tx_status_list[tre->chidx].err_info = err_info;
+		lldev->tx_status_list[tre->chidx].err_code =
+			(cfg >> EVRE_CODE_BIT_POS) & EVRE_CODE_MASK;
+		tre->queued = 0;
+
+		tasklet_schedule(&tre->task);
+
+		tre_iterator += TRE_SIZE;
+		if (tre_iterator >= tre_ring_size)
+			tre_iterator -= tre_ring_size;
+		evre_iterator += EVRE_SIZE;
+		if (evre_iterator >= evre_ring_size)
+			evre_iterator -= evre_ring_size;
+
+		/* Read the new event descriptor written by the HW.
+		 * As we are processing the delivered events, other events
+		 * get queued to the SW for processing.
+		 */
+		evre_write_off =
+			readl_relaxed(lldev->evca + EVCA_WRITE_PTR_OFFSET);
+		num_completed++;
+	}
+
+	if (num_completed) {
+		u32 evre_read_off = (lldev->evre_processed_off +
+				EVRE_SIZE * num_completed);
+		u32 tre_read_off = (lldev->tre_processed_off +
+				TRE_SIZE * num_completed);
+
+		evre_read_off = evre_read_off % evre_ring_size;
+		tre_read_off = tre_read_off % tre_ring_size;
+
+		writel(evre_read_off, lldev->evca + EVCA_DOORBELL_OFFSET);
+
+		/* record the last processed tre offset */
+		lldev->tre_processed_off = tre_read_off;
+		lldev->evre_processed_off = evre_read_off;
+	}
+
+	return num_completed;
+}
+
+void hidma_cleanup_pending_tre(void *llhndl, u8 err_info, u8 err_code)
+{
+	u32 tre_iterator;
+	struct hidma_tre *tre;
+	struct hidma_lldev *lldev = llhndl;
+	u32 tre_ring_size = lldev->tre_ring_size;
+	int num_completed = 0;
+	u32 tre_read_off;
+	unsigned long flags;
+
+	tre_iterator = lldev->tre_processed_off;
+	while (lldev->pending_tre_count) {
+		int tre_index = tre_iterator / TRE_SIZE;
+
+		spin_lock_irqsave(&lldev->lock, flags);
+		tre = lldev->pending_tre_list[tre_index];
+		if (!tre) {
+			spin_unlock_irqrestore(&lldev->lock, flags);
+			tre_iterator += TRE_SIZE;
+			if (tre_iterator >= tre_ring_size)
+				tre_iterator -= tre_ring_size;
+			continue;
+		}
+		lldev->pending_tre_list[tre_index] = NULL;
+		lldev->pending_tre_count--;
+		if (lldev->pending_tre_count < 0) {
+			dev_warn(lldev->dev,
+				"tre count mismatch on completion");
+			lldev->pending_tre_count = 0;
+		}
+		spin_unlock_irqrestore(&lldev->lock, flags);
+
+		lldev->tx_status_list[tre->chidx].err_info = err_info;
+		lldev->tx_status_list[tre->chidx].err_code = err_code;
+		tre->queued = 0;
+
+		tasklet_schedule(&tre->task);
+
+		tre_iterator += TRE_SIZE;
+		if (tre_iterator >= tre_ring_size)
+			tre_iterator -= tre_ring_size;
+
+		num_completed++;
+	}
+	tre_read_off = (lldev->tre_processed_off +
+			TRE_SIZE * num_completed);
+
+	tre_read_off = tre_read_off % tre_ring_size;
+
+	/* record the last processed tre offset */
+	lldev->tre_processed_off = tre_read_off;
+}
+EXPORT_SYMBOL_GPL(hidma_cleanup_pending_tre);
+
+static int hidma_ll_reset(struct hidma_lldev *lldev)
+{
+	u32 val;
+	int count;
+
+	val = readl_relaxed(lldev->trca + TRCA_CTRLSTS_OFFSET);
+	val = val & ~(CH_CONTROL_MASK << 16);
+	val = val | (CH_RESET << 16);
+	writel_relaxed(val, lldev->trca + TRCA_CTRLSTS_OFFSET);
+
+	/* wait until the reset is performed */
+	wmb();
+
+	/* Delay 10ms after reset to allow DMA logic to quiesce.*/
+	for (count = 0; count < 10; count++) {
+		val = readl_relaxed(lldev->trca + TRCA_CTRLSTS_OFFSET);
+		lldev->trch_state = (val >> CH_STATE_BIT_POS)
+					& CH_STATE_MASK;
+		if (lldev->trch_state == CH_DISABLED)
+			break;
+		mdelay(1);
+	}
+	val = readl_relaxed(lldev->trca + TRCA_CTRLSTS_OFFSET);
+	lldev->trch_state = (val >> CH_STATE_BIT_POS) & CH_STATE_MASK;
+	if (lldev->trch_state != CH_DISABLED) {
+		dev_err(lldev->dev,
+			"transfer channel did not reset\n");
+		return -ENODEV;
+	}
+
+	val = readl_relaxed(lldev->evca + EVCA_CTRLSTS_OFFSET);
+	val = val & ~(CH_CONTROL_MASK << 16);
+	val = val | (CH_RESET << 16);
+	writel_relaxed(val, lldev->evca + EVCA_CTRLSTS_OFFSET);
+
+	/* wait until the reset is performed */
+	wmb();
+
+	/* Delay 10ms after reset to allow DMA logic to quiesce.*/
+	for (count = 0; count < 10; count++) {
+		val = readl_relaxed(lldev->evca + EVCA_CTRLSTS_OFFSET);
+		lldev->evch_state = (val >> CH_STATE_BIT_POS)
+					& CH_STATE_MASK;
+		if (lldev->evch_state == CH_DISABLED)
+			break;
+		mdelay(1);
+	}
+	val = readl_relaxed(lldev->evca + EVCA_CTRLSTS_OFFSET);
+	lldev->evch_state = (val >> CH_STATE_BIT_POS) & CH_STATE_MASK;
+	if (lldev->evch_state != CH_DISABLED) {
+		dev_err(lldev->dev,
+			"event channel did not reset\n");
+		return -ENODEV;
+	}
+
+	/* ensure that both channels are disabled before leaving*/
+	mb();
+	lldev->trch_state = CH_DISABLED;
+	lldev->evch_state = CH_DISABLED;
+	return 0;
+}
+
+static void hidma_ll_enable_irq(struct hidma_lldev *lldev, u32 irq_bits)
+{
+	writel_relaxed(irq_bits, lldev->evca + EVCA_IRQ_EN_OFFSET);
+	dev_dbg(lldev->dev, "enableirq\n");
+}
+
+/*
+ * The interrupt handler for HIDMA will try to consume as many pending
+ * EVRE from the event queue as possible. Each EVRE has an associated
+ * TRE that holds the user interface parameters. EVRE reports the
+ * result of the transaction. Hardware guarantees ordering between EVREs
+ * and TREs. We use last processed offset to figure out which TRE is
+ * associated with which EVRE. If two TREs are consumed by HW, the EVREs
+ * are in order in the event ring.
+ * This handler will do a one pass for consuming EVREs. Other EVREs may
+ * be delivered while we are working. It will try to consume incoming
+ * EVREs one more time and return.
+ * For unprocessed EVREs, hardware will trigger another interrupt until
+ * all the interrupt bits are cleared.
+ */
+static void hidma_ll_int_handler_internal(struct hidma_lldev *lldev)
+{
+	u32 status;
+	u32 enable;
+	u32 cause;
+	int repeat = 2;
+	unsigned long timeout;
+
+	status = readl_relaxed(lldev->evca + EVCA_IRQ_STAT_OFFSET);
+	enable = readl_relaxed(lldev->evca + EVCA_IRQ_EN_OFFSET);
+	cause = status & enable;
+
+	if ((cause & (1<<IRQ_EV_CH_RESET_ERR_BIT_POS))
+		|| (cause & (1<<IRQ_TR_CH_RESET_ERROR))) {
+		u8 err_code = EVRE_STATUS_ERROR;
+		u8 err_info = 0xFE;
+
+		/* Clear out pending interrupts */
+		writel(cause, lldev->evca + EVCA_IRQ_CLR_OFFSET);
+
+		hidma_cleanup_pending_tre(lldev, err_info, err_code);
+		dev_err(lldev->dev,
+			"The channel is terminated(%x).Hardware reset needed\n",
+			cause);
+		return;
+	}
+
+	if ((cause & (BIT(IRQ_TR_CH_INVALID_TRE_BIT_POS))) ||
+			(cause & BIT(IRQ_TR_CH_TRE_RD_RSP_ER_BIT_POS)) ||
+			(cause & BIT(IRQ_EV_CH_WR_RESP_BIT_POS)) ||
+			(cause & BIT(IRQ_TR_CH_DATA_RD_ER_BIT_POS)) ||
+			(cause & BIT(IRQ_TR_CH_DATA_WR_ER_BIT_POS))) {
+		u8 err_code = EVRE_STATUS_ERROR;
+		u8 err_info = 0xFF;
+
+		/* Clear out pending interrupts */
+		writel(cause, lldev->evca + EVCA_IRQ_CLR_OFFSET);
+
+		dev_err(lldev->dev,
+			"error 0x%x, resetting...\n", cause);
+
+		hidma_cleanup_pending_tre(lldev, err_info, err_code);
+
+		/* reset the channel for recovery */
+		if (hidma_ll_setup(lldev)) {
+			dev_err(lldev->dev,
+				"channel reinitialize failed after error\n");
+			return;
+		}
+		hidma_ll_enable_irq(lldev, ENABLE_IRQS);
+		return;
+	}
+
+	/* Try to consume as many EVREs as possible.
+	 * skip this loop if the interrupt is spurious.
+	 */
+	while (cause && repeat) {
+		unsigned long start = jiffies;
+
+		/* This timeout should be sufficent for core to finish */
+		timeout = start + msecs_to_jiffies(500);
+
+		while (lldev->pending_tre_count) {
+			hidma_handle_tre_completion(lldev);
+			if (time_is_before_jiffies(timeout)) {
+				dev_warn(lldev->dev,
+					"ISR timeout %lx vs. %lx from %lx [%d]\n",
+					jiffies, timeout, start,
+					lldev->pending_tre_count);
+				break;
+			}
+		}
+
+		/* We consumed TREs or there are pending TREs or EVREs. */
+		writel_relaxed(cause, lldev->evca + EVCA_IRQ_CLR_OFFSET);
+
+		/* Another interrupt might have arrived while we are
+		 * processing this one. Read the new cause.
+		 */
+		status = readl_relaxed(lldev->evca + EVCA_IRQ_STAT_OFFSET);
+		enable = readl_relaxed(lldev->evca + EVCA_IRQ_EN_OFFSET);
+		cause = status & enable;
+
+		repeat--;
+	}
+}
+
+
+static int hidma_ll_enable(struct hidma_lldev *lldev)
+{
+	u32 val;
+
+	val = readl_relaxed(lldev->evca + EVCA_CTRLSTS_OFFSET);
+	val &= ~(CH_CONTROL_MASK << 16);
+	val |= (CH_ENABLE << 16);
+
+	writel_relaxed(val, lldev->evca + EVCA_CTRLSTS_OFFSET);
+
+	/* wait until channel is enabled */
+	wmb();
+
+	mdelay(1);
+
+	val = readl_relaxed(lldev->evca + EVCA_CTRLSTS_OFFSET);
+	lldev->evch_state = (val >> CH_STATE_BIT_POS) & CH_STATE_MASK;
+	if ((lldev->evch_state != CH_ENABLED) &&
+		(lldev->evch_state != CH_RUNNING)) {
+		dev_err(lldev->dev,
+			"event channel did not get enabled\n");
+		return -ENODEV;
+	}
+
+	val = readl_relaxed(lldev->trca + TRCA_CTRLSTS_OFFSET);
+	val = val & ~(CH_CONTROL_MASK << 16);
+	val = val | (CH_ENABLE << 16);
+	writel_relaxed(val, lldev->trca + TRCA_CTRLSTS_OFFSET);
+
+	/* wait until channel is enabled */
+	wmb();
+
+	mdelay(1);
+
+	val = readl_relaxed(lldev->trca + TRCA_CTRLSTS_OFFSET);
+	lldev->trch_state = (val >> CH_STATE_BIT_POS) & CH_STATE_MASK;
+	if ((lldev->trch_state != CH_ENABLED) &&
+		(lldev->trch_state != CH_RUNNING)) {
+		dev_err(lldev->dev,
+			"transfer channel did not get enabled\n");
+		return -ENODEV;
+	}
+
+	/* ensure that both channels are enabled before leaving*/
+	mb();
+
+	lldev->trch_state = CH_ENABLED;
+	lldev->evch_state = CH_ENABLED;
+
+	return 0;
+}
+
+int hidma_ll_resume(void *llhndl)
+{
+	return hidma_ll_enable((struct hidma_lldev *)llhndl);
+}
+EXPORT_SYMBOL_GPL(hidma_ll_resume);
+
+static int hidma_ll_hw_start(void *llhndl)
+{
+	int rc = 0;
+	struct hidma_lldev *lldev = llhndl;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&lldev->lock, irqflags);
+	writel_relaxed(lldev->tre_write_offset,
+			lldev->trca + TRCA_DOORBELL_OFFSET);
+	spin_unlock_irqrestore(&lldev->lock, irqflags);
+
+	return rc;
+}
+
+bool hidma_ll_isenabled(void *llhndl)
+{
+	struct hidma_lldev *lldev = llhndl;
+	u32 val;
+
+	val = readl_relaxed(lldev->trca + TRCA_CTRLSTS_OFFSET);
+	lldev->trch_state = (val >> CH_STATE_BIT_POS) & CH_STATE_MASK;
+	val = readl_relaxed(lldev->evca + EVCA_CTRLSTS_OFFSET);
+	lldev->evch_state = (val >> CH_STATE_BIT_POS) & CH_STATE_MASK;
+
+	/* both channels have to be enabled before calling this function*/
+	if (((lldev->trch_state == CH_ENABLED) ||
+		(lldev->trch_state == CH_RUNNING)) &&
+		((lldev->evch_state == CH_ENABLED) ||
+			(lldev->evch_state == CH_RUNNING)))
+		return true;
+
+	dev_dbg(lldev->dev, "channels are not enabled or are in error state");
+	return false;
+}
+EXPORT_SYMBOL_GPL(hidma_ll_isenabled);
+
+int hidma_ll_queue_request(void *llhndl, u32 tre_ch)
+{
+	struct hidma_lldev *lldev = llhndl;
+	struct hidma_tre *tre;
+	int rc = 0;
+	unsigned long flags;
+
+	tre = &lldev->trepool[tre_ch];
+
+	/* copy the TRE into its location in the TRE ring */
+	spin_lock_irqsave(&lldev->lock, flags);
+	tre->tre_index = lldev->tre_write_offset / TRE_SIZE;
+	lldev->pending_tre_list[tre->tre_index] = tre;
+	memcpy(lldev->tre_ring + lldev->tre_write_offset, &tre->tre_local[0],
+		TRE_SIZE);
+	lldev->tx_status_list[tre->chidx].err_code = 0;
+	lldev->tx_status_list[tre->chidx].err_info = 0;
+	tre->queued = 1;
+	lldev->pending_tre_count++;
+	lldev->tre_write_offset = (lldev->tre_write_offset + TRE_SIZE)
+				% lldev->tre_ring_size;
+	spin_unlock_irqrestore(&lldev->lock, flags);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(hidma_ll_queue_request);
+
+int hidma_ll_start(void *llhndl)
+{
+	return hidma_ll_hw_start(llhndl);
+}
+EXPORT_SYMBOL_GPL(hidma_ll_start);
+
+/*
+ * Note that even though we stop this channel
+ * if there is a pending transaction in flight
+ * it will complete and follow the callback.
+ * This request will prevent further requests
+ * to be made.
+ */
+int hidma_ll_pause(void *llhndl)
+{
+	struct hidma_lldev *lldev = llhndl;
+	u32 val;
+	int count;
+
+	val = readl_relaxed(lldev->evca + EVCA_CTRLSTS_OFFSET);
+	lldev->evch_state = (val >> CH_STATE_BIT_POS) & CH_STATE_MASK;
+	val = readl_relaxed(lldev->trca + TRCA_CTRLSTS_OFFSET);
+	lldev->trch_state = (val >> CH_STATE_BIT_POS) & CH_STATE_MASK;
+
+	/* already suspended by this OS */
+	if ((lldev->trch_state == CH_SUSPENDED) ||
+		(lldev->evch_state == CH_SUSPENDED))
+		return 0;
+
+	/* already stopped by the manager */
+	if ((lldev->trch_state == CH_STOPPED) ||
+		(lldev->evch_state == CH_STOPPED))
+		return 0;
+
+	val = readl_relaxed(lldev->trca + TRCA_CTRLSTS_OFFSET);
+	val = val & ~(CH_CONTROL_MASK << 16);
+	val = val | (CH_SUSPEND << 16);
+	writel_relaxed(val, lldev->trca + TRCA_CTRLSTS_OFFSET);
+
+	/* start the wait right after the suspend is confirmed */
+	wmb();
+
+	/* Delay 10ms after reset to allow DMA logic to quiesce.*/
+	for (count = 0; count < 10; count++) {
+		val = readl_relaxed(lldev->trca + TRCA_CTRLSTS_OFFSET);
+		lldev->trch_state = (val >> CH_STATE_BIT_POS)
+					& CH_STATE_MASK;
+		if (lldev->trch_state == CH_SUSPENDED)
+			break;
+		mdelay(1);
+	}
+	val = readl_relaxed(lldev->trca + TRCA_CTRLSTS_OFFSET);
+	lldev->trch_state = (val >> CH_STATE_BIT_POS) & CH_STATE_MASK;
+	if (lldev->trch_state != CH_SUSPENDED)
+		return -ENODEV;
+
+	val = readl_relaxed(lldev->evca + EVCA_CTRLSTS_OFFSET);
+	val = val & ~(CH_CONTROL_MASK << 16);
+	val = val | (CH_SUSPEND << 16);
+	writel_relaxed(val, lldev->evca + EVCA_CTRLSTS_OFFSET);
+
+	/* start the wait right after the suspend is confirmed */
+	wmb();
+
+	/* Delay 10ms after reset to allow DMA logic to quiesce.*/
+	for (count = 0; count < 10; count++) {
+		val = readl_relaxed(lldev->evca + EVCA_CTRLSTS_OFFSET);
+		lldev->evch_state = (val >> CH_STATE_BIT_POS)
+					& CH_STATE_MASK;
+		if (lldev->evch_state == CH_SUSPENDED)
+			break;
+		mdelay(1);
+	}
+	val = readl_relaxed(lldev->evca + EVCA_CTRLSTS_OFFSET);
+	lldev->evch_state = (val >> CH_STATE_BIT_POS) & CH_STATE_MASK;
+	if (lldev->evch_state != CH_SUSPENDED)
+		return -ENODEV;
+
+	/* ensure that both channels are suspended before leaving*/
+	mb();
+
+	lldev->trch_state = CH_SUSPENDED;
+	lldev->evch_state = CH_SUSPENDED;
+	dev_dbg(lldev->dev, "stop\n");
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(hidma_ll_pause);
+
+void hidma_ll_set_transfer_params(void *llhndl, u32 tre_ch,
+	dma_addr_t src, dma_addr_t dest, u32 len, u32 flags)
+{
+	struct hidma_lldev *lldev = llhndl;
+	struct hidma_tre *tre;
+	u32 *tre_local;
+
+	if (unlikely(tre_ch >= lldev->nr_tres)) {
+		dev_err(lldev->dev,
+			"invalid TRE number in transfer params:%d", tre_ch);
+		return;
+	}
+
+	tre = &lldev->trepool[tre_ch];
+	if (unlikely(atomic_read(&tre->allocated) != true)) {
+		dev_err(lldev->dev,
+			"trying to set params on an unused TRE:%d", tre_ch);
+		return;
+	}
+
+	tre_local = &tre->tre_local[0];
+	tre_local[TRE_LEN_IDX] = len;
+	tre_local[TRE_SRC_LOW_IDX] = lower_32_bits(src);
+	tre_local[TRE_SRC_HI_IDX] = upper_32_bits(src);
+	tre_local[TRE_DEST_LOW_IDX] = lower_32_bits(dest);
+	tre_local[TRE_DEST_HI_IDX] = upper_32_bits(dest);
+	tre->int_flags = flags;
+
+	dev_dbg(lldev->dev, "transferparams: tre_ch:%d %pap->%pap len:%u\n",
+		tre_ch, &src, &dest, len);
+}
+EXPORT_SYMBOL_GPL(hidma_ll_set_transfer_params);
+
+/* Called during initialization and after an error condition
+ * to restore hardware state.
+ */
+int hidma_ll_setup(struct hidma_lldev *lldev)
+{
+	int rc;
+	u64 addr;
+	u32 val;
+	u32 nr_tres = lldev->nr_tres;
+
+	lldev->pending_tre_count = 0;
+	lldev->tre_processed_off = 0;
+	lldev->evre_processed_off = 0;
+	lldev->tre_write_offset = 0;
+
+	/* disable interrupts */
+	hidma_ll_enable_irq(lldev, 0);
+
+	/* clear all pending interrupts */
+	val = readl_relaxed(lldev->evca + EVCA_IRQ_STAT_OFFSET);
+	writel_relaxed(val, lldev->evca + EVCA_IRQ_CLR_OFFSET);
+
+	rc = hidma_ll_reset(lldev);
+	if (rc)
+		return rc;
+
+	/* Clear all pending interrupts again.
+	 * Otherwise, we observe reset complete interrupts.
+	 */
+	val = readl_relaxed(lldev->evca + EVCA_IRQ_STAT_OFFSET);
+	writel_relaxed(val, lldev->evca + EVCA_IRQ_CLR_OFFSET);
+
+	/* disable interrupts again after reset */
+	hidma_ll_enable_irq(lldev, 0);
+
+	addr = lldev->tre_ring_handle;
+	writel_relaxed(lower_32_bits(addr),
+			lldev->trca + TRCA_RING_LOW_OFFSET);
+	writel_relaxed(upper_32_bits(addr),
+			lldev->trca + TRCA_RING_HIGH_OFFSET);
+	writel_relaxed(lldev->tre_ring_size,
+			lldev->trca + TRCA_RING_LEN_OFFSET);
+
+	addr = lldev->evre_ring_handle;
+	writel_relaxed(lower_32_bits(addr),
+			lldev->evca + EVCA_RING_LOW_OFFSET);
+	writel_relaxed(upper_32_bits(addr),
+			lldev->evca + EVCA_RING_HIGH_OFFSET);
+	writel_relaxed(EVRE_SIZE * nr_tres,
+			lldev->evca + EVCA_RING_LEN_OFFSET);
+
+	/* support IRQ only for now */
+	val = readl_relaxed(lldev->evca + EVCA_INTCTRL_OFFSET);
+	val = val & ~(0xF);
+	val = val | 0x1;
+	writel_relaxed(val, lldev->evca + EVCA_INTCTRL_OFFSET);
+
+	/* clear all pending interrupts and enable them*/
+	writel_relaxed(ENABLE_IRQS, lldev->evca + EVCA_IRQ_CLR_OFFSET);
+	hidma_ll_enable_irq(lldev, ENABLE_IRQS);
+
+	rc = hidma_ll_enable(lldev);
+	if (rc)
+		return rc;
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(hidma_ll_setup);
+
+int hidma_ll_init(void **lldevp, struct device *dev, u32 nr_tres,
+			void __iomem *trca, void __iomem *evca,
+			u8 evridx)
+{
+	u32 required_bytes;
+	struct hidma_lldev *lldev;
+	int rc;
+	u32 i;
+
+	if (!lldevp || !trca || !evca || !dev || !nr_tres)
+		return -EINVAL;
+
+	/* need at least four TREs */
+	if (nr_tres < 4)
+		return -EINVAL;
+
+	/* need an extra space */
+	nr_tres += 1;
+
+	lldev = devm_kzalloc(dev, sizeof(struct hidma_lldev), GFP_KERNEL);
+	if (!lldev)
+		return -ENOMEM;
+
+	lldev->evca = evca;
+	lldev->trca = trca;
+	lldev->dev = dev;
+	required_bytes = sizeof(struct hidma_tre) * nr_tres;
+	lldev->trepool = devm_kzalloc(lldev->dev, required_bytes, GFP_KERNEL);
+	if (!lldev->trepool)
+		return -ENOMEM;
+
+	required_bytes = sizeof(lldev->pending_tre_list[0]) * nr_tres;
+	lldev->pending_tre_list = devm_kzalloc(dev, required_bytes, GFP_KERNEL);
+	if (!lldev->pending_tre_list)
+		return -ENOMEM;
+
+	required_bytes = sizeof(lldev->tx_status_list[0]) * nr_tres;
+	lldev->tx_status_list = devm_kzalloc(dev, required_bytes, GFP_KERNEL);
+	if (!lldev->tx_status_list)
+		return -ENOMEM;
+
+	lldev->tre_ring = dmam_alloc_coherent(dev, (TRE_SIZE + 1) * nr_tres,
+					&lldev->tre_ring_handle, GFP_KERNEL);
+	if (!lldev->tre_ring)
+		return -ENOMEM;
+
+	memset(lldev->tre_ring, 0, (TRE_SIZE + 1) * nr_tres);
+	lldev->tre_ring_size = TRE_SIZE * nr_tres;
+	lldev->nr_tres = nr_tres;
+
+	/* the TRE ring has to be TRE_SIZE aligned */
+	if (!IS_ALIGNED(lldev->tre_ring_handle, TRE_SIZE)) {
+		u8  tre_ring_shift;
+
+		tre_ring_shift = lldev->tre_ring_handle % TRE_SIZE;
+		tre_ring_shift = TRE_SIZE - tre_ring_shift;
+		lldev->tre_ring_handle += tre_ring_shift;
+		lldev->tre_ring += tre_ring_shift;
+	}
+
+	lldev->evre_ring = dmam_alloc_coherent(dev, (EVRE_SIZE + 1) * nr_tres,
+					&lldev->evre_ring_handle, GFP_KERNEL);
+	if (!lldev->evre_ring)
+		return -ENOMEM;
+
+	memset(lldev->evre_ring, 0, (EVRE_SIZE + 1) * nr_tres);
+	lldev->evre_ring_size = EVRE_SIZE * nr_tres;
+
+	/* the EVRE ring has to be EVRE_SIZE aligned */
+	if (!IS_ALIGNED(lldev->evre_ring_handle, EVRE_SIZE)) {
+		u8  evre_ring_shift;
+
+		evre_ring_shift = lldev->evre_ring_handle % EVRE_SIZE;
+		evre_ring_shift = EVRE_SIZE - evre_ring_shift;
+		lldev->evre_ring_handle += evre_ring_shift;
+		lldev->evre_ring += evre_ring_shift;
+	}
+	lldev->nr_tres = nr_tres;
+	lldev->evridx = evridx;
+
+	rc = hidma_ll_setup(lldev);
+	if (rc)
+		return rc;
+
+	spin_lock_init(&lldev->lock);
+	for (i = 0; i < nr_tres; i++)
+		tasklet_init(&lldev->trepool[i].task, hidma_ll_tre_complete,
+				(unsigned long)&lldev->trepool[i]);
+	lldev->initialized = 1;
+	*lldevp = lldev;
+	hidma_ll_enable_irq(lldev, ENABLE_IRQS);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(hidma_ll_init);
+
+int hidma_ll_uninit(void *llhndl)
+{
+	struct hidma_lldev *lldev = llhndl;
+	int rc = 0;
+	u32 val;
+
+	if (!lldev)
+		return -ENODEV;
+
+	if (lldev->initialized) {
+		u32 required_bytes;
+		u32 i;
+
+		lldev->initialized = 0;
+
+		required_bytes = sizeof(struct hidma_tre) * lldev->nr_tres;
+		for (i = 0; i < lldev->nr_tres; i++)
+			tasklet_kill(&lldev->trepool[i].task);
+		memset(lldev->trepool, 0, required_bytes);
+		lldev->trepool = NULL;
+		lldev->pending_tre_count = 0;
+		lldev->tre_write_offset = 0;
+
+		rc = hidma_ll_reset(lldev);
+
+		/* Clear all pending interrupts again.
+		 * Otherwise, we observe reset complete interrupts.
+		 */
+		val = readl_relaxed(lldev->evca + EVCA_IRQ_STAT_OFFSET);
+		writel_relaxed(val, lldev->evca + EVCA_IRQ_CLR_OFFSET);
+		hidma_ll_enable_irq(lldev, 0);
+	}
+	return rc;
+}
+EXPORT_SYMBOL_GPL(hidma_ll_uninit);
+
+irqreturn_t hidma_ll_inthandler(int chirq, void *arg)
+{
+	struct hidma_lldev *lldev = arg;
+
+	hidma_ll_int_handler_internal(lldev);
+	return IRQ_HANDLED;
+}
+EXPORT_SYMBOL_GPL(hidma_ll_inthandler);
+
+enum dma_status hidma_ll_status(void *llhndl, u32 tre_ch)
+{
+	struct hidma_lldev *lldev = llhndl;
+	enum dma_status ret = DMA_ERROR;
+	unsigned long flags;
+	u8 err_code;
+
+	spin_lock_irqsave(&lldev->lock, flags);
+	err_code = lldev->tx_status_list[tre_ch].err_code;
+
+	if (err_code & EVRE_STATUS_COMPLETE)
+		ret = DMA_COMPLETE;
+	else if (err_code & EVRE_STATUS_ERROR)
+		ret = DMA_ERROR;
+	else
+		ret = DMA_IN_PROGRESS;
+	spin_unlock_irqrestore(&lldev->lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(hidma_ll_status);
+
+#if IS_ENABLED(CONFIG_DEBUG_FS)
+#define HIDMA_CHAN_SHOW(chan, name) \
+		seq_printf(s, #name "=0x%x\n", chan->name)
+
+#define HIDMA_DEVICE_SHOW(device, name) \
+		seq_printf(s, #name "=0x%x\n", device->name)
+
+void hidma_ll_chstats(struct seq_file *s, void *llhndl, u32 tre_ch)
+{
+	struct hidma_lldev *lldev = llhndl;
+	struct hidma_tre *tre;
+	u32 length;
+	dma_addr_t src_start;
+	dma_addr_t dest_start;
+	u32 *tre_local;
+
+	if (unlikely(tre_ch >= lldev->nr_tres)) {
+		dev_err(lldev->dev, "invalid TRE number in chstats:%d",
+			tre_ch);
+		return;
+	}
+	tre = &lldev->trepool[tre_ch];
+	seq_printf(s, "------Channel %d -----\n", tre_ch);
+	seq_printf(s, "allocated=%d\n", atomic_read(&tre->allocated));
+	HIDMA_CHAN_SHOW(tre, queued);
+	seq_printf(s, "err_info=0x%x\n",
+		   lldev->tx_status_list[tre->chidx].err_info);
+	seq_printf(s, "err_code=0x%x\n",
+		   lldev->tx_status_list[tre->chidx].err_code);
+	HIDMA_CHAN_SHOW(tre, status);
+	HIDMA_CHAN_SHOW(tre, chidx);
+	HIDMA_CHAN_SHOW(tre, dma_sig);
+	seq_printf(s, "dev_name=%s\n", tre->dev_name);
+	seq_printf(s, "callback=%p\n", tre->callback);
+	seq_printf(s, "data=%p\n", tre->data);
+	HIDMA_CHAN_SHOW(tre, tre_index);
+
+	tre_local = &tre->tre_local[0];
+	src_start = tre_local[TRE_SRC_LOW_IDX];
+	src_start = ((u64)(tre_local[TRE_SRC_HI_IDX]) << 32) + src_start;
+	dest_start = tre_local[TRE_DEST_LOW_IDX];
+	dest_start += ((u64)(tre_local[TRE_DEST_HI_IDX]) << 32);
+	length = tre_local[TRE_LEN_IDX];
+
+	seq_printf(s, "src=%pap\n", &src_start);
+	seq_printf(s, "dest=%pap\n", &dest_start);
+	seq_printf(s, "length=0x%x\n", length);
+}
+EXPORT_SYMBOL_GPL(hidma_ll_chstats);
+
+void hidma_ll_devstats(struct seq_file *s, void *llhndl)
+{
+	struct hidma_lldev *lldev = llhndl;
+
+	seq_puts(s, "------Device -----\n");
+	HIDMA_DEVICE_SHOW(lldev, initialized);
+	HIDMA_DEVICE_SHOW(lldev, trch_state);
+	HIDMA_DEVICE_SHOW(lldev, evch_state);
+	HIDMA_DEVICE_SHOW(lldev, evridx);
+	HIDMA_DEVICE_SHOW(lldev, nr_tres);
+	seq_printf(s, "trca=%p\n", lldev->trca);
+	seq_printf(s, "tre_ring=%p\n", lldev->tre_ring);
+	seq_printf(s, "tre_ring_handle=%pap\n", &lldev->tre_ring_handle);
+	HIDMA_DEVICE_SHOW(lldev, tre_ring_size);
+	HIDMA_DEVICE_SHOW(lldev, tre_processed_off);
+	seq_printf(s, "pending_tre_count=%d\n", lldev->pending_tre_count);
+	seq_printf(s, "evca=%p\n", lldev->evca);
+	seq_printf(s, "evre_ring=%p\n", lldev->evre_ring);
+	seq_printf(s, "evre_ring_handle=%pap\n", &lldev->evre_ring_handle);
+	HIDMA_DEVICE_SHOW(lldev, evre_ring_size);
+	HIDMA_DEVICE_SHOW(lldev, evre_processed_off);
+	HIDMA_DEVICE_SHOW(lldev, tre_write_offset);
+}
+EXPORT_SYMBOL_GPL(hidma_ll_devstats);
+#endif
-- 
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project


^ permalink raw reply related	[flat|nested] 51+ messages in thread

* Re: [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver
  2015-10-30  3:08 [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver Sinan Kaya
  2015-10-30  3:08 ` [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver Sinan Kaya
@ 2015-10-30  4:34 ` kbuild test robot
  2015-10-30  4:34 ` [PATCH] dma: fix platform_no_drv_owner.cocci warnings kbuild test robot
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 51+ messages in thread
From: kbuild test robot @ 2015-10-30  4:34 UTC (permalink / raw)
  To: Sinan Kaya
  Cc: kbuild-all, dmaengine, timur, cov, jcm, Sinan Kaya, Rob Herring,
	Pawel Moll, Mark Rutland, Ian Campbell, Kumar Gala, Vinod Koul,
	Dan Williams, devicetree, linux-kernel

Hi Sinan,

[auto build test WARNING on lwn/docs-next -- if it's inappropriate base, please suggest rules for selecting the more suitable base]

url:    https://github.com/0day-ci/linux/commits/Sinan-Kaya/dma-add-Qualcomm-Technologies-HIDMA-management-driver/20151030-111408


coccinelle warnings: (new ones prefixed by >>)

>> drivers/dma/qcom_hidma_mgmt.c:852:3-8: No need to set .owner here. The core will do it.

Please review and possibly fold the followup patch.

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

^ permalink raw reply	[flat|nested] 51+ messages in thread

* [PATCH] dma: fix platform_no_drv_owner.cocci warnings
  2015-10-30  3:08 [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver Sinan Kaya
  2015-10-30  3:08 ` [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver Sinan Kaya
  2015-10-30  4:34 ` [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver kbuild test robot
@ 2015-10-30  4:34 ` kbuild test robot
  2015-10-30  9:34 ` [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver Arnd Bergmann
  2015-10-30 15:00 ` Mark Rutland
  4 siblings, 0 replies; 51+ messages in thread
From: kbuild test robot @ 2015-10-30  4:34 UTC (permalink / raw)
  To: Sinan Kaya
  Cc: kbuild-all, dmaengine, timur, cov, jcm, Sinan Kaya, Rob Herring,
	Pawel Moll, Mark Rutland, Ian Campbell, Kumar Gala, Vinod Koul,
	Dan Williams, devicetree, linux-kernel

drivers/dma/qcom_hidma_mgmt.c:852:3-8: No need to set .owner here. The core will do it.

 Remove .owner field if calls are used which set it automatically

Generated by: scripts/coccinelle/api/platform_no_drv_owner.cocci

CC: Sinan Kaya <okaya@codeaurora.org>
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
---

 qcom_hidma_mgmt.c |    1 -
 1 file changed, 1 deletion(-)

--- a/drivers/dma/qcom_hidma_mgmt.c
+++ b/drivers/dma/qcom_hidma_mgmt.c
@@ -849,7 +849,6 @@ static struct platform_driver qcom_hidma
 	.remove = qcom_hidma_mgmt_remove,
 	.driver = {
 		.name = MODULE_NAME,
-		.owner = THIS_MODULE,
 		.of_match_table = of_match_ptr(qcom_hidma_mgmt_match),
 		.acpi_match_table = ACPI_PTR(qcom_hidma_mgmt_acpi_ids),
 	},

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver
  2015-10-30  3:08 ` [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver Sinan Kaya
@ 2015-10-30  8:01   ` kbuild test robot
  2015-10-30  8:07   ` kbuild test robot
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 51+ messages in thread
From: kbuild test robot @ 2015-10-30  8:01 UTC (permalink / raw)
  To: Sinan Kaya
  Cc: kbuild-all, dmaengine, timur, cov, jcm, Sinan Kaya, Rob Herring,
	Pawel Moll, Mark Rutland, Ian Campbell, Kumar Gala, Vinod Koul,
	Dan Williams, devicetree, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 680 bytes --]

Hi Sinan,

[auto build test ERROR on lwn/docs-next -- if it's inappropriate base, please suggest rules for selecting the more suitable base]

url:    https://github.com/0day-ci/linux/commits/Sinan-Kaya/dma-add-Qualcomm-Technologies-HIDMA-management-driver/20151030-111408
config: i386-allmodconfig (attached as .config)
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All errors (new ones prefixed by >>):

   ERROR: "__udivdi3" undefined!
>> ERROR: "dma_to_phys" undefined!

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/octet-stream, Size: 51527 bytes --]

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver
  2015-10-30  3:08 ` [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver Sinan Kaya
  2015-10-30  8:01   ` kbuild test robot
@ 2015-10-30  8:07   ` kbuild test robot
  2015-10-30 10:24   ` Arnd Bergmann
  2015-10-30 14:50   ` Mark Rutland
  3 siblings, 0 replies; 51+ messages in thread
From: kbuild test robot @ 2015-10-30  8:07 UTC (permalink / raw)
  To: Sinan Kaya
  Cc: kbuild-all, dmaengine, timur, cov, jcm, Sinan Kaya, Rob Herring,
	Pawel Moll, Mark Rutland, Ian Campbell, Kumar Gala, Vinod Koul,
	Dan Williams, devicetree, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 749 bytes --]

Hi Sinan,

[auto build test ERROR on lwn/docs-next -- if it's inappropriate base, please suggest rules for selecting the more suitable base]

url:    https://github.com/0day-ci/linux/commits/Sinan-Kaya/dma-add-Qualcomm-Technologies-HIDMA-management-driver/20151030-111408
config: i386-allyesconfig (attached as .config)
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All errors (new ones prefixed by >>):

   drivers/built-in.o: In function `hidma_selftest_sg.constprop.7':
>> qcom_hidma.c:(.text+0x175be6): undefined reference to `__udivdi3'

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation

[-- Attachment #2: .config.gz --]
[-- Type: application/octet-stream, Size: 50882 bytes --]

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver
  2015-10-30  3:08 [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver Sinan Kaya
                   ` (2 preceding siblings ...)
  2015-10-30  4:34 ` [PATCH] dma: fix platform_no_drv_owner.cocci warnings kbuild test robot
@ 2015-10-30  9:34 ` Arnd Bergmann
  2015-10-31  6:51   ` Sinan Kaya
  2015-10-30 15:00 ` Mark Rutland
  4 siblings, 1 reply; 51+ messages in thread
From: Arnd Bergmann @ 2015-10-30  9:34 UTC (permalink / raw)
  To: Sinan Kaya
  Cc: dmaengine, timur, cov, jcm, Rob Herring, Pawel Moll,
	Mark Rutland, Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams,
	devicetree, linux-kernel

On Thursday 29 October 2015 23:08:12 Sinan Kaya wrote:
> diff --git a/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt b/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt
> new file mode 100644
> index 0000000..81674ab
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt
> @@ -0,0 +1,42 @@
> +Qualcomm Technologies HIDMA Management interface
> +
> +Required properties:
> +- compatible: must contain "qcom,hidma_mgmt"

No underscores in the name please. Also try to be more specific, in case this
device shows up in more than one SoC and they end up not being 100% compatible
we want to be able to tell them apart.

> +- reg: Address range for DMA device
> +- interrupts: Should contain the one interrupt shared by all channels
> +- nr-channels: Number of channels supported by this DMA controller.
> +- max-write: Maximum write burst in bytes. A memcpy requested is
> +  fragmented to multiples of this amount.
> +- max-read: Maximum read burst in bytes. A memcpy request is
> +  fragmented to multiples of this amount.
> +- max-wxactions: Maximum write transactions to perform in a burst
> +- max-rdactions: Maximum read transactions to perform in a burst
> +- max-memset-limit: Maximum memset limit
> +- ch-priority-#n: Priority of the channel
> +- ch-weight-#n: Round robin weight of the channel

For the last two, you can just use a multi-cell property, using one
cell per channel.

> +
> +#define MODULE_NAME			"hidma-mgmt"
> +#define PREFIX				MODULE_NAME ": "

These can be removed, just use dev_info() etc for messages, to include
a unique identifier.

> +#define AUTOSUSPEND_TIMEOUT		2000
> +
> +#define HIDMA_RUNTIME_GET(dmadev)				\
> +do {								\
> +	atomic_inc(&(dmadev)->pm_counter);			\
> +	TRC_PM(&(dmadev)->pdev->dev,				\
> +		"%s:%d pm_runtime_get %d\n", __func__, __LINE__,\
> +		atomic_read(&(dmadev)->pm_counter));		\
> +	pm_runtime_get_sync(&(dmadev)->pdev->dev);		\
> +} while (0)
> +
> +#define HIDMA_RUNTIME_SET(dmadev)				\
> +do {								\
> +	atomic_dec(&(dmadev)->pm_counter);			\
> +	TRC_PM(&(dmadev)->pdev->dev,				\
> +		"%s:%d pm_runtime_put_autosuspend:%d\n",	\
> +		__func__, __LINE__,				\
> +		atomic_read(&(dmadev)->pm_counter));		\
> +	pm_runtime_mark_last_busy(&(dmadev)->pdev->dev);	\
> +	pm_runtime_put_autosuspend(&(dmadev)->pdev->dev);	\
> +} while (0)

Better use inline functions.

> +struct qcom_hidma_mgmt_dev {
> +	u8 max_wr_xactions;
> +	u8 max_rd_xactions;
> +	u8 max_memset_limit;
> +	u16 max_write_request;
> +	u16 max_read_request;
> +	u16 nr_channels;
> +	u32 chreset_timeout;
> +	u32 sw_version;
> +	u8 *priority;
> +	u8 *weight;
> +
> +	atomic_t	pm_counter;
> +	/* Hardware device constants */
> +	dma_addr_t dev_physaddr;

No need to store the physaddr, it's write-only here.

> +
> +static unsigned int debug_pm;
> +module_param(debug_pm, uint, 0644);
> +MODULE_PARM_DESC(debug_pm,
> +		 "debug runtime power management transitions (default: 0)");
> +
> +#define TRC_PM(...) do {			\
> +		if (debug_pm)			\
> +			dev_info(__VA_ARGS__);	\
> +	} while (0)
> +

Once you are done debugging the driver and have a final version, please
remove these.

> +#if IS_ENABLED(CONFIG_DEBUG_FS)
> +
> +#define HIDMA_SHOW(dma, name) \
> +		seq_printf(s, #name "=0x%x\n", dma->name)

Only used once, just remove.

> +#define HIDMA_READ_SHOW(dma, name, offset) \
> +	do { \
> +		u32 val; \
> +		val = readl(dma->dev_virtaddr + offset); \
> +		seq_printf(s, name "=0x%x\n", val); \
> +	} while (0)

Inline function.

> +/**
> + * qcom_hidma_mgmt_info: display HIDMA device info
> + *
> + * Display the info for the current HIDMA device.
> + */
> +static int qcom_hidma_mgmt_info(struct seq_file *s, void *unused)
> +{
> +	struct qcom_hidma_mgmt_dev *mgmtdev = s->private;
> +	u32 val;
> +	int i;
> +
> +	HIDMA_RUNTIME_GET(mgmtdev);
> +	HIDMA_SHOW(mgmtdev, sw_version);
> +
> +	val = readl(mgmtdev->dev_virtaddr + CFG_OFFSET);
> +	seq_printf(s, "ENABLE=%d\n", val & 0x1);
> +
> +	val = readl(mgmtdev->dev_virtaddr + CHRESET_TIMEOUUT_OFFSET);
> +	seq_printf(s, "reset_timeout=%d\n", val & CHRESET_TIMEOUUT_MASK);
> +
> +	val = readl(mgmtdev->dev_virtaddr + MHICFG_OFFSET);
> +	seq_printf(s, "nr_event_channel=%d\n",
> +		(val >> NR_EV_CHANNEL_BIT_POS) & NR_CHANNEL_MASK);
> +	seq_printf(s, "nr_tr_channel=%d\n", (val & NR_CHANNEL_MASK));
> +	seq_printf(s, "nr_virt_tr_channel=%d\n", mgmtdev->nr_channels);
> +	seq_printf(s, "dev_virtaddr=%p\n", &mgmtdev->dev_virtaddr);
> +	seq_printf(s, "dev_physaddr=%pap\n", &mgmtdev->dev_physaddr);
> +	seq_printf(s, "dev_addrsize=%pap\n", &mgmtdev->dev_addrsize);

printing pointers is a security risk, just remove them if they are
not essential here.
> +
> +static int qcom_hidma_mgmt_err_open(struct inode *inode, struct file *file)
> +{
> +	return single_open(file, qcom_hidma_mgmt_err, inode->i_private);
> +}
> +
> +static const struct file_operations qcom_hidma_mgmt_err_fops = {
> +	.open = qcom_hidma_mgmt_err_open,
> +	.read = seq_read,
> +	.llseek = seq_lseek,
> +	.release = single_release,
> +};
> +
> +static ssize_t qcom_hidma_mgmt_mhiderr_clr(struct file *file,
> +	const char __user *user_buf, size_t count, loff_t *ppos)
> +{
> +	struct qcom_hidma_mgmt_dev *mgmtdev = file->f_inode->i_private;
> +
> +	HIDMA_RUNTIME_GET(mgmtdev);
> +	writel(1, mgmtdev->dev_virtaddr + MHID_BUS_ERR_CLR_OFFSET);
> +	HIDMA_RUNTIME_SET(mgmtdev);
> +	return count;
> +}
> +
> +static const struct file_operations qcom_hidma_mgmt_mhiderr_clrfops = {
> +	.write = qcom_hidma_mgmt_mhiderr_clr,
> +};

Is this really just a debugging interface? If anyone would do this
for normal operation, it needs to be a proper API.

> +
> +static void qcom_hidma_mgmt_debug_uninit(struct qcom_hidma_mgmt_dev *mgmtdev)
> +{
> +	debugfs_remove(mgmtdev->evt_ena);
> +	debugfs_remove(mgmtdev->tre_errclr);
> +	debugfs_remove(mgmtdev->msi_errclr);
> +	debugfs_remove(mgmtdev->ode_errclr);
> +	debugfs_remove(mgmtdev->ide_errclr);
> +	debugfs_remove(mgmtdev->evt_errclr);
> +	debugfs_remove(mgmtdev->mhid_errclr);
> +	debugfs_remove(mgmtdev->err);
> +	debugfs_remove(mgmtdev->info);
> +	debugfs_remove(mgmtdev->debugfs);
> +}
>
I guess debugfs_remove_recursive() would do the job as well.

> +static int qcom_hidma_mgmt_debug_init(struct qcom_hidma_mgmt_dev *mgmtdev)
> +{
> +	int rc = 0;
> +
> +	mgmtdev->debugfs = debugfs_create_dir(dev_name(&mgmtdev->pdev->dev),
> +						NULL);
> +	if (!mgmtdev->debugfs) {
> +		rc = -ENODEV;
> +		return rc;
> +	}
> +
> +	mgmtdev->info = debugfs_create_file("info", S_IRUGO,
> +			mgmtdev->debugfs, mgmtdev, &qcom_hidma_mgmt_fops);
> +	if (!mgmtdev->info) {
> +		rc = -ENOMEM;
> +		goto cleanup;
> +	}
> +
> +	mgmtdev->err = debugfs_create_file("err", S_IRUGO,
> +			mgmtdev->debugfs, mgmtdev,
> +			&qcom_hidma_mgmt_err_fops);
> +	if (!mgmtdev->err) {
> +		rc = -ENOMEM;
> +		goto cleanup;
> +	}
> +
> +	mgmtdev->mhid_errclr = debugfs_create_file("mhiderrclr", S_IWUSR,
> +			mgmtdev->debugfs, mgmtdev,
> +			&qcom_hidma_mgmt_mhiderr_clrfops);
> +	if (!mgmtdev->mhid_errclr) {
> +		rc = -ENOMEM;
> +		goto cleanup;
> +	}

Maybe use a loop around an array here to avoid writing the same thing 10 times?

Also, you could move the debugging code into a separate file and have a separate
Kconfig symbol so users can disable this if they do not debug your driver
but still want to use debugfs for other things.

> +static irqreturn_t qcom_hidma_mgmt_irq_handler(int irq, void *arg)
> +{
> +	/* TODO: handle irq here */
> +	return IRQ_HANDLED;
> +}


> +	rc = devm_request_irq(&pdev->dev, irq, qcom_hidma_mgmt_irq_handler,
> +		IRQF_SHARED, "qcom-hidmamgmt", mgmtdev);
> +	if (rc) {
> +		dev_err(&pdev->dev, "irq registration failed: %d\n", irq);
> +		goto out;
> +	}

If you create a shared handler, you must check whether there was an
interrupt pending, otherwise you will lose interrupts for all other devices
that share the same IRQ.

Better remove the handler completely here so you don't need to check it.

> +
> +	if (device_property_read_u16(&pdev->dev, "nr-channels",
> +		&mgmtdev->nr_channels)) {
> +		dev_err(&pdev->dev, "number of channels missing\n");
> +		rc = -EINVAL;
> +		goto out;
> +	}

This should be a u32 name 'dma-channels' for consistency with the
generic DMA binding.

Also, try to avoid using u8 and u16 properties everywhere and just
us u32 for consistency.

> +
> +	for (i = 0; i < mgmtdev->nr_channels; i++) {
> +		char name[30];
> +
> +		sprintf(name, "ch-priority-%d", i);
> +		device_property_read_u8(&pdev->dev, name,
> +			&mgmtdev->priority[i]);
> +
> +		sprintf(name, "ch-weight-%d", i);
> +		device_property_read_u8(&pdev->dev, name,
> +			&mgmtdev->weight[i]);

Per comment above, just read this as an array.

> +	dev_info(&pdev->dev,
> +		"HI-DMA engine management driver registration complete\n");
> +	platform_set_drvdata(pdev, mgmtdev);
> +	HIDMA_RUNTIME_SET(mgmtdev);
> +	return 0;
> +out:
> +	pm_runtime_disable(&pdev->dev);
> +	pm_runtime_put_sync_suspend(&pdev->dev);
> +	return rc;
> +}

The rest of the probe function does not register any user interface aside from
the debugging stuff. Can you explain in the changelog how you expect the
driver to be used in a real system? Is there another driver coming?

> +
> +static const struct of_device_id qcom_hidma_mgmt_match[] = {
> +	{ .compatible = "qcom,hidma_mgmt", },
> +	{},
> +};
> +MODULE_DEVICE_TABLE(of, qcom_hidma_mgmt_match);
> +
> +static struct platform_driver qcom_hidma_mgmt_driver = {
> +	.probe = qcom_hidma_mgmt_probe,
> +	.remove = qcom_hidma_mgmt_remove,
> +	.driver = {
> +		.name = MODULE_NAME,
> +		.owner = THIS_MODULE,
> +		.of_match_table = of_match_ptr(qcom_hidma_mgmt_match),

drop the .owner field and the of_match_ptr() macro to avoid warnings here.

> +static int __init qcom_hidma_mgmt_init(void)
> +{
> +	return platform_driver_register(&qcom_hidma_mgmt_driver);
> +}
> +device_initcall(qcom_hidma_mgmt_init);
> +
> +static void __exit qcom_hidma_mgmt_exit(void)
> +{
> +	platform_driver_unregister(&qcom_hidma_mgmt_driver);
> +}
> +module_exit(qcom_hidma_mgmt_exit);

module_platform_driver()

	Arnd


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver
  2015-10-30  3:08 ` [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver Sinan Kaya
  2015-10-30  8:01   ` kbuild test robot
  2015-10-30  8:07   ` kbuild test robot
@ 2015-10-30 10:24   ` Arnd Bergmann
  2015-10-30 21:42     ` Sinan Kaya
  2015-10-30 14:50   ` Mark Rutland
  3 siblings, 1 reply; 51+ messages in thread
From: Arnd Bergmann @ 2015-10-30 10:24 UTC (permalink / raw)
  To: Sinan Kaya
  Cc: dmaengine, timur, cov, jcm, Rob Herring, Pawel Moll,
	Mark Rutland, Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams,
	devicetree, linux-kernel

On Thursday 29 October 2015 23:08:13 Sinan Kaya wrote:
> This patch adds support for hidma engine. The driver
> consists of two logical blocks. The DMA engine interface
> and the low-level interface. This version of the driver
> does not support virtualization on this release and only
> memcpy interface support is included.

Does that mean you can have slave device support eventually?

If so, please put that into the binding document already.

> Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
> ---
>  .../devicetree/bindings/dma/qcom_hidma.txt         |   18 +
>  drivers/dma/Kconfig                                |   11 +
>  drivers/dma/Makefile                               |    4 +
>  drivers/dma/qcom_hidma.c                           | 1717 ++++++++++++++++++++
>  drivers/dma/qcom_hidma.h                           |   44 +
>  drivers/dma/qcom_hidma_ll.c                        | 1132 +++++++++++++
>  6 files changed, 2926 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/dma/qcom_hidma.txt
>  create mode 100644 drivers/dma/qcom_hidma.c
>  create mode 100644 drivers/dma/qcom_hidma.h
>  create mode 100644 drivers/dma/qcom_hidma_ll.c
> 
> diff --git a/Documentation/devicetree/bindings/dma/qcom_hidma.txt b/Documentation/devicetree/bindings/dma/qcom_hidma.txt
> new file mode 100644
> index 0000000..9a01635
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/dma/qcom_hidma.txt
> @@ -0,0 +1,18 @@
> +Qualcomm Technologies HIDMA Channel driver
> +
> +Required properties:
> +- compatible: must contain "qcom,hidma"

Be more specific here. Also, should this be 'hisilicon,hidma-1.0' rather
than 'qcom'? I'm guessing from the name that this is a device you licensed
from them.

> +- reg: Addresses for the transfer and event channel
> +- interrupts: Should contain the event interrupt
> +- desc-count: Number of asynchronous requests this channel can handle
> +- event-channel: The HW event channel completions will be delivered.
> +Example:
> +
> +	hidma_24: hidma@0x5c050000 {

Name should be 'dma-controller' in DT, not 'hidma'.

> +		compatible = "qcom,hidma";
> +		reg = <0 0x5c050000 0x0 0x1000>,
> +		      <0 0x5c0b0000 0x0 0x1000>;
> +		interrupts = <0 389 0>;
> +		desc-count = <10>;
> +		event-channel = /bits/ 8 <4>;
> +	};

Remove the /bits/.

> diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
> index 76a5a5e..2645185 100644
> --- a/drivers/dma/Kconfig
> +++ b/drivers/dma/Kconfig
> @@ -512,6 +512,17 @@ config QCOM_HIDMA_MGMT
>  	  OS would run QCOM_HIDMA driver and the hypervisor would run
>  	  the QCOM_HIDMA_MGMT driver.
>  
> +config QCOM_HIDMA
> +	tristate "Qualcomm Technologies HIDMA support"
> +	select DMA_ENGINE
> +	select DMA_VIRTUAL_CHANNELS
> +	help
> +	  Enable support for the Qualcomm Technologies HIDMA controller.
> +	  The HIDMA controller supports optimized buffer copies
> +	  (user to kernel, kernel to kernel, etc.).  It only supports
> +	  memcpy/memset interfaces. The core is not intended for general
> +	  purpose slave DMA.
> +
>  config XILINX_VDMA
>  	tristate "Xilinx AXI VDMA Engine"
>  	depends on (ARCH_ZYNQ || MICROBLAZE)
> diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
> index 3d25ffd..5665df2 100644
> --- a/drivers/dma/Makefile
> +++ b/drivers/dma/Makefile
> @@ -53,6 +53,10 @@ obj-$(CONFIG_PL330_DMA) += pl330.o
>  obj-$(CONFIG_PPC_BESTCOMM) += bestcomm/
>  obj-$(CONFIG_PXA_DMA) += pxa_dma.o
>  obj-$(CONFIG_QCOM_BAM_DMA) += qcom_bam_dma.o
> +obj-$(CONFIG_QCOM_HIDMA) +=  qcom_hdma.o
> +qcom_hdma-objs        := qcom_hidma_ll.o qcom_hidma.o
> +

The driver is linked into a single module, so all the EXPORT_SYMBOL
statements can be dropped.

> +/* Default idle time is 2 seconds. This parameter can
> + * be overridden by changing the following
> + * /sys/bus/platform/devices/QCOM8061:<xy>/power/autosuspend_delay_ms
> + * during kernel boot.
> + */
> +#define AUTOSUSPEND_TIMEOUT		2000
> +#define HIDMA_DEFAULT_DESCRIPTOR_COUNT	16
> +#define MODULE_NAME			"hidma"

MODULE_NAME and HIDMA_DEFAULT_DESCRIPTOR_COUNT can be dropped


> +#define HIDMA_RUNTIME_GET(dmadev)				\
> +do {								\
> +	atomic_inc(&(dmadev)->pm_counter);			\
> +	TRC_PM((dmadev)->ddev.dev,				\
> +		"%s:%d pm_runtime_get %d\n", __func__, __LINE__,\
> +		atomic_read(&(dmadev)->pm_counter));		\
> +	pm_runtime_get_sync((dmadev)->ddev.dev);		\
> +} while (0)
> +
> +#define HIDMA_RUNTIME_SET(dmadev)				\
> +do {								\
> +	atomic_dec(&(dmadev)->pm_counter);			\
> +	TRC_PM((dmadev)->ddev.dev,				\
> +		"%s:%d pm_runtime_put_autosuspend:%d\n",	\
> +		__func__, __LINE__,				\
> +		atomic_read(&(dmadev)->pm_counter));		\
> +	pm_runtime_mark_last_busy((dmadev)->ddev.dev);		\
> +	pm_runtime_put_autosuspend((dmadev)->ddev.dev);		\
> +} while (0)

Inline functions.

> +struct hidma_test_sync {
> +	atomic_t			counter;
> +	wait_queue_head_t		wq;
> +};

Let me guess, you converted this from a semaphore? ;-)

Just put the two members into the containing structure and relete it here.

> +struct hidma_dev {
> +	u8				evridx;
> +	u32				nr_descriptors;
> +
> +	void				*lldev;
> +	void				__iomem *dev_trca;
> +	void				__iomem *dev_evca;
> +	int (*self_test)(struct hidma_dev *device);
> +	struct dentry			*debugfs;
> +	struct dentry			*stats;
> +
> +	/* used to protect the pending channel list*/
> +	spinlock_t			lock;
> +	dma_addr_t			dev_trca_phys;
> +	struct dma_device		ddev;
> +	struct tasklet_struct		tasklet;

workqueue maybe?

> +	resource_size_t			dev_trca_size;
> +	dma_addr_t			dev_evca_phys;
> +	resource_size_t			dev_evca_size;

All three look unused and can be removed.

> +static unsigned int debug_pm;
> +module_param(debug_pm, uint, 0644);
> +MODULE_PARM_DESC(debug_pm,
> +		 "debug runtime power management transitions (default: 0)");
> +
> +#define TRC_PM(...) do {			\
> +		if (debug_pm)			\
> +			dev_info(__VA_ARGS__);	\
> +	} while (0)

Again, remove these after you are done debugging the problem at hand,
we don't need to clutter up the upstream version.

> +	/*
> +	 * It is assumed that the hardware can move the data within 1s
> +	 * and signal the OS of the completion
> +	 */
> +	ret = wait_event_interruptible_timeout(dmadev->test_result.wq,
> +		atomic_read(&dmadev->test_result.counter) == (map_count),
> +				msecs_to_jiffies(10000));
> +
> +	if (ret <= 0) {
> +		dev_err(dmadev->ddev.dev,
> +			"Self-test sg copy timed out, disabling\n");
> +		err = -ENODEV;
> +		goto tx_status;
> +	}

Why ENODEV? Could you make this handle restarted system calls?

> +
> +/*
> + * Perform a streaming transaction to verify the HW works.
> + */
> +static int hidma_selftest_streaming(struct hidma_dev *dmadev,
> +			struct dma_chan *dma_chanptr, u64 size,
> +			unsigned long flags)
> +{

You have a lot of selftest code here. Can you try to move that into a file
that works for all DMA engines? It feels like this should not be part of
a driver.

> +
> +#if IS_ENABLED(CONFIG_DEBUG_FS)
> +
> +#define SIER_CHAN_SHOW(chan, name) \
> +		seq_printf(s, #name "=%u\n", chan->name)

can be open-coded for clarity.

> +       ret = dma_mapping_error(dev, dma_src);
> +       if (ret) {
> +               dev_err(dev, "dma_mapping_error with ret:%d\n", ret);
> +               ret = -ENOMEM;
> +       } else {
> +               phys_addr_t phys;
> +
> +               phys = dma_to_phys(dev, dma_src);
> +               if (strcmp(__va(phys), "hello world") != 0) {
> +                       dev_err(dev, "memory content mismatch\n");
> +                       ret = -EINVAL;
> +               } else {
> +                       dev_dbg(dev, "mapsingle:dma_map_single works\n");
> +               }
> +               dma_unmap_single(dev, dma_src, buf_size, DMA_TO_DEVICE);
> +       }

dma_to_phys() is architecture specific and does not work if you
have an IOMMU. Just use the virtual address you passed into dma_map_*.

> +/**
> + * hidma_dma_info: display HIDMA device info
> + *
> + * Display the info for the current HIDMA device.
> + */
> +static int hidma_dma_info(struct seq_file *s, void *unused)
> +{
> +	struct hidma_dev *dmadev = s->private;
> +	struct dma_device *dma = &dmadev->ddev;
> +
> +	seq_printf(s, "nr_descriptors=%d\n", dmadev->nr_descriptors);
> +	seq_printf(s, "dev_trca=%p\n", &dmadev->dev_trca);
> +	seq_printf(s, "dev_trca_phys=%pa\n", &dmadev->dev_trca_phys);
> +	seq_printf(s, "dev_trca_size=%pa\n", &dmadev->dev_trca_size);
> +	seq_printf(s, "dev_evca=%p\n", &dmadev->dev_evca);
> +	seq_printf(s, "dev_evca_phys=%pa\n", &dmadev->dev_evca_phys);
> +	seq_printf(s, "dev_evca_size=%pa\n", &dmadev->dev_evca_size);

Don't print pointers here.

> +	seq_printf(s, "self_test=%u\n",
> +		atomic_read(&dmadev->test_result.counter));
> +
> +	seq_printf(s, "copy%s%s%s%s%s%s%s%s%s%s%s\n",
> +		dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "",
> +		dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "",
> +		dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "",
> +		dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "",
> +		dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : "",
> +		dma_has_cap(DMA_SG, dma->cap_mask) ? " sg" : "",
> +		dma_has_cap(DMA_ASYNC_TX, dma->cap_mask) ? " async" : "",
> +		dma_has_cap(DMA_SLAVE, dma->cap_mask) ? " slave" : "",
> +		dma_has_cap(DMA_CYCLIC, dma->cap_mask) ? " cyclic" : "",
> +		dma_has_cap(DMA_INTERLEAVE, dma->cap_mask) ? " intl" : "",
> +		dma_has_cap(DMA_MEMCPY, dma->cap_mask) ? " memcpy" : "");
> +
> +	return 0;
> +}

The selftest part and the features could just be separate files in the
core dmaengine code, it doesn't really belong here.

> +}
> +#else
> +static void hidma_debug_uninit(struct hidma_dev *dmadev)
> +{
> +}
> +static int hidma_debug_init(struct hidma_dev *dmadev)
> +{
> +	return 0;
> +}
> +#endif

You can remove the #ifdef here. The debugfs code already stubs out all
debugfs_create_*() and turns it all into nops when it is disabled.

> +	dma_cap_set(DMA_MEMCPY, dmadev->ddev.cap_mask);
> +	/* Apply default dma_mask if needed */
> +	if (!pdev->dev.dma_mask) {
> +		pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask;
> +		pdev->dev.coherent_dma_mask = DMA_BIT_MASK(64);
> +	}
> +

remove the check, or use

	if (WARN_ON(!pdev->dev.dma_mask))
		return -ENXIO;

The dma mask has to always be set by the platform code before probe()
is called. If it is not set, you are not allowed to perform DMA.

> +	dmadev->dev_evca_phys = evca_resource->start;
> +	dmadev->dev_evca_size = resource_size(evca_resource);
> +
> +	dev_dbg(&pdev->dev, "dev_evca_phys:%pa\n", &dmadev->dev_evca_phys);
> +	dev_dbg(&pdev->dev, "dev_evca_size:%pa\n", &dmadev->dev_evca_size);
> +

> +	dev_dbg(&pdev->dev, "qcom_hidma: mapped EVCA %pa to %p\n",
> +		&dmadev->dev_evca_phys, dmadev->dev_evca);



> +	dmadev->dev_trca_phys = trca_resource->start;
> +	dmadev->dev_trca_size = resource_size(trca_resource);
> +
> +	dev_dbg(&pdev->dev, "dev_trca_phys:%pa\n", &dmadev->dev_trca_phys);
> +	dev_dbg(&pdev->dev, "dev_trca_size:%pa\n", &dmadev->dev_trca_size);

Don't print pointers.

> +	rc = devm_request_irq(&pdev->dev, chirq, hidma_chirq_handler, 0,
> +			      "qcom-hidma", &dmadev->lldev);
> +	if (rc) {
> +		dev_err(&pdev->dev, "chirq registration failed: %d\n", chirq);
> +		goto chirq_request_failed;
> +	}
> +
> +	dev_dbg(&pdev->dev, "initializing DMA channels\n");
> +	INIT_LIST_HEAD(&dmadev->ddev.channels);
> +	rc = hidma_chan_init(dmadev, 0);
> +	if (rc) {
> +		dev_err(&pdev->dev, "probe:channel init failed\n");
> +		goto channel_init_failed;
> +	}
> +	dev_dbg(&pdev->dev, "HI-DMA engine driver starting self test\n");
> +	rc = dmadev->self_test(dmadev);
> +	if (rc) {
> +		dev_err(&pdev->dev, "probe: self test failed: %d\n", rc);
> +		goto self_test_failed;
> +	}
> +	dev_info(&pdev->dev, "probe: self test succeeded.\n");
> +
> +	dev_dbg(&pdev->dev, "calling dma_async_device_register\n");
> +	rc = dma_async_device_register(&dmadev->ddev);
> +	if (rc) {
> +		dev_err(&pdev->dev,
> +			"probe: failed to register slave DMA: %d\n", rc);
> +		goto device_register_failed;
> +	}
> +	dev_dbg(&pdev->dev, "probe: dma_async_device_register done\n");
> +
> +	rc = hidma_debug_init(dmadev);
> +	if (rc) {
> +		dev_err(&pdev->dev,
> +			"probe: failed to init debugfs: %d\n", rc);
> +		goto debug_init_failed;
> +	}
> +
> +	dev_info(&pdev->dev, "HI-DMA engine driver registration complete\n");
> +	platform_set_drvdata(pdev, dmadev);
> +	HIDMA_RUNTIME_SET(dmadev);
> +	return 0;

Remove the debug prints when you are done debugging.

> +debug_init_failed:
> +device_register_failed:
> +self_test_failed:
> +channel_init_failed:
> +chirq_request_failed:
> +	hidma_ll_uninit(dmadev->lldev);
> +ll_init_failed:
> +evridx_failed:
> +remap_trca_failed:
> +remap_evca_failed:
> +	if (dmadev)
> +		hidma_free(dmadev);

Rename the labels according to what you do at in the failure case
and remove most of them. This is 'goto', not 'comefrom' ;-)

> +static struct platform_driver hidma_driver = {
> +	.probe = hidma_probe,
> +	.remove = hidma_remove,
> +	.driver = {
> +		.name = MODULE_NAME,
> +		.owner = THIS_MODULE,
> +		.of_match_table = of_match_ptr(hidma_match),
> +		.acpi_match_table = ACPI_PTR(hidma_acpi_ids),


Remove .owner and of_match_ptr().

> +	},
> +};
> +
> +static int __init hidma_init(void)
> +{
> +	return platform_driver_register(&hidma_driver);
> +}
> +late_initcall(hidma_init);
> +
> +static void __exit hidma_exit(void)
> +{
> +	platform_driver_unregister(&hidma_driver);
> +}
> +module_exit(hidma_exit);

module_platform_driver()

> +
> +	if (unlikely(tre_ch >= lldev->nr_tres)) {
> +		dev_err(lldev->dev, "invalid TRE number in free:%d", tre_ch);
> +		return;
> +	}
> +
> +	tre = &lldev->trepool[tre_ch];
> +	if (unlikely(atomic_read(&tre->allocated) != true)) {
> +		dev_err(lldev->dev, "trying to free an unused TRE:%d",
> +			tre_ch);
> +		return;
> +	}

Remove the 'unlikely' and the redundant '!= true'.

Only use 'likely' or 'unlikely' if you can measure a difference.

> +static int hidma_ll_reset(struct hidma_lldev *lldev)
> +{
> +	u32 val;
> +	int count;
> +
> +	val = readl_relaxed(lldev->trca + TRCA_CTRLSTS_OFFSET);
> +	val = val & ~(CH_CONTROL_MASK << 16);
> +	val = val | (CH_RESET << 16);
> +	writel_relaxed(val, lldev->trca + TRCA_CTRLSTS_OFFSET);
> +
> +	/* wait until the reset is performed */
> +	wmb();
> +
> +	/* Delay 10ms after reset to allow DMA logic to quiesce.*/
> +	for (count = 0; count < 10; count++) {
> +		val = readl_relaxed(lldev->trca + TRCA_CTRLSTS_OFFSET);
> +		lldev->trch_state = (val >> CH_STATE_BIT_POS)
> +					& CH_STATE_MASK;
> +		if (lldev->trch_state == CH_DISABLED)
> +			break;
> +		mdelay(1);
> +	}
> +	val = readl_relaxed(lldev->trca + TRCA_CTRLSTS_OFFSET);
> +	lldev->trch_state = (val >> CH_STATE_BIT_POS) & CH_STATE_MASK;
> +	if (lldev->trch_state != CH_DISABLED) {
> +		dev_err(lldev->dev,
> +			"transfer channel did not reset\n");
> +		return -ENODEV;
> +	}
> +
> +	val = readl_relaxed(lldev->evca + EVCA_CTRLSTS_OFFSET);
> +	val = val & ~(CH_CONTROL_MASK << 16);
> +	val = val | (CH_RESET << 16);
> +	writel_relaxed(val, lldev->evca + EVCA_CTRLSTS_OFFSET);
> +
> +	/* wait until the reset is performed */
> +	wmb();
> +
> +	/* Delay 10ms after reset to allow DMA logic to quiesce.*/
> +	for (count = 0; count < 10; count++) {
> +		val = readl_relaxed(lldev->evca + EVCA_CTRLSTS_OFFSET);
> +		lldev->evch_state = (val >> CH_STATE_BIT_POS)
> +					& CH_STATE_MASK;
> +		if (lldev->evch_state == CH_DISABLED)
> +			break;
> +		mdelay(1);
> +	}

Try using a workqueue to get into a state where you can call msleep()
instead of mdelay().

Also, if you waste CPU cycles for hundreds of milliseconds, it's unlikely
that the function is so performance critical that it requires writel_relaxed().

Just use writel() here.
> +/*
> + * The interrupt handler for HIDMA will try to consume as many pending
> + * EVRE from the event queue as possible. Each EVRE has an associated
> + * TRE that holds the user interface parameters. EVRE reports the
> + * result of the transaction. Hardware guarantees ordering between EVREs
> + * and TREs. We use last processed offset to figure out which TRE is
> + * associated with which EVRE. If two TREs are consumed by HW, the EVREs
> + * are in order in the event ring.
> + * This handler will do a one pass for consuming EVREs. Other EVREs may
> + * be delivered while we are working. It will try to consume incoming
> + * EVREs one more time and return.
> + * For unprocessed EVREs, hardware will trigger another interrupt until
> + * all the interrupt bits are cleared.
> + */
> +static void hidma_ll_int_handler_internal(struct hidma_lldev *lldev)
> +{
> +	u32 status;
> +	u32 enable;
> +	u32 cause;
> +	int repeat = 2;
> +	unsigned long timeout;
> +
> +	status = readl_relaxed(lldev->evca + EVCA_IRQ_STAT_OFFSET);
> +	enable = readl_relaxed(lldev->evca + EVCA_IRQ_EN_OFFSET);
> +	cause = status & enable;
> +

Reading the status probably requires a readl() rather than readl_relaxed()
to guarantee that the DMA data has arrived in memory by the time that the
register data is seen by the CPU. If using readl_relaxed() here is a valid
and required optimization, please add a comment to explain why it works
and how much you gain.

> +		/* Another interrupt might have arrived while we are
> +		 * processing this one. Read the new cause.
> +		 */
> +		status = readl_relaxed(lldev->evca + EVCA_IRQ_STAT_OFFSET);
> +		enable = readl_relaxed(lldev->evca + EVCA_IRQ_EN_OFFSET);
> +		cause = status & enable;
> +
> +		repeat--;
> +	}

Same here.


> +}
> +
> +
> +static int hidma_ll_enable(struct hidma_lldev *lldev)
> +{
> +	u32 val;
> +
> +	val = readl_relaxed(lldev->evca + EVCA_CTRLSTS_OFFSET);
> +	val &= ~(CH_CONTROL_MASK << 16);
> +	val |= (CH_ENABLE << 16);
> +
> +	writel_relaxed(val, lldev->evca + EVCA_CTRLSTS_OFFSET);
> +
> +	/* wait until channel is enabled */
> +	wmb();
> +
> +	mdelay(1);
> +
> +	val = readl_relaxed(lldev->evca + EVCA_CTRLSTS_OFFSET);
> +	lldev->evch_state = (val >> CH_STATE_BIT_POS) & CH_STATE_MASK;
> +	if ((lldev->evch_state != CH_ENABLED) &&
> +		(lldev->evch_state != CH_RUNNING)) {
> +		dev_err(lldev->dev,
> +			"event channel did not get enabled\n");
> +		return -ENODEV;
> +	}
> +
> +	val = readl_relaxed(lldev->trca + TRCA_CTRLSTS_OFFSET);
> +	val = val & ~(CH_CONTROL_MASK << 16);
> +	val = val | (CH_ENABLE << 16);
> +	writel_relaxed(val, lldev->trca + TRCA_CTRLSTS_OFFSET);
> +
> +	/* wait until channel is enabled */
> +	wmb();
> +
> +	mdelay(1);

Another workqueue? You should basically never call mdelay(). 

> +static int hidma_ll_hw_start(void *llhndl)
> +{
> +	int rc = 0;
> +	struct hidma_lldev *lldev = llhndl;
> +	unsigned long irqflags;
> +
> +	spin_lock_irqsave(&lldev->lock, irqflags);
> +	writel_relaxed(lldev->tre_write_offset,
> +			lldev->trca + TRCA_DOORBELL_OFFSET);
> +	spin_unlock_irqrestore(&lldev->lock, irqflags);

How does this work? The writel_relaxed() won't synchronize with either
the DMA data or the spinlock.

> +int hidma_ll_init(void **lldevp, struct device *dev, u32 nr_tres,
> +			void __iomem *trca, void __iomem *evca,
> +			u8 evridx)

How about returning the pointer rather than passing in an indirect pointer?

Also, your abstraction seem to go a little too far if the upper driver
doesn't know what the lower driver calls its main device structure.

Or you can go further and just embed the struct hidma_lldev within the
struct hidma_dev to save one?

> +void hidma_ll_chstats(struct seq_file *s, void *llhndl, u32 tre_ch)
> +{
> +	struct hidma_lldev *lldev = llhndl;
> +	struct hidma_tre *tre;
> +	u32 length;
> +	dma_addr_t src_start;
> +	dma_addr_t dest_start;
> +	u32 *tre_local;
> +
> +	if (unlikely(tre_ch >= lldev->nr_tres)) {
> +		dev_err(lldev->dev, "invalid TRE number in chstats:%d",
> +			tre_ch);
> +		return;
> +	}
> +	tre = &lldev->trepool[tre_ch];
> +	seq_printf(s, "------Channel %d -----\n", tre_ch);
> +	seq_printf(s, "allocated=%d\n", atomic_read(&tre->allocated));
> +	HIDMA_CHAN_SHOW(tre, queued);
> +	seq_printf(s, "err_info=0x%x\n",
> +		   lldev->tx_status_list[tre->chidx].err_info);
> +	seq_printf(s, "err_code=0x%x\n",
> +		   lldev->tx_status_list[tre->chidx].err_code);
> +	HIDMA_CHAN_SHOW(tre, status);
> +	HIDMA_CHAN_SHOW(tre, chidx);
> +	HIDMA_CHAN_SHOW(tre, dma_sig);
> +	seq_printf(s, "dev_name=%s\n", tre->dev_name);
> +	seq_printf(s, "callback=%p\n", tre->callback);
> +	seq_printf(s, "data=%p\n", tre->data);
> +	HIDMA_CHAN_SHOW(tre, tre_index);
> +
> +	tre_local = &tre->tre_local[0];
> +	src_start = tre_local[TRE_SRC_LOW_IDX];
> +	src_start = ((u64)(tre_local[TRE_SRC_HI_IDX]) << 32) + src_start;
> +	dest_start = tre_local[TRE_DEST_LOW_IDX];
> +	dest_start += ((u64)(tre_local[TRE_DEST_HI_IDX]) << 32);
> +	length = tre_local[TRE_LEN_IDX];
> +
> +	seq_printf(s, "src=%pap\n", &src_start);
> +	seq_printf(s, "dest=%pap\n", &dest_start);
> +	seq_printf(s, "length=0x%x\n", length);
> +}
> +EXPORT_SYMBOL_GPL(hidma_ll_chstats);

Remove all the pointers here. I guess you can remove the entire debugfs
file really ;-)

This looks like it is better done using ftrace for the low-level internals
of the driver.

	Arnd

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver
  2015-10-30  3:08 ` [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver Sinan Kaya
                     ` (2 preceding siblings ...)
  2015-10-30 10:24   ` Arnd Bergmann
@ 2015-10-30 14:50   ` Mark Rutland
  2015-10-31  4:27     ` Sinan Kaya
  3 siblings, 1 reply; 51+ messages in thread
From: Mark Rutland @ 2015-10-30 14:50 UTC (permalink / raw)
  To: Sinan Kaya
  Cc: dmaengine, timur, cov, jcm, Rob Herring, Pawel Moll,
	Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams, devicetree,
	linux-kernel

On Thu, Oct 29, 2015 at 11:08:13PM -0400, Sinan Kaya wrote:
> This patch adds support for hidma engine. The driver
> consists of two logical blocks. The DMA engine interface
> and the low-level interface. This version of the driver
> does not support virtualization on this release and only
> memcpy interface support is included.
> 
> Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
> ---
>  .../devicetree/bindings/dma/qcom_hidma.txt         |   18 +
>  drivers/dma/Kconfig                                |   11 +
>  drivers/dma/Makefile                               |    4 +
>  drivers/dma/qcom_hidma.c                           | 1717 ++++++++++++++++++++
>  drivers/dma/qcom_hidma.h                           |   44 +
>  drivers/dma/qcom_hidma_ll.c                        | 1132 +++++++++++++
>  6 files changed, 2926 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/dma/qcom_hidma.txt
>  create mode 100644 drivers/dma/qcom_hidma.c
>  create mode 100644 drivers/dma/qcom_hidma.h
>  create mode 100644 drivers/dma/qcom_hidma_ll.c
> 
> diff --git a/Documentation/devicetree/bindings/dma/qcom_hidma.txt b/Documentation/devicetree/bindings/dma/qcom_hidma.txt
> new file mode 100644
> index 0000000..9a01635
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/dma/qcom_hidma.txt
> @@ -0,0 +1,18 @@
> +Qualcomm Technologies HIDMA Channel driver
> +
> +Required properties:
> +- compatible: must contain "qcom,hidma"
> +- reg: Addresses for the transfer and event channel
> +- interrupts: Should contain the event interrupt
> +- desc-count: Number of asynchronous requests this channel can handle
> +- event-channel: The HW event channel completions will be delivered.
> +Example:
> +
> +	hidma_24: hidma@0x5c050000 {
> +		compatible = "qcom,hidma";
> +		reg = <0 0x5c050000 0x0 0x1000>,
> +		      <0 0x5c0b0000 0x0 0x1000>;
> +		interrupts = <0 389 0>;
> +		desc-count = <10>;
> +		event-channel = /bits/ 8 <4>;

The documentation above didn't state this was an 8-bit quantity, and you
save nothing by doing this (all values in the binary format are padded
to 4-byte alignment).

Have this as a cell, and use of_property_read_u32. That means you may
need a temporary u32, but it's far less surprising and makes things far
easier for dts authors to get right.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver
  2015-10-30  3:08 [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver Sinan Kaya
                   ` (3 preceding siblings ...)
  2015-10-30  9:34 ` [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver Arnd Bergmann
@ 2015-10-30 15:00 ` Mark Rutland
  2015-10-30 17:59   ` Andy Shevchenko
  2015-10-31 17:11   ` Sinan Kaya
  4 siblings, 2 replies; 51+ messages in thread
From: Mark Rutland @ 2015-10-30 15:00 UTC (permalink / raw)
  To: Sinan Kaya
  Cc: dmaengine, timur, cov, jcm, Rob Herring, Pawel Moll,
	Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams, devicetree,
	linux-kernel

On Thu, Oct 29, 2015 at 11:08:12PM -0400, Sinan Kaya wrote:
> The Qualcomm Technologies HIDMA device has been designed
> to support virtualization technology. The driver has been
> divided into two to follow the hardware design. The management
> driver is executed in hypervisor context and is the main
> managment for all channels provided by the device. The
> channel driver is exected in the guest OS context.
> 
> Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
> ---
>  .../devicetree/bindings/dma/qcom_hidma_mgmt.txt    |  42 +
>  drivers/dma/Kconfig                                |  11 +
>  drivers/dma/Makefile                               |   1 +
>  drivers/dma/qcom_hidma_mgmt.c                      | 868 +++++++++++++++++++++
>  4 files changed, 922 insertions(+)
>  create mode 100644 Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt
>  create mode 100644 drivers/dma/qcom_hidma_mgmt.c
> 
> diff --git a/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt b/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt
> new file mode 100644
> index 0000000..81674ab
> --- /dev/null
> +++ b/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt
> @@ -0,0 +1,42 @@
> +Qualcomm Technologies HIDMA Management interface
> +
> +Required properties:
> +- compatible: must contain "qcom,hidma_mgmt"

s/_/-/ in property names and compatible strings.

Per your rationale for splitting the device nodes, surely the
relationship between the two needs to be described?

The GIC has a similar split, yet we use a single node, with the
hypervisor portions being optional.

What if there were multiple instances?

> +- reg: Address range for DMA device
> +- interrupts: Should contain the one interrupt shared by all channels
> +- nr-channels: Number of channels supported by this DMA controller.
> +- max-write: Maximum write burst in bytes. A memcpy requested is
> +  fragmented to multiples of this amount.
> +- max-read: Maximum read burst in bytes. A memcpy request is
> +  fragmented to multiples of this amount.

Thse would be better named as max-read-burst-bytes and
min-read-burst-bytes.

> +- max-wxactions: Maximum write transactions to perform in a burst
> +- max-rdactions: Maximum read transactions to perform in a burst

max-{write,read}-transactions

> +- max-memset-limit: Maximum memset limit

I have no idea what this is meant to mean.

> +- ch-priority-#n: Priority of the channel
> +- ch-weight-#n: Round robin weight of the channel

These need better descriptions. They sound like configuration options.

[...]

> +#if IS_ENABLED(CONFIG_ACPI)
> +static const struct acpi_device_id qcom_hidma_mgmt_acpi_ids[] = {
> +	{"QCOM8060"},
> +	{},
> +};
> +#endif

How do DMA engines work with ACPI?

How are client relationships defined?

THanks,
Mark.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver
  2015-10-30 15:00 ` Mark Rutland
@ 2015-10-30 17:59   ` Andy Shevchenko
  2015-10-30 18:08     ` Sinan Kaya
  2015-10-31 17:11   ` Sinan Kaya
  1 sibling, 1 reply; 51+ messages in thread
From: Andy Shevchenko @ 2015-10-30 17:59 UTC (permalink / raw)
  To: Mark Rutland
  Cc: Sinan Kaya, dmaengine, timur, cov, jcm, Rob Herring, Pawel Moll,
	Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams, devicetree,
	linux-kernel

On Fri, Oct 30, 2015 at 5:00 PM, Mark Rutland <mark.rutland@arm.com> wrote:
> On Thu, Oct 29, 2015 at 11:08:12PM -0400, Sinan Kaya wrote:
>> The Qualcomm Technologies HIDMA device has been designed
>> to support virtualization technology. The driver has been
>> divided into two to follow the hardware design. The management
>> driver is executed in hypervisor context and is the main
>> managment for all channels provided by the device. The
>> channel driver is exected in the guest OS context.
>>

>> +#if IS_ENABLED(CONFIG_ACPI)
>> +static const struct acpi_device_id qcom_hidma_mgmt_acpi_ids[] = {
>> +     {"QCOM8060"},
>> +     {},
>> +};
>> +#endif
>
> How do DMA engines work with ACPI?
>
> How are client relationships defined?

The ACPI tables DSDT and CSRT (more info here:
http://www.acpi.info/links.htm) defines properties.

DSDT:
 per DMAC: the resources
 per client: FixedDMA descriptor that contains channel / request line pair.

CSRT:
 necessary table to map which DMAC provides which request line, thus
request line numbering are global on platform.

When DMAC driver is probed in the running system it should call as
well registration function from acpi-dma.c.

All clients when use new DMA slave API gets channel automatically
based on their FixedDMA property.

So, above is how it should be done. Didn't actually checked what this
driver does.

-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver
  2015-10-30 17:59   ` Andy Shevchenko
@ 2015-10-30 18:08     ` Sinan Kaya
  2015-10-30 18:18       ` Mark Rutland
  2015-10-30 18:18       ` Andy Shevchenko
  0 siblings, 2 replies; 51+ messages in thread
From: Sinan Kaya @ 2015-10-30 18:08 UTC (permalink / raw)
  To: Andy Shevchenko, Mark Rutland
  Cc: dmaengine, timur, cov, jcm, Rob Herring, Pawel Moll,
	Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams, devicetree,
	linux-kernel



On 10/30/2015 1:59 PM, Andy Shevchenko wrote:
> On Fri, Oct 30, 2015 at 5:00 PM, Mark Rutland <mark.rutland@arm.com> wrote:
>> On Thu, Oct 29, 2015 at 11:08:12PM -0400, Sinan Kaya wrote:
>>> The Qualcomm Technologies HIDMA device has been designed
>>> to support virtualization technology. The driver has been
>>> divided into two to follow the hardware design. The management
>>> driver is executed in hypervisor context and is the main
>>> managment for all channels provided by the device. The
>>> channel driver is exected in the guest OS context.
>>>
>
>>> +#if IS_ENABLED(CONFIG_ACPI)
>>> +static const struct acpi_device_id qcom_hidma_mgmt_acpi_ids[] = {
>>> +     {"QCOM8060"},
>>> +     {},
>>> +};
>>> +#endif
>>
>> How do DMA engines work with ACPI?
>>
>> How are client relationships defined?
>
> The ACPI tables DSDT and CSRT (more info here:
> http://www.acpi.info/links.htm) defines properties.
>
> DSDT:
>   per DMAC: the resources
>   per client: FixedDMA descriptor that contains channel / request line pair.
>
> CSRT:
>   necessary table to map which DMAC provides which request line, thus
> request line numbering are global on platform.
>
> When DMAC driver is probed in the running system it should call as
> well registration function from acpi-dma.c.
>
> All clients when use new DMA slave API gets channel automatically
> based on their FixedDMA property.
>
> So, above is how it should be done. Didn't actually checked what this
> driver does.
>
I was going to reply to all the questions in one pass but let me handle 
piece by piece.

Here are some facts.
- This hardware supports memcpy and memset only.
- Memset feature was removed from the kernel sometime around 3.14. So no 
memset support in this driver either.
- The hardware does not support DMA slave support
- The goal is to provide an interface to DMA engine framework for memcpy 
optimization so that the rest of the kernel drivers and applications 
make use of the hardware.

CSRT is an Intel specific ACPI table for slave devices. It was decided 
by Linaro that CSRT will not be supported for ARM64.

There were some discussions in ACPI forums to define a similar table for 
ARM64 but we are not there today and this hardware does not support 
slave interface.

ACPI enumeration is just like any other platform device. The driver gets 
looked up by a QCOM specific HID and the driver gets probed with the 
rest of the arguments in DSM object similar to device-tree attributes. 
The code uses device functions so the driver is not aware of where the 
parameters are coming from.


-- 
Sinan Kaya
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a 
Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver
  2015-10-30 18:08     ` Sinan Kaya
@ 2015-10-30 18:18       ` Mark Rutland
  2015-10-30 18:18       ` Andy Shevchenko
  1 sibling, 0 replies; 51+ messages in thread
From: Mark Rutland @ 2015-10-30 18:18 UTC (permalink / raw)
  To: Sinan Kaya
  Cc: Andy Shevchenko, dmaengine, timur, cov, jcm, Rob Herring,
	Pawel Moll, Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams,
	devicetree, linux-kernel

On Fri, Oct 30, 2015 at 02:08:07PM -0400, Sinan Kaya wrote:
> 
> 
> On 10/30/2015 1:59 PM, Andy Shevchenko wrote:
> >On Fri, Oct 30, 2015 at 5:00 PM, Mark Rutland <mark.rutland@arm.com> wrote:
> >>On Thu, Oct 29, 2015 at 11:08:12PM -0400, Sinan Kaya wrote:
> >>>The Qualcomm Technologies HIDMA device has been designed
> >>>to support virtualization technology. The driver has been
> >>>divided into two to follow the hardware design. The management
> >>>driver is executed in hypervisor context and is the main
> >>>managment for all channels provided by the device. The
> >>>channel driver is exected in the guest OS context.
> >>>
> >
> >>>+#if IS_ENABLED(CONFIG_ACPI)
> >>>+static const struct acpi_device_id qcom_hidma_mgmt_acpi_ids[] = {
> >>>+     {"QCOM8060"},
> >>>+     {},
> >>>+};
> >>>+#endif
> >>
> >>How do DMA engines work with ACPI?
> >>
> >>How are client relationships defined?
> >
> >The ACPI tables DSDT and CSRT (more info here:
> >http://www.acpi.info/links.htm) defines properties.
> >
> >DSDT:
> >  per DMAC: the resources
> >  per client: FixedDMA descriptor that contains channel / request line pair.
> >
> >CSRT:
> >  necessary table to map which DMAC provides which request line, thus
> >request line numbering are global on platform.
> >
> >When DMAC driver is probed in the running system it should call as
> >well registration function from acpi-dma.c.
> >
> >All clients when use new DMA slave API gets channel automatically
> >based on their FixedDMA property.
> >
> >So, above is how it should be done. Didn't actually checked what this
> >driver does.
> >
> I was going to reply to all the questions in one pass but let me
> handle piece by piece.
> 
> Here are some facts.
> - This hardware supports memcpy and memset only.
> - Memset feature was removed from the kernel sometime around 3.14.
> So no memset support in this driver either.
> - The hardware does not support DMA slave support

This point is what I'd missed when reviewing. Apologies for that.

> - The goal is to provide an interface to DMA engine framework for
> memcpy optimization so that the rest of the kernel drivers and
> applications make use of the hardware.
> 
> CSRT is an Intel specific ACPI table for slave devices. It was
> decided by Linaro that CSRT will not be supported for ARM64.
> 
> There were some discussions in ACPI forums to define a similar table
> for ARM64 but we are not there today and this hardware does not
> support slave interface.

Ok. Thanks for the info!

> ACPI enumeration is just like any other platform device. The driver
> gets looked up by a QCOM specific HID and the driver gets probed
> with the rest of the arguments in DSM object similar to device-tree
> attributes. The code uses device functions so the driver is not
> aware of where the parameters are coming from.

Ok.

So far the properties look device-internal, so I'm ok with thoses
properties being uniform and parsed in the manner that they are, though
I'm worried about the way the device description is split over two
nodes.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver
  2015-10-30 18:08     ` Sinan Kaya
  2015-10-30 18:18       ` Mark Rutland
@ 2015-10-30 18:18       ` Andy Shevchenko
  2015-10-30 18:25         ` Mark Rutland
  1 sibling, 1 reply; 51+ messages in thread
From: Andy Shevchenko @ 2015-10-30 18:18 UTC (permalink / raw)
  To: Sinan Kaya
  Cc: Mark Rutland, dmaengine, timur, cov, jcm, Rob Herring,
	Pawel Moll, Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams,
	devicetree, linux-kernel

On Fri, Oct 30, 2015 at 8:08 PM, Sinan Kaya <okaya@codeaurora.org> wrote:

>> The ACPI tables DSDT and CSRT (more info here:
>> http://www.acpi.info/links.htm) defines properties.
>>
>> DSDT:
>>   per DMAC: the resources
>>   per client: FixedDMA descriptor that contains channel / request line
>> pair.
>>
>> CSRT:
>>   necessary table to map which DMAC provides which request line, thus
>> request line numbering are global on platform.
>>
>> When DMAC driver is probed in the running system it should call as
>> well registration function from acpi-dma.c.
>>
>> All clients when use new DMA slave API gets channel automatically
>> based on their FixedDMA property.
>>
>> So, above is how it should be done. Didn't actually checked what this
>> driver does.
>>
> I was going to reply to all the questions in one pass but let me handle
> piece by piece.
>
> Here are some facts.
> - This hardware supports memcpy and memset only.
> - Memset feature was removed from the kernel sometime around 3.14. So no
> memset support in this driver either.
> - The hardware does not support DMA slave support
> - The goal is to provide an interface to DMA engine framework for memcpy
> optimization so that the rest of the kernel drivers and applications make
> use of the hardware.
>

> CSRT is an Intel specific ACPI table for slave devices.

Wrong.
It was designed by Microsoft to support multiple controllers, in
particular DMACs.
Have you read that document I posted link to?

> It was decided by
> Linaro that CSRT will not be supported for ARM64.

Interesting, ARM64 platforms are not going to have more than one DMAC
per system?

> There were some discussions in ACPI forums to define a similar table for
> ARM64 but we are not there today and this hardware does not support slave
> interface.
>
> ACPI enumeration is just like any other platform device. The driver gets
> looked up by a QCOM specific HID and the driver gets probed with the rest of
> the arguments in DSM object similar to device-tree attributes. The code uses
> device functions so the driver is not aware of where the parameters are
> coming from.



-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver
  2015-10-30 18:18       ` Andy Shevchenko
@ 2015-10-30 18:25         ` Mark Rutland
  2015-10-30 18:40           ` Andy Shevchenko
  2015-10-30 19:36           ` Sinan Kaya
  0 siblings, 2 replies; 51+ messages in thread
From: Mark Rutland @ 2015-10-30 18:25 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: Sinan Kaya, dmaengine, timur, cov, jcm, Rob Herring, Pawel Moll,
	Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams, devicetree,
	linux-kernel

On Fri, Oct 30, 2015 at 08:18:49PM +0200, Andy Shevchenko wrote:
> On Fri, Oct 30, 2015 at 8:08 PM, Sinan Kaya <okaya@codeaurora.org> wrote:
> 
> >> The ACPI tables DSDT and CSRT (more info here:
> >> http://www.acpi.info/links.htm) defines properties.
> >>
> >> DSDT:
> >>   per DMAC: the resources
> >>   per client: FixedDMA descriptor that contains channel / request line
> >> pair.
> >>
> >> CSRT:
> >>   necessary table to map which DMAC provides which request line, thus
> >> request line numbering are global on platform.
> >>
> >> When DMAC driver is probed in the running system it should call as
> >> well registration function from acpi-dma.c.
> >>
> >> All clients when use new DMA slave API gets channel automatically
> >> based on their FixedDMA property.
> >>
> >> So, above is how it should be done. Didn't actually checked what this
> >> driver does.
> >>
> > I was going to reply to all the questions in one pass but let me handle
> > piece by piece.
> >
> > Here are some facts.
> > - This hardware supports memcpy and memset only.
> > - Memset feature was removed from the kernel sometime around 3.14. So no
> > memset support in this driver either.
> > - The hardware does not support DMA slave support
> > - The goal is to provide an interface to DMA engine framework for memcpy
> > optimization so that the rest of the kernel drivers and applications make
> > use of the hardware.
> >
> 
> > CSRT is an Intel specific ACPI table for slave devices.
> 
> Wrong.
> It was designed by Microsoft to support multiple controllers, in
> particular DMACs.
> Have you read that document I posted link to?
> 
> > It was decided by
> > Linaro that CSRT will not be supported for ARM64.
> 
> Interesting, ARM64 platforms are not going to have more than one DMAC
> per system?

I cannot imagine that being true, and I don't see why Linaro would
decide such a thing.

It does appear that it's not relevant to this device and driver, given
the lack of clients, unless I've misunderstood?

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver
  2015-10-30 18:25         ` Mark Rutland
@ 2015-10-30 18:40           ` Andy Shevchenko
  2015-10-30 18:48             ` Sinan Kaya
  2015-10-30 19:36           ` Sinan Kaya
  1 sibling, 1 reply; 51+ messages in thread
From: Andy Shevchenko @ 2015-10-30 18:40 UTC (permalink / raw)
  To: Mark Rutland
  Cc: Sinan Kaya, dmaengine, timur, cov, jcm, Rob Herring, Pawel Moll,
	Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams, devicetree,
	linux-kernel

On Fri, Oct 30, 2015 at 8:25 PM, Mark Rutland <mark.rutland@arm.com> wrote:

>> > CSRT is an Intel specific ACPI table for slave devices.
>>
>> Wrong.
>> It was designed by Microsoft to support multiple controllers, in
>> particular DMACs.
>> Have you read that document I posted link to?
>>
>> > It was decided by
>> > Linaro that CSRT will not be supported for ARM64.
>>
>> Interesting, ARM64 platforms are not going to have more than one DMAC
>> per system?
>
> I cannot imagine that being true, and I don't see why Linaro would
> decide such a thing.
>
> It does appear that it's not relevant to this device and driver, given
> the lack of clients, unless I've misunderstood?

Yeah, just a side note.

-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver
  2015-10-30 18:40           ` Andy Shevchenko
@ 2015-10-30 18:48             ` Sinan Kaya
  2015-10-30 19:01               ` Mark Rutland
  0 siblings, 1 reply; 51+ messages in thread
From: Sinan Kaya @ 2015-10-30 18:48 UTC (permalink / raw)
  To: Andy Shevchenko, Mark Rutland
  Cc: dmaengine, timur, cov, jcm, Rob Herring, Pawel Moll,
	Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams, devicetree,
	linux-kernel



On 10/30/2015 2:40 PM, Andy Shevchenko wrote:
> On Fri, Oct 30, 2015 at 8:25 PM, Mark Rutland <mark.rutland@arm.com> wrote:
>
>>>> CSRT is an Intel specific ACPI table for slave devices.
>>>
>>> Wrong.
>>> It was designed by Microsoft to support multiple controllers, in
>>> particular DMACs.
>>> Have you read that document I posted link to?
>>>
Nope.

>>>> It was decided by
>>>> Linaro that CSRT will not be supported for ARM64.
>>>
>>> Interesting, ARM64 platforms are not going to have more than one DMAC
>>> per system?
>>
>> I cannot imagine that being true, and I don't see why Linaro would
>> decide such a thing.

See this.

https://wiki.linaro.org/LEG/Engineering/Kernel/ACPI/TablePriorities


>>
>> It does appear that it's not relevant to this device and driver, given
>> the lack of clients, unless I've misunderstood?
>
> Yeah, just a side note.
>
OK.

-- 
Sinan Kaya
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a 
Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver
  2015-10-30 18:48             ` Sinan Kaya
@ 2015-10-30 19:01               ` Mark Rutland
  2015-10-30 20:08                 ` Al Stone
  0 siblings, 1 reply; 51+ messages in thread
From: Mark Rutland @ 2015-10-30 19:01 UTC (permalink / raw)
  To: Sinan Kaya
  Cc: Andy Shevchenko, dmaengine, timur, cov, jcm, Rob Herring,
	Pawel Moll, Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams,
	devicetree, linux-kernel, leif.lindholm, al.stone

On Fri, Oct 30, 2015 at 02:48:06PM -0400, Sinan Kaya wrote:
> >>>>It was decided by
> >>>>Linaro that CSRT will not be supported for ARM64.

> See this.
> 
> https://wiki.linaro.org/LEG/Engineering/Kernel/ACPI/TablePriorities

The CSRT is listed under "Want", not "Never" or "Don't Care", so Linaro
have certainly not said that CSRT will not be supported. If anything,
they have stated that the table should be supported.

I suspect that the rationale for it not being "Critical" or "Must" is
that it's possible to use contemporary systems without DMA engines, but
we will need support at some point in future.

I also note that there are Linaro patches adding supoprt for DBG2 [1] ,
which is listed as "Never". So the page may be out of date...

Leif, Al?

[1] https://lkml.org/lkml/2015/9/8/287

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver
  2015-10-30 18:25         ` Mark Rutland
  2015-10-30 18:40           ` Andy Shevchenko
@ 2015-10-30 19:36           ` Sinan Kaya
  1 sibling, 0 replies; 51+ messages in thread
From: Sinan Kaya @ 2015-10-30 19:36 UTC (permalink / raw)
  To: Mark Rutland, Andy Shevchenko
  Cc: dmaengine, timur, cov, jcm, Rob Herring, Pawel Moll,
	Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams, devicetree,
	linux-kernel



On 10/30/2015 2:25 PM, Mark Rutland wrote:
> It does appear that it's not relevant to this device and driver, given
> the lack of clients, unless I've misunderstood?

You are right. Kernel and guest machine kernel are the only user of this 
DMA device.

No other HW devices.

-- 
Sinan Kaya
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a 
Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver
  2015-10-30 19:01               ` Mark Rutland
@ 2015-10-30 20:08                 ` Al Stone
  2015-10-30 20:15                   ` Andy Shevchenko
  0 siblings, 1 reply; 51+ messages in thread
From: Al Stone @ 2015-10-30 20:08 UTC (permalink / raw)
  To: Mark Rutland, Sinan Kaya
  Cc: Andy Shevchenko, dmaengine, timur, cov, jcm, Rob Herring,
	Pawel Moll, Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams,
	devicetree, linux-kernel, leif.lindholm

On 10/30/2015 01:01 PM, Mark Rutland wrote:
> On Fri, Oct 30, 2015 at 02:48:06PM -0400, Sinan Kaya wrote:
>>>>>> It was decided by
>>>>>> Linaro that CSRT will not be supported for ARM64.

Hrm.  I personally decided we didn't have to worry about using the CSRT
(specifically, let's wait until someone has a need for it), but I know
of no decision to *not* support it.  Please cite a source for that claim.

>> See this.
>>
>> https://wiki.linaro.org/LEG/Engineering/Kernel/ACPI/TablePriorities
> 
> The CSRT is listed under "Want", not "Never" or "Don't Care", so Linaro
> have certainly not said that CSRT will not be supported. If anything,
> they have stated that the table should be supported.

"Want" means interesting, and probably useful, but no clear indication that
anyone actually needs it.  At one point, we thought we might use the CSRT
for describing DMA, but it turns out we have not needed to.

However, let's make sure we're saying the same thing: the CSRT table is
properly defined in the kernel include/acpi/actbl2.h file so one can read
such a table and use it if they so choose.  Nothing that we have done at
Linaro in the arm64 part of the kernel relies on any of the content from
the CSRT, nor does it preclude someone relying on that content.  So, the
CSRT is defined, and is usable, but is just not being used -- by Linaro --
at present.

If that needs to change, let me know; no one has asked us to use the CSRT
for a DMA engine, and we have not been provided any hardware that requires
it.

> I suspect that the rationale for it not being "Critical" or "Must" is
> that it's possible to use contemporary systems without DMA engines, but
> we will need support at some point in future.

Essentially correct.  We (Linaro ACPI team) would be glad to work on it,
if needed.  We just need to know that someone wants it, and we need to
have some way to test it (i.e., aka "hardware").

> I also note that there are Linaro patches adding supoprt for DBG2 [1] ,
> which is listed as "Never". So the page may be out of date...

Sadly, it is indeed out of date, but it is on my TODO list.  The DBG2
table was originally listed as "Never" only because of the licensing;
this has since changed and is now usable for consoles, so it got worked
on and implemented.

> Leif, Al?
> 
> [1] https://lkml.org/lkml/2015/9/8/287
> 
> Thanks,
> Mark.
> 


-- 
ciao,
al
-----------------------------------
Al Stone
Software Engineer
Linaro Enterprise Group
al.stone@linaro.org
-----------------------------------

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver
  2015-10-30 20:08                 ` Al Stone
@ 2015-10-30 20:15                   ` Andy Shevchenko
  2015-10-30 20:18                     ` Mark Rutland
  2015-10-31  3:33                     ` Jon Masters
  0 siblings, 2 replies; 51+ messages in thread
From: Andy Shevchenko @ 2015-10-30 20:15 UTC (permalink / raw)
  To: Al Stone
  Cc: Mark Rutland, Sinan Kaya, dmaengine, timur, cov, jcm,
	Rob Herring, Pawel Moll, Ian Campbell, Kumar Gala, Vinod Koul,
	Dan Williams, devicetree, linux-kernel, leif.lindholm

On Fri, Oct 30, 2015 at 10:08 PM, Al Stone <al.stone@linaro.org> wrote:
> On 10/30/2015 01:01 PM, Mark Rutland wrote:
>> On Fri, Oct 30, 2015 at 02:48:06PM -0400, Sinan Kaya wrote:

>> The CSRT is listed under "Want", not "Never" or "Don't Care", so Linaro
>> have certainly not said that CSRT will not be supported. If anything,
>> they have stated that the table should be supported.
>
> "Want" means interesting, and probably useful, but no clear indication that
> anyone actually needs it.  At one point, we thought we might use the CSRT
> for describing DMA, but it turns out we have not needed to.

Then you are going to have either 1 or 0 DMAC for slave devices, right?

The CSRT, unfortunately, the only way how to enumerate DMAC to be used
by slave devices.
You may look into drivers/dma/acpi-dma.c for usage in Linux.

Yes, I know about _DSD, but I don't think it will provide soon any
other official mechanisms to what we have now. Better to ask Rafael
and Mika.

> However, let's make sure we're saying the same thing: the CSRT table is
> properly defined in the kernel include/acpi/actbl2.h file so one can read
> such a table and use it if they so choose.  Nothing that we have done at
> Linaro in the arm64 part of the kernel relies on any of the content from
> the CSRT, nor does it preclude someone relying on that content.  So, the
> CSRT is defined, and is usable, but is just not being used -- by Linaro --
> at present.

This sounds clear.

> If that needs to change, let me know; no one has asked us to use the CSRT
> for a DMA engine, and we have not been provided any hardware that requires
> it.

See above.

-- 
With Best Regards,
Andy Shevchenko

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver
  2015-10-30 20:15                   ` Andy Shevchenko
@ 2015-10-30 20:18                     ` Mark Rutland
  2015-10-31  3:33                     ` Jon Masters
  1 sibling, 0 replies; 51+ messages in thread
From: Mark Rutland @ 2015-10-30 20:18 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: Al Stone, Sinan Kaya, dmaengine, timur, cov, jcm, Rob Herring,
	Pawel Moll, Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams,
	devicetree, linux-kernel, leif.lindholm

On Fri, Oct 30, 2015 at 10:15:09PM +0200, Andy Shevchenko wrote:
> On Fri, Oct 30, 2015 at 10:08 PM, Al Stone <al.stone@linaro.org> wrote:
> > On 10/30/2015 01:01 PM, Mark Rutland wrote:
> >> On Fri, Oct 30, 2015 at 02:48:06PM -0400, Sinan Kaya wrote:
> 
> >> The CSRT is listed under "Want", not "Never" or "Don't Care", so Linaro
> >> have certainly not said that CSRT will not be supported. If anything,
> >> they have stated that the table should be supported.
> >
> > "Want" means interesting, and probably useful, but no clear indication that
> > anyone actually needs it.  At one point, we thought we might use the CSRT
> > for describing DMA, but it turns out we have not needed to.
> 
> Then you are going to have either 1 or 0 DMAC for slave devices, right?

I suspect that the currently supported platforms have 0. If not, we have
problems.

> The CSRT, unfortunately, the only way how to enumerate DMAC to be used
> by slave devices.
> You may look into drivers/dma/acpi-dma.c for usage in Linux.
> 
> Yes, I know about _DSD, but I don't think it will provide soon any
> other official mechanisms to what we have now. Better to ask Rafael
> and Mika.

_DSD is insufficient here, given that there is no prescribed way to use
it to describe DMA. Anything for that would have to go through ASWG
before we could use it.

Thanks,
Mark.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver
  2015-10-30 10:24   ` Arnd Bergmann
@ 2015-10-30 21:42     ` Sinan Kaya
  2015-10-30 21:55       ` Timur Tabi
  2015-10-30 22:28       ` Arnd Bergmann
  0 siblings, 2 replies; 51+ messages in thread
From: Sinan Kaya @ 2015-10-30 21:42 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: dmaengine, timur, cov, jcm, Rob Herring, Pawel Moll,
	Mark Rutland, Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams,
	devicetree, linux-kernel

thanks for the review. I'll work on these.

On 10/30/2015 6:24 AM, Arnd Bergmann wrote:
> On Thursday 29 October 2015 23:08:13 Sinan Kaya wrote:
>> This patch adds support for hidma engine. The driver
>> consists of two logical blocks. The DMA engine interface
>> and the low-level interface. This version of the driver
>> does not support virtualization on this release and only
>> memcpy interface support is included.
>
> Does that mean you can have slave device support eventually?
>
> If so, please put that into the binding document already.

will do.

>
>> Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
>> ---
>>   .../devicetree/bindings/dma/qcom_hidma.txt         |   18 +
>>   drivers/dma/Kconfig                                |   11 +
>>   drivers/dma/Makefile                               |    4 +
>>   drivers/dma/qcom_hidma.c                           | 1717 ++++++++++++++++++++
>>   drivers/dma/qcom_hidma.h                           |   44 +
>>   drivers/dma/qcom_hidma_ll.c                        | 1132 +++++++++++++
>>   6 files changed, 2926 insertions(+)
>>   create mode 100644 Documentation/devicetree/bindings/dma/qcom_hidma.txt
>>   create mode 100644 drivers/dma/qcom_hidma.c
>>   create mode 100644 drivers/dma/qcom_hidma.h
>>   create mode 100644 drivers/dma/qcom_hidma_ll.c
>>
>> diff --git a/Documentation/devicetree/bindings/dma/qcom_hidma.txt b/Documentation/devicetree/bindings/dma/qcom_hidma.txt
>> new file mode 100644
>> index 0000000..9a01635
>> --- /dev/null
>> +++ b/Documentation/devicetree/bindings/dma/qcom_hidma.txt
>> @@ -0,0 +1,18 @@
>> +Qualcomm Technologies HIDMA Channel driver
>> +
>> +Required properties:
>> +- compatible: must contain "qcom,hidma"
>
> Be more specific here. Also, should this be 'hisilicon,hidma-1.0' rather
> than 'qcom'? I'm guessing from the name that this is a device you licensed
> from them.
>
No, this is a QCOM part. I used the driver from hisilicon as a starting 
point instead of starting from scratch. That's why, I kept the original 
copy rights.

>> +- reg: Addresses for the transfer and event channel
>> +- interrupts: Should contain the event interrupt
>> +- desc-count: Number of asynchronous requests this channel can handle
>> +- event-channel: The HW event channel completions will be delivered.
>> +Example:
>> +
>> +	hidma_24: hidma@0x5c050000 {
>
> Name should be 'dma-controller' in DT, not 'hidma'.
will test and change.

>
>> +		compatible = "qcom,hidma";
>> +		reg = <0 0x5c050000 0x0 0x1000>,
>> +		      <0 0x5c0b0000 0x0 0x1000>;
>> +		interrupts = <0 389 0>;
>> +		desc-count = <10>;
>> +		event-channel = /bits/ 8 <4>;
>> +	};
>
> Remove the /bits/.

ok

>
>> diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
>> index 76a5a5e..2645185 100644
>> --- a/drivers/dma/Kconfig
>> +++ b/drivers/dma/Kconfig
>> @@ -512,6 +512,17 @@ config QCOM_HIDMA_MGMT
>>   	  OS would run QCOM_HIDMA driver and the hypervisor would run
>>   	  the QCOM_HIDMA_MGMT driver.
>>
>> +config QCOM_HIDMA
>> +	tristate "Qualcomm Technologies HIDMA support"
>> +	select DMA_ENGINE
>> +	select DMA_VIRTUAL_CHANNELS
>> +	help
>> +	  Enable support for the Qualcomm Technologies HIDMA controller.
>> +	  The HIDMA controller supports optimized buffer copies
>> +	  (user to kernel, kernel to kernel, etc.).  It only supports
>> +	  memcpy/memset interfaces. The core is not intended for general
>> +	  purpose slave DMA.
>> +
>>   config XILINX_VDMA
>>   	tristate "Xilinx AXI VDMA Engine"
>>   	depends on (ARCH_ZYNQ || MICROBLAZE)
>> diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
>> index 3d25ffd..5665df2 100644
>> --- a/drivers/dma/Makefile
>> +++ b/drivers/dma/Makefile
>> @@ -53,6 +53,10 @@ obj-$(CONFIG_PL330_DMA) += pl330.o
>>   obj-$(CONFIG_PPC_BESTCOMM) += bestcomm/
>>   obj-$(CONFIG_PXA_DMA) += pxa_dma.o
>>   obj-$(CONFIG_QCOM_BAM_DMA) += qcom_bam_dma.o
>> +obj-$(CONFIG_QCOM_HIDMA) +=  qcom_hdma.o
>> +qcom_hdma-objs        := qcom_hidma_ll.o qcom_hidma.o
>> +
>
> The driver is linked into a single module, so all the EXPORT_SYMBOL
> statements can be dropped.

will test and change.

>
>> +/* Default idle time is 2 seconds. This parameter can
>> + * be overridden by changing the following
>> + * /sys/bus/platform/devices/QCOM8061:<xy>/power/autosuspend_delay_ms
>> + * during kernel boot.
>> + */
>> +#define AUTOSUSPEND_TIMEOUT		2000
>> +#define HIDMA_DEFAULT_DESCRIPTOR_COUNT	16
>> +#define MODULE_NAME			"hidma"
>
> MODULE_NAME and HIDMA_DEFAULT_DESCRIPTOR_COUNT can be dropped

I'll remove the module_name. The default descriptor is used if 
device-tree or acpi table does not have the descriptor count.

>
>
>> +#define HIDMA_RUNTIME_GET(dmadev)				\
>> +do {								\
>> +	atomic_inc(&(dmadev)->pm_counter);			\
>> +	TRC_PM((dmadev)->ddev.dev,				\
>> +		"%s:%d pm_runtime_get %d\n", __func__, __LINE__,\
>> +		atomic_read(&(dmadev)->pm_counter));		\
>> +	pm_runtime_get_sync((dmadev)->ddev.dev);		\
>> +} while (0)
>> +
>> +#define HIDMA_RUNTIME_SET(dmadev)				\
>> +do {								\
>> +	atomic_dec(&(dmadev)->pm_counter);			\
>> +	TRC_PM((dmadev)->ddev.dev,				\
>> +		"%s:%d pm_runtime_put_autosuspend:%d\n",	\
>> +		__func__, __LINE__,				\
>> +		atomic_read(&(dmadev)->pm_counter));		\
>> +	pm_runtime_mark_last_busy((dmadev)->ddev.dev);		\
>> +	pm_runtime_put_autosuspend((dmadev)->ddev.dev);		\
>> +} while (0)
>
> Inline functions.

ok, I was hoping to keep the file and line numbers as the PM stuff gets 
called from multiple locations. should I call the inline functions with 
__func__ and __LINE__ from another macro  like

#define HIDMA_RUNTIME_GET(dmadev)
	func(dmadev, __func__, __LINE__)

etc.


>
>> +struct hidma_test_sync {
>> +	atomic_t			counter;
>> +	wait_queue_head_t		wq;
>> +};
>
> Let me guess, you converted this from a semaphore? ;-)
>
> Just put the two members into the containing structure and relete it here.

Probably some other code that I don't remember. will do.

>
>> +struct hidma_dev {
>> +	u8				evridx;
>> +	u32				nr_descriptors;
>> +
>> +	void				*lldev;
>> +	void				__iomem *dev_trca;
>> +	void				__iomem *dev_evca;
>> +	int (*self_test)(struct hidma_dev *device);
>> +	struct dentry			*debugfs;
>> +	struct dentry			*stats;
>> +
>> +	/* used to protect the pending channel list*/
>> +	spinlock_t			lock;
>> +	dma_addr_t			dev_trca_phys;
>> +	struct dma_device		ddev;
>> +	struct tasklet_struct		tasklet;
>
> workqueue maybe?

is there an advantage of workqueue over tasklet? I'm open to suggestions.

>
>> +	resource_size_t			dev_trca_size;
>> +	dma_addr_t			dev_evca_phys;
>> +	resource_size_t			dev_evca_size;
>
> All three look unused and can be removed.

ok.

>
>> +static unsigned int debug_pm;
>> +module_param(debug_pm, uint, 0644);
>> +MODULE_PARM_DESC(debug_pm,
>> +		 "debug runtime power management transitions (default: 0)");
>> +
>> +#define TRC_PM(...) do {			\
>> +		if (debug_pm)			\
>> +			dev_info(__VA_ARGS__);	\
>> +	} while (0)
>
> Again, remove these after you are done debugging the problem at hand,
> we don't need to clutter up the upstream version.

ok, I'll take them out if that's the norm. I was hoping to keep them in 
case something shows up.

>
>> +	/*
>> +	 * It is assumed that the hardware can move the data within 1s
>> +	 * and signal the OS of the completion
>> +	 */
>> +	ret = wait_event_interruptible_timeout(dmadev->test_result.wq,
>> +		atomic_read(&dmadev->test_result.counter) == (map_count),
>> +				msecs_to_jiffies(10000));
>> +
>> +	if (ret <= 0) {
>> +		dev_err(dmadev->ddev.dev,
>> +			"Self-test sg copy timed out, disabling\n");
>> +		err = -ENODEV;
>> +		goto tx_status;
>> +	}
>
> Why ENODEV? Could you make this handle restarted system calls?

This is the self test code. It gets called from probe. If there is a 
problem with the device or system configuration, I don't want to enable 
this device. I can certainly return a different error code though. 
What's a good code?

>
>> +
>> +/*
>> + * Perform a streaming transaction to verify the HW works.
>> + */
>> +static int hidma_selftest_streaming(struct hidma_dev *dmadev,
>> +			struct dma_chan *dma_chanptr, u64 size,
>> +			unsigned long flags)
>> +{
>
> You have a lot of selftest code here. Can you try to move that into a file
> that works for all DMA engines? It feels like this should not be part of
> a driver.

Will try.

>
>> +
>> +#if IS_ENABLED(CONFIG_DEBUG_FS)
>> +
>> +#define SIER_CHAN_SHOW(chan, name) \
>> +		seq_printf(s, #name "=%u\n", chan->name)
>
> can be open-coded for clarity.
>

I think I copied this from another driver. Would you rather see 
seq_printf in the code than a macro.

Sorry, I didn't get what you mean by "open-coded".

>> +       ret = dma_mapping_error(dev, dma_src);
>> +       if (ret) {
>> +               dev_err(dev, "dma_mapping_error with ret:%d\n", ret);
>> +               ret = -ENOMEM;
>> +       } else {
>> +               phys_addr_t phys;
>> +
>> +               phys = dma_to_phys(dev, dma_src);
>> +               if (strcmp(__va(phys), "hello world") != 0) {
>> +                       dev_err(dev, "memory content mismatch\n");
>> +                       ret = -EINVAL;
>> +               } else {
>> +                       dev_dbg(dev, "mapsingle:dma_map_single works\n");
>> +               }
>> +               dma_unmap_single(dev, dma_src, buf_size, DMA_TO_DEVICE);
>> +       }
>
> dma_to_phys() is architecture specific and does not work if you
> have an IOMMU. Just use the virtual address you passed into dma_map_*.

Should we fix the real problem?

I patched dma_to_phys to call IOMMU driver for translation when IOMMU is 
present. I was planning to upstream that patch when Robin Murphy's 
changes make upstream.

The assumption of phys==dma address doesn't look scalable to me. I was 
checking the DMA subsystem for correctness with this test. If use 
dma_src, it is going to be a 1==1 test.

>
>> +/**
>> + * hidma_dma_info: display HIDMA device info
>> + *
>> + * Display the info for the current HIDMA device.
>> + */
>> +static int hidma_dma_info(struct seq_file *s, void *unused)
>> +{
>> +	struct hidma_dev *dmadev = s->private;
>> +	struct dma_device *dma = &dmadev->ddev;
>> +
>> +	seq_printf(s, "nr_descriptors=%d\n", dmadev->nr_descriptors);
>> +	seq_printf(s, "dev_trca=%p\n", &dmadev->dev_trca);
>> +	seq_printf(s, "dev_trca_phys=%pa\n", &dmadev->dev_trca_phys);
>> +	seq_printf(s, "dev_trca_size=%pa\n", &dmadev->dev_trca_size);
>> +	seq_printf(s, "dev_evca=%p\n", &dmadev->dev_evca);
>> +	seq_printf(s, "dev_evca_phys=%pa\n", &dmadev->dev_evca_phys);
>> +	seq_printf(s, "dev_evca_size=%pa\n", &dmadev->dev_evca_size);
>
> Don't print pointers here.

ok

>
>> +	seq_printf(s, "self_test=%u\n",
>> +		atomic_read(&dmadev->test_result.counter));
>> +
>> +	seq_printf(s, "copy%s%s%s%s%s%s%s%s%s%s%s\n",
>> +		dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "",
>> +		dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "",
>> +		dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "",
>> +		dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "",
>> +		dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : "",
>> +		dma_has_cap(DMA_SG, dma->cap_mask) ? " sg" : "",
>> +		dma_has_cap(DMA_ASYNC_TX, dma->cap_mask) ? " async" : "",
>> +		dma_has_cap(DMA_SLAVE, dma->cap_mask) ? " slave" : "",
>> +		dma_has_cap(DMA_CYCLIC, dma->cap_mask) ? " cyclic" : "",
>> +		dma_has_cap(DMA_INTERLEAVE, dma->cap_mask) ? " intl" : "",
>> +		dma_has_cap(DMA_MEMCPY, dma->cap_mask) ? " memcpy" : "");
>> +
>> +	return 0;
>> +}
>
> The selftest part and the features could just be separate files in the
> core dmaengine code, it doesn't really belong here.
>
I'll take a look. It could take a couple of iterations to get this right 
but I could certainly move my testing to another file for sharing.

>> +}
>> +#else
>> +static void hidma_debug_uninit(struct hidma_dev *dmadev)
>> +{
>> +}
>> +static int hidma_debug_init(struct hidma_dev *dmadev)
>> +{
>> +	return 0;
>> +}
>> +#endif
>
> You can remove the #ifdef here. The debugfs code already stubs out all
> debugfs_create_*() and turns it all into nops when it is disabled.

ok, I think I got compilation errors with the data structures but I'll 
take a look.

>
>> +	dma_cap_set(DMA_MEMCPY, dmadev->ddev.cap_mask);
>> +	/* Apply default dma_mask if needed */
>> +	if (!pdev->dev.dma_mask) {
>> +		pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask;
>> +		pdev->dev.coherent_dma_mask = DMA_BIT_MASK(64);
>> +	}
>> +
>
> remove the check, or use

I got caught on this problem before. Let me go back and revisit. It took 
my entire day to figure out that dma_mask pointer was not set.

>
> 	if (WARN_ON(!pdev->dev.dma_mask))
> 		return -ENXIO;
>
> The dma mask has to always be set by the platform code before probe()
> is called. If it is not set, you are not allowed to perform DMA.

I tested this on an ACPI platform BTW when I was working on the initial 
implementation.

>
>> +	dmadev->dev_evca_phys = evca_resource->start;
>> +	dmadev->dev_evca_size = resource_size(evca_resource);
>> +
>> +	dev_dbg(&pdev->dev, "dev_evca_phys:%pa\n", &dmadev->dev_evca_phys);
>> +	dev_dbg(&pdev->dev, "dev_evca_size:%pa\n", &dmadev->dev_evca_size);
>> +
>
>> +	dev_dbg(&pdev->dev, "qcom_hidma: mapped EVCA %pa to %p\n",
>> +		&dmadev->dev_evca_phys, dmadev->dev_evca);
>
>
>
>> +	dmadev->dev_trca_phys = trca_resource->start;
>> +	dmadev->dev_trca_size = resource_size(trca_resource);
>> +
>> +	dev_dbg(&pdev->dev, "dev_trca_phys:%pa\n", &dmadev->dev_trca_phys);
>> +	dev_dbg(&pdev->dev, "dev_trca_size:%pa\n", &dmadev->dev_trca_size);
>
> Don't print pointers.

OK

>
>> +	rc = devm_request_irq(&pdev->dev, chirq, hidma_chirq_handler, 0,
>> +			      "qcom-hidma", &dmadev->lldev);
>> +	if (rc) {
>> +		dev_err(&pdev->dev, "chirq registration failed: %d\n", chirq);
>> +		goto chirq_request_failed;
>> +	}
>> +
>> +	dev_dbg(&pdev->dev, "initializing DMA channels\n");
>> +	INIT_LIST_HEAD(&dmadev->ddev.channels);
>> +	rc = hidma_chan_init(dmadev, 0);
>> +	if (rc) {
>> +		dev_err(&pdev->dev, "probe:channel init failed\n");
>> +		goto channel_init_failed;
>> +	}
>> +	dev_dbg(&pdev->dev, "HI-DMA engine driver starting self test\n");
>> +	rc = dmadev->self_test(dmadev);
>> +	if (rc) {
>> +		dev_err(&pdev->dev, "probe: self test failed: %d\n", rc);
>> +		goto self_test_failed;
>> +	}
>> +	dev_info(&pdev->dev, "probe: self test succeeded.\n");
>> +
>> +	dev_dbg(&pdev->dev, "calling dma_async_device_register\n");
>> +	rc = dma_async_device_register(&dmadev->ddev);
>> +	if (rc) {
>> +		dev_err(&pdev->dev,
>> +			"probe: failed to register slave DMA: %d\n", rc);
>> +		goto device_register_failed;
>> +	}
>> +	dev_dbg(&pdev->dev, "probe: dma_async_device_register done\n");
>> +
>> +	rc = hidma_debug_init(dmadev);
>> +	if (rc) {
>> +		dev_err(&pdev->dev,
>> +			"probe: failed to init debugfs: %d\n", rc);
>> +		goto debug_init_failed;
>> +	}
>> +
>> +	dev_info(&pdev->dev, "HI-DMA engine driver registration complete\n");
>> +	platform_set_drvdata(pdev, dmadev);
>> +	HIDMA_RUNTIME_SET(dmadev);
>> +	return 0;
>
> Remove the debug prints when you are done debugging.

errors or the dev_infos?

>
>> +debug_init_failed:
>> +device_register_failed:
>> +self_test_failed:
>> +channel_init_failed:
>> +chirq_request_failed:
>> +	hidma_ll_uninit(dmadev->lldev);
>> +ll_init_failed:
>> +evridx_failed:
>> +remap_trca_failed:
>> +remap_evca_failed:
>> +	if (dmadev)
>> +		hidma_free(dmadev);
>
> Rename the labels according to what you do at in the failure case
> and remove most of them. This is 'goto', not 'comefrom' ;-)

ok, We had different opinions how gotos should be written.
>
>> +static struct platform_driver hidma_driver = {
>> +	.probe = hidma_probe,
>> +	.remove = hidma_remove,
>> +	.driver = {
>> +		.name = MODULE_NAME,
>> +		.owner = THIS_MODULE,
>> +		.of_match_table = of_match_ptr(hidma_match),
>> +		.acpi_match_table = ACPI_PTR(hidma_acpi_ids),
>
>
> Remove .owner and of_match_ptr().
>
ok, will do.

>> +	},
>> +};
>> +
>> +static int __init hidma_init(void)
>> +{
>> +	return platform_driver_register(&hidma_driver);
>> +}
>> +late_initcall(hidma_init);
>> +
>> +static void __exit hidma_exit(void)
>> +{
>> +	platform_driver_unregister(&hidma_driver);
>> +}
>> +module_exit(hidma_exit);
>
> module_platform_driver()

ok
>
>> +
>> +	if (unlikely(tre_ch >= lldev->nr_tres)) {
>> +		dev_err(lldev->dev, "invalid TRE number in free:%d", tre_ch);
>> +		return;
>> +	}
>> +
>> +	tre = &lldev->trepool[tre_ch];
>> +	if (unlikely(atomic_read(&tre->allocated) != true)) {
>> +		dev_err(lldev->dev, "trying to free an unused TRE:%d",
>> +			tre_ch);
>> +		return;
>> +	}
>
> Remove the 'unlikely' and the redundant '!= true'.
>
> Only use 'likely' or 'unlikely' if you can measure a difference.

ok
>
>> +static int hidma_ll_reset(struct hidma_lldev *lldev)
>> +{
>> +	u32 val;
>> +	int count;
>> +
>> +	val = readl_relaxed(lldev->trca + TRCA_CTRLSTS_OFFSET);
>> +	val = val & ~(CH_CONTROL_MASK << 16);
>> +	val = val | (CH_RESET << 16);
>> +	writel_relaxed(val, lldev->trca + TRCA_CTRLSTS_OFFSET);
>> +
>> +	/* wait until the reset is performed */
>> +	wmb();
>> +
>> +	/* Delay 10ms after reset to allow DMA logic to quiesce.*/
>> +	for (count = 0; count < 10; count++) {
>> +		val = readl_relaxed(lldev->trca + TRCA_CTRLSTS_OFFSET);
>> +		lldev->trch_state = (val >> CH_STATE_BIT_POS)
>> +					& CH_STATE_MASK;
>> +		if (lldev->trch_state == CH_DISABLED)
>> +			break;
>> +		mdelay(1);
>> +	}
>> +	val = readl_relaxed(lldev->trca + TRCA_CTRLSTS_OFFSET);
>> +	lldev->trch_state = (val >> CH_STATE_BIT_POS) & CH_STATE_MASK;
>> +	if (lldev->trch_state != CH_DISABLED) {
>> +		dev_err(lldev->dev,
>> +			"transfer channel did not reset\n");
>> +		return -ENODEV;
>> +	}
>> +
>> +	val = readl_relaxed(lldev->evca + EVCA_CTRLSTS_OFFSET);
>> +	val = val & ~(CH_CONTROL_MASK << 16);
>> +	val = val | (CH_RESET << 16);
>> +	writel_relaxed(val, lldev->evca + EVCA_CTRLSTS_OFFSET);
>> +
>> +	/* wait until the reset is performed */
>> +	wmb();
>> +
>> +	/* Delay 10ms after reset to allow DMA logic to quiesce.*/
>> +	for (count = 0; count < 10; count++) {
>> +		val = readl_relaxed(lldev->evca + EVCA_CTRLSTS_OFFSET);
>> +		lldev->evch_state = (val >> CH_STATE_BIT_POS)
>> +					& CH_STATE_MASK;
>> +		if (lldev->evch_state == CH_DISABLED)
>> +			break;
>> +		mdelay(1);
>> +	}
>
> Try using a workqueue to get into a state where you can call msleep()
> instead of mdelay().
>
will try

> Also, if you waste CPU cycles for hundreds of milliseconds, it's unlikely
> that the function is so performance critical that it requires writel_relaxed().
>
> Just use writel() here.

The issue is not writel_relaxed vs. writel. After I issue reset, I need 
wait for some time to confirm reset was done. I can use readl_polling 
instead of mdelay if we don't like mdelay.

>> +/*
>> + * The interrupt handler for HIDMA will try to consume as many pending
>> + * EVRE from the event queue as possible. Each EVRE has an associated
>> + * TRE that holds the user interface parameters. EVRE reports the
>> + * result of the transaction. Hardware guarantees ordering between EVREs
>> + * and TREs. We use last processed offset to figure out which TRE is
>> + * associated with which EVRE. If two TREs are consumed by HW, the EVREs
>> + * are in order in the event ring.
>> + * This handler will do a one pass for consuming EVREs. Other EVREs may
>> + * be delivered while we are working. It will try to consume incoming
>> + * EVREs one more time and return.
>> + * For unprocessed EVREs, hardware will trigger another interrupt until
>> + * all the interrupt bits are cleared.
>> + */
>> +static void hidma_ll_int_handler_internal(struct hidma_lldev *lldev)
>> +{
>> +	u32 status;
>> +	u32 enable;
>> +	u32 cause;
>> +	int repeat = 2;
>> +	unsigned long timeout;
>> +
>> +	status = readl_relaxed(lldev->evca + EVCA_IRQ_STAT_OFFSET);
>> +	enable = readl_relaxed(lldev->evca + EVCA_IRQ_EN_OFFSET);
>> +	cause = status & enable;
>> +
>
> Reading the status probably requires a readl() rather than readl_relaxed()
> to guarantee that the DMA data has arrived in memory by the time that the
> register data is seen by the CPU. If using readl_relaxed() here is a valid
> and required optimization, please add a comment to explain why it works
> and how much you gain.

I will add some description. This is a high speed peripheral. I don't 
like spreading barriers as candies inside the readl and writel unless I 
have to.

According to the barriers video, I watched on youtube this should be the 
rule for ordering.

"if you do two relaxed reads and check the results of the returned 
variables, ARM architecture guarantees that these two relaxed variables 
will get observed during the check."

this is called implied ordering or something of that sort.

>
>> +		/* Another interrupt might have arrived while we are
>> +		 * processing this one. Read the new cause.
>> +		 */
>> +		status = readl_relaxed(lldev->evca + EVCA_IRQ_STAT_OFFSET);
>> +		enable = readl_relaxed(lldev->evca + EVCA_IRQ_EN_OFFSET);
>> +		cause = status & enable;
>> +
>> +		repeat--;
>> +	}
>
> Same here.
>

comment above.

>
>> +}
>> +
>> +
>> +static int hidma_ll_enable(struct hidma_lldev *lldev)
>> +{
>> +	u32 val;
>> +
>> +	val = readl_relaxed(lldev->evca + EVCA_CTRLSTS_OFFSET);
>> +	val &= ~(CH_CONTROL_MASK << 16);
>> +	val |= (CH_ENABLE << 16);
>> +
>> +	writel_relaxed(val, lldev->evca + EVCA_CTRLSTS_OFFSET);
>> +
>> +	/* wait until channel is enabled */
>> +	wmb();
>> +
>> +	mdelay(1);
>> +
>> +	val = readl_relaxed(lldev->evca + EVCA_CTRLSTS_OFFSET);
>> +	lldev->evch_state = (val >> CH_STATE_BIT_POS) & CH_STATE_MASK;
>> +	if ((lldev->evch_state != CH_ENABLED) &&
>> +		(lldev->evch_state != CH_RUNNING)) {
>> +		dev_err(lldev->dev,
>> +			"event channel did not get enabled\n");
>> +		return -ENODEV;
>> +	}
>> +
>> +	val = readl_relaxed(lldev->trca + TRCA_CTRLSTS_OFFSET);
>> +	val = val & ~(CH_CONTROL_MASK << 16);
>> +	val = val | (CH_ENABLE << 16);
>> +	writel_relaxed(val, lldev->trca + TRCA_CTRLSTS_OFFSET);
>> +
>> +	/* wait until channel is enabled */
>> +	wmb();
>> +
>> +	mdelay(1);
>
> Another workqueue? You should basically never call mdelay().

I'll use polled read instead.

>
>> +static int hidma_ll_hw_start(void *llhndl)
>> +{
>> +	int rc = 0;
>> +	struct hidma_lldev *lldev = llhndl;
>> +	unsigned long irqflags;
>> +
>> +	spin_lock_irqsave(&lldev->lock, irqflags);
>> +	writel_relaxed(lldev->tre_write_offset,
>> +			lldev->trca + TRCA_DOORBELL_OFFSET);
>> +	spin_unlock_irqrestore(&lldev->lock, irqflags);
>
> How does this work? The writel_relaxed() won't synchronize with either
> the DMA data or the spinlock.

mutex and spinlocks have barriers inside. See the youtube video.

https://www.youtube.com/watch?v=6ORn6_35kKo


>
>> +int hidma_ll_init(void **lldevp, struct device *dev, u32 nr_tres,
>> +			void __iomem *trca, void __iomem *evca,
>> +			u8 evridx)
>
> How about returning the pointer rather than passing in an indirect pointer?

ok

>
> Also, your abstraction seem to go a little too far if the upper driver
> doesn't know what the lower driver calls its main device structure.
>
> Or you can go further and just embed the struct hidma_lldev within the
> struct hidma_dev to save one?

That's how it was before. It got too complex and variables/spinlocks got 
intermixed. I borrowed the upper layer and it worked as it is. I rather 
keep all hardware stuff in another file and do not mix and match for safety.

>
>> +void hidma_ll_chstats(struct seq_file *s, void *llhndl, u32 tre_ch)
>> +{
>> +	struct hidma_lldev *lldev = llhndl;
>> +	struct hidma_tre *tre;
>> +	u32 length;
>> +	dma_addr_t src_start;
>> +	dma_addr_t dest_start;
>> +	u32 *tre_local;
>> +
>> +	if (unlikely(tre_ch >= lldev->nr_tres)) {
>> +		dev_err(lldev->dev, "invalid TRE number in chstats:%d",
>> +			tre_ch);
>> +		return;
>> +	}
>> +	tre = &lldev->trepool[tre_ch];
>> +	seq_printf(s, "------Channel %d -----\n", tre_ch);
>> +	seq_printf(s, "allocated=%d\n", atomic_read(&tre->allocated));
>> +	HIDMA_CHAN_SHOW(tre, queued);
>> +	seq_printf(s, "err_info=0x%x\n",
>> +		   lldev->tx_status_list[tre->chidx].err_info);
>> +	seq_printf(s, "err_code=0x%x\n",
>> +		   lldev->tx_status_list[tre->chidx].err_code);
>> +	HIDMA_CHAN_SHOW(tre, status);
>> +	HIDMA_CHAN_SHOW(tre, chidx);
>> +	HIDMA_CHAN_SHOW(tre, dma_sig);
>> +	seq_printf(s, "dev_name=%s\n", tre->dev_name);
>> +	seq_printf(s, "callback=%p\n", tre->callback);
>> +	seq_printf(s, "data=%p\n", tre->data);
>> +	HIDMA_CHAN_SHOW(tre, tre_index);
>> +
>> +	tre_local = &tre->tre_local[0];
>> +	src_start = tre_local[TRE_SRC_LOW_IDX];
>> +	src_start = ((u64)(tre_local[TRE_SRC_HI_IDX]) << 32) + src_start;
>> +	dest_start = tre_local[TRE_DEST_LOW_IDX];
>> +	dest_start += ((u64)(tre_local[TRE_DEST_HI_IDX]) << 32);
>> +	length = tre_local[TRE_LEN_IDX];
>> +
>> +	seq_printf(s, "src=%pap\n", &src_start);
>> +	seq_printf(s, "dest=%pap\n", &dest_start);
>> +	seq_printf(s, "length=0x%x\n", length);
>> +}
>> +EXPORT_SYMBOL_GPL(hidma_ll_chstats);
>
> Remove all the pointers here. I guess you can remove the entire debugfs
> file really ;-)

ok, I need some facility to print out stuff when problems happened. 
Would you rather use sysfs?

>
> This looks like it is better done using ftrace for the low-level internals
> of the driver.

I'll look at ftrace.

>
> 	Arnd
>

thanks for the review again.

-- 
Sinan Kaya
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a 
Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver
  2015-10-30 21:42     ` Sinan Kaya
@ 2015-10-30 21:55       ` Timur Tabi
  2015-10-30 22:47         ` Arnd Bergmann
  2015-10-30 22:28       ` Arnd Bergmann
  1 sibling, 1 reply; 51+ messages in thread
From: Timur Tabi @ 2015-10-30 21:55 UTC (permalink / raw)
  To: Sinan Kaya, Arnd Bergmann
  Cc: dmaengine, cov, jcm, Rob Herring, Pawel Moll, Mark Rutland,
	Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams, devicetree,
	linux-kernel

On 10/30/2015 04:42 PM, Sinan Kaya wrote:
>>
>>     if (WARN_ON(!pdev->dev.dma_mask))
>>         return -ENXIO;
>>
>> The dma mask has to always be set by the platform code before probe()
>> is called. If it is not set, you are not allowed to perform DMA.
>
> I tested this on an ACPI platform BTW when I was working on the initial
> implementation.

PowerPC sets the mask to 32 bits by default:

http://lxr.free-electrons.com/ident?i=arch_setup_pdev_archdata

Should we do something similar in ARM64?  Today, we have to manually set 
the DMA mask in all drivers.

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the
Code Aurora Forum, a Linux Foundation Collaborative Project.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver
  2015-10-30 21:42     ` Sinan Kaya
  2015-10-30 21:55       ` Timur Tabi
@ 2015-10-30 22:28       ` Arnd Bergmann
  2015-10-30 22:36         ` Timur Tabi
                           ` (2 more replies)
  1 sibling, 3 replies; 51+ messages in thread
From: Arnd Bergmann @ 2015-10-30 22:28 UTC (permalink / raw)
  To: Sinan Kaya
  Cc: dmaengine, timur, cov, jcm, Rob Herring, Pawel Moll,
	Mark Rutland, Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams,
	devicetree, linux-kernel

On Friday 30 October 2015 17:42:26 Sinan Kaya wrote:
> On 10/30/2015 6:24 AM, Arnd Bergmann wrote:
> > On Thursday 29 October 2015 23:08:13 Sinan Kaya wrote:
> >> Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
> >> ---
> >>   .../devicetree/bindings/dma/qcom_hidma.txt         |   18 +
> >>   drivers/dma/Kconfig                                |   11 +
> >>   drivers/dma/Makefile                               |    4 +
> >>   drivers/dma/qcom_hidma.c                           | 1717 ++++++++++++++++++++
> >>   drivers/dma/qcom_hidma.h                           |   44 +
> >>   drivers/dma/qcom_hidma_ll.c                        | 1132 +++++++++++++
> >>   6 files changed, 2926 insertions(+)
> >>   create mode 100644 Documentation/devicetree/bindings/dma/qcom_hidma.txt
> >>   create mode 100644 drivers/dma/qcom_hidma.c
> >>   create mode 100644 drivers/dma/qcom_hidma.h
> >>   create mode 100644 drivers/dma/qcom_hidma_ll.c
> >>
> >> diff --git a/Documentation/devicetree/bindings/dma/qcom_hidma.txt b/Documentation/devicetree/bindings/dma/qcom_hidma.txt
> >> new file mode 100644
> >> index 0000000..9a01635
> >> --- /dev/null
> >> +++ b/Documentation/devicetree/bindings/dma/qcom_hidma.txt
> >> @@ -0,0 +1,18 @@
> >> +Qualcomm Technologies HIDMA Channel driver
> >> +
> >> +Required properties:
> >> +- compatible: must contain "qcom,hidma"
> >
> > Be more specific here. Also, should this be 'hisilicon,hidma-1.0' rather
> > than 'qcom'? I'm guessing from the name that this is a device you licensed
> > from them.
> >
> No, this is a QCOM part. I used the driver from hisilicon as a starting 
> point instead of starting from scratch. That's why, I kept the original 
> copy rights.

I meant the 'hidma' name. ;-)

> >> +- reg: Addresses for the transfer and event channel
> >> +- interrupts: Should contain the event interrupt
> >> +- desc-count: Number of asynchronous requests this channel can handle
> >> +- event-channel: The HW event channel completions will be delivered.
> >> +Example:
> >> +
> >> +	hidma_24: hidma@0x5c050000 {
> >
> > Name should be 'dma-controller' in DT, not 'hidma'.
> will test and change.

no need to test, Linux ignores the node name.

> >
> > MODULE_NAME and HIDMA_DEFAULT_DESCRIPTOR_COUNT can be dropped
> 
> I'll remove the module_name. The default descriptor is used if 
> device-tree or acpi table does not have the descriptor count.

I missed that part. If the descriptor count is a hardware feature,
just make that property mandatory. OTOH, if this is an optimization
setting, better drop that property entirely.

> >> +#define HIDMA_RUNTIME_GET(dmadev)				\
> >> +do {								\
> >> +	atomic_inc(&(dmadev)->pm_counter);			\
> >> +	TRC_PM((dmadev)->ddev.dev,				\
> >> +		"%s:%d pm_runtime_get %d\n", __func__, __LINE__,\
> >> +		atomic_read(&(dmadev)->pm_counter));		\
> >> +	pm_runtime_get_sync((dmadev)->ddev.dev);		\
> >> +} while (0)
> >> +
> >> +#define HIDMA_RUNTIME_SET(dmadev)				\
> >> +do {								\
> >> +	atomic_dec(&(dmadev)->pm_counter);			\
> >> +	TRC_PM((dmadev)->ddev.dev,				\
> >> +		"%s:%d pm_runtime_put_autosuspend:%d\n",	\
> >> +		__func__, __LINE__,				\
> >> +		atomic_read(&(dmadev)->pm_counter));		\
> >> +	pm_runtime_mark_last_busy((dmadev)->ddev.dev);		\
> >> +	pm_runtime_put_autosuspend((dmadev)->ddev.dev);		\
> >> +} while (0)
> >
> > Inline functions.
> 
> ok, I was hoping to keep the file and line numbers as the PM stuff gets 
> called from multiple locations. should I call the inline functions with 
> __func__ and __LINE__ from another macro  like
> 
> #define HIDMA_RUNTIME_GET(dmadev)
> 	func(dmadev, __func__, __LINE__)
> 
> etc.

I would just drop the debugging macros entirely here.

> >
> >> +struct hidma_dev {
> >> +	u8				evridx;
> >> +	u32				nr_descriptors;
> >> +
> >> +	void				*lldev;
> >> +	void				__iomem *dev_trca;
> >> +	void				__iomem *dev_evca;
> >> +	int (*self_test)(struct hidma_dev *device);
> >> +	struct dentry			*debugfs;
> >> +	struct dentry			*stats;
> >> +
> >> +	/* used to protect the pending channel list*/
> >> +	spinlock_t			lock;
> >> +	dma_addr_t			dev_trca_phys;
> >> +	struct dma_device		ddev;
> >> +	struct tasklet_struct		tasklet;
> >
> > workqueue maybe?
> 
> is there an advantage of workqueue over tasklet? I'm open to suggestions.

tasklets are not normally used in new code. They are only marginally
faster than workqueues, but are executed in atomic context and cannot
call sleeping functions.

> >> +static unsigned int debug_pm;
> >> +module_param(debug_pm, uint, 0644);
> >> +MODULE_PARM_DESC(debug_pm,
> >> +		 "debug runtime power management transitions (default: 0)");
> >> +
> >> +#define TRC_PM(...) do {			\
> >> +		if (debug_pm)			\
> >> +			dev_info(__VA_ARGS__);	\
> >> +	} while (0)
> >
> > Again, remove these after you are done debugging the problem at hand,
> > we don't need to clutter up the upstream version.
> 
> ok, I'll take them out if that's the norm. I was hoping to keep them in 
> case something shows up.

In my experience, the bugs you have now are different from the bugs
that other people find next, and debugging macros like this end up
not helping then.

> >> +	/*
> >> +	 * It is assumed that the hardware can move the data within 1s
> >> +	 * and signal the OS of the completion
> >> +	 */
> >> +	ret = wait_event_interruptible_timeout(dmadev->test_result.wq,
> >> +		atomic_read(&dmadev->test_result.counter) == (map_count),
> >> +				msecs_to_jiffies(10000));
> >> +
> >> +	if (ret <= 0) {
> >> +		dev_err(dmadev->ddev.dev,
> >> +			"Self-test sg copy timed out, disabling\n");
> >> +		err = -ENODEV;
> >> +		goto tx_status;
> >> +	}
> >
> > Why ENODEV? Could you make this handle restarted system calls?
> 
> This is the self test code. It gets called from probe. If there is a 
> problem with the device or system configuration, I don't want to enable 
> this device. I can certainly return a different error code though. 
> What's a good code?

I see. probe() is not restartable, so it cannot be -ERESTARTSYS.

Maybe better use wait_event_timeout and not handle the signals then.
It will eventually time out if something goes wrong.

> >> +
> >> +#if IS_ENABLED(CONFIG_DEBUG_FS)
> >> +
> >> +#define SIER_CHAN_SHOW(chan, name) \
> >> +		seq_printf(s, #name "=%u\n", chan->name)
> >
> > can be open-coded for clarity.
> >
> 
> I think I copied this from another driver. Would you rather see 
> seq_printf in the code than a macro.
> 
> Sorry, I didn't get what you mean by "open-coded".

Yes, that is what I meant. Keep the code but don't put it into
an inline function or macro.

> >> +       ret = dma_mapping_error(dev, dma_src);
> >> +       if (ret) {
> >> +               dev_err(dev, "dma_mapping_error with ret:%d\n", ret);
> >> +               ret = -ENOMEM;
> >> +       } else {
> >> +               phys_addr_t phys;
> >> +
> >> +               phys = dma_to_phys(dev, dma_src);
> >> +               if (strcmp(__va(phys), "hello world") != 0) {
> >> +                       dev_err(dev, "memory content mismatch\n");
> >> +                       ret = -EINVAL;
> >> +               } else {
> >> +                       dev_dbg(dev, "mapsingle:dma_map_single works\n");
> >> +               }
> >> +               dma_unmap_single(dev, dma_src, buf_size, DMA_TO_DEVICE);
> >> +       }
> >
> > dma_to_phys() is architecture specific and does not work if you
> > have an IOMMU. Just use the virtual address you passed into dma_map_*.
> 
> Should we fix the real problem?
> 
> I patched dma_to_phys to call IOMMU driver for translation when IOMMU is 
> present. I was planning to upstream that patch when Robin Murphy's 
> changes make upstream.
> 
> The assumption of phys==dma address doesn't look scalable to me. I was 
> checking the DMA subsystem for correctness with this test. If use 
> dma_src, it is going to be a 1==1 test.

dma_to_phys is the internal helper that is used by the iommu
implementation, drivers should never call it, and I don't see
why you would do that.


> >
> >> +	seq_printf(s, "self_test=%u\n",
> >> +		atomic_read(&dmadev->test_result.counter));
> >> +
> >> +	seq_printf(s, "copy%s%s%s%s%s%s%s%s%s%s%s\n",
> >> +		dma_has_cap(DMA_PQ, dma->cap_mask) ? " pq" : "",
> >> +		dma_has_cap(DMA_PQ_VAL, dma->cap_mask) ? " pq_val" : "",
> >> +		dma_has_cap(DMA_XOR, dma->cap_mask) ? " xor" : "",
> >> +		dma_has_cap(DMA_XOR_VAL, dma->cap_mask) ? " xor_val" : "",
> >> +		dma_has_cap(DMA_INTERRUPT, dma->cap_mask) ? " intr" : "",
> >> +		dma_has_cap(DMA_SG, dma->cap_mask) ? " sg" : "",
> >> +		dma_has_cap(DMA_ASYNC_TX, dma->cap_mask) ? " async" : "",
> >> +		dma_has_cap(DMA_SLAVE, dma->cap_mask) ? " slave" : "",
> >> +		dma_has_cap(DMA_CYCLIC, dma->cap_mask) ? " cyclic" : "",
> >> +		dma_has_cap(DMA_INTERLEAVE, dma->cap_mask) ? " intl" : "",
> >> +		dma_has_cap(DMA_MEMCPY, dma->cap_mask) ? " memcpy" : "");
> >> +
> >> +	return 0;
> >> +}
> >
> > The selftest part and the features could just be separate files in the
> > core dmaengine code, it doesn't really belong here.
> >
> I'll take a look. It could take a couple of iterations to get this right 
> but I could certainly move my testing to another file for sharing.

I also missed the part about the selftest being called from probe
rather than through debugfs.

> >> +}
> >> +#else
> >> +static void hidma_debug_uninit(struct hidma_dev *dmadev)
> >> +{
> >> +}
> >> +static int hidma_debug_init(struct hidma_dev *dmadev)
> >> +{
> >> +	return 0;
> >> +}
> >> +#endif
> >
> > You can remove the #ifdef here. The debugfs code already stubs out all
> > debugfs_create_*() and turns it all into nops when it is disabled.
> 
> ok, I think I got compilation errors with the data structures but I'll 
> take a look.

Ok. There may be cases where a structure definition is mistakenly placed
in an #ifdef and should instead be taken out.

> >> +	dma_cap_set(DMA_MEMCPY, dmadev->ddev.cap_mask);
> >> +	/* Apply default dma_mask if needed */
> >> +	if (!pdev->dev.dma_mask) {
> >> +		pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask;
> >> +		pdev->dev.coherent_dma_mask = DMA_BIT_MASK(64);
> >> +	}
> >> +
> >
> > remove the check, or use
> 
> I got caught on this problem before. Let me go back and revisit. It took 
> my entire day to figure out that dma_mask pointer was not set.
> 
> >
> > 	if (WARN_ON(!pdev->dev.dma_mask))
> > 		return -ENXIO;
> >
> > The dma mask has to always be set by the platform code before probe()
> > is called. If it is not set, you are not allowed to perform DMA.
> 
> I tested this on an ACPI platform BTW when I was working on the initial 
> implementation.

I remember there was a bug in ACPI that it was not setting the dma mask
pointer right. That should be fixed in newer kernels. If not, don't
add another workaround for broken ACPI but instead fix the ACPI code.

> >
> >> +	rc = devm_request_irq(&pdev->dev, chirq, hidma_chirq_handler, 0,
> >> +			      "qcom-hidma", &dmadev->lldev);
> >> +	if (rc) {
> >> +		dev_err(&pdev->dev, "chirq registration failed: %d\n", chirq);
> >> +		goto chirq_request_failed;
> >> +	}
> >> +
> >> +	dev_dbg(&pdev->dev, "initializing DMA channels\n");
> >> +	INIT_LIST_HEAD(&dmadev->ddev.channels);
> >> +	rc = hidma_chan_init(dmadev, 0);
> >> +	if (rc) {
> >> +		dev_err(&pdev->dev, "probe:channel init failed\n");
> >> +		goto channel_init_failed;
> >> +	}
> >> +	dev_dbg(&pdev->dev, "HI-DMA engine driver starting self test\n");
> >> +	rc = dmadev->self_test(dmadev);
> >> +	if (rc) {
> >> +		dev_err(&pdev->dev, "probe: self test failed: %d\n", rc);
> >> +		goto self_test_failed;
> >> +	}
> >> +	dev_info(&pdev->dev, "probe: self test succeeded.\n");
> >> +
> >> +	dev_dbg(&pdev->dev, "calling dma_async_device_register\n");
> >> +	rc = dma_async_device_register(&dmadev->ddev);
> >> +	if (rc) {
> >> +		dev_err(&pdev->dev,
> >> +			"probe: failed to register slave DMA: %d\n", rc);
> >> +		goto device_register_failed;
> >> +	}
> >> +	dev_dbg(&pdev->dev, "probe: dma_async_device_register done\n");
> >> +
> >> +	rc = hidma_debug_init(dmadev);
> >> +	if (rc) {
> >> +		dev_err(&pdev->dev,
> >> +			"probe: failed to init debugfs: %d\n", rc);
> >> +		goto debug_init_failed;
> >> +	}
> >> +
> >> +	dev_info(&pdev->dev, "HI-DMA engine driver registration complete\n");
> >> +	platform_set_drvdata(pdev, dmadev);
> >> +	HIDMA_RUNTIME_SET(dmadev);
> >> +	return 0;
> >
> > Remove the debug prints when you are done debugging.
> 
> errors or the dev_infos?

I would remove both. Some might still be useful, but generally you only
want to print messages when something happens that is not triggered by
user action.

> 
> > Also, if you waste CPU cycles for hundreds of milliseconds, it's unlikely
> > that the function is so performance critical that it requires writel_relaxed().
> >
> > Just use writel() here.
> 
> The issue is not writel_relaxed vs. writel. After I issue reset, I need 
> wait for some time to confirm reset was done. I can use readl_polling 
> instead of mdelay if we don't like mdelay.

I meant that both _relaxed() and mdelay() are probably wrong here.

readl_polling() would avoid the part with _relaxed(), but if that can
still take more than a few microseconds, you should try to sleep inbetween
rather than burn CPU cycles.

> >> +/*
> >> + * The interrupt handler for HIDMA will try to consume as many pending
> >> + * EVRE from the event queue as possible. Each EVRE has an associated
> >> + * TRE that holds the user interface parameters. EVRE reports the
> >> + * result of the transaction. Hardware guarantees ordering between EVREs
> >> + * and TREs. We use last processed offset to figure out which TRE is
> >> + * associated with which EVRE. If two TREs are consumed by HW, the EVREs
> >> + * are in order in the event ring.
> >> + * This handler will do a one pass for consuming EVREs. Other EVREs may
> >> + * be delivered while we are working. It will try to consume incoming
> >> + * EVREs one more time and return.
> >> + * For unprocessed EVREs, hardware will trigger another interrupt until
> >> + * all the interrupt bits are cleared.
> >> + */
> >> +static void hidma_ll_int_handler_internal(struct hidma_lldev *lldev)
> >> +{
> >> +	u32 status;
> >> +	u32 enable;
> >> +	u32 cause;
> >> +	int repeat = 2;
> >> +	unsigned long timeout;
> >> +
> >> +	status = readl_relaxed(lldev->evca + EVCA_IRQ_STAT_OFFSET);
> >> +	enable = readl_relaxed(lldev->evca + EVCA_IRQ_EN_OFFSET);
> >> +	cause = status & enable;
> >> +
> >
> > Reading the status probably requires a readl() rather than readl_relaxed()
> > to guarantee that the DMA data has arrived in memory by the time that the
> > register data is seen by the CPU. If using readl_relaxed() here is a valid
> > and required optimization, please add a comment to explain why it works
> > and how much you gain.
> 
> I will add some description. This is a high speed peripheral. I don't 
> like spreading barriers as candies inside the readl and writel unless I 
> have to.
> 
> According to the barriers video, I watched on youtube this should be the 
> rule for ordering.
> 
> "if you do two relaxed reads and check the results of the returned 
> variables, ARM architecture guarantees that these two relaxed variables 
> will get observed during the check."
> 
> this is called implied ordering or something of that sort.

My point was a bit different: while it is guaranteed that the
result of the readl_relaxed() is observed in order, they do not
guarantee that a DMA from device to memory that was started by
the device before the readl_relaxed() has arrived in memory
by the time that the readl_relaxed() result is visible to the
CPU and it starts accessing the memory.

In other words, when the hardware sends you data followed by an
interrupt to tell you the data is there, your interrupt handler
can tell the driver that is waiting for this data that the DMA
is complete while the data itself is still in flight, e.g. waiting
for an IOMMU to fetch page table entries.

> >> +	wmb();
> >> +
> >> +	mdelay(1);
> >
> > Another workqueue? You should basically never call mdelay().
> 
> I'll use polled read instead.

Ok, but again make sure that you call msleep() or usleep_range()
between the reads.

> >> +static int hidma_ll_hw_start(void *llhndl)
> >> +{
> >> +	int rc = 0;
> >> +	struct hidma_lldev *lldev = llhndl;
> >> +	unsigned long irqflags;
> >> +
> >> +	spin_lock_irqsave(&lldev->lock, irqflags);
> >> +	writel_relaxed(lldev->tre_write_offset,
> >> +			lldev->trca + TRCA_DOORBELL_OFFSET);
> >> +	spin_unlock_irqrestore(&lldev->lock, irqflags);
> >
> > How does this work? The writel_relaxed() won't synchronize with either
> > the DMA data or the spinlock.
> 
> mutex and spinlocks have barriers inside. See the youtube video.
> 
> https://www.youtube.com/watch?v=6ORn6_35kKo

I'm pretty sure these barriers only make sense to the CPU, so the
spinlock guarantees that the access to lldev->tre_write_offset is
protected, but not the access to lldev->trca, because that write
is posted on the bus and might not complete until after the
unlock. There is no "dsb(st)" in here:

static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
        unsigned long tmp;

        asm volatile(ARM64_LSE_ATOMIC_INSN(
        /* LL/SC */
        "       ldrh    %w1, %0\n"
        "       add     %w1, %w1, #1\n"
        "       stlrh   %w1, %0",
        /* LSE atomics */
        "       mov     %w1, #1\n"
        "       nop\n"
        "       staddlh %w1, %0")
        : "=Q" (lock->owner), "=&r" (tmp)
        :
        : "memory");
}

> >
> > Also, your abstraction seem to go a little too far if the upper driver
> > doesn't know what the lower driver calls its main device structure.
> >
> > Or you can go further and just embed the struct hidma_lldev within the
> > struct hidma_dev to save one?
> 
> That's how it was before. It got too complex and variables/spinlocks got 
> intermixed. I borrowed the upper layer and it worked as it is. I rather 
> keep all hardware stuff in another file and do not mix and match for safety.

Ok, then just use a forward declaration for the struct name so you can
have a type-safe pointer but don't need to show the members.

> >> +void hidma_ll_chstats(struct seq_file *s, void *llhndl, u32 tre_ch)
> >> +{
> >> +	struct hidma_lldev *lldev = llhndl;
> >> +	struct hidma_tre *tre;
> >> +	u32 length;
> >> +	dma_addr_t src_start;
> >> +	dma_addr_t dest_start;
> >> +	u32 *tre_local;
> >> +
> >> +	if (unlikely(tre_ch >= lldev->nr_tres)) {
> >> +		dev_err(lldev->dev, "invalid TRE number in chstats:%d",
> >> +			tre_ch);
> >> +		return;
> >> +	}
> >> +	tre = &lldev->trepool[tre_ch];
> >> +	seq_printf(s, "------Channel %d -----\n", tre_ch);
> >> +	seq_printf(s, "allocated=%d\n", atomic_read(&tre->allocated));
> >> +	HIDMA_CHAN_SHOW(tre, queued);
> >> +	seq_printf(s, "err_info=0x%x\n",
> >> +		   lldev->tx_status_list[tre->chidx].err_info);
> >> +	seq_printf(s, "err_code=0x%x\n",
> >> +		   lldev->tx_status_list[tre->chidx].err_code);
> >> +	HIDMA_CHAN_SHOW(tre, status);
> >> +	HIDMA_CHAN_SHOW(tre, chidx);
> >> +	HIDMA_CHAN_SHOW(tre, dma_sig);
> >> +	seq_printf(s, "dev_name=%s\n", tre->dev_name);
> >> +	seq_printf(s, "callback=%p\n", tre->callback);
> >> +	seq_printf(s, "data=%p\n", tre->data);
> >> +	HIDMA_CHAN_SHOW(tre, tre_index);
> >> +
> >> +	tre_local = &tre->tre_local[0];
> >> +	src_start = tre_local[TRE_SRC_LOW_IDX];
> >> +	src_start = ((u64)(tre_local[TRE_SRC_HI_IDX]) << 32) + src_start;
> >> +	dest_start = tre_local[TRE_DEST_LOW_IDX];
> >> +	dest_start += ((u64)(tre_local[TRE_DEST_HI_IDX]) << 32);
> >> +	length = tre_local[TRE_LEN_IDX];
> >> +
> >> +	seq_printf(s, "src=%pap\n", &src_start);
> >> +	seq_printf(s, "dest=%pap\n", &dest_start);
> >> +	seq_printf(s, "length=0x%x\n", length);
> >> +}
> >> +EXPORT_SYMBOL_GPL(hidma_ll_chstats);
> >
> > Remove all the pointers here. I guess you can remove the entire debugfs
> > file really ;-)
> 
> ok, I need some facility to print out stuff when problems happened. 
> Would you rather use sysfs?

sysfs would be less appropriate, as that requires providing a stable ABI
for user space. I think ftrace should provide what you need. Let me know
if that doesn't work out.

	Arnd

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver
  2015-10-30 22:28       ` Arnd Bergmann
@ 2015-10-30 22:36         ` Timur Tabi
  2015-10-30 22:50           ` Arnd Bergmann
  2015-10-31  1:53         ` Sinan Kaya
  2015-11-01 18:50         ` Sinan Kaya
  2 siblings, 1 reply; 51+ messages in thread
From: Timur Tabi @ 2015-10-30 22:36 UTC (permalink / raw)
  To: Arnd Bergmann, Sinan Kaya
  Cc: dmaengine, cov, jcm, Rob Herring, Pawel Moll, Mark Rutland,
	Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams, devicetree,
	linux-kernel

On 10/30/2015 05:28 PM, Arnd Bergmann wrote:
>>> > >Why ENODEV? Could you make this handle restarted system calls?
>> >
>> >This is the self test code. It gets called from probe. If there is a
>> >problem with the device or system configuration, I don't want to enable
>> >this device. I can certainly return a different error code though.
>> >What's a good code?
> I see. probe() is not restartable, so it cannot be -ERESTARTSYS.
>
> Maybe better use wait_event_timeout and not handle the signals then.
> It will eventually time out if something goes wrong.

What about -EPROBE_DEFER?  Isn't that "restartable"?  Granted, it's only 
supposed to be used if the driver is dependent on another driver to 
probe, so I'm not sure it applies here.  If the self-test fails, then it 
is possible that it could succeed later?

-- 
Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the
Code Aurora Forum, a Linux Foundation Collaborative Project.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver
  2015-10-30 21:55       ` Timur Tabi
@ 2015-10-30 22:47         ` Arnd Bergmann
  0 siblings, 0 replies; 51+ messages in thread
From: Arnd Bergmann @ 2015-10-30 22:47 UTC (permalink / raw)
  To: Timur Tabi
  Cc: Sinan Kaya, dmaengine, cov, jcm, Rob Herring, Pawel Moll,
	Mark Rutland, Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams,
	devicetree, linux-kernel

On Friday 30 October 2015 16:55:16 Timur Tabi wrote:
> 
> On 10/30/2015 04:42 PM, Sinan Kaya wrote:
> >>
> >>     if (WARN_ON(!pdev->dev.dma_mask))
> >>         return -ENXIO;
> >>
> >> The dma mask has to always be set by the platform code before probe()
> >> is called. If it is not set, you are not allowed to perform DMA.
> >
> > I tested this on an ACPI platform BTW when I was working on the initial
> > implementation.
> 
> PowerPC sets the mask to 32 bits by default:
> 
> http://lxr.free-electrons.com/ident?i=arch_setup_pdev_archdata
> 
> Should we do something similar in ARM64?  Today, we have to manually set 
> the DMA mask in all drivers.

We set the dma mask from the 'dma-ranges' property of the parent device,
but fall back to 32-bit because we did not manage to mandate this property
in time for all arm64 machines to use.

	Arnd

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver
  2015-10-30 22:36         ` Timur Tabi
@ 2015-10-30 22:50           ` Arnd Bergmann
  0 siblings, 0 replies; 51+ messages in thread
From: Arnd Bergmann @ 2015-10-30 22:50 UTC (permalink / raw)
  To: Timur Tabi
  Cc: Sinan Kaya, dmaengine, cov, jcm, Rob Herring, Pawel Moll,
	Mark Rutland, Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams,
	devicetree, linux-kernel

On Friday 30 October 2015 17:36:45 Timur Tabi wrote:
> On 10/30/2015 05:28 PM, Arnd Bergmann wrote:
> >>> > >Why ENODEV? Could you make this handle restarted system calls?
> >> >
> >> >This is the self test code. It gets called from probe. If there is a
> >> >problem with the device or system configuration, I don't want to enable
> >> >this device. I can certainly return a different error code though.
> >> >What's a good code?
> > I see. probe() is not restartable, so it cannot be -ERESTARTSYS.
> >
> > Maybe better use wait_event_timeout and not handle the signals then.
> > It will eventually time out if something goes wrong.
> 
> What about -EPROBE_DEFER?  Isn't that "restartable"?  Granted, it's only 
> supposed to be used if the driver is dependent on another driver to 
> probe, so I'm not sure it applies here.  If the self-test fails, then it 
> is possible that it could succeed later?

No, this is different. The probe function can get called from all sorts
of contexts (sys_init_module, device_create, deferred probing), and not
all of them go back to user space when returning an error, so we cannot
deliver a signal to the calling process this way.

	Arnd

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver
  2015-10-30 22:28       ` Arnd Bergmann
  2015-10-30 22:36         ` Timur Tabi
@ 2015-10-31  1:53         ` Sinan Kaya
  2015-11-01 18:50         ` Sinan Kaya
  2 siblings, 0 replies; 51+ messages in thread
From: Sinan Kaya @ 2015-10-31  1:53 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: dmaengine, timur, cov, jcm, Rob Herring, Pawel Moll,
	Mark Rutland, Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams,
	devicetree, linux-kernel



On 10/30/2015 6:28 PM, Arnd Bergmann wrote:
> I missed that part. If the descriptor count is a hardware feature,
> just make that property mandatory. OTOH, if this is an optimization
> setting, better drop that property entirely.

I'm going to make this a module parameter instead and get rid of the 
constant. The reason, I have this default parameter today is that QEMU 
does not support passing device tree arguments for platform devices.

QEMU allows you to set memory and interrupt resources only for platform 
devices.

At least now, I can pass the argument via command line before starting QEMU.

I'll put checks that the value needs to come from either 
DTS/ACPI/command line.

-- 
Sinan Kaya
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a 
Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver
  2015-10-30 20:15                   ` Andy Shevchenko
  2015-10-30 20:18                     ` Mark Rutland
@ 2015-10-31  3:33                     ` Jon Masters
  2015-10-31 17:34                       ` Sinan Kaya
  1 sibling, 1 reply; 51+ messages in thread
From: Jon Masters @ 2015-10-31  3:33 UTC (permalink / raw)
  To: Andy Shevchenko, Al Stone
  Cc: Mark Rutland, Sinan Kaya, dmaengine, timur, cov, Rob Herring,
	Pawel Moll, Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams,
	devicetree, linux-kernel, leif.lindholm

Hi Andy,

On 10/30/2015 04:15 PM, Andy Shevchenko wrote:
> On Fri, Oct 30, 2015 at 10:08 PM, Al Stone <al.stone@linaro.org> wrote:
>> On 10/30/2015 01:01 PM, Mark Rutland wrote:
>>> On Fri, Oct 30, 2015 at 02:48:06PM -0400, Sinan Kaya wrote:
> 
>>> The CSRT is listed under "Want", not "Never" or "Don't Care", so Linaro
>>> have certainly not said that CSRT will not be supported. If anything,
>>> they have stated that the table should be supported.
>>
>> "Want" means interesting, and probably useful, but no clear indication that
>> anyone actually needs it.  At one point, we thought we might use the CSRT
>> for describing DMA, but it turns out we have not needed to.
> 
> Then you are going to have either 1 or 0 DMAC for slave devices, right?

I believe what Al means is that such hardware has not appeared
(publicly) until this time and so such situation was theoretical and
thus not covered by the Linaro wiki. Linaro had not prioritized CSRT
because it didn't seem that the need to support it would arise soon.

> The CSRT, unfortunately, the only way how to enumerate DMAC to be used
> by slave devices.
> You may look into drivers/dma/acpi-dma.c for usage in Linux.
> 
> Yes, I know about _DSD, but I don't think it will provide soon any
> other official mechanisms to what we have now. Better to ask Rafael
> and Mika.

Thanks for the feedback. I agree that generally the plan is to use
existing tables from x86 on arm64 when possible. Please see below.

>> However, let's make sure we're saying the same thing: the CSRT table is
>> properly defined in the kernel include/acpi/actbl2.h file so one can read
>> such a table and use it if they so choose.  Nothing that we have done at
>> Linaro in the arm64 part of the kernel relies on any of the content from
>> the CSRT, nor does it preclude someone relying on that content.  So, the
>> CSRT is defined, and is usable, but is just not being used -- by Linaro --
>> at present.
> 
> This sounds clear.
> 
>> If that needs to change, let me know; no one has asked us to use the CSRT
>> for a DMA engine, and we have not been provided any hardware that requires
>> it.
> 
> See above.

Here's, what I believe to be the summary:

1). QCOM are not implementing slave device support in their current
HIDMA hardware, therefore the requirement for CSRT does not exist *at
present* for this specific driver to be merged and the discussion in
this sub-thread pertains only to a situation not affecting HIDMA.

2). A requirement upon the CSRT should be clarified in the various
specifications. The SBBR[0] currently "recommends" CSRT but does not
necessarily provide guidance about what kinds of system resources would
be covered by that, and so there is a potential for this to be missed.

As one of the lead authors of certain ARM server specifications, I will
contact those involved in such and ensure that this is addressed with a
clarification in a future release.

Thanks for raising the concern,

Jon.

[0]
http://infocenter.arm.com/help/topic/com.arm.doc.den0044a/Server_Base_Boot_Requirements.pdf


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver
  2015-10-30 14:50   ` Mark Rutland
@ 2015-10-31  4:27     ` Sinan Kaya
  0 siblings, 0 replies; 51+ messages in thread
From: Sinan Kaya @ 2015-10-31  4:27 UTC (permalink / raw)
  To: Mark Rutland
  Cc: dmaengine, timur, cov, jcm, Rob Herring, Pawel Moll,
	Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams, devicetree,
	linux-kernel



On 10/30/2015 10:50 AM, Mark Rutland wrote:
> The documentation above didn't state this was an 8-bit quantity, and you
> save nothing by doing this (all values in the binary format are padded
> to 4-byte alignment).

done.

-- 
Sinan Kaya
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a 
Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver
  2015-10-30  9:34 ` [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver Arnd Bergmann
@ 2015-10-31  6:51   ` Sinan Kaya
  2015-10-31 12:53     ` Timur Tabi
                       ` (2 more replies)
  0 siblings, 3 replies; 51+ messages in thread
From: Sinan Kaya @ 2015-10-31  6:51 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: dmaengine, timur, cov, jcm, Rob Herring, Pawel Moll,
	Mark Rutland, Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams,
	devicetree, linux-kernel



On 10/30/2015 5:34 AM, Arnd Bergmann wrote:
> On Thursday 29 October 2015 23:08:12 Sinan Kaya wrote:
>> diff --git a/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt b/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt
>> new file mode 100644
>> index 0000000..81674ab
>> --- /dev/null
>> +++ b/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt
>> @@ -0,0 +1,42 @@
>> +Qualcomm Technologies HIDMA Management interface
>> +
>> +Required properties:
>> +- compatible: must contain "qcom,hidma_mgmt"
>
> No underscores in the name please. Also try to be more specific, in case this
> device shows up in more than one SoC and they end up not being 100% compatible
> we want to be able to tell them apart.

ok

>
>> +- reg: Address range for DMA device
>> +- interrupts: Should contain the one interrupt shared by all channels
>> +- nr-channels: Number of channels supported by this DMA controller.
>> +- max-write: Maximum write burst in bytes. A memcpy requested is
>> +  fragmented to multiples of this amount.
>> +- max-read: Maximum read burst in bytes. A memcpy request is
>> +  fragmented to multiples of this amount.
>> +- max-wxactions: Maximum write transactions to perform in a burst
>> +- max-rdactions: Maximum read transactions to perform in a burst
>> +- max-memset-limit: Maximum memset limit
>> +- ch-priority-#n: Priority of the channel
>> +- ch-weight-#n: Round robin weight of the channel
>
> For the last two, you can just use a multi-cell property, using one
> cell per channel.

will look
>
>> +
>> +#define MODULE_NAME			"hidma-mgmt"
>> +#define PREFIX				MODULE_NAME ": "
>
> These can be removed, just use dev_info() etc for messages, to include
> a unique identifier.

got rid of them

>
>> +#define AUTOSUSPEND_TIMEOUT		2000
>> +
>> +#define HIDMA_RUNTIME_GET(dmadev)				\
>> +do {								\
>> +	atomic_inc(&(dmadev)->pm_counter);			\
>> +	TRC_PM(&(dmadev)->pdev->dev,				\
>> +		"%s:%d pm_runtime_get %d\n", __func__, __LINE__,\
>> +		atomic_read(&(dmadev)->pm_counter));		\
>> +	pm_runtime_get_sync(&(dmadev)->pdev->dev);		\
>> +} while (0)
>> +
>> +#define HIDMA_RUNTIME_SET(dmadev)				\
>> +do {								\
>> +	atomic_dec(&(dmadev)->pm_counter);			\
>> +	TRC_PM(&(dmadev)->pdev->dev,				\
>> +		"%s:%d pm_runtime_put_autosuspend:%d\n",	\
>> +		__func__, __LINE__,				\
>> +		atomic_read(&(dmadev)->pm_counter));		\
>> +	pm_runtime_mark_last_busy(&(dmadev)->pdev->dev);	\
>> +	pm_runtime_put_autosuspend(&(dmadev)->pdev->dev);	\
>> +} while (0)
>
> Better use inline functions.

removed these

>
>> +struct qcom_hidma_mgmt_dev {
>> +	u8 max_wr_xactions;
>> +	u8 max_rd_xactions;
>> +	u8 max_memset_limit;
>> +	u16 max_write_request;
>> +	u16 max_read_request;
>> +	u16 nr_channels;
>> +	u32 chreset_timeout;
>> +	u32 sw_version;
>> +	u8 *priority;
>> +	u8 *weight;
>> +
>> +	atomic_t	pm_counter;
>> +	/* Hardware device constants */
>> +	dma_addr_t dev_physaddr;
>
> No need to store the physaddr, it's write-only here.
ok
>
>> +
>> +static unsigned int debug_pm;
>> +module_param(debug_pm, uint, 0644);
>> +MODULE_PARM_DESC(debug_pm,
>> +		 "debug runtime power management transitions (default: 0)");
>> +
>> +#define TRC_PM(...) do {			\
>> +		if (debug_pm)			\
>> +			dev_info(__VA_ARGS__);	\
>> +	} while (0)
>> +
>
> Once you are done debugging the driver and have a final version, please
> remove these.

removed TRC_PM

>
>> +#if IS_ENABLED(CONFIG_DEBUG_FS)
>> +
>> +#define HIDMA_SHOW(dma, name) \
>> +		seq_printf(s, #name "=0x%x\n", dma->name)
>
> Only used once, just remove.
ok

>
>> +#define HIDMA_READ_SHOW(dma, name, offset) \
>> +	do { \
>> +		u32 val; \
>> +		val = readl(dma->dev_virtaddr + offset); \
>> +		seq_printf(s, name "=0x%x\n", val); \
>> +	} while (0)
>
> Inline function.

ok

>
>> +/**
>> + * qcom_hidma_mgmt_info: display HIDMA device info
>> + *
>> + * Display the info for the current HIDMA device.
>> + */
>> +static int qcom_hidma_mgmt_info(struct seq_file *s, void *unused)
>> +{
>> +	struct qcom_hidma_mgmt_dev *mgmtdev = s->private;
>> +	u32 val;
>> +	int i;
>> +
>> +	HIDMA_RUNTIME_GET(mgmtdev);
>> +	HIDMA_SHOW(mgmtdev, sw_version);
>> +
>> +	val = readl(mgmtdev->dev_virtaddr + CFG_OFFSET);
>> +	seq_printf(s, "ENABLE=%d\n", val & 0x1);
>> +
>> +	val = readl(mgmtdev->dev_virtaddr + CHRESET_TIMEOUUT_OFFSET);
>> +	seq_printf(s, "reset_timeout=%d\n", val & CHRESET_TIMEOUUT_MASK);
>> +
>> +	val = readl(mgmtdev->dev_virtaddr + MHICFG_OFFSET);
>> +	seq_printf(s, "nr_event_channel=%d\n",
>> +		(val >> NR_EV_CHANNEL_BIT_POS) & NR_CHANNEL_MASK);
>> +	seq_printf(s, "nr_tr_channel=%d\n", (val & NR_CHANNEL_MASK));
>> +	seq_printf(s, "nr_virt_tr_channel=%d\n", mgmtdev->nr_channels);
>> +	seq_printf(s, "dev_virtaddr=%p\n", &mgmtdev->dev_virtaddr);
>> +	seq_printf(s, "dev_physaddr=%pap\n", &mgmtdev->dev_physaddr);
>> +	seq_printf(s, "dev_addrsize=%pap\n", &mgmtdev->dev_addrsize);
>
> printing pointers is a security risk, just remove them if they are
> not essential here.
ok

>> +
>> +static int qcom_hidma_mgmt_err_open(struct inode *inode, struct file *file)
>> +{
>> +	return single_open(file, qcom_hidma_mgmt_err, inode->i_private);
>> +}
>> +
>> +static const struct file_operations qcom_hidma_mgmt_err_fops = {
>> +	.open = qcom_hidma_mgmt_err_open,
>> +	.read = seq_read,
>> +	.llseek = seq_lseek,
>> +	.release = single_release,
>> +};
>> +
>> +static ssize_t qcom_hidma_mgmt_mhiderr_clr(struct file *file,
>> +	const char __user *user_buf, size_t count, loff_t *ppos)
>> +{
>> +	struct qcom_hidma_mgmt_dev *mgmtdev = file->f_inode->i_private;
>> +
>> +	HIDMA_RUNTIME_GET(mgmtdev);
>> +	writel(1, mgmtdev->dev_virtaddr + MHID_BUS_ERR_CLR_OFFSET);
>> +	HIDMA_RUNTIME_SET(mgmtdev);
>> +	return count;
>> +}
>> +
>> +static const struct file_operations qcom_hidma_mgmt_mhiderr_clrfops = {
>> +	.write = qcom_hidma_mgmt_mhiderr_clr,
>> +};
>
> Is this really just a debugging interface? If anyone would do this
> for normal operation, it needs to be a proper API.
>
This will be used by the system admin to monitor/reset the execution 
state of the DMA channels. This will be the management interface. 
Debugfs is probably not the right choice. I originally had sysfs but 
than had some doubts. I'm open to suggestions.

>> +
>> +static void qcom_hidma_mgmt_debug_uninit(struct qcom_hidma_mgmt_dev *mgmtdev)
>> +{
>> +	debugfs_remove(mgmtdev->evt_ena);
>> +	debugfs_remove(mgmtdev->tre_errclr);
>> +	debugfs_remove(mgmtdev->msi_errclr);
>> +	debugfs_remove(mgmtdev->ode_errclr);
>> +	debugfs_remove(mgmtdev->ide_errclr);
>> +	debugfs_remove(mgmtdev->evt_errclr);
>> +	debugfs_remove(mgmtdev->mhid_errclr);
>> +	debugfs_remove(mgmtdev->err);
>> +	debugfs_remove(mgmtdev->info);
>> +	debugfs_remove(mgmtdev->debugfs);
>> +}
>>
> I guess debugfs_remove_recursive() would do the job as well.

ok

>
>> +static int qcom_hidma_mgmt_debug_init(struct qcom_hidma_mgmt_dev *mgmtdev)
>> +{
>> +	int rc = 0;
>> +
>> +	mgmtdev->debugfs = debugfs_create_dir(dev_name(&mgmtdev->pdev->dev),
>> +						NULL);
>> +	if (!mgmtdev->debugfs) {
>> +		rc = -ENODEV;
>> +		return rc;
>> +	}
>> +
>> +	mgmtdev->info = debugfs_create_file("info", S_IRUGO,
>> +			mgmtdev->debugfs, mgmtdev, &qcom_hidma_mgmt_fops);
>> +	if (!mgmtdev->info) {
>> +		rc = -ENOMEM;
>> +		goto cleanup;
>> +	}
>> +
>> +	mgmtdev->err = debugfs_create_file("err", S_IRUGO,
>> +			mgmtdev->debugfs, mgmtdev,
>> +			&qcom_hidma_mgmt_err_fops);
>> +	if (!mgmtdev->err) {
>> +		rc = -ENOMEM;
>> +		goto cleanup;
>> +	}
>> +
>> +	mgmtdev->mhid_errclr = debugfs_create_file("mhiderrclr", S_IWUSR,
>> +			mgmtdev->debugfs, mgmtdev,
>> +			&qcom_hidma_mgmt_mhiderr_clrfops);
>> +	if (!mgmtdev->mhid_errclr) {
>> +		rc = -ENOMEM;
>> +		goto cleanup;
>> +	}
>
> Maybe use a loop around an array here to avoid writing the same thing 10 times?
>
will do

> Also, you could move the debugging code into a separate file and have a separate
> Kconfig symbol so users can disable this if they do not debug your driver
> but still want to use debugfs for other things.
>
I need these to be accessible all the time, maybe via sysfs or ioctl.

>> +static irqreturn_t qcom_hidma_mgmt_irq_handler(int irq, void *arg)
>> +{
>> +	/* TODO: handle irq here */
>> +	return IRQ_HANDLED;
>> +}
>
>
>> +	rc = devm_request_irq(&pdev->dev, irq, qcom_hidma_mgmt_irq_handler,
>> +		IRQF_SHARED, "qcom-hidmamgmt", mgmtdev);
>> +	if (rc) {
>> +		dev_err(&pdev->dev, "irq registration failed: %d\n", irq);
>> +		goto out;
>> +	}
>
> If you create a shared handler, you must check whether there was an
> interrupt pending, otherwise you will lose interrupts for all other devices
> that share the same IRQ.

ok, I'll remove it for now. The interrupt handler is a to-do.

>
> Better remove the handler completely here so you don't need to check it.
>
>> +
>> +	if (device_property_read_u16(&pdev->dev, "nr-channels",
>> +		&mgmtdev->nr_channels)) {
>> +		dev_err(&pdev->dev, "number of channels missing\n");
>> +		rc = -EINVAL;
>> +		goto out;
>> +	}
>
> This should be a u32 name 'dma-channels' for consistency with the
> generic DMA binding.
>
ok

> Also, try to avoid using u8 and u16 properties everywhere and just
> us u32 for consistency.

will do

>
>> +
>> +	for (i = 0; i < mgmtdev->nr_channels; i++) {
>> +		char name[30];
>> +
>> +		sprintf(name, "ch-priority-%d", i);
>> +		device_property_read_u8(&pdev->dev, name,
>> +			&mgmtdev->priority[i]);
>> +
>> +		sprintf(name, "ch-weight-%d", i);
>> +		device_property_read_u8(&pdev->dev, name,
>> +			&mgmtdev->weight[i]);
>
> Per comment above, just read this as an array.

will look. While device tree syntax for arrays is easy, describing an 
array in DSM package looks really ugly. I tried to keep things simple. 
I'll spend some time on it.

>
>> +	dev_info(&pdev->dev,
>> +		"HI-DMA engine management driver registration complete\n");
>> +	platform_set_drvdata(pdev, mgmtdev);
>> +	HIDMA_RUNTIME_SET(mgmtdev);
>> +	return 0;
>> +out:
>> +	pm_runtime_disable(&pdev->dev);
>> +	pm_runtime_put_sync_suspend(&pdev->dev);
>> +	return rc;
>> +}
>
> The rest of the probe function does not register any user interface aside from
> the debugging stuff. Can you explain in the changelog how you expect the
> driver to be used in a real system? Is there another driver coming?

I expect this driver to grow in functionality over time. Right now, it 
does the global init for the DMA. After that all channels execute on 
their own without depending on each other. Global init has to be done 
first before attempting to do any channel initialization.

There is also implied startup ordering requirements. I was doing this by 
using channel driver with the late binding to guarantee that.

As soon as I use module_platform_driver, the ordering gets reversed for 
some reason.

>
>> +
>> +static const struct of_device_id qcom_hidma_mgmt_match[] = {
>> +	{ .compatible = "qcom,hidma_mgmt", },
>> +	{},
>> +};
>> +MODULE_DEVICE_TABLE(of, qcom_hidma_mgmt_match);
>> +
>> +static struct platform_driver qcom_hidma_mgmt_driver = {
>> +	.probe = qcom_hidma_mgmt_probe,
>> +	.remove = qcom_hidma_mgmt_remove,
>> +	.driver = {
>> +		.name = MODULE_NAME,
>> +		.owner = THIS_MODULE,
>> +		.of_match_table = of_match_ptr(qcom_hidma_mgmt_match),
>
> drop the .owner field and the of_match_ptr() macro to avoid warnings here.
>
>> +static int __init qcom_hidma_mgmt_init(void)
>> +{
>> +	return platform_driver_register(&qcom_hidma_mgmt_driver);
>> +}
>> +device_initcall(qcom_hidma_mgmt_init);
>> +
>> +static void __exit qcom_hidma_mgmt_exit(void)
>> +{
>> +	platform_driver_unregister(&qcom_hidma_mgmt_driver);
>> +}
>> +module_exit(qcom_hidma_mgmt_exit);
>
> module_platform_driver()
>
> 	Arnd
>
thanks for the feedback.

-- 
Sinan Kaya
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a 
Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver
  2015-10-31  6:51   ` Sinan Kaya
@ 2015-10-31 12:53     ` Timur Tabi
  2015-10-31 12:53     ` Timur Tabi
  2015-11-02 21:30     ` Arnd Bergmann
  2 siblings, 0 replies; 51+ messages in thread
From: Timur Tabi @ 2015-10-31 12:53 UTC (permalink / raw)
  To: Sinan Kaya, Arnd Bergmann
  Cc: dmaengine, cov, jcm, Rob Herring, Pawel Moll, Mark Rutland,
	Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams, devicetree,
	linux-kernel

Sinan Kaya wrote:
>
> I expect this driver to grow in functionality over time. Right now, it
> does the global init for the DMA. After that all channels execute on
> their own without depending on each other. Global init has to be done
> first before attempting to do any channel initialization.
>
> There is also implied startup ordering requirements. I was doing this by
> using channel driver with the late binding to guarantee that.
>
> As soon as I use module_platform_driver, the ordering gets reversed for
> some reason.

If you want to force two probe functions to be called in order, then 
that's what -EPROBE_DEFER is for.

-- 
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the
Code Aurora Forum, hosted by The Linux Foundation.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver
  2015-10-31  6:51   ` Sinan Kaya
  2015-10-31 12:53     ` Timur Tabi
@ 2015-10-31 12:53     ` Timur Tabi
  2015-11-02 21:30     ` Arnd Bergmann
  2 siblings, 0 replies; 51+ messages in thread
From: Timur Tabi @ 2015-10-31 12:53 UTC (permalink / raw)
  To: Sinan Kaya, Arnd Bergmann
  Cc: dmaengine, cov, jcm, Rob Herring, Pawel Moll, Mark Rutland,
	Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams, devicetree,
	linux-kernel

Sinan Kaya wrote:
>
> I expect this driver to grow in functionality over time. Right now, it
> does the global init for the DMA. After that all channels execute on
> their own without depending on each other. Global init has to be done
> first before attempting to do any channel initialization.
>
> There is also implied startup ordering requirements. I was doing this by
> using channel driver with the late binding to guarantee that.
>
> As soon as I use module_platform_driver, the ordering gets reversed for
> some reason.


-- 
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the
Code Aurora Forum, hosted by The Linux Foundation.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver
  2015-10-30 15:00 ` Mark Rutland
  2015-10-30 17:59   ` Andy Shevchenko
@ 2015-10-31 17:11   ` Sinan Kaya
  1 sibling, 0 replies; 51+ messages in thread
From: Sinan Kaya @ 2015-10-31 17:11 UTC (permalink / raw)
  To: Mark Rutland
  Cc: dmaengine, timur, cov, jcm, Rob Herring, Pawel Moll,
	Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams, devicetree,
	linux-kernel



On 10/30/2015 11:00 AM, Mark Rutland wrote:
> On Thu, Oct 29, 2015 at 11:08:12PM -0400, Sinan Kaya wrote:
>> The Qualcomm Technologies HIDMA device has been designed
>> to support virtualization technology. The driver has been
>> divided into two to follow the hardware design. The management
>> driver is executed in hypervisor context and is the main
>> managment for all channels provided by the device. The
>> channel driver is exected in the guest OS context.
>>
>> Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
>> ---
>>   .../devicetree/bindings/dma/qcom_hidma_mgmt.txt    |  42 +
>>   drivers/dma/Kconfig                                |  11 +
>>   drivers/dma/Makefile                               |   1 +
>>   drivers/dma/qcom_hidma_mgmt.c                      | 868 +++++++++++++++++++++
>>   4 files changed, 922 insertions(+)
>>   create mode 100644 Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt
>>   create mode 100644 drivers/dma/qcom_hidma_mgmt.c
>>
>> diff --git a/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt b/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt
>> new file mode 100644
>> index 0000000..81674ab
>> --- /dev/null
>> +++ b/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt
>> @@ -0,0 +1,42 @@
>> +Qualcomm Technologies HIDMA Management interface
>> +
>> +Required properties:
>> +- compatible: must contain "qcom,hidma_mgmt"
>
> s/_/-/ in property names and compatible strings.
>
done

> Per your rationale for splitting the device nodes, surely the
> relationship between the two needs to be described?
>
I changed the commit message to make it more clear. I also added
the same message to the device binding document for reference.

> The GIC has a similar split, yet we use a single node, with the
> hypervisor portions being optional.
>
> What if there were multiple instances?
>
The driver already supports multiple instances. It has been tested 
against 4 instances. I ended up having 4 management device entities 
managing 6 different channels each.


>> +- reg: Address range for DMA device
>> +- interrupts: Should contain the one interrupt shared by all channels
>> +- nr-channels: Number of channels supported by this DMA controller.
>> +- max-write: Maximum write burst in bytes. A memcpy requested is
>> +  fragmented to multiples of this amount.
>> +- max-read: Maximum read burst in bytes. A memcpy request is
>> +  fragmented to multiples of this amount.
>
> Thse would be better named as max-read-burst-bytes and
> min-read-burst-bytes.

ok

>
>> +- max-wxactions: Maximum write transactions to perform in a burst
>> +- max-rdactions: Maximum read transactions to perform in a burst
>
> max-{write,read}-transactions
ok
>
>> +- max-memset-limit: Maximum memset limit
>
> I have no idea what this is meant to mean.
>
old stuff. no memset support anymore. will remove it.

>> +- ch-priority-#n: Priority of the channel
>> +- ch-weight-#n: Round robin weight of the channel
>
> These need better descriptions. They sound like configuration options.
>
ok.

> [...]
>
>> +#if IS_ENABLED(CONFIG_ACPI)
>> +static const struct acpi_device_id qcom_hidma_mgmt_acpi_ids[] = {
>> +	{"QCOM8060"},
>> +	{},
>> +};
>> +#endif
>
> How do DMA engines work with ACPI?
>
> How are client relationships defined?
>
> THanks,
> Mark.
>

-- 
Sinan Kaya
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a 
Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver
  2015-10-31  3:33                     ` Jon Masters
@ 2015-10-31 17:34                       ` Sinan Kaya
  0 siblings, 0 replies; 51+ messages in thread
From: Sinan Kaya @ 2015-10-31 17:34 UTC (permalink / raw)
  To: Jon Masters, Andy Shevchenko, Al Stone
  Cc: Mark Rutland, dmaengine, timur, cov, Rob Herring, Pawel Moll,
	Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams, devicetree,
	linux-kernel, leif.lindholm



On 10/30/2015 11:33 PM, Jon Masters wrote:
> Hi Andy,
>
> On 10/30/2015 04:15 PM, Andy Shevchenko wrote:
>> On Fri, Oct 30, 2015 at 10:08 PM, Al Stone <al.stone@linaro.org> wrote:
>>> On 10/30/2015 01:01 PM, Mark Rutland wrote:
>>>> On Fri, Oct 30, 2015 at 02:48:06PM -0400, Sinan Kaya wrote:
>>
>>>> The CSRT is listed under "Want", not "Never" or "Don't Care", so Linaro
>>>> have certainly not said that CSRT will not be supported. If anything,
>>>> they have stated that the table should be supported.
>>>
>>> "Want" means interesting, and probably useful, but no clear indication that
>>> anyone actually needs it.  At one point, we thought we might use the CSRT
>>> for describing DMA, but it turns out we have not needed to.
>>
>> Then you are going to have either 1 or 0 DMAC for slave devices, right?
>
> I believe what Al means is that such hardware has not appeared
> (publicly) until this time and so such situation was theoretical and
> thus not covered by the Linaro wiki. Linaro had not prioritized CSRT
> because it didn't seem that the need to support it would arise soon.
>
>> The CSRT, unfortunately, the only way how to enumerate DMAC to be used
>> by slave devices.
>> You may look into drivers/dma/acpi-dma.c for usage in Linux.
>>
>> Yes, I know about _DSD, but I don't think it will provide soon any
>> other official mechanisms to what we have now. Better to ask Rafael
>> and Mika.
>
> Thanks for the feedback. I agree that generally the plan is to use
> existing tables from x86 on arm64 when possible. Please see below.
>
>>> However, let's make sure we're saying the same thing: the CSRT table is
>>> properly defined in the kernel include/acpi/actbl2.h file so one can read
>>> such a table and use it if they so choose.  Nothing that we have done at
>>> Linaro in the arm64 part of the kernel relies on any of the content from
>>> the CSRT, nor does it preclude someone relying on that content.  So, the
>>> CSRT is defined, and is usable, but is just not being used -- by Linaro --
>>> at present.
>>
>> This sounds clear.
>>
>>> If that needs to change, let me know; no one has asked us to use the CSRT
>>> for a DMA engine, and we have not been provided any hardware that requires
>>> it.
>>
>> See above.
>
> Here's, what I believe to be the summary:
>
> 1). QCOM are not implementing slave device support in their current
> HIDMA hardware, therefore the requirement for CSRT does not exist *at
> present* for this specific driver to be merged and the discussion in
> this sub-thread pertains only to a situation not affecting HIDMA.
>
> 2). A requirement upon the CSRT should be clarified in the various
> specifications. The SBBR[0] currently "recommends" CSRT but does not
> necessarily provide guidance about what kinds of system resources would
> be covered by that, and so there is a potential for this to be missed.
>
> As one of the lead authors of certain ARM server specifications, I will
> contact those involved in such and ensure that this is addressed with a
> clarification in a future release.
>
> Thanks for raising the concern,
>
> Jon.
>
> [0]
> http://infocenter.arm.com/help/topic/com.arm.doc.den0044a/Server_Base_Boot_Requirements.pdf
>

Apologies for creating confusion.

I tried using the dma-acpi.c implementation at the beginning. It didn't 
quite play well with what I was trying to do with HIDMA. Since the file 
header said Intel, I assumed it was an Intel specific implementation.

To confirm my suspicion, I looked at the Linaro wiki. I haven't seen 
CSRT in MUST tables.

I considered WANT meaning some spec work required similar to DMAR vs. 
IORT table.


-- 
Sinan Kaya
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a 
Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver
  2015-10-30 22:28       ` Arnd Bergmann
  2015-10-30 22:36         ` Timur Tabi
  2015-10-31  1:53         ` Sinan Kaya
@ 2015-11-01 18:50         ` Sinan Kaya
  2015-11-01 20:21           ` Timur Tabi
  2015-11-02 16:33           ` Arnd Bergmann
  2 siblings, 2 replies; 51+ messages in thread
From: Sinan Kaya @ 2015-11-01 18:50 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: dmaengine, timur, cov, jcm, Rob Herring, Pawel Moll,
	Mark Rutland, Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams,
	devicetree, linux-kernel


>>
>>> Also, if you waste CPU cycles for hundreds of milliseconds, it's unlikely
>>> that the function is so performance critical that it requires writel_relaxed().
>>>
>>> Just use writel() here.
ok, replaced writel_relaxed+wmb with writel.

>>
>> The issue is not writel_relaxed vs. writel. After I issue reset, I need
>> wait for some time to confirm reset was done. I can use readl_polling
>> instead of mdelay if we don't like mdelay.
>
> I meant that both _relaxed() and mdelay() are probably wrong here.

You are right about redundant writel_relaxed + wmb. They are effectively 
equal to writel.

However, after issuing the command; I still need to wait some amount of 
time until hardware acknowledges the commands like reset/enable/disable. 
These are relatively faster operations happening in microseconds. That's 
why, I have mdelay there.

I'll take a look at workqueues but it could turn out to be an overkill 
for few microseconds.

>
> readl_polling() would avoid the part with _relaxed(), but if that can
> still take more than a few microseconds, you should try to sleep inbetween
> rather than burn CPU cycles.
>
>>>> +/*
>>>> + * The interrupt handler for HIDMA will try to consume as many pending
>>>> + * EVRE from the event queue as possible. Each EVRE has an associated
>>>> + * TRE that holds the user interface parameters. EVRE reports the
>>>> + * result of the transaction. Hardware guarantees ordering between EVREs
>>>> + * and TREs. We use last processed offset to figure out which TRE is
>>>> + * associated with which EVRE. If two TREs are consumed by HW, the EVREs
>>>> + * are in order in the event ring.
>>>> + * This handler will do a one pass for consuming EVREs. Other EVREs may
>>>> + * be delivered while we are working. It will try to consume incoming
>>>> + * EVREs one more time and return.
>>>> + * For unprocessed EVREs, hardware will trigger another interrupt until
>>>> + * all the interrupt bits are cleared.
>>>> + */
>>>> +static void hidma_ll_int_handler_internal(struct hidma_lldev *lldev)
>>>> +{
>>>> +	u32 status;
>>>> +	u32 enable;
>>>> +	u32 cause;
>>>> +	int repeat = 2;
>>>> +	unsigned long timeout;
>>>> +
>>>> +	status = readl_relaxed(lldev->evca + EVCA_IRQ_STAT_OFFSET);
>>>> +	enable = readl_relaxed(lldev->evca + EVCA_IRQ_EN_OFFSET);
>>>> +	cause = status & enable;
>>>> +
>>>
>>> Reading the status probably requires a readl() rather than readl_relaxed()
>>> to guarantee that the DMA data has arrived in memory by the time that the
>>> register data is seen by the CPU. If using readl_relaxed() here is a valid
>>> and required optimization, please add a comment to explain why it works
>>> and how much you gain.
>>
>> I will add some description. This is a high speed peripheral. I don't
>> like spreading barriers as candies inside the readl and writel unless I
>> have to.
>>
>> According to the barriers video, I watched on youtube this should be the
>> rule for ordering.
>>
>> "if you do two relaxed reads and check the results of the returned
>> variables, ARM architecture guarantees that these two relaxed variables
>> will get observed during the check."
>>
>> this is called implied ordering or something of that sort.
>
> My point was a bit different: while it is guaranteed that the
> result of the readl_relaxed() is observed in order, they do not
> guarantee that a DMA from device to memory that was started by
> the device before the readl_relaxed() has arrived in memory
> by the time that the readl_relaxed() result is visible to the
> CPU and it starts accessing the memory.
>
I checked with the hardware designers. Hardware guarantees that by the 
time interrupt is observed, all data transactions in flight are 
delivered to their respective places and are visible to the CPU. I'll 
add a comment in the code about this.

> In other words, when the hardware sends you data followed by an
> interrupt to tell you the data is there, your interrupt handler
> can tell the driver that is waiting for this data that the DMA
> is complete while the data itself is still in flight, e.g. waiting
> for an IOMMU to fetch page table entries.
>
There is HW guarantee for ordering.

On demand paging for IOMMU is only supported for PCIe via PRI (Page 
Request Interface) not for HIDMA. All other hardware instances work on 
pinned DMA addresses. I'll drop a note about this too to the code as well.


>>>> +	wmb();
>>>> +
>>>> +	mdelay(1);
>>>
>>> Another workqueue? You should basically never call mdelay().
>>
>> I'll use polled read instead.
>
> Ok, but again make sure that you call msleep() or usleep_range()
> between the reads.
>
>>>> +static int hidma_ll_hw_start(void *llhndl)
>>>> +{
>>>> +	int rc = 0;
>>>> +	struct hidma_lldev *lldev = llhndl;
>>>> +	unsigned long irqflags;
>>>> +
>>>> +	spin_lock_irqsave(&lldev->lock, irqflags);
>>>> +	writel_relaxed(lldev->tre_write_offset,
>>>> +			lldev->trca + TRCA_DOORBELL_OFFSET);
>>>> +	spin_unlock_irqrestore(&lldev->lock, irqflags);
>>>
>>> How does this work? The writel_relaxed() won't synchronize with either
>>> the DMA data or the spinlock.
>>
>> mutex and spinlocks have barriers inside. See the youtube video.
>>
>> https://www.youtube.com/watch?v=6ORn6_35kKo
>
> I'm pretty sure these barriers only make sense to the CPU, so the
> spinlock guarantees that the access to lldev->tre_write_offset is
> protected, but not the access to lldev->trca, because that write
> is posted on the bus and might not complete until after the
> unlock. There is no "dsb(st)" in here:
>
> static inline void arch_spin_unlock(arch_spinlock_t *lock)
> {
>          unsigned long tmp;
>
>          asm volatile(ARM64_LSE_ATOMIC_INSN(
>          /* LL/SC */
>          "       ldrh    %w1, %0\n"
>          "       add     %w1, %w1, #1\n"
>          "       stlrh   %w1, %0",
>          /* LSE atomics */
>          "       mov     %w1, #1\n"
>          "       nop\n"
>          "       staddlh %w1, %0")
>          : "=Q" (lock->owner), "=&r" (tmp)
>          :
>          : "memory");
> }
>
ok. I'm changing it to readl. Thanks for the insight.

>>>
>>> Also, your abstraction seem to go a little too far if the upper driver
>>> doesn't know what the lower driver calls its main device structure.
>>>
>>> Or you can go further and just embed the struct hidma_lldev within the
>>> struct hidma_dev to save one?
>>
>> That's how it was before. It got too complex and variables/spinlocks got
>> intermixed. I borrowed the upper layer and it worked as it is. I rather
>> keep all hardware stuff in another file and do not mix and match for safety.
>
> Ok, then just use a forward declaration for the struct name so you can
> have a type-safe pointer but don't need to show the members.
>
>>>> +void hidma_ll_chstats(struct seq_file *s, void *llhndl, u32 tre_ch)
>>>> +{
>>>> +	struct hidma_lldev *lldev = llhndl;
>>>> +	struct hidma_tre *tre;
>>>> +	u32 length;
>>>> +	dma_addr_t src_start;
>>>> +	dma_addr_t dest_start;
>>>> +	u32 *tre_local;
>>>> +
>>>> +	if (unlikely(tre_ch >= lldev->nr_tres)) {
>>>> +		dev_err(lldev->dev, "invalid TRE number in chstats:%d",
>>>> +			tre_ch);
>>>> +		return;
>>>> +	}
>>>> +	tre = &lldev->trepool[tre_ch];
>>>> +	seq_printf(s, "------Channel %d -----\n", tre_ch);
>>>> +	seq_printf(s, "allocated=%d\n", atomic_read(&tre->allocated));
>>>> +	HIDMA_CHAN_SHOW(tre, queued);
>>>> +	seq_printf(s, "err_info=0x%x\n",
>>>> +		   lldev->tx_status_list[tre->chidx].err_info);
>>>> +	seq_printf(s, "err_code=0x%x\n",
>>>> +		   lldev->tx_status_list[tre->chidx].err_code);
>>>> +	HIDMA_CHAN_SHOW(tre, status);
>>>> +	HIDMA_CHAN_SHOW(tre, chidx);
>>>> +	HIDMA_CHAN_SHOW(tre, dma_sig);
>>>> +	seq_printf(s, "dev_name=%s\n", tre->dev_name);
>>>> +	seq_printf(s, "callback=%p\n", tre->callback);
>>>> +	seq_printf(s, "data=%p\n", tre->data);
>>>> +	HIDMA_CHAN_SHOW(tre, tre_index);
>>>> +
>>>> +	tre_local = &tre->tre_local[0];
>>>> +	src_start = tre_local[TRE_SRC_LOW_IDX];
>>>> +	src_start = ((u64)(tre_local[TRE_SRC_HI_IDX]) << 32) + src_start;
>>>> +	dest_start = tre_local[TRE_DEST_LOW_IDX];
>>>> +	dest_start += ((u64)(tre_local[TRE_DEST_HI_IDX]) << 32);
>>>> +	length = tre_local[TRE_LEN_IDX];
>>>> +
>>>> +	seq_printf(s, "src=%pap\n", &src_start);
>>>> +	seq_printf(s, "dest=%pap\n", &dest_start);
>>>> +	seq_printf(s, "length=0x%x\n", length);
>>>> +}
>>>> +EXPORT_SYMBOL_GPL(hidma_ll_chstats);
>>>
>>> Remove all the pointers here. I guess you can remove the entire debugfs
>>> file really ;-)
>>
>> ok, I need some facility to print out stuff when problems happened.
>> Would you rather use sysfs?
>
> sysfs would be less appropriate, as that requires providing a stable ABI
> for user space. I think ftrace should provide what you need. Let me know
> if that doesn't work out.
>
> 	Arnd
>

-- 
Sinan Kaya
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a 
Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver
  2015-11-01 18:50         ` Sinan Kaya
@ 2015-11-01 20:21           ` Timur Tabi
  2015-11-01 20:27             ` Sinan Kaya
  2015-11-02 16:33           ` Arnd Bergmann
  1 sibling, 1 reply; 51+ messages in thread
From: Timur Tabi @ 2015-11-01 20:21 UTC (permalink / raw)
  To: Sinan Kaya, Arnd Bergmann
  Cc: dmaengine, cov, jcm, Rob Herring, Pawel Moll, Mark Rutland,
	Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams, devicetree,
	linux-kernel

Sinan Kaya wrote:
> However, after issuing the command; I still need to wait some amount of
> time until hardware acknowledges the commands like reset/enable/disable.
> These are relatively faster operations happening in microseconds. That's
> why, I have mdelay there.

Can you use readl_poll_timeout()?

-- 
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the
Code Aurora Forum, hosted by The Linux Foundation.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver
  2015-11-01 20:21           ` Timur Tabi
@ 2015-11-01 20:27             ` Sinan Kaya
  0 siblings, 0 replies; 51+ messages in thread
From: Sinan Kaya @ 2015-11-01 20:27 UTC (permalink / raw)
  To: Timur Tabi, Arnd Bergmann
  Cc: dmaengine, cov, jcm, Rob Herring, Pawel Moll, Mark Rutland,
	Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams, devicetree,
	linux-kernel



On 11/1/2015 3:21 PM, Timur Tabi wrote:
> Sinan Kaya wrote:
>> However, after issuing the command; I still need to wait some amount of
>> time until hardware acknowledges the commands like reset/enable/disable.
>> These are relatively faster operations happening in microseconds. That's
>> why, I have mdelay there.
>
> Can you use readl_poll_timeout()?
>
Yes, I'm changing them to readl_poll_timeout after looking at the 
workqueue vs. expected delay requirements. I decided to stick with 
polled read.

First, I missed the point here that mdelay is discouraged.
https://www.kernel.org/doc/Documentation/timers/timers-howto.txt

According to same document, msleep can sleep up to 20ms which is not 
what I want.

This code is trying to sleep 10ms maximum and treating longer durations 
as failures.

readl_poll_timeout calls usleep_range that seems to satisfy what I'm 
looking for.

-- 
Sinan Kaya
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a 
Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver
  2015-11-01 18:50         ` Sinan Kaya
  2015-11-01 20:21           ` Timur Tabi
@ 2015-11-02 16:33           ` Arnd Bergmann
  2015-11-02 19:21             ` Sinan Kaya
  1 sibling, 1 reply; 51+ messages in thread
From: Arnd Bergmann @ 2015-11-02 16:33 UTC (permalink / raw)
  To: Sinan Kaya
  Cc: dmaengine, timur, cov, jcm, Rob Herring, Pawel Moll,
	Mark Rutland, Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams,
	devicetree, linux-kernel

On Sunday 01 November 2015 13:50:53 Sinan Kaya wrote:
> 
> >> The issue is not writel_relaxed vs. writel. After I issue reset, I need
> >> wait for some time to confirm reset was done. I can use readl_polling
> >> instead of mdelay if we don't like mdelay.
> >
> > I meant that both _relaxed() and mdelay() are probably wrong here.
> 
> You are right about redundant writel_relaxed + wmb. They are effectively 
> equal to writel.

Actually, writel() is wmb()+writel_relaxed(), not the other way round:

When sending a command to a device that can start a DMA transfer,
the barrier is required to ensure that the DMA happens after previously
written data has gone from the CPU write buffers into the memory that
is used as the source for the transfer.

A barrier after the writel() has no effect, as MMIO writes are posted
on the bus.

> However, after issuing the command; I still need to wait some amount of 
> time until hardware acknowledges the commands like reset/enable/disable. 
> These are relatively faster operations happening in microseconds. That's 
> why, I have mdelay there.
> 
> I'll take a look at workqueues but it could turn out to be an overkill 
> for few microseconds.

Most devices are able to provide an interrupt for long-running commands.
Are you sure that yours is unable to do this? If so, is this a design
mistake or an implementation bug?

> >>> Reading the status probably requires a readl() rather than readl_relaxed()
> >>> to guarantee that the DMA data has arrived in memory by the time that the
> >>> register data is seen by the CPU. If using readl_relaxed() here is a valid
> >>> and required optimization, please add a comment to explain why it works
> >>> and how much you gain.
> >>
> >> I will add some description. This is a high speed peripheral. I don't
> >> like spreading barriers as candies inside the readl and writel unless I
> >> have to.
> >>
> >> According to the barriers video, I watched on youtube this should be the
> >> rule for ordering.
> >>
> >> "if you do two relaxed reads and check the results of the returned
> >> variables, ARM architecture guarantees that these two relaxed variables
> >> will get observed during the check."
> >>
> >> this is called implied ordering or something of that sort.
> >
> > My point was a bit different: while it is guaranteed that the
> > result of the readl_relaxed() is observed in order, they do not
> > guarantee that a DMA from device to memory that was started by
> > the device before the readl_relaxed() has arrived in memory
> > by the time that the readl_relaxed() result is visible to the
> > CPU and it starts accessing the memory.
> >
> I checked with the hardware designers. Hardware guarantees that by the 
> time interrupt is observed, all data transactions in flight are 
> delivered to their respective places and are visible to the CPU. I'll 
> add a comment in the code about this.

I'm curious about this. Does that mean the device is not meant for
high-performance transfers and just synchronizes the bus before
triggering the interrupt?

> > In other words, when the hardware sends you data followed by an
> > interrupt to tell you the data is there, your interrupt handler
> > can tell the driver that is waiting for this data that the DMA
> > is complete while the data itself is still in flight, e.g. waiting
> > for an IOMMU to fetch page table entries.
> >
> There is HW guarantee for ordering.
> 
> On demand paging for IOMMU is only supported for PCIe via PRI (Page 
> Request Interface) not for HIDMA. All other hardware instances work on 
> pinned DMA addresses. I'll drop a note about this too to the code as well.

I wasn't talking about paging, just fetching the IOTLB from the
preloaded page tables in RAM. This can takes several uncached memory
accesses, so it would generally be slow.


	Arnd


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver
  2015-11-02 16:33           ` Arnd Bergmann
@ 2015-11-02 19:21             ` Sinan Kaya
  2015-11-02 20:55               ` Arnd Bergmann
  0 siblings, 1 reply; 51+ messages in thread
From: Sinan Kaya @ 2015-11-02 19:21 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: dmaengine, timur, cov, jcm, Rob Herring, Pawel Moll,
	Mark Rutland, Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams,
	devicetree, linux-kernel

On 11/2/2015 11:33 AM, Arnd Bergmann wrote:
> On Sunday 01 November 2015 13:50:53 Sinan Kaya wrote:
>>
>>>> The issue is not writel_relaxed vs. writel. After I issue reset, I need
>>>> wait for some time to confirm reset was done. I can use readl_polling
>>>> instead of mdelay if we don't like mdelay.
>>>
>>> I meant that both _relaxed() and mdelay() are probably wrong here.
>>
>> You are right about redundant writel_relaxed + wmb. They are effectively
>> equal to writel.
>
> Actually, writel() is wmb()+writel_relaxed(), not the other way round:

I agree. Semantics... but important one since order matters this time.

>
> When sending a command to a device that can start a DMA transfer,
> the barrier is required to ensure that the DMA happens after previously
> written data has gone from the CPU write buffers into the memory that
> is used as the source for the transfer.
>
Agreed.

> A barrier after the writel() has no effect, as MMIO writes are posted
> on the bus.

I had two use cases in the original code. We are talking about start 
routine here. I was giving reference to enable/reset/disable uses above.

1. Start routine
--------------
spin_lock
writel_relaxed
spin_unlock

and

2. enable/reset/disable
--------------
writel_relaxed
wmb

I changed writel_relaxed to writel now in start routine and submitted 
the second version of the patchset yesterday. I hope you have received 
it. I was relying on the spinlocks before.


>
>> However, after issuing the command; I still need to wait some amount of
>> time until hardware acknowledges the commands like reset/enable/disable.
>> These are relatively faster operations happening in microseconds. That's
>> why, I have mdelay there.
>>
>> I'll take a look at workqueues but it could turn out to be an overkill
>> for few microseconds.
>
> Most devices are able to provide an interrupt for long-running commands.
> Are you sure that yours is unable to do this? If so, is this a design
> mistake or an implementation bug?

I think I was not clear on how long these command take. These command 
are really fast and get acknowledged at status register in few 
microseconds. That's why I choose polling.

I was waiting up to 10ms before and manually sleeping 1 milliseconds in 
between each using mdelay. I followed your suggestion and got rid of the 
mdelay. Then, I used polled read command which calls the usleep_range 
function as you suggested.

Hardware supports error interrupts but this is a SW design philosophy 
discussion. Why would you want to trigger an interrupt for few 
microseconds delay that only happens during the first time init from probe?

>
>>>>> Reading the status probably requires a readl() rather than readl_relaxed()
>>>>> to guarantee that the DMA data has arrived in memory by the time that the
>>>>> register data is seen by the CPU. If using readl_relaxed() here is a valid
>>>>> and required optimization, please add a comment to explain why it works
>>>>> and how much you gain.
>>>>
>>>> I will add some description. This is a high speed peripheral. I don't
>>>> like spreading barriers as candies inside the readl and writel unless I
>>>> have to.
>>>>
>>>> According to the barriers video, I watched on youtube this should be the
>>>> rule for ordering.
>>>>
>>>> "if you do two relaxed reads and check the results of the returned
>>>> variables, ARM architecture guarantees that these two relaxed variables
>>>> will get observed during the check."
>>>>
>>>> this is called implied ordering or something of that sort.
>>>
>>> My point was a bit different: while it is guaranteed that the
>>> result of the readl_relaxed() is observed in order, they do not
>>> guarantee that a DMA from device to memory that was started by
>>> the device before the readl_relaxed() has arrived in memory
>>> by the time that the readl_relaxed() result is visible to the
>>> CPU and it starts accessing the memory.
>>>
>> I checked with the hardware designers. Hardware guarantees that by the
>> time interrupt is observed, all data transactions in flight are
>> delivered to their respective places and are visible to the CPU. I'll
>> add a comment in the code about this.
>
> I'm curious about this. Does that mean the device is not meant for
> high-performance transfers and just synchronizes the bus before
> triggering the interrupt?

HIDMA meaning, as you probably guessed, is high performance DMA. We had 
several name iterations in the company and this was the one that sticked.

I'm a SW person. I don't have the expertise to go deeper into HW design.
I'm following the programming document. It says coherency and guaranteed 
interrupt ordering. High performance can mean how fast you can move data 
from one location to the other one vs. how fast you can queue up 
multiple requests and get acks in response.

I followed a simple design here. HW can take multiple requests 
simultaneously and give me an ack when it is finished with interrupt.

If there are requests in flight, other requests will get queued up in SW 
and will not be serviced until the previous requests get acknowledged. 
Then, as soon as HW stops processing; I queue a bunch of other requests 
and kick start it. Current SW design does not allow simultaneous SW 
queuing vs. HW processing. I can try this on the next iteration. This 
implementation, IMO, is good enough now and has been working reliably 
for a long time (since 2014).

>
>>> In other words, when the hardware sends you data followed by an
>>> interrupt to tell you the data is there, your interrupt handler
>>> can tell the driver that is waiting for this data that the DMA
>>> is complete while the data itself is still in flight, e.g. waiting
>>> for an IOMMU to fetch page table entries.
>>>
>> There is HW guarantee for ordering.
>>
>> On demand paging for IOMMU is only supported for PCIe via PRI (Page
>> Request Interface) not for HIDMA. All other hardware instances work on
>> pinned DMA addresses. I'll drop a note about this too to the code as well.
>
> I wasn't talking about paging, just fetching the IOTLB from the
> preloaded page tables in RAM. This can takes several uncached memory
> accesses, so it would generally be slow.
>
I see.

HIDMA is not aware of IOMMU presence since it follows the DMA API. All 
IOMMU latency will be built into the data movement time. By the time 
interrupt happens, IOMMU lookups + data movement has already taken place.

>
> 	Arnd
>

-- 
Sinan Kaya
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a 
Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver
  2015-11-02 19:21             ` Sinan Kaya
@ 2015-11-02 20:55               ` Arnd Bergmann
  2015-11-03  5:29                 ` Sinan Kaya
  0 siblings, 1 reply; 51+ messages in thread
From: Arnd Bergmann @ 2015-11-02 20:55 UTC (permalink / raw)
  To: Sinan Kaya
  Cc: dmaengine, timur, cov, jcm, Rob Herring, Pawel Moll,
	Mark Rutland, Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams,
	devicetree, linux-kernel

On Monday 02 November 2015 14:21:37 Sinan Kaya wrote:
> On 11/2/2015 11:33 AM, Arnd Bergmann wrote:
> > On Sunday 01 November 2015 13:50:53 Sinan Kaya wrote:
> > A barrier after the writel() has no effect, as MMIO writes are posted
> > on the bus.
> 
> I had two use cases in the original code. We are talking about start 
> routine here. I was giving reference to enable/reset/disable uses above.
> 
> 1. Start routine
> --------------
> spin_lock
> writel_relaxed
> spin_unlock
> 
> and
> 
> 2. enable/reset/disable
> --------------
> writel_relaxed
> wmb
> 
> I changed writel_relaxed to writel now in start routine and submitted 
> the second version of the patchset yesterday. I hope you have received 
> it. I was relying on the spinlocks before.

Ok

> >
> >> However, after issuing the command; I still need to wait some amount of
> >> time until hardware acknowledges the commands like reset/enable/disable.
> >> These are relatively faster operations happening in microseconds. That's
> >> why, I have mdelay there.
> >>
> >> I'll take a look at workqueues but it could turn out to be an overkill
> >> for few microseconds.
> >
> > Most devices are able to provide an interrupt for long-running commands.
> > Are you sure that yours is unable to do this? If so, is this a design
> > mistake or an implementation bug?
> 
> I think I was not clear on how long these command take. These command 
> are really fast and get acknowledged at status register in few 
> microseconds. That's why I choose polling.
> 
> I was waiting up to 10ms before and manually sleeping 1 milliseconds in 
> between each using mdelay. I followed your suggestion and got rid of the 
> mdelay. Then, I used polled read command which calls the usleep_range 
> function as you suggested.
> 
> Hardware supports error interrupts but this is a SW design philosophy 
> discussion. Why would you want to trigger an interrupt for few 
> microseconds delay that only happens during the first time init from probe?

If you get called in sleeping context and can use usleep_range() for
delaying, that is fine, but in effect that just means you generate another
interrupt from the timer that is not synchronized to your device, and
hide the complexity behind the usleep_range() function call.

My first choice would have been to use a struct completion to wait for
the next interrupt here, which has similar complexity on the source code
side, but never waits longer than necessary. If the hrtimer based method
works for you, there is no need to change that.

> >> I checked with the hardware designers. Hardware guarantees that by the
> >> time interrupt is observed, all data transactions in flight are
> >> delivered to their respective places and are visible to the CPU. I'll
> >> add a comment in the code about this.
> >
> > I'm curious about this. Does that mean the device is not meant for
> > high-performance transfers and just synchronizes the bus before
> > triggering the interrupt?
> 
> HIDMA meaning, as you probably guessed, is high performance DMA. We had 
> several name iterations in the company and this was the one that sticked.
> 
> I'm a SW person. I don't have the expertise to go deeper into HW design.
> I'm following the programming document. It says coherency and guaranteed 
> interrupt ordering. High performance can mean how fast you can move data 
> from one location to the other one vs. how fast you can queue up 
> multiple requests and get acks in response.
> 
> I followed a simple design here. HW can take multiple requests 
> simultaneously and give me an ack when it is finished with interrupt.
> 
> If there are requests in flight, other requests will get queued up in SW 
> and will not be serviced until the previous requests get acknowledged. 
> Then, as soon as HW stops processing; I queue a bunch of other requests 
> and kick start it. Current SW design does not allow simultaneous SW 
> queuing vs. HW processing. I can try this on the next iteration. This 
> implementation, IMO, is good enough now and has been working reliably 
> for a long time (since 2014).

Are you using message signaled interrupts then? Typically MSI guarantees
ordering against DMA, but level or edge triggered interrupts by definition
cannot (at least on PCI, but most other buses are the same way), because
the DMA master has no insight into when a DMA is actually complete.

If you use MSI, please add a comment to the readl_relaxed() that it
is safe because of that, otherwise the next person who tries to debug
a problem with your driver has to look into this.

> >>> In other words, when the hardware sends you data followed by an
> >>> interrupt to tell you the data is there, your interrupt handler
> >>> can tell the driver that is waiting for this data that the DMA
> >>> is complete while the data itself is still in flight, e.g. waiting
> >>> for an IOMMU to fetch page table entries.
> >>>
> >> There is HW guarantee for ordering.
> >>
> >> On demand paging for IOMMU is only supported for PCIe via PRI (Page
> >> Request Interface) not for HIDMA. All other hardware instances work on
> >> pinned DMA addresses. I'll drop a note about this too to the code as well.
> >
> > I wasn't talking about paging, just fetching the IOTLB from the
> > preloaded page tables in RAM. This can takes several uncached memory
> > accesses, so it would generally be slow.
> >
> I see.
> 
> HIDMA is not aware of IOMMU presence since it follows the DMA API. All 
> IOMMU latency will be built into the data movement time. By the time 
> interrupt happens, IOMMU lookups + data movement has already taken place.

Ok.

	Arnd

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver
  2015-10-31  6:51   ` Sinan Kaya
  2015-10-31 12:53     ` Timur Tabi
  2015-10-31 12:53     ` Timur Tabi
@ 2015-11-02 21:30     ` Arnd Bergmann
  2015-11-03  4:45       ` Sinan Kaya
  2 siblings, 1 reply; 51+ messages in thread
From: Arnd Bergmann @ 2015-11-02 21:30 UTC (permalink / raw)
  To: Sinan Kaya
  Cc: dmaengine, timur, cov, jcm, Rob Herring, Pawel Moll,
	Mark Rutland, Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams,
	devicetree, linux-kernel

On Saturday 31 October 2015 02:51:46 Sinan Kaya wrote:
> On 10/30/2015 5:34 AM, Arnd Bergmann wrote:
> > On Thursday 29 October 2015 23:08:12 Sinan Kaya wrote:
> >> diff --git a/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt b/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt
> >> +
> >> +static int qcom_hidma_mgmt_err_open(struct inode *inode, struct file *file)
> >> +{
> >> +	return single_open(file, qcom_hidma_mgmt_err, inode->i_private);
> >> +}
> >> +
> >> +static const struct file_operations qcom_hidma_mgmt_err_fops = {
> >> +	.open = qcom_hidma_mgmt_err_open,
> >> +	.read = seq_read,
> >> +	.llseek = seq_lseek,
> >> +	.release = single_release,
> >> +};
> >> +
> >> +static ssize_t qcom_hidma_mgmt_mhiderr_clr(struct file *file,
> >> +	const char __user *user_buf, size_t count, loff_t *ppos)
> >> +{
> >> +	struct qcom_hidma_mgmt_dev *mgmtdev = file->f_inode->i_private;
> >> +
> >> +	HIDMA_RUNTIME_GET(mgmtdev);
> >> +	writel(1, mgmtdev->dev_virtaddr + MHID_BUS_ERR_CLR_OFFSET);
> >> +	HIDMA_RUNTIME_SET(mgmtdev);
> >> +	return count;
> >> +}
> >> +
> >> +static const struct file_operations qcom_hidma_mgmt_mhiderr_clrfops = {
> >> +	.write = qcom_hidma_mgmt_mhiderr_clr,
> >> +};
> >
> > Is this really just a debugging interface? If anyone would do this
> > for normal operation, it needs to be a proper API.
> >
> This will be used by the system admin to monitor/reset the execution 
> state of the DMA channels. This will be the management interface. 
> Debugfs is probably not the right choice. I originally had sysfs but 
> than had some doubts. I'm open to suggestions.

User interface design is unfortunately always hard, and I don't have
an obvious answer for you.

Using debugfs by definition means that you don't expect users to
rely on ABI stability, so they should not write any automated scripts
against the contents of the files.

With sysfs, the opposite is true: you need to maintain compatibility
for as long as anyone might rely on the current interface, and it
needs to be reviewed properly and documented in Documentation/ABI/.

Other options are to use ioctl(), netlink or your own virtual file
system, but each of them has the same ABI requirements as sysfs.

Regardless of what you pick, you also need to consider how other drivers
would use the same interface: If someone else has hardware that does
the same thing, we want to be able to use the same tools to access
it, so you should avoid having any hardware specific data in it and
keep it as generic and extensible as possible. In this particular
case, that probably means you should implement the user interfaces in
the dmaengine core driver, and let the specific DMA driver provide
callback function pointers along with the normal ones to fill that
data.

> >> +	dev_info(&pdev->dev,
> >> +		"HI-DMA engine management driver registration complete\n");
> >> +	platform_set_drvdata(pdev, mgmtdev);
> >> +	HIDMA_RUNTIME_SET(mgmtdev);
> >> +	return 0;
> >> +out:
> >> +	pm_runtime_disable(&pdev->dev);
> >> +	pm_runtime_put_sync_suspend(&pdev->dev);
> >> +	return rc;
> >> +}
> >
> > The rest of the probe function does not register any user interface aside from
> > the debugging stuff. Can you explain in the changelog how you expect the
> > driver to be used in a real system? Is there another driver coming?
> 
> I expect this driver to grow in functionality over time. Right now, it 
> does the global init for the DMA. After that all channels execute on 
> their own without depending on each other. Global init has to be done 
> first before attempting to do any channel initialization.
> 
> There is also implied startup ordering requirements. I was doing this by 
> using channel driver with the late binding to guarantee that.
> 
> As soon as I use module_platform_driver, the ordering gets reversed for 
> some reason.

For the ordering requirements, it's probably best to export a symbol
with the entry point and let the normal driver call into that. Using
separate initcall levels is not something you should do in a normal
device driver like this.

What is the relation between the device nodes for the two kinds of
devices? Does it make sense to model the other one as a child device
of this one? That way you would trivially do the ordering by not marking
this one as 'compatible="simple-bus"' and triggering the registration
of the child from the parent probe function.

	Arnd

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver
  2015-11-02 21:30     ` Arnd Bergmann
@ 2015-11-03  4:45       ` Sinan Kaya
  2015-11-03 12:42         ` Arnd Bergmann
  0 siblings, 1 reply; 51+ messages in thread
From: Sinan Kaya @ 2015-11-03  4:45 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: dmaengine, timur, cov, jcm, Rob Herring, Pawel Moll,
	Mark Rutland, Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams,
	devicetree, linux-kernel



On 11/2/2015 4:30 PM, Arnd Bergmann wrote:
> On Saturday 31 October 2015 02:51:46 Sinan Kaya wrote:
>> On 10/30/2015 5:34 AM, Arnd Bergmann wrote:
>>> On Thursday 29 October 2015 23:08:12 Sinan Kaya wrote:
>>>> diff --git a/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt b/Documentation/devicetree/bindings/dma/qcom_hidma_mgmt.txt
>>>> +
>>>> +static int qcom_hidma_mgmt_err_open(struct inode *inode, struct file *file)
>>>> +{
>>>> +	return single_open(file, qcom_hidma_mgmt_err, inode->i_private);
>>>> +}
>>>> +
>>>> +static const struct file_operations qcom_hidma_mgmt_err_fops = {
>>>> +	.open = qcom_hidma_mgmt_err_open,
>>>> +	.read = seq_read,
>>>> +	.llseek = seq_lseek,
>>>> +	.release = single_release,
>>>> +};
>>>> +
>>>> +static ssize_t qcom_hidma_mgmt_mhiderr_clr(struct file *file,
>>>> +	const char __user *user_buf, size_t count, loff_t *ppos)
>>>> +{
>>>> +	struct qcom_hidma_mgmt_dev *mgmtdev = file->f_inode->i_private;
>>>> +
>>>> +	HIDMA_RUNTIME_GET(mgmtdev);
>>>> +	writel(1, mgmtdev->dev_virtaddr + MHID_BUS_ERR_CLR_OFFSET);
>>>> +	HIDMA_RUNTIME_SET(mgmtdev);
>>>> +	return count;
>>>> +}
>>>> +
>>>> +static const struct file_operations qcom_hidma_mgmt_mhiderr_clrfops = {
>>>> +	.write = qcom_hidma_mgmt_mhiderr_clr,
>>>> +};
>>>
>>> Is this really just a debugging interface? If anyone would do this
>>> for normal operation, it needs to be a proper API.
>>>
>> This will be used by the system admin to monitor/reset the execution
>> state of the DMA channels. This will be the management interface.
>> Debugfs is probably not the right choice. I originally had sysfs but
>> than had some doubts. I'm open to suggestions.
>
> User interface design is unfortunately always hard, and I don't have
> an obvious answer for you.
>
> Using debugfs by definition means that you don't expect users to
> rely on ABI stability, so they should not write any automated scripts
> against the contents of the files.
>
> With sysfs, the opposite is true: you need to maintain compatibility
> for as long as anyone might rely on the current interface, and it
> needs to be reviewed properly and documented in Documentation/ABI/.
>
> Other options are to use ioctl(), netlink or your own virtual file
> system, but each of them has the same ABI requirements as sysfs.
>
> Regardless of what you pick, you also need to consider how other drivers
> would use the same interface: If someone else has hardware that does
> the same thing, we want to be able to use the same tools to access
> it, so you should avoid having any hardware specific data in it and
> keep it as generic and extensible as possible. In this particular
> case, that probably means you should implement the user interfaces in
> the dmaengine core driver, and let the specific DMA driver provide
> callback function pointers along with the normal ones to fill that
> data.
>
Thanks, I'll think about this. I'm inclined towards sysfs.

>>>> +	dev_info(&pdev->dev,
>>>> +		"HI-DMA engine management driver registration complete\n");
>>>> +	platform_set_drvdata(pdev, mgmtdev);
>>>> +	HIDMA_RUNTIME_SET(mgmtdev);
>>>> +	return 0;
>>>> +out:
>>>> +	pm_runtime_disable(&pdev->dev);
>>>> +	pm_runtime_put_sync_suspend(&pdev->dev);
>>>> +	return rc;
>>>> +}
>>>
>>> The rest of the probe function does not register any user interface aside from
>>> the debugging stuff. Can you explain in the changelog how you expect the
>>> driver to be used in a real system? Is there another driver coming?
>>
>> I expect this driver to grow in functionality over time. Right now, it
>> does the global init for the DMA. After that all channels execute on
>> their own without depending on each other. Global init has to be done
>> first before attempting to do any channel initialization.
>>
>> There is also implied startup ordering requirements. I was doing this by
>> using channel driver with the late binding to guarantee that.
>>
>> As soon as I use module_platform_driver, the ordering gets reversed for
>> some reason.
>
> For the ordering requirements, it's probably best to export a symbol
> with the entry point and let the normal driver call into that. Using
> separate initcall levels is not something you should do in a normal
> device driver like this.
>
I figured this out. If the channel driver starts before the management 
driver; then channel reset fails. I'm handling this in the channel 
driver and am returning -EPROBE_DEFER. After that, management driver 
gets its chance to work. Then, the channel driver again. This change is 
in the v2 series.

> What is the relation between the device nodes for the two kinds of
> devices? Does it make sense to model the other one as a child device
> of this one? That way you would trivially do the ordering by not marking
> this one as 'compatible="simple-bus"' and triggering the registration
> of the child from the parent probe function.
>

The required order is management driver first, channel drivers next. If 
the order is reversed, channel init fails. I handle this with deferred 
probing.

I tried to keep loose binding between the management driver due to QEMU.

QEMU auto-generates the devicetree entries. The guest machine just sees 
one devicetree object for the DMA channel but guest machine device-tree 
kernel does not have any management driver entity.

This requires DMA channel driver to work independently in the guest 
machine without dependencies.

> 	Arnd
>

-- 
Sinan Kaya
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a 
Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver
  2015-11-02 20:55               ` Arnd Bergmann
@ 2015-11-03  5:29                 ` Sinan Kaya
  2015-11-03 10:43                   ` Arnd Bergmann
  0 siblings, 1 reply; 51+ messages in thread
From: Sinan Kaya @ 2015-11-03  5:29 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: dmaengine, timur, cov, jcm, Rob Herring, Pawel Moll,
	Mark Rutland, Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams,
	devicetree, linux-kernel



On 11/2/2015 3:55 PM, Arnd Bergmann wrote:
> Are you using message signaled interrupts then?
> Typically MSI guarantees
> ordering against DMA, but level or edge triggered interrupts by definition
> cannot (at least on PCI, but most other buses are the same way), because
> the DMA master has no insight into when a DMA is actually complete.
>
> If you use MSI, please add a comment to the readl_relaxed() that it
> is safe because of that, otherwise the next person who tries to debug
> a problem with your driver has to look into this.

No, using regular GIC SPI interrupts at this moment. I know that HW 
doesn't use any of the typical AHB/AXI ARM buses.

I'm familiar with how PCI endpoints works. While the first read in a 
typical PCI endpoint ISR flushes all outstanding requests traditionally 
to the destination, this concept does not apply here for this HW.

-- 
Sinan Kaya
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a 
Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver
  2015-11-03  5:29                 ` Sinan Kaya
@ 2015-11-03 10:43                   ` Arnd Bergmann
  2015-11-03 21:07                     ` Sinan Kaya
  0 siblings, 1 reply; 51+ messages in thread
From: Arnd Bergmann @ 2015-11-03 10:43 UTC (permalink / raw)
  To: Sinan Kaya
  Cc: dmaengine, timur, cov, jcm, Rob Herring, Pawel Moll,
	Mark Rutland, Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams,
	devicetree, linux-kernel

On Tuesday 03 November 2015 00:29:07 Sinan Kaya wrote:
> On 11/2/2015 3:55 PM, Arnd Bergmann wrote:
> > Are you using message signaled interrupts then?
> > Typically MSI guarantees
> > ordering against DMA, but level or edge triggered interrupts by definition
> > cannot (at least on PCI, but most other buses are the same way), because
> > the DMA master has no insight into when a DMA is actually complete.
> >
> > If you use MSI, please add a comment to the readl_relaxed() that it
> > is safe because of that, otherwise the next person who tries to debug
> > a problem with your driver has to look into this.
> 
> No, using regular GIC SPI interrupts at this moment. I know that HW 
> doesn't use any of the typical AHB/AXI ARM buses.
> 
> I'm familiar with how PCI endpoints works. While the first read in a 
> typical PCI endpoint ISR flushes all outstanding requests traditionally 
> to the destination, this concept does not apply here for this HW.
> 

Ok, got it.

Best add an explanation like the above in the interrupt handler,
to prevent this from accidentally getting 'cleaned up' to use
readl(), or copied into a driver that uses PCI ordering rules
where it is actually wrong.

I think it should be done like this:

- anything that is not performance critical, use normal readl/writel
- in the fast path, add a comment to each readl_relaxed()/writel_relaxed()
  that is safe in this driver but that would not be safe in a PCI
  device
- For the ones that would be safe on PCI as weel, use
  readl_relaxed()/writel_relaxed() without a comment on each one,
  but clarify somewhere that these are all intentional.

	Arnd

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver
  2015-11-03  4:45       ` Sinan Kaya
@ 2015-11-03 12:42         ` Arnd Bergmann
  2015-11-03 14:26           ` Timur Tabi
  2015-11-04  1:04           ` Sinan Kaya
  0 siblings, 2 replies; 51+ messages in thread
From: Arnd Bergmann @ 2015-11-03 12:42 UTC (permalink / raw)
  To: Sinan Kaya
  Cc: dmaengine, timur, cov, jcm, Rob Herring, Pawel Moll,
	Mark Rutland, Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams,
	devicetree, linux-kernel

On Monday 02 November 2015 23:45:17 Sinan Kaya wrote:
> On 11/2/2015 4:30 PM, Arnd Bergmann wrote:
> > On Saturday 31 October 2015 02:51:46 Sinan Kaya wrote:
> >> On 10/30/2015 5:34 AM, Arnd Bergmann wrote:
> >>> On Thursday 29 October 2015 23:08:12 Sinan Kaya wrote:
> >> This will be used by the system admin to monitor/reset the execution
> >> state of the DMA channels. This will be the management interface.
> >> Debugfs is probably not the right choice. I originally had sysfs but
> >> than had some doubts. I'm open to suggestions.
> >
> > User interface design is unfortunately always hard, and I don't have
> > an obvious answer for you.
> >
> > Using debugfs by definition means that you don't expect users to
> > rely on ABI stability, so they should not write any automated scripts
> > against the contents of the files.
> >
> > With sysfs, the opposite is true: you need to maintain compatibility
> > for as long as anyone might rely on the current interface, and it
> > needs to be reviewed properly and documented in Documentation/ABI/.
> >
> > Other options are to use ioctl(), netlink or your own virtual file
> > system, but each of them has the same ABI requirements as sysfs.
> >
> > Regardless of what you pick, you also need to consider how other drivers
> > would use the same interface: If someone else has hardware that does
> > the same thing, we want to be able to use the same tools to access
> > it, so you should avoid having any hardware specific data in it and
> > keep it as generic and extensible as possible. In this particular
> > case, that probably means you should implement the user interfaces in
> > the dmaengine core driver, and let the specific DMA driver provide
> > callback function pointers along with the normal ones to fill that
> > data.
> >
> Thanks, I'll think about this. I'm inclined towards sysfs.

Ok. Documentation/sysfs-rules.txt has a good introduction of how this is done.

> >>> The rest of the probe function does not register any user interface aside from
> >>> the debugging stuff. Can you explain in the changelog how you expect the
> >>> driver to be used in a real system? Is there another driver coming?
> >>
> >> I expect this driver to grow in functionality over time. Right now, it
> >> does the global init for the DMA. After that all channels execute on
> >> their own without depending on each other. Global init has to be done
> >> first before attempting to do any channel initialization.
> >>
> >> There is also implied startup ordering requirements. I was doing this by
> >> using channel driver with the late binding to guarantee that.
> >>
> >> As soon as I use module_platform_driver, the ordering gets reversed for
> >> some reason.
> >
> > For the ordering requirements, it's probably best to export a symbol
> > with the entry point and let the normal driver call into that. Using
> > separate initcall levels is not something you should do in a normal
> > device driver like this.
> >
> I figured this out. If the channel driver starts before the management 
> driver; then channel reset fails. I'm handling this in the channel 
> driver and am returning -EPROBE_DEFER. After that, management driver 
> gets its chance to work. Then, the channel driver again. This change is 
> in the v2 series.

If you change the order in the Makefile, the management driver should
always get probed first if both are built-in. When the driver is a
loadable module, the ordering should work because of the way that the
modules are loaded. Using the deferred probing makes sense here,
so that would just be an optimization to avoid it normally. Things
can still get shuffled around e.g. if the management device is
deferred itself and we end up probing the channel driver first.

> > What is the relation between the device nodes for the two kinds of
> > devices? Does it make sense to model the other one as a child device
> > of this one? That way you would trivially do the ordering by not marking
> > this one as 'compatible="simple-bus"' and triggering the registration
> > of the child from the parent probe function.
> >
> 
> The required order is management driver first, channel drivers next. If 
> the order is reversed, channel init fails. I handle this with deferred 
> probing.
> 
> I tried to keep loose binding between the management driver due to QEMU.
> 
> QEMU auto-generates the devicetree entries. The guest machine just sees 
> one devicetree object for the DMA channel but guest machine device-tree 
> kernel does not have any management driver entity.
> 
> This requires DMA channel driver to work independently in the guest 
> machine without dependencies.

You have a distinct "compatible" string for qemu, right? It sounds like
this is not the same device if the dependencies are different, and
you could just have two ways to probe the same device.

The split between the two drivers still feels a little awkward overall,
it might be good to give it some more thought.

Would it work to describe the combination of the channel and management
registers as a single device with a single driver, but the management
parts being optional? That way, the management registers could be
intergrated better into the dmaengine framework, to provide a consistent
API to user space.

	Arnd

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver
  2015-11-03 12:42         ` Arnd Bergmann
@ 2015-11-03 14:26           ` Timur Tabi
  2015-11-04  1:04           ` Sinan Kaya
  1 sibling, 0 replies; 51+ messages in thread
From: Timur Tabi @ 2015-11-03 14:26 UTC (permalink / raw)
  To: Sinan Kaya
  Cc: Arnd Bergmann, dmaengine, cov, jcm, Rob Herring, Pawel Moll,
	Mark Rutland, Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams,
	devicetree, linux-kernel

Arnd Bergmann wrote:
> If you change the order in the Makefile, the management driver should
> always get probed first if both are built-in. When the driver is a
> loadable module, the ordering should work because of the way that the
> modules are loaded.

This sounds like something that should be commented in the Makefile. 
Even if you use -EPROBE_DEFER and you re-order the Makefile only so that 
it's less likely to be a problem, that's still worthy of a comment.

-- 
Sent by an employee of the Qualcomm Innovation Center, Inc.
The Qualcomm Innovation Center, Inc. is a member of the
Code Aurora Forum, hosted by The Linux Foundation.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver
  2015-11-03 10:43                   ` Arnd Bergmann
@ 2015-11-03 21:07                     ` Sinan Kaya
  2015-11-03 21:10                       ` Arnd Bergmann
  0 siblings, 1 reply; 51+ messages in thread
From: Sinan Kaya @ 2015-11-03 21:07 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: dmaengine, timur, cov, jcm, Rob Herring, Pawel Moll,
	Mark Rutland, Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams,
	devicetree, linux-kernel



On 11/3/2015 5:43 AM, Arnd Bergmann wrote:
> Ok, got it.
>
> Best add an explanation like the above in the interrupt handler,
> to prevent this from accidentally getting 'cleaned up' to use
> readl(), or copied into a driver that uses PCI ordering rules
> where it is actually wrong.
>

I'm adding this disclaimer into the ISR routine.

	/*
	 * Fine tuned for this HW...
	 *
	 * This ISR has been designed for this particular hardware. Relaxed read
	 * and write accessors are used for performance reasons due to interrupt
	 * delivery guarantees. Do not copy this code blindly and expect
	 * that to work.
	 */


> I think it should be done like this:
>
> - anything that is not performance critical, use normal readl/writel
> - in the fast path, add a comment to each readl_relaxed()/writel_relaxed()
>    that is safe in this driver but that would not be safe in a PCI
>    device
> - For the ones that would be safe on PCI as weel, use
>    readl_relaxed()/writel_relaxed() without a comment on each one,
>    but clarify somewhere that these are all intentional.

Makes sense.

-- 
Sinan Kaya
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a 
Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver
  2015-11-03 21:07                     ` Sinan Kaya
@ 2015-11-03 21:10                       ` Arnd Bergmann
  0 siblings, 0 replies; 51+ messages in thread
From: Arnd Bergmann @ 2015-11-03 21:10 UTC (permalink / raw)
  To: Sinan Kaya
  Cc: dmaengine, timur, cov, jcm, Rob Herring, Pawel Moll,
	Mark Rutland, Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams,
	devicetree, linux-kernel

On Tuesday 03 November 2015 16:07:34 Sinan Kaya wrote:
> On 11/3/2015 5:43 AM, Arnd Bergmann wrote:
> > Ok, got it.
> >
> > Best add an explanation like the above in the interrupt handler,
> > to prevent this from accidentally getting 'cleaned up' to use
> > readl(), or copied into a driver that uses PCI ordering rules
> > where it is actually wrong.
> >
> 
> I'm adding this disclaimer into the ISR routine.
> 
>         /*
>          * Fine tuned for this HW...
>          *
>          * This ISR has been designed for this particular hardware. Relaxed read
>          * and write accessors are used for performance reasons due to interrupt
>          * delivery guarantees. Do not copy this code blindly and expect
>          * that to work.
>          */
> 
> 

Sounds good.

	Arnd

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver
  2015-11-03 12:42         ` Arnd Bergmann
  2015-11-03 14:26           ` Timur Tabi
@ 2015-11-04  1:04           ` Sinan Kaya
  1 sibling, 0 replies; 51+ messages in thread
From: Sinan Kaya @ 2015-11-04  1:04 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: dmaengine, timur, cov, jcm, Rob Herring, Pawel Moll,
	Mark Rutland, Ian Campbell, Kumar Gala, Vinod Koul, Dan Williams,
	devicetree, linux-kernel



On 11/3/2015 7:42 AM, Arnd Bergmann wrote:
> You have a distinct "compatible" string for qemu, right? It sounds like
> this is not the same device if the dependencies are different, and
> you could just have two ways to probe the same device.
>
No, it is the same dma channel object that gets probed by the same name 
like the hypervisor. The channel object gets unbound from the 
hypervisor, it then gets bound to VFIO. Then, eventually QEMU takes 
over. The channel driver does not know under which OS it is running and 
it works in both environments as it is without any code changes at this 
moment.

> The split between the two drivers still feels a little awkward overall,
> it might be good to give it some more thought.

I see. I'd like to keep the management driver as independent as possible 
from the channel driver for security and functionality reasons. I need 
to keep the management addresses and functionality in the hypervisor only.

> > Would it work to describe the combination of the channel and management
> registers as a single device with a single driver, but the management
> parts being optional? That way, the management registers could be
> intergrated better into the dmaengine framework, to provide a consistent
> API to user space.

I can compile both management driver and channel driver into the same 
module if it sounds better. I can probe one with channel and another 
with the management name. I just need to be careful about not sharing 
any kind of data structure between them otherwise virtualization will break.

I consider the management driver a client of the DMA engine API at this 
moment.

-- 
Sinan Kaya
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a 
Linux Foundation Collaborative Project

^ permalink raw reply	[flat|nested] 51+ messages in thread

end of thread, other threads:[~2015-11-04  1:04 UTC | newest]

Thread overview: 51+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-10-30  3:08 [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver Sinan Kaya
2015-10-30  3:08 ` [PATCH 2/2] dma: add Qualcomm Technologies HIDMA channel driver Sinan Kaya
2015-10-30  8:01   ` kbuild test robot
2015-10-30  8:07   ` kbuild test robot
2015-10-30 10:24   ` Arnd Bergmann
2015-10-30 21:42     ` Sinan Kaya
2015-10-30 21:55       ` Timur Tabi
2015-10-30 22:47         ` Arnd Bergmann
2015-10-30 22:28       ` Arnd Bergmann
2015-10-30 22:36         ` Timur Tabi
2015-10-30 22:50           ` Arnd Bergmann
2015-10-31  1:53         ` Sinan Kaya
2015-11-01 18:50         ` Sinan Kaya
2015-11-01 20:21           ` Timur Tabi
2015-11-01 20:27             ` Sinan Kaya
2015-11-02 16:33           ` Arnd Bergmann
2015-11-02 19:21             ` Sinan Kaya
2015-11-02 20:55               ` Arnd Bergmann
2015-11-03  5:29                 ` Sinan Kaya
2015-11-03 10:43                   ` Arnd Bergmann
2015-11-03 21:07                     ` Sinan Kaya
2015-11-03 21:10                       ` Arnd Bergmann
2015-10-30 14:50   ` Mark Rutland
2015-10-31  4:27     ` Sinan Kaya
2015-10-30  4:34 ` [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver kbuild test robot
2015-10-30  4:34 ` [PATCH] dma: fix platform_no_drv_owner.cocci warnings kbuild test robot
2015-10-30  9:34 ` [PATCH 1/2] dma: add Qualcomm Technologies HIDMA management driver Arnd Bergmann
2015-10-31  6:51   ` Sinan Kaya
2015-10-31 12:53     ` Timur Tabi
2015-10-31 12:53     ` Timur Tabi
2015-11-02 21:30     ` Arnd Bergmann
2015-11-03  4:45       ` Sinan Kaya
2015-11-03 12:42         ` Arnd Bergmann
2015-11-03 14:26           ` Timur Tabi
2015-11-04  1:04           ` Sinan Kaya
2015-10-30 15:00 ` Mark Rutland
2015-10-30 17:59   ` Andy Shevchenko
2015-10-30 18:08     ` Sinan Kaya
2015-10-30 18:18       ` Mark Rutland
2015-10-30 18:18       ` Andy Shevchenko
2015-10-30 18:25         ` Mark Rutland
2015-10-30 18:40           ` Andy Shevchenko
2015-10-30 18:48             ` Sinan Kaya
2015-10-30 19:01               ` Mark Rutland
2015-10-30 20:08                 ` Al Stone
2015-10-30 20:15                   ` Andy Shevchenko
2015-10-30 20:18                     ` Mark Rutland
2015-10-31  3:33                     ` Jon Masters
2015-10-31 17:34                       ` Sinan Kaya
2015-10-30 19:36           ` Sinan Kaya
2015-10-31 17:11   ` Sinan Kaya

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).