All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] pmu/gk20a: PMU boot support.
@ 2015-03-11  6:33 Deepak Goyal
       [not found] ` <1426055631-1166-1-git-send-email-dgoyal-DDmLM1+adcrQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 6+ messages in thread
From: Deepak Goyal @ 2015-03-11  6:33 UTC (permalink / raw)
  To: bskeggs-H+wXaHxf7aLQT0dZR+AlfA, gnurou-Re5JQEeQqe8AvxtiuMwx3w,
	nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	linux-tegra-u79uwXL29TY76Z2rM5mHXA
  Cc: Deepak Goyal

It adds PMU boot support.It loads PMU
firmware into PMU falcon.RM/Kernel driver
receives INIT ack (through interrupt mechanism)
from PMU when PMU boots with success.

Signed-off-by: Deepak Goyal <dgoyal-DDmLM1+adcrQT0dZR+AlfA@public.gmane.org>
---
 drm/nouveau/include/nvkm/subdev/pmu.h |   26 +-
 drm/nouveau/nvkm/subdev/pmu/base.c    |  108 ++
 drm/nouveau/nvkm/subdev/pmu/gk20a.c   | 2131 ++++++++++++++++++++++++++++++++-
 drm/nouveau/nvkm/subdev/pmu/gk20a.h   |  369 ++++++
 drm/nouveau/nvkm/subdev/pmu/priv.h    |  264 ++++
 5 files changed, 2884 insertions(+), 14 deletions(-)
 create mode 100644 drm/nouveau/nvkm/subdev/pmu/gk20a.h

diff --git a/drm/nouveau/include/nvkm/subdev/pmu.h b/drm/nouveau/include/nvkm/subdev/pmu.h
index 7b86acc634a0..659b4e0ba02b 100644
--- a/drm/nouveau/include/nvkm/subdev/pmu.h
+++ b/drm/nouveau/include/nvkm/subdev/pmu.h
@@ -1,7 +1,20 @@
 #ifndef __NVKM_PMU_H__
 #define __NVKM_PMU_H__
 #include <core/subdev.h>
+#include <core/device.h>
+#include <subdev/mmu.h>
+#include <linux/debugfs.h>
 
+struct pmu_buf_desc {
+	struct nvkm_gpuobj *pmubufobj;
+	struct nvkm_vma pmubufvma;
+	size_t size;
+};
+struct pmu_priv_vm {
+	struct nvkm_gpuobj *mem;
+	struct nvkm_gpuobj *pgd;
+	struct nvkm_vm *vm;
+};
 struct nvkm_pmu {
 	struct nvkm_subdev base;
 
@@ -20,9 +33,20 @@ struct nvkm_pmu {
 		u32 message;
 		u32 data[2];
 	} recv;
-
+	wait_queue_head_t init_wq;
+	bool gr_initialised;
+	struct dentry *debugfs;
+	struct pmu_buf_desc *pg_buf;
+	struct pmu_priv_vm *pmuvm;
 	int  (*message)(struct nvkm_pmu *, u32[2], u32, u32, u32, u32);
 	void (*pgob)(struct nvkm_pmu *, bool);
+	int (*pmu_mutex_acquire)(struct nvkm_pmu *, u32 id, u32 *token);
+	int (*pmu_mutex_release)(struct nvkm_pmu *, u32 id, u32 *token);
+	int (*pmu_load_norm)(struct nvkm_pmu *pmu, u32 *load);
+	int (*pmu_load_update)(struct nvkm_pmu *pmu);
+	void (*pmu_reset_load_counters)(struct nvkm_pmu *pmu);
+	void (*pmu_get_load_counters)(struct nvkm_pmu *pmu, u32 *busy_cycles,
+		u32 *total_cycles);
 };
 
 static inline struct nvkm_pmu *
diff --git a/drm/nouveau/nvkm/subdev/pmu/base.c b/drm/nouveau/nvkm/subdev/pmu/base.c
index 054b2d2eec35..6afd389b9764 100644
--- a/drm/nouveau/nvkm/subdev/pmu/base.c
+++ b/drm/nouveau/nvkm/subdev/pmu/base.c
@@ -25,6 +25,114 @@
 
 #include <subdev/timer.h>
 
+/* init allocator struct */
+int nvkm_pmu_allocator_init(struct nvkm_pmu_allocator *allocator,
+		const char *name, u32 start, u32 len)
+{
+	memset(allocator, 0, sizeof(struct nvkm_pmu_allocator));
+
+	strncpy(allocator->name, name, 32);
+
+	allocator->base = start;
+	allocator->limit = start + len - 1;
+
+	allocator->bitmap = kcalloc(BITS_TO_LONGS(len), sizeof(long),
+			GFP_KERNEL);
+	if (!allocator->bitmap)
+		return -ENOMEM;
+
+	allocator_dbg(allocator, "%s : base %d, limit %d",
+		allocator->name, allocator->base);
+
+	init_rwsem(&allocator->rw_sema);
+
+	allocator->alloc = nvkm_pmu_allocator_block_alloc;
+	allocator->free = nvkm_pmu_allocator_block_free;
+
+	return 0;
+}
+
+/* destroy allocator, free all remaining blocks if any */
+void nvkm_pmu_allocator_destroy(struct nvkm_pmu_allocator *allocator)
+{
+	down_write(&allocator->rw_sema);
+
+	kfree(allocator->bitmap);
+
+	memset(allocator, 0, sizeof(struct nvkm_pmu_allocator));
+}
+
+/*
+ * *addr != ~0 for fixed address allocation. if *addr == 0, base addr is
+ * returned to caller in *addr.
+ *
+ * contiguous allocation, which allocates one block of
+ * contiguous address.
+*/
+int nvkm_pmu_allocator_block_alloc(struct nvkm_pmu_allocator *allocator,
+		u32 *addr, u32 len, u32 align)
+{
+	unsigned long _addr;
+
+	allocator_dbg(allocator, "[in] addr %d, len %d", *addr, len);
+
+	if ((*addr != 0 && *addr < allocator->base) || /* check addr range */
+	    *addr + len > allocator->limit || /* check addr range */
+	    *addr & (align - 1) || /* check addr alignment */
+	     len == 0)                        /* check len */
+		return -EINVAL;
+
+	len = ALIGN(len, align);
+	if (!len)
+		return -ENOMEM;
+
+	down_write(&allocator->rw_sema);
+
+	_addr = bitmap_find_next_zero_area(allocator->bitmap,
+			allocator->limit - allocator->base + 1,
+			*addr ? (*addr - allocator->base) : 0,
+			len,
+			align - 1);
+	if ((_addr > allocator->limit - allocator->base + 1) ||
+	    (*addr && *addr != (_addr + allocator->base))) {
+		up_write(&allocator->rw_sema);
+		return -ENOMEM;
+	}
+
+	bitmap_set(allocator->bitmap, _addr, len);
+	*addr = allocator->base + _addr;
+
+	up_write(&allocator->rw_sema);
+
+	allocator_dbg(allocator, "[out] addr %d, len %d", *addr, len);
+
+	return 0;
+}
+
+/* free all blocks between start and end */
+int nvkm_pmu_allocator_block_free(struct nvkm_pmu_allocator *allocator,
+		u32 addr, u32 len, u32 align)
+{
+	allocator_dbg(allocator, "[in] addr %d, len %d", addr, len);
+
+	if (addr + len > allocator->limit || /* check addr range */
+	    addr < allocator->base ||
+	    addr & (align - 1))   /* check addr alignment */
+		return -EINVAL;
+
+	len = ALIGN(len, align);
+	if (!len)
+		return -EINVAL;
+
+	down_write(&allocator->rw_sema);
+	bitmap_clear(allocator->bitmap, addr - allocator->base, len);
+	up_write(&allocator->rw_sema);
+
+	allocator_dbg(allocator, "[out] addr %d, len %d", addr, len);
+
+	return 0;
+}
+
 void
 nvkm_pmu_pgob(struct nvkm_pmu *pmu, bool enable)
 {
diff --git a/drm/nouveau/nvkm/subdev/pmu/gk20a.c b/drm/nouveau/nvkm/subdev/pmu/gk20a.c
index a49934bbe637..0fd2530301a3 100644
--- a/drm/nouveau/nvkm/subdev/pmu/gk20a.c
+++ b/drm/nouveau/nvkm/subdev/pmu/gk20a.c
@@ -20,21 +20,67 @@
  * DEALINGS IN THE SOFTWARE.
  */
 #include "priv.h"
+#include "gk20a.h"
+#include <core/client.h>
+#include <core/gpuobj.h>
+#include <subdev/bar.h>
+#include <subdev/fb.h>
+#include <subdev/mc.h>
+#include <subdev/timer.h>
+#include <subdev/mmu.h>
+#include <subdev/pmu.h>
+#include <engine/falcon.h>
 
+#include <linux/delay.h>	/* for mdelay */
+#include <linux/firmware.h>
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/uaccess.h>
 #include <subdev/clk.h>
 #include <subdev/timer.h>
 #include <subdev/volt.h>
 
 #define BUSY_SLOT	0
 #define CLK_SLOT	7
+#define GK20A_PMU_UCODE_IMAGE	"gpmu_ucode.bin"
+
+static int falc_trace_show(struct seq_file *s, void *data);
+static int falc_trace_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, falc_trace_show, inode->i_private);
+}
+static const struct file_operations falc_trace_fops = {
+	.open		= falc_trace_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+struct pmu_priv_vm pmuvm;
+const struct firmware *pmufw;
+
+static void  gk20a_pmu_isr(struct nvkm_pmu *ppmu);
+static void pmu_process_message(struct work_struct *work);
+
+static int
+gk20a_pmu_init_vm(struct nvkm_pmu *ppmu, const struct firmware *fw);
+static void
+gk20a_pmu_dump_firmware_info(struct nvkm_pmu *ppmu, const struct firmware *fw);
+
+static int
+gk20a_pmu_load_firmware(struct nvkm_pmu *ppmu, const struct firmware **pfw);
+static int gk20a_init_pmu_setup_sw(struct nvkm_pmu *ppmu);
+static int gk20a_init_pmu_setup_hw1(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc);
+static void gk20a_pmu_intr(struct nvkm_subdev *subdev);
 
+static void gk20a_pmu_pgob(struct nvkm_pmu *ppmu, bool enable);
 struct gk20a_pmu_dvfs_data {
 	int p_load_target;
 	int p_load_max;
 	int p_smooth;
 	unsigned int avg_load;
 };
-
 struct gk20a_pmu_priv {
 	struct nvkm_pmu base;
 	struct nvkm_alarm alarm;
@@ -46,7 +92,30 @@ struct gk20a_pmu_dvfs_dev_status {
 	unsigned long busy;
 	int cur_state;
 };
-
+int gk20a_pmu_debugfs_init(struct nvkm_pmu *ppmu)
+{
+	struct dentry *d;
+	ppmu->debugfs = debugfs_create_dir("PMU", NULL);
+	if (!ppmu->debugfs)
+		goto err_out;
+	nv_debug(ppmu, "PMU directory created with success\n");
+	d = debugfs_create_file(
+		"falc_trace", 0644, ppmu->debugfs, ppmu,
+						&falc_trace_fops);
+	if (!d)
+		goto err_out;
+	return 0;
+err_out:
+	pr_err("%s: Failed to make debugfs node\n", __func__);
+	debugfs_remove_recursive(ppmu->debugfs);
+	return -ENOMEM;
+}
+void gk20a_pmu_release_firmware(struct nvkm_pmu *ppmu,
+						    const struct firmware *pfw)
+{
+	nv_debug(ppmu, "firmware released\n");
+	release_firmware(pfw);
+}
 static int
 gk20a_pmu_dvfs_target(struct gk20a_pmu_priv *priv, int *state)
 {
@@ -164,31 +233,145 @@ gk20a_pmu_fini(struct nvkm_object *object, bool suspend)
 {
 	struct nvkm_pmu *pmu = (void *)object;
 	struct gk20a_pmu_priv *priv = (void *)pmu;
-
+	nv_wr32(pmu, 0x10a014, 0x00000060);
+	flush_work(&pmu->recv.work);
 	nvkm_timer_alarm_cancel(priv, &priv->alarm);
 
 	return nvkm_subdev_fini(&pmu->base, suspend);
 }
+static bool find_hex_in_string(char *strings, u32 *hex_pos)
+{
+	u32 i = 0, j = strlen(strings);
+	for (; i < j; i++) {
+		if (strings[i] == '%')
+			if (strings[i + 1] == 'x' || strings[i + 1] == 'X') {
+				*hex_pos = i;
+				return true;
+			}
+	}
+	*hex_pos = -1;
+	return false;
+}
+static int falc_trace_show(struct seq_file *s, void *data)
+{
+	struct nvkm_pmu *ppmu = s->private;
+	struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
+	struct pmu_desc *pmu = &impl->pmudata;
+	u32 i = 0, j = 0, k, l, m;
+	char part_str[40];
+	u32 data1;
+	char *log_data = kmalloc(GK20A_PMU_TRACE_BUFSIZE, GFP_KERNEL);
+	char *trace = log_data;
+	u32 *trace1 = (u32 *)log_data;
+	for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 4) {
+		data1 = nv_ro32(pmu->trace_buf.pmubufobj, 0x0000 + i);
+		memcpy(log_data + i, (void *)(&data1), 32);
+	}
+	for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) {
+		for (j = 0; j < 0x40; j++)
+			if (trace1[(i / 4) + j])
+				break;
+		if (j == 0x40)
+			goto out;
+		seq_printf(s, "Index %x: ", trace1[(i / 4)]);
+		l = 0;
+		m = 0;
+		while (find_hex_in_string((trace+i+20+m), &k)) {
+			if (k >= 40)
+				break;
+			strncpy(part_str, (trace+i+20+m), k);
+			part_str[k] = 0;
+			seq_printf(s, "%s0x%x", part_str,
+					trace1[(i / 4) + 1 + l]);
+			l++;
+			m += k + 2;
+		}
+		seq_printf(s, "%s", (trace+i+20+m));
+	}
+out:
+	kfree(log_data);
+	return 0;
+}
 
 int
 gk20a_pmu_init(struct nvkm_object *object)
 {
-	struct nvkm_pmu *pmu = (void *)object;
-	struct gk20a_pmu_priv *priv = (void *)pmu;
+	struct nvkm_pmu *ppmu = (void *)object;
+	struct nvkm_mc *pmc = nvkm_mc(object);
+	struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
+	struct pmu_desc *pmu;
+	struct gk20a_pmu_priv *priv;
+	struct pmu_gk20a_data *gk20adata;
 	int ret;
 
-	ret = nvkm_subdev_init(&pmu->base);
+	pmu = &impl->pmudata;
+
+	nv_subdev(ppmu)->intr = gk20a_pmu_intr;
+
+	mutex_init(&pmu->isr_mutex);
+	mutex_init(&pmu->pmu_copy_lock);
+	mutex_init(&pmu->pmu_seq_lock);
+
+	if (pmufw == NULL) {
+		ret = gk20a_pmu_load_firmware(ppmu, &pmufw);
+		if (ret < 0) {
+			nv_error(ppmu, "failed to load pmu fimware\n");
+			return ret;
+		}
+		nv_debug(ppmu, "loading firmware sucessful\n");
+		ret = gk20a_pmu_init_vm(ppmu, pmufw);
+		if (ret < 0) {
+			nv_error(ppmu, "failed to map pmu fw to va space\n");
+			goto init_vm_err;
+		}
+	}
+	pmu->desc = (struct pmu_ucode_desc *)pmufw->data;
+	gk20a_pmu_dump_firmware_info(ppmu, pmufw);
+
+	if (pmu->desc->app_version != APP_VERSION_GK20A) {
+		nv_error(ppmu,
+		"PMU code version not supported version: %d\n",
+			pmu->desc->app_version);
+		ret = -EINVAL;
+		goto app_ver_err;
+	}
+	gk20adata = kzalloc(sizeof(*gk20adata), GFP_KERNEL);
+	if (!gk20adata) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	pmu->pmu_chip_data = (void *)gk20adata;
+
+	pmu->remove_support = gk20a_remove_pmu_support;
+
+	ret = gk20a_init_pmu_setup_sw(ppmu);
 	if (ret)
-		return ret;
+		goto err;
+
+	pmu->pmu_state = PMU_STATE_STARTING;
+	ret = gk20a_init_pmu_setup_hw1(ppmu, pmc);
+	if (ret)
+		goto err;
+
+	priv = (void *)ppmu;
 
-	pmu->pgob = nvkm_pmu_pgob;
+	ret = nvkm_subdev_init(&ppmu->base);
+	if (ret)
+		goto err;
+
+	ppmu->pgob = nvkm_pmu_pgob;
 
-	/* init pwr perf counter */
-	nv_wr32(pmu, 0x10a504 + (BUSY_SLOT * 0x10), 0x00200001);
-	nv_wr32(pmu, 0x10a50c + (BUSY_SLOT * 0x10), 0x00000002);
-	nv_wr32(pmu, 0x10a50c + (CLK_SLOT * 0x10), 0x00000003);
+	/* init pmu perf counter */
+	nv_wr32(ppmu, 0x10a504 + (BUSY_SLOT * 0x10), 0x00200001);
+	nv_wr32(ppmu, 0x10a50c + (BUSY_SLOT * 0x10), 0x00000002);
+	nv_wr32(ppmu, 0x10a50c + (CLK_SLOT * 0x10), 0x00000003);
 
-	nvkm_timer_alarm(pmu, 2000000000, &priv->alarm);
+	nvkm_timer_alarm(ppmu, 2000000000, &priv->alarm);
+err:
+init_vm_err:
+app_ver_err:
+	gk20a_pmu_release_firmware(ppmu, pmufw);
 	return ret;
 }
 
@@ -226,4 +409,1926 @@ gk20a_pmu_oclass = &(struct nvkm_pmu_impl) {
 		.init = gk20a_pmu_init,
 		.fini = gk20a_pmu_fini,
 	},
+	.base.handle = NV_SUBDEV(PMU, 0xea),
+	.pgob = gk20a_pmu_pgob,
 }.base;
+void pmu_copy_from_dmem(struct pmu_desc *pmu,
+		u32 src, u8 *dst, u32 size, u8 port)
+{
+	u32 i, words, bytes;
+	u32 data, addr_mask;
+	u32 *dst_u32 = (u32 *)dst;
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+
+	if (size == 0) {
+		nv_error(ppmu, "size is zero\n");
+		goto out;
+	}
+
+	if (src & 0x3) {
+		nv_error(ppmu, "src (0x%08x) not 4-byte aligned\n", src);
+		goto out;
+	}
+
+	mutex_lock(&pmu->pmu_copy_lock);
+
+	words = size >> 2;
+	bytes = size & 0x3;
+
+	addr_mask = (0x3f << 2) | 0xff << 8;
+
+	src &= addr_mask;
+
+	nv_wr32(ppmu, (0x10a1c0 + (port * 8)), (src | (0x1 << 25)));
+
+	for (i = 0; i < words; i++) {
+		dst_u32[i] = nv_rd32(ppmu, (0x0010a1c4 + port * 8));
+		nv_debug(ppmu, "0x%08x\n", dst_u32[i]);
+	}
+	if (bytes > 0) {
+		data = nv_rd32(ppmu, (0x0010a1c4 + port * 8));
+		nv_debug(ppmu, "0x%08x\n", data);
+
+		for (i = 0; i < bytes; i++)
+			dst[(words << 2) + i] = ((u8 *)&data)[i];
+	}
+	mutex_unlock(&pmu->pmu_copy_lock);
+out:
+	nv_debug(ppmu, "exit %s\n", __func__);
+}
+
+void pmu_copy_to_dmem(struct pmu_desc *pmu,
+		u32 dst, u8 *src, u32 size, u8 port)
+{
+	u32 i, words, bytes;
+	u32 data, addr_mask;
+	u32 *src_u32 = (u32 *)src;
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+
+	if (size == 0) {
+		nv_error(ppmu, "size is zero\n");
+		goto out;
+	}
+
+	if (dst & 0x3) {
+		nv_error(ppmu, "dst (0x%08x) not 4-byte aligned\n", dst);
+		goto out;
+	}
+
+	mutex_lock(&pmu->pmu_copy_lock);
+
+	words = size >> 2;
+	bytes = size & 0x3;
+
+	addr_mask = (0x3f << 2) | 0xff << 8;
+
+	dst &= addr_mask;
+
+	nv_wr32(ppmu, (0x10a1c0 + (port * 8)), (dst | (0x1 << 24)));
+
+	for (i = 0; i < words; i++) {
+		nv_wr32(ppmu, (0x10a1c4 + (port * 8)), src_u32[i]);
+		nv_debug(ppmu, "0x%08x\n", src_u32[i]);
+	}
+	if (bytes > 0) {
+		data = 0;
+		for (i = 0; i < bytes; i++)
+			((u8 *)&data)[i] = src[(words << 2) + i];
+		nv_wr32(ppmu, (0x10a1c4 + (port * 8)), data);
+		nv_debug(ppmu, "0x%08x\n", data);
+	}
+
+	data = nv_rd32(ppmu, (0x10a1c0 + (port * 8))) & addr_mask;
+	size = ALIGN(size, 4);
+	if (data != dst + size) {
+		nv_error(ppmu, "copy failed. bytes written %d, expected %d",
+			data - dst, size);
+	}
+	mutex_unlock(&pmu->pmu_copy_lock);
+out:
+	nv_debug(ppmu, "exit %s", __func__);
+}
+
+static int pmu_idle(struct nvkm_pmu *ppmu)
+{
+	unsigned long end_jiffies = jiffies +
+		msecs_to_jiffies(2000);
+	u32 idle_stat;
+
+	/* wait for pmu idle */
+	do {
+		idle_stat = nv_rd32(ppmu, 0x0010a04c);
+
+		if (((idle_stat & 0x01) == 0) &&
+			((idle_stat >> 1) & 0x7fff) == 0) {
+			break;
+		}
+
+		if (time_after_eq(jiffies, end_jiffies)) {
+			nv_error(ppmu, "timeout waiting pmu idle : 0x%08x",
+				  idle_stat);
+			return -EBUSY;
+		}
+		usleep_range(100, 200);
+	} while (1);
+
+	return 0;
+}
+
+void pmu_enable_irq(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc,
+			bool enable)
+{
+
+	nv_wr32(pmc, 0x00000640,
+		nv_rd32(pmc, 0x00000640) &
+		~0x1000000);
+	nv_wr32(pmc, 0x00000644,
+		nv_rd32(pmc, 0x00000644) &
+		~0x1000000);
+	nv_wr32(ppmu, 0x0010a014, 0xff);
+
+	if (enable) {
+		nv_debug(ppmu, "enable pmu irq\n");
+		/* dest 0=falcon, 1=host; level 0=irq0, 1=irq1
+		nv_wr32(ppmu, 0x0010a01c, 0xff01ff52);
+		0=disable, 1=enable*/
+
+		nv_wr32(ppmu, 0x0010a010, 0xff);
+		nv_wr32(pmc, 0x00000640,
+			nv_rd32(pmc, 0x00000640) |
+			0x1000000);
+		nv_wr32(pmc, 0x00000644,
+			nv_rd32(pmc, 0x00000644) |
+			0x1000000);
+	} else {
+		nv_debug(ppmu, "disable pmu irq\n");
+	}
+
+}
+
+static int pmu_enable_hw(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc,
+			bool enable)
+{
+	u32 reg;
+
+	if (enable) {
+		int retries = GK20A_IDLE_CHECK_MAX / GK20A_IDLE_CHECK_DEFAULT;
+		/*need a spinlock?*/
+		reg = nv_rd32(pmc, 0x00000200);
+		reg |= 0x2000;
+		nv_wr32(pmc, 0x00000200, reg);
+		nv_rd32(pmc, 0x00000200);
+		do {
+			u32 w = nv_rd32(ppmu, 0x0010a10c) & 0x6;
+
+			if (!w)
+				return 0;
+
+			udelay(GK20A_IDLE_CHECK_DEFAULT);
+		} while (--retries);
+
+		reg = nv_rd32(pmc, 0x00000200);
+		reg &= ~0x2000;
+		nv_wr32(pmc, 0x00000200, reg);
+		nv_error(ppmu, "Falcon mem scrubbing timeout\n");
+
+		goto error;
+	} else {
+		reg = nv_rd32(pmc, 0x00000200);
+		reg &= ~0x2000;
+		nv_wr32(pmc, 0x00000200, reg);
+		return 0;
+	}
+error:
+	return -ETIMEDOUT;
+}
+
+static int pmu_enable(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc,
+			bool enable)
+{
+	u32 pmc_enable;
+	int err;
+
+	if (!enable) {
+		pmc_enable = nv_rd32(pmc, 0x200);
+		if ((pmc_enable & 0x2000) != 0x0) {
+			pmu_enable_irq(ppmu, pmc, false);
+			pmu_enable_hw(ppmu, pmc, false);
+		}
+	} else {
+		err = pmu_enable_hw(ppmu, pmc, true);
+		if (err)
+			return err;
+
+		/* TBD: post reset */
+
+		err = pmu_idle(ppmu);
+		if (err)
+			return err;
+
+		pmu_enable_irq(ppmu, pmc, true);
+	}
+
+	return 0;
+}
+
+int pmu_reset(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc)
+{
+	int err;
+
+	err = pmu_idle(ppmu);
+	if (err)
+		return err;
+
+	/* TBD: release pmu hw mutex */
+
+	err = pmu_enable(ppmu, pmc, false);
+	if (err)
+		return err;
+
+	err = pmu_enable(ppmu, pmc, true);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int pmu_bootstrap(struct pmu_desc *pmu)
+{
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+	struct pmu_ucode_desc *desc = pmu->desc;
+	u64 addr_code, addr_data, addr_load;
+	u32 i, blocks, addr_args;
+	u32 *adr_data, *adr_load, *adr_code;
+	struct pmu_cmdline_args_gk20a cmdline_args;
+	struct pmu_priv_vm *ppmuvm = &pmuvm;
+
+	nv_wr32(ppmu, 0x0010a048,
+		nv_rd32(ppmu, 0x0010a048) | 0x01);
+	/*bind the address*/
+	nv_wr32(ppmu, 0x0010a480,
+		ppmuvm->mem->addr >> 12 |
+		0x1 << 30 |
+		0x20000000);
+
+	/* TBD: load all other surfaces */
+	cmdline_args.falc_trace_size = GK20A_PMU_TRACE_BUFSIZE;
+	cmdline_args.falc_trace_dma_base =
+				 u64_lo32(pmu->trace_buf.pmubufvma.offset >> 8);
+	cmdline_args.falc_trace_dma_idx = GK20A_PMU_DMAIDX_VIRT;
+	cmdline_args.cpu_freq_hz = 204;
+	cmdline_args.secure_mode = 0;
+
+	addr_args = (nv_rd32(ppmu, 0x0010a108) >> 9) & 0x1ff;
+	addr_args = addr_args << GK20A_PMU_DMEM_BLKSIZE2;
+	addr_args -= sizeof(struct pmu_cmdline_args_gk20a);
+	nv_debug(ppmu, "initiating copy to dmem\n");
+	pmu_copy_to_dmem(pmu, addr_args,
+			(u8 *)&cmdline_args,
+			sizeof(struct pmu_cmdline_args_gk20a), 0);
+
+	nv_wr32(ppmu, 0x0010a1c0, 0x1 << 24);
+
+
+	addr_code = u64_lo32((pmu->ucode.pmubufvma.offset +
+			desc->app_start_offset +
+			desc->app_resident_code_offset) >> 8);
+
+	addr_data = u64_lo32((pmu->ucode.pmubufvma.offset +
+			desc->app_start_offset +
+			desc->app_resident_data_offset) >> 8);
+
+	addr_load = u64_lo32((pmu->ucode.pmubufvma.offset +
+			desc->bootloader_start_offset) >> 8);
+
+	adr_code = (u32 *) (&addr_code);
+	adr_load = (u32 *) (&addr_load);
+	adr_data = (u32 *) (&addr_data);
+	nv_wr32(ppmu, 0x0010a1c4, GK20A_PMU_DMAIDX_UCODE);
+	nv_debug(ppmu, "0x%08x\n", GK20A_PMU_DMAIDX_UCODE);
+	nv_wr32(ppmu, 0x0010a1c4, *(adr_code));
+	nv_debug(ppmu, "0x%08x\n", *(adr_code));
+	nv_wr32(ppmu, 0x0010a1c4, desc->app_size);
+	nv_debug(ppmu, "0x%08x\n", desc->app_size);
+	nv_wr32(ppmu, 0x0010a1c4, desc->app_resident_code_size);
+	nv_debug(ppmu, "0x%08x\n", desc->app_resident_code_size);
+	nv_wr32(ppmu, 0x0010a1c4, desc->app_imem_entry);
+	nv_debug(ppmu, "0x%08x\n", desc->app_imem_entry);
+	nv_wr32(ppmu, 0x0010a1c4,  *(adr_data));
+	nv_debug(ppmu, "0x%08x\n", *(adr_data));
+	nv_wr32(ppmu, 0x0010a1c4, desc->app_resident_data_size);
+	nv_debug(ppmu, "0x%08x\n", desc->app_resident_data_size);
+	nv_wr32(ppmu, 0x0010a1c4, *(adr_code));
+	nv_debug(ppmu, "0x%08x\n", *(adr_code));
+	nv_wr32(ppmu, 0x0010a1c4, 0x1);
+	nv_debug(ppmu, "0x%08x\n", 1);
+	nv_wr32(ppmu, 0x0010a1c4, addr_args);
+	nv_debug(ppmu, "0x%08x\n", addr_args);
+
+
+	nv_wr32(ppmu, 0x0010a110,
+		*(adr_load) - (desc->bootloader_imem_offset >> 8));
+
+	blocks = ((desc->bootloader_size + 0xFF) & ~0xFF) >> 8;
+
+	for (i = 0; i < blocks; i++) {
+		nv_wr32(ppmu, 0x0010a114,
+			desc->bootloader_imem_offset + (i << 8));
+		nv_wr32(ppmu, 0x0010a11c,
+			desc->bootloader_imem_offset + (i << 8));
+		nv_wr32(ppmu, 0x0010a118,
+			0x01 << 4  |
+			0x06 << 8  |
+			((GK20A_PMU_DMAIDX_UCODE & 0x07) << 12));
+	}
+
+
+	nv_wr32(ppmu, 0x0010a104,
+		(0xffffffff & desc->bootloader_entry_point));
+
+	nv_wr32(ppmu, 0x0010a100, 0x1 << 1);
+
+	nv_wr32(ppmu, 0x0010a080, desc->app_version);
+
+	return 0;
+}
+
+void pmu_seq_init(struct pmu_desc *pmu)
+{
+	u32 i;
+
+	memset(pmu->seq, 0,
+		sizeof(struct pmu_sequence) * PMU_MAX_NUM_SEQUENCES);
+	memset(pmu->pmu_seq_tbl, 0,
+		sizeof(pmu->pmu_seq_tbl));
+
+	for (i = 0; i < PMU_MAX_NUM_SEQUENCES; i++)
+		pmu->seq[i].id = i;
+}
+
+static int pmu_seq_acquire(struct pmu_desc *pmu,
+			struct pmu_sequence **pseq)
+{
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+	struct pmu_sequence *seq;
+	u32 index;
+
+	mutex_lock(&pmu->pmu_seq_lock);
+	index = find_first_zero_bit(pmu->pmu_seq_tbl,
+				sizeof(pmu->pmu_seq_tbl));
+	if (index >= sizeof(pmu->pmu_seq_tbl)) {
+		nv_error(ppmu,
+			"no free sequence available");
+		mutex_unlock(&pmu->pmu_seq_lock);
+		return -EAGAIN;
+	}
+	set_bit(index, pmu->pmu_seq_tbl);
+	mutex_unlock(&pmu->pmu_seq_lock);
+
+	seq = &pmu->seq[index];
+	seq->state = PMU_SEQ_STATE_PENDING;
+
+	*pseq = seq;
+	return 0;
+}
+
+static void pmu_seq_release(struct pmu_desc *pmu,
+			struct pmu_sequence *seq)
+{
+	seq->state	= PMU_SEQ_STATE_FREE;
+	seq->desc	= PMU_INVALID_SEQ_DESC;
+	seq->callback	= NULL;
+	seq->cb_params	= NULL;
+	seq->msg	= NULL;
+	seq->out_payload = NULL;
+	seq->in_gk20a.alloc.dmem.size = 0;
+	seq->out_gk20a.alloc.dmem.size = 0;
+	clear_bit(seq->id, pmu->pmu_seq_tbl);
+}
+
+static int pmu_queue_init(struct pmu_desc *pmu,
+		u32 id, struct pmu_init_msg_pmu_gk20a *init)
+{
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+	struct pmu_queue *queue = &pmu->queue[id];
+
+	queue->id	= id;
+	queue->index    = init->queue_info[id].index;
+	queue->offset   = init->queue_info[id].offset;
+	queue->size = init->queue_info[id].size;
+	queue->mutex_id = id;
+	mutex_init(&queue->mutex);
+
+	nv_debug(ppmu, "queue %d: index %d, offset 0x%08x, size 0x%08x",
+		id, queue->index, queue->offset, queue->size);
+
+	return 0;
+}
+
+static int pmu_queue_head(struct pmu_desc *pmu, struct pmu_queue *queue,
+			u32 *head, bool set)
+{
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+
+	BUG_ON(!head);
+
+	if (PMU_IS_COMMAND_QUEUE(queue->id)) {
+
+		if (queue->index >= 0x00000004)
+			return -EINVAL;
+
+		if (!set)
+			*head = nv_rd32(ppmu, 0x0010a4a0 + (queue->index * 4)) &
+				0xffffffff;
+		else
+			nv_wr32(ppmu,
+				(0x0010a4a0 + (queue->index * 4)),
+				(*head & 0xffffffff));
+	} else {
+		if (!set)
+			*head = nv_rd32(ppmu, 0x0010a4c8) & 0xffffffff;
+		else
+			nv_wr32(ppmu, 0x0010a4c8, (*head & 0xffffffff));
+	}
+
+	return 0;
+}
+
+static int pmu_queue_tail(struct pmu_desc *pmu, struct pmu_queue *queue,
+			u32 *tail, bool set)
+{
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+
+	BUG_ON(!tail);
+
+	if (PMU_IS_COMMAND_QUEUE(queue->id)) {
+
+		if (queue->index >= 0x00000004)
+			return -EINVAL;
+
+		if (!set)
+			*tail = nv_rd32(ppmu, 0x0010a4b0 + (queue->index * 4)) &
+				0xffffffff;
+		else
+			nv_wr32(ppmu, (0x0010a4b0 + (queue->index * 4)),
+							  (*tail & 0xffffffff));
+	} else {
+		if (!set)
+			*tail = nv_rd32(ppmu, 0x0010a4cc) & 0xffffffff;
+		else
+			nv_wr32(ppmu, 0x0010a4cc, (*tail & 0xffffffff));
+	}
+
+	return 0;
+}
+
+static inline void pmu_queue_read(struct pmu_desc *pmu,
+			u32 offset, u8 *dst, u32 size)
+{
+	pmu_copy_from_dmem(pmu, offset, dst, size, 0);
+}
+
+static inline void pmu_queue_write(struct pmu_desc *pmu,
+			u32 offset, u8 *src, u32 size)
+{
+	pmu_copy_to_dmem(pmu, offset, src, size, 0);
+}
+
+int pmu_mutex_acquire(struct nvkm_pmu *ppmu, u32 id, u32 *token)
+{
+	struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
+	struct pmu_desc *pmu = &impl->pmudata;
+	struct pmu_mutex *mutex;
+	u32 data, owner, max_retry;
+
+	if (!pmu->initialized)
+		return -EINVAL;
+
+	BUG_ON(!token);
+	BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
+	BUG_ON(id > pmu->mutex_cnt);
+
+	mutex = &pmu->mutex[id];
+
+	owner = nv_rd32(ppmu, 0x0010a580 + (mutex->index * 4)) & 0xff;
+
+	if (*token != PMU_INVALID_MUTEX_OWNER_ID && *token == owner) {
+		BUG_ON(mutex->ref_cnt == 0);
+		nv_debug(ppmu, "already acquired by owner : 0x%08x", *token);
+		mutex->ref_cnt++;
+		return 0;
+	}
+
+	max_retry = 40;
+	do {
+		data = nv_rd32(ppmu, 0x0010a488) & 0xff;
+		if (data == 0x00000000 ||
+		    data == 0x000000ff) {
+			nv_warn(ppmu,
+				"fail to generate mutex token: val 0x%08x",
+				owner);
+			usleep_range(20, 40);
+			continue;
+		}
+
+		owner = data;
+		nv_wr32(ppmu, (0x0010a580 + mutex->index * 4),
+			owner & 0xff);
+
+		data = nv_rd32(ppmu, 0x0010a580 + (mutex->index * 4));
+
+		if (owner == data) {
+			mutex->ref_cnt = 1;
+			nv_debug(ppmu, "mutex acquired: id=%d, token=0x%x",
+				mutex->index, *token);
+			*token = owner;
+			goto out;
+		} else {
+		  nv_debug(ppmu, "fail to acquire mutex idx=0x%08x",
+				mutex->index);
+
+			nv_mask(ppmu, 0x0010a48c, 0xff, (owner & 0xff));
+
+			usleep_range(20, 40);
+			continue;
+		}
+	} while (max_retry-- > 0);
+
+	return -EBUSY;
+out:
+	return 0;
+}
+
+int pmu_mutex_release(struct nvkm_pmu *ppmu, u32 id, u32 *token)
+{
+	struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
+	struct pmu_desc *pmu = &impl->pmudata;
+	struct pmu_mutex *mutex;
+	u32 owner;
+
+	if (!pmu->initialized)
+		return -EINVAL;
+
+	BUG_ON(!token);
+	BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
+	BUG_ON(id > pmu->mutex_cnt);
+
+	mutex = &pmu->mutex[id];
+
+	owner = nv_rd32(ppmu, 0x0010a580 + (mutex->index * 4)) & 0xff;
+
+	if (*token != owner) {
+		nv_error(ppmu,
+			"requester 0x%08x NOT match owner 0x%08x",
+			*token, owner);
+		return -EINVAL;
+	}
+
+	if (--mutex->ref_cnt > 0)
+		return -EBUSY;
+
+	nv_wr32(ppmu, 0x0010a580 + (mutex->index * 4), 0x00);
+
+	nv_mask(ppmu, 0x0010a48c, 0xff, (owner & 0xff));
+
+	nv_debug(ppmu, "mutex released: id=%d, token=0x%x",
+							  mutex->index, *token);
+
+	return 0;
+}
+
+static int pmu_queue_lock(struct pmu_desc *pmu,
+			struct pmu_queue *queue)
+{
+	int ret;
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+
+	if (PMU_IS_MESSAGE_QUEUE(queue->id))
+		return 0;
+
+	if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
+		mutex_lock(&queue->mutex);
+		return 0;
+	}
+
+	ret = pmu_mutex_acquire(ppmu, queue->mutex_id, &queue->mutex_lock);
+	return ret;
+}
+
+static int pmu_queue_unlock(struct pmu_desc *pmu,
+			struct pmu_queue *queue)
+{
+	int ret;
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+
+	if (PMU_IS_MESSAGE_QUEUE(queue->id))
+		return 0;
+
+	if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
+		mutex_unlock(&queue->mutex);
+		return 0;
+	}
+
+	ret = pmu_mutex_release(ppmu, queue->mutex_id, &queue->mutex_lock);
+	return ret;
+}
+
+/* called by pmu_read_message, no lock */
+static bool pmu_queue_is_empty(struct pmu_desc *pmu,
+			struct pmu_queue *queue)
+{
+	u32 head, tail;
+
+	pmu_queue_head(pmu, queue, &head, QUEUE_GET);
+	if (queue->opened && queue->oflag == OFLAG_READ)
+		tail = queue->position;
+	else
+		pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);
+
+	return head == tail;
+}
+
+static bool pmu_queue_has_room(struct pmu_desc *pmu,
+			struct pmu_queue *queue, u32 size, bool *need_rewind)
+{
+	u32 head, tail, free;
+	bool rewind = false;
+
+	size = ALIGN(size, QUEUE_ALIGNMENT);
+
+	pmu_queue_head(pmu, queue, &head, QUEUE_GET);
+	pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);
+
+	if (head >= tail) {
+		free = queue->offset + queue->size - head;
+		free -= PMU_CMD_HDR_SIZE;
+
+		if (size > free) {
+			rewind = true;
+			head = queue->offset;
+		}
+	}
+
+	if (head < tail)
+		free = tail - head - 1;
+
+	if (need_rewind)
+		*need_rewind = rewind;
+
+	return size <= free;
+}
+
+static int pmu_queue_push(struct pmu_desc *pmu,
+			struct pmu_queue *queue, void *data, u32 size)
+{
+
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+	if (!queue->opened && queue->oflag == OFLAG_WRITE) {
+		nv_error(ppmu, "queue not opened for write\n");
+		return -EINVAL;
+	}
+
+	pmu_queue_write(pmu, queue->position, data, size);
+	queue->position += ALIGN(size, QUEUE_ALIGNMENT);
+	return 0;
+}
+
+static int pmu_queue_pop(struct pmu_desc *pmu,
+			struct pmu_queue *queue, void *data, u32 size,
+			u32 *bytes_read)
+{
+	u32 head, tail, used;
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+
+	*bytes_read = 0;
+
+	if (!queue->opened && queue->oflag == OFLAG_READ) {
+		nv_error(ppmu, "queue not opened for read\n");
+		return -EINVAL;
+	}
+
+	pmu_queue_head(pmu, queue, &head, QUEUE_GET);
+	tail = queue->position;
+
+	if (head == tail)
+		return 0;
+
+	if (head > tail)
+		used = head - tail;
+	else
+		used = queue->offset + queue->size - tail;
+
+	if (size > used) {
+		nv_warn(ppmu, "queue size smaller than request read\n");
+		size = used;
+	}
+
+	pmu_queue_read(pmu, tail, data, size);
+	queue->position += ALIGN(size, QUEUE_ALIGNMENT);
+	*bytes_read = size;
+	return 0;
+}
+
+static void pmu_queue_rewind(struct pmu_desc *pmu,
+			struct pmu_queue *queue)
+{
+	struct pmu_cmd cmd;
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+
+
+	if (!queue->opened) {
+		nv_error(ppmu, "queue not opened\n");
+		goto out;
+	}
+
+	if (queue->oflag == OFLAG_WRITE) {
+		cmd.hdr.unit_id = PMU_UNIT_REWIND;
+		cmd.hdr.size = PMU_CMD_HDR_SIZE;
+		pmu_queue_push(pmu, queue, &cmd, cmd.hdr.size);
+		nv_debug(ppmu, "queue %d rewinded\n", queue->id);
+	}
+
+	queue->position = queue->offset;
+out:
+	nv_debug(ppmu, "exit %s\n", __func__);
+}
+
+/* open for read and lock the queue */
+static int pmu_queue_open_read(struct pmu_desc *pmu,
+			struct pmu_queue *queue)
+{
+	int err;
+
+	err = pmu_queue_lock(pmu, queue);
+	if (err)
+		return err;
+
+	if (queue->opened)
+		BUG();
+
+	pmu_queue_tail(pmu, queue, &queue->position, QUEUE_GET);
+	queue->oflag = OFLAG_READ;
+	queue->opened = true;
+
+	return 0;
+}
+
+/* open for write and lock the queue
+   make sure there's enough free space for the write */
+static int pmu_queue_open_write(struct pmu_desc *pmu,
+			struct pmu_queue *queue, u32 size)
+{
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+	bool rewind = false;
+	int err;
+
+	err = pmu_queue_lock(pmu, queue);
+	if (err)
+		return err;
+
+	if (queue->opened)
+		BUG();
+
+	if (!pmu_queue_has_room(pmu, queue, size, &rewind)) {
+		nv_error(ppmu, "queue full");
+		pmu_queue_unlock(pmu, queue);
+		return -EAGAIN;
+	}
+
+	pmu_queue_head(pmu, queue, &queue->position, QUEUE_GET);
+	queue->oflag = OFLAG_WRITE;
+	queue->opened = true;
+
+	if (rewind)
+		pmu_queue_rewind(pmu, queue);
+
+	return 0;
+}
+
+/* close and unlock the queue */
+static int pmu_queue_close(struct pmu_desc *pmu,
+			struct pmu_queue *queue, bool commit)
+{
+	if (!queue->opened)
+		return 0;
+
+	if (commit) {
+		if (queue->oflag == OFLAG_READ) {
+			pmu_queue_tail(pmu, queue,
+				&queue->position, QUEUE_SET);
+		} else {
+			pmu_queue_head(pmu, queue,
+				&queue->position, QUEUE_SET);
+		}
+	}
+
+	queue->opened = false;
+
+	pmu_queue_unlock(pmu, queue);
+
+	return 0;
+}
+
+int pmu_wait_message_cond(struct pmu_desc *pmu, u32 timeout,
+				 u32 *var, u32 val)
+{
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+	unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout);
+	unsigned long delay = GK20A_IDLE_CHECK_DEFAULT;
+
+	do {
+		if (*var == val)
+			return 0;
+
+		if (nv_rd32(ppmu, 0x0010a008))
+			gk20a_pmu_isr(ppmu);
+
+		usleep_range(delay, delay * 2);
+		delay = min_t(u32, delay << 1, GK20A_IDLE_CHECK_MAX);
+	} while (time_before(jiffies, end_jiffies));
+
+	return -ETIMEDOUT;
+}
+
+void pmu_dump_falcon_stats(struct pmu_desc *pmu)
+{
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+	int i;
+
+	nv_debug(ppmu, "pmu_falcon_os_r : %d\n",
+		nv_rd32(ppmu, 0x0010a080));
+	nv_debug(ppmu, "pmu_falcon_cpuctl_r : 0x%x\n",
+		nv_rd32(ppmu, 0x0010a100));
+	nv_debug(ppmu, "pmu_falcon_idlestate_r : 0x%x\n",
+		nv_rd32(ppmu, 0x0010a04c));
+	nv_debug(ppmu, "pmu_falcon_mailbox0_r : 0x%x\n",
+		nv_rd32(ppmu, 0x0010a040));
+	nv_debug(ppmu, "pmu_falcon_mailbox1_r : 0x%x\n",
+		nv_rd32(ppmu, 0x0010a044));
+	nv_debug(ppmu, "pmu_falcon_irqstat_r : 0x%x\n",
+		nv_rd32(ppmu, 0x0010a008));
+	nv_debug(ppmu, "pmu_falcon_irqmode_r : 0x%x\n",
+		nv_rd32(ppmu, 0x0010a00c));
+	nv_debug(ppmu, "pmu_falcon_irqmask_r : 0x%x\n",
+		nv_rd32(ppmu, 0x0010a018));
+	nv_debug(ppmu, "pmu_falcon_irqdest_r : 0x%x\n",
+		nv_rd32(ppmu, 0x0010a01c));
+
+	for (i = 0; i < 0x0000000c; i++)
+		nv_debug(ppmu, "pmu_pmu_mailbox_r(%d) : 0x%x\n",
+			i, nv_rd32(ppmu, 0x0010a450 + i*4));
+
+	for (i = 0; i < 0x00000004; i++)
+		nv_debug(ppmu, "pmu_pmu_debug_r(%d) : 0x%x\n",
+			i, nv_rd32(ppmu, 0x0010a5c0 + i*4));
+
+	for (i = 0; i < 6/*NV_Ppmu_FALCON_ICD_IDX_RSTAT__SIZE_1*/; i++) {
+		nv_wr32(ppmu, 0x0010a200,
+			0xe |
+			(i & 0x1f) << 8);
+		nv_debug(ppmu, "pmu_rstat (%d) : 0x%x\n",
+			i, nv_rd32(ppmu, 0x0010a20c));
+	}
+
+	i = nv_rd32(ppmu, 0x0010a7b0);
+	nv_debug(ppmu, "pmu_pmu_bar0_error_status_r : 0x%x\n", i);
+	if (i != 0) {
+		nv_debug(ppmu, "pmu_pmu_bar0_addr_r : 0x%x\n",
+			nv_rd32(ppmu, 0x0010a7a0));
+		nv_debug(ppmu, "pmu_pmu_bar0_data_r : 0x%x\n",
+			nv_rd32(ppmu, 0x0010a7a4));
+		nv_debug(ppmu, "pmu_pmu_bar0_timeout_r : 0x%x\n",
+			nv_rd32(ppmu, 0x0010a7a8));
+		nv_debug(ppmu, "pmu_pmu_bar0_ctl_r : 0x%x\n",
+			nv_rd32(ppmu, 0x0010a7ac));
+	}
+
+	i = nv_rd32(ppmu, 0x0010a988);
+	nv_debug(ppmu, "pmu_pmu_bar0_fecs_error_r : 0x%x\n", i);
+
+	i = nv_rd32(ppmu, 0x0010a16c);
+	nv_debug(ppmu, "pmu_falcon_exterrstat_r : 0x%x\n", i);
+	if (((i >> 31) & 0x1)) {
+		nv_debug(ppmu, "pmu_falcon_exterraddr_r : 0x%x\n",
+			nv_rd32(ppmu, 0x0010a168));
+		/*nv_debug(ppmu, "pmc_enable : 0x%x\n",
+		  nv_rd32(pmc, 0x00000200));*/
+	}
+
+	nv_debug(ppmu, "pmu_falcon_engctl_r : 0x%x\n",
+		nv_rd32(ppmu, 0x0010a0a4));
+	nv_debug(ppmu, "pmu_falcon_curctx_r : 0x%x\n",
+		nv_rd32(ppmu, 0x0010a050));
+	nv_debug(ppmu, "pmu_falcon_nxtctx_r : 0x%x\n",
+		nv_rd32(ppmu, 0x0010a054));
+
+	nv_wr32(ppmu, 0x0010a200,
+		0x8 |
+		((PMU_FALCON_REG_IMB & 0x1f) << 8));
+	nv_debug(ppmu, "PMU_FALCON_REG_IMB : 0x%x\n",
+		nv_rd32(ppmu, 0x0010a20c));
+
+	nv_wr32(ppmu, 0x0010a200,
+		0x8 |
+		((PMU_FALCON_REG_DMB & 0x1f) << 8));
+	nv_debug(ppmu, "PMU_FALCON_REG_DMB : 0x%x\n",
+		nv_rd32(ppmu, 0x0010a20c));
+
+	nv_wr32(ppmu, 0x0010a200,
+		0x8 |
+		((PMU_FALCON_REG_CSW & 0x1f) << 8));
+	nv_debug(ppmu, "PMU_FALCON_REG_CSW : 0x%x\n",
+		nv_rd32(ppmu, 0x0010a20c));
+
+	nv_wr32(ppmu, 0x0010a200,
+		0x8 |
+		((PMU_FALCON_REG_CTX & 0x1f) << 8));
+	nv_debug(ppmu, "PMU_FALCON_REG_CTX : 0x%x\n",
+		nv_rd32(ppmu, 0x0010a20c));
+
+	nv_wr32(ppmu, 0x0010a200,
+		0x8 |
+		((PMU_FALCON_REG_EXCI & 0x1f) << 8));
+	nv_debug(ppmu, "PMU_FALCON_REG_EXCI : 0x%x\n",
+		nv_rd32(ppmu, 0x0010a20c));
+
+	for (i = 0; i < 4; i++) {
+		nv_wr32(ppmu, 0x0010a200,
+			0x8 |
+			((PMU_FALCON_REG_PC & 0x1f) << 8));
+		nv_debug(ppmu, "PMU_FALCON_REG_PC : 0x%x\n",
+			nv_rd32(ppmu, 0x0010a20c));
+
+		nv_wr32(ppmu, 0x0010a200,
+			0x8 |
+			((PMU_FALCON_REG_SP & 0x1f) << 8));
+		nv_debug(ppmu, "PMU_FALCON_REG_SP : 0x%x\n",
+			nv_rd32(ppmu, 0x0010a20c));
+	}
+
+	/* PMU may crash due to FECS crash. Dump FECS status */
+	/*gk20a_fecs_dump_falcon_stats(g);*/
+}
+
+static bool pmu_validate_cmd(struct pmu_desc *pmu, struct pmu_cmd *cmd,
+			struct pmu_msg *msg, struct pmu_payload *payload,
+			u32 queue_id)
+{
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+	struct pmu_queue *queue;
+	u32 in_size, out_size;
+
+	nv_debug(ppmu, "pmu validate cmd\n");
+	pmu_dump_falcon_stats(pmu);
+
+	if (!PMU_IS_SW_COMMAND_QUEUE(queue_id))
+		goto invalid_cmd;
+
+	queue = &pmu->queue[queue_id];
+	if (cmd->hdr.size < PMU_CMD_HDR_SIZE)
+		goto invalid_cmd;
+
+	if (cmd->hdr.size > (queue->size >> 1))
+		goto invalid_cmd;
+
+	if (msg != NULL && msg->hdr.size < PMU_MSG_HDR_SIZE)
+		goto invalid_cmd;
+
+	if (!PMU_UNIT_ID_IS_VALID(cmd->hdr.unit_id))
+		goto invalid_cmd;
+
+	if (payload == NULL)
+		return true;
+
+	if (payload->in.buf == NULL && payload->out.buf == NULL)
+		goto invalid_cmd;
+
+	if ((payload->in.buf != NULL && payload->in.size == 0) ||
+	    (payload->out.buf != NULL && payload->out.size == 0))
+		goto invalid_cmd;
+
+	in_size = PMU_CMD_HDR_SIZE;
+	if (payload->in.buf) {
+		in_size += payload->in.offset;
+		in_size += sizeof(struct pmu_allocation_gk20a);
+	}
+
+	out_size = PMU_CMD_HDR_SIZE;
+	if (payload->out.buf) {
+		out_size += payload->out.offset;
+		out_size += sizeof(struct pmu_allocation_gk20a);
+	}
+
+	if (in_size > cmd->hdr.size || out_size > cmd->hdr.size)
+		goto invalid_cmd;
+
+
+	if ((payload->in.offset != 0 && payload->in.buf == NULL) ||
+	    (payload->out.offset != 0 && payload->out.buf == NULL))
+		goto invalid_cmd;
+
+	return true;
+
+invalid_cmd:
+	nv_error(ppmu, "invalid pmu cmd :\n"
+		"queue_id=%d,\n"
+		"cmd_size=%d, cmd_unit_id=%d, msg=%p, msg_size=%d,\n"
+		"payload in=%p, in_size=%d, in_offset=%d,\n"
+		"payload out=%p, out_size=%d, out_offset=%d",
+		queue_id, cmd->hdr.size, cmd->hdr.unit_id,
+		msg, msg ? msg->hdr.unit_id : ~0,
+		&payload->in, payload->in.size, payload->in.offset,
+		&payload->out, payload->out.size, payload->out.offset);
+
+	return false;
+}
+
+static int pmu_write_cmd(struct pmu_desc *pmu, struct pmu_cmd *cmd,
+			u32 queue_id, unsigned long timeout)
+{
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+	struct pmu_queue *queue;
+	unsigned long end_jiffies = jiffies +
+		msecs_to_jiffies(timeout);
+	int err;
+
+	nv_debug(ppmu, "pmu write cmd\n");
+
+	queue = &pmu->queue[queue_id];
+
+	do {
+		err = pmu_queue_open_write(pmu, queue, cmd->hdr.size);
+		if (err == -EAGAIN && time_before(jiffies, end_jiffies))
+			usleep_range(1000, 2000);
+		else
+			break;
+	} while (1);
+
+	if (err)
+		goto clean_up;
+
+	pmu_queue_push(pmu, queue, cmd, cmd->hdr.size);
+
+	err = pmu_queue_close(pmu, queue, true);
+
+clean_up:
+	if (err)
+		nv_error(ppmu,
+			"fail to write cmd to queue %d", queue_id);
+	else
+		nv_debug(ppmu, "cmd writing done");
+
+	return err;
+}
+
+int gk20a_pmu_cmd_post(struct nvkm_pmu *ppmu, struct pmu_cmd *cmd,
+		struct pmu_msg *msg, struct pmu_payload *payload,
+		u32 queue_id, pmu_callback callback, void *cb_param,
+		u32 *seq_desc, unsigned long timeout)
+{
+	struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
+	struct pmu_desc *pmu = &impl->pmudata;
+	struct pmu_sequence *seq;
+	struct pmu_allocation_gk20a *in = NULL, *out = NULL;
+	int err;
+
+	BUG_ON(!cmd);
+	BUG_ON(!seq_desc);
+	BUG_ON(!pmu->pmu_ready);
+	nv_debug(ppmu, "Post CMD\n");
+	if (!pmu_validate_cmd(pmu, cmd, msg, payload, queue_id))
+		return -EINVAL;
+
+	err = pmu_seq_acquire(pmu, &seq);
+	if (err)
+		return err;
+
+	cmd->hdr.seq_id = seq->id;
+
+	cmd->hdr.ctrl_flags = 0;
+	cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_STATUS;
+	cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_INTR;
+
+	seq->callback = callback;
+	seq->cb_params = cb_param;
+	seq->msg = msg;
+	seq->out_payload = NULL;
+	seq->desc = pmu->next_seq_desc++;
+
+	if (payload)
+		seq->out_payload = payload->out.buf;
+
+	*seq_desc = seq->desc;
+
+	if (payload && payload->in.offset != 0) {
+		in = (struct pmu_allocation_gk20a *)((u8 *)&cmd->cmd +
+			payload->in.offset);
+
+		if (payload->in.buf != payload->out.buf)
+			in->alloc.dmem.size = (u16)payload->in.size;
+		else
+			in->alloc.dmem.size =
+				(u16)max(payload->in.size, payload->out.size);
+
+		err = pmu->dmem.alloc(&pmu->dmem,
+			(void *)&in->alloc.dmem.offset,
+			in->alloc.dmem.size,
+			PMU_DMEM_ALLOC_ALIGNMENT);
+		if (err)
+			goto clean_up;
+
+		pmu_copy_to_dmem(pmu, (in->alloc.dmem.offset),
+			payload->in.buf, payload->in.size, 0);
+		seq->in_gk20a.alloc.dmem.size = in->alloc.dmem.size;
+		seq->in_gk20a.alloc.dmem.offset = in->alloc.dmem.offset;
+	}
+
+	if (payload && payload->out.offset != 0) {
+		out = (struct pmu_allocation_gk20a *)((u8 *)&cmd->cmd +
+			payload->out.offset);
+		out->alloc.dmem.size = (u16)payload->out.size;
+
+		if (payload->out.buf != payload->in.buf) {
+			err = pmu->dmem.alloc(&pmu->dmem,
+				(void *)&out->alloc.dmem.offset,
+				out->alloc.dmem.size,
+				PMU_DMEM_ALLOC_ALIGNMENT);
+			if (err)
+				goto clean_up;
+		} else {
+			BUG_ON(in == NULL);
+			out->alloc.dmem.offset = in->alloc.dmem.offset;
+		}
+
+		seq->out_gk20a.alloc.dmem.size = out->alloc.dmem.size;
+		seq->out_gk20a.alloc.dmem.offset = out->alloc.dmem.offset;
+	}
+
+	seq->state = PMU_SEQ_STATE_USED;
+	err = pmu_write_cmd(pmu, cmd, queue_id, timeout);
+	if (err)
+		seq->state = PMU_SEQ_STATE_PENDING;
+
+	nv_debug(ppmu, "cmd posted\n");
+
+	return 0;
+
+clean_up:
+	nv_debug(ppmu, "cmd post failed\n");
+	if (in)
+		pmu->dmem.free(&pmu->dmem,
+			in->alloc.dmem.offset,
+			in->alloc.dmem.size,
+			PMU_DMEM_ALLOC_ALIGNMENT);
+	if (out)
+		pmu->dmem.free(&pmu->dmem,
+			out->alloc.dmem.offset,
+			out->alloc.dmem.size,
+			PMU_DMEM_ALLOC_ALIGNMENT);
+
+	pmu_seq_release(pmu, seq);
+	return err;
+}
+
+void gk20a_pmu_isr(struct nvkm_pmu *ppmu)
+{
+	struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
+	struct pmu_desc *pmu = &impl->pmudata;
+	struct nvkm_mc *pmc = nvkm_mc(ppmu);
+	struct pmu_queue *queue;
+	u32 intr, mask;
+	bool recheck = false;
+	if (!pmu->isr_enabled)
+		goto out;
+
+	mask = nv_rd32(ppmu, 0x0010a018) &
+		nv_rd32(ppmu, 0x0010a01c);
+
+	intr = nv_rd32(ppmu, 0x0010a008) & mask;
+
+	nv_debug(ppmu, "received falcon interrupt: 0x%08x", intr);
+	pmu_enable_irq(ppmu, pmc, false);
+	if (!intr || pmu->pmu_state == PMU_STATE_OFF) {
+		nv_wr32(ppmu, 0x0010a004, intr);
+		nv_error(ppmu, "pmu state off\n");
+		pmu_enable_irq(ppmu, pmc, true);
+		goto out;
+	}
+	if (intr & 0x10) {
+		nv_error(ppmu,
+			"pmu halt intr not implemented");
+		pmu_dump_falcon_stats(pmu);
+	}
+	if (intr & 0x20) {
+		nv_error(ppmu,
+			"pmu exterr intr not implemented. Clearing interrupt.");
+		pmu_dump_falcon_stats(pmu);
+
+		nv_wr32(ppmu, 0x0010a16c,
+			nv_rd32(ppmu, 0x0010a16c) &
+				~(0x1 << 31));
+	}
+	if (intr & 0x40) {
+		nv_debug(ppmu, "scheduling work\n");
+		schedule_work(&pmu->isr_workq);
+		pmu_enable_irq(ppmu, pmc, true);
+		recheck = true;
+	}
+
+	if (recheck) {
+		queue = &pmu->queue[PMU_MESSAGE_QUEUE];
+		if (!pmu_queue_is_empty(pmu, queue))
+			nv_wr32(ppmu, 0x0010a000, 0x40);
+	} else {
+		pmu_enable_irq(ppmu, pmc, true);
+	}
+
+	pmu_enable_irq(ppmu, pmc, true);
+	nv_wr32(ppmu, 0x0010a004, intr);
+out:
+	nv_debug(ppmu, "irq handled\n");
+}
+
+static int
+gk20a_pmu_init_vm(struct nvkm_pmu *ppmu, const struct firmware *fw)
+{
+	int ret = 0;
+	struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
+	struct pmu_desc *pmu = &impl->pmudata;
+	u32 *ucode_image;
+	struct pmu_ucode_desc *desc = (struct pmu_ucode_desc *)fw->data;
+	int i;
+	struct pmu_priv_vm *ppmuvm = &pmuvm;
+	struct nvkm_device *device = nv_device(&ppmu->base);
+	struct nvkm_vm *vm;
+	u64 pmu_area_len = 300*1024;
+
+	ppmu->pmuvm = &pmuvm;
+	ppmu->pg_buf = &pmu->pg_buf;
+	pmu->pmu = ppmu;
+	/* mem for inst blk*/
+	ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, 0x1000, 0, 0,
+				&ppmuvm->mem);
+	if (ret)
+		goto instblk_alloc_err;
+
+	/* mem for pgd*/
+	ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, 0x8000, 0, 0,
+				&ppmuvm->pgd);
+	if (ret)
+		goto pgd_alloc_err;
+
+	/*allocate virtual memory range*/
+	ret = nvkm_vm_new(device, 0, pmu_area_len, 0, &vm);
+	if (ret)
+		goto virt_alloc_err;
+
+	atomic_inc(&vm->engref[NVDEV_SUBDEV_PMU]);
+	/*update VM with pgd */
+
+	ret = nvkm_vm_ref(vm, &ppmuvm->vm, ppmuvm->pgd);
+	if (ret)
+		goto virt_alloc_err;
+
+	/*update pgd in inst blk */
+	nv_wo32(ppmuvm->mem, 0x0200, lower_32_bits(ppmuvm->pgd->addr));
+	nv_wo32(ppmuvm->mem, 0x0204, upper_32_bits(ppmuvm->pgd->addr));
+	nv_wo32(ppmuvm->mem, 0x0208, lower_32_bits(pmu_area_len - 1));
+	nv_wo32(ppmuvm->mem, 0x020c, upper_32_bits(pmu_area_len - 1));
+
+	/* allocate memory for pmu fw to be copied to*/
+	ret = nvkm_gpuobj_new(nv_object(ppmu), NULL,
+		   GK20A_PMU_UCODE_SIZE_MAX, 0x1000, 0, &pmu->ucode.pmubufobj);
+	if (ret)
+		goto fw_alloc_err;
+
+	ucode_image = (u32 *)((u32)desc + desc->descriptor_size);
+	for (i = 0; i < (desc->app_start_offset + desc->app_size) >> 2; i++) {
+		nv_wo32(pmu->ucode.pmubufobj, i << 2, ucode_image[i]);
+		pr_info("writing 0x%08x\n", ucode_image[i]);
+	}
+	/* map allocated memory into GMMU */
+	ret = nvkm_gpuobj_map_vm(nv_gpuobj(pmu->ucode.pmubufobj), vm,
+				    NV_MEM_ACCESS_RW,
+				    &pmu->ucode.pmubufvma);
+	if (ret)
+		goto map_err;
+
+	nv_debug(ppmu, "%s function end\n", __func__);
+	return ret;
+map_err:
+	nvkm_gpuobj_destroy(pmu->ucode.pmubufobj);
+virt_alloc_err:
+fw_alloc_err:
+	nvkm_gpuobj_destroy(ppmuvm->pgd);
+pgd_alloc_err:
+	nvkm_gpuobj_destroy(ppmuvm->mem);
+instblk_alloc_err:
+	return ret;
+
+}
+
+static int
+gk20a_pmu_load_firmware(struct nvkm_pmu *ppmu, const struct firmware **pfw)
+{
+	struct nvkm_device *dev;
+	char name[32];
+
+	dev = nv_device(ppmu);
+
+	snprintf(name, sizeof(name), "nvidia/tegra124/%s",
+							 GK20A_PMU_UCODE_IMAGE);
+
+	return request_firmware(pfw, name, nv_device_base(dev));
+}
+
+static void
+gk20a_pmu_dump_firmware_info(struct nvkm_pmu *ppmu,
+		const struct firmware *fw)
+{
+	struct pmu_ucode_desc *desc = (struct pmu_ucode_desc *)fw->data;
+
+	nv_debug(ppmu, "GK20A PMU firmware information\n");
+	nv_debug(ppmu, "descriptor size = %u\n", desc->descriptor_size);
+	nv_debug(ppmu, "image size  = %u\n", desc->image_size);
+	nv_debug(ppmu, "app_version = 0x%08x\n", desc->app_version);
+	nv_debug(ppmu, "date = %s\n", desc->date);
+	nv_debug(ppmu, "bootloader_start_offset = 0x%08x\n",
+				desc->bootloader_start_offset);
+	nv_debug(ppmu, "bootloader_size = 0x%08x\n", desc->bootloader_size);
+	nv_debug(ppmu, "bootloader_imem_offset = 0x%08x\n",
+				desc->bootloader_imem_offset);
+	nv_debug(ppmu, "bootloader_entry_point = 0x%08x\n",
+				desc->bootloader_entry_point);
+	nv_debug(ppmu, "app_start_offset = 0x%08x\n", desc->app_start_offset);
+	nv_debug(ppmu, "app_size = 0x%08x\n", desc->app_size);
+	nv_debug(ppmu, "app_imem_offset = 0x%08x\n", desc->app_imem_offset);
+	nv_debug(ppmu, "app_imem_entry = 0x%08x\n", desc->app_imem_entry);
+	nv_debug(ppmu, "app_dmem_offset = 0x%08x\n", desc->app_dmem_offset);
+	nv_debug(ppmu, "app_resident_code_offset = 0x%08x\n",
+			desc->app_resident_code_offset);
+	nv_debug(ppmu, "app_resident_code_size = 0x%08x\n",
+			desc->app_resident_code_size);
+	nv_debug(ppmu, "app_resident_data_offset = 0x%08x\n",
+			desc->app_resident_data_offset);
+	nv_debug(ppmu, "app_resident_data_size = 0x%08x\n",
+			desc->app_resident_data_size);
+	nv_debug(ppmu, "nb_overlays = %d\n", desc->nb_overlays);
+
+	nv_debug(ppmu, "compressed = %u\n", desc->compressed);
+}
+
+static int pmu_process_init_msg(struct pmu_desc *pmu,
+			struct pmu_msg *msg)
+{
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+	struct pmu_init_msg_pmu_gk20a *init;
+	struct pmu_sha1_gid_data gid_data;
+	u32 i, tail = 0;
+
+	tail = nv_rd32(ppmu, 0x0010a4cc) & 0xffffffff;
+
+	pmu_copy_from_dmem(pmu, tail,
+		(u8 *)&msg->hdr, PMU_MSG_HDR_SIZE, 0);
+
+	if (msg->hdr.unit_id != PMU_UNIT_INIT) {
+		nv_error(ppmu,
+			"expecting init msg");
+		return -EINVAL;
+	}
+
+	pmu_copy_from_dmem(pmu, tail + PMU_MSG_HDR_SIZE,
+		(u8 *)&msg->msg, msg->hdr.size - PMU_MSG_HDR_SIZE, 0);
+
+	if (msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) {
+		nv_error(ppmu,
+			"expecting init msg");
+		return -EINVAL;
+	}
+
+	tail += ALIGN(msg->hdr.size, PMU_DMEM_ALIGNMENT);
+	nv_wr32(ppmu, 0x0010a4cc,
+		tail & 0xffffffff);
+
+	init = &msg->msg.init.pmu_init_gk20a;
+	if (!pmu->gid_info.valid) {
+
+		pmu_copy_from_dmem(pmu,
+			init->sw_managed_area_offset,
+			(u8 *)&gid_data,
+			sizeof(struct pmu_sha1_gid_data), 0);
+
+		pmu->gid_info.valid =
+			(*(u32 *)gid_data.signature == PMU_SHA1_GID_SIGNATURE);
+
+		if (pmu->gid_info.valid) {
+
+			BUG_ON(sizeof(pmu->gid_info.gid) !=
+				sizeof(gid_data.gid));
+
+			memcpy(pmu->gid_info.gid, gid_data.gid,
+				sizeof(pmu->gid_info.gid));
+		}
+	}
+
+	for (i = 0; i < PMU_QUEUE_COUNT; i++)
+		pmu_queue_init(pmu, i, init);
+
+	if (!pmu->dmem.alloc)
+		nvkm_pmu_allocator_init(&pmu->dmem, "gk20a_pmu_dmem",
+				init->sw_managed_area_offset,
+				init->sw_managed_area_size);
+
+	pmu->pmu_ready = true;
+	pmu->pmu_state = PMU_STATE_INIT_RECEIVED;
+
+	return 0;
+}
+
+static bool pmu_read_message(struct pmu_desc *pmu, struct pmu_queue *queue,
+			struct pmu_msg *msg, int *status)
+{
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+	u32 read_size, bytes_read;
+	int err;
+
+	*status = 0;
+
+	if (pmu_queue_is_empty(pmu, queue))
+		return false;
+
+	err = pmu_queue_open_read(pmu, queue);
+	if (err) {
+		nv_error(ppmu,
+			"fail to open queue %d for read", queue->id);
+		*status = err;
+		return false;
+	}
+
+	err = pmu_queue_pop(pmu, queue, &msg->hdr,
+			PMU_MSG_HDR_SIZE, &bytes_read);
+	if (err || bytes_read != PMU_MSG_HDR_SIZE) {
+		nv_error(ppmu,
+			"fail to read msg from queue %d", queue->id);
+		*status = err | -EINVAL;
+		goto clean_up;
+	}
+
+	if (msg->hdr.unit_id == PMU_UNIT_REWIND) {
+		pmu_queue_rewind(pmu, queue);
+		/* read again after rewind */
+		err = pmu_queue_pop(pmu, queue, &msg->hdr,
+				PMU_MSG_HDR_SIZE, &bytes_read);
+		if (err || bytes_read != PMU_MSG_HDR_SIZE) {
+			nv_error(ppmu,
+				"fail to read msg from queue %d", queue->id);
+			*status = err | -EINVAL;
+			goto clean_up;
+		}
+	}
+
+	if (!PMU_UNIT_ID_IS_VALID(msg->hdr.unit_id)) {
+		nv_error(ppmu,
+			"read invalid unit_id %d from queue %d",
+			msg->hdr.unit_id, queue->id);
+			*status = -EINVAL;
+			goto clean_up;
+	}
+
+	if (msg->hdr.size > PMU_MSG_HDR_SIZE) {
+		read_size = msg->hdr.size - PMU_MSG_HDR_SIZE;
+		err = pmu_queue_pop(pmu, queue, &msg->msg,
+			read_size, &bytes_read);
+		if (err || bytes_read != read_size) {
+			nv_error(ppmu,
+				"fail to read msg from queue %d", queue->id);
+			*status = err;
+			goto clean_up;
+		}
+	}
+
+	err = pmu_queue_close(pmu, queue, true);
+	if (err) {
+		nv_error(ppmu,
+			"fail to close queue %d", queue->id);
+		*status = err;
+		return false;
+	}
+
+	return true;
+
+clean_up:
+	err = pmu_queue_close(pmu, queue, false);
+	if (err)
+		nv_error(ppmu,
+			"fail to close queue %d", queue->id);
+	return false;
+}
+
+static int pmu_response_handle(struct pmu_desc *pmu,
+			struct pmu_msg *msg)
+{
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+	struct pmu_sequence *seq;
+	int ret = 0;
+
+	nv_debug(ppmu, "handling pmu response\n");
+	seq = &pmu->seq[msg->hdr.seq_id];
+	if (seq->state != PMU_SEQ_STATE_USED &&
+	    seq->state != PMU_SEQ_STATE_CANCELLED) {
+		nv_error(ppmu,
+			"msg for an unknown sequence %d", seq->id);
+		return -EINVAL;
+	}
+
+	if (msg->hdr.unit_id == PMU_UNIT_RC &&
+	    msg->msg.rc.msg_type == PMU_RC_MSG_TYPE_UNHANDLED_CMD) {
+		nv_error(ppmu,
+			"unhandled cmd: seq %d", seq->id);
+	} else if (seq->state != PMU_SEQ_STATE_CANCELLED) {
+		if (seq->msg) {
+			if (seq->msg->hdr.size >= msg->hdr.size) {
+				memcpy(seq->msg, msg, msg->hdr.size);
+				if (seq->out_gk20a.alloc.dmem.size != 0) {
+					pmu_copy_from_dmem(pmu,
+					seq->out_gk20a.alloc.dmem.offset,
+					seq->out_payload,
+					seq->out_gk20a.alloc.dmem.size, 0);
+				}
+			} else {
+				nv_error(ppmu,
+					"sequence %d msg buffer too small",
+					seq->id);
+			}
+		}
+	} else
+		seq->callback = NULL;
+	if (seq->in_gk20a.alloc.dmem.size != 0)
+		pmu->dmem.free(&pmu->dmem,
+			seq->in_gk20a.alloc.dmem.offset,
+			seq->in_gk20a.alloc.dmem.size,
+			PMU_DMEM_ALLOC_ALIGNMENT);
+	if (seq->out_gk20a.alloc.dmem.size != 0)
+		pmu->dmem.free(&pmu->dmem,
+			seq->out_gk20a.alloc.dmem.offset,
+			seq->out_gk20a.alloc.dmem.size,
+			PMU_DMEM_ALLOC_ALIGNMENT);
+
+	if (seq->callback)
+		seq->callback(ppmu, msg, seq->cb_params, seq->desc, ret);
+
+	pmu_seq_release(pmu, seq);
+
+	/* TBD: notify client waiting for available dmem */
+	nv_debug(ppmu, "pmu response processed\n");
+
+	return 0;
+}
+
+int pmu_wait_message_cond(struct pmu_desc *pmu, u32 timeout,
+				 u32 *var, u32 val);
+
+
+static int pmu_handle_event(struct pmu_desc *pmu, struct pmu_msg *msg)
+{
+	int err = 0;
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+
+	switch (msg->hdr.unit_id) {
+	case PMU_UNIT_PERFMON:
+		nv_debug(ppmu, "init perfmon event generated\n");
+		break;
+	default:
+		nv_debug(ppmu, "default event generated\n");
+		break;
+	}
+
+	return err;
+}
+
+void pmu_process_message(struct work_struct *work)
+{
+	struct pmu_desc *pmu = container_of(work, struct pmu_desc, isr_workq);
+	struct pmu_msg msg;
+	int status;
+	struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
+		impl_from_pmu(pmu));
+	struct nvkm_mc *pmc = nvkm_mc(ppmu);
+
+	mutex_lock(&pmu->isr_mutex);
+	if (unlikely(!pmu->pmu_ready)) {
+		nv_debug(ppmu, "processing init msg\n");
+		pmu_process_init_msg(pmu, &msg);
+		mutex_unlock(&pmu->isr_mutex);
+		pmu_enable_irq(ppmu, pmc, true);
+		goto out;
+	}
+
+	while (pmu_read_message(pmu,
+		&pmu->queue[PMU_MESSAGE_QUEUE], &msg, &status)) {
+
+		nv_debug(ppmu, "read msg hdr:\n"
+				"unit_id = 0x%08x, size = 0x%08x,\n"
+				"ctrl_flags = 0x%08x, seq_id = 0x%08x\n",
+				msg.hdr.unit_id, msg.hdr.size,
+				msg.hdr.ctrl_flags, msg.hdr.seq_id);
+
+		msg.hdr.ctrl_flags &= ~PMU_CMD_FLAGS_PMU_MASK;
+
+		if (msg.hdr.ctrl_flags == PMU_CMD_FLAGS_EVENT)
+			pmu_handle_event(pmu, &msg);
+		else
+			pmu_response_handle(pmu, &msg);
+	}
+	mutex_unlock(&pmu->isr_mutex);
+	pmu_enable_irq(ppmu, pmc, true);
+out:
+	nv_debug(ppmu, "exit %s\n", __func__);
+}
+
+int gk20a_pmu_destroy(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc)
+{
+	struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
+	struct pmu_desc *pmu = &impl->pmudata;
+
+	/* make sure the pending operations are finished before we continue */
+	cancel_work_sync(&pmu->isr_workq);
+	pmu->initialized = false;
+
+	mutex_lock(&pmu->isr_mutex);
+	pmu_enable(ppmu, pmc, false);
+	pmu->isr_enabled = false;
+	mutex_unlock(&pmu->isr_mutex);
+
+	pmu->pmu_state = PMU_STATE_OFF;
+	pmu->pmu_ready = false;
+	pmu->zbc_ready = false;
+
+	return 0;
+}
+
+int gk20a_pmu_load_norm(struct nvkm_pmu *ppmu, u32 *load)
+{
+	struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
+	struct pmu_desc *pmu = &impl->pmudata;
+	*load = pmu->load_shadow;
+	return 0;
+}
+
+int gk20a_pmu_load_update(struct nvkm_pmu *ppmu)
+{
+	struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
+	struct pmu_desc *pmu = &impl->pmudata;
+	u16 _load = 0;
+
+	pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0);
+	pmu->load_shadow = _load / 10;
+	pmu->load_avg = (((9*pmu->load_avg) + pmu->load_shadow) / 10);
+
+	return 0;
+}
+
+void gk20a_pmu_get_load_counters(struct nvkm_pmu *ppmu, u32 *busy_cycles,
+				 u32 *total_cycles)
+{
+  /*todo if (!g->power_on || gk20a_busy(g->dev)) {
+		*busy_cycles = 0;
+		*total_cycles = 0;
+		return;
+		}*/
+
+	*busy_cycles = nv_rd32(ppmu, 0x0010a508 + 16) & 0x7fffffff;
+	/*todormb();*/
+	*total_cycles = nv_rd32(ppmu, 0x0010a508 + 32) & 0x7fffffff;
+	/*todogk20a_idle(g->dev);*/
+}
+
+void gk20a_pmu_reset_load_counters(struct nvkm_pmu *ppmu)
+{
+	u32 reg_val = 1 << 31;
+
+	/*todoif (!g->power_on || gk20a_busy(g->dev))
+	  return;*/
+
+	nv_wr32(ppmu, 0x0010a508 + 32, reg_val);
+	/*todowmb()*/;
+	nv_wr32(ppmu, 0x0010a508 + 16, reg_val);
+	/*todogk20a_idle(g->dev);*/
+}
+
+static int gk20a_init_pmu_setup_hw1(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc)
+{
+	struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
+	struct pmu_desc *pmu = &impl->pmudata;
+	int err;
+
+	mutex_lock(&pmu->isr_mutex);
+	pmu_reset(ppmu, pmc);
+	pmu->isr_enabled = true;
+	mutex_unlock(&pmu->isr_mutex);
+
+	/* setup apertures - virtual */
+	nv_wr32(ppmu, 0x10a600 + 0 * 4, 0x0);
+	nv_wr32(ppmu, 0x10a600 + 1 * 4, 0x0);
+	/* setup apertures - physical */
+	nv_wr32(ppmu, 0x10a600 + 2 * 4, 0x4 | 0x0);
+	nv_wr32(ppmu, 0x10a600 + 3 * 4, 0x4 | 0x1);
+	nv_wr32(ppmu, 0x10a600 + 4 * 4, 0x4 | 0x2);
+
+	/* TBD: load pmu ucode */
+	err = pmu_bootstrap(pmu);
+	if (err)
+		return err;
+
+	return 0;
+
+}
+
+static int gk20a_init_pmu_setup_sw(struct nvkm_pmu *ppmu)
+{
+	struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
+	struct pmu_desc *pmu = &impl->pmudata;
+	struct pmu_priv_vm *ppmuvm = &pmuvm;
+	int i, err = 0;
+	int ret = 0;
+
+
+	if (pmu->sw_ready) {
+
+		for (i = 0; i < pmu->mutex_cnt; i++) {
+			pmu->mutex[i].id    = i;
+			pmu->mutex[i].index = i;
+		}
+		pmu_seq_init(pmu);
+
+		nv_debug(ppmu, "skipping init\n");
+		goto skip_init;
+	}
+
+	/* no infoRom script from vbios? */
+
+	/* TBD: sysmon subtask */
+
+	pmu->mutex_cnt = 0x00000010;
+	pmu->mutex = kzalloc(pmu->mutex_cnt *
+		sizeof(struct pmu_mutex), GFP_KERNEL);
+	if (!pmu->mutex) {
+		err = -ENOMEM;
+		nv_error(ppmu, "not enough space ENOMEM\n");
+		goto err;
+	}
+
+	for (i = 0; i < pmu->mutex_cnt; i++) {
+		pmu->mutex[i].id    = i;
+		pmu->mutex[i].index = i;
+	}
+
+	pmu->seq = kzalloc(PMU_MAX_NUM_SEQUENCES *
+		sizeof(struct pmu_sequence), GFP_KERNEL);
+	if (!pmu->seq) {
+		err = -ENOMEM;
+		nv_error(ppmu, "not enough space ENOMEM\n");
+		goto err_free_mutex;
+	}
+
+	pmu_seq_init(pmu);
+
+	INIT_WORK(&pmu->isr_workq, pmu_process_message);
+	init_waitqueue_head(&ppmu->init_wq);
+	ppmu->gr_initialised = false;
+
+	/* allocate memory for pmu fw area */
+	ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, GK20A_PMU_SEQ_BUF_SIZE,
+					    0x1000, 0, &pmu->seq_buf.pmubufobj);
+	if (ret)
+		return ret;
+	ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, GK20A_PMU_TRACE_BUFSIZE,
+					    0, 0, &pmu->trace_buf.pmubufobj);
+	if (ret)
+		return ret;
+	/* map allocated memory into GMMU */
+	ret = nvkm_gpuobj_map_vm(nv_gpuobj(pmu->seq_buf.pmubufobj),
+					ppmuvm->vm,
+					NV_MEM_ACCESS_RW,
+					&pmu->seq_buf.pmubufvma);
+	if (ret)
+		return ret;
+	ret = nvkm_gpuobj_map_vm(nv_gpuobj(pmu->trace_buf.pmubufobj),
+					ppmuvm->vm,
+					NV_MEM_ACCESS_RW,
+					&pmu->trace_buf.pmubufvma);
+	if (ret)
+		return ret;
+
+	/* TBD: remove this if ZBC save/restore is handled by PMU
+	 * end an empty ZBC sequence for now */
+	nv_wo32(pmu->seq_buf.pmubufobj, 0, 0x16);
+	nv_wo32(pmu->seq_buf.pmubufobj, 1, 0x00);
+	nv_wo32(pmu->seq_buf.pmubufobj, 2, 0x01);
+	nv_wo32(pmu->seq_buf.pmubufobj, 3, 0x00);
+	nv_wo32(pmu->seq_buf.pmubufobj, 4, 0x00);
+	nv_wo32(pmu->seq_buf.pmubufobj, 5, 0x00);
+	nv_wo32(pmu->seq_buf.pmubufobj, 6, 0x00);
+	nv_wo32(pmu->seq_buf.pmubufobj, 7, 0x00);
+
+	pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE;
+	ret = gk20a_pmu_debugfs_init(ppmu);
+	if (ret)
+		return ret;
+
+	pmu->sw_ready = true;
+
+skip_init:
+	return 0;
+err_free_mutex:
+	kfree(pmu->mutex);
+err:
+	return err;
+}
+
+static void
+gk20a_pmu_pgob(struct nvkm_pmu *ppmu, bool enable)
+{
+	/*
+	nv_mask(ppmu, 0x000200, 0x00001000, 0x00000000);
+	nv_rd32(ppmu, 0x000200);
+	nv_mask(ppmu, 0x000200, 0x08000000, 0x08000000);
+
+	msleep(50);
+
+	nv_mask(ppmu, 0x000200, 0x08000000, 0x00000000);
+	nv_mask(ppmu, 0x000200, 0x00001000, 0x00001000);
+	nv_rd32(ppmu, 0x000200);
+	*/
+}
+
+static void gk20a_pmu_intr(struct nvkm_subdev *subdev)
+{
+	struct nvkm_pmu *ppmu = nvkm_pmu(subdev);
+
+	gk20a_pmu_isr(ppmu);
+}
+
+void gk20a_remove_pmu_support(struct pmu_desc *pmu)
+{
+	nvkm_pmu_allocator_destroy(&pmu->dmem);
+}
+
+int  gk20a_message(struct nvkm_pmu *ppmu, u32 reply[2],
+		 u32 process, u32 message, u32 data0, u32 data1)
+{
+	return -EPERM;
+}
+
+int
+gk20a_pmu_create_(struct nvkm_object *parent,
+		    struct nvkm_object *engine,
+		    struct nvkm_oclass *oclass, int length, void **pobject)
+{
+	struct nvkm_pmu *ppmu;
+	struct nvkm_device *device = nv_device(parent);
+	int ret;
+
+	ret = nvkm_subdev_create_(parent, engine, oclass, 0, "PPMU",
+				     "pmu", length, pobject);
+	ppmu = *pobject;
+	if (ret)
+		return ret;
+
+	ret = nv_device_get_irq(device, true);
+
+	ppmu->message = gk20a_message;
+	ppmu->pgob = gk20a_pmu_pgob;
+	ppmu->pmu_mutex_acquire = pmu_mutex_acquire;
+	ppmu->pmu_mutex_release = pmu_mutex_release;
+	ppmu->pmu_load_norm = gk20a_pmu_load_norm;
+	ppmu->pmu_load_update = gk20a_pmu_load_update;
+	ppmu->pmu_reset_load_counters = gk20a_pmu_reset_load_counters;
+	ppmu->pmu_get_load_counters = gk20a_pmu_get_load_counters;
+
+	return 0;
+}
+
+
+
diff --git a/drm/nouveau/nvkm/subdev/pmu/gk20a.h b/drm/nouveau/nvkm/subdev/pmu/gk20a.h
new file mode 100644
index 000000000000..a084d6d518b4
--- /dev/null
+++ b/drm/nouveau/nvkm/subdev/pmu/gk20a.h
@@ -0,0 +1,369 @@
+#ifndef __NVKM_pmu_GK20A_H__
+#define __NVKM_pmu_GK20A_H__
+
+/*
+ * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+void pmu_setup_hw(struct pmu_desc *pmu);
+void gk20a_remove_pmu_support(struct pmu_desc *pmu);
+#define gk20a_pmu_create(p, e, o, d)                                         \
+	gk20a_pmu_create_((p), (e), (o), sizeof(**d), (void **)d)
+
+int gk20a_pmu_create_(struct nvkm_object *, struct nvkm_object *,
+			struct nvkm_oclass *, int, void **);
+/* defined by pmu hw spec */
+#define GK20A_PMU_VA_SIZE		(512 * 1024 * 1024)
+#define GK20A_PMU_UCODE_SIZE_MAX	(256 * 1024)
+#define GK20A_PMU_SEQ_BUF_SIZE		4096
+/* idle timeout */
+#define GK20A_IDLE_CHECK_DEFAULT		100 /* usec */
+#define GK20A_IDLE_CHECK_MAX		5000 /* usec */
+
+/* so far gk20a has two engines: gr and ce2(gr_copy) */
+enum {
+	ENGINE_GR_GK20A	    = 0,
+	ENGINE_CE2_GK20A    = 1,
+	ENGINE_INVAL_GK20A
+};
+
+#define ZBC_MASK(i)			(~(~(0) << ((i)+1)) & 0xfffe)
+
+#define APP_VERSION_GK20A 17997577
+
+enum {
+	GK20A_PMU_DMAIDX_UCODE		= 0,
+	GK20A_PMU_DMAIDX_VIRT		= 1,
+	GK20A_PMU_DMAIDX_PHYS_VID	= 2,
+	GK20A_PMU_DMAIDX_PHYS_SYS_COH	= 3,
+	GK20A_PMU_DMAIDX_PHYS_SYS_NCOH	= 4,
+	GK20A_PMU_DMAIDX_RSVD		= 5,
+	GK20A_PMU_DMAIDX_PELPG		= 6,
+	GK20A_PMU_DMAIDX_END		= 7
+};
+
+struct pmu_mem_gk20a {
+	u32 dma_base;
+	u8  dma_offset;
+	u8  dma_idx;
+	u16 fb_size;
+};
+
+struct pmu_dmem {
+	u16 size;
+	u32 offset;
+};
+
+struct pmu_cmdline_args_gk20a {
+	u32 cpu_freq_hz;		/* Frequency of the clock driving PMU */
+	u32 falc_trace_size;		/* falctrace buffer size (bytes) */
+	u32 falc_trace_dma_base;	/* 256-byte block address */
+	u32 falc_trace_dma_idx;		/* dmaIdx for DMA operations */
+	u8 secure_mode;
+	struct pmu_mem_gk20a gc6_ctx;		/* dmem offset of gc6 context */
+};
+
+#define GK20A_PMU_TRACE_BUFSIZE     0x4000   /* 4K */
+#define GK20A_PMU_DMEM_BLKSIZE2		8
+
+#define GK20A_PMU_UCODE_NB_MAX_OVERLAY	    32
+#define GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH  64
+
+struct pmu_ucode_desc {
+	u32 descriptor_size;
+	u32 image_size;
+	u32 tools_version;
+	u32 app_version;
+	char date[GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH];
+	u32 bootloader_start_offset;
+	u32 bootloader_size;
+	u32 bootloader_imem_offset;
+	u32 bootloader_entry_point;
+	u32 app_start_offset;
+	u32 app_size;
+	u32 app_imem_offset;
+	u32 app_imem_entry;
+	u32 app_dmem_offset;
+	u32 app_resident_code_offset;  /* Offset from appStartOffset */
+/* Exact size of the resident code
+ * ( potentially contains CRC inside at the end ) */
+	u32 app_resident_code_size;
+	u32 app_resident_data_offset;  /* Offset from appStartOffset */
+/* Exact size of the resident data
+ * ( potentially contains CRC inside at the end ) */
+	u32 app_resident_data_size;
+	u32 nb_overlays;
+	struct {u32 start; u32 size; } load_ovl[GK20A_PMU_UCODE_NB_MAX_OVERLAY];
+	u32 compressed;
+};
+
+#define PMU_UNIT_REWIND		(0x00)
+#define PMU_UNIT_PG		(0x03)
+#define PMU_UNIT_INIT		(0x07)
+#define PMU_UNIT_PERFMON	(0x12)
+#define PMU_UNIT_THERM		(0x1B)
+#define PMU_UNIT_RC		(0x1F)
+#define PMU_UNIT_NULL		(0x20)
+#define PMU_UNIT_END		(0x23)
+
+#define PMU_UNIT_TEST_START	(0xFE)
+#define PMU_UNIT_END_SIM	(0xFF)
+#define PMU_UNIT_TEST_END	(0xFF)
+
+#define PMU_UNIT_ID_IS_VALID(id)		\
+		(((id) < PMU_UNIT_END) || ((id) >= PMU_UNIT_TEST_START))
+
+#define PMU_DMEM_ALLOC_ALIGNMENT	(32)
+#define PMU_DMEM_ALIGNMENT		(4)
+
+#define PMU_CMD_FLAGS_PMU_MASK		(0xF0)
+
+#define PMU_CMD_FLAGS_STATUS		BIT(0)
+#define PMU_CMD_FLAGS_INTR		BIT(1)
+#define PMU_CMD_FLAGS_EVENT		BIT(2)
+#define PMU_CMD_FLAGS_WATERMARK		BIT(3)
+
+struct pmu_hdr {
+	u8 unit_id;
+	u8 size;
+	u8 ctrl_flags;
+	u8 seq_id;
+};
+#define PMU_MSG_HDR_SIZE	sizeof(struct pmu_hdr)
+#define PMU_CMD_HDR_SIZE	sizeof(struct pmu_hdr)
+
+
+struct pmu_allocation_gk20a {
+	struct {
+		struct pmu_dmem dmem;
+		struct pmu_mem_gk20a fb;
+	} alloc;
+};
+
+enum {
+	PMU_INIT_MSG_TYPE_PMU_INIT = 0,
+};
+
+struct pmu_init_msg_pmu_gk20a {
+	u8 msg_type;
+	u8 pad;
+	u16  os_debug_entry_point;
+
+	struct {
+		u16 size;
+		u16 offset;
+		u8  index;
+		u8  pad;
+	} queue_info[PMU_QUEUE_COUNT];
+
+	u16 sw_managed_area_offset;
+	u16 sw_managed_area_size;
+};
+
+struct pmu_init_msg {
+	union {
+		u8 msg_type;
+		struct pmu_init_msg_pmu_gk20a pmu_init_gk20a;
+	};
+};
+
+
+enum {
+	PMU_RC_MSG_TYPE_UNHANDLED_CMD = 0,
+};
+
+struct pmu_rc_msg_unhandled_cmd {
+	u8 msg_type;
+	u8 unit_id;
+};
+
+struct pmu_rc_msg {
+	u8 msg_type;
+	struct pmu_rc_msg_unhandled_cmd unhandled_cmd;
+};
+
+/* PERFMON */
+#define PMU_DOMAIN_GROUP_PSTATE		0
+#define PMU_DOMAIN_GROUP_GPC2CLK	1
+#define PMU_DOMAIN_GROUP_NUM		2
+struct pmu_perfmon_counter_gk20a {
+	u8 index;
+	u8 flags;
+	u8 group_id;
+	u8 valid;
+	u16 upper_threshold; /* units of 0.01% */
+	u16 lower_threshold; /* units of 0.01% */
+};
+struct pmu_zbc_cmd {
+	u8 cmd_type;
+	u8 pad;
+	u16 entry_mask;
+};
+
+/* PERFMON MSG */
+enum {
+	PMU_PERFMON_MSG_ID_INCREASE_EVENT = 0,
+	PMU_PERFMON_MSG_ID_DECREASE_EVENT = 1,
+	PMU_PERFMON_MSG_ID_INIT_EVENT     = 2,
+	PMU_PERFMON_MSG_ID_ACK            = 3
+};
+
+struct pmu_perfmon_msg_generic {
+	u8 msg_type;
+	u8 state_id;
+	u8 group_id;
+	u8 data;
+};
+
+struct pmu_perfmon_msg {
+	union {
+		u8 msg_type;
+		struct pmu_perfmon_msg_generic gen;
+	};
+};
+
+
+struct pmu_cmd {
+	struct pmu_hdr hdr;
+	union {
+		struct pmu_zbc_cmd zbc;
+	} cmd;
+};
+
+struct pmu_msg {
+	struct pmu_hdr hdr;
+	union {
+		struct pmu_init_msg init;
+		struct pmu_perfmon_msg perfmon;
+		struct pmu_rc_msg rc;
+	} msg;
+};
+
+/* write by sw, read by pmu, protected by sw mutex lock */
+#define PMU_COMMAND_QUEUE_HPQ		0
+/* write by sw, read by pmu, protected by sw mutex lock */
+#define PMU_COMMAND_QUEUE_LPQ		1
+/* write by pmu, read by sw, accessed by interrupt handler, no lock */
+#define PMU_MESSAGE_QUEUE		4
+#define PMU_QUEUE_COUNT			5
+
+enum {
+	PMU_MUTEX_ID_RSVD1 = 0,
+	PMU_MUTEX_ID_GPUSER,
+	PMU_MUTEX_ID_GPMUTEX,
+	PMU_MUTEX_ID_I2C,
+	PMU_MUTEX_ID_RMLOCK,
+	PMU_MUTEX_ID_MSGBOX,
+	PMU_MUTEX_ID_FIFO,
+	PMU_MUTEX_ID_PG,
+	PMU_MUTEX_ID_GR,
+	PMU_MUTEX_ID_CLK,
+	PMU_MUTEX_ID_RSVD6,
+	PMU_MUTEX_ID_RSVD7,
+	PMU_MUTEX_ID_RSVD8,
+	PMU_MUTEX_ID_RSVD9,
+	PMU_MUTEX_ID_INVALID
+};
+
+#define PMU_IS_COMMAND_QUEUE(id)	\
+		((id)  < PMU_MESSAGE_QUEUE)
+
+#define PMU_IS_SW_COMMAND_QUEUE(id)	\
+		(((id) == PMU_COMMAND_QUEUE_HPQ) || \
+		 ((id) == PMU_COMMAND_QUEUE_LPQ))
+
+#define  PMU_IS_MESSAGE_QUEUE(id)	\
+		((id) == PMU_MESSAGE_QUEUE)
+
+enum {
+	OFLAG_READ = 0,
+	OFLAG_WRITE
+};
+
+#define QUEUE_SET		(true)
+	/*todo find how to get cpu_pa*/
+#define QUEUE_GET		(false)
+
+#define QUEUE_ALIGNMENT		(4)
+
+#define PMU_PGENG_GR_BUFFER_IDX_INIT	(0)
+#define PMU_PGENG_GR_BUFFER_IDX_ZBC	(1)
+#define PMU_PGENG_GR_BUFFER_IDX_FECS	(2)
+
+enum {
+	PMU_DMAIDX_UCODE         = 0,
+	PMU_DMAIDX_VIRT          = 1,
+	PMU_DMAIDX_PHYS_VID      = 2,
+	PMU_DMAIDX_PHYS_SYS_COH  = 3,
+	PMU_DMAIDX_PHYS_SYS_NCOH = 4,
+	PMU_DMAIDX_RSVD          = 5,
+	PMU_DMAIDX_PELPG         = 6,
+	PMU_DMAIDX_END           = 7
+};
+
+#define PMU_MUTEX_ID_IS_VALID(id)	\
+		((id) < PMU_MUTEX_ID_INVALID)
+
+#define PMU_INVALID_MUTEX_OWNER_ID	(0)
+
+struct pmu_mutex {
+	u32 id;
+	u32 index;
+	u32 ref_cnt;
+};
+
+
+#define PMU_INVALID_SEQ_DESC		(~0)
+
+enum {
+	PMU_SEQ_STATE_FREE = 0,
+	PMU_SEQ_STATE_PENDING,
+	PMU_SEQ_STATE_USED,
+	PMU_SEQ_STATE_CANCELLED
+};
+
+struct pmu_payload {
+	struct {
+		void *buf;
+		u32 offset;
+		u32 size;
+	} in, out;
+};
+
+typedef void (*pmu_callback)(struct nvkm_pmu *, struct pmu_msg *, void *,
+u32, u32);
+
+struct pmu_sequence {
+	u8 id;
+	u32 state;
+	u32 desc;
+	struct pmu_msg *msg;
+	struct pmu_allocation_gk20a in_gk20a;
+	struct pmu_allocation_gk20a out_gk20a;
+	u8 *out_payload;
+	pmu_callback callback;
+	void *cb_params;
+};
+struct pmu_gk20a_data {
+	struct pmu_perfmon_counter_gk20a perfmon_counter_gk20a;
+	u32 perfmon_state_id[PMU_DOMAIN_GROUP_NUM];
+};
+
+#endif /*_GK20A_H__*/
diff --git a/drm/nouveau/nvkm/subdev/pmu/priv.h b/drm/nouveau/nvkm/subdev/pmu/priv.h
index 998410563bfd..c4686e418582 100644
--- a/drm/nouveau/nvkm/subdev/pmu/priv.h
+++ b/drm/nouveau/nvkm/subdev/pmu/priv.h
@@ -2,7 +2,91 @@
 #define __NVKM_PMU_PRIV_H__
 #include <subdev/pmu.h>
 #include <subdev/pmu/fuc/os.h>
+#include <core/object.h>
+#include <core/device.h>
+#include <core/parent.h>
+#include <core/mm.h>
+#include <linux/rwsem.h>
+#include <linux/slab.h>
+#include <subdev/mmu.h>
+#include <core/gpuobj.h>
 
+static inline u32 u64_hi32(u64 n)
+{
+	return (u32)((n >> 32) & ~(u32)0);
+}
+
+static inline u32 u64_lo32(u64 n)
+{
+	return (u32)(n & ~(u32)0);
+}
+
+/* #define ALLOCATOR_DEBUG */
+
+/* main struct */
+struct nvkm_pmu_allocator {
+
+	char name[32];			/* name for allocator */
+/*struct rb_root rb_root;*/		/* rb tree root for blocks */
+
+	u32 base;			/* min value of this linear space */
+	u32 limit;			/* max value = limit - 1 */
+
+	unsigned long *bitmap;		/* bitmap */
+
+	struct gk20a_alloc_block *block_first;	/* first block in list */
+	struct gk20a_alloc_block *block_recent; /* last visited block */
+
+	u32 first_free_addr;		/* first free addr, non-contigous
+					   allocation preferred start,
+					   in order to pick up small holes */
+	u32 last_free_addr;		/* last free addr, contiguous
+					   allocation preferred start */
+	u32 cached_hole_size;		/* max free hole size up to
+					   last_free_addr */
+	u32 block_count;		/* number of blocks */
+
+	struct rw_semaphore rw_sema;	/* lock */
+	struct kmem_cache *block_cache;	/* slab cache */
+
+	/* if enabled, constrain to [base, limit) */
+	struct {
+		bool enable;
+		u32 base;
+		u32 limit;
+	} constraint;
+
+	int (*alloc)(struct nvkm_pmu_allocator *allocator,
+		u32 *addr, u32 len, u32 align);
+	int (*free)(struct nvkm_pmu_allocator *allocator,
+		u32 addr, u32 len, u32 align);
+
+};
+
+int nvkm_pmu_allocator_init(struct nvkm_pmu_allocator *allocator,
+			const char *name, u32 base, u32 size);
+void nvkm_pmu_allocator_destroy(struct nvkm_pmu_allocator *allocator);
+
+int nvkm_pmu_allocator_block_alloc(struct nvkm_pmu_allocator *allocator,
+			u32 *addr, u32 len, u32 align);
+
+int nvkm_pmu_allocator_block_free(struct nvkm_pmu_allocator *allocator,
+			u32 addr, u32 len, u32 align);
+
+#if defined(ALLOCATOR_DEBUG)
+
+#define allocator_dbg(alloctor, format, arg...)				\
+do {								\
+	if (1)							\
+		pr_debug("nvkm_pmu_allocator (%s) %s: " format "\n",\
+			alloctor->name, __func__, ##arg);\
+} while (0)
+
+#else /* ALLOCATOR_DEBUG */
+
+#define allocator_dbg(format, arg...)
+
+#endif /* ALLOCATOR_DEBUG */
 #define nvkm_pmu_create(p, e, o, d)                                         \
 	nvkm_pmu_create_((p), (e), (o), sizeof(**d), (void **)d)
 #define nvkm_pmu_destroy(p)                                                 \
@@ -26,6 +110,179 @@ int _nvkm_pmu_ctor(struct nvkm_object *, struct nvkm_object *,
 int _nvkm_pmu_init(struct nvkm_object *);
 int _nvkm_pmu_fini(struct nvkm_object *, bool);
 void nvkm_pmu_pgob(struct nvkm_pmu *pmu, bool enable);
+#define PMU_PG_IDLE_THRESHOLD			15000
+#define PMU_PG_POST_POWERUP_IDLE_THRESHOLD	1000000
+
+/* state transition :
+    OFF => [OFF_ON_PENDING optional] => ON_PENDING => ON => OFF
+    ON => OFF is always synchronized */
+#define PMU_ELPG_STAT_OFF		0   /* elpg is off */
+#define PMU_ELPG_STAT_ON		1   /* elpg is on */
+/* elpg is off, ALLOW cmd has been sent, wait for ack */
+#define PMU_ELPG_STAT_ON_PENDING	2
+/* elpg is on, DISALLOW cmd has been sent, wait for ack */
+#define PMU_ELPG_STAT_OFF_PENDING	3
+/* elpg is off, caller has requested on, but ALLOW
+cmd hasn't been sent due to ENABLE_ALLOW delay */
+#define PMU_ELPG_STAT_OFF_ON_PENDING	4
+
+/* Falcon Register index */
+#define PMU_FALCON_REG_R0		(0)
+#define PMU_FALCON_REG_R1		(1)
+#define PMU_FALCON_REG_R2		(2)
+#define PMU_FALCON_REG_R3		(3)
+#define PMU_FALCON_REG_R4		(4)
+#define PMU_FALCON_REG_R5		(5)
+#define PMU_FALCON_REG_R6		(6)
+#define PMU_FALCON_REG_R7		(7)
+#define PMU_FALCON_REG_R8		(8)
+#define PMU_FALCON_REG_R9		(9)
+#define PMU_FALCON_REG_R10		(10)
+#define PMU_FALCON_REG_R11		(11)
+#define PMU_FALCON_REG_R12		(12)
+#define PMU_FALCON_REG_R13		(13)
+#define PMU_FALCON_REG_R14		(14)
+#define PMU_FALCON_REG_R15		(15)
+#define PMU_FALCON_REG_IV0		(16)
+#define PMU_FALCON_REG_IV1		(17)
+#define PMU_FALCON_REG_UNDEFINED	(18)
+#define PMU_FALCON_REG_EV		(19)
+#define PMU_FALCON_REG_SP		(20)
+#define PMU_FALCON_REG_PC		(21)
+#define PMU_FALCON_REG_IMB		(22)
+#define PMU_FALCON_REG_DMB		(23)
+#define PMU_FALCON_REG_CSW		(24)
+#define PMU_FALCON_REG_CCR		(25)
+#define PMU_FALCON_REG_SEC		(26)
+#define PMU_FALCON_REG_CTX		(27)
+#define PMU_FALCON_REG_EXCI		(28)
+#define PMU_FALCON_REG_RSVD0		(29)
+#define PMU_FALCON_REG_RSVD1		(30)
+#define PMU_FALCON_REG_RSVD2		(31)
+#define PMU_FALCON_REG_SIZE		(32)
+
+/* Choices for pmu_state */
+#define PMU_STATE_OFF			0 /* PMU is off */
+#define PMU_STATE_STARTING		1 /* PMU is on, but not booted */
+#define PMU_STATE_INIT_RECEIVED		2 /* PMU init message received */
+#define PMU_STATE_ELPG_BOOTING		3 /* PMU is booting */
+#define PMU_STATE_ELPG_BOOTED		4 /* ELPG is initialized */
+#define PMU_STATE_LOADING_PG_BUF	5 /* Loading PG buf */
+#define PMU_STATE_LOADING_ZBC		6 /* Loading ZBC buf */
+#define PMU_STATE_STARTED		7 /* Fully unitialized */
+
+#define PMU_QUEUE_COUNT		5
+
+#define PMU_MAX_NUM_SEQUENCES		(256)
+#define PMU_SEQ_BIT_SHIFT		(5)
+#define PMU_SEQ_TBL_SIZE	\
+		(PMU_MAX_NUM_SEQUENCES >> PMU_SEQ_BIT_SHIFT)
+
+#define PMU_SHA1_GID_SIGNATURE		0xA7C66AD2
+#define PMU_SHA1_GID_SIGNATURE_SIZE	4
+
+#define PMU_SHA1_GID_SIZE	16
+
+struct pmu_queue {
+
+	/* used by hw, for BIOS/SMI queue */
+	u32 mutex_id;
+	u32 mutex_lock;
+	/* used by sw, for LPQ/HPQ queue */
+	struct mutex mutex;
+
+	/* current write position */
+	u32 position;
+	/* physical dmem offset where this queue begins */
+	u32 offset;
+	/* logical queue identifier */
+	u32 id;
+	/* physical queue index */
+	u32 index;
+	/* in bytes */
+	u32 size;
+
+	/* open-flag */
+	u32 oflag;
+	bool opened; /* opened implies locked */
+};
+
+struct pmu_sha1_gid {
+	bool valid;
+	u8 gid[PMU_SHA1_GID_SIZE];
+};
+
+struct pmu_sha1_gid_data {
+	u8 signature[PMU_SHA1_GID_SIGNATURE_SIZE];
+	u8 gid[PMU_SHA1_GID_SIZE];
+};
+
+struct pmu_desc {
+
+	struct pmu_ucode_desc *desc;
+	struct pmu_buf_desc ucode;
+
+	struct pmu_buf_desc pg_buf;
+	/* TBD: remove this if ZBC seq is fixed */
+	struct pmu_buf_desc seq_buf;
+	struct pmu_buf_desc trace_buf;
+	bool buf_loaded;
+
+	struct pmu_sha1_gid gid_info;
+
+	struct pmu_queue queue[PMU_QUEUE_COUNT];
+
+	struct pmu_sequence *seq;
+	unsigned long pmu_seq_tbl[PMU_SEQ_TBL_SIZE];
+	u32 next_seq_desc;
+
+	struct pmu_mutex *mutex;
+	u32 mutex_cnt;
+
+	struct mutex pmu_copy_lock;
+	struct mutex pmu_seq_lock;
+
+	struct nvkm_pmu_allocator dmem;
+
+	u32 *ucode_image;
+	bool pmu_ready;
+
+	u32 zbc_save_done;
+
+	u32 stat_dmem_offset;
+
+	u32 elpg_stat;
+
+	int pmu_state;
+
+#define PMU_ELPG_ENABLE_ALLOW_DELAY_MSEC	1 /* msec */
+	struct work_struct isr_workq;
+	struct mutex elpg_mutex; /* protect elpg enable/disable */
+/* disable -1, enable +1, <=0 elpg disabled, > 0 elpg enabled */
+	int elpg_refcnt;
+
+	bool initialized;
+
+	void (*remove_support)(struct pmu_desc *pmu);
+	bool sw_ready;
+	bool perfmon_ready;
+
+	u32 sample_buffer;
+	u32 load_shadow;
+	u32 load_avg;
+
+	struct mutex isr_mutex;
+	bool isr_enabled;
+
+	bool zbc_ready;
+	unsigned long perfmon_events_cnt;
+	bool perfmon_sampling_enabled;
+	u8 pmu_mode;
+	u32 falcon_id;
+	u32 aelpg_param[5];
+	void *pmu_chip_data;
+	struct nvkm_pmu *pmu;
+};
 
 struct nvkm_pmu_impl {
 	struct nvkm_oclass base;
@@ -39,5 +296,12 @@ struct nvkm_pmu_impl {
 	} data;
 
 	void (*pgob)(struct nvkm_pmu *, bool);
+	struct pmu_desc pmudata;
 };
+
+static inline struct nvkm_pmu *impl_from_pmu(struct pmu_desc *pmu)
+{
+	return pmu->pmu;
+}
+
 #endif
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [Nouveau] [PATCH] pmu/gk20a: PMU boot support.
       [not found] ` <1426055631-1166-1-git-send-email-dgoyal-DDmLM1+adcrQT0dZR+AlfA@public.gmane.org>
@ 2015-03-11 17:10   ` Ilia Mirkin
       [not found]     ` <CAKb7Uvj0xMvDWjKjGzbD6Tk0NArfkh4Vjvt4eRQ8XoHgR+7bsg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2015-03-13 10:12   ` Alexandre Courbot
  1 sibling, 1 reply; 6+ messages in thread
From: Ilia Mirkin @ 2015-03-11 17:10 UTC (permalink / raw)
  To: Deepak Goyal
  Cc: Ben Skeggs, Alexandre Courbot,
	nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	linux-tegra-u79uwXL29TY76Z2rM5mHXA

Hi Deepak,

There's... a lot of stuff going on here. Can you describe the goal of
this patch (which could then be used as the patch commit message)? The
current one basically boils down to "Add support for loading PMU", but
merely loading the fw into a fuc engine is just a handful lines of
code. Also, except in rare cases, it's customary to split up patches
of this size into smaller, more reviewable chunks, which add on bits
of functionality as they go.

From what I can tell, you're adding the kernel-side interface for a
hypothetical (and presumably closed-source) PMU blob that NVIDIA will
supply. In essence, the blob is expected to implement a RTOS which
runs on the PMU's falcon CPU. There are a bunch of API's implemented
by this blob that the host can call, but it also does things on its
own. For the kernel side, each of these API calls should probably be a
separate patch (after an initial "just load it and do nothing" style
patch). Or perhaps have the infrastructure that you add first and then
something that implements the API calls.

Cheers,

  -ilia


On Wed, Mar 11, 2015 at 2:33 AM, Deepak Goyal <dgoyal-DDmLM1+adcrQT0dZR+AlfA@public.gmane.org> wrote:
> It adds PMU boot support.It loads PMU
> firmware into PMU falcon.RM/Kernel driver
> receives INIT ack (through interrupt mechanism)
> from PMU when PMU boots with success.
>
> Signed-off-by: Deepak Goyal <dgoyal-DDmLM1+adcrQT0dZR+AlfA@public.gmane.org>
> ---
>  drm/nouveau/include/nvkm/subdev/pmu.h |   26 +-
>  drm/nouveau/nvkm/subdev/pmu/base.c    |  108 ++
>  drm/nouveau/nvkm/subdev/pmu/gk20a.c   | 2131 ++++++++++++++++++++++++++++++++-
>  drm/nouveau/nvkm/subdev/pmu/gk20a.h   |  369 ++++++
>  drm/nouveau/nvkm/subdev/pmu/priv.h    |  264 ++++
>  5 files changed, 2884 insertions(+), 14 deletions(-)
>  create mode 100644 drm/nouveau/nvkm/subdev/pmu/gk20a.h
>
> diff --git a/drm/nouveau/include/nvkm/subdev/pmu.h b/drm/nouveau/include/nvkm/subdev/pmu.h
> index 7b86acc634a0..659b4e0ba02b 100644
> --- a/drm/nouveau/include/nvkm/subdev/pmu.h
> +++ b/drm/nouveau/include/nvkm/subdev/pmu.h
> @@ -1,7 +1,20 @@
>  #ifndef __NVKM_PMU_H__
>  #define __NVKM_PMU_H__
>  #include <core/subdev.h>
> +#include <core/device.h>
> +#include <subdev/mmu.h>
> +#include <linux/debugfs.h>
>
> +struct pmu_buf_desc {
> +       struct nvkm_gpuobj *pmubufobj;
> +       struct nvkm_vma pmubufvma;
> +       size_t size;
> +};
> +struct pmu_priv_vm {
> +       struct nvkm_gpuobj *mem;
> +       struct nvkm_gpuobj *pgd;
> +       struct nvkm_vm *vm;
> +};
>  struct nvkm_pmu {
>         struct nvkm_subdev base;
>
> @@ -20,9 +33,20 @@ struct nvkm_pmu {
>                 u32 message;
>                 u32 data[2];
>         } recv;
> -
> +       wait_queue_head_t init_wq;
> +       bool gr_initialised;
> +       struct dentry *debugfs;
> +       struct pmu_buf_desc *pg_buf;
> +       struct pmu_priv_vm *pmuvm;
>         int  (*message)(struct nvkm_pmu *, u32[2], u32, u32, u32, u32);
>         void (*pgob)(struct nvkm_pmu *, bool);
> +       int (*pmu_mutex_acquire)(struct nvkm_pmu *, u32 id, u32 *token);
> +       int (*pmu_mutex_release)(struct nvkm_pmu *, u32 id, u32 *token);
> +       int (*pmu_load_norm)(struct nvkm_pmu *pmu, u32 *load);
> +       int (*pmu_load_update)(struct nvkm_pmu *pmu);
> +       void (*pmu_reset_load_counters)(struct nvkm_pmu *pmu);
> +       void (*pmu_get_load_counters)(struct nvkm_pmu *pmu, u32 *busy_cycles,
> +               u32 *total_cycles);
>  };
>
>  static inline struct nvkm_pmu *
> diff --git a/drm/nouveau/nvkm/subdev/pmu/base.c b/drm/nouveau/nvkm/subdev/pmu/base.c
> index 054b2d2eec35..6afd389b9764 100644
> --- a/drm/nouveau/nvkm/subdev/pmu/base.c
> +++ b/drm/nouveau/nvkm/subdev/pmu/base.c
> @@ -25,6 +25,114 @@
>
>  #include <subdev/timer.h>
>
> +/* init allocator struct */
> +int nvkm_pmu_allocator_init(struct nvkm_pmu_allocator *allocator,
> +               const char *name, u32 start, u32 len)
> +{
> +       memset(allocator, 0, sizeof(struct nvkm_pmu_allocator));
> +
> +       strncpy(allocator->name, name, 32);
> +
> +       allocator->base = start;
> +       allocator->limit = start + len - 1;
> +
> +       allocator->bitmap = kcalloc(BITS_TO_LONGS(len), sizeof(long),
> +                       GFP_KERNEL);
> +       if (!allocator->bitmap)
> +               return -ENOMEM;
> +
> +       allocator_dbg(allocator, "%s : base %d, limit %d",
> +               allocator->name, allocator->base);
> +
> +       init_rwsem(&allocator->rw_sema);
> +
> +       allocator->alloc = nvkm_pmu_allocator_block_alloc;
> +       allocator->free = nvkm_pmu_allocator_block_free;
> +
> +       return 0;
> +}
> +
> +/* destroy allocator, free all remaining blocks if any */
> +void nvkm_pmu_allocator_destroy(struct nvkm_pmu_allocator *allocator)
> +{
> +       down_write(&allocator->rw_sema);
> +
> +       kfree(allocator->bitmap);
> +
> +       memset(allocator, 0, sizeof(struct nvkm_pmu_allocator));
> +}
> +
> +/*
> + * *addr != ~0 for fixed address allocation. if *addr == 0, base addr is
> + * returned to caller in *addr.
> + *
> + * contiguous allocation, which allocates one block of
> + * contiguous address.
> +*/
> +int nvkm_pmu_allocator_block_alloc(struct nvkm_pmu_allocator *allocator,
> +               u32 *addr, u32 len, u32 align)
> +{
> +       unsigned long _addr;
> +
> +       allocator_dbg(allocator, "[in] addr %d, len %d", *addr, len);
> +
> +       if ((*addr != 0 && *addr < allocator->base) || /* check addr range */
> +           *addr + len > allocator->limit || /* check addr range */
> +           *addr & (align - 1) || /* check addr alignment */
> +            len == 0)                        /* check len */
> +               return -EINVAL;
> +
> +       len = ALIGN(len, align);
> +       if (!len)
> +               return -ENOMEM;
> +
> +       down_write(&allocator->rw_sema);
> +
> +       _addr = bitmap_find_next_zero_area(allocator->bitmap,
> +                       allocator->limit - allocator->base + 1,
> +                       *addr ? (*addr - allocator->base) : 0,
> +                       len,
> +                       align - 1);
> +       if ((_addr > allocator->limit - allocator->base + 1) ||
> +           (*addr && *addr != (_addr + allocator->base))) {
> +               up_write(&allocator->rw_sema);
> +               return -ENOMEM;
> +       }
> +
> +       bitmap_set(allocator->bitmap, _addr, len);
> +       *addr = allocator->base + _addr;
> +
> +       up_write(&allocator->rw_sema);
> +
> +       allocator_dbg(allocator, "[out] addr %d, len %d", *addr, len);
> +
> +       return 0;
> +}
> +
> +/* free all blocks between start and end */
> +int nvkm_pmu_allocator_block_free(struct nvkm_pmu_allocator *allocator,
> +               u32 addr, u32 len, u32 align)
> +{
> +       allocator_dbg(allocator, "[in] addr %d, len %d", addr, len);
> +
> +       if (addr + len > allocator->limit || /* check addr range */
> +           addr < allocator->base ||
> +           addr & (align - 1))   /* check addr alignment */
> +               return -EINVAL;
> +
> +       len = ALIGN(len, align);
> +       if (!len)
> +               return -EINVAL;
> +
> +       down_write(&allocator->rw_sema);
> +       bitmap_clear(allocator->bitmap, addr - allocator->base, len);
> +       up_write(&allocator->rw_sema);
> +
> +       allocator_dbg(allocator, "[out] addr %d, len %d", addr, len);
> +
> +       return 0;
> +}
> +
>  void
>  nvkm_pmu_pgob(struct nvkm_pmu *pmu, bool enable)
>  {
> diff --git a/drm/nouveau/nvkm/subdev/pmu/gk20a.c b/drm/nouveau/nvkm/subdev/pmu/gk20a.c
> index a49934bbe637..0fd2530301a3 100644
> --- a/drm/nouveau/nvkm/subdev/pmu/gk20a.c
> +++ b/drm/nouveau/nvkm/subdev/pmu/gk20a.c
> @@ -20,21 +20,67 @@
>   * DEALINGS IN THE SOFTWARE.
>   */
>  #include "priv.h"
> +#include "gk20a.h"
> +#include <core/client.h>
> +#include <core/gpuobj.h>
> +#include <subdev/bar.h>
> +#include <subdev/fb.h>
> +#include <subdev/mc.h>
> +#include <subdev/timer.h>
> +#include <subdev/mmu.h>
> +#include <subdev/pmu.h>
> +#include <engine/falcon.h>
>
> +#include <linux/delay.h>       /* for mdelay */
> +#include <linux/firmware.h>
> +#include <linux/clk.h>
> +#include <linux/module.h>
> +#include <linux/debugfs.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/uaccess.h>
>  #include <subdev/clk.h>
>  #include <subdev/timer.h>
>  #include <subdev/volt.h>
>
>  #define BUSY_SLOT      0
>  #define CLK_SLOT       7
> +#define GK20A_PMU_UCODE_IMAGE  "gpmu_ucode.bin"
> +
> +static int falc_trace_show(struct seq_file *s, void *data);
> +static int falc_trace_open(struct inode *inode, struct file *file)
> +{
> +       return single_open(file, falc_trace_show, inode->i_private);
> +}
> +static const struct file_operations falc_trace_fops = {
> +       .open           = falc_trace_open,
> +       .read           = seq_read,
> +       .llseek         = seq_lseek,
> +       .release        = single_release,
> +};
> +struct pmu_priv_vm pmuvm;
> +const struct firmware *pmufw;
> +
> +static void  gk20a_pmu_isr(struct nvkm_pmu *ppmu);
> +static void pmu_process_message(struct work_struct *work);
> +
> +static int
> +gk20a_pmu_init_vm(struct nvkm_pmu *ppmu, const struct firmware *fw);
> +static void
> +gk20a_pmu_dump_firmware_info(struct nvkm_pmu *ppmu, const struct firmware *fw);
> +
> +static int
> +gk20a_pmu_load_firmware(struct nvkm_pmu *ppmu, const struct firmware **pfw);
> +static int gk20a_init_pmu_setup_sw(struct nvkm_pmu *ppmu);
> +static int gk20a_init_pmu_setup_hw1(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc);
> +static void gk20a_pmu_intr(struct nvkm_subdev *subdev);
>
> +static void gk20a_pmu_pgob(struct nvkm_pmu *ppmu, bool enable);
>  struct gk20a_pmu_dvfs_data {
>         int p_load_target;
>         int p_load_max;
>         int p_smooth;
>         unsigned int avg_load;
>  };
> -
>  struct gk20a_pmu_priv {
>         struct nvkm_pmu base;
>         struct nvkm_alarm alarm;
> @@ -46,7 +92,30 @@ struct gk20a_pmu_dvfs_dev_status {
>         unsigned long busy;
>         int cur_state;
>  };
> -
> +int gk20a_pmu_debugfs_init(struct nvkm_pmu *ppmu)
> +{
> +       struct dentry *d;
> +       ppmu->debugfs = debugfs_create_dir("PMU", NULL);
> +       if (!ppmu->debugfs)
> +               goto err_out;
> +       nv_debug(ppmu, "PMU directory created with success\n");
> +       d = debugfs_create_file(
> +               "falc_trace", 0644, ppmu->debugfs, ppmu,
> +                                               &falc_trace_fops);
> +       if (!d)
> +               goto err_out;
> +       return 0;
> +err_out:
> +       pr_err("%s: Failed to make debugfs node\n", __func__);
> +       debugfs_remove_recursive(ppmu->debugfs);
> +       return -ENOMEM;
> +}
> +void gk20a_pmu_release_firmware(struct nvkm_pmu *ppmu,
> +                                                   const struct firmware *pfw)
> +{
> +       nv_debug(ppmu, "firmware released\n");
> +       release_firmware(pfw);
> +}
>  static int
>  gk20a_pmu_dvfs_target(struct gk20a_pmu_priv *priv, int *state)
>  {
> @@ -164,31 +233,145 @@ gk20a_pmu_fini(struct nvkm_object *object, bool suspend)
>  {
>         struct nvkm_pmu *pmu = (void *)object;
>         struct gk20a_pmu_priv *priv = (void *)pmu;
> -
> +       nv_wr32(pmu, 0x10a014, 0x00000060);
> +       flush_work(&pmu->recv.work);
>         nvkm_timer_alarm_cancel(priv, &priv->alarm);
>
>         return nvkm_subdev_fini(&pmu->base, suspend);
>  }
> +static bool find_hex_in_string(char *strings, u32 *hex_pos)
> +{
> +       u32 i = 0, j = strlen(strings);
> +       for (; i < j; i++) {
> +               if (strings[i] == '%')
> +                       if (strings[i + 1] == 'x' || strings[i + 1] == 'X') {
> +                               *hex_pos = i;
> +                               return true;
> +                       }
> +       }
> +       *hex_pos = -1;
> +       return false;
> +}
> +static int falc_trace_show(struct seq_file *s, void *data)
> +{
> +       struct nvkm_pmu *ppmu = s->private;
> +       struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +       struct pmu_desc *pmu = &impl->pmudata;
> +       u32 i = 0, j = 0, k, l, m;
> +       char part_str[40];
> +       u32 data1;
> +       char *log_data = kmalloc(GK20A_PMU_TRACE_BUFSIZE, GFP_KERNEL);
> +       char *trace = log_data;
> +       u32 *trace1 = (u32 *)log_data;
> +       for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 4) {
> +               data1 = nv_ro32(pmu->trace_buf.pmubufobj, 0x0000 + i);
> +               memcpy(log_data + i, (void *)(&data1), 32);
> +       }
> +       for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) {
> +               for (j = 0; j < 0x40; j++)
> +                       if (trace1[(i / 4) + j])
> +                               break;
> +               if (j == 0x40)
> +                       goto out;
> +               seq_printf(s, "Index %x: ", trace1[(i / 4)]);
> +               l = 0;
> +               m = 0;
> +               while (find_hex_in_string((trace+i+20+m), &k)) {
> +                       if (k >= 40)
> +                               break;
> +                       strncpy(part_str, (trace+i+20+m), k);
> +                       part_str[k] = 0;
> +                       seq_printf(s, "%s0x%x", part_str,
> +                                       trace1[(i / 4) + 1 + l]);
> +                       l++;
> +                       m += k + 2;
> +               }
> +               seq_printf(s, "%s", (trace+i+20+m));
> +       }
> +out:
> +       kfree(log_data);
> +       return 0;
> +}
>
>  int
>  gk20a_pmu_init(struct nvkm_object *object)
>  {
> -       struct nvkm_pmu *pmu = (void *)object;
> -       struct gk20a_pmu_priv *priv = (void *)pmu;
> +       struct nvkm_pmu *ppmu = (void *)object;
> +       struct nvkm_mc *pmc = nvkm_mc(object);
> +       struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +       struct pmu_desc *pmu;
> +       struct gk20a_pmu_priv *priv;
> +       struct pmu_gk20a_data *gk20adata;
>         int ret;
>
> -       ret = nvkm_subdev_init(&pmu->base);
> +       pmu = &impl->pmudata;
> +
> +       nv_subdev(ppmu)->intr = gk20a_pmu_intr;
> +
> +       mutex_init(&pmu->isr_mutex);
> +       mutex_init(&pmu->pmu_copy_lock);
> +       mutex_init(&pmu->pmu_seq_lock);
> +
> +       if (pmufw == NULL) {
> +               ret = gk20a_pmu_load_firmware(ppmu, &pmufw);
> +               if (ret < 0) {
> +                       nv_error(ppmu, "failed to load pmu fimware\n");
> +                       return ret;
> +               }
> +               nv_debug(ppmu, "loading firmware sucessful\n");
> +               ret = gk20a_pmu_init_vm(ppmu, pmufw);
> +               if (ret < 0) {
> +                       nv_error(ppmu, "failed to map pmu fw to va space\n");
> +                       goto init_vm_err;
> +               }
> +       }
> +       pmu->desc = (struct pmu_ucode_desc *)pmufw->data;
> +       gk20a_pmu_dump_firmware_info(ppmu, pmufw);
> +
> +       if (pmu->desc->app_version != APP_VERSION_GK20A) {
> +               nv_error(ppmu,
> +               "PMU code version not supported version: %d\n",
> +                       pmu->desc->app_version);
> +               ret = -EINVAL;
> +               goto app_ver_err;
> +       }
> +       gk20adata = kzalloc(sizeof(*gk20adata), GFP_KERNEL);
> +       if (!gk20adata) {
> +               ret = -ENOMEM;
> +               goto err;
> +       }
> +
> +       pmu->pmu_chip_data = (void *)gk20adata;
> +
> +       pmu->remove_support = gk20a_remove_pmu_support;
> +
> +       ret = gk20a_init_pmu_setup_sw(ppmu);
>         if (ret)
> -               return ret;
> +               goto err;
> +
> +       pmu->pmu_state = PMU_STATE_STARTING;
> +       ret = gk20a_init_pmu_setup_hw1(ppmu, pmc);
> +       if (ret)
> +               goto err;
> +
> +       priv = (void *)ppmu;
>
> -       pmu->pgob = nvkm_pmu_pgob;
> +       ret = nvkm_subdev_init(&ppmu->base);
> +       if (ret)
> +               goto err;
> +
> +       ppmu->pgob = nvkm_pmu_pgob;
>
> -       /* init pwr perf counter */
> -       nv_wr32(pmu, 0x10a504 + (BUSY_SLOT * 0x10), 0x00200001);
> -       nv_wr32(pmu, 0x10a50c + (BUSY_SLOT * 0x10), 0x00000002);
> -       nv_wr32(pmu, 0x10a50c + (CLK_SLOT * 0x10), 0x00000003);
> +       /* init pmu perf counter */
> +       nv_wr32(ppmu, 0x10a504 + (BUSY_SLOT * 0x10), 0x00200001);
> +       nv_wr32(ppmu, 0x10a50c + (BUSY_SLOT * 0x10), 0x00000002);
> +       nv_wr32(ppmu, 0x10a50c + (CLK_SLOT * 0x10), 0x00000003);
>
> -       nvkm_timer_alarm(pmu, 2000000000, &priv->alarm);
> +       nvkm_timer_alarm(ppmu, 2000000000, &priv->alarm);
> +err:
> +init_vm_err:
> +app_ver_err:
> +       gk20a_pmu_release_firmware(ppmu, pmufw);
>         return ret;
>  }
>
> @@ -226,4 +409,1926 @@ gk20a_pmu_oclass = &(struct nvkm_pmu_impl) {
>                 .init = gk20a_pmu_init,
>                 .fini = gk20a_pmu_fini,
>         },
> +       .base.handle = NV_SUBDEV(PMU, 0xea),
> +       .pgob = gk20a_pmu_pgob,
>  }.base;
> +void pmu_copy_from_dmem(struct pmu_desc *pmu,
> +               u32 src, u8 *dst, u32 size, u8 port)
> +{
> +       u32 i, words, bytes;
> +       u32 data, addr_mask;
> +       u32 *dst_u32 = (u32 *)dst;
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +
> +       if (size == 0) {
> +               nv_error(ppmu, "size is zero\n");
> +               goto out;
> +       }
> +
> +       if (src & 0x3) {
> +               nv_error(ppmu, "src (0x%08x) not 4-byte aligned\n", src);
> +               goto out;
> +       }
> +
> +       mutex_lock(&pmu->pmu_copy_lock);
> +
> +       words = size >> 2;
> +       bytes = size & 0x3;
> +
> +       addr_mask = (0x3f << 2) | 0xff << 8;
> +
> +       src &= addr_mask;
> +
> +       nv_wr32(ppmu, (0x10a1c0 + (port * 8)), (src | (0x1 << 25)));
> +
> +       for (i = 0; i < words; i++) {
> +               dst_u32[i] = nv_rd32(ppmu, (0x0010a1c4 + port * 8));
> +               nv_debug(ppmu, "0x%08x\n", dst_u32[i]);
> +       }
> +       if (bytes > 0) {
> +               data = nv_rd32(ppmu, (0x0010a1c4 + port * 8));
> +               nv_debug(ppmu, "0x%08x\n", data);
> +
> +               for (i = 0; i < bytes; i++)
> +                       dst[(words << 2) + i] = ((u8 *)&data)[i];
> +       }
> +       mutex_unlock(&pmu->pmu_copy_lock);
> +out:
> +       nv_debug(ppmu, "exit %s\n", __func__);
> +}
> +
> +void pmu_copy_to_dmem(struct pmu_desc *pmu,
> +               u32 dst, u8 *src, u32 size, u8 port)
> +{
> +       u32 i, words, bytes;
> +       u32 data, addr_mask;
> +       u32 *src_u32 = (u32 *)src;
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +
> +       if (size == 0) {
> +               nv_error(ppmu, "size is zero\n");
> +               goto out;
> +       }
> +
> +       if (dst & 0x3) {
> +               nv_error(ppmu, "dst (0x%08x) not 4-byte aligned\n", dst);
> +               goto out;
> +       }
> +
> +       mutex_lock(&pmu->pmu_copy_lock);
> +
> +       words = size >> 2;
> +       bytes = size & 0x3;
> +
> +       addr_mask = (0x3f << 2) | 0xff << 8;
> +
> +       dst &= addr_mask;
> +
> +       nv_wr32(ppmu, (0x10a1c0 + (port * 8)), (dst | (0x1 << 24)));
> +
> +       for (i = 0; i < words; i++) {
> +               nv_wr32(ppmu, (0x10a1c4 + (port * 8)), src_u32[i]);
> +               nv_debug(ppmu, "0x%08x\n", src_u32[i]);
> +       }
> +       if (bytes > 0) {
> +               data = 0;
> +               for (i = 0; i < bytes; i++)
> +                       ((u8 *)&data)[i] = src[(words << 2) + i];
> +               nv_wr32(ppmu, (0x10a1c4 + (port * 8)), data);
> +               nv_debug(ppmu, "0x%08x\n", data);
> +       }
> +
> +       data = nv_rd32(ppmu, (0x10a1c0 + (port * 8))) & addr_mask;
> +       size = ALIGN(size, 4);
> +       if (data != dst + size) {
> +               nv_error(ppmu, "copy failed. bytes written %d, expected %d",
> +                       data - dst, size);
> +       }
> +       mutex_unlock(&pmu->pmu_copy_lock);
> +out:
> +       nv_debug(ppmu, "exit %s", __func__);
> +}
> +
> +static int pmu_idle(struct nvkm_pmu *ppmu)
> +{
> +       unsigned long end_jiffies = jiffies +
> +               msecs_to_jiffies(2000);
> +       u32 idle_stat;
> +
> +       /* wait for pmu idle */
> +       do {
> +               idle_stat = nv_rd32(ppmu, 0x0010a04c);
> +
> +               if (((idle_stat & 0x01) == 0) &&
> +                       ((idle_stat >> 1) & 0x7fff) == 0) {
> +                       break;
> +               }
> +
> +               if (time_after_eq(jiffies, end_jiffies)) {
> +                       nv_error(ppmu, "timeout waiting pmu idle : 0x%08x",
> +                                 idle_stat);
> +                       return -EBUSY;
> +               }
> +               usleep_range(100, 200);
> +       } while (1);
> +
> +       return 0;
> +}
> +
> +void pmu_enable_irq(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc,
> +                       bool enable)
> +{
> +
> +       nv_wr32(pmc, 0x00000640,
> +               nv_rd32(pmc, 0x00000640) &
> +               ~0x1000000);
> +       nv_wr32(pmc, 0x00000644,
> +               nv_rd32(pmc, 0x00000644) &
> +               ~0x1000000);
> +       nv_wr32(ppmu, 0x0010a014, 0xff);
> +
> +       if (enable) {
> +               nv_debug(ppmu, "enable pmu irq\n");
> +               /* dest 0=falcon, 1=host; level 0=irq0, 1=irq1
> +               nv_wr32(ppmu, 0x0010a01c, 0xff01ff52);
> +               0=disable, 1=enable*/
> +
> +               nv_wr32(ppmu, 0x0010a010, 0xff);
> +               nv_wr32(pmc, 0x00000640,
> +                       nv_rd32(pmc, 0x00000640) |
> +                       0x1000000);
> +               nv_wr32(pmc, 0x00000644,
> +                       nv_rd32(pmc, 0x00000644) |
> +                       0x1000000);
> +       } else {
> +               nv_debug(ppmu, "disable pmu irq\n");
> +       }
> +
> +}
> +
> +static int pmu_enable_hw(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc,
> +                       bool enable)
> +{
> +       u32 reg;
> +
> +       if (enable) {
> +               int retries = GK20A_IDLE_CHECK_MAX / GK20A_IDLE_CHECK_DEFAULT;
> +               /*need a spinlock?*/
> +               reg = nv_rd32(pmc, 0x00000200);
> +               reg |= 0x2000;
> +               nv_wr32(pmc, 0x00000200, reg);
> +               nv_rd32(pmc, 0x00000200);
> +               do {
> +                       u32 w = nv_rd32(ppmu, 0x0010a10c) & 0x6;
> +
> +                       if (!w)
> +                               return 0;
> +
> +                       udelay(GK20A_IDLE_CHECK_DEFAULT);
> +               } while (--retries);
> +
> +               reg = nv_rd32(pmc, 0x00000200);
> +               reg &= ~0x2000;
> +               nv_wr32(pmc, 0x00000200, reg);
> +               nv_error(ppmu, "Falcon mem scrubbing timeout\n");
> +
> +               goto error;
> +       } else {
> +               reg = nv_rd32(pmc, 0x00000200);
> +               reg &= ~0x2000;
> +               nv_wr32(pmc, 0x00000200, reg);
> +               return 0;
> +       }
> +error:
> +       return -ETIMEDOUT;
> +}
> +
> +static int pmu_enable(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc,
> +                       bool enable)
> +{
> +       u32 pmc_enable;
> +       int err;
> +
> +       if (!enable) {
> +               pmc_enable = nv_rd32(pmc, 0x200);
> +               if ((pmc_enable & 0x2000) != 0x0) {
> +                       pmu_enable_irq(ppmu, pmc, false);
> +                       pmu_enable_hw(ppmu, pmc, false);
> +               }
> +       } else {
> +               err = pmu_enable_hw(ppmu, pmc, true);
> +               if (err)
> +                       return err;
> +
> +               /* TBD: post reset */
> +
> +               err = pmu_idle(ppmu);
> +               if (err)
> +                       return err;
> +
> +               pmu_enable_irq(ppmu, pmc, true);
> +       }
> +
> +       return 0;
> +}
> +
> +int pmu_reset(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc)
> +{
> +       int err;
> +
> +       err = pmu_idle(ppmu);
> +       if (err)
> +               return err;
> +
> +       /* TBD: release pmu hw mutex */
> +
> +       err = pmu_enable(ppmu, pmc, false);
> +       if (err)
> +               return err;
> +
> +       err = pmu_enable(ppmu, pmc, true);
> +       if (err)
> +               return err;
> +
> +       return 0;
> +}
> +
> +static int pmu_bootstrap(struct pmu_desc *pmu)
> +{
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +       struct pmu_ucode_desc *desc = pmu->desc;
> +       u64 addr_code, addr_data, addr_load;
> +       u32 i, blocks, addr_args;
> +       u32 *adr_data, *adr_load, *adr_code;
> +       struct pmu_cmdline_args_gk20a cmdline_args;
> +       struct pmu_priv_vm *ppmuvm = &pmuvm;
> +
> +       nv_wr32(ppmu, 0x0010a048,
> +               nv_rd32(ppmu, 0x0010a048) | 0x01);
> +       /*bind the address*/
> +       nv_wr32(ppmu, 0x0010a480,
> +               ppmuvm->mem->addr >> 12 |
> +               0x1 << 30 |
> +               0x20000000);
> +
> +       /* TBD: load all other surfaces */
> +       cmdline_args.falc_trace_size = GK20A_PMU_TRACE_BUFSIZE;
> +       cmdline_args.falc_trace_dma_base =
> +                                u64_lo32(pmu->trace_buf.pmubufvma.offset >> 8);
> +       cmdline_args.falc_trace_dma_idx = GK20A_PMU_DMAIDX_VIRT;
> +       cmdline_args.cpu_freq_hz = 204;
> +       cmdline_args.secure_mode = 0;
> +
> +       addr_args = (nv_rd32(ppmu, 0x0010a108) >> 9) & 0x1ff;
> +       addr_args = addr_args << GK20A_PMU_DMEM_BLKSIZE2;
> +       addr_args -= sizeof(struct pmu_cmdline_args_gk20a);
> +       nv_debug(ppmu, "initiating copy to dmem\n");
> +       pmu_copy_to_dmem(pmu, addr_args,
> +                       (u8 *)&cmdline_args,
> +                       sizeof(struct pmu_cmdline_args_gk20a), 0);
> +
> +       nv_wr32(ppmu, 0x0010a1c0, 0x1 << 24);
> +
> +
> +       addr_code = u64_lo32((pmu->ucode.pmubufvma.offset +
> +                       desc->app_start_offset +
> +                       desc->app_resident_code_offset) >> 8);
> +
> +       addr_data = u64_lo32((pmu->ucode.pmubufvma.offset +
> +                       desc->app_start_offset +
> +                       desc->app_resident_data_offset) >> 8);
> +
> +       addr_load = u64_lo32((pmu->ucode.pmubufvma.offset +
> +                       desc->bootloader_start_offset) >> 8);
> +
> +       adr_code = (u32 *) (&addr_code);
> +       adr_load = (u32 *) (&addr_load);
> +       adr_data = (u32 *) (&addr_data);
> +       nv_wr32(ppmu, 0x0010a1c4, GK20A_PMU_DMAIDX_UCODE);
> +       nv_debug(ppmu, "0x%08x\n", GK20A_PMU_DMAIDX_UCODE);
> +       nv_wr32(ppmu, 0x0010a1c4, *(adr_code));
> +       nv_debug(ppmu, "0x%08x\n", *(adr_code));
> +       nv_wr32(ppmu, 0x0010a1c4, desc->app_size);
> +       nv_debug(ppmu, "0x%08x\n", desc->app_size);
> +       nv_wr32(ppmu, 0x0010a1c4, desc->app_resident_code_size);
> +       nv_debug(ppmu, "0x%08x\n", desc->app_resident_code_size);
> +       nv_wr32(ppmu, 0x0010a1c4, desc->app_imem_entry);
> +       nv_debug(ppmu, "0x%08x\n", desc->app_imem_entry);
> +       nv_wr32(ppmu, 0x0010a1c4,  *(adr_data));
> +       nv_debug(ppmu, "0x%08x\n", *(adr_data));
> +       nv_wr32(ppmu, 0x0010a1c4, desc->app_resident_data_size);
> +       nv_debug(ppmu, "0x%08x\n", desc->app_resident_data_size);
> +       nv_wr32(ppmu, 0x0010a1c4, *(adr_code));
> +       nv_debug(ppmu, "0x%08x\n", *(adr_code));
> +       nv_wr32(ppmu, 0x0010a1c4, 0x1);
> +       nv_debug(ppmu, "0x%08x\n", 1);
> +       nv_wr32(ppmu, 0x0010a1c4, addr_args);
> +       nv_debug(ppmu, "0x%08x\n", addr_args);
> +
> +
> +       nv_wr32(ppmu, 0x0010a110,
> +               *(adr_load) - (desc->bootloader_imem_offset >> 8));
> +
> +       blocks = ((desc->bootloader_size + 0xFF) & ~0xFF) >> 8;
> +
> +       for (i = 0; i < blocks; i++) {
> +               nv_wr32(ppmu, 0x0010a114,
> +                       desc->bootloader_imem_offset + (i << 8));
> +               nv_wr32(ppmu, 0x0010a11c,
> +                       desc->bootloader_imem_offset + (i << 8));
> +               nv_wr32(ppmu, 0x0010a118,
> +                       0x01 << 4  |
> +                       0x06 << 8  |
> +                       ((GK20A_PMU_DMAIDX_UCODE & 0x07) << 12));
> +       }
> +
> +
> +       nv_wr32(ppmu, 0x0010a104,
> +               (0xffffffff & desc->bootloader_entry_point));
> +
> +       nv_wr32(ppmu, 0x0010a100, 0x1 << 1);
> +
> +       nv_wr32(ppmu, 0x0010a080, desc->app_version);
> +
> +       return 0;
> +}
> +
> +void pmu_seq_init(struct pmu_desc *pmu)
> +{
> +       u32 i;
> +
> +       memset(pmu->seq, 0,
> +               sizeof(struct pmu_sequence) * PMU_MAX_NUM_SEQUENCES);
> +       memset(pmu->pmu_seq_tbl, 0,
> +               sizeof(pmu->pmu_seq_tbl));
> +
> +       for (i = 0; i < PMU_MAX_NUM_SEQUENCES; i++)
> +               pmu->seq[i].id = i;
> +}
> +
> +static int pmu_seq_acquire(struct pmu_desc *pmu,
> +                       struct pmu_sequence **pseq)
> +{
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +       struct pmu_sequence *seq;
> +       u32 index;
> +
> +       mutex_lock(&pmu->pmu_seq_lock);
> +       index = find_first_zero_bit(pmu->pmu_seq_tbl,
> +                               sizeof(pmu->pmu_seq_tbl));
> +       if (index >= sizeof(pmu->pmu_seq_tbl)) {
> +               nv_error(ppmu,
> +                       "no free sequence available");
> +               mutex_unlock(&pmu->pmu_seq_lock);
> +               return -EAGAIN;
> +       }
> +       set_bit(index, pmu->pmu_seq_tbl);
> +       mutex_unlock(&pmu->pmu_seq_lock);
> +
> +       seq = &pmu->seq[index];
> +       seq->state = PMU_SEQ_STATE_PENDING;
> +
> +       *pseq = seq;
> +       return 0;
> +}
> +
> +static void pmu_seq_release(struct pmu_desc *pmu,
> +                       struct pmu_sequence *seq)
> +{
> +       seq->state      = PMU_SEQ_STATE_FREE;
> +       seq->desc       = PMU_INVALID_SEQ_DESC;
> +       seq->callback   = NULL;
> +       seq->cb_params  = NULL;
> +       seq->msg        = NULL;
> +       seq->out_payload = NULL;
> +       seq->in_gk20a.alloc.dmem.size = 0;
> +       seq->out_gk20a.alloc.dmem.size = 0;
> +       clear_bit(seq->id, pmu->pmu_seq_tbl);
> +}
> +
> +static int pmu_queue_init(struct pmu_desc *pmu,
> +               u32 id, struct pmu_init_msg_pmu_gk20a *init)
> +{
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +       struct pmu_queue *queue = &pmu->queue[id];
> +
> +       queue->id       = id;
> +       queue->index    = init->queue_info[id].index;
> +       queue->offset   = init->queue_info[id].offset;
> +       queue->size = init->queue_info[id].size;
> +       queue->mutex_id = id;
> +       mutex_init(&queue->mutex);
> +
> +       nv_debug(ppmu, "queue %d: index %d, offset 0x%08x, size 0x%08x",
> +               id, queue->index, queue->offset, queue->size);
> +
> +       return 0;
> +}
> +
> +static int pmu_queue_head(struct pmu_desc *pmu, struct pmu_queue *queue,
> +                       u32 *head, bool set)
> +{
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +
> +       BUG_ON(!head);
> +
> +       if (PMU_IS_COMMAND_QUEUE(queue->id)) {
> +
> +               if (queue->index >= 0x00000004)
> +                       return -EINVAL;
> +
> +               if (!set)
> +                       *head = nv_rd32(ppmu, 0x0010a4a0 + (queue->index * 4)) &
> +                               0xffffffff;
> +               else
> +                       nv_wr32(ppmu,
> +                               (0x0010a4a0 + (queue->index * 4)),
> +                               (*head & 0xffffffff));
> +       } else {
> +               if (!set)
> +                       *head = nv_rd32(ppmu, 0x0010a4c8) & 0xffffffff;
> +               else
> +                       nv_wr32(ppmu, 0x0010a4c8, (*head & 0xffffffff));
> +       }
> +
> +       return 0;
> +}
> +
> +static int pmu_queue_tail(struct pmu_desc *pmu, struct pmu_queue *queue,
> +                       u32 *tail, bool set)
> +{
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +
> +       BUG_ON(!tail);
> +
> +       if (PMU_IS_COMMAND_QUEUE(queue->id)) {
> +
> +               if (queue->index >= 0x00000004)
> +                       return -EINVAL;
> +
> +               if (!set)
> +                       *tail = nv_rd32(ppmu, 0x0010a4b0 + (queue->index * 4)) &
> +                               0xffffffff;
> +               else
> +                       nv_wr32(ppmu, (0x0010a4b0 + (queue->index * 4)),
> +                                                         (*tail & 0xffffffff));
> +       } else {
> +               if (!set)
> +                       *tail = nv_rd32(ppmu, 0x0010a4cc) & 0xffffffff;
> +               else
> +                       nv_wr32(ppmu, 0x0010a4cc, (*tail & 0xffffffff));
> +       }
> +
> +       return 0;
> +}
> +
> +static inline void pmu_queue_read(struct pmu_desc *pmu,
> +                       u32 offset, u8 *dst, u32 size)
> +{
> +       pmu_copy_from_dmem(pmu, offset, dst, size, 0);
> +}
> +
> +static inline void pmu_queue_write(struct pmu_desc *pmu,
> +                       u32 offset, u8 *src, u32 size)
> +{
> +       pmu_copy_to_dmem(pmu, offset, src, size, 0);
> +}
> +
> +int pmu_mutex_acquire(struct nvkm_pmu *ppmu, u32 id, u32 *token)
> +{
> +       struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +       struct pmu_desc *pmu = &impl->pmudata;
> +       struct pmu_mutex *mutex;
> +       u32 data, owner, max_retry;
> +
> +       if (!pmu->initialized)
> +               return -EINVAL;
> +
> +       BUG_ON(!token);
> +       BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
> +       BUG_ON(id > pmu->mutex_cnt);
> +
> +       mutex = &pmu->mutex[id];
> +
> +       owner = nv_rd32(ppmu, 0x0010a580 + (mutex->index * 4)) & 0xff;
> +
> +       if (*token != PMU_INVALID_MUTEX_OWNER_ID && *token == owner) {
> +               BUG_ON(mutex->ref_cnt == 0);
> +               nv_debug(ppmu, "already acquired by owner : 0x%08x", *token);
> +               mutex->ref_cnt++;
> +               return 0;
> +       }
> +
> +       max_retry = 40;
> +       do {
> +               data = nv_rd32(ppmu, 0x0010a488) & 0xff;
> +               if (data == 0x00000000 ||
> +                   data == 0x000000ff) {
> +                       nv_warn(ppmu,
> +                               "fail to generate mutex token: val 0x%08x",
> +                               owner);
> +                       usleep_range(20, 40);
> +                       continue;
> +               }
> +
> +               owner = data;
> +               nv_wr32(ppmu, (0x0010a580 + mutex->index * 4),
> +                       owner & 0xff);
> +
> +               data = nv_rd32(ppmu, 0x0010a580 + (mutex->index * 4));
> +
> +               if (owner == data) {
> +                       mutex->ref_cnt = 1;
> +                       nv_debug(ppmu, "mutex acquired: id=%d, token=0x%x",
> +                               mutex->index, *token);
> +                       *token = owner;
> +                       goto out;
> +               } else {
> +                 nv_debug(ppmu, "fail to acquire mutex idx=0x%08x",
> +                               mutex->index);
> +
> +                       nv_mask(ppmu, 0x0010a48c, 0xff, (owner & 0xff));
> +
> +                       usleep_range(20, 40);
> +                       continue;
> +               }
> +       } while (max_retry-- > 0);
> +
> +       return -EBUSY;
> +out:
> +       return 0;
> +}
> +
> +int pmu_mutex_release(struct nvkm_pmu *ppmu, u32 id, u32 *token)
> +{
> +       struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +       struct pmu_desc *pmu = &impl->pmudata;
> +       struct pmu_mutex *mutex;
> +       u32 owner;
> +
> +       if (!pmu->initialized)
> +               return -EINVAL;
> +
> +       BUG_ON(!token);
> +       BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
> +       BUG_ON(id > pmu->mutex_cnt);
> +
> +       mutex = &pmu->mutex[id];
> +
> +       owner = nv_rd32(ppmu, 0x0010a580 + (mutex->index * 4)) & 0xff;
> +
> +       if (*token != owner) {
> +               nv_error(ppmu,
> +                       "requester 0x%08x NOT match owner 0x%08x",
> +                       *token, owner);
> +               return -EINVAL;
> +       }
> +
> +       if (--mutex->ref_cnt > 0)
> +               return -EBUSY;
> +
> +       nv_wr32(ppmu, 0x0010a580 + (mutex->index * 4), 0x00);
> +
> +       nv_mask(ppmu, 0x0010a48c, 0xff, (owner & 0xff));
> +
> +       nv_debug(ppmu, "mutex released: id=%d, token=0x%x",
> +                                                         mutex->index, *token);
> +
> +       return 0;
> +}
> +
> +static int pmu_queue_lock(struct pmu_desc *pmu,
> +                       struct pmu_queue *queue)
> +{
> +       int ret;
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +
> +       if (PMU_IS_MESSAGE_QUEUE(queue->id))
> +               return 0;
> +
> +       if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
> +               mutex_lock(&queue->mutex);
> +               return 0;
> +       }
> +
> +       ret = pmu_mutex_acquire(ppmu, queue->mutex_id, &queue->mutex_lock);
> +       return ret;
> +}
> +
> +static int pmu_queue_unlock(struct pmu_desc *pmu,
> +                       struct pmu_queue *queue)
> +{
> +       int ret;
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +
> +       if (PMU_IS_MESSAGE_QUEUE(queue->id))
> +               return 0;
> +
> +       if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
> +               mutex_unlock(&queue->mutex);
> +               return 0;
> +       }
> +
> +       ret = pmu_mutex_release(ppmu, queue->mutex_id, &queue->mutex_lock);
> +       return ret;
> +}
> +
> +/* called by pmu_read_message, no lock */
> +static bool pmu_queue_is_empty(struct pmu_desc *pmu,
> +                       struct pmu_queue *queue)
> +{
> +       u32 head, tail;
> +
> +       pmu_queue_head(pmu, queue, &head, QUEUE_GET);
> +       if (queue->opened && queue->oflag == OFLAG_READ)
> +               tail = queue->position;
> +       else
> +               pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);
> +
> +       return head == tail;
> +}
> +
> +static bool pmu_queue_has_room(struct pmu_desc *pmu,
> +                       struct pmu_queue *queue, u32 size, bool *need_rewind)
> +{
> +       u32 head, tail, free;
> +       bool rewind = false;
> +
> +       size = ALIGN(size, QUEUE_ALIGNMENT);
> +
> +       pmu_queue_head(pmu, queue, &head, QUEUE_GET);
> +       pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);
> +
> +       if (head >= tail) {
> +               free = queue->offset + queue->size - head;
> +               free -= PMU_CMD_HDR_SIZE;
> +
> +               if (size > free) {
> +                       rewind = true;
> +                       head = queue->offset;
> +               }
> +       }
> +
> +       if (head < tail)
> +               free = tail - head - 1;
> +
> +       if (need_rewind)
> +               *need_rewind = rewind;
> +
> +       return size <= free;
> +}
> +
> +static int pmu_queue_push(struct pmu_desc *pmu,
> +                       struct pmu_queue *queue, void *data, u32 size)
> +{
> +
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +       if (!queue->opened && queue->oflag == OFLAG_WRITE) {
> +               nv_error(ppmu, "queue not opened for write\n");
> +               return -EINVAL;
> +       }
> +
> +       pmu_queue_write(pmu, queue->position, data, size);
> +       queue->position += ALIGN(size, QUEUE_ALIGNMENT);
> +       return 0;
> +}
> +
> +static int pmu_queue_pop(struct pmu_desc *pmu,
> +                       struct pmu_queue *queue, void *data, u32 size,
> +                       u32 *bytes_read)
> +{
> +       u32 head, tail, used;
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +
> +       *bytes_read = 0;
> +
> +       if (!queue->opened && queue->oflag == OFLAG_READ) {
> +               nv_error(ppmu, "queue not opened for read\n");
> +               return -EINVAL;
> +       }
> +
> +       pmu_queue_head(pmu, queue, &head, QUEUE_GET);
> +       tail = queue->position;
> +
> +       if (head == tail)
> +               return 0;
> +
> +       if (head > tail)
> +               used = head - tail;
> +       else
> +               used = queue->offset + queue->size - tail;
> +
> +       if (size > used) {
> +               nv_warn(ppmu, "queue size smaller than request read\n");
> +               size = used;
> +       }
> +
> +       pmu_queue_read(pmu, tail, data, size);
> +       queue->position += ALIGN(size, QUEUE_ALIGNMENT);
> +       *bytes_read = size;
> +       return 0;
> +}
> +
> +static void pmu_queue_rewind(struct pmu_desc *pmu,
> +                       struct pmu_queue *queue)
> +{
> +       struct pmu_cmd cmd;
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +
> +
> +       if (!queue->opened) {
> +               nv_error(ppmu, "queue not opened\n");
> +               goto out;
> +       }
> +
> +       if (queue->oflag == OFLAG_WRITE) {
> +               cmd.hdr.unit_id = PMU_UNIT_REWIND;
> +               cmd.hdr.size = PMU_CMD_HDR_SIZE;
> +               pmu_queue_push(pmu, queue, &cmd, cmd.hdr.size);
> +               nv_debug(ppmu, "queue %d rewinded\n", queue->id);
> +       }
> +
> +       queue->position = queue->offset;
> +out:
> +       nv_debug(ppmu, "exit %s\n", __func__);
> +}
> +
> +/* open for read and lock the queue */
> +static int pmu_queue_open_read(struct pmu_desc *pmu,
> +                       struct pmu_queue *queue)
> +{
> +       int err;
> +
> +       err = pmu_queue_lock(pmu, queue);
> +       if (err)
> +               return err;
> +
> +       if (queue->opened)
> +               BUG();
> +
> +       pmu_queue_tail(pmu, queue, &queue->position, QUEUE_GET);
> +       queue->oflag = OFLAG_READ;
> +       queue->opened = true;
> +
> +       return 0;
> +}
> +
> +/* open for write and lock the queue
> +   make sure there's enough free space for the write */
> +static int pmu_queue_open_write(struct pmu_desc *pmu,
> +                       struct pmu_queue *queue, u32 size)
> +{
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +       bool rewind = false;
> +       int err;
> +
> +       err = pmu_queue_lock(pmu, queue);
> +       if (err)
> +               return err;
> +
> +       if (queue->opened)
> +               BUG();
> +
> +       if (!pmu_queue_has_room(pmu, queue, size, &rewind)) {
> +               nv_error(ppmu, "queue full");
> +               pmu_queue_unlock(pmu, queue);
> +               return -EAGAIN;
> +       }
> +
> +       pmu_queue_head(pmu, queue, &queue->position, QUEUE_GET);
> +       queue->oflag = OFLAG_WRITE;
> +       queue->opened = true;
> +
> +       if (rewind)
> +               pmu_queue_rewind(pmu, queue);
> +
> +       return 0;
> +}
> +
> +/* close and unlock the queue */
> +static int pmu_queue_close(struct pmu_desc *pmu,
> +                       struct pmu_queue *queue, bool commit)
> +{
> +       if (!queue->opened)
> +               return 0;
> +
> +       if (commit) {
> +               if (queue->oflag == OFLAG_READ) {
> +                       pmu_queue_tail(pmu, queue,
> +                               &queue->position, QUEUE_SET);
> +               } else {
> +                       pmu_queue_head(pmu, queue,
> +                               &queue->position, QUEUE_SET);
> +               }
> +       }
> +
> +       queue->opened = false;
> +
> +       pmu_queue_unlock(pmu, queue);
> +
> +       return 0;
> +}
> +
> +int pmu_wait_message_cond(struct pmu_desc *pmu, u32 timeout,
> +                                u32 *var, u32 val)
> +{
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +       unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout);
> +       unsigned long delay = GK20A_IDLE_CHECK_DEFAULT;
> +
> +       do {
> +               if (*var == val)
> +                       return 0;
> +
> +               if (nv_rd32(ppmu, 0x0010a008))
> +                       gk20a_pmu_isr(ppmu);
> +
> +               usleep_range(delay, delay * 2);
> +               delay = min_t(u32, delay << 1, GK20A_IDLE_CHECK_MAX);
> +       } while (time_before(jiffies, end_jiffies));
> +
> +       return -ETIMEDOUT;
> +}
> +
> +void pmu_dump_falcon_stats(struct pmu_desc *pmu)
> +{
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +       int i;
> +
> +       nv_debug(ppmu, "pmu_falcon_os_r : %d\n",
> +               nv_rd32(ppmu, 0x0010a080));
> +       nv_debug(ppmu, "pmu_falcon_cpuctl_r : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a100));
> +       nv_debug(ppmu, "pmu_falcon_idlestate_r : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a04c));
> +       nv_debug(ppmu, "pmu_falcon_mailbox0_r : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a040));
> +       nv_debug(ppmu, "pmu_falcon_mailbox1_r : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a044));
> +       nv_debug(ppmu, "pmu_falcon_irqstat_r : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a008));
> +       nv_debug(ppmu, "pmu_falcon_irqmode_r : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a00c));
> +       nv_debug(ppmu, "pmu_falcon_irqmask_r : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a018));
> +       nv_debug(ppmu, "pmu_falcon_irqdest_r : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a01c));
> +
> +       for (i = 0; i < 0x0000000c; i++)
> +               nv_debug(ppmu, "pmu_pmu_mailbox_r(%d) : 0x%x\n",
> +                       i, nv_rd32(ppmu, 0x0010a450 + i*4));
> +
> +       for (i = 0; i < 0x00000004; i++)
> +               nv_debug(ppmu, "pmu_pmu_debug_r(%d) : 0x%x\n",
> +                       i, nv_rd32(ppmu, 0x0010a5c0 + i*4));
> +
> +       for (i = 0; i < 6/*NV_Ppmu_FALCON_ICD_IDX_RSTAT__SIZE_1*/; i++) {
> +               nv_wr32(ppmu, 0x0010a200,
> +                       0xe |
> +                       (i & 0x1f) << 8);
> +               nv_debug(ppmu, "pmu_rstat (%d) : 0x%x\n",
> +                       i, nv_rd32(ppmu, 0x0010a20c));
> +       }
> +
> +       i = nv_rd32(ppmu, 0x0010a7b0);
> +       nv_debug(ppmu, "pmu_pmu_bar0_error_status_r : 0x%x\n", i);
> +       if (i != 0) {
> +               nv_debug(ppmu, "pmu_pmu_bar0_addr_r : 0x%x\n",
> +                       nv_rd32(ppmu, 0x0010a7a0));
> +               nv_debug(ppmu, "pmu_pmu_bar0_data_r : 0x%x\n",
> +                       nv_rd32(ppmu, 0x0010a7a4));
> +               nv_debug(ppmu, "pmu_pmu_bar0_timeout_r : 0x%x\n",
> +                       nv_rd32(ppmu, 0x0010a7a8));
> +               nv_debug(ppmu, "pmu_pmu_bar0_ctl_r : 0x%x\n",
> +                       nv_rd32(ppmu, 0x0010a7ac));
> +       }
> +
> +       i = nv_rd32(ppmu, 0x0010a988);
> +       nv_debug(ppmu, "pmu_pmu_bar0_fecs_error_r : 0x%x\n", i);
> +
> +       i = nv_rd32(ppmu, 0x0010a16c);
> +       nv_debug(ppmu, "pmu_falcon_exterrstat_r : 0x%x\n", i);
> +       if (((i >> 31) & 0x1)) {
> +               nv_debug(ppmu, "pmu_falcon_exterraddr_r : 0x%x\n",
> +                       nv_rd32(ppmu, 0x0010a168));
> +               /*nv_debug(ppmu, "pmc_enable : 0x%x\n",
> +                 nv_rd32(pmc, 0x00000200));*/
> +       }
> +
> +       nv_debug(ppmu, "pmu_falcon_engctl_r : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a0a4));
> +       nv_debug(ppmu, "pmu_falcon_curctx_r : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a050));
> +       nv_debug(ppmu, "pmu_falcon_nxtctx_r : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a054));
> +
> +       nv_wr32(ppmu, 0x0010a200,
> +               0x8 |
> +               ((PMU_FALCON_REG_IMB & 0x1f) << 8));
> +       nv_debug(ppmu, "PMU_FALCON_REG_IMB : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a20c));
> +
> +       nv_wr32(ppmu, 0x0010a200,
> +               0x8 |
> +               ((PMU_FALCON_REG_DMB & 0x1f) << 8));
> +       nv_debug(ppmu, "PMU_FALCON_REG_DMB : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a20c));
> +
> +       nv_wr32(ppmu, 0x0010a200,
> +               0x8 |
> +               ((PMU_FALCON_REG_CSW & 0x1f) << 8));
> +       nv_debug(ppmu, "PMU_FALCON_REG_CSW : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a20c));
> +
> +       nv_wr32(ppmu, 0x0010a200,
> +               0x8 |
> +               ((PMU_FALCON_REG_CTX & 0x1f) << 8));
> +       nv_debug(ppmu, "PMU_FALCON_REG_CTX : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a20c));
> +
> +       nv_wr32(ppmu, 0x0010a200,
> +               0x8 |
> +               ((PMU_FALCON_REG_EXCI & 0x1f) << 8));
> +       nv_debug(ppmu, "PMU_FALCON_REG_EXCI : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a20c));
> +
> +       for (i = 0; i < 4; i++) {
> +               nv_wr32(ppmu, 0x0010a200,
> +                       0x8 |
> +                       ((PMU_FALCON_REG_PC & 0x1f) << 8));
> +               nv_debug(ppmu, "PMU_FALCON_REG_PC : 0x%x\n",
> +                       nv_rd32(ppmu, 0x0010a20c));
> +
> +               nv_wr32(ppmu, 0x0010a200,
> +                       0x8 |
> +                       ((PMU_FALCON_REG_SP & 0x1f) << 8));
> +               nv_debug(ppmu, "PMU_FALCON_REG_SP : 0x%x\n",
> +                       nv_rd32(ppmu, 0x0010a20c));
> +       }
> +
> +       /* PMU may crash due to FECS crash. Dump FECS status */
> +       /*gk20a_fecs_dump_falcon_stats(g);*/
> +}
> +
> +static bool pmu_validate_cmd(struct pmu_desc *pmu, struct pmu_cmd *cmd,
> +                       struct pmu_msg *msg, struct pmu_payload *payload,
> +                       u32 queue_id)
> +{
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +       struct pmu_queue *queue;
> +       u32 in_size, out_size;
> +
> +       nv_debug(ppmu, "pmu validate cmd\n");
> +       pmu_dump_falcon_stats(pmu);
> +
> +       if (!PMU_IS_SW_COMMAND_QUEUE(queue_id))
> +               goto invalid_cmd;
> +
> +       queue = &pmu->queue[queue_id];
> +       if (cmd->hdr.size < PMU_CMD_HDR_SIZE)
> +               goto invalid_cmd;
> +
> +       if (cmd->hdr.size > (queue->size >> 1))
> +               goto invalid_cmd;
> +
> +       if (msg != NULL && msg->hdr.size < PMU_MSG_HDR_SIZE)
> +               goto invalid_cmd;
> +
> +       if (!PMU_UNIT_ID_IS_VALID(cmd->hdr.unit_id))
> +               goto invalid_cmd;
> +
> +       if (payload == NULL)
> +               return true;
> +
> +       if (payload->in.buf == NULL && payload->out.buf == NULL)
> +               goto invalid_cmd;
> +
> +       if ((payload->in.buf != NULL && payload->in.size == 0) ||
> +           (payload->out.buf != NULL && payload->out.size == 0))
> +               goto invalid_cmd;
> +
> +       in_size = PMU_CMD_HDR_SIZE;
> +       if (payload->in.buf) {
> +               in_size += payload->in.offset;
> +               in_size += sizeof(struct pmu_allocation_gk20a);
> +       }
> +
> +       out_size = PMU_CMD_HDR_SIZE;
> +       if (payload->out.buf) {
> +               out_size += payload->out.offset;
> +               out_size += sizeof(struct pmu_allocation_gk20a);
> +       }
> +
> +       if (in_size > cmd->hdr.size || out_size > cmd->hdr.size)
> +               goto invalid_cmd;
> +
> +
> +       if ((payload->in.offset != 0 && payload->in.buf == NULL) ||
> +           (payload->out.offset != 0 && payload->out.buf == NULL))
> +               goto invalid_cmd;
> +
> +       return true;
> +
> +invalid_cmd:
> +       nv_error(ppmu, "invalid pmu cmd :\n"
> +               "queue_id=%d,\n"
> +               "cmd_size=%d, cmd_unit_id=%d, msg=%p, msg_size=%d,\n"
> +               "payload in=%p, in_size=%d, in_offset=%d,\n"
> +               "payload out=%p, out_size=%d, out_offset=%d",
> +               queue_id, cmd->hdr.size, cmd->hdr.unit_id,
> +               msg, msg ? msg->hdr.unit_id : ~0,
> +               &payload->in, payload->in.size, payload->in.offset,
> +               &payload->out, payload->out.size, payload->out.offset);
> +
> +       return false;
> +}
> +
> +static int pmu_write_cmd(struct pmu_desc *pmu, struct pmu_cmd *cmd,
> +                       u32 queue_id, unsigned long timeout)
> +{
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +       struct pmu_queue *queue;
> +       unsigned long end_jiffies = jiffies +
> +               msecs_to_jiffies(timeout);
> +       int err;
> +
> +       nv_debug(ppmu, "pmu write cmd\n");
> +
> +       queue = &pmu->queue[queue_id];
> +
> +       do {
> +               err = pmu_queue_open_write(pmu, queue, cmd->hdr.size);
> +               if (err == -EAGAIN && time_before(jiffies, end_jiffies))
> +                       usleep_range(1000, 2000);
> +               else
> +                       break;
> +       } while (1);
> +
> +       if (err)
> +               goto clean_up;
> +
> +       pmu_queue_push(pmu, queue, cmd, cmd->hdr.size);
> +
> +       err = pmu_queue_close(pmu, queue, true);
> +
> +clean_up:
> +       if (err)
> +               nv_error(ppmu,
> +                       "fail to write cmd to queue %d", queue_id);
> +       else
> +               nv_debug(ppmu, "cmd writing done");
> +
> +       return err;
> +}
> +
> +int gk20a_pmu_cmd_post(struct nvkm_pmu *ppmu, struct pmu_cmd *cmd,
> +               struct pmu_msg *msg, struct pmu_payload *payload,
> +               u32 queue_id, pmu_callback callback, void *cb_param,
> +               u32 *seq_desc, unsigned long timeout)
> +{
> +       struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +       struct pmu_desc *pmu = &impl->pmudata;
> +       struct pmu_sequence *seq;
> +       struct pmu_allocation_gk20a *in = NULL, *out = NULL;
> +       int err;
> +
> +       BUG_ON(!cmd);
> +       BUG_ON(!seq_desc);
> +       BUG_ON(!pmu->pmu_ready);
> +       nv_debug(ppmu, "Post CMD\n");
> +       if (!pmu_validate_cmd(pmu, cmd, msg, payload, queue_id))
> +               return -EINVAL;
> +
> +       err = pmu_seq_acquire(pmu, &seq);
> +       if (err)
> +               return err;
> +
> +       cmd->hdr.seq_id = seq->id;
> +
> +       cmd->hdr.ctrl_flags = 0;
> +       cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_STATUS;
> +       cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_INTR;
> +
> +       seq->callback = callback;
> +       seq->cb_params = cb_param;
> +       seq->msg = msg;
> +       seq->out_payload = NULL;
> +       seq->desc = pmu->next_seq_desc++;
> +
> +       if (payload)
> +               seq->out_payload = payload->out.buf;
> +
> +       *seq_desc = seq->desc;
> +
> +       if (payload && payload->in.offset != 0) {
> +               in = (struct pmu_allocation_gk20a *)((u8 *)&cmd->cmd +
> +                       payload->in.offset);
> +
> +               if (payload->in.buf != payload->out.buf)
> +                       in->alloc.dmem.size = (u16)payload->in.size;
> +               else
> +                       in->alloc.dmem.size =
> +                               (u16)max(payload->in.size, payload->out.size);
> +
> +               err = pmu->dmem.alloc(&pmu->dmem,
> +                       (void *)&in->alloc.dmem.offset,
> +                       in->alloc.dmem.size,
> +                       PMU_DMEM_ALLOC_ALIGNMENT);
> +               if (err)
> +                       goto clean_up;
> +
> +               pmu_copy_to_dmem(pmu, (in->alloc.dmem.offset),
> +                       payload->in.buf, payload->in.size, 0);
> +               seq->in_gk20a.alloc.dmem.size = in->alloc.dmem.size;
> +               seq->in_gk20a.alloc.dmem.offset = in->alloc.dmem.offset;
> +       }
> +
> +       if (payload && payload->out.offset != 0) {
> +               out = (struct pmu_allocation_gk20a *)((u8 *)&cmd->cmd +
> +                       payload->out.offset);
> +               out->alloc.dmem.size = (u16)payload->out.size;
> +
> +               if (payload->out.buf != payload->in.buf) {
> +                       err = pmu->dmem.alloc(&pmu->dmem,
> +                               (void *)&out->alloc.dmem.offset,
> +                               out->alloc.dmem.size,
> +                               PMU_DMEM_ALLOC_ALIGNMENT);
> +                       if (err)
> +                               goto clean_up;
> +               } else {
> +                       BUG_ON(in == NULL);
> +                       out->alloc.dmem.offset = in->alloc.dmem.offset;
> +               }
> +
> +               seq->out_gk20a.alloc.dmem.size = out->alloc.dmem.size;
> +               seq->out_gk20a.alloc.dmem.offset = out->alloc.dmem.offset;
> +       }
> +
> +       seq->state = PMU_SEQ_STATE_USED;
> +       err = pmu_write_cmd(pmu, cmd, queue_id, timeout);
> +       if (err)
> +               seq->state = PMU_SEQ_STATE_PENDING;
> +
> +       nv_debug(ppmu, "cmd posted\n");
> +
> +       return 0;
> +
> +clean_up:
> +       nv_debug(ppmu, "cmd post failed\n");
> +       if (in)
> +               pmu->dmem.free(&pmu->dmem,
> +                       in->alloc.dmem.offset,
> +                       in->alloc.dmem.size,
> +                       PMU_DMEM_ALLOC_ALIGNMENT);
> +       if (out)
> +               pmu->dmem.free(&pmu->dmem,
> +                       out->alloc.dmem.offset,
> +                       out->alloc.dmem.size,
> +                       PMU_DMEM_ALLOC_ALIGNMENT);
> +
> +       pmu_seq_release(pmu, seq);
> +       return err;
> +}
> +
> +void gk20a_pmu_isr(struct nvkm_pmu *ppmu)
> +{
> +       struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +       struct pmu_desc *pmu = &impl->pmudata;
> +       struct nvkm_mc *pmc = nvkm_mc(ppmu);
> +       struct pmu_queue *queue;
> +       u32 intr, mask;
> +       bool recheck = false;
> +       if (!pmu->isr_enabled)
> +               goto out;
> +
> +       mask = nv_rd32(ppmu, 0x0010a018) &
> +               nv_rd32(ppmu, 0x0010a01c);
> +
> +       intr = nv_rd32(ppmu, 0x0010a008) & mask;
> +
> +       nv_debug(ppmu, "received falcon interrupt: 0x%08x", intr);
> +       pmu_enable_irq(ppmu, pmc, false);
> +       if (!intr || pmu->pmu_state == PMU_STATE_OFF) {
> +               nv_wr32(ppmu, 0x0010a004, intr);
> +               nv_error(ppmu, "pmu state off\n");
> +               pmu_enable_irq(ppmu, pmc, true);
> +               goto out;
> +       }
> +       if (intr & 0x10) {
> +               nv_error(ppmu,
> +                       "pmu halt intr not implemented");
> +               pmu_dump_falcon_stats(pmu);
> +       }
> +       if (intr & 0x20) {
> +               nv_error(ppmu,
> +                       "pmu exterr intr not implemented. Clearing interrupt.");
> +               pmu_dump_falcon_stats(pmu);
> +
> +               nv_wr32(ppmu, 0x0010a16c,
> +                       nv_rd32(ppmu, 0x0010a16c) &
> +                               ~(0x1 << 31));
> +       }
> +       if (intr & 0x40) {
> +               nv_debug(ppmu, "scheduling work\n");
> +               schedule_work(&pmu->isr_workq);
> +               pmu_enable_irq(ppmu, pmc, true);
> +               recheck = true;
> +       }
> +
> +       if (recheck) {
> +               queue = &pmu->queue[PMU_MESSAGE_QUEUE];
> +               if (!pmu_queue_is_empty(pmu, queue))
> +                       nv_wr32(ppmu, 0x0010a000, 0x40);
> +       } else {
> +               pmu_enable_irq(ppmu, pmc, true);
> +       }
> +
> +       pmu_enable_irq(ppmu, pmc, true);
> +       nv_wr32(ppmu, 0x0010a004, intr);
> +out:
> +       nv_debug(ppmu, "irq handled\n");
> +}
> +
> +static int
> +gk20a_pmu_init_vm(struct nvkm_pmu *ppmu, const struct firmware *fw)
> +{
> +       int ret = 0;
> +       struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +       struct pmu_desc *pmu = &impl->pmudata;
> +       u32 *ucode_image;
> +       struct pmu_ucode_desc *desc = (struct pmu_ucode_desc *)fw->data;
> +       int i;
> +       struct pmu_priv_vm *ppmuvm = &pmuvm;
> +       struct nvkm_device *device = nv_device(&ppmu->base);
> +       struct nvkm_vm *vm;
> +       u64 pmu_area_len = 300*1024;
> +
> +       ppmu->pmuvm = &pmuvm;
> +       ppmu->pg_buf = &pmu->pg_buf;
> +       pmu->pmu = ppmu;
> +       /* mem for inst blk*/
> +       ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, 0x1000, 0, 0,
> +                               &ppmuvm->mem);
> +       if (ret)
> +               goto instblk_alloc_err;
> +
> +       /* mem for pgd*/
> +       ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, 0x8000, 0, 0,
> +                               &ppmuvm->pgd);
> +       if (ret)
> +               goto pgd_alloc_err;
> +
> +       /*allocate virtual memory range*/
> +       ret = nvkm_vm_new(device, 0, pmu_area_len, 0, &vm);
> +       if (ret)
> +               goto virt_alloc_err;
> +
> +       atomic_inc(&vm->engref[NVDEV_SUBDEV_PMU]);
> +       /*update VM with pgd */
> +
> +       ret = nvkm_vm_ref(vm, &ppmuvm->vm, ppmuvm->pgd);
> +       if (ret)
> +               goto virt_alloc_err;
> +
> +       /*update pgd in inst blk */
> +       nv_wo32(ppmuvm->mem, 0x0200, lower_32_bits(ppmuvm->pgd->addr));
> +       nv_wo32(ppmuvm->mem, 0x0204, upper_32_bits(ppmuvm->pgd->addr));
> +       nv_wo32(ppmuvm->mem, 0x0208, lower_32_bits(pmu_area_len - 1));
> +       nv_wo32(ppmuvm->mem, 0x020c, upper_32_bits(pmu_area_len - 1));
> +
> +       /* allocate memory for pmu fw to be copied to*/
> +       ret = nvkm_gpuobj_new(nv_object(ppmu), NULL,
> +                  GK20A_PMU_UCODE_SIZE_MAX, 0x1000, 0, &pmu->ucode.pmubufobj);
> +       if (ret)
> +               goto fw_alloc_err;
> +
> +       ucode_image = (u32 *)((u32)desc + desc->descriptor_size);
> +       for (i = 0; i < (desc->app_start_offset + desc->app_size) >> 2; i++) {
> +               nv_wo32(pmu->ucode.pmubufobj, i << 2, ucode_image[i]);
> +               pr_info("writing 0x%08x\n", ucode_image[i]);
> +       }
> +       /* map allocated memory into GMMU */
> +       ret = nvkm_gpuobj_map_vm(nv_gpuobj(pmu->ucode.pmubufobj), vm,
> +                                   NV_MEM_ACCESS_RW,
> +                                   &pmu->ucode.pmubufvma);
> +       if (ret)
> +               goto map_err;
> +
> +       nv_debug(ppmu, "%s function end\n", __func__);
> +       return ret;
> +map_err:
> +       nvkm_gpuobj_destroy(pmu->ucode.pmubufobj);
> +virt_alloc_err:
> +fw_alloc_err:
> +       nvkm_gpuobj_destroy(ppmuvm->pgd);
> +pgd_alloc_err:
> +       nvkm_gpuobj_destroy(ppmuvm->mem);
> +instblk_alloc_err:
> +       return ret;
> +
> +}
> +
> +static int
> +gk20a_pmu_load_firmware(struct nvkm_pmu *ppmu, const struct firmware **pfw)
> +{
> +       struct nvkm_device *dev;
> +       char name[32];
> +
> +       dev = nv_device(ppmu);
> +
> +       snprintf(name, sizeof(name), "nvidia/tegra124/%s",
> +                                                        GK20A_PMU_UCODE_IMAGE);
> +
> +       return request_firmware(pfw, name, nv_device_base(dev));
> +}
> +
> +static void
> +gk20a_pmu_dump_firmware_info(struct nvkm_pmu *ppmu,
> +               const struct firmware *fw)
> +{
> +       struct pmu_ucode_desc *desc = (struct pmu_ucode_desc *)fw->data;
> +
> +       nv_debug(ppmu, "GK20A PMU firmware information\n");
> +       nv_debug(ppmu, "descriptor size = %u\n", desc->descriptor_size);
> +       nv_debug(ppmu, "image size  = %u\n", desc->image_size);
> +       nv_debug(ppmu, "app_version = 0x%08x\n", desc->app_version);
> +       nv_debug(ppmu, "date = %s\n", desc->date);
> +       nv_debug(ppmu, "bootloader_start_offset = 0x%08x\n",
> +                               desc->bootloader_start_offset);
> +       nv_debug(ppmu, "bootloader_size = 0x%08x\n", desc->bootloader_size);
> +       nv_debug(ppmu, "bootloader_imem_offset = 0x%08x\n",
> +                               desc->bootloader_imem_offset);
> +       nv_debug(ppmu, "bootloader_entry_point = 0x%08x\n",
> +                               desc->bootloader_entry_point);
> +       nv_debug(ppmu, "app_start_offset = 0x%08x\n", desc->app_start_offset);
> +       nv_debug(ppmu, "app_size = 0x%08x\n", desc->app_size);
> +       nv_debug(ppmu, "app_imem_offset = 0x%08x\n", desc->app_imem_offset);
> +       nv_debug(ppmu, "app_imem_entry = 0x%08x\n", desc->app_imem_entry);
> +       nv_debug(ppmu, "app_dmem_offset = 0x%08x\n", desc->app_dmem_offset);
> +       nv_debug(ppmu, "app_resident_code_offset = 0x%08x\n",
> +                       desc->app_resident_code_offset);
> +       nv_debug(ppmu, "app_resident_code_size = 0x%08x\n",
> +                       desc->app_resident_code_size);
> +       nv_debug(ppmu, "app_resident_data_offset = 0x%08x\n",
> +                       desc->app_resident_data_offset);
> +       nv_debug(ppmu, "app_resident_data_size = 0x%08x\n",
> +                       desc->app_resident_data_size);
> +       nv_debug(ppmu, "nb_overlays = %d\n", desc->nb_overlays);
> +
> +       nv_debug(ppmu, "compressed = %u\n", desc->compressed);
> +}
> +
> +static int pmu_process_init_msg(struct pmu_desc *pmu,
> +                       struct pmu_msg *msg)
> +{
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +       struct pmu_init_msg_pmu_gk20a *init;
> +       struct pmu_sha1_gid_data gid_data;
> +       u32 i, tail = 0;
> +
> +       tail = nv_rd32(ppmu, 0x0010a4cc) & 0xffffffff;
> +
> +       pmu_copy_from_dmem(pmu, tail,
> +               (u8 *)&msg->hdr, PMU_MSG_HDR_SIZE, 0);
> +
> +       if (msg->hdr.unit_id != PMU_UNIT_INIT) {
> +               nv_error(ppmu,
> +                       "expecting init msg");
> +               return -EINVAL;
> +       }
> +
> +       pmu_copy_from_dmem(pmu, tail + PMU_MSG_HDR_SIZE,
> +               (u8 *)&msg->msg, msg->hdr.size - PMU_MSG_HDR_SIZE, 0);
> +
> +       if (msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) {
> +               nv_error(ppmu,
> +                       "expecting init msg");
> +               return -EINVAL;
> +       }
> +
> +       tail += ALIGN(msg->hdr.size, PMU_DMEM_ALIGNMENT);
> +       nv_wr32(ppmu, 0x0010a4cc,
> +               tail & 0xffffffff);
> +
> +       init = &msg->msg.init.pmu_init_gk20a;
> +       if (!pmu->gid_info.valid) {
> +
> +               pmu_copy_from_dmem(pmu,
> +                       init->sw_managed_area_offset,
> +                       (u8 *)&gid_data,
> +                       sizeof(struct pmu_sha1_gid_data), 0);
> +
> +               pmu->gid_info.valid =
> +                       (*(u32 *)gid_data.signature == PMU_SHA1_GID_SIGNATURE);
> +
> +               if (pmu->gid_info.valid) {
> +
> +                       BUG_ON(sizeof(pmu->gid_info.gid) !=
> +                               sizeof(gid_data.gid));
> +
> +                       memcpy(pmu->gid_info.gid, gid_data.gid,
> +                               sizeof(pmu->gid_info.gid));
> +               }
> +       }
> +
> +       for (i = 0; i < PMU_QUEUE_COUNT; i++)
> +               pmu_queue_init(pmu, i, init);
> +
> +       if (!pmu->dmem.alloc)
> +               nvkm_pmu_allocator_init(&pmu->dmem, "gk20a_pmu_dmem",
> +                               init->sw_managed_area_offset,
> +                               init->sw_managed_area_size);
> +
> +       pmu->pmu_ready = true;
> +       pmu->pmu_state = PMU_STATE_INIT_RECEIVED;
> +
> +       return 0;
> +}
> +
> +static bool pmu_read_message(struct pmu_desc *pmu, struct pmu_queue *queue,
> +                       struct pmu_msg *msg, int *status)
> +{
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +       u32 read_size, bytes_read;
> +       int err;
> +
> +       *status = 0;
> +
> +       if (pmu_queue_is_empty(pmu, queue))
> +               return false;
> +
> +       err = pmu_queue_open_read(pmu, queue);
> +       if (err) {
> +               nv_error(ppmu,
> +                       "fail to open queue %d for read", queue->id);
> +               *status = err;
> +               return false;
> +       }
> +
> +       err = pmu_queue_pop(pmu, queue, &msg->hdr,
> +                       PMU_MSG_HDR_SIZE, &bytes_read);
> +       if (err || bytes_read != PMU_MSG_HDR_SIZE) {
> +               nv_error(ppmu,
> +                       "fail to read msg from queue %d", queue->id);
> +               *status = err | -EINVAL;
> +               goto clean_up;
> +       }
> +
> +       if (msg->hdr.unit_id == PMU_UNIT_REWIND) {
> +               pmu_queue_rewind(pmu, queue);
> +               /* read again after rewind */
> +               err = pmu_queue_pop(pmu, queue, &msg->hdr,
> +                               PMU_MSG_HDR_SIZE, &bytes_read);
> +               if (err || bytes_read != PMU_MSG_HDR_SIZE) {
> +                       nv_error(ppmu,
> +                               "fail to read msg from queue %d", queue->id);
> +                       *status = err | -EINVAL;
> +                       goto clean_up;
> +               }
> +       }
> +
> +       if (!PMU_UNIT_ID_IS_VALID(msg->hdr.unit_id)) {
> +               nv_error(ppmu,
> +                       "read invalid unit_id %d from queue %d",
> +                       msg->hdr.unit_id, queue->id);
> +                       *status = -EINVAL;
> +                       goto clean_up;
> +       }
> +
> +       if (msg->hdr.size > PMU_MSG_HDR_SIZE) {
> +               read_size = msg->hdr.size - PMU_MSG_HDR_SIZE;
> +               err = pmu_queue_pop(pmu, queue, &msg->msg,
> +                       read_size, &bytes_read);
> +               if (err || bytes_read != read_size) {
> +                       nv_error(ppmu,
> +                               "fail to read msg from queue %d", queue->id);
> +                       *status = err;
> +                       goto clean_up;
> +               }
> +       }
> +
> +       err = pmu_queue_close(pmu, queue, true);
> +       if (err) {
> +               nv_error(ppmu,
> +                       "fail to close queue %d", queue->id);
> +               *status = err;
> +               return false;
> +       }
> +
> +       return true;
> +
> +clean_up:
> +       err = pmu_queue_close(pmu, queue, false);
> +       if (err)
> +               nv_error(ppmu,
> +                       "fail to close queue %d", queue->id);
> +       return false;
> +}
> +
> +static int pmu_response_handle(struct pmu_desc *pmu,
> +                       struct pmu_msg *msg)
> +{
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +       struct pmu_sequence *seq;
> +       int ret = 0;
> +
> +       nv_debug(ppmu, "handling pmu response\n");
> +       seq = &pmu->seq[msg->hdr.seq_id];
> +       if (seq->state != PMU_SEQ_STATE_USED &&
> +           seq->state != PMU_SEQ_STATE_CANCELLED) {
> +               nv_error(ppmu,
> +                       "msg for an unknown sequence %d", seq->id);
> +               return -EINVAL;
> +       }
> +
> +       if (msg->hdr.unit_id == PMU_UNIT_RC &&
> +           msg->msg.rc.msg_type == PMU_RC_MSG_TYPE_UNHANDLED_CMD) {
> +               nv_error(ppmu,
> +                       "unhandled cmd: seq %d", seq->id);
> +       } else if (seq->state != PMU_SEQ_STATE_CANCELLED) {
> +               if (seq->msg) {
> +                       if (seq->msg->hdr.size >= msg->hdr.size) {
> +                               memcpy(seq->msg, msg, msg->hdr.size);
> +                               if (seq->out_gk20a.alloc.dmem.size != 0) {
> +                                       pmu_copy_from_dmem(pmu,
> +                                       seq->out_gk20a.alloc.dmem.offset,
> +                                       seq->out_payload,
> +                                       seq->out_gk20a.alloc.dmem.size, 0);
> +                               }
> +                       } else {
> +                               nv_error(ppmu,
> +                                       "sequence %d msg buffer too small",
> +                                       seq->id);
> +                       }
> +               }
> +       } else
> +               seq->callback = NULL;
> +       if (seq->in_gk20a.alloc.dmem.size != 0)
> +               pmu->dmem.free(&pmu->dmem,
> +                       seq->in_gk20a.alloc.dmem.offset,
> +                       seq->in_gk20a.alloc.dmem.size,
> +                       PMU_DMEM_ALLOC_ALIGNMENT);
> +       if (seq->out_gk20a.alloc.dmem.size != 0)
> +               pmu->dmem.free(&pmu->dmem,
> +                       seq->out_gk20a.alloc.dmem.offset,
> +                       seq->out_gk20a.alloc.dmem.size,
> +                       PMU_DMEM_ALLOC_ALIGNMENT);
> +
> +       if (seq->callback)
> +               seq->callback(ppmu, msg, seq->cb_params, seq->desc, ret);
> +
> +       pmu_seq_release(pmu, seq);
> +
> +       /* TBD: notify client waiting for available dmem */
> +       nv_debug(ppmu, "pmu response processed\n");
> +
> +       return 0;
> +}
> +
> +int pmu_wait_message_cond(struct pmu_desc *pmu, u32 timeout,
> +                                u32 *var, u32 val);
> +
> +
> +static int pmu_handle_event(struct pmu_desc *pmu, struct pmu_msg *msg)
> +{
> +       int err = 0;
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +
> +       switch (msg->hdr.unit_id) {
> +       case PMU_UNIT_PERFMON:
> +               nv_debug(ppmu, "init perfmon event generated\n");
> +               break;
> +       default:
> +               nv_debug(ppmu, "default event generated\n");
> +               break;
> +       }
> +
> +       return err;
> +}
> +
> +void pmu_process_message(struct work_struct *work)
> +{
> +       struct pmu_desc *pmu = container_of(work, struct pmu_desc, isr_workq);
> +       struct pmu_msg msg;
> +       int status;
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +       struct nvkm_mc *pmc = nvkm_mc(ppmu);
> +
> +       mutex_lock(&pmu->isr_mutex);
> +       if (unlikely(!pmu->pmu_ready)) {
> +               nv_debug(ppmu, "processing init msg\n");
> +               pmu_process_init_msg(pmu, &msg);
> +               mutex_unlock(&pmu->isr_mutex);
> +               pmu_enable_irq(ppmu, pmc, true);
> +               goto out;
> +       }
> +
> +       while (pmu_read_message(pmu,
> +               &pmu->queue[PMU_MESSAGE_QUEUE], &msg, &status)) {
> +
> +               nv_debug(ppmu, "read msg hdr:\n"
> +                               "unit_id = 0x%08x, size = 0x%08x,\n"
> +                               "ctrl_flags = 0x%08x, seq_id = 0x%08x\n",
> +                               msg.hdr.unit_id, msg.hdr.size,
> +                               msg.hdr.ctrl_flags, msg.hdr.seq_id);
> +
> +               msg.hdr.ctrl_flags &= ~PMU_CMD_FLAGS_PMU_MASK;
> +
> +               if (msg.hdr.ctrl_flags == PMU_CMD_FLAGS_EVENT)
> +                       pmu_handle_event(pmu, &msg);
> +               else
> +                       pmu_response_handle(pmu, &msg);
> +       }
> +       mutex_unlock(&pmu->isr_mutex);
> +       pmu_enable_irq(ppmu, pmc, true);
> +out:
> +       nv_debug(ppmu, "exit %s\n", __func__);
> +}
> +
> +int gk20a_pmu_destroy(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc)
> +{
> +       struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +       struct pmu_desc *pmu = &impl->pmudata;
> +
> +       /* make sure the pending operations are finished before we continue */
> +       cancel_work_sync(&pmu->isr_workq);
> +       pmu->initialized = false;
> +
> +       mutex_lock(&pmu->isr_mutex);
> +       pmu_enable(ppmu, pmc, false);
> +       pmu->isr_enabled = false;
> +       mutex_unlock(&pmu->isr_mutex);
> +
> +       pmu->pmu_state = PMU_STATE_OFF;
> +       pmu->pmu_ready = false;
> +       pmu->zbc_ready = false;
> +
> +       return 0;
> +}
> +
> +int gk20a_pmu_load_norm(struct nvkm_pmu *ppmu, u32 *load)
> +{
> +       struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +       struct pmu_desc *pmu = &impl->pmudata;
> +       *load = pmu->load_shadow;
> +       return 0;
> +}
> +
> +int gk20a_pmu_load_update(struct nvkm_pmu *ppmu)
> +{
> +       struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +       struct pmu_desc *pmu = &impl->pmudata;
> +       u16 _load = 0;
> +
> +       pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0);
> +       pmu->load_shadow = _load / 10;
> +       pmu->load_avg = (((9*pmu->load_avg) + pmu->load_shadow) / 10);
> +
> +       return 0;
> +}
> +
> +void gk20a_pmu_get_load_counters(struct nvkm_pmu *ppmu, u32 *busy_cycles,
> +                                u32 *total_cycles)
> +{
> +  /*todo if (!g->power_on || gk20a_busy(g->dev)) {
> +               *busy_cycles = 0;
> +               *total_cycles = 0;
> +               return;
> +               }*/
> +
> +       *busy_cycles = nv_rd32(ppmu, 0x0010a508 + 16) & 0x7fffffff;
> +       /*todormb();*/
> +       *total_cycles = nv_rd32(ppmu, 0x0010a508 + 32) & 0x7fffffff;
> +       /*todogk20a_idle(g->dev);*/
> +}
> +
> +void gk20a_pmu_reset_load_counters(struct nvkm_pmu *ppmu)
> +{
> +       u32 reg_val = 1 << 31;
> +
> +       /*todoif (!g->power_on || gk20a_busy(g->dev))
> +         return;*/
> +
> +       nv_wr32(ppmu, 0x0010a508 + 32, reg_val);
> +       /*todowmb()*/;
> +       nv_wr32(ppmu, 0x0010a508 + 16, reg_val);
> +       /*todogk20a_idle(g->dev);*/
> +}
> +
> +static int gk20a_init_pmu_setup_hw1(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc)
> +{
> +       struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +       struct pmu_desc *pmu = &impl->pmudata;
> +       int err;
> +
> +       mutex_lock(&pmu->isr_mutex);
> +       pmu_reset(ppmu, pmc);
> +       pmu->isr_enabled = true;
> +       mutex_unlock(&pmu->isr_mutex);
> +
> +       /* setup apertures - virtual */
> +       nv_wr32(ppmu, 0x10a600 + 0 * 4, 0x0);
> +       nv_wr32(ppmu, 0x10a600 + 1 * 4, 0x0);
> +       /* setup apertures - physical */
> +       nv_wr32(ppmu, 0x10a600 + 2 * 4, 0x4 | 0x0);
> +       nv_wr32(ppmu, 0x10a600 + 3 * 4, 0x4 | 0x1);
> +       nv_wr32(ppmu, 0x10a600 + 4 * 4, 0x4 | 0x2);
> +
> +       /* TBD: load pmu ucode */
> +       err = pmu_bootstrap(pmu);
> +       if (err)
> +               return err;
> +
> +       return 0;
> +
> +}
> +
> +static int gk20a_init_pmu_setup_sw(struct nvkm_pmu *ppmu)
> +{
> +       struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +       struct pmu_desc *pmu = &impl->pmudata;
> +       struct pmu_priv_vm *ppmuvm = &pmuvm;
> +       int i, err = 0;
> +       int ret = 0;
> +
> +
> +       if (pmu->sw_ready) {
> +
> +               for (i = 0; i < pmu->mutex_cnt; i++) {
> +                       pmu->mutex[i].id    = i;
> +                       pmu->mutex[i].index = i;
> +               }
> +               pmu_seq_init(pmu);
> +
> +               nv_debug(ppmu, "skipping init\n");
> +               goto skip_init;
> +       }
> +
> +       /* no infoRom script from vbios? */
> +
> +       /* TBD: sysmon subtask */
> +
> +       pmu->mutex_cnt = 0x00000010;
> +       pmu->mutex = kzalloc(pmu->mutex_cnt *
> +               sizeof(struct pmu_mutex), GFP_KERNEL);
> +       if (!pmu->mutex) {
> +               err = -ENOMEM;
> +               nv_error(ppmu, "not enough space ENOMEM\n");
> +               goto err;
> +       }
> +
> +       for (i = 0; i < pmu->mutex_cnt; i++) {
> +               pmu->mutex[i].id    = i;
> +               pmu->mutex[i].index = i;
> +       }
> +
> +       pmu->seq = kzalloc(PMU_MAX_NUM_SEQUENCES *
> +               sizeof(struct pmu_sequence), GFP_KERNEL);
> +       if (!pmu->seq) {
> +               err = -ENOMEM;
> +               nv_error(ppmu, "not enough space ENOMEM\n");
> +               goto err_free_mutex;
> +       }
> +
> +       pmu_seq_init(pmu);
> +
> +       INIT_WORK(&pmu->isr_workq, pmu_process_message);
> +       init_waitqueue_head(&ppmu->init_wq);
> +       ppmu->gr_initialised = false;
> +
> +       /* allocate memory for pmu fw area */
> +       ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, GK20A_PMU_SEQ_BUF_SIZE,
> +                                           0x1000, 0, &pmu->seq_buf.pmubufobj);
> +       if (ret)
> +               return ret;
> +       ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, GK20A_PMU_TRACE_BUFSIZE,
> +                                           0, 0, &pmu->trace_buf.pmubufobj);
> +       if (ret)
> +               return ret;
> +       /* map allocated memory into GMMU */
> +       ret = nvkm_gpuobj_map_vm(nv_gpuobj(pmu->seq_buf.pmubufobj),
> +                                       ppmuvm->vm,
> +                                       NV_MEM_ACCESS_RW,
> +                                       &pmu->seq_buf.pmubufvma);
> +       if (ret)
> +               return ret;
> +       ret = nvkm_gpuobj_map_vm(nv_gpuobj(pmu->trace_buf.pmubufobj),
> +                                       ppmuvm->vm,
> +                                       NV_MEM_ACCESS_RW,
> +                                       &pmu->trace_buf.pmubufvma);
> +       if (ret)
> +               return ret;
> +
> +       /* TBD: remove this if ZBC save/restore is handled by PMU
> +        * end an empty ZBC sequence for now */
> +       nv_wo32(pmu->seq_buf.pmubufobj, 0, 0x16);
> +       nv_wo32(pmu->seq_buf.pmubufobj, 1, 0x00);
> +       nv_wo32(pmu->seq_buf.pmubufobj, 2, 0x01);
> +       nv_wo32(pmu->seq_buf.pmubufobj, 3, 0x00);
> +       nv_wo32(pmu->seq_buf.pmubufobj, 4, 0x00);
> +       nv_wo32(pmu->seq_buf.pmubufobj, 5, 0x00);
> +       nv_wo32(pmu->seq_buf.pmubufobj, 6, 0x00);
> +       nv_wo32(pmu->seq_buf.pmubufobj, 7, 0x00);
> +
> +       pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE;
> +       ret = gk20a_pmu_debugfs_init(ppmu);
> +       if (ret)
> +               return ret;
> +
> +       pmu->sw_ready = true;
> +
> +skip_init:
> +       return 0;
> +err_free_mutex:
> +       kfree(pmu->mutex);
> +err:
> +       return err;
> +}
> +
> +static void
> +gk20a_pmu_pgob(struct nvkm_pmu *ppmu, bool enable)
> +{
> +       /*
> +       nv_mask(ppmu, 0x000200, 0x00001000, 0x00000000);
> +       nv_rd32(ppmu, 0x000200);
> +       nv_mask(ppmu, 0x000200, 0x08000000, 0x08000000);
> +
> +       msleep(50);
> +
> +       nv_mask(ppmu, 0x000200, 0x08000000, 0x00000000);
> +       nv_mask(ppmu, 0x000200, 0x00001000, 0x00001000);
> +       nv_rd32(ppmu, 0x000200);
> +       */
> +}
> +
> +static void gk20a_pmu_intr(struct nvkm_subdev *subdev)
> +{
> +       struct nvkm_pmu *ppmu = nvkm_pmu(subdev);
> +
> +       gk20a_pmu_isr(ppmu);
> +}
> +
> +void gk20a_remove_pmu_support(struct pmu_desc *pmu)
> +{
> +       nvkm_pmu_allocator_destroy(&pmu->dmem);
> +}
> +
> +int  gk20a_message(struct nvkm_pmu *ppmu, u32 reply[2],
> +                u32 process, u32 message, u32 data0, u32 data1)
> +{
> +       return -EPERM;
> +}
> +
> +int
> +gk20a_pmu_create_(struct nvkm_object *parent,
> +                   struct nvkm_object *engine,
> +                   struct nvkm_oclass *oclass, int length, void **pobject)
> +{
> +       struct nvkm_pmu *ppmu;
> +       struct nvkm_device *device = nv_device(parent);
> +       int ret;
> +
> +       ret = nvkm_subdev_create_(parent, engine, oclass, 0, "PPMU",
> +                                    "pmu", length, pobject);
> +       ppmu = *pobject;
> +       if (ret)
> +               return ret;
> +
> +       ret = nv_device_get_irq(device, true);
> +
> +       ppmu->message = gk20a_message;
> +       ppmu->pgob = gk20a_pmu_pgob;
> +       ppmu->pmu_mutex_acquire = pmu_mutex_acquire;
> +       ppmu->pmu_mutex_release = pmu_mutex_release;
> +       ppmu->pmu_load_norm = gk20a_pmu_load_norm;
> +       ppmu->pmu_load_update = gk20a_pmu_load_update;
> +       ppmu->pmu_reset_load_counters = gk20a_pmu_reset_load_counters;
> +       ppmu->pmu_get_load_counters = gk20a_pmu_get_load_counters;
> +
> +       return 0;
> +}
> +
> +
> +
> diff --git a/drm/nouveau/nvkm/subdev/pmu/gk20a.h b/drm/nouveau/nvkm/subdev/pmu/gk20a.h
> new file mode 100644
> index 000000000000..a084d6d518b4
> --- /dev/null
> +++ b/drm/nouveau/nvkm/subdev/pmu/gk20a.h
> @@ -0,0 +1,369 @@
> +#ifndef __NVKM_pmu_GK20A_H__
> +#define __NVKM_pmu_GK20A_H__
> +
> +/*
> + * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + */
> +void pmu_setup_hw(struct pmu_desc *pmu);
> +void gk20a_remove_pmu_support(struct pmu_desc *pmu);
> +#define gk20a_pmu_create(p, e, o, d)                                         \
> +       gk20a_pmu_create_((p), (e), (o), sizeof(**d), (void **)d)
> +
> +int gk20a_pmu_create_(struct nvkm_object *, struct nvkm_object *,
> +                       struct nvkm_oclass *, int, void **);
> +/* defined by pmu hw spec */
> +#define GK20A_PMU_VA_SIZE              (512 * 1024 * 1024)
> +#define GK20A_PMU_UCODE_SIZE_MAX       (256 * 1024)
> +#define GK20A_PMU_SEQ_BUF_SIZE         4096
> +/* idle timeout */
> +#define GK20A_IDLE_CHECK_DEFAULT               100 /* usec */
> +#define GK20A_IDLE_CHECK_MAX           5000 /* usec */
> +
> +/* so far gk20a has two engines: gr and ce2(gr_copy) */
> +enum {
> +       ENGINE_GR_GK20A     = 0,
> +       ENGINE_CE2_GK20A    = 1,
> +       ENGINE_INVAL_GK20A
> +};
> +
> +#define ZBC_MASK(i)                    (~(~(0) << ((i)+1)) & 0xfffe)
> +
> +#define APP_VERSION_GK20A 17997577
> +
> +enum {
> +       GK20A_PMU_DMAIDX_UCODE          = 0,
> +       GK20A_PMU_DMAIDX_VIRT           = 1,
> +       GK20A_PMU_DMAIDX_PHYS_VID       = 2,
> +       GK20A_PMU_DMAIDX_PHYS_SYS_COH   = 3,
> +       GK20A_PMU_DMAIDX_PHYS_SYS_NCOH  = 4,
> +       GK20A_PMU_DMAIDX_RSVD           = 5,
> +       GK20A_PMU_DMAIDX_PELPG          = 6,
> +       GK20A_PMU_DMAIDX_END            = 7
> +};
> +
> +struct pmu_mem_gk20a {
> +       u32 dma_base;
> +       u8  dma_offset;
> +       u8  dma_idx;
> +       u16 fb_size;
> +};
> +
> +struct pmu_dmem {
> +       u16 size;
> +       u32 offset;
> +};
> +
> +struct pmu_cmdline_args_gk20a {
> +       u32 cpu_freq_hz;                /* Frequency of the clock driving PMU */
> +       u32 falc_trace_size;            /* falctrace buffer size (bytes) */
> +       u32 falc_trace_dma_base;        /* 256-byte block address */
> +       u32 falc_trace_dma_idx;         /* dmaIdx for DMA operations */
> +       u8 secure_mode;
> +       struct pmu_mem_gk20a gc6_ctx;           /* dmem offset of gc6 context */
> +};
> +
> +#define GK20A_PMU_TRACE_BUFSIZE     0x4000   /* 4K */
> +#define GK20A_PMU_DMEM_BLKSIZE2                8
> +
> +#define GK20A_PMU_UCODE_NB_MAX_OVERLAY     32
> +#define GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH  64
> +
> +struct pmu_ucode_desc {
> +       u32 descriptor_size;
> +       u32 image_size;
> +       u32 tools_version;
> +       u32 app_version;
> +       char date[GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH];
> +       u32 bootloader_start_offset;
> +       u32 bootloader_size;
> +       u32 bootloader_imem_offset;
> +       u32 bootloader_entry_point;
> +       u32 app_start_offset;
> +       u32 app_size;
> +       u32 app_imem_offset;
> +       u32 app_imem_entry;
> +       u32 app_dmem_offset;
> +       u32 app_resident_code_offset;  /* Offset from appStartOffset */
> +/* Exact size of the resident code
> + * ( potentially contains CRC inside at the end ) */
> +       u32 app_resident_code_size;
> +       u32 app_resident_data_offset;  /* Offset from appStartOffset */
> +/* Exact size of the resident data
> + * ( potentially contains CRC inside at the end ) */
> +       u32 app_resident_data_size;
> +       u32 nb_overlays;
> +       struct {u32 start; u32 size; } load_ovl[GK20A_PMU_UCODE_NB_MAX_OVERLAY];
> +       u32 compressed;
> +};
> +
> +#define PMU_UNIT_REWIND                (0x00)
> +#define PMU_UNIT_PG            (0x03)
> +#define PMU_UNIT_INIT          (0x07)
> +#define PMU_UNIT_PERFMON       (0x12)
> +#define PMU_UNIT_THERM         (0x1B)
> +#define PMU_UNIT_RC            (0x1F)
> +#define PMU_UNIT_NULL          (0x20)
> +#define PMU_UNIT_END           (0x23)
> +
> +#define PMU_UNIT_TEST_START    (0xFE)
> +#define PMU_UNIT_END_SIM       (0xFF)
> +#define PMU_UNIT_TEST_END      (0xFF)
> +
> +#define PMU_UNIT_ID_IS_VALID(id)               \
> +               (((id) < PMU_UNIT_END) || ((id) >= PMU_UNIT_TEST_START))
> +
> +#define PMU_DMEM_ALLOC_ALIGNMENT       (32)
> +#define PMU_DMEM_ALIGNMENT             (4)
> +
> +#define PMU_CMD_FLAGS_PMU_MASK         (0xF0)
> +
> +#define PMU_CMD_FLAGS_STATUS           BIT(0)
> +#define PMU_CMD_FLAGS_INTR             BIT(1)
> +#define PMU_CMD_FLAGS_EVENT            BIT(2)
> +#define PMU_CMD_FLAGS_WATERMARK                BIT(3)
> +
> +struct pmu_hdr {
> +       u8 unit_id;
> +       u8 size;
> +       u8 ctrl_flags;
> +       u8 seq_id;
> +};
> +#define PMU_MSG_HDR_SIZE       sizeof(struct pmu_hdr)
> +#define PMU_CMD_HDR_SIZE       sizeof(struct pmu_hdr)
> +
> +
> +struct pmu_allocation_gk20a {
> +       struct {
> +               struct pmu_dmem dmem;
> +               struct pmu_mem_gk20a fb;
> +       } alloc;
> +};
> +
> +enum {
> +       PMU_INIT_MSG_TYPE_PMU_INIT = 0,
> +};
> +
> +struct pmu_init_msg_pmu_gk20a {
> +       u8 msg_type;
> +       u8 pad;
> +       u16  os_debug_entry_point;
> +
> +       struct {
> +               u16 size;
> +               u16 offset;
> +               u8  index;
> +               u8  pad;
> +       } queue_info[PMU_QUEUE_COUNT];
> +
> +       u16 sw_managed_area_offset;
> +       u16 sw_managed_area_size;
> +};
> +
> +struct pmu_init_msg {
> +       union {
> +               u8 msg_type;
> +               struct pmu_init_msg_pmu_gk20a pmu_init_gk20a;
> +       };
> +};
> +
> +
> +enum {
> +       PMU_RC_MSG_TYPE_UNHANDLED_CMD = 0,
> +};
> +
> +struct pmu_rc_msg_unhandled_cmd {
> +       u8 msg_type;
> +       u8 unit_id;
> +};
> +
> +struct pmu_rc_msg {
> +       u8 msg_type;
> +       struct pmu_rc_msg_unhandled_cmd unhandled_cmd;
> +};
> +
> +/* PERFMON */
> +#define PMU_DOMAIN_GROUP_PSTATE                0
> +#define PMU_DOMAIN_GROUP_GPC2CLK       1
> +#define PMU_DOMAIN_GROUP_NUM           2
> +struct pmu_perfmon_counter_gk20a {
> +       u8 index;
> +       u8 flags;
> +       u8 group_id;
> +       u8 valid;
> +       u16 upper_threshold; /* units of 0.01% */
> +       u16 lower_threshold; /* units of 0.01% */
> +};
> +struct pmu_zbc_cmd {
> +       u8 cmd_type;
> +       u8 pad;
> +       u16 entry_mask;
> +};
> +
> +/* PERFMON MSG */
> +enum {
> +       PMU_PERFMON_MSG_ID_INCREASE_EVENT = 0,
> +       PMU_PERFMON_MSG_ID_DECREASE_EVENT = 1,
> +       PMU_PERFMON_MSG_ID_INIT_EVENT     = 2,
> +       PMU_PERFMON_MSG_ID_ACK            = 3
> +};
> +
> +struct pmu_perfmon_msg_generic {
> +       u8 msg_type;
> +       u8 state_id;
> +       u8 group_id;
> +       u8 data;
> +};
> +
> +struct pmu_perfmon_msg {
> +       union {
> +               u8 msg_type;
> +               struct pmu_perfmon_msg_generic gen;
> +       };
> +};
> +
> +
> +struct pmu_cmd {
> +       struct pmu_hdr hdr;
> +       union {
> +               struct pmu_zbc_cmd zbc;
> +       } cmd;
> +};
> +
> +struct pmu_msg {
> +       struct pmu_hdr hdr;
> +       union {
> +               struct pmu_init_msg init;
> +               struct pmu_perfmon_msg perfmon;
> +               struct pmu_rc_msg rc;
> +       } msg;
> +};
> +
> +/* write by sw, read by pmu, protected by sw mutex lock */
> +#define PMU_COMMAND_QUEUE_HPQ          0
> +/* write by sw, read by pmu, protected by sw mutex lock */
> +#define PMU_COMMAND_QUEUE_LPQ          1
> +/* write by pmu, read by sw, accessed by interrupt handler, no lock */
> +#define PMU_MESSAGE_QUEUE              4
> +#define PMU_QUEUE_COUNT                        5
> +
> +enum {
> +       PMU_MUTEX_ID_RSVD1 = 0,
> +       PMU_MUTEX_ID_GPUSER,
> +       PMU_MUTEX_ID_GPMUTEX,
> +       PMU_MUTEX_ID_I2C,
> +       PMU_MUTEX_ID_RMLOCK,
> +       PMU_MUTEX_ID_MSGBOX,
> +       PMU_MUTEX_ID_FIFO,
> +       PMU_MUTEX_ID_PG,
> +       PMU_MUTEX_ID_GR,
> +       PMU_MUTEX_ID_CLK,
> +       PMU_MUTEX_ID_RSVD6,
> +       PMU_MUTEX_ID_RSVD7,
> +       PMU_MUTEX_ID_RSVD8,
> +       PMU_MUTEX_ID_RSVD9,
> +       PMU_MUTEX_ID_INVALID
> +};
> +
> +#define PMU_IS_COMMAND_QUEUE(id)       \
> +               ((id)  < PMU_MESSAGE_QUEUE)
> +
> +#define PMU_IS_SW_COMMAND_QUEUE(id)    \
> +               (((id) == PMU_COMMAND_QUEUE_HPQ) || \
> +                ((id) == PMU_COMMAND_QUEUE_LPQ))
> +
> +#define  PMU_IS_MESSAGE_QUEUE(id)      \
> +               ((id) == PMU_MESSAGE_QUEUE)
> +
> +enum {
> +       OFLAG_READ = 0,
> +       OFLAG_WRITE
> +};
> +
> +#define QUEUE_SET              (true)
> +       /*todo find how to get cpu_pa*/
> +#define QUEUE_GET              (false)
> +
> +#define QUEUE_ALIGNMENT                (4)
> +
> +#define PMU_PGENG_GR_BUFFER_IDX_INIT   (0)
> +#define PMU_PGENG_GR_BUFFER_IDX_ZBC    (1)
> +#define PMU_PGENG_GR_BUFFER_IDX_FECS   (2)
> +
> +enum {
> +       PMU_DMAIDX_UCODE         = 0,
> +       PMU_DMAIDX_VIRT          = 1,
> +       PMU_DMAIDX_PHYS_VID      = 2,
> +       PMU_DMAIDX_PHYS_SYS_COH  = 3,
> +       PMU_DMAIDX_PHYS_SYS_NCOH = 4,
> +       PMU_DMAIDX_RSVD          = 5,
> +       PMU_DMAIDX_PELPG         = 6,
> +       PMU_DMAIDX_END           = 7
> +};
> +
> +#define PMU_MUTEX_ID_IS_VALID(id)      \
> +               ((id) < PMU_MUTEX_ID_INVALID)
> +
> +#define PMU_INVALID_MUTEX_OWNER_ID     (0)
> +
> +struct pmu_mutex {
> +       u32 id;
> +       u32 index;
> +       u32 ref_cnt;
> +};
> +
> +
> +#define PMU_INVALID_SEQ_DESC           (~0)
> +
> +enum {
> +       PMU_SEQ_STATE_FREE = 0,
> +       PMU_SEQ_STATE_PENDING,
> +       PMU_SEQ_STATE_USED,
> +       PMU_SEQ_STATE_CANCELLED
> +};
> +
> +struct pmu_payload {
> +       struct {
> +               void *buf;
> +               u32 offset;
> +               u32 size;
> +       } in, out;
> +};
> +
> +typedef void (*pmu_callback)(struct nvkm_pmu *, struct pmu_msg *, void *,
> +u32, u32);
> +
> +struct pmu_sequence {
> +       u8 id;
> +       u32 state;
> +       u32 desc;
> +       struct pmu_msg *msg;
> +       struct pmu_allocation_gk20a in_gk20a;
> +       struct pmu_allocation_gk20a out_gk20a;
> +       u8 *out_payload;
> +       pmu_callback callback;
> +       void *cb_params;
> +};
> +struct pmu_gk20a_data {
> +       struct pmu_perfmon_counter_gk20a perfmon_counter_gk20a;
> +       u32 perfmon_state_id[PMU_DOMAIN_GROUP_NUM];
> +};
> +
> +#endif /*_GK20A_H__*/
> diff --git a/drm/nouveau/nvkm/subdev/pmu/priv.h b/drm/nouveau/nvkm/subdev/pmu/priv.h
> index 998410563bfd..c4686e418582 100644
> --- a/drm/nouveau/nvkm/subdev/pmu/priv.h
> +++ b/drm/nouveau/nvkm/subdev/pmu/priv.h
> @@ -2,7 +2,91 @@
>  #define __NVKM_PMU_PRIV_H__
>  #include <subdev/pmu.h>
>  #include <subdev/pmu/fuc/os.h>
> +#include <core/object.h>
> +#include <core/device.h>
> +#include <core/parent.h>
> +#include <core/mm.h>
> +#include <linux/rwsem.h>
> +#include <linux/slab.h>
> +#include <subdev/mmu.h>
> +#include <core/gpuobj.h>
>
> +static inline u32 u64_hi32(u64 n)
> +{
> +       return (u32)((n >> 32) & ~(u32)0);
> +}
> +
> +static inline u32 u64_lo32(u64 n)
> +{
> +       return (u32)(n & ~(u32)0);
> +}
> +
> +/* #define ALLOCATOR_DEBUG */
> +
> +/* main struct */
> +struct nvkm_pmu_allocator {
> +
> +       char name[32];                  /* name for allocator */
> +/*struct rb_root rb_root;*/            /* rb tree root for blocks */
> +
> +       u32 base;                       /* min value of this linear space */
> +       u32 limit;                      /* max value = limit - 1 */
> +
> +       unsigned long *bitmap;          /* bitmap */
> +
> +       struct gk20a_alloc_block *block_first;  /* first block in list */
> +       struct gk20a_alloc_block *block_recent; /* last visited block */
> +
> +       u32 first_free_addr;            /* first free addr, non-contigous
> +                                          allocation preferred start,
> +                                          in order to pick up small holes */
> +       u32 last_free_addr;             /* last free addr, contiguous
> +                                          allocation preferred start */
> +       u32 cached_hole_size;           /* max free hole size up to
> +                                          last_free_addr */
> +       u32 block_count;                /* number of blocks */
> +
> +       struct rw_semaphore rw_sema;    /* lock */
> +       struct kmem_cache *block_cache; /* slab cache */
> +
> +       /* if enabled, constrain to [base, limit) */
> +       struct {
> +               bool enable;
> +               u32 base;
> +               u32 limit;
> +       } constraint;
> +
> +       int (*alloc)(struct nvkm_pmu_allocator *allocator,
> +               u32 *addr, u32 len, u32 align);
> +       int (*free)(struct nvkm_pmu_allocator *allocator,
> +               u32 addr, u32 len, u32 align);
> +
> +};
> +
> +int nvkm_pmu_allocator_init(struct nvkm_pmu_allocator *allocator,
> +                       const char *name, u32 base, u32 size);
> +void nvkm_pmu_allocator_destroy(struct nvkm_pmu_allocator *allocator);
> +
> +int nvkm_pmu_allocator_block_alloc(struct nvkm_pmu_allocator *allocator,
> +                       u32 *addr, u32 len, u32 align);
> +
> +int nvkm_pmu_allocator_block_free(struct nvkm_pmu_allocator *allocator,
> +                       u32 addr, u32 len, u32 align);
> +
> +#if defined(ALLOCATOR_DEBUG)
> +
> +#define allocator_dbg(alloctor, format, arg...)                                \
> +do {                                                           \
> +       if (1)                                                  \
> +               pr_debug("nvkm_pmu_allocator (%s) %s: " format "\n",\
> +                       alloctor->name, __func__, ##arg);\
> +} while (0)
> +
> +#else /* ALLOCATOR_DEBUG */
> +
> +#define allocator_dbg(format, arg...)
> +
> +#endif /* ALLOCATOR_DEBUG */
>  #define nvkm_pmu_create(p, e, o, d)                                         \
>         nvkm_pmu_create_((p), (e), (o), sizeof(**d), (void **)d)
>  #define nvkm_pmu_destroy(p)                                                 \
> @@ -26,6 +110,179 @@ int _nvkm_pmu_ctor(struct nvkm_object *, struct nvkm_object *,
>  int _nvkm_pmu_init(struct nvkm_object *);
>  int _nvkm_pmu_fini(struct nvkm_object *, bool);
>  void nvkm_pmu_pgob(struct nvkm_pmu *pmu, bool enable);
> +#define PMU_PG_IDLE_THRESHOLD                  15000
> +#define PMU_PG_POST_POWERUP_IDLE_THRESHOLD     1000000
> +
> +/* state transition :
> +    OFF => [OFF_ON_PENDING optional] => ON_PENDING => ON => OFF
> +    ON => OFF is always synchronized */
> +#define PMU_ELPG_STAT_OFF              0   /* elpg is off */
> +#define PMU_ELPG_STAT_ON               1   /* elpg is on */
> +/* elpg is off, ALLOW cmd has been sent, wait for ack */
> +#define PMU_ELPG_STAT_ON_PENDING       2
> +/* elpg is on, DISALLOW cmd has been sent, wait for ack */
> +#define PMU_ELPG_STAT_OFF_PENDING      3
> +/* elpg is off, caller has requested on, but ALLOW
> +cmd hasn't been sent due to ENABLE_ALLOW delay */
> +#define PMU_ELPG_STAT_OFF_ON_PENDING   4
> +
> +/* Falcon Register index */
> +#define PMU_FALCON_REG_R0              (0)
> +#define PMU_FALCON_REG_R1              (1)
> +#define PMU_FALCON_REG_R2              (2)
> +#define PMU_FALCON_REG_R3              (3)
> +#define PMU_FALCON_REG_R4              (4)
> +#define PMU_FALCON_REG_R5              (5)
> +#define PMU_FALCON_REG_R6              (6)
> +#define PMU_FALCON_REG_R7              (7)
> +#define PMU_FALCON_REG_R8              (8)
> +#define PMU_FALCON_REG_R9              (9)
> +#define PMU_FALCON_REG_R10             (10)
> +#define PMU_FALCON_REG_R11             (11)
> +#define PMU_FALCON_REG_R12             (12)
> +#define PMU_FALCON_REG_R13             (13)
> +#define PMU_FALCON_REG_R14             (14)
> +#define PMU_FALCON_REG_R15             (15)
> +#define PMU_FALCON_REG_IV0             (16)
> +#define PMU_FALCON_REG_IV1             (17)
> +#define PMU_FALCON_REG_UNDEFINED       (18)
> +#define PMU_FALCON_REG_EV              (19)
> +#define PMU_FALCON_REG_SP              (20)
> +#define PMU_FALCON_REG_PC              (21)
> +#define PMU_FALCON_REG_IMB             (22)
> +#define PMU_FALCON_REG_DMB             (23)
> +#define PMU_FALCON_REG_CSW             (24)
> +#define PMU_FALCON_REG_CCR             (25)
> +#define PMU_FALCON_REG_SEC             (26)
> +#define PMU_FALCON_REG_CTX             (27)
> +#define PMU_FALCON_REG_EXCI            (28)
> +#define PMU_FALCON_REG_RSVD0           (29)
> +#define PMU_FALCON_REG_RSVD1           (30)
> +#define PMU_FALCON_REG_RSVD2           (31)
> +#define PMU_FALCON_REG_SIZE            (32)
> +
> +/* Choices for pmu_state */
> +#define PMU_STATE_OFF                  0 /* PMU is off */
> +#define PMU_STATE_STARTING             1 /* PMU is on, but not booted */
> +#define PMU_STATE_INIT_RECEIVED                2 /* PMU init message received */
> +#define PMU_STATE_ELPG_BOOTING         3 /* PMU is booting */
> +#define PMU_STATE_ELPG_BOOTED          4 /* ELPG is initialized */
> +#define PMU_STATE_LOADING_PG_BUF       5 /* Loading PG buf */
> +#define PMU_STATE_LOADING_ZBC          6 /* Loading ZBC buf */
> +#define PMU_STATE_STARTED              7 /* Fully unitialized */
> +
> +#define PMU_QUEUE_COUNT                5
> +
> +#define PMU_MAX_NUM_SEQUENCES          (256)
> +#define PMU_SEQ_BIT_SHIFT              (5)
> +#define PMU_SEQ_TBL_SIZE       \
> +               (PMU_MAX_NUM_SEQUENCES >> PMU_SEQ_BIT_SHIFT)
> +
> +#define PMU_SHA1_GID_SIGNATURE         0xA7C66AD2
> +#define PMU_SHA1_GID_SIGNATURE_SIZE    4
> +
> +#define PMU_SHA1_GID_SIZE      16
> +
> +struct pmu_queue {
> +
> +       /* used by hw, for BIOS/SMI queue */
> +       u32 mutex_id;
> +       u32 mutex_lock;
> +       /* used by sw, for LPQ/HPQ queue */
> +       struct mutex mutex;
> +
> +       /* current write position */
> +       u32 position;
> +       /* physical dmem offset where this queue begins */
> +       u32 offset;
> +       /* logical queue identifier */
> +       u32 id;
> +       /* physical queue index */
> +       u32 index;
> +       /* in bytes */
> +       u32 size;
> +
> +       /* open-flag */
> +       u32 oflag;
> +       bool opened; /* opened implies locked */
> +};
> +
> +struct pmu_sha1_gid {
> +       bool valid;
> +       u8 gid[PMU_SHA1_GID_SIZE];
> +};
> +
> +struct pmu_sha1_gid_data {
> +       u8 signature[PMU_SHA1_GID_SIGNATURE_SIZE];
> +       u8 gid[PMU_SHA1_GID_SIZE];
> +};
> +
> +struct pmu_desc {
> +
> +       struct pmu_ucode_desc *desc;
> +       struct pmu_buf_desc ucode;
> +
> +       struct pmu_buf_desc pg_buf;
> +       /* TBD: remove this if ZBC seq is fixed */
> +       struct pmu_buf_desc seq_buf;
> +       struct pmu_buf_desc trace_buf;
> +       bool buf_loaded;
> +
> +       struct pmu_sha1_gid gid_info;
> +
> +       struct pmu_queue queue[PMU_QUEUE_COUNT];
> +
> +       struct pmu_sequence *seq;
> +       unsigned long pmu_seq_tbl[PMU_SEQ_TBL_SIZE];
> +       u32 next_seq_desc;
> +
> +       struct pmu_mutex *mutex;
> +       u32 mutex_cnt;
> +
> +       struct mutex pmu_copy_lock;
> +       struct mutex pmu_seq_lock;
> +
> +       struct nvkm_pmu_allocator dmem;
> +
> +       u32 *ucode_image;
> +       bool pmu_ready;
> +
> +       u32 zbc_save_done;
> +
> +       u32 stat_dmem_offset;
> +
> +       u32 elpg_stat;
> +
> +       int pmu_state;
> +
> +#define PMU_ELPG_ENABLE_ALLOW_DELAY_MSEC       1 /* msec */
> +       struct work_struct isr_workq;
> +       struct mutex elpg_mutex; /* protect elpg enable/disable */
> +/* disable -1, enable +1, <=0 elpg disabled, > 0 elpg enabled */
> +       int elpg_refcnt;
> +
> +       bool initialized;
> +
> +       void (*remove_support)(struct pmu_desc *pmu);
> +       bool sw_ready;
> +       bool perfmon_ready;
> +
> +       u32 sample_buffer;
> +       u32 load_shadow;
> +       u32 load_avg;
> +
> +       struct mutex isr_mutex;
> +       bool isr_enabled;
> +
> +       bool zbc_ready;
> +       unsigned long perfmon_events_cnt;
> +       bool perfmon_sampling_enabled;
> +       u8 pmu_mode;
> +       u32 falcon_id;
> +       u32 aelpg_param[5];
> +       void *pmu_chip_data;
> +       struct nvkm_pmu *pmu;
> +};
>
>  struct nvkm_pmu_impl {
>         struct nvkm_oclass base;
> @@ -39,5 +296,12 @@ struct nvkm_pmu_impl {
>         } data;
>
>         void (*pgob)(struct nvkm_pmu *, bool);
> +       struct pmu_desc pmudata;
>  };
> +
> +static inline struct nvkm_pmu *impl_from_pmu(struct pmu_desc *pmu)
> +{
> +       return pmu->pmu;
> +}
> +
>  #endif
> --
> 1.9.1
>
> _______________________________________________
> Nouveau mailing list
> Nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [Nouveau] [PATCH] pmu/gk20a: PMU boot support.
       [not found]     ` <CAKb7Uvj0xMvDWjKjGzbD6Tk0NArfkh4Vjvt4eRQ8XoHgR+7bsg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2015-03-12  5:20       ` Deepak Goyal
       [not found]         ` <25b5050176544f47b0ac74d4086f145c-7W72rfoJkVm6sJks/06JalaTQe2KTcn/@public.gmane.org>
  0 siblings, 1 reply; 6+ messages in thread
From: Deepak Goyal @ 2015-03-12  5:20 UTC (permalink / raw)
  To: Ilia Mirkin
  Cc: Ben Skeggs, Alexandre Courbot,
	nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	linux-tegra-u79uwXL29TY76Z2rM5mHXA

Hi Mirkin,

Your observations are quiet correct.
After the boot code is submitted successfully, I will submit the code to configure & enable features of PMU.(This will be done by sending cmds to PMU).

Now talking about this patch:
Apart from just the boot code, I have also included some things in this patch that I can remove for now(I will include these things in later digestible chunks):

- Debugfs support (can be removed for now)
- Debug support for dumping PMU falcon registers(can be removed for now)
- PMU interacts with Kernel via interrupt mechanism.
  For interaction with PMU, we have
  defined command structs, functions to prepare/validate and send commands to PMU.
  This infrastructure is basically to send commands to PMU.(right now it can be removed though we still require to receive messages from PMU to know if it has booted successfully).

But this will be all that I will be able to remove from this patch.
Can I go ahead with removing above suggestions?

Regards,
Deepak G

-----Original Message-----
From: ibmirkin@gmail.com [mailto:ibmirkin@gmail.com] On Behalf Of Ilia Mirkin
Sent: Wednesday, March 11, 2015 10:41 PM
To: Deepak Goyal
Cc: Ben Skeggs; Alexandre Courbot; nouveau@lists.freedesktop.org; linux-tegra@vger.kernel.org
Subject: Re: [Nouveau] [PATCH] pmu/gk20a: PMU boot support.

Hi Deepak,

There's... a lot of stuff going on here. Can you describe the goal of
this patch (which could then be used as the patch commit message)? The
current one basically boils down to "Add support for loading PMU", but
merely loading the fw into a fuc engine is just a handful lines of
code. Also, except in rare cases, it's customary to split up patches
of this size into smaller, more reviewable chunks, which add on bits
of functionality as they go.

From what I can tell, you're adding the kernel-side interface for a
hypothetical (and presumably closed-source) PMU blob that NVIDIA will
supply. In essence, the blob is expected to implement a RTOS which
runs on the PMU's falcon CPU. There are a bunch of API's implemented
by this blob that the host can call, but it also does things on its
own. For the kernel side, each of these API calls should probably be a
separate patch (after an initial "just load it and do nothing" style
patch). Or perhaps have the infrastructure that you add first and then
something that implements the API calls.

Cheers,

  -ilia


On Wed, Mar 11, 2015 at 2:33 AM, Deepak Goyal <dgoyal@nvidia.com> wrote:
> It adds PMU boot support.It loads PMU
> firmware into PMU falcon.RM/Kernel driver
> receives INIT ack (through interrupt mechanism)
> from PMU when PMU boots with success.
>
> Signed-off-by: Deepak Goyal <dgoyal@nvidia.com>
> ---
>  drm/nouveau/include/nvkm/subdev/pmu.h |   26 +-
>  drm/nouveau/nvkm/subdev/pmu/base.c    |  108 ++
>  drm/nouveau/nvkm/subdev/pmu/gk20a.c   | 2131 ++++++++++++++++++++++++++++++++-
>  drm/nouveau/nvkm/subdev/pmu/gk20a.h   |  369 ++++++
>  drm/nouveau/nvkm/subdev/pmu/priv.h    |  264 ++++
>  5 files changed, 2884 insertions(+), 14 deletions(-)
>  create mode 100644 drm/nouveau/nvkm/subdev/pmu/gk20a.h
>
> diff --git a/drm/nouveau/include/nvkm/subdev/pmu.h b/drm/nouveau/include/nvkm/subdev/pmu.h
> index 7b86acc634a0..659b4e0ba02b 100644
> --- a/drm/nouveau/include/nvkm/subdev/pmu.h
> +++ b/drm/nouveau/include/nvkm/subdev/pmu.h
> @@ -1,7 +1,20 @@
>  #ifndef __NVKM_PMU_H__
>  #define __NVKM_PMU_H__
>  #include <core/subdev.h>
> +#include <core/device.h>
> +#include <subdev/mmu.h>
> +#include <linux/debugfs.h>
>
> +struct pmu_buf_desc {
> +       struct nvkm_gpuobj *pmubufobj;
> +       struct nvkm_vma pmubufvma;
> +       size_t size;
> +};
> +struct pmu_priv_vm {
> +       struct nvkm_gpuobj *mem;
> +       struct nvkm_gpuobj *pgd;
> +       struct nvkm_vm *vm;
> +};
>  struct nvkm_pmu {
>         struct nvkm_subdev base;
>
> @@ -20,9 +33,20 @@ struct nvkm_pmu {
>                 u32 message;
>                 u32 data[2];
>         } recv;
> -
> +       wait_queue_head_t init_wq;
> +       bool gr_initialised;
> +       struct dentry *debugfs;
> +       struct pmu_buf_desc *pg_buf;
> +       struct pmu_priv_vm *pmuvm;
>         int  (*message)(struct nvkm_pmu *, u32[2], u32, u32, u32, u32);
>         void (*pgob)(struct nvkm_pmu *, bool);
> +       int (*pmu_mutex_acquire)(struct nvkm_pmu *, u32 id, u32 *token);
> +       int (*pmu_mutex_release)(struct nvkm_pmu *, u32 id, u32 *token);
> +       int (*pmu_load_norm)(struct nvkm_pmu *pmu, u32 *load);
> +       int (*pmu_load_update)(struct nvkm_pmu *pmu);
> +       void (*pmu_reset_load_counters)(struct nvkm_pmu *pmu);
> +       void (*pmu_get_load_counters)(struct nvkm_pmu *pmu, u32 *busy_cycles,
> +               u32 *total_cycles);
>  };
>
>  static inline struct nvkm_pmu *
> diff --git a/drm/nouveau/nvkm/subdev/pmu/base.c b/drm/nouveau/nvkm/subdev/pmu/base.c
> index 054b2d2eec35..6afd389b9764 100644
> --- a/drm/nouveau/nvkm/subdev/pmu/base.c
> +++ b/drm/nouveau/nvkm/subdev/pmu/base.c
> @@ -25,6 +25,114 @@
>
>  #include <subdev/timer.h>
>
> +/* init allocator struct */
> +int nvkm_pmu_allocator_init(struct nvkm_pmu_allocator *allocator,
> +               const char *name, u32 start, u32 len)
> +{
> +       memset(allocator, 0, sizeof(struct nvkm_pmu_allocator));
> +
> +       strncpy(allocator->name, name, 32);
> +
> +       allocator->base = start;
> +       allocator->limit = start + len - 1;
> +
> +       allocator->bitmap = kcalloc(BITS_TO_LONGS(len), sizeof(long),
> +                       GFP_KERNEL);
> +       if (!allocator->bitmap)
> +               return -ENOMEM;
> +
> +       allocator_dbg(allocator, "%s : base %d, limit %d",
> +               allocator->name, allocator->base);
> +
> +       init_rwsem(&allocator->rw_sema);
> +
> +       allocator->alloc = nvkm_pmu_allocator_block_alloc;
> +       allocator->free = nvkm_pmu_allocator_block_free;
> +
> +       return 0;
> +}
> +
> +/* destroy allocator, free all remaining blocks if any */
> +void nvkm_pmu_allocator_destroy(struct nvkm_pmu_allocator *allocator)
> +{
> +       down_write(&allocator->rw_sema);
> +
> +       kfree(allocator->bitmap);
> +
> +       memset(allocator, 0, sizeof(struct nvkm_pmu_allocator));
> +}
> +
> +/*
> + * *addr != ~0 for fixed address allocation. if *addr == 0, base addr is
> + * returned to caller in *addr.
> + *
> + * contiguous allocation, which allocates one block of
> + * contiguous address.
> +*/
> +int nvkm_pmu_allocator_block_alloc(struct nvkm_pmu_allocator *allocator,
> +               u32 *addr, u32 len, u32 align)
> +{
> +       unsigned long _addr;
> +
> +       allocator_dbg(allocator, "[in] addr %d, len %d", *addr, len);
> +
> +       if ((*addr != 0 && *addr < allocator->base) || /* check addr range */
> +           *addr + len > allocator->limit || /* check addr range */
> +           *addr & (align - 1) || /* check addr alignment */
> +            len == 0)                        /* check len */
> +               return -EINVAL;
> +
> +       len = ALIGN(len, align);
> +       if (!len)
> +               return -ENOMEM;
> +
> +       down_write(&allocator->rw_sema);
> +
> +       _addr = bitmap_find_next_zero_area(allocator->bitmap,
> +                       allocator->limit - allocator->base + 1,
> +                       *addr ? (*addr - allocator->base) : 0,
> +                       len,
> +                       align - 1);
> +       if ((_addr > allocator->limit - allocator->base + 1) ||
> +           (*addr && *addr != (_addr + allocator->base))) {
> +               up_write(&allocator->rw_sema);
> +               return -ENOMEM;
> +       }
> +
> +       bitmap_set(allocator->bitmap, _addr, len);
> +       *addr = allocator->base + _addr;
> +
> +       up_write(&allocator->rw_sema);
> +
> +       allocator_dbg(allocator, "[out] addr %d, len %d", *addr, len);
> +
> +       return 0;
> +}
> +
> +/* free all blocks between start and end */
> +int nvkm_pmu_allocator_block_free(struct nvkm_pmu_allocator *allocator,
> +               u32 addr, u32 len, u32 align)
> +{
> +       allocator_dbg(allocator, "[in] addr %d, len %d", addr, len);
> +
> +       if (addr + len > allocator->limit || /* check addr range */
> +           addr < allocator->base ||
> +           addr & (align - 1))   /* check addr alignment */
> +               return -EINVAL;
> +
> +       len = ALIGN(len, align);
> +       if (!len)
> +               return -EINVAL;
> +
> +       down_write(&allocator->rw_sema);
> +       bitmap_clear(allocator->bitmap, addr - allocator->base, len);
> +       up_write(&allocator->rw_sema);
> +
> +       allocator_dbg(allocator, "[out] addr %d, len %d", addr, len);
> +
> +       return 0;
> +}
> +
>  void
>  nvkm_pmu_pgob(struct nvkm_pmu *pmu, bool enable)
>  {
> diff --git a/drm/nouveau/nvkm/subdev/pmu/gk20a.c b/drm/nouveau/nvkm/subdev/pmu/gk20a.c
> index a49934bbe637..0fd2530301a3 100644
> --- a/drm/nouveau/nvkm/subdev/pmu/gk20a.c
> +++ b/drm/nouveau/nvkm/subdev/pmu/gk20a.c
> @@ -20,21 +20,67 @@
>   * DEALINGS IN THE SOFTWARE.
>   */
>  #include "priv.h"
> +#include "gk20a.h"
> +#include <core/client.h>
> +#include <core/gpuobj.h>
> +#include <subdev/bar.h>
> +#include <subdev/fb.h>
> +#include <subdev/mc.h>
> +#include <subdev/timer.h>
> +#include <subdev/mmu.h>
> +#include <subdev/pmu.h>
> +#include <engine/falcon.h>
>
> +#include <linux/delay.h>       /* for mdelay */
> +#include <linux/firmware.h>
> +#include <linux/clk.h>
> +#include <linux/module.h>
> +#include <linux/debugfs.h>
> +#include <linux/dma-mapping.h>
> +#include <linux/uaccess.h>
>  #include <subdev/clk.h>
>  #include <subdev/timer.h>
>  #include <subdev/volt.h>
>
>  #define BUSY_SLOT      0
>  #define CLK_SLOT       7
> +#define GK20A_PMU_UCODE_IMAGE  "gpmu_ucode.bin"
> +
> +static int falc_trace_show(struct seq_file *s, void *data);
> +static int falc_trace_open(struct inode *inode, struct file *file)
> +{
> +       return single_open(file, falc_trace_show, inode->i_private);
> +}
> +static const struct file_operations falc_trace_fops = {
> +       .open           = falc_trace_open,
> +       .read           = seq_read,
> +       .llseek         = seq_lseek,
> +       .release        = single_release,
> +};
> +struct pmu_priv_vm pmuvm;
> +const struct firmware *pmufw;
> +
> +static void  gk20a_pmu_isr(struct nvkm_pmu *ppmu);
> +static void pmu_process_message(struct work_struct *work);
> +
> +static int
> +gk20a_pmu_init_vm(struct nvkm_pmu *ppmu, const struct firmware *fw);
> +static void
> +gk20a_pmu_dump_firmware_info(struct nvkm_pmu *ppmu, const struct firmware *fw);
> +
> +static int
> +gk20a_pmu_load_firmware(struct nvkm_pmu *ppmu, const struct firmware **pfw);
> +static int gk20a_init_pmu_setup_sw(struct nvkm_pmu *ppmu);
> +static int gk20a_init_pmu_setup_hw1(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc);
> +static void gk20a_pmu_intr(struct nvkm_subdev *subdev);
>
> +static void gk20a_pmu_pgob(struct nvkm_pmu *ppmu, bool enable);
>  struct gk20a_pmu_dvfs_data {
>         int p_load_target;
>         int p_load_max;
>         int p_smooth;
>         unsigned int avg_load;
>  };
> -
>  struct gk20a_pmu_priv {
>         struct nvkm_pmu base;
>         struct nvkm_alarm alarm;
> @@ -46,7 +92,30 @@ struct gk20a_pmu_dvfs_dev_status {
>         unsigned long busy;
>         int cur_state;
>  };
> -
> +int gk20a_pmu_debugfs_init(struct nvkm_pmu *ppmu)
> +{
> +       struct dentry *d;
> +       ppmu->debugfs = debugfs_create_dir("PMU", NULL);
> +       if (!ppmu->debugfs)
> +               goto err_out;
> +       nv_debug(ppmu, "PMU directory created with success\n");
> +       d = debugfs_create_file(
> +               "falc_trace", 0644, ppmu->debugfs, ppmu,
> +                                               &falc_trace_fops);
> +       if (!d)
> +               goto err_out;
> +       return 0;
> +err_out:
> +       pr_err("%s: Failed to make debugfs node\n", __func__);
> +       debugfs_remove_recursive(ppmu->debugfs);
> +       return -ENOMEM;
> +}
> +void gk20a_pmu_release_firmware(struct nvkm_pmu *ppmu,
> +                                                   const struct firmware *pfw)
> +{
> +       nv_debug(ppmu, "firmware released\n");
> +       release_firmware(pfw);
> +}
>  static int
>  gk20a_pmu_dvfs_target(struct gk20a_pmu_priv *priv, int *state)
>  {
> @@ -164,31 +233,145 @@ gk20a_pmu_fini(struct nvkm_object *object, bool suspend)
>  {
>         struct nvkm_pmu *pmu = (void *)object;
>         struct gk20a_pmu_priv *priv = (void *)pmu;
> -
> +       nv_wr32(pmu, 0x10a014, 0x00000060);
> +       flush_work(&pmu->recv.work);
>         nvkm_timer_alarm_cancel(priv, &priv->alarm);
>
>         return nvkm_subdev_fini(&pmu->base, suspend);
>  }
> +static bool find_hex_in_string(char *strings, u32 *hex_pos)
> +{
> +       u32 i = 0, j = strlen(strings);
> +       for (; i < j; i++) {
> +               if (strings[i] == '%')
> +                       if (strings[i + 1] == 'x' || strings[i + 1] == 'X') {
> +                               *hex_pos = i;
> +                               return true;
> +                       }
> +       }
> +       *hex_pos = -1;
> +       return false;
> +}
> +static int falc_trace_show(struct seq_file *s, void *data)
> +{
> +       struct nvkm_pmu *ppmu = s->private;
> +       struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +       struct pmu_desc *pmu = &impl->pmudata;
> +       u32 i = 0, j = 0, k, l, m;
> +       char part_str[40];
> +       u32 data1;
> +       char *log_data = kmalloc(GK20A_PMU_TRACE_BUFSIZE, GFP_KERNEL);
> +       char *trace = log_data;
> +       u32 *trace1 = (u32 *)log_data;
> +       for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 4) {
> +               data1 = nv_ro32(pmu->trace_buf.pmubufobj, 0x0000 + i);
> +               memcpy(log_data + i, (void *)(&data1), 32);
> +       }
> +       for (i = 0; i < GK20A_PMU_TRACE_BUFSIZE; i += 0x40) {
> +               for (j = 0; j < 0x40; j++)
> +                       if (trace1[(i / 4) + j])
> +                               break;
> +               if (j == 0x40)
> +                       goto out;
> +               seq_printf(s, "Index %x: ", trace1[(i / 4)]);
> +               l = 0;
> +               m = 0;
> +               while (find_hex_in_string((trace+i+20+m), &k)) {
> +                       if (k >= 40)
> +                               break;
> +                       strncpy(part_str, (trace+i+20+m), k);
> +                       part_str[k] = 0;
> +                       seq_printf(s, "%s0x%x", part_str,
> +                                       trace1[(i / 4) + 1 + l]);
> +                       l++;
> +                       m += k + 2;
> +               }
> +               seq_printf(s, "%s", (trace+i+20+m));
> +       }
> +out:
> +       kfree(log_data);
> +       return 0;
> +}
>
>  int
>  gk20a_pmu_init(struct nvkm_object *object)
>  {
> -       struct nvkm_pmu *pmu = (void *)object;
> -       struct gk20a_pmu_priv *priv = (void *)pmu;
> +       struct nvkm_pmu *ppmu = (void *)object;
> +       struct nvkm_mc *pmc = nvkm_mc(object);
> +       struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +       struct pmu_desc *pmu;
> +       struct gk20a_pmu_priv *priv;
> +       struct pmu_gk20a_data *gk20adata;
>         int ret;
>
> -       ret = nvkm_subdev_init(&pmu->base);
> +       pmu = &impl->pmudata;
> +
> +       nv_subdev(ppmu)->intr = gk20a_pmu_intr;
> +
> +       mutex_init(&pmu->isr_mutex);
> +       mutex_init(&pmu->pmu_copy_lock);
> +       mutex_init(&pmu->pmu_seq_lock);
> +
> +       if (pmufw == NULL) {
> +               ret = gk20a_pmu_load_firmware(ppmu, &pmufw);
> +               if (ret < 0) {
> +                       nv_error(ppmu, "failed to load pmu fimware\n");
> +                       return ret;
> +               }
> +               nv_debug(ppmu, "loading firmware sucessful\n");
> +               ret = gk20a_pmu_init_vm(ppmu, pmufw);
> +               if (ret < 0) {
> +                       nv_error(ppmu, "failed to map pmu fw to va space\n");
> +                       goto init_vm_err;
> +               }
> +       }
> +       pmu->desc = (struct pmu_ucode_desc *)pmufw->data;
> +       gk20a_pmu_dump_firmware_info(ppmu, pmufw);
> +
> +       if (pmu->desc->app_version != APP_VERSION_GK20A) {
> +               nv_error(ppmu,
> +               "PMU code version not supported version: %d\n",
> +                       pmu->desc->app_version);
> +               ret = -EINVAL;
> +               goto app_ver_err;
> +       }
> +       gk20adata = kzalloc(sizeof(*gk20adata), GFP_KERNEL);
> +       if (!gk20adata) {
> +               ret = -ENOMEM;
> +               goto err;
> +       }
> +
> +       pmu->pmu_chip_data = (void *)gk20adata;
> +
> +       pmu->remove_support = gk20a_remove_pmu_support;
> +
> +       ret = gk20a_init_pmu_setup_sw(ppmu);
>         if (ret)
> -               return ret;
> +               goto err;
> +
> +       pmu->pmu_state = PMU_STATE_STARTING;
> +       ret = gk20a_init_pmu_setup_hw1(ppmu, pmc);
> +       if (ret)
> +               goto err;
> +
> +       priv = (void *)ppmu;
>
> -       pmu->pgob = nvkm_pmu_pgob;
> +       ret = nvkm_subdev_init(&ppmu->base);
> +       if (ret)
> +               goto err;
> +
> +       ppmu->pgob = nvkm_pmu_pgob;
>
> -       /* init pwr perf counter */
> -       nv_wr32(pmu, 0x10a504 + (BUSY_SLOT * 0x10), 0x00200001);
> -       nv_wr32(pmu, 0x10a50c + (BUSY_SLOT * 0x10), 0x00000002);
> -       nv_wr32(pmu, 0x10a50c + (CLK_SLOT * 0x10), 0x00000003);
> +       /* init pmu perf counter */
> +       nv_wr32(ppmu, 0x10a504 + (BUSY_SLOT * 0x10), 0x00200001);
> +       nv_wr32(ppmu, 0x10a50c + (BUSY_SLOT * 0x10), 0x00000002);
> +       nv_wr32(ppmu, 0x10a50c + (CLK_SLOT * 0x10), 0x00000003);
>
> -       nvkm_timer_alarm(pmu, 2000000000, &priv->alarm);
> +       nvkm_timer_alarm(ppmu, 2000000000, &priv->alarm);
> +err:
> +init_vm_err:
> +app_ver_err:
> +       gk20a_pmu_release_firmware(ppmu, pmufw);
>         return ret;
>  }
>
> @@ -226,4 +409,1926 @@ gk20a_pmu_oclass = &(struct nvkm_pmu_impl) {
>                 .init = gk20a_pmu_init,
>                 .fini = gk20a_pmu_fini,
>         },
> +       .base.handle = NV_SUBDEV(PMU, 0xea),
> +       .pgob = gk20a_pmu_pgob,
>  }.base;
> +void pmu_copy_from_dmem(struct pmu_desc *pmu,
> +               u32 src, u8 *dst, u32 size, u8 port)
> +{
> +       u32 i, words, bytes;
> +       u32 data, addr_mask;
> +       u32 *dst_u32 = (u32 *)dst;
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +
> +       if (size == 0) {
> +               nv_error(ppmu, "size is zero\n");
> +               goto out;
> +       }
> +
> +       if (src & 0x3) {
> +               nv_error(ppmu, "src (0x%08x) not 4-byte aligned\n", src);
> +               goto out;
> +       }
> +
> +       mutex_lock(&pmu->pmu_copy_lock);
> +
> +       words = size >> 2;
> +       bytes = size & 0x3;
> +
> +       addr_mask = (0x3f << 2) | 0xff << 8;
> +
> +       src &= addr_mask;
> +
> +       nv_wr32(ppmu, (0x10a1c0 + (port * 8)), (src | (0x1 << 25)));
> +
> +       for (i = 0; i < words; i++) {
> +               dst_u32[i] = nv_rd32(ppmu, (0x0010a1c4 + port * 8));
> +               nv_debug(ppmu, "0x%08x\n", dst_u32[i]);
> +       }
> +       if (bytes > 0) {
> +               data = nv_rd32(ppmu, (0x0010a1c4 + port * 8));
> +               nv_debug(ppmu, "0x%08x\n", data);
> +
> +               for (i = 0; i < bytes; i++)
> +                       dst[(words << 2) + i] = ((u8 *)&data)[i];
> +       }
> +       mutex_unlock(&pmu->pmu_copy_lock);
> +out:
> +       nv_debug(ppmu, "exit %s\n", __func__);
> +}
> +
> +void pmu_copy_to_dmem(struct pmu_desc *pmu,
> +               u32 dst, u8 *src, u32 size, u8 port)
> +{
> +       u32 i, words, bytes;
> +       u32 data, addr_mask;
> +       u32 *src_u32 = (u32 *)src;
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +
> +       if (size == 0) {
> +               nv_error(ppmu, "size is zero\n");
> +               goto out;
> +       }
> +
> +       if (dst & 0x3) {
> +               nv_error(ppmu, "dst (0x%08x) not 4-byte aligned\n", dst);
> +               goto out;
> +       }
> +
> +       mutex_lock(&pmu->pmu_copy_lock);
> +
> +       words = size >> 2;
> +       bytes = size & 0x3;
> +
> +       addr_mask = (0x3f << 2) | 0xff << 8;
> +
> +       dst &= addr_mask;
> +
> +       nv_wr32(ppmu, (0x10a1c0 + (port * 8)), (dst | (0x1 << 24)));
> +
> +       for (i = 0; i < words; i++) {
> +               nv_wr32(ppmu, (0x10a1c4 + (port * 8)), src_u32[i]);
> +               nv_debug(ppmu, "0x%08x\n", src_u32[i]);
> +       }
> +       if (bytes > 0) {
> +               data = 0;
> +               for (i = 0; i < bytes; i++)
> +                       ((u8 *)&data)[i] = src[(words << 2) + i];
> +               nv_wr32(ppmu, (0x10a1c4 + (port * 8)), data);
> +               nv_debug(ppmu, "0x%08x\n", data);
> +       }
> +
> +       data = nv_rd32(ppmu, (0x10a1c0 + (port * 8))) & addr_mask;
> +       size = ALIGN(size, 4);
> +       if (data != dst + size) {
> +               nv_error(ppmu, "copy failed. bytes written %d, expected %d",
> +                       data - dst, size);
> +       }
> +       mutex_unlock(&pmu->pmu_copy_lock);
> +out:
> +       nv_debug(ppmu, "exit %s", __func__);
> +}
> +
> +static int pmu_idle(struct nvkm_pmu *ppmu)
> +{
> +       unsigned long end_jiffies = jiffies +
> +               msecs_to_jiffies(2000);
> +       u32 idle_stat;
> +
> +       /* wait for pmu idle */
> +       do {
> +               idle_stat = nv_rd32(ppmu, 0x0010a04c);
> +
> +               if (((idle_stat & 0x01) == 0) &&
> +                       ((idle_stat >> 1) & 0x7fff) == 0) {
> +                       break;
> +               }
> +
> +               if (time_after_eq(jiffies, end_jiffies)) {
> +                       nv_error(ppmu, "timeout waiting pmu idle : 0x%08x",
> +                                 idle_stat);
> +                       return -EBUSY;
> +               }
> +               usleep_range(100, 200);
> +       } while (1);
> +
> +       return 0;
> +}
> +
> +void pmu_enable_irq(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc,
> +                       bool enable)
> +{
> +
> +       nv_wr32(pmc, 0x00000640,
> +               nv_rd32(pmc, 0x00000640) &
> +               ~0x1000000);
> +       nv_wr32(pmc, 0x00000644,
> +               nv_rd32(pmc, 0x00000644) &
> +               ~0x1000000);
> +       nv_wr32(ppmu, 0x0010a014, 0xff);
> +
> +       if (enable) {
> +               nv_debug(ppmu, "enable pmu irq\n");
> +               /* dest 0=falcon, 1=host; level 0=irq0, 1=irq1
> +               nv_wr32(ppmu, 0x0010a01c, 0xff01ff52);
> +               0=disable, 1=enable*/
> +
> +               nv_wr32(ppmu, 0x0010a010, 0xff);
> +               nv_wr32(pmc, 0x00000640,
> +                       nv_rd32(pmc, 0x00000640) |
> +                       0x1000000);
> +               nv_wr32(pmc, 0x00000644,
> +                       nv_rd32(pmc, 0x00000644) |
> +                       0x1000000);
> +       } else {
> +               nv_debug(ppmu, "disable pmu irq\n");
> +       }
> +
> +}
> +
> +static int pmu_enable_hw(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc,
> +                       bool enable)
> +{
> +       u32 reg;
> +
> +       if (enable) {
> +               int retries = GK20A_IDLE_CHECK_MAX / GK20A_IDLE_CHECK_DEFAULT;
> +               /*need a spinlock?*/
> +               reg = nv_rd32(pmc, 0x00000200);
> +               reg |= 0x2000;
> +               nv_wr32(pmc, 0x00000200, reg);
> +               nv_rd32(pmc, 0x00000200);
> +               do {
> +                       u32 w = nv_rd32(ppmu, 0x0010a10c) & 0x6;
> +
> +                       if (!w)
> +                               return 0;
> +
> +                       udelay(GK20A_IDLE_CHECK_DEFAULT);
> +               } while (--retries);
> +
> +               reg = nv_rd32(pmc, 0x00000200);
> +               reg &= ~0x2000;
> +               nv_wr32(pmc, 0x00000200, reg);
> +               nv_error(ppmu, "Falcon mem scrubbing timeout\n");
> +
> +               goto error;
> +       } else {
> +               reg = nv_rd32(pmc, 0x00000200);
> +               reg &= ~0x2000;
> +               nv_wr32(pmc, 0x00000200, reg);
> +               return 0;
> +       }
> +error:
> +       return -ETIMEDOUT;
> +}
> +
> +static int pmu_enable(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc,
> +                       bool enable)
> +{
> +       u32 pmc_enable;
> +       int err;
> +
> +       if (!enable) {
> +               pmc_enable = nv_rd32(pmc, 0x200);
> +               if ((pmc_enable & 0x2000) != 0x0) {
> +                       pmu_enable_irq(ppmu, pmc, false);
> +                       pmu_enable_hw(ppmu, pmc, false);
> +               }
> +       } else {
> +               err = pmu_enable_hw(ppmu, pmc, true);
> +               if (err)
> +                       return err;
> +
> +               /* TBD: post reset */
> +
> +               err = pmu_idle(ppmu);
> +               if (err)
> +                       return err;
> +
> +               pmu_enable_irq(ppmu, pmc, true);
> +       }
> +
> +       return 0;
> +}
> +
> +int pmu_reset(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc)
> +{
> +       int err;
> +
> +       err = pmu_idle(ppmu);
> +       if (err)
> +               return err;
> +
> +       /* TBD: release pmu hw mutex */
> +
> +       err = pmu_enable(ppmu, pmc, false);
> +       if (err)
> +               return err;
> +
> +       err = pmu_enable(ppmu, pmc, true);
> +       if (err)
> +               return err;
> +
> +       return 0;
> +}
> +
> +static int pmu_bootstrap(struct pmu_desc *pmu)
> +{
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +       struct pmu_ucode_desc *desc = pmu->desc;
> +       u64 addr_code, addr_data, addr_load;
> +       u32 i, blocks, addr_args;
> +       u32 *adr_data, *adr_load, *adr_code;
> +       struct pmu_cmdline_args_gk20a cmdline_args;
> +       struct pmu_priv_vm *ppmuvm = &pmuvm;
> +
> +       nv_wr32(ppmu, 0x0010a048,
> +               nv_rd32(ppmu, 0x0010a048) | 0x01);
> +       /*bind the address*/
> +       nv_wr32(ppmu, 0x0010a480,
> +               ppmuvm->mem->addr >> 12 |
> +               0x1 << 30 |
> +               0x20000000);
> +
> +       /* TBD: load all other surfaces */
> +       cmdline_args.falc_trace_size = GK20A_PMU_TRACE_BUFSIZE;
> +       cmdline_args.falc_trace_dma_base =
> +                                u64_lo32(pmu->trace_buf.pmubufvma.offset >> 8);
> +       cmdline_args.falc_trace_dma_idx = GK20A_PMU_DMAIDX_VIRT;
> +       cmdline_args.cpu_freq_hz = 204;
> +       cmdline_args.secure_mode = 0;
> +
> +       addr_args = (nv_rd32(ppmu, 0x0010a108) >> 9) & 0x1ff;
> +       addr_args = addr_args << GK20A_PMU_DMEM_BLKSIZE2;
> +       addr_args -= sizeof(struct pmu_cmdline_args_gk20a);
> +       nv_debug(ppmu, "initiating copy to dmem\n");
> +       pmu_copy_to_dmem(pmu, addr_args,
> +                       (u8 *)&cmdline_args,
> +                       sizeof(struct pmu_cmdline_args_gk20a), 0);
> +
> +       nv_wr32(ppmu, 0x0010a1c0, 0x1 << 24);
> +
> +
> +       addr_code = u64_lo32((pmu->ucode.pmubufvma.offset +
> +                       desc->app_start_offset +
> +                       desc->app_resident_code_offset) >> 8);
> +
> +       addr_data = u64_lo32((pmu->ucode.pmubufvma.offset +
> +                       desc->app_start_offset +
> +                       desc->app_resident_data_offset) >> 8);
> +
> +       addr_load = u64_lo32((pmu->ucode.pmubufvma.offset +
> +                       desc->bootloader_start_offset) >> 8);
> +
> +       adr_code = (u32 *) (&addr_code);
> +       adr_load = (u32 *) (&addr_load);
> +       adr_data = (u32 *) (&addr_data);
> +       nv_wr32(ppmu, 0x0010a1c4, GK20A_PMU_DMAIDX_UCODE);
> +       nv_debug(ppmu, "0x%08x\n", GK20A_PMU_DMAIDX_UCODE);
> +       nv_wr32(ppmu, 0x0010a1c4, *(adr_code));
> +       nv_debug(ppmu, "0x%08x\n", *(adr_code));
> +       nv_wr32(ppmu, 0x0010a1c4, desc->app_size);
> +       nv_debug(ppmu, "0x%08x\n", desc->app_size);
> +       nv_wr32(ppmu, 0x0010a1c4, desc->app_resident_code_size);
> +       nv_debug(ppmu, "0x%08x\n", desc->app_resident_code_size);
> +       nv_wr32(ppmu, 0x0010a1c4, desc->app_imem_entry);
> +       nv_debug(ppmu, "0x%08x\n", desc->app_imem_entry);
> +       nv_wr32(ppmu, 0x0010a1c4,  *(adr_data));
> +       nv_debug(ppmu, "0x%08x\n", *(adr_data));
> +       nv_wr32(ppmu, 0x0010a1c4, desc->app_resident_data_size);
> +       nv_debug(ppmu, "0x%08x\n", desc->app_resident_data_size);
> +       nv_wr32(ppmu, 0x0010a1c4, *(adr_code));
> +       nv_debug(ppmu, "0x%08x\n", *(adr_code));
> +       nv_wr32(ppmu, 0x0010a1c4, 0x1);
> +       nv_debug(ppmu, "0x%08x\n", 1);
> +       nv_wr32(ppmu, 0x0010a1c4, addr_args);
> +       nv_debug(ppmu, "0x%08x\n", addr_args);
> +
> +
> +       nv_wr32(ppmu, 0x0010a110,
> +               *(adr_load) - (desc->bootloader_imem_offset >> 8));
> +
> +       blocks = ((desc->bootloader_size + 0xFF) & ~0xFF) >> 8;
> +
> +       for (i = 0; i < blocks; i++) {
> +               nv_wr32(ppmu, 0x0010a114,
> +                       desc->bootloader_imem_offset + (i << 8));
> +               nv_wr32(ppmu, 0x0010a11c,
> +                       desc->bootloader_imem_offset + (i << 8));
> +               nv_wr32(ppmu, 0x0010a118,
> +                       0x01 << 4  |
> +                       0x06 << 8  |
> +                       ((GK20A_PMU_DMAIDX_UCODE & 0x07) << 12));
> +       }
> +
> +
> +       nv_wr32(ppmu, 0x0010a104,
> +               (0xffffffff & desc->bootloader_entry_point));
> +
> +       nv_wr32(ppmu, 0x0010a100, 0x1 << 1);
> +
> +       nv_wr32(ppmu, 0x0010a080, desc->app_version);
> +
> +       return 0;
> +}
> +
> +void pmu_seq_init(struct pmu_desc *pmu)
> +{
> +       u32 i;
> +
> +       memset(pmu->seq, 0,
> +               sizeof(struct pmu_sequence) * PMU_MAX_NUM_SEQUENCES);
> +       memset(pmu->pmu_seq_tbl, 0,
> +               sizeof(pmu->pmu_seq_tbl));
> +
> +       for (i = 0; i < PMU_MAX_NUM_SEQUENCES; i++)
> +               pmu->seq[i].id = i;
> +}
> +
> +static int pmu_seq_acquire(struct pmu_desc *pmu,
> +                       struct pmu_sequence **pseq)
> +{
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +       struct pmu_sequence *seq;
> +       u32 index;
> +
> +       mutex_lock(&pmu->pmu_seq_lock);
> +       index = find_first_zero_bit(pmu->pmu_seq_tbl,
> +                               sizeof(pmu->pmu_seq_tbl));
> +       if (index >= sizeof(pmu->pmu_seq_tbl)) {
> +               nv_error(ppmu,
> +                       "no free sequence available");
> +               mutex_unlock(&pmu->pmu_seq_lock);
> +               return -EAGAIN;
> +       }
> +       set_bit(index, pmu->pmu_seq_tbl);
> +       mutex_unlock(&pmu->pmu_seq_lock);
> +
> +       seq = &pmu->seq[index];
> +       seq->state = PMU_SEQ_STATE_PENDING;
> +
> +       *pseq = seq;
> +       return 0;
> +}
> +
> +static void pmu_seq_release(struct pmu_desc *pmu,
> +                       struct pmu_sequence *seq)
> +{
> +       seq->state      = PMU_SEQ_STATE_FREE;
> +       seq->desc       = PMU_INVALID_SEQ_DESC;
> +       seq->callback   = NULL;
> +       seq->cb_params  = NULL;
> +       seq->msg        = NULL;
> +       seq->out_payload = NULL;
> +       seq->in_gk20a.alloc.dmem.size = 0;
> +       seq->out_gk20a.alloc.dmem.size = 0;
> +       clear_bit(seq->id, pmu->pmu_seq_tbl);
> +}
> +
> +static int pmu_queue_init(struct pmu_desc *pmu,
> +               u32 id, struct pmu_init_msg_pmu_gk20a *init)
> +{
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +       struct pmu_queue *queue = &pmu->queue[id];
> +
> +       queue->id       = id;
> +       queue->index    = init->queue_info[id].index;
> +       queue->offset   = init->queue_info[id].offset;
> +       queue->size = init->queue_info[id].size;
> +       queue->mutex_id = id;
> +       mutex_init(&queue->mutex);
> +
> +       nv_debug(ppmu, "queue %d: index %d, offset 0x%08x, size 0x%08x",
> +               id, queue->index, queue->offset, queue->size);
> +
> +       return 0;
> +}
> +
> +static int pmu_queue_head(struct pmu_desc *pmu, struct pmu_queue *queue,
> +                       u32 *head, bool set)
> +{
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +
> +       BUG_ON(!head);
> +
> +       if (PMU_IS_COMMAND_QUEUE(queue->id)) {
> +
> +               if (queue->index >= 0x00000004)
> +                       return -EINVAL;
> +
> +               if (!set)
> +                       *head = nv_rd32(ppmu, 0x0010a4a0 + (queue->index * 4)) &
> +                               0xffffffff;
> +               else
> +                       nv_wr32(ppmu,
> +                               (0x0010a4a0 + (queue->index * 4)),
> +                               (*head & 0xffffffff));
> +       } else {
> +               if (!set)
> +                       *head = nv_rd32(ppmu, 0x0010a4c8) & 0xffffffff;
> +               else
> +                       nv_wr32(ppmu, 0x0010a4c8, (*head & 0xffffffff));
> +       }
> +
> +       return 0;
> +}
> +
> +static int pmu_queue_tail(struct pmu_desc *pmu, struct pmu_queue *queue,
> +                       u32 *tail, bool set)
> +{
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +
> +       BUG_ON(!tail);
> +
> +       if (PMU_IS_COMMAND_QUEUE(queue->id)) {
> +
> +               if (queue->index >= 0x00000004)
> +                       return -EINVAL;
> +
> +               if (!set)
> +                       *tail = nv_rd32(ppmu, 0x0010a4b0 + (queue->index * 4)) &
> +                               0xffffffff;
> +               else
> +                       nv_wr32(ppmu, (0x0010a4b0 + (queue->index * 4)),
> +                                                         (*tail & 0xffffffff));
> +       } else {
> +               if (!set)
> +                       *tail = nv_rd32(ppmu, 0x0010a4cc) & 0xffffffff;
> +               else
> +                       nv_wr32(ppmu, 0x0010a4cc, (*tail & 0xffffffff));
> +       }
> +
> +       return 0;
> +}
> +
> +static inline void pmu_queue_read(struct pmu_desc *pmu,
> +                       u32 offset, u8 *dst, u32 size)
> +{
> +       pmu_copy_from_dmem(pmu, offset, dst, size, 0);
> +}
> +
> +static inline void pmu_queue_write(struct pmu_desc *pmu,
> +                       u32 offset, u8 *src, u32 size)
> +{
> +       pmu_copy_to_dmem(pmu, offset, src, size, 0);
> +}
> +
> +int pmu_mutex_acquire(struct nvkm_pmu *ppmu, u32 id, u32 *token)
> +{
> +       struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +       struct pmu_desc *pmu = &impl->pmudata;
> +       struct pmu_mutex *mutex;
> +       u32 data, owner, max_retry;
> +
> +       if (!pmu->initialized)
> +               return -EINVAL;
> +
> +       BUG_ON(!token);
> +       BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
> +       BUG_ON(id > pmu->mutex_cnt);
> +
> +       mutex = &pmu->mutex[id];
> +
> +       owner = nv_rd32(ppmu, 0x0010a580 + (mutex->index * 4)) & 0xff;
> +
> +       if (*token != PMU_INVALID_MUTEX_OWNER_ID && *token == owner) {
> +               BUG_ON(mutex->ref_cnt == 0);
> +               nv_debug(ppmu, "already acquired by owner : 0x%08x", *token);
> +               mutex->ref_cnt++;
> +               return 0;
> +       }
> +
> +       max_retry = 40;
> +       do {
> +               data = nv_rd32(ppmu, 0x0010a488) & 0xff;
> +               if (data == 0x00000000 ||
> +                   data == 0x000000ff) {
> +                       nv_warn(ppmu,
> +                               "fail to generate mutex token: val 0x%08x",
> +                               owner);
> +                       usleep_range(20, 40);
> +                       continue;
> +               }
> +
> +               owner = data;
> +               nv_wr32(ppmu, (0x0010a580 + mutex->index * 4),
> +                       owner & 0xff);
> +
> +               data = nv_rd32(ppmu, 0x0010a580 + (mutex->index * 4));
> +
> +               if (owner == data) {
> +                       mutex->ref_cnt = 1;
> +                       nv_debug(ppmu, "mutex acquired: id=%d, token=0x%x",
> +                               mutex->index, *token);
> +                       *token = owner;
> +                       goto out;
> +               } else {
> +                 nv_debug(ppmu, "fail to acquire mutex idx=0x%08x",
> +                               mutex->index);
> +
> +                       nv_mask(ppmu, 0x0010a48c, 0xff, (owner & 0xff));
> +
> +                       usleep_range(20, 40);
> +                       continue;
> +               }
> +       } while (max_retry-- > 0);
> +
> +       return -EBUSY;
> +out:
> +       return 0;
> +}
> +
> +int pmu_mutex_release(struct nvkm_pmu *ppmu, u32 id, u32 *token)
> +{
> +       struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +       struct pmu_desc *pmu = &impl->pmudata;
> +       struct pmu_mutex *mutex;
> +       u32 owner;
> +
> +       if (!pmu->initialized)
> +               return -EINVAL;
> +
> +       BUG_ON(!token);
> +       BUG_ON(!PMU_MUTEX_ID_IS_VALID(id));
> +       BUG_ON(id > pmu->mutex_cnt);
> +
> +       mutex = &pmu->mutex[id];
> +
> +       owner = nv_rd32(ppmu, 0x0010a580 + (mutex->index * 4)) & 0xff;
> +
> +       if (*token != owner) {
> +               nv_error(ppmu,
> +                       "requester 0x%08x NOT match owner 0x%08x",
> +                       *token, owner);
> +               return -EINVAL;
> +       }
> +
> +       if (--mutex->ref_cnt > 0)
> +               return -EBUSY;
> +
> +       nv_wr32(ppmu, 0x0010a580 + (mutex->index * 4), 0x00);
> +
> +       nv_mask(ppmu, 0x0010a48c, 0xff, (owner & 0xff));
> +
> +       nv_debug(ppmu, "mutex released: id=%d, token=0x%x",
> +                                                         mutex->index, *token);
> +
> +       return 0;
> +}
> +
> +static int pmu_queue_lock(struct pmu_desc *pmu,
> +                       struct pmu_queue *queue)
> +{
> +       int ret;
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +
> +       if (PMU_IS_MESSAGE_QUEUE(queue->id))
> +               return 0;
> +
> +       if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
> +               mutex_lock(&queue->mutex);
> +               return 0;
> +       }
> +
> +       ret = pmu_mutex_acquire(ppmu, queue->mutex_id, &queue->mutex_lock);
> +       return ret;
> +}
> +
> +static int pmu_queue_unlock(struct pmu_desc *pmu,
> +                       struct pmu_queue *queue)
> +{
> +       int ret;
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +
> +       if (PMU_IS_MESSAGE_QUEUE(queue->id))
> +               return 0;
> +
> +       if (PMU_IS_SW_COMMAND_QUEUE(queue->id)) {
> +               mutex_unlock(&queue->mutex);
> +               return 0;
> +       }
> +
> +       ret = pmu_mutex_release(ppmu, queue->mutex_id, &queue->mutex_lock);
> +       return ret;
> +}
> +
> +/* called by pmu_read_message, no lock */
> +static bool pmu_queue_is_empty(struct pmu_desc *pmu,
> +                       struct pmu_queue *queue)
> +{
> +       u32 head, tail;
> +
> +       pmu_queue_head(pmu, queue, &head, QUEUE_GET);
> +       if (queue->opened && queue->oflag == OFLAG_READ)
> +               tail = queue->position;
> +       else
> +               pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);
> +
> +       return head == tail;
> +}
> +
> +static bool pmu_queue_has_room(struct pmu_desc *pmu,
> +                       struct pmu_queue *queue, u32 size, bool *need_rewind)
> +{
> +       u32 head, tail, free;
> +       bool rewind = false;
> +
> +       size = ALIGN(size, QUEUE_ALIGNMENT);
> +
> +       pmu_queue_head(pmu, queue, &head, QUEUE_GET);
> +       pmu_queue_tail(pmu, queue, &tail, QUEUE_GET);
> +
> +       if (head >= tail) {
> +               free = queue->offset + queue->size - head;
> +               free -= PMU_CMD_HDR_SIZE;
> +
> +               if (size > free) {
> +                       rewind = true;
> +                       head = queue->offset;
> +               }
> +       }
> +
> +       if (head < tail)
> +               free = tail - head - 1;
> +
> +       if (need_rewind)
> +               *need_rewind = rewind;
> +
> +       return size <= free;
> +}
> +
> +static int pmu_queue_push(struct pmu_desc *pmu,
> +                       struct pmu_queue *queue, void *data, u32 size)
> +{
> +
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +       if (!queue->opened && queue->oflag == OFLAG_WRITE) {
> +               nv_error(ppmu, "queue not opened for write\n");
> +               return -EINVAL;
> +       }
> +
> +       pmu_queue_write(pmu, queue->position, data, size);
> +       queue->position += ALIGN(size, QUEUE_ALIGNMENT);
> +       return 0;
> +}
> +
> +static int pmu_queue_pop(struct pmu_desc *pmu,
> +                       struct pmu_queue *queue, void *data, u32 size,
> +                       u32 *bytes_read)
> +{
> +       u32 head, tail, used;
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +
> +       *bytes_read = 0;
> +
> +       if (!queue->opened && queue->oflag == OFLAG_READ) {
> +               nv_error(ppmu, "queue not opened for read\n");
> +               return -EINVAL;
> +       }
> +
> +       pmu_queue_head(pmu, queue, &head, QUEUE_GET);
> +       tail = queue->position;
> +
> +       if (head == tail)
> +               return 0;
> +
> +       if (head > tail)
> +               used = head - tail;
> +       else
> +               used = queue->offset + queue->size - tail;
> +
> +       if (size > used) {
> +               nv_warn(ppmu, "queue size smaller than request read\n");
> +               size = used;
> +       }
> +
> +       pmu_queue_read(pmu, tail, data, size);
> +       queue->position += ALIGN(size, QUEUE_ALIGNMENT);
> +       *bytes_read = size;
> +       return 0;
> +}
> +
> +static void pmu_queue_rewind(struct pmu_desc *pmu,
> +                       struct pmu_queue *queue)
> +{
> +       struct pmu_cmd cmd;
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +
> +
> +       if (!queue->opened) {
> +               nv_error(ppmu, "queue not opened\n");
> +               goto out;
> +       }
> +
> +       if (queue->oflag == OFLAG_WRITE) {
> +               cmd.hdr.unit_id = PMU_UNIT_REWIND;
> +               cmd.hdr.size = PMU_CMD_HDR_SIZE;
> +               pmu_queue_push(pmu, queue, &cmd, cmd.hdr.size);
> +               nv_debug(ppmu, "queue %d rewinded\n", queue->id);
> +       }
> +
> +       queue->position = queue->offset;
> +out:
> +       nv_debug(ppmu, "exit %s\n", __func__);
> +}
> +
> +/* open for read and lock the queue */
> +static int pmu_queue_open_read(struct pmu_desc *pmu,
> +                       struct pmu_queue *queue)
> +{
> +       int err;
> +
> +       err = pmu_queue_lock(pmu, queue);
> +       if (err)
> +               return err;
> +
> +       if (queue->opened)
> +               BUG();
> +
> +       pmu_queue_tail(pmu, queue, &queue->position, QUEUE_GET);
> +       queue->oflag = OFLAG_READ;
> +       queue->opened = true;
> +
> +       return 0;
> +}
> +
> +/* open for write and lock the queue
> +   make sure there's enough free space for the write */
> +static int pmu_queue_open_write(struct pmu_desc *pmu,
> +                       struct pmu_queue *queue, u32 size)
> +{
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +       bool rewind = false;
> +       int err;
> +
> +       err = pmu_queue_lock(pmu, queue);
> +       if (err)
> +               return err;
> +
> +       if (queue->opened)
> +               BUG();
> +
> +       if (!pmu_queue_has_room(pmu, queue, size, &rewind)) {
> +               nv_error(ppmu, "queue full");
> +               pmu_queue_unlock(pmu, queue);
> +               return -EAGAIN;
> +       }
> +
> +       pmu_queue_head(pmu, queue, &queue->position, QUEUE_GET);
> +       queue->oflag = OFLAG_WRITE;
> +       queue->opened = true;
> +
> +       if (rewind)
> +               pmu_queue_rewind(pmu, queue);
> +
> +       return 0;
> +}
> +
> +/* close and unlock the queue */
> +static int pmu_queue_close(struct pmu_desc *pmu,
> +                       struct pmu_queue *queue, bool commit)
> +{
> +       if (!queue->opened)
> +               return 0;
> +
> +       if (commit) {
> +               if (queue->oflag == OFLAG_READ) {
> +                       pmu_queue_tail(pmu, queue,
> +                               &queue->position, QUEUE_SET);
> +               } else {
> +                       pmu_queue_head(pmu, queue,
> +                               &queue->position, QUEUE_SET);
> +               }
> +       }
> +
> +       queue->opened = false;
> +
> +       pmu_queue_unlock(pmu, queue);
> +
> +       return 0;
> +}
> +
> +int pmu_wait_message_cond(struct pmu_desc *pmu, u32 timeout,
> +                                u32 *var, u32 val)
> +{
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +       unsigned long end_jiffies = jiffies + msecs_to_jiffies(timeout);
> +       unsigned long delay = GK20A_IDLE_CHECK_DEFAULT;
> +
> +       do {
> +               if (*var == val)
> +                       return 0;
> +
> +               if (nv_rd32(ppmu, 0x0010a008))
> +                       gk20a_pmu_isr(ppmu);
> +
> +               usleep_range(delay, delay * 2);
> +               delay = min_t(u32, delay << 1, GK20A_IDLE_CHECK_MAX);
> +       } while (time_before(jiffies, end_jiffies));
> +
> +       return -ETIMEDOUT;
> +}
> +
> +void pmu_dump_falcon_stats(struct pmu_desc *pmu)
> +{
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +       int i;
> +
> +       nv_debug(ppmu, "pmu_falcon_os_r : %d\n",
> +               nv_rd32(ppmu, 0x0010a080));
> +       nv_debug(ppmu, "pmu_falcon_cpuctl_r : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a100));
> +       nv_debug(ppmu, "pmu_falcon_idlestate_r : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a04c));
> +       nv_debug(ppmu, "pmu_falcon_mailbox0_r : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a040));
> +       nv_debug(ppmu, "pmu_falcon_mailbox1_r : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a044));
> +       nv_debug(ppmu, "pmu_falcon_irqstat_r : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a008));
> +       nv_debug(ppmu, "pmu_falcon_irqmode_r : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a00c));
> +       nv_debug(ppmu, "pmu_falcon_irqmask_r : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a018));
> +       nv_debug(ppmu, "pmu_falcon_irqdest_r : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a01c));
> +
> +       for (i = 0; i < 0x0000000c; i++)
> +               nv_debug(ppmu, "pmu_pmu_mailbox_r(%d) : 0x%x\n",
> +                       i, nv_rd32(ppmu, 0x0010a450 + i*4));
> +
> +       for (i = 0; i < 0x00000004; i++)
> +               nv_debug(ppmu, "pmu_pmu_debug_r(%d) : 0x%x\n",
> +                       i, nv_rd32(ppmu, 0x0010a5c0 + i*4));
> +
> +       for (i = 0; i < 6/*NV_Ppmu_FALCON_ICD_IDX_RSTAT__SIZE_1*/; i++) {
> +               nv_wr32(ppmu, 0x0010a200,
> +                       0xe |
> +                       (i & 0x1f) << 8);
> +               nv_debug(ppmu, "pmu_rstat (%d) : 0x%x\n",
> +                       i, nv_rd32(ppmu, 0x0010a20c));
> +       }
> +
> +       i = nv_rd32(ppmu, 0x0010a7b0);
> +       nv_debug(ppmu, "pmu_pmu_bar0_error_status_r : 0x%x\n", i);
> +       if (i != 0) {
> +               nv_debug(ppmu, "pmu_pmu_bar0_addr_r : 0x%x\n",
> +                       nv_rd32(ppmu, 0x0010a7a0));
> +               nv_debug(ppmu, "pmu_pmu_bar0_data_r : 0x%x\n",
> +                       nv_rd32(ppmu, 0x0010a7a4));
> +               nv_debug(ppmu, "pmu_pmu_bar0_timeout_r : 0x%x\n",
> +                       nv_rd32(ppmu, 0x0010a7a8));
> +               nv_debug(ppmu, "pmu_pmu_bar0_ctl_r : 0x%x\n",
> +                       nv_rd32(ppmu, 0x0010a7ac));
> +       }
> +
> +       i = nv_rd32(ppmu, 0x0010a988);
> +       nv_debug(ppmu, "pmu_pmu_bar0_fecs_error_r : 0x%x\n", i);
> +
> +       i = nv_rd32(ppmu, 0x0010a16c);
> +       nv_debug(ppmu, "pmu_falcon_exterrstat_r : 0x%x\n", i);
> +       if (((i >> 31) & 0x1)) {
> +               nv_debug(ppmu, "pmu_falcon_exterraddr_r : 0x%x\n",
> +                       nv_rd32(ppmu, 0x0010a168));
> +               /*nv_debug(ppmu, "pmc_enable : 0x%x\n",
> +                 nv_rd32(pmc, 0x00000200));*/
> +       }
> +
> +       nv_debug(ppmu, "pmu_falcon_engctl_r : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a0a4));
> +       nv_debug(ppmu, "pmu_falcon_curctx_r : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a050));
> +       nv_debug(ppmu, "pmu_falcon_nxtctx_r : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a054));
> +
> +       nv_wr32(ppmu, 0x0010a200,
> +               0x8 |
> +               ((PMU_FALCON_REG_IMB & 0x1f) << 8));
> +       nv_debug(ppmu, "PMU_FALCON_REG_IMB : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a20c));
> +
> +       nv_wr32(ppmu, 0x0010a200,
> +               0x8 |
> +               ((PMU_FALCON_REG_DMB & 0x1f) << 8));
> +       nv_debug(ppmu, "PMU_FALCON_REG_DMB : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a20c));
> +
> +       nv_wr32(ppmu, 0x0010a200,
> +               0x8 |
> +               ((PMU_FALCON_REG_CSW & 0x1f) << 8));
> +       nv_debug(ppmu, "PMU_FALCON_REG_CSW : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a20c));
> +
> +       nv_wr32(ppmu, 0x0010a200,
> +               0x8 |
> +               ((PMU_FALCON_REG_CTX & 0x1f) << 8));
> +       nv_debug(ppmu, "PMU_FALCON_REG_CTX : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a20c));
> +
> +       nv_wr32(ppmu, 0x0010a200,
> +               0x8 |
> +               ((PMU_FALCON_REG_EXCI & 0x1f) << 8));
> +       nv_debug(ppmu, "PMU_FALCON_REG_EXCI : 0x%x\n",
> +               nv_rd32(ppmu, 0x0010a20c));
> +
> +       for (i = 0; i < 4; i++) {
> +               nv_wr32(ppmu, 0x0010a200,
> +                       0x8 |
> +                       ((PMU_FALCON_REG_PC & 0x1f) << 8));
> +               nv_debug(ppmu, "PMU_FALCON_REG_PC : 0x%x\n",
> +                       nv_rd32(ppmu, 0x0010a20c));
> +
> +               nv_wr32(ppmu, 0x0010a200,
> +                       0x8 |
> +                       ((PMU_FALCON_REG_SP & 0x1f) << 8));
> +               nv_debug(ppmu, "PMU_FALCON_REG_SP : 0x%x\n",
> +                       nv_rd32(ppmu, 0x0010a20c));
> +       }
> +
> +       /* PMU may crash due to FECS crash. Dump FECS status */
> +       /*gk20a_fecs_dump_falcon_stats(g);*/
> +}
> +
> +static bool pmu_validate_cmd(struct pmu_desc *pmu, struct pmu_cmd *cmd,
> +                       struct pmu_msg *msg, struct pmu_payload *payload,
> +                       u32 queue_id)
> +{
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +       struct pmu_queue *queue;
> +       u32 in_size, out_size;
> +
> +       nv_debug(ppmu, "pmu validate cmd\n");
> +       pmu_dump_falcon_stats(pmu);
> +
> +       if (!PMU_IS_SW_COMMAND_QUEUE(queue_id))
> +               goto invalid_cmd;
> +
> +       queue = &pmu->queue[queue_id];
> +       if (cmd->hdr.size < PMU_CMD_HDR_SIZE)
> +               goto invalid_cmd;
> +
> +       if (cmd->hdr.size > (queue->size >> 1))
> +               goto invalid_cmd;
> +
> +       if (msg != NULL && msg->hdr.size < PMU_MSG_HDR_SIZE)
> +               goto invalid_cmd;
> +
> +       if (!PMU_UNIT_ID_IS_VALID(cmd->hdr.unit_id))
> +               goto invalid_cmd;
> +
> +       if (payload == NULL)
> +               return true;
> +
> +       if (payload->in.buf == NULL && payload->out.buf == NULL)
> +               goto invalid_cmd;
> +
> +       if ((payload->in.buf != NULL && payload->in.size == 0) ||
> +           (payload->out.buf != NULL && payload->out.size == 0))
> +               goto invalid_cmd;
> +
> +       in_size = PMU_CMD_HDR_SIZE;
> +       if (payload->in.buf) {
> +               in_size += payload->in.offset;
> +               in_size += sizeof(struct pmu_allocation_gk20a);
> +       }
> +
> +       out_size = PMU_CMD_HDR_SIZE;
> +       if (payload->out.buf) {
> +               out_size += payload->out.offset;
> +               out_size += sizeof(struct pmu_allocation_gk20a);
> +       }
> +
> +       if (in_size > cmd->hdr.size || out_size > cmd->hdr.size)
> +               goto invalid_cmd;
> +
> +
> +       if ((payload->in.offset != 0 && payload->in.buf == NULL) ||
> +           (payload->out.offset != 0 && payload->out.buf == NULL))
> +               goto invalid_cmd;
> +
> +       return true;
> +
> +invalid_cmd:
> +       nv_error(ppmu, "invalid pmu cmd :\n"
> +               "queue_id=%d,\n"
> +               "cmd_size=%d, cmd_unit_id=%d, msg=%p, msg_size=%d,\n"
> +               "payload in=%p, in_size=%d, in_offset=%d,\n"
> +               "payload out=%p, out_size=%d, out_offset=%d",
> +               queue_id, cmd->hdr.size, cmd->hdr.unit_id,
> +               msg, msg ? msg->hdr.unit_id : ~0,
> +               &payload->in, payload->in.size, payload->in.offset,
> +               &payload->out, payload->out.size, payload->out.offset);
> +
> +       return false;
> +}
> +
> +static int pmu_write_cmd(struct pmu_desc *pmu, struct pmu_cmd *cmd,
> +                       u32 queue_id, unsigned long timeout)
> +{
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +       struct pmu_queue *queue;
> +       unsigned long end_jiffies = jiffies +
> +               msecs_to_jiffies(timeout);
> +       int err;
> +
> +       nv_debug(ppmu, "pmu write cmd\n");
> +
> +       queue = &pmu->queue[queue_id];
> +
> +       do {
> +               err = pmu_queue_open_write(pmu, queue, cmd->hdr.size);
> +               if (err == -EAGAIN && time_before(jiffies, end_jiffies))
> +                       usleep_range(1000, 2000);
> +               else
> +                       break;
> +       } while (1);
> +
> +       if (err)
> +               goto clean_up;
> +
> +       pmu_queue_push(pmu, queue, cmd, cmd->hdr.size);
> +
> +       err = pmu_queue_close(pmu, queue, true);
> +
> +clean_up:
> +       if (err)
> +               nv_error(ppmu,
> +                       "fail to write cmd to queue %d", queue_id);
> +       else
> +               nv_debug(ppmu, "cmd writing done");
> +
> +       return err;
> +}
> +
> +int gk20a_pmu_cmd_post(struct nvkm_pmu *ppmu, struct pmu_cmd *cmd,
> +               struct pmu_msg *msg, struct pmu_payload *payload,
> +               u32 queue_id, pmu_callback callback, void *cb_param,
> +               u32 *seq_desc, unsigned long timeout)
> +{
> +       struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +       struct pmu_desc *pmu = &impl->pmudata;
> +       struct pmu_sequence *seq;
> +       struct pmu_allocation_gk20a *in = NULL, *out = NULL;
> +       int err;
> +
> +       BUG_ON(!cmd);
> +       BUG_ON(!seq_desc);
> +       BUG_ON(!pmu->pmu_ready);
> +       nv_debug(ppmu, "Post CMD\n");
> +       if (!pmu_validate_cmd(pmu, cmd, msg, payload, queue_id))
> +               return -EINVAL;
> +
> +       err = pmu_seq_acquire(pmu, &seq);
> +       if (err)
> +               return err;
> +
> +       cmd->hdr.seq_id = seq->id;
> +
> +       cmd->hdr.ctrl_flags = 0;
> +       cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_STATUS;
> +       cmd->hdr.ctrl_flags |= PMU_CMD_FLAGS_INTR;
> +
> +       seq->callback = callback;
> +       seq->cb_params = cb_param;
> +       seq->msg = msg;
> +       seq->out_payload = NULL;
> +       seq->desc = pmu->next_seq_desc++;
> +
> +       if (payload)
> +               seq->out_payload = payload->out.buf;
> +
> +       *seq_desc = seq->desc;
> +
> +       if (payload && payload->in.offset != 0) {
> +               in = (struct pmu_allocation_gk20a *)((u8 *)&cmd->cmd +
> +                       payload->in.offset);
> +
> +               if (payload->in.buf != payload->out.buf)
> +                       in->alloc.dmem.size = (u16)payload->in.size;
> +               else
> +                       in->alloc.dmem.size =
> +                               (u16)max(payload->in.size, payload->out.size);
> +
> +               err = pmu->dmem.alloc(&pmu->dmem,
> +                       (void *)&in->alloc.dmem.offset,
> +                       in->alloc.dmem.size,
> +                       PMU_DMEM_ALLOC_ALIGNMENT);
> +               if (err)
> +                       goto clean_up;
> +
> +               pmu_copy_to_dmem(pmu, (in->alloc.dmem.offset),
> +                       payload->in.buf, payload->in.size, 0);
> +               seq->in_gk20a.alloc.dmem.size = in->alloc.dmem.size;
> +               seq->in_gk20a.alloc.dmem.offset = in->alloc.dmem.offset;
> +       }
> +
> +       if (payload && payload->out.offset != 0) {
> +               out = (struct pmu_allocation_gk20a *)((u8 *)&cmd->cmd +
> +                       payload->out.offset);
> +               out->alloc.dmem.size = (u16)payload->out.size;
> +
> +               if (payload->out.buf != payload->in.buf) {
> +                       err = pmu->dmem.alloc(&pmu->dmem,
> +                               (void *)&out->alloc.dmem.offset,
> +                               out->alloc.dmem.size,
> +                               PMU_DMEM_ALLOC_ALIGNMENT);
> +                       if (err)
> +                               goto clean_up;
> +               } else {
> +                       BUG_ON(in == NULL);
> +                       out->alloc.dmem.offset = in->alloc.dmem.offset;
> +               }
> +
> +               seq->out_gk20a.alloc.dmem.size = out->alloc.dmem.size;
> +               seq->out_gk20a.alloc.dmem.offset = out->alloc.dmem.offset;
> +       }
> +
> +       seq->state = PMU_SEQ_STATE_USED;
> +       err = pmu_write_cmd(pmu, cmd, queue_id, timeout);
> +       if (err)
> +               seq->state = PMU_SEQ_STATE_PENDING;
> +
> +       nv_debug(ppmu, "cmd posted\n");
> +
> +       return 0;
> +
> +clean_up:
> +       nv_debug(ppmu, "cmd post failed\n");
> +       if (in)
> +               pmu->dmem.free(&pmu->dmem,
> +                       in->alloc.dmem.offset,
> +                       in->alloc.dmem.size,
> +                       PMU_DMEM_ALLOC_ALIGNMENT);
> +       if (out)
> +               pmu->dmem.free(&pmu->dmem,
> +                       out->alloc.dmem.offset,
> +                       out->alloc.dmem.size,
> +                       PMU_DMEM_ALLOC_ALIGNMENT);
> +
> +       pmu_seq_release(pmu, seq);
> +       return err;
> +}
> +
> +void gk20a_pmu_isr(struct nvkm_pmu *ppmu)
> +{
> +       struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +       struct pmu_desc *pmu = &impl->pmudata;
> +       struct nvkm_mc *pmc = nvkm_mc(ppmu);
> +       struct pmu_queue *queue;
> +       u32 intr, mask;
> +       bool recheck = false;
> +       if (!pmu->isr_enabled)
> +               goto out;
> +
> +       mask = nv_rd32(ppmu, 0x0010a018) &
> +               nv_rd32(ppmu, 0x0010a01c);
> +
> +       intr = nv_rd32(ppmu, 0x0010a008) & mask;
> +
> +       nv_debug(ppmu, "received falcon interrupt: 0x%08x", intr);
> +       pmu_enable_irq(ppmu, pmc, false);
> +       if (!intr || pmu->pmu_state == PMU_STATE_OFF) {
> +               nv_wr32(ppmu, 0x0010a004, intr);
> +               nv_error(ppmu, "pmu state off\n");
> +               pmu_enable_irq(ppmu, pmc, true);
> +               goto out;
> +       }
> +       if (intr & 0x10) {
> +               nv_error(ppmu,
> +                       "pmu halt intr not implemented");
> +               pmu_dump_falcon_stats(pmu);
> +       }
> +       if (intr & 0x20) {
> +               nv_error(ppmu,
> +                       "pmu exterr intr not implemented. Clearing interrupt.");
> +               pmu_dump_falcon_stats(pmu);
> +
> +               nv_wr32(ppmu, 0x0010a16c,
> +                       nv_rd32(ppmu, 0x0010a16c) &
> +                               ~(0x1 << 31));
> +       }
> +       if (intr & 0x40) {
> +               nv_debug(ppmu, "scheduling work\n");
> +               schedule_work(&pmu->isr_workq);
> +               pmu_enable_irq(ppmu, pmc, true);
> +               recheck = true;
> +       }
> +
> +       if (recheck) {
> +               queue = &pmu->queue[PMU_MESSAGE_QUEUE];
> +               if (!pmu_queue_is_empty(pmu, queue))
> +                       nv_wr32(ppmu, 0x0010a000, 0x40);
> +       } else {
> +               pmu_enable_irq(ppmu, pmc, true);
> +       }
> +
> +       pmu_enable_irq(ppmu, pmc, true);
> +       nv_wr32(ppmu, 0x0010a004, intr);
> +out:
> +       nv_debug(ppmu, "irq handled\n");
> +}
> +
> +static int
> +gk20a_pmu_init_vm(struct nvkm_pmu *ppmu, const struct firmware *fw)
> +{
> +       int ret = 0;
> +       struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +       struct pmu_desc *pmu = &impl->pmudata;
> +       u32 *ucode_image;
> +       struct pmu_ucode_desc *desc = (struct pmu_ucode_desc *)fw->data;
> +       int i;
> +       struct pmu_priv_vm *ppmuvm = &pmuvm;
> +       struct nvkm_device *device = nv_device(&ppmu->base);
> +       struct nvkm_vm *vm;
> +       u64 pmu_area_len = 300*1024;
> +
> +       ppmu->pmuvm = &pmuvm;
> +       ppmu->pg_buf = &pmu->pg_buf;
> +       pmu->pmu = ppmu;
> +       /* mem for inst blk*/
> +       ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, 0x1000, 0, 0,
> +                               &ppmuvm->mem);
> +       if (ret)
> +               goto instblk_alloc_err;
> +
> +       /* mem for pgd*/
> +       ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, 0x8000, 0, 0,
> +                               &ppmuvm->pgd);
> +       if (ret)
> +               goto pgd_alloc_err;
> +
> +       /*allocate virtual memory range*/
> +       ret = nvkm_vm_new(device, 0, pmu_area_len, 0, &vm);
> +       if (ret)
> +               goto virt_alloc_err;
> +
> +       atomic_inc(&vm->engref[NVDEV_SUBDEV_PMU]);
> +       /*update VM with pgd */
> +
> +       ret = nvkm_vm_ref(vm, &ppmuvm->vm, ppmuvm->pgd);
> +       if (ret)
> +               goto virt_alloc_err;
> +
> +       /*update pgd in inst blk */
> +       nv_wo32(ppmuvm->mem, 0x0200, lower_32_bits(ppmuvm->pgd->addr));
> +       nv_wo32(ppmuvm->mem, 0x0204, upper_32_bits(ppmuvm->pgd->addr));
> +       nv_wo32(ppmuvm->mem, 0x0208, lower_32_bits(pmu_area_len - 1));
> +       nv_wo32(ppmuvm->mem, 0x020c, upper_32_bits(pmu_area_len - 1));
> +
> +       /* allocate memory for pmu fw to be copied to*/
> +       ret = nvkm_gpuobj_new(nv_object(ppmu), NULL,
> +                  GK20A_PMU_UCODE_SIZE_MAX, 0x1000, 0, &pmu->ucode.pmubufobj);
> +       if (ret)
> +               goto fw_alloc_err;
> +
> +       ucode_image = (u32 *)((u32)desc + desc->descriptor_size);
> +       for (i = 0; i < (desc->app_start_offset + desc->app_size) >> 2; i++) {
> +               nv_wo32(pmu->ucode.pmubufobj, i << 2, ucode_image[i]);
> +               pr_info("writing 0x%08x\n", ucode_image[i]);
> +       }
> +       /* map allocated memory into GMMU */
> +       ret = nvkm_gpuobj_map_vm(nv_gpuobj(pmu->ucode.pmubufobj), vm,
> +                                   NV_MEM_ACCESS_RW,
> +                                   &pmu->ucode.pmubufvma);
> +       if (ret)
> +               goto map_err;
> +
> +       nv_debug(ppmu, "%s function end\n", __func__);
> +       return ret;
> +map_err:
> +       nvkm_gpuobj_destroy(pmu->ucode.pmubufobj);
> +virt_alloc_err:
> +fw_alloc_err:
> +       nvkm_gpuobj_destroy(ppmuvm->pgd);
> +pgd_alloc_err:
> +       nvkm_gpuobj_destroy(ppmuvm->mem);
> +instblk_alloc_err:
> +       return ret;
> +
> +}
> +
> +static int
> +gk20a_pmu_load_firmware(struct nvkm_pmu *ppmu, const struct firmware **pfw)
> +{
> +       struct nvkm_device *dev;
> +       char name[32];
> +
> +       dev = nv_device(ppmu);
> +
> +       snprintf(name, sizeof(name), "nvidia/tegra124/%s",
> +                                                        GK20A_PMU_UCODE_IMAGE);
> +
> +       return request_firmware(pfw, name, nv_device_base(dev));
> +}
> +
> +static void
> +gk20a_pmu_dump_firmware_info(struct nvkm_pmu *ppmu,
> +               const struct firmware *fw)
> +{
> +       struct pmu_ucode_desc *desc = (struct pmu_ucode_desc *)fw->data;
> +
> +       nv_debug(ppmu, "GK20A PMU firmware information\n");
> +       nv_debug(ppmu, "descriptor size = %u\n", desc->descriptor_size);
> +       nv_debug(ppmu, "image size  = %u\n", desc->image_size);
> +       nv_debug(ppmu, "app_version = 0x%08x\n", desc->app_version);
> +       nv_debug(ppmu, "date = %s\n", desc->date);
> +       nv_debug(ppmu, "bootloader_start_offset = 0x%08x\n",
> +                               desc->bootloader_start_offset);
> +       nv_debug(ppmu, "bootloader_size = 0x%08x\n", desc->bootloader_size);
> +       nv_debug(ppmu, "bootloader_imem_offset = 0x%08x\n",
> +                               desc->bootloader_imem_offset);
> +       nv_debug(ppmu, "bootloader_entry_point = 0x%08x\n",
> +                               desc->bootloader_entry_point);
> +       nv_debug(ppmu, "app_start_offset = 0x%08x\n", desc->app_start_offset);
> +       nv_debug(ppmu, "app_size = 0x%08x\n", desc->app_size);
> +       nv_debug(ppmu, "app_imem_offset = 0x%08x\n", desc->app_imem_offset);
> +       nv_debug(ppmu, "app_imem_entry = 0x%08x\n", desc->app_imem_entry);
> +       nv_debug(ppmu, "app_dmem_offset = 0x%08x\n", desc->app_dmem_offset);
> +       nv_debug(ppmu, "app_resident_code_offset = 0x%08x\n",
> +                       desc->app_resident_code_offset);
> +       nv_debug(ppmu, "app_resident_code_size = 0x%08x\n",
> +                       desc->app_resident_code_size);
> +       nv_debug(ppmu, "app_resident_data_offset = 0x%08x\n",
> +                       desc->app_resident_data_offset);
> +       nv_debug(ppmu, "app_resident_data_size = 0x%08x\n",
> +                       desc->app_resident_data_size);
> +       nv_debug(ppmu, "nb_overlays = %d\n", desc->nb_overlays);
> +
> +       nv_debug(ppmu, "compressed = %u\n", desc->compressed);
> +}
> +
> +static int pmu_process_init_msg(struct pmu_desc *pmu,
> +                       struct pmu_msg *msg)
> +{
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +       struct pmu_init_msg_pmu_gk20a *init;
> +       struct pmu_sha1_gid_data gid_data;
> +       u32 i, tail = 0;
> +
> +       tail = nv_rd32(ppmu, 0x0010a4cc) & 0xffffffff;
> +
> +       pmu_copy_from_dmem(pmu, tail,
> +               (u8 *)&msg->hdr, PMU_MSG_HDR_SIZE, 0);
> +
> +       if (msg->hdr.unit_id != PMU_UNIT_INIT) {
> +               nv_error(ppmu,
> +                       "expecting init msg");
> +               return -EINVAL;
> +       }
> +
> +       pmu_copy_from_dmem(pmu, tail + PMU_MSG_HDR_SIZE,
> +               (u8 *)&msg->msg, msg->hdr.size - PMU_MSG_HDR_SIZE, 0);
> +
> +       if (msg->msg.init.msg_type != PMU_INIT_MSG_TYPE_PMU_INIT) {
> +               nv_error(ppmu,
> +                       "expecting init msg");
> +               return -EINVAL;
> +       }
> +
> +       tail += ALIGN(msg->hdr.size, PMU_DMEM_ALIGNMENT);
> +       nv_wr32(ppmu, 0x0010a4cc,
> +               tail & 0xffffffff);
> +
> +       init = &msg->msg.init.pmu_init_gk20a;
> +       if (!pmu->gid_info.valid) {
> +
> +               pmu_copy_from_dmem(pmu,
> +                       init->sw_managed_area_offset,
> +                       (u8 *)&gid_data,
> +                       sizeof(struct pmu_sha1_gid_data), 0);
> +
> +               pmu->gid_info.valid =
> +                       (*(u32 *)gid_data.signature == PMU_SHA1_GID_SIGNATURE);
> +
> +               if (pmu->gid_info.valid) {
> +
> +                       BUG_ON(sizeof(pmu->gid_info.gid) !=
> +                               sizeof(gid_data.gid));
> +
> +                       memcpy(pmu->gid_info.gid, gid_data.gid,
> +                               sizeof(pmu->gid_info.gid));
> +               }
> +       }
> +
> +       for (i = 0; i < PMU_QUEUE_COUNT; i++)
> +               pmu_queue_init(pmu, i, init);
> +
> +       if (!pmu->dmem.alloc)
> +               nvkm_pmu_allocator_init(&pmu->dmem, "gk20a_pmu_dmem",
> +                               init->sw_managed_area_offset,
> +                               init->sw_managed_area_size);
> +
> +       pmu->pmu_ready = true;
> +       pmu->pmu_state = PMU_STATE_INIT_RECEIVED;
> +
> +       return 0;
> +}
> +
> +static bool pmu_read_message(struct pmu_desc *pmu, struct pmu_queue *queue,
> +                       struct pmu_msg *msg, int *status)
> +{
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +       u32 read_size, bytes_read;
> +       int err;
> +
> +       *status = 0;
> +
> +       if (pmu_queue_is_empty(pmu, queue))
> +               return false;
> +
> +       err = pmu_queue_open_read(pmu, queue);
> +       if (err) {
> +               nv_error(ppmu,
> +                       "fail to open queue %d for read", queue->id);
> +               *status = err;
> +               return false;
> +       }
> +
> +       err = pmu_queue_pop(pmu, queue, &msg->hdr,
> +                       PMU_MSG_HDR_SIZE, &bytes_read);
> +       if (err || bytes_read != PMU_MSG_HDR_SIZE) {
> +               nv_error(ppmu,
> +                       "fail to read msg from queue %d", queue->id);
> +               *status = err | -EINVAL;
> +               goto clean_up;
> +       }
> +
> +       if (msg->hdr.unit_id == PMU_UNIT_REWIND) {
> +               pmu_queue_rewind(pmu, queue);
> +               /* read again after rewind */
> +               err = pmu_queue_pop(pmu, queue, &msg->hdr,
> +                               PMU_MSG_HDR_SIZE, &bytes_read);
> +               if (err || bytes_read != PMU_MSG_HDR_SIZE) {
> +                       nv_error(ppmu,
> +                               "fail to read msg from queue %d", queue->id);
> +                       *status = err | -EINVAL;
> +                       goto clean_up;
> +               }
> +       }
> +
> +       if (!PMU_UNIT_ID_IS_VALID(msg->hdr.unit_id)) {
> +               nv_error(ppmu,
> +                       "read invalid unit_id %d from queue %d",
> +                       msg->hdr.unit_id, queue->id);
> +                       *status = -EINVAL;
> +                       goto clean_up;
> +       }
> +
> +       if (msg->hdr.size > PMU_MSG_HDR_SIZE) {
> +               read_size = msg->hdr.size - PMU_MSG_HDR_SIZE;
> +               err = pmu_queue_pop(pmu, queue, &msg->msg,
> +                       read_size, &bytes_read);
> +               if (err || bytes_read != read_size) {
> +                       nv_error(ppmu,
> +                               "fail to read msg from queue %d", queue->id);
> +                       *status = err;
> +                       goto clean_up;
> +               }
> +       }
> +
> +       err = pmu_queue_close(pmu, queue, true);
> +       if (err) {
> +               nv_error(ppmu,
> +                       "fail to close queue %d", queue->id);
> +               *status = err;
> +               return false;
> +       }
> +
> +       return true;
> +
> +clean_up:
> +       err = pmu_queue_close(pmu, queue, false);
> +       if (err)
> +               nv_error(ppmu,
> +                       "fail to close queue %d", queue->id);
> +       return false;
> +}
> +
> +static int pmu_response_handle(struct pmu_desc *pmu,
> +                       struct pmu_msg *msg)
> +{
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +       struct pmu_sequence *seq;
> +       int ret = 0;
> +
> +       nv_debug(ppmu, "handling pmu response\n");
> +       seq = &pmu->seq[msg->hdr.seq_id];
> +       if (seq->state != PMU_SEQ_STATE_USED &&
> +           seq->state != PMU_SEQ_STATE_CANCELLED) {
> +               nv_error(ppmu,
> +                       "msg for an unknown sequence %d", seq->id);
> +               return -EINVAL;
> +       }
> +
> +       if (msg->hdr.unit_id == PMU_UNIT_RC &&
> +           msg->msg.rc.msg_type == PMU_RC_MSG_TYPE_UNHANDLED_CMD) {
> +               nv_error(ppmu,
> +                       "unhandled cmd: seq %d", seq->id);
> +       } else if (seq->state != PMU_SEQ_STATE_CANCELLED) {
> +               if (seq->msg) {
> +                       if (seq->msg->hdr.size >= msg->hdr.size) {
> +                               memcpy(seq->msg, msg, msg->hdr.size);
> +                               if (seq->out_gk20a.alloc.dmem.size != 0) {
> +                                       pmu_copy_from_dmem(pmu,
> +                                       seq->out_gk20a.alloc.dmem.offset,
> +                                       seq->out_payload,
> +                                       seq->out_gk20a.alloc.dmem.size, 0);
> +                               }
> +                       } else {
> +                               nv_error(ppmu,
> +                                       "sequence %d msg buffer too small",
> +                                       seq->id);
> +                       }
> +               }
> +       } else
> +               seq->callback = NULL;
> +       if (seq->in_gk20a.alloc.dmem.size != 0)
> +               pmu->dmem.free(&pmu->dmem,
> +                       seq->in_gk20a.alloc.dmem.offset,
> +                       seq->in_gk20a.alloc.dmem.size,
> +                       PMU_DMEM_ALLOC_ALIGNMENT);
> +       if (seq->out_gk20a.alloc.dmem.size != 0)
> +               pmu->dmem.free(&pmu->dmem,
> +                       seq->out_gk20a.alloc.dmem.offset,
> +                       seq->out_gk20a.alloc.dmem.size,
> +                       PMU_DMEM_ALLOC_ALIGNMENT);
> +
> +       if (seq->callback)
> +               seq->callback(ppmu, msg, seq->cb_params, seq->desc, ret);
> +
> +       pmu_seq_release(pmu, seq);
> +
> +       /* TBD: notify client waiting for available dmem */
> +       nv_debug(ppmu, "pmu response processed\n");
> +
> +       return 0;
> +}
> +
> +int pmu_wait_message_cond(struct pmu_desc *pmu, u32 timeout,
> +                                u32 *var, u32 val);
> +
> +
> +static int pmu_handle_event(struct pmu_desc *pmu, struct pmu_msg *msg)
> +{
> +       int err = 0;
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +
> +       switch (msg->hdr.unit_id) {
> +       case PMU_UNIT_PERFMON:
> +               nv_debug(ppmu, "init perfmon event generated\n");
> +               break;
> +       default:
> +               nv_debug(ppmu, "default event generated\n");
> +               break;
> +       }
> +
> +       return err;
> +}
> +
> +void pmu_process_message(struct work_struct *work)
> +{
> +       struct pmu_desc *pmu = container_of(work, struct pmu_desc, isr_workq);
> +       struct pmu_msg msg;
> +       int status;
> +       struct nvkm_pmu *ppmu = (void *)nvkm_pmu((void *)
> +               impl_from_pmu(pmu));
> +       struct nvkm_mc *pmc = nvkm_mc(ppmu);
> +
> +       mutex_lock(&pmu->isr_mutex);
> +       if (unlikely(!pmu->pmu_ready)) {
> +               nv_debug(ppmu, "processing init msg\n");
> +               pmu_process_init_msg(pmu, &msg);
> +               mutex_unlock(&pmu->isr_mutex);
> +               pmu_enable_irq(ppmu, pmc, true);
> +               goto out;
> +       }
> +
> +       while (pmu_read_message(pmu,
> +               &pmu->queue[PMU_MESSAGE_QUEUE], &msg, &status)) {
> +
> +               nv_debug(ppmu, "read msg hdr:\n"
> +                               "unit_id = 0x%08x, size = 0x%08x,\n"
> +                               "ctrl_flags = 0x%08x, seq_id = 0x%08x\n",
> +                               msg.hdr.unit_id, msg.hdr.size,
> +                               msg.hdr.ctrl_flags, msg.hdr.seq_id);
> +
> +               msg.hdr.ctrl_flags &= ~PMU_CMD_FLAGS_PMU_MASK;
> +
> +               if (msg.hdr.ctrl_flags == PMU_CMD_FLAGS_EVENT)
> +                       pmu_handle_event(pmu, &msg);
> +               else
> +                       pmu_response_handle(pmu, &msg);
> +       }
> +       mutex_unlock(&pmu->isr_mutex);
> +       pmu_enable_irq(ppmu, pmc, true);
> +out:
> +       nv_debug(ppmu, "exit %s\n", __func__);
> +}
> +
> +int gk20a_pmu_destroy(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc)
> +{
> +       struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +       struct pmu_desc *pmu = &impl->pmudata;
> +
> +       /* make sure the pending operations are finished before we continue */
> +       cancel_work_sync(&pmu->isr_workq);
> +       pmu->initialized = false;
> +
> +       mutex_lock(&pmu->isr_mutex);
> +       pmu_enable(ppmu, pmc, false);
> +       pmu->isr_enabled = false;
> +       mutex_unlock(&pmu->isr_mutex);
> +
> +       pmu->pmu_state = PMU_STATE_OFF;
> +       pmu->pmu_ready = false;
> +       pmu->zbc_ready = false;
> +
> +       return 0;
> +}
> +
> +int gk20a_pmu_load_norm(struct nvkm_pmu *ppmu, u32 *load)
> +{
> +       struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +       struct pmu_desc *pmu = &impl->pmudata;
> +       *load = pmu->load_shadow;
> +       return 0;
> +}
> +
> +int gk20a_pmu_load_update(struct nvkm_pmu *ppmu)
> +{
> +       struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +       struct pmu_desc *pmu = &impl->pmudata;
> +       u16 _load = 0;
> +
> +       pmu_copy_from_dmem(pmu, pmu->sample_buffer, (u8 *)&_load, 2, 0);
> +       pmu->load_shadow = _load / 10;
> +       pmu->load_avg = (((9*pmu->load_avg) + pmu->load_shadow) / 10);
> +
> +       return 0;
> +}
> +
> +void gk20a_pmu_get_load_counters(struct nvkm_pmu *ppmu, u32 *busy_cycles,
> +                                u32 *total_cycles)
> +{
> +  /*todo if (!g->power_on || gk20a_busy(g->dev)) {
> +               *busy_cycles = 0;
> +               *total_cycles = 0;
> +               return;
> +               }*/
> +
> +       *busy_cycles = nv_rd32(ppmu, 0x0010a508 + 16) & 0x7fffffff;
> +       /*todormb();*/
> +       *total_cycles = nv_rd32(ppmu, 0x0010a508 + 32) & 0x7fffffff;
> +       /*todogk20a_idle(g->dev);*/
> +}
> +
> +void gk20a_pmu_reset_load_counters(struct nvkm_pmu *ppmu)
> +{
> +       u32 reg_val = 1 << 31;
> +
> +       /*todoif (!g->power_on || gk20a_busy(g->dev))
> +         return;*/
> +
> +       nv_wr32(ppmu, 0x0010a508 + 32, reg_val);
> +       /*todowmb()*/;
> +       nv_wr32(ppmu, 0x0010a508 + 16, reg_val);
> +       /*todogk20a_idle(g->dev);*/
> +}
> +
> +static int gk20a_init_pmu_setup_hw1(struct nvkm_pmu *ppmu, struct nvkm_mc *pmc)
> +{
> +       struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +       struct pmu_desc *pmu = &impl->pmudata;
> +       int err;
> +
> +       mutex_lock(&pmu->isr_mutex);
> +       pmu_reset(ppmu, pmc);
> +       pmu->isr_enabled = true;
> +       mutex_unlock(&pmu->isr_mutex);
> +
> +       /* setup apertures - virtual */
> +       nv_wr32(ppmu, 0x10a600 + 0 * 4, 0x0);
> +       nv_wr32(ppmu, 0x10a600 + 1 * 4, 0x0);
> +       /* setup apertures - physical */
> +       nv_wr32(ppmu, 0x10a600 + 2 * 4, 0x4 | 0x0);
> +       nv_wr32(ppmu, 0x10a600 + 3 * 4, 0x4 | 0x1);
> +       nv_wr32(ppmu, 0x10a600 + 4 * 4, 0x4 | 0x2);
> +
> +       /* TBD: load pmu ucode */
> +       err = pmu_bootstrap(pmu);
> +       if (err)
> +               return err;
> +
> +       return 0;
> +
> +}
> +
> +static int gk20a_init_pmu_setup_sw(struct nvkm_pmu *ppmu)
> +{
> +       struct nvkm_pmu_impl *impl = (void *)nv_oclass(ppmu);
> +       struct pmu_desc *pmu = &impl->pmudata;
> +       struct pmu_priv_vm *ppmuvm = &pmuvm;
> +       int i, err = 0;
> +       int ret = 0;
> +
> +
> +       if (pmu->sw_ready) {
> +
> +               for (i = 0; i < pmu->mutex_cnt; i++) {
> +                       pmu->mutex[i].id    = i;
> +                       pmu->mutex[i].index = i;
> +               }
> +               pmu_seq_init(pmu);
> +
> +               nv_debug(ppmu, "skipping init\n");
> +               goto skip_init;
> +       }
> +
> +       /* no infoRom script from vbios? */
> +
> +       /* TBD: sysmon subtask */
> +
> +       pmu->mutex_cnt = 0x00000010;
> +       pmu->mutex = kzalloc(pmu->mutex_cnt *
> +               sizeof(struct pmu_mutex), GFP_KERNEL);
> +       if (!pmu->mutex) {
> +               err = -ENOMEM;
> +               nv_error(ppmu, "not enough space ENOMEM\n");
> +               goto err;
> +       }
> +
> +       for (i = 0; i < pmu->mutex_cnt; i++) {
> +               pmu->mutex[i].id    = i;
> +               pmu->mutex[i].index = i;
> +       }
> +
> +       pmu->seq = kzalloc(PMU_MAX_NUM_SEQUENCES *
> +               sizeof(struct pmu_sequence), GFP_KERNEL);
> +       if (!pmu->seq) {
> +               err = -ENOMEM;
> +               nv_error(ppmu, "not enough space ENOMEM\n");
> +               goto err_free_mutex;
> +       }
> +
> +       pmu_seq_init(pmu);
> +
> +       INIT_WORK(&pmu->isr_workq, pmu_process_message);
> +       init_waitqueue_head(&ppmu->init_wq);
> +       ppmu->gr_initialised = false;
> +
> +       /* allocate memory for pmu fw area */
> +       ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, GK20A_PMU_SEQ_BUF_SIZE,
> +                                           0x1000, 0, &pmu->seq_buf.pmubufobj);
> +       if (ret)
> +               return ret;
> +       ret = nvkm_gpuobj_new(nv_object(ppmu), NULL, GK20A_PMU_TRACE_BUFSIZE,
> +                                           0, 0, &pmu->trace_buf.pmubufobj);
> +       if (ret)
> +               return ret;
> +       /* map allocated memory into GMMU */
> +       ret = nvkm_gpuobj_map_vm(nv_gpuobj(pmu->seq_buf.pmubufobj),
> +                                       ppmuvm->vm,
> +                                       NV_MEM_ACCESS_RW,
> +                                       &pmu->seq_buf.pmubufvma);
> +       if (ret)
> +               return ret;
> +       ret = nvkm_gpuobj_map_vm(nv_gpuobj(pmu->trace_buf.pmubufobj),
> +                                       ppmuvm->vm,
> +                                       NV_MEM_ACCESS_RW,
> +                                       &pmu->trace_buf.pmubufvma);
> +       if (ret)
> +               return ret;
> +
> +       /* TBD: remove this if ZBC save/restore is handled by PMU
> +        * end an empty ZBC sequence for now */
> +       nv_wo32(pmu->seq_buf.pmubufobj, 0, 0x16);
> +       nv_wo32(pmu->seq_buf.pmubufobj, 1, 0x00);
> +       nv_wo32(pmu->seq_buf.pmubufobj, 2, 0x01);
> +       nv_wo32(pmu->seq_buf.pmubufobj, 3, 0x00);
> +       nv_wo32(pmu->seq_buf.pmubufobj, 4, 0x00);
> +       nv_wo32(pmu->seq_buf.pmubufobj, 5, 0x00);
> +       nv_wo32(pmu->seq_buf.pmubufobj, 6, 0x00);
> +       nv_wo32(pmu->seq_buf.pmubufobj, 7, 0x00);
> +
> +       pmu->seq_buf.size = GK20A_PMU_SEQ_BUF_SIZE;
> +       ret = gk20a_pmu_debugfs_init(ppmu);
> +       if (ret)
> +               return ret;
> +
> +       pmu->sw_ready = true;
> +
> +skip_init:
> +       return 0;
> +err_free_mutex:
> +       kfree(pmu->mutex);
> +err:
> +       return err;
> +}
> +
> +static void
> +gk20a_pmu_pgob(struct nvkm_pmu *ppmu, bool enable)
> +{
> +       /*
> +       nv_mask(ppmu, 0x000200, 0x00001000, 0x00000000);
> +       nv_rd32(ppmu, 0x000200);
> +       nv_mask(ppmu, 0x000200, 0x08000000, 0x08000000);
> +
> +       msleep(50);
> +
> +       nv_mask(ppmu, 0x000200, 0x08000000, 0x00000000);
> +       nv_mask(ppmu, 0x000200, 0x00001000, 0x00001000);
> +       nv_rd32(ppmu, 0x000200);
> +       */
> +}
> +
> +static void gk20a_pmu_intr(struct nvkm_subdev *subdev)
> +{
> +       struct nvkm_pmu *ppmu = nvkm_pmu(subdev);
> +
> +       gk20a_pmu_isr(ppmu);
> +}
> +
> +void gk20a_remove_pmu_support(struct pmu_desc *pmu)
> +{
> +       nvkm_pmu_allocator_destroy(&pmu->dmem);
> +}
> +
> +int  gk20a_message(struct nvkm_pmu *ppmu, u32 reply[2],
> +                u32 process, u32 message, u32 data0, u32 data1)
> +{
> +       return -EPERM;
> +}
> +
> +int
> +gk20a_pmu_create_(struct nvkm_object *parent,
> +                   struct nvkm_object *engine,
> +                   struct nvkm_oclass *oclass, int length, void **pobject)
> +{
> +       struct nvkm_pmu *ppmu;
> +       struct nvkm_device *device = nv_device(parent);
> +       int ret;
> +
> +       ret = nvkm_subdev_create_(parent, engine, oclass, 0, "PPMU",
> +                                    "pmu", length, pobject);
> +       ppmu = *pobject;
> +       if (ret)
> +               return ret;
> +
> +       ret = nv_device_get_irq(device, true);
> +
> +       ppmu->message = gk20a_message;
> +       ppmu->pgob = gk20a_pmu_pgob;
> +       ppmu->pmu_mutex_acquire = pmu_mutex_acquire;
> +       ppmu->pmu_mutex_release = pmu_mutex_release;
> +       ppmu->pmu_load_norm = gk20a_pmu_load_norm;
> +       ppmu->pmu_load_update = gk20a_pmu_load_update;
> +       ppmu->pmu_reset_load_counters = gk20a_pmu_reset_load_counters;
> +       ppmu->pmu_get_load_counters = gk20a_pmu_get_load_counters;
> +
> +       return 0;
> +}
> +
> +
> +
> diff --git a/drm/nouveau/nvkm/subdev/pmu/gk20a.h b/drm/nouveau/nvkm/subdev/pmu/gk20a.h
> new file mode 100644
> index 000000000000..a084d6d518b4
> --- /dev/null
> +++ b/drm/nouveau/nvkm/subdev/pmu/gk20a.h
> @@ -0,0 +1,369 @@
> +#ifndef __NVKM_pmu_GK20A_H__
> +#define __NVKM_pmu_GK20A_H__
> +
> +/*
> + * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + */
> +void pmu_setup_hw(struct pmu_desc *pmu);
> +void gk20a_remove_pmu_support(struct pmu_desc *pmu);
> +#define gk20a_pmu_create(p, e, o, d)                                         \
> +       gk20a_pmu_create_((p), (e), (o), sizeof(**d), (void **)d)
> +
> +int gk20a_pmu_create_(struct nvkm_object *, struct nvkm_object *,
> +                       struct nvkm_oclass *, int, void **);
> +/* defined by pmu hw spec */
> +#define GK20A_PMU_VA_SIZE              (512 * 1024 * 1024)
> +#define GK20A_PMU_UCODE_SIZE_MAX       (256 * 1024)
> +#define GK20A_PMU_SEQ_BUF_SIZE         4096
> +/* idle timeout */
> +#define GK20A_IDLE_CHECK_DEFAULT               100 /* usec */
> +#define GK20A_IDLE_CHECK_MAX           5000 /* usec */
> +
> +/* so far gk20a has two engines: gr and ce2(gr_copy) */
> +enum {
> +       ENGINE_GR_GK20A     = 0,
> +       ENGINE_CE2_GK20A    = 1,
> +       ENGINE_INVAL_GK20A
> +};
> +
> +#define ZBC_MASK(i)                    (~(~(0) << ((i)+1)) & 0xfffe)
> +
> +#define APP_VERSION_GK20A 17997577
> +
> +enum {
> +       GK20A_PMU_DMAIDX_UCODE          = 0,
> +       GK20A_PMU_DMAIDX_VIRT           = 1,
> +       GK20A_PMU_DMAIDX_PHYS_VID       = 2,
> +       GK20A_PMU_DMAIDX_PHYS_SYS_COH   = 3,
> +       GK20A_PMU_DMAIDX_PHYS_SYS_NCOH  = 4,
> +       GK20A_PMU_DMAIDX_RSVD           = 5,
> +       GK20A_PMU_DMAIDX_PELPG          = 6,
> +       GK20A_PMU_DMAIDX_END            = 7
> +};
> +
> +struct pmu_mem_gk20a {
> +       u32 dma_base;
> +       u8  dma_offset;
> +       u8  dma_idx;
> +       u16 fb_size;
> +};
> +
> +struct pmu_dmem {
> +       u16 size;
> +       u32 offset;
> +};
> +
> +struct pmu_cmdline_args_gk20a {
> +       u32 cpu_freq_hz;                /* Frequency of the clock driving PMU */
> +       u32 falc_trace_size;            /* falctrace buffer size (bytes) */
> +       u32 falc_trace_dma_base;        /* 256-byte block address */
> +       u32 falc_trace_dma_idx;         /* dmaIdx for DMA operations */
> +       u8 secure_mode;
> +       struct pmu_mem_gk20a gc6_ctx;           /* dmem offset of gc6 context */
> +};
> +
> +#define GK20A_PMU_TRACE_BUFSIZE     0x4000   /* 4K */
> +#define GK20A_PMU_DMEM_BLKSIZE2                8
> +
> +#define GK20A_PMU_UCODE_NB_MAX_OVERLAY     32
> +#define GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH  64
> +
> +struct pmu_ucode_desc {
> +       u32 descriptor_size;
> +       u32 image_size;
> +       u32 tools_version;
> +       u32 app_version;
> +       char date[GK20A_PMU_UCODE_NB_MAX_DATE_LENGTH];
> +       u32 bootloader_start_offset;
> +       u32 bootloader_size;
> +       u32 bootloader_imem_offset;
> +       u32 bootloader_entry_point;
> +       u32 app_start_offset;
> +       u32 app_size;
> +       u32 app_imem_offset;
> +       u32 app_imem_entry;
> +       u32 app_dmem_offset;
> +       u32 app_resident_code_offset;  /* Offset from appStartOffset */
> +/* Exact size of the resident code
> + * ( potentially contains CRC inside at the end ) */
> +       u32 app_resident_code_size;
> +       u32 app_resident_data_offset;  /* Offset from appStartOffset */
> +/* Exact size of the resident data
> + * ( potentially contains CRC inside at the end ) */
> +       u32 app_resident_data_size;
> +       u32 nb_overlays;
> +       struct {u32 start; u32 size; } load_ovl[GK20A_PMU_UCODE_NB_MAX_OVERLAY];
> +       u32 compressed;
> +};
> +
> +#define PMU_UNIT_REWIND                (0x00)
> +#define PMU_UNIT_PG            (0x03)
> +#define PMU_UNIT_INIT          (0x07)
> +#define PMU_UNIT_PERFMON       (0x12)
> +#define PMU_UNIT_THERM         (0x1B)
> +#define PMU_UNIT_RC            (0x1F)
> +#define PMU_UNIT_NULL          (0x20)
> +#define PMU_UNIT_END           (0x23)
> +
> +#define PMU_UNIT_TEST_START    (0xFE)
> +#define PMU_UNIT_END_SIM       (0xFF)
> +#define PMU_UNIT_TEST_END      (0xFF)
> +
> +#define PMU_UNIT_ID_IS_VALID(id)               \
> +               (((id) < PMU_UNIT_END) || ((id) >= PMU_UNIT_TEST_START))
> +
> +#define PMU_DMEM_ALLOC_ALIGNMENT       (32)
> +#define PMU_DMEM_ALIGNMENT             (4)
> +
> +#define PMU_CMD_FLAGS_PMU_MASK         (0xF0)
> +
> +#define PMU_CMD_FLAGS_STATUS           BIT(0)
> +#define PMU_CMD_FLAGS_INTR             BIT(1)
> +#define PMU_CMD_FLAGS_EVENT            BIT(2)
> +#define PMU_CMD_FLAGS_WATERMARK                BIT(3)
> +
> +struct pmu_hdr {
> +       u8 unit_id;
> +       u8 size;
> +       u8 ctrl_flags;
> +       u8 seq_id;
> +};
> +#define PMU_MSG_HDR_SIZE       sizeof(struct pmu_hdr)
> +#define PMU_CMD_HDR_SIZE       sizeof(struct pmu_hdr)
> +
> +
> +struct pmu_allocation_gk20a {
> +       struct {
> +               struct pmu_dmem dmem;
> +               struct pmu_mem_gk20a fb;
> +       } alloc;
> +};
> +
> +enum {
> +       PMU_INIT_MSG_TYPE_PMU_INIT = 0,
> +};
> +
> +struct pmu_init_msg_pmu_gk20a {
> +       u8 msg_type;
> +       u8 pad;
> +       u16  os_debug_entry_point;
> +
> +       struct {
> +               u16 size;
> +               u16 offset;
> +               u8  index;
> +               u8  pad;
> +       } queue_info[PMU_QUEUE_COUNT];
> +
> +       u16 sw_managed_area_offset;
> +       u16 sw_managed_area_size;
> +};
> +
> +struct pmu_init_msg {
> +       union {
> +               u8 msg_type;
> +               struct pmu_init_msg_pmu_gk20a pmu_init_gk20a;
> +       };
> +};
> +
> +
> +enum {
> +       PMU_RC_MSG_TYPE_UNHANDLED_CMD = 0,
> +};
> +
> +struct pmu_rc_msg_unhandled_cmd {
> +       u8 msg_type;
> +       u8 unit_id;
> +};
> +
> +struct pmu_rc_msg {
> +       u8 msg_type;
> +       struct pmu_rc_msg_unhandled_cmd unhandled_cmd;
> +};
> +
> +/* PERFMON */
> +#define PMU_DOMAIN_GROUP_PSTATE                0
> +#define PMU_DOMAIN_GROUP_GPC2CLK       1
> +#define PMU_DOMAIN_GROUP_NUM           2
> +struct pmu_perfmon_counter_gk20a {
> +       u8 index;
> +       u8 flags;
> +       u8 group_id;
> +       u8 valid;
> +       u16 upper_threshold; /* units of 0.01% */
> +       u16 lower_threshold; /* units of 0.01% */
> +};
> +struct pmu_zbc_cmd {
> +       u8 cmd_type;
> +       u8 pad;
> +       u16 entry_mask;
> +};
> +
> +/* PERFMON MSG */
> +enum {
> +       PMU_PERFMON_MSG_ID_INCREASE_EVENT = 0,
> +       PMU_PERFMON_MSG_ID_DECREASE_EVENT = 1,
> +       PMU_PERFMON_MSG_ID_INIT_EVENT     = 2,
> +       PMU_PERFMON_MSG_ID_ACK            = 3
> +};
> +
> +struct pmu_perfmon_msg_generic {
> +       u8 msg_type;
> +       u8 state_id;
> +       u8 group_id;
> +       u8 data;
> +};
> +
> +struct pmu_perfmon_msg {
> +       union {
> +               u8 msg_type;
> +               struct pmu_perfmon_msg_generic gen;
> +       };
> +};
> +
> +
> +struct pmu_cmd {
> +       struct pmu_hdr hdr;
> +       union {
> +               struct pmu_zbc_cmd zbc;
> +       } cmd;
> +};
> +
> +struct pmu_msg {
> +       struct pmu_hdr hdr;
> +       union {
> +               struct pmu_init_msg init;
> +               struct pmu_perfmon_msg perfmon;
> +               struct pmu_rc_msg rc;
> +       } msg;
> +};
> +
> +/* write by sw, read by pmu, protected by sw mutex lock */
> +#define PMU_COMMAND_QUEUE_HPQ          0
> +/* write by sw, read by pmu, protected by sw mutex lock */
> +#define PMU_COMMAND_QUEUE_LPQ          1
> +/* write by pmu, read by sw, accessed by interrupt handler, no lock */
> +#define PMU_MESSAGE_QUEUE              4
> +#define PMU_QUEUE_COUNT                        5
> +
> +enum {
> +       PMU_MUTEX_ID_RSVD1 = 0,
> +       PMU_MUTEX_ID_GPUSER,
> +       PMU_MUTEX_ID_GPMUTEX,
> +       PMU_MUTEX_ID_I2C,
> +       PMU_MUTEX_ID_RMLOCK,
> +       PMU_MUTEX_ID_MSGBOX,
> +       PMU_MUTEX_ID_FIFO,
> +       PMU_MUTEX_ID_PG,
> +       PMU_MUTEX_ID_GR,
> +       PMU_MUTEX_ID_CLK,
> +       PMU_MUTEX_ID_RSVD6,
> +       PMU_MUTEX_ID_RSVD7,
> +       PMU_MUTEX_ID_RSVD8,
> +       PMU_MUTEX_ID_RSVD9,
> +       PMU_MUTEX_ID_INVALID
> +};
> +
> +#define PMU_IS_COMMAND_QUEUE(id)       \
> +               ((id)  < PMU_MESSAGE_QUEUE)
> +
> +#define PMU_IS_SW_COMMAND_QUEUE(id)    \
> +               (((id) == PMU_COMMAND_QUEUE_HPQ) || \
> +                ((id) == PMU_COMMAND_QUEUE_LPQ))
> +
> +#define  PMU_IS_MESSAGE_QUEUE(id)      \
> +               ((id) == PMU_MESSAGE_QUEUE)
> +
> +enum {
> +       OFLAG_READ = 0,
> +       OFLAG_WRITE
> +};
> +
> +#define QUEUE_SET              (true)
> +       /*todo find how to get cpu_pa*/
> +#define QUEUE_GET              (false)
> +
> +#define QUEUE_ALIGNMENT                (4)
> +
> +#define PMU_PGENG_GR_BUFFER_IDX_INIT   (0)
> +#define PMU_PGENG_GR_BUFFER_IDX_ZBC    (1)
> +#define PMU_PGENG_GR_BUFFER_IDX_FECS   (2)
> +
> +enum {
> +       PMU_DMAIDX_UCODE         = 0,
> +       PMU_DMAIDX_VIRT          = 1,
> +       PMU_DMAIDX_PHYS_VID      = 2,
> +       PMU_DMAIDX_PHYS_SYS_COH  = 3,
> +       PMU_DMAIDX_PHYS_SYS_NCOH = 4,
> +       PMU_DMAIDX_RSVD          = 5,
> +       PMU_DMAIDX_PELPG         = 6,
> +       PMU_DMAIDX_END           = 7
> +};
> +
> +#define PMU_MUTEX_ID_IS_VALID(id)      \
> +               ((id) < PMU_MUTEX_ID_INVALID)
> +
> +#define PMU_INVALID_MUTEX_OWNER_ID     (0)
> +
> +struct pmu_mutex {
> +       u32 id;
> +       u32 index;
> +       u32 ref_cnt;
> +};
> +
> +
> +#define PMU_INVALID_SEQ_DESC           (~0)
> +
> +enum {
> +       PMU_SEQ_STATE_FREE = 0,
> +       PMU_SEQ_STATE_PENDING,
> +       PMU_SEQ_STATE_USED,
> +       PMU_SEQ_STATE_CANCELLED
> +};
> +
> +struct pmu_payload {
> +       struct {
> +               void *buf;
> +               u32 offset;
> +               u32 size;
> +       } in, out;
> +};
> +
> +typedef void (*pmu_callback)(struct nvkm_pmu *, struct pmu_msg *, void *,
> +u32, u32);
> +
> +struct pmu_sequence {
> +       u8 id;
> +       u32 state;
> +       u32 desc;
> +       struct pmu_msg *msg;
> +       struct pmu_allocation_gk20a in_gk20a;
> +       struct pmu_allocation_gk20a out_gk20a;
> +       u8 *out_payload;
> +       pmu_callback callback;
> +       void *cb_params;
> +};
> +struct pmu_gk20a_data {
> +       struct pmu_perfmon_counter_gk20a perfmon_counter_gk20a;
> +       u32 perfmon_state_id[PMU_DOMAIN_GROUP_NUM];
> +};
> +
> +#endif /*_GK20A_H__*/
> diff --git a/drm/nouveau/nvkm/subdev/pmu/priv.h b/drm/nouveau/nvkm/subdev/pmu/priv.h
> index 998410563bfd..c4686e418582 100644
> --- a/drm/nouveau/nvkm/subdev/pmu/priv.h
> +++ b/drm/nouveau/nvkm/subdev/pmu/priv.h
> @@ -2,7 +2,91 @@
>  #define __NVKM_PMU_PRIV_H__
>  #include <subdev/pmu.h>
>  #include <subdev/pmu/fuc/os.h>
> +#include <core/object.h>
> +#include <core/device.h>
> +#include <core/parent.h>
> +#include <core/mm.h>
> +#include <linux/rwsem.h>
> +#include <linux/slab.h>
> +#include <subdev/mmu.h>
> +#include <core/gpuobj.h>
>
> +static inline u32 u64_hi32(u64 n)
> +{
> +       return (u32)((n >> 32) & ~(u32)0);
> +}
> +
> +static inline u32 u64_lo32(u64 n)
> +{
> +       return (u32)(n & ~(u32)0);
> +}
> +
> +/* #define ALLOCATOR_DEBUG */
> +
> +/* main struct */
> +struct nvkm_pmu_allocator {
> +
> +       char name[32];                  /* name for allocator */
> +/*struct rb_root rb_root;*/            /* rb tree root for blocks */
> +
> +       u32 base;                       /* min value of this linear space */
> +       u32 limit;                      /* max value = limit - 1 */
> +
> +       unsigned long *bitmap;          /* bitmap */
> +
> +       struct gk20a_alloc_block *block_first;  /* first block in list */
> +       struct gk20a_alloc_block *block_recent; /* last visited block */
> +
> +       u32 first_free_addr;            /* first free addr, non-contigous
> +                                          allocation preferred start,
> +                                          in order to pick up small holes */
> +       u32 last_free_addr;             /* last free addr, contiguous
> +                                          allocation preferred start */
> +       u32 cached_hole_size;           /* max free hole size up to
> +                                          last_free_addr */
> +       u32 block_count;                /* number of blocks */
> +
> +       struct rw_semaphore rw_sema;    /* lock */
> +       struct kmem_cache *block_cache; /* slab cache */
> +
> +       /* if enabled, constrain to [base, limit) */
> +       struct {
> +               bool enable;
> +               u32 base;
> +               u32 limit;
> +       } constraint;
> +
> +       int (*alloc)(struct nvkm_pmu_allocator *allocator,
> +               u32 *addr, u32 len, u32 align);
> +       int (*free)(struct nvkm_pmu_allocator *allocator,
> +               u32 addr, u32 len, u32 align);
> +
> +};
> +
> +int nvkm_pmu_allocator_init(struct nvkm_pmu_allocator *allocator,
> +                       const char *name, u32 base, u32 size);
> +void nvkm_pmu_allocator_destroy(struct nvkm_pmu_allocator *allocator);
> +
> +int nvkm_pmu_allocator_block_alloc(struct nvkm_pmu_allocator *allocator,
> +                       u32 *addr, u32 len, u32 align);
> +
> +int nvkm_pmu_allocator_block_free(struct nvkm_pmu_allocator *allocator,
> +                       u32 addr, u32 len, u32 align);
> +
> +#if defined(ALLOCATOR_DEBUG)
> +
> +#define allocator_dbg(alloctor, format, arg...)                                \
> +do {                                                           \
> +       if (1)                                                  \
> +               pr_debug("nvkm_pmu_allocator (%s) %s: " format "\n",\
> +                       alloctor->name, __func__, ##arg);\
> +} while (0)
> +
> +#else /* ALLOCATOR_DEBUG */
> +
> +#define allocator_dbg(format, arg...)
> +
> +#endif /* ALLOCATOR_DEBUG */
>  #define nvkm_pmu_create(p, e, o, d)                                         \
>         nvkm_pmu_create_((p), (e), (o), sizeof(**d), (void **)d)
>  #define nvkm_pmu_destroy(p)                                                 \
> @@ -26,6 +110,179 @@ int _nvkm_pmu_ctor(struct nvkm_object *, struct nvkm_object *,
>  int _nvkm_pmu_init(struct nvkm_object *);
>  int _nvkm_pmu_fini(struct nvkm_object *, bool);
>  void nvkm_pmu_pgob(struct nvkm_pmu *pmu, bool enable);
> +#define PMU_PG_IDLE_THRESHOLD                  15000
> +#define PMU_PG_POST_POWERUP_IDLE_THRESHOLD     1000000
> +
> +/* state transition :
> +    OFF => [OFF_ON_PENDING optional] => ON_PENDING => ON => OFF
> +    ON => OFF is always synchronized */
> +#define PMU_ELPG_STAT_OFF              0   /* elpg is off */
> +#define PMU_ELPG_STAT_ON               1   /* elpg is on */
> +/* elpg is off, ALLOW cmd has been sent, wait for ack */
> +#define PMU_ELPG_STAT_ON_PENDING       2
> +/* elpg is on, DISALLOW cmd has been sent, wait for ack */
> +#define PMU_ELPG_STAT_OFF_PENDING      3
> +/* elpg is off, caller has requested on, but ALLOW
> +cmd hasn't been sent due to ENABLE_ALLOW delay */
> +#define PMU_ELPG_STAT_OFF_ON_PENDING   4
> +
> +/* Falcon Register index */
> +#define PMU_FALCON_REG_R0              (0)
> +#define PMU_FALCON_REG_R1              (1)
> +#define PMU_FALCON_REG_R2              (2)
> +#define PMU_FALCON_REG_R3              (3)
> +#define PMU_FALCON_REG_R4              (4)
> +#define PMU_FALCON_REG_R5              (5)
> +#define PMU_FALCON_REG_R6              (6)
> +#define PMU_FALCON_REG_R7              (7)
> +#define PMU_FALCON_REG_R8              (8)
> +#define PMU_FALCON_REG_R9              (9)
> +#define PMU_FALCON_REG_R10             (10)
> +#define PMU_FALCON_REG_R11             (11)
> +#define PMU_FALCON_REG_R12             (12)
> +#define PMU_FALCON_REG_R13             (13)
> +#define PMU_FALCON_REG_R14             (14)
> +#define PMU_FALCON_REG_R15             (15)
> +#define PMU_FALCON_REG_IV0             (16)
> +#define PMU_FALCON_REG_IV1             (17)
> +#define PMU_FALCON_REG_UNDEFINED       (18)
> +#define PMU_FALCON_REG_EV              (19)
> +#define PMU_FALCON_REG_SP              (20)
> +#define PMU_FALCON_REG_PC              (21)
> +#define PMU_FALCON_REG_IMB             (22)
> +#define PMU_FALCON_REG_DMB             (23)
> +#define PMU_FALCON_REG_CSW             (24)
> +#define PMU_FALCON_REG_CCR             (25)
> +#define PMU_FALCON_REG_SEC             (26)
> +#define PMU_FALCON_REG_CTX             (27)
> +#define PMU_FALCON_REG_EXCI            (28)
> +#define PMU_FALCON_REG_RSVD0           (29)
> +#define PMU_FALCON_REG_RSVD1           (30)
> +#define PMU_FALCON_REG_RSVD2           (31)
> +#define PMU_FALCON_REG_SIZE            (32)
> +
> +/* Choices for pmu_state */
> +#define PMU_STATE_OFF                  0 /* PMU is off */
> +#define PMU_STATE_STARTING             1 /* PMU is on, but not booted */
> +#define PMU_STATE_INIT_RECEIVED                2 /* PMU init message received */
> +#define PMU_STATE_ELPG_BOOTING         3 /* PMU is booting */
> +#define PMU_STATE_ELPG_BOOTED          4 /* ELPG is initialized */
> +#define PMU_STATE_LOADING_PG_BUF       5 /* Loading PG buf */
> +#define PMU_STATE_LOADING_ZBC          6 /* Loading ZBC buf */
> +#define PMU_STATE_STARTED              7 /* Fully unitialized */
> +
> +#define PMU_QUEUE_COUNT                5
> +
> +#define PMU_MAX_NUM_SEQUENCES          (256)
> +#define PMU_SEQ_BIT_SHIFT              (5)
> +#define PMU_SEQ_TBL_SIZE       \
> +               (PMU_MAX_NUM_SEQUENCES >> PMU_SEQ_BIT_SHIFT)
> +
> +#define PMU_SHA1_GID_SIGNATURE         0xA7C66AD2
> +#define PMU_SHA1_GID_SIGNATURE_SIZE    4
> +
> +#define PMU_SHA1_GID_SIZE      16
> +
> +struct pmu_queue {
> +
> +       /* used by hw, for BIOS/SMI queue */
> +       u32 mutex_id;
> +       u32 mutex_lock;
> +       /* used by sw, for LPQ/HPQ queue */
> +       struct mutex mutex;
> +
> +       /* current write position */
> +       u32 position;
> +       /* physical dmem offset where this queue begins */
> +       u32 offset;
> +       /* logical queue identifier */
> +       u32 id;
> +       /* physical queue index */
> +       u32 index;
> +       /* in bytes */
> +       u32 size;
> +
> +       /* open-flag */
> +       u32 oflag;
> +       bool opened; /* opened implies locked */
> +};
> +
> +struct pmu_sha1_gid {
> +       bool valid;
> +       u8 gid[PMU_SHA1_GID_SIZE];
> +};
> +
> +struct pmu_sha1_gid_data {
> +       u8 signature[PMU_SHA1_GID_SIGNATURE_SIZE];
> +       u8 gid[PMU_SHA1_GID_SIZE];
> +};
> +
> +struct pmu_desc {
> +
> +       struct pmu_ucode_desc *desc;
> +       struct pmu_buf_desc ucode;
> +
> +       struct pmu_buf_desc pg_buf;
> +       /* TBD: remove this if ZBC seq is fixed */
> +       struct pmu_buf_desc seq_buf;
> +       struct pmu_buf_desc trace_buf;
> +       bool buf_loaded;
> +
> +       struct pmu_sha1_gid gid_info;
> +
> +       struct pmu_queue queue[PMU_QUEUE_COUNT];
> +
> +       struct pmu_sequence *seq;
> +       unsigned long pmu_seq_tbl[PMU_SEQ_TBL_SIZE];
> +       u32 next_seq_desc;
> +
> +       struct pmu_mutex *mutex;
> +       u32 mutex_cnt;
> +
> +       struct mutex pmu_copy_lock;
> +       struct mutex pmu_seq_lock;
> +
> +       struct nvkm_pmu_allocator dmem;
> +
> +       u32 *ucode_image;
> +       bool pmu_ready;
> +
> +       u32 zbc_save_done;
> +
> +       u32 stat_dmem_offset;
> +
> +       u32 elpg_stat;
> +
> +       int pmu_state;
> +
> +#define PMU_ELPG_ENABLE_ALLOW_DELAY_MSEC       1 /* msec */
> +       struct work_struct isr_workq;
> +       struct mutex elpg_mutex; /* protect elpg enable/disable */
> +/* disable -1, enable +1, <=0 elpg disabled, > 0 elpg enabled */
> +       int elpg_refcnt;
> +
> +       bool initialized;
> +
> +       void (*remove_support)(struct pmu_desc *pmu);
> +       bool sw_ready;
> +       bool perfmon_ready;
> +
> +       u32 sample_buffer;
> +       u32 load_shadow;
> +       u32 load_avg;
> +
> +       struct mutex isr_mutex;
> +       bool isr_enabled;
> +
> +       bool zbc_ready;
> +       unsigned long perfmon_events_cnt;
> +       bool perfmon_sampling_enabled;
> +       u8 pmu_mode;
> +       u32 falcon_id;
> +       u32 aelpg_param[5];
> +       void *pmu_chip_data;
> +       struct nvkm_pmu *pmu;
> +};
>
>  struct nvkm_pmu_impl {
>         struct nvkm_oclass base;
> @@ -39,5 +296,12 @@ struct nvkm_pmu_impl {
>         } data;
>
>         void (*pgob)(struct nvkm_pmu *, bool);
> +       struct pmu_desc pmudata;
>  };
> +
> +static inline struct nvkm_pmu *impl_from_pmu(struct pmu_desc *pmu)
> +{
> +       return pmu->pmu;
> +}
> +
>  #endif
> --
> 1.9.1
>
> _______________________________________________
> Nouveau mailing list
> Nouveau@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Nouveau] [PATCH] pmu/gk20a: PMU boot support.
       [not found]         ` <25b5050176544f47b0ac74d4086f145c-7W72rfoJkVm6sJks/06JalaTQe2KTcn/@public.gmane.org>
@ 2015-03-12 22:11           ` Ilia Mirkin
       [not found]             ` <CAKb7UvgDq-FzZwAZ8VwbhaVHi4B29jXL5qjOCQ47TWfqDBDQaA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 6+ messages in thread
From: Ilia Mirkin @ 2015-03-12 22:11 UTC (permalink / raw)
  To: Deepak Goyal
  Cc: Ben Skeggs, Alexandre Courbot,
	nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	linux-tegra-u79uwXL29TY76Z2rM5mHXA

I guess Alexandre will help you prepare this for upstream inclusion,
but just want to make sure that my main point makes it across -- this
is a 3K line patch. Please try to split it up into patches that add no
more than 300 lines at a time, preferably fewer (but this isn't always
possible). Ideally each patch should introduce one conceptual unit. Of
course different people might draw the line of "conceptual unit"
differently, but try to do it so that each one becomes 100-300 lines
of code :)

These aren't hard limits by the way, but good rules of thumb to go by
when sending patches for upstream inclusion.

Cheers,

  -ilia

On Thu, Mar 12, 2015 at 1:20 AM, Deepak Goyal <dgoyal-DDmLM1+adcrQT0dZR+AlfA@public.gmane.org> wrote:
> Hi Mirkin,
>
> Your observations are quiet correct.
> After the boot code is submitted successfully, I will submit the code to configure & enable features of PMU.(This will be done by sending cmds to PMU).
>
> Now talking about this patch:
> Apart from just the boot code, I have also included some things in this patch that I can remove for now(I will include these things in later digestible chunks):
>
> - Debugfs support (can be removed for now)
> - Debug support for dumping PMU falcon registers(can be removed for now)
> - PMU interacts with Kernel via interrupt mechanism.
>   For interaction with PMU, we have
>   defined command structs, functions to prepare/validate and send commands to PMU.
>   This infrastructure is basically to send commands to PMU.(right now it can be removed though we still require to receive messages from PMU to know if it has booted successfully).
>
> But this will be all that I will be able to remove from this patch.
> Can I go ahead with removing above suggestions?
>
> Regards,
> Deepak G
>
> -----Original Message-----
> From: ibmirkin-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org [mailto:ibmirkin-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org] On Behalf Of Ilia Mirkin
> Sent: Wednesday, March 11, 2015 10:41 PM
> To: Deepak Goyal
> Cc: Ben Skeggs; Alexandre Courbot; nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org; linux-tegra-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> Subject: Re: [Nouveau] [PATCH] pmu/gk20a: PMU boot support.
>
> Hi Deepak,
>
> There's... a lot of stuff going on here. Can you describe the goal of
> this patch (which could then be used as the patch commit message)? The
> current one basically boils down to "Add support for loading PMU", but
> merely loading the fw into a fuc engine is just a handful lines of
> code. Also, except in rare cases, it's customary to split up patches
> of this size into smaller, more reviewable chunks, which add on bits
> of functionality as they go.
>
> From what I can tell, you're adding the kernel-side interface for a
> hypothetical (and presumably closed-source) PMU blob that NVIDIA will
> supply. In essence, the blob is expected to implement a RTOS which
> runs on the PMU's falcon CPU. There are a bunch of API's implemented
> by this blob that the host can call, but it also does things on its
> own. For the kernel side, each of these API calls should probably be a
> separate patch (after an initial "just load it and do nothing" style
> patch). Or perhaps have the infrastructure that you add first and then
> something that implements the API calls.
>
> Cheers,
>
>   -ilia
>
>
> On Wed, Mar 11, 2015 at 2:33 AM, Deepak Goyal <dgoyal-DDmLM1+adcrQT0dZR+AlfA@public.gmane.org> wrote:
>> It adds PMU boot support.It loads PMU
>> firmware into PMU falcon.RM/Kernel driver
>> receives INIT ack (through interrupt mechanism)
>> from PMU when PMU boots with success.
>>
>> Signed-off-by: Deepak Goyal <dgoyal-DDmLM1+adcrQT0dZR+AlfA@public.gmane.org>
>> ---
>>  drm/nouveau/include/nvkm/subdev/pmu.h |   26 +-
>>  drm/nouveau/nvkm/subdev/pmu/base.c    |  108 ++
>>  drm/nouveau/nvkm/subdev/pmu/gk20a.c   | 2131 ++++++++++++++++++++++++++++++++-
>>  drm/nouveau/nvkm/subdev/pmu/gk20a.h   |  369 ++++++
>>  drm/nouveau/nvkm/subdev/pmu/priv.h    |  264 ++++
>>  5 files changed, 2884 insertions(+), 14 deletions(-)
>>  create mode 100644 drm/nouveau/nvkm/subdev/pmu/gk20a.h
>>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [Nouveau] [PATCH] pmu/gk20a: PMU boot support.
       [not found]             ` <CAKb7UvgDq-FzZwAZ8VwbhaVHi4B29jXL5qjOCQ47TWfqDBDQaA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2015-03-13  1:56               ` Alexandre Courbot
  0 siblings, 0 replies; 6+ messages in thread
From: Alexandre Courbot @ 2015-03-13  1:56 UTC (permalink / raw)
  To: Ilia Mirkin
  Cc: Deepak Goyal, Ben Skeggs,
	nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	linux-tegra-u79uwXL29TY76Z2rM5mHXA

On Fri, Mar 13, 2015 at 7:11 AM, Ilia Mirkin <imirkin-FrUbXkNCsVf2fBVCVOL8/A@public.gmane.org> wrote:
> I guess Alexandre will help you prepare this for upstream inclusion,
> but just want to make sure that my main point makes it across -- this
> is a 3K line patch. Please try to split it up into patches that add no
> more than 300 lines at a time, preferably fewer (but this isn't always
> possible). Ideally each patch should introduce one conceptual unit. Of
> course different people might draw the line of "conceptual unit"
> differently, but try to do it so that each one becomes 100-300 lines
> of code :)

Yes, please let me take care of the first few rounds of reviews. I
don't know how much this patch can be split though, as it is supposed
to be the "basic" stuff for PMU. :)

Anyway, will come again later today with a thorough review.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] pmu/gk20a: PMU boot support.
       [not found] ` <1426055631-1166-1-git-send-email-dgoyal-DDmLM1+adcrQT0dZR+AlfA@public.gmane.org>
  2015-03-11 17:10   ` [Nouveau] " Ilia Mirkin
@ 2015-03-13 10:12   ` Alexandre Courbot
  1 sibling, 0 replies; 6+ messages in thread
From: Alexandre Courbot @ 2015-03-13 10:12 UTC (permalink / raw)
  To: Deepak Goyal, bskeggs-H+wXaHxf7aLQT0dZR+AlfA,
	gnurou-Re5JQEeQqe8AvxtiuMwx3w,
	nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	linux-tegra-u79uwXL29TY76Z2rM5mHXA

Due to the length of the patch there are many things to fix. This review 
alone won't cover all of them, but is mainly an attempt to reduce the 
amount of code and to split this.

On Wed, Mar 11, 2015 at 3:33 PM, Deepak Goyal <dgoyal@nvidia.com> wrote:
 > It adds PMU boot support.It loads PMU
 > firmware into PMU falcon.RM/Kernel driver
 > receives INIT ack (through interrupt mechanism)
 > from PMU when PMU boots with success.

This commit log is strangely formatted. You want to break lines of git 
commit lots around column 70, not 50. Also don't forget the space after 
the end of your sentences.

The log itself also lacks informative value, especially considering the 
length of this patch. Please assume your reader is completely unfamiliar 
with your work, and explain in detail what your patch does, even the 
parts that seem obvious. Some questions that come to mind when reading 
the log:

- What is the PMU firmware do?
- What is RM? (this is not the terminology used by Nouveau, so better to 
avoid using it altogether)
- What value does this patch add to the project?

I understand that this patch clears the way for follow-up patches that 
will actually add features. Please state this clearly in the log, and 
explain what these features are. No code can be merged upstream until 
its benefits are clearly understood.

Review follows, I have changed the order of files to comment on the 
structures before the code.

 > diff --git a/drm/nouveau/nvkm/subdev/pmu/priv.h 
b/drm/nouveau/nvkm/subdev/pmu/priv.h
 > index 998410563bfd..c4686e418582 100644
 > --- a/drm/nouveau/nvkm/subdev/pmu/priv.h
 > +++ b/drm/nouveau/nvkm/subdev/pmu/priv.h
 > @@ -2,7 +2,91 @@
 >  #define __NVKM_PMU_PRIV_H__
 >  #include <subdev/pmu.h>
 >  #include <subdev/pmu/fuc/os.h>
 > +#include <core/object.h>
 > +#include <core/device.h>
 > +#include <core/parent.h>
 > +#include <core/mm.h>
 > +#include <linux/rwsem.h>
 > +#include <linux/slab.h>
 > +#include <subdev/mmu.h>
 > +#include <core/gpuobj.h>
 >
 > +static inline u32 u64_hi32(u64 n)
 > +{
 > +       return (u32)((n >> 32) & ~(u32)0);
 > +}
 > +
 > +static inline u32 u64_lo32(u64 n)
 > +{
 > +       return (u32)(n & ~(u32)0);
 > +}

Use the lower_32_bits() and upper_32_bits() macros instead.

 > +
 > +/* #define ALLOCATOR_DEBUG */

This line is useless...

 > +
 > +/* main struct */

... and this comment uninformative.

 > +struct nvkm_pmu_allocator {
 > +
 > +       char name[32];                  /* name for allocator */
 > +/*struct rb_root rb_root;*/            /* rb tree root for blocks */

Do not comment out members that we don't need. If it's unneeded, just 
remove it.

 > +
 > +       u32 base;                       /* min value of this linear 
space */
 > +       u32 limit;                      /* max value = limit - 1 */
 > +
 > +       unsigned long *bitmap;          /* bitmap */
 > +
 > +       struct gk20a_alloc_block *block_first;  /* first block in list */
 > +       struct gk20a_alloc_block *block_recent; /* last visited block */
 > +
 > +       u32 first_free_addr;            /* first free addr, non-contigous
 > +                                          allocation preferred start,
 > +                                          in order to pick up small 
holes */
 > +       u32 last_free_addr;             /* last free addr, contiguous
 > +                                          allocation preferred start */
 > +       u32 cached_hole_size;           /* max free hole size up to
 > +                                          last_free_addr */
 > +       u32 block_count;                /* number of blocks */
 > +
 > +       struct rw_semaphore rw_sema;    /* lock */
 > +       struct kmem_cache *block_cache; /* slab cache */
 > +
 > +       /* if enabled, constrain to [base, limit) */
 > +       struct {
 > +               bool enable;
 > +               u32 base;
 > +               u32 limit;
 > +       } constraint;
 > +
 > +       int (*alloc)(struct nvkm_pmu_allocator *allocator,
 > +               u32 *addr, u32 len, u32 align);
 > +       int (*free)(struct nvkm_pmu_allocator *allocator,
 > +               u32 addr, u32 len, u32 align);
 > +
 > +};
 > +
 > +int nvkm_pmu_allocator_init(struct nvkm_pmu_allocator *allocator,
 > +                       const char *name, u32 base, u32 size);
 > +void nvkm_pmu_allocator_destroy(struct nvkm_pmu_allocator *allocator);
 > +
 > +int nvkm_pmu_allocator_block_alloc(struct nvkm_pmu_allocator *allocator,
 > +                       u32 *addr, u32 len, u32 align);
 > +
 > +int nvkm_pmu_allocator_block_free(struct nvkm_pmu_allocator *allocator,
 > +                       u32 addr, u32 len, u32 align);

So from the nvkm_pmu_allocator struct and these function prototypes, 
this looks like a pretty casual address space allocator. Nouveau already 
has such an allocator: nvkm_mm. Check it out, it will do all that you 
need and you can remove a lot of code from this patch.

 > +
 > +#if defined(ALLOCATOR_DEBUG)
 > +
 > +#define allocator_dbg(alloctor, format, arg...) 
            \
 > +do {                                                           \
 > +       if (1)                                                  \
 > +               pr_debug("nvkm_pmu_allocator (%s) %s: " format "\n",\
 > +                       alloctor->name, __func__, ##arg);\
 > +} while (0)
 > +
 > +#else /* ALLOCATOR_DEBUG */
 > +
 > +#define allocator_dbg(format, arg...)

I'd prefer if you use the nv_debug() macro in place of this one, but it 
will go away with the allocator anyway...

 > +
 > +#endif /* ALLOCATOR_DEBUG */
 >  #define nvkm_pmu_create(p, e, o, d) 
         \
 >         nvkm_pmu_create_((p), (e), (o), sizeof(**d), (void **)d)
 >  #define nvkm_pmu_destroy(p) 
         \
 > @@ -26,6 +110,179 @@ int _nvkm_pmu_ctor(struct nvkm_object *, struct 
nvkm_object *,
 >  int _nvkm_pmu_init(struct nvkm_object *);
 >  int _nvkm_pmu_fini(struct nvkm_object *, bool);
 >  void nvkm_pmu_pgob(struct nvkm_pmu *pmu, bool enable);
 > +#define PMU_PG_IDLE_THRESHOLD                  15000
 > +#define PMU_PG_POST_POWERUP_IDLE_THRESHOLD     1000000

I do not see these macros being used anywhere in your code.

 > +
 > +/* state transition :
 > +    OFF => [OFF_ON_PENDING optional] => ON_PENDING => ON => OFF
 > +    ON => OFF is always synchronized */
 > +#define PMU_ELPG_STAT_OFF              0   /* elpg is off */
 > +#define PMU_ELPG_STAT_ON               1   /* elpg is on */
 > +/* elpg is off, ALLOW cmd has been sent, wait for ack */
 > +#define PMU_ELPG_STAT_ON_PENDING       2
 > +/* elpg is on, DISALLOW cmd has been sent, wait for ack */
 > +#define PMU_ELPG_STAT_OFF_PENDING      3
 > +/* elpg is off, caller has requested on, but ALLOW
 > +cmd hasn't been sent due to ENABLE_ALLOW delay */
 > +#define PMU_ELPG_STAT_OFF_ON_PENDING   4

Same here. If they are used by a future patch, introduce them at the 
time they actually become useful.

 > +
 > +/* Falcon Register index */
 > +#define PMU_FALCON_REG_R0              (0)
 > +#define PMU_FALCON_REG_R1              (1)
 > +#define PMU_FALCON_REG_R2              (2)
 > +#define PMU_FALCON_REG_R3              (3)
 > +#define PMU_FALCON_REG_R4              (4)
 > +#define PMU_FALCON_REG_R5              (5)
 > +#define PMU_FALCON_REG_R6              (6)
 > +#define PMU_FALCON_REG_R7              (7)
 > +#define PMU_FALCON_REG_R8              (8)
 > +#define PMU_FALCON_REG_R9              (9)
 > +#define PMU_FALCON_REG_R10             (10)
 > +#define PMU_FALCON_REG_R11             (11)
 > +#define PMU_FALCON_REG_R12             (12)
 > +#define PMU_FALCON_REG_R13             (13)
 > +#define PMU_FALCON_REG_R14             (14)
 > +#define PMU_FALCON_REG_R15             (15)
 > +#define PMU_FALCON_REG_IV0             (16)
 > +#define PMU_FALCON_REG_IV1             (17)
 > +#define PMU_FALCON_REG_UNDEFINED       (18)
 > +#define PMU_FALCON_REG_EV              (19)
 > +#define PMU_FALCON_REG_SP              (20)
 > +#define PMU_FALCON_REG_PC              (21)
 > +#define PMU_FALCON_REG_IMB             (22)
 > +#define PMU_FALCON_REG_DMB             (23)
 > +#define PMU_FALCON_REG_CSW             (24)
 > +#define PMU_FALCON_REG_CCR             (25)
 > +#define PMU_FALCON_REG_SEC             (26)
 > +#define PMU_FALCON_REG_CTX             (27)
 > +#define PMU_FALCON_REG_EXCI            (28)
 > +#define PMU_FALCON_REG_RSVD0           (29)
 > +#define PMU_FALCON_REG_RSVD1           (30)
 > +#define PMU_FALCON_REG_RSVD2           (31)
 > +#define PMU_FALCON_REG_SIZE            (32)

These ones are ok since it would not make sense to define only part of 
the regs...

 > +
 > +/* Choices for pmu_state */
 > +#define PMU_STATE_OFF                  0 /* PMU is off */
 > +#define PMU_STATE_STARTING             1 /* PMU is on, but not booted */
 > +#define PMU_STATE_INIT_RECEIVED                2 /* PMU init message 
received */
 > +#define PMU_STATE_ELPG_BOOTING         3 /* PMU is booting */
 > +#define PMU_STATE_ELPG_BOOTED          4 /* ELPG is initialized */
 > +#define PMU_STATE_LOADING_PG_BUF       5 /* Loading PG buf */
 > +#define PMU_STATE_LOADING_ZBC          6 /* Loading ZBC buf */
 > +#define PMU_STATE_STARTED              7 /* Fully unitialized */

But here again, the last 5 states are not used yet, so please introduce 
them as they become needed.

 > +
 > +#define PMU_QUEUE_COUNT                5
 > +
 > +#define PMU_MAX_NUM_SEQUENCES          (256)
 > +#define PMU_SEQ_BIT_SHIFT              (5)
 > +#define PMU_SEQ_TBL_SIZE       \
 > +               (PMU_MAX_NUM_SEQUENCES >> PMU_SEQ_BIT_SHIFT)
 > +
 > +#define PMU_SHA1_GID_SIGNATURE         0xA7C66AD2
 > +#define PMU_SHA1_GID_SIGNATURE_SIZE    4
 > +
 > +#define PMU_SHA1_GID_SIZE      16
 > +
 > +struct pmu_queue {
 > +

Empty blank line.

 > +       /* used by hw, for BIOS/SMI queue */
 > +       u32 mutex_id;
 > +       u32 mutex_lock;
 > +       /* used by sw, for LPQ/HPQ queue */
 > +       struct mutex mutex;
 > +
 > +       /* current write position */
 > +       u32 position;
 > +       /* physical dmem offset where this queue begins */
 > +       u32 offset;
 > +       /* logical queue identifier */
 > +       u32 id;
 > +       /* physical queue index */
 > +       u32 index;
 > +       /* in bytes */
 > +       u32 size;
 > +
 > +       /* open-flag */
 > +       u32 oflag;
 > +       bool opened; /* opened implies locked */
 > +};
 > +
 > +struct pmu_sha1_gid {
 > +       bool valid;
 > +       u8 gid[PMU_SHA1_GID_SIZE];
 > +};
 > +
 > +struct pmu_sha1_gid_data {
 > +       u8 signature[PMU_SHA1_GID_SIGNATURE_SIZE];
 > +       u8 gid[PMU_SHA1_GID_SIZE];
 > +};
 > +
 > +struct pmu_desc {
 > +

Empty blank line.

 > +       struct pmu_ucode_desc *desc;
 > +       struct pmu_buf_desc ucode;
 > +
 > +       struct pmu_buf_desc pg_buf;

This member doesn't seem to be needed now.

 > +       /* TBD: remove this if ZBC seq is fixed */
 > +       struct pmu_buf_desc seq_buf;
 > +       struct pmu_buf_desc trace_buf;
 > +       bool buf_loaded;

buf_loaded is never referenced in this code.

 > +
 > +       struct pmu_sha1_gid gid_info;
 > +
 > +       struct pmu_queue queue[PMU_QUEUE_COUNT];
 > +
 > +       struct pmu_sequence *seq;

Wrong. pmu_sequence is defined in gk20a.h. This file is a generic one. 
Why would PMUs for other GPUs embed GK20A-specific structures?

Actually it seems like the whole pmu_desc should be moved to 
GK20A-specific files, since it is not used elsewhere for now.

 > +       unsigned long pmu_seq_tbl[PMU_SEQ_TBL_SIZE];
 > +       u32 next_seq_desc;
 > +
 > +       struct pmu_mutex *mutex;
 > +       u32 mutex_cnt;
 > +
 > +       struct mutex pmu_copy_lock;
 > +       struct mutex pmu_seq_lock;
 > +
 > +       struct nvkm_pmu_allocator dmem;

So as explained above, this should be replaced by a nvkm_mm.

 > +
 > +       u32 *ucode_image;
 > +       bool pmu_ready;
 > +
 > +       u32 zbc_save_done;

Yet another unreferenced member...

 > +
 > +       u32 stat_dmem_offset;

And another one.

 > +
 > +       u32 elpg_stat;

And another one.

 > +
 > +       int pmu_state;
 > +
 > +#define PMU_ELPG_ENABLE_ALLOW_DELAY_MSEC       1 /* msec */

And another one.

 > +       struct work_struct isr_workq;
 > +       struct mutex elpg_mutex; /* protect elpg enable/disable */

And another one.

 > +/* disable -1, enable +1, <=0 elpg disabled, > 0 elpg enabled */
 > +       int elpg_refcnt;

Here too.

 > +
 > +       bool initialized;
 > +
 > +       void (*remove_support)(struct pmu_desc *pmu);

So this function pointer is set, but never called! Is it unneeded, or 
have you forgot to call it when you should have?

 > +       bool sw_ready;
 > +       bool perfmon_ready;

Unneeded member again.

 > +
 > +       u32 sample_buffer;
 > +       u32 load_shadow;
 > +       u32 load_avg;
 > +
 > +       struct mutex isr_mutex;
 > +       bool isr_enabled;
 > +
 > +       bool zbc_ready;

This is only set to false in the destroy() function, so I guess you 
don't need this now...

 > +       unsigned long perfmon_events_cnt;
 > +       bool perfmon_sampling_enabled;
 > +       u8 pmu_mode;
 > +       u32 falcon_id;
 > +       u32 aelpg_param[5];

And all these 5 members are also not needed now it seems.

 > +       void *pmu_chip_data;

 From how you are using this member (to store a pointer to a kzalloc'd 
pmu_gk20a_data), it seems to be unneeded. Put the content of 
pmu_gk20a_data into gk20a_pmu_priv, and get rid of both this member and 
pmu_gk20a_data.

And actually since both members of pmu_gk20a_data are completely 
unreferenced, they can be added in a later patch anyway, when they 
actually become useful.

 > +       struct nvkm_pmu *pmu;
 > +};
 >
 >  struct nvkm_pmu_impl {
 >         struct nvkm_oclass base;
 > @@ -39,5 +296,12 @@ struct nvkm_pmu_impl {
 >         } data;
 >
 >         void (*pgob)(struct nvkm_pmu *, bool);
 > +       struct pmu_desc pmudata;
 >  };
 > +
 > +static inline struct nvkm_pmu *impl_from_pmu(struct pmu_desc *pmu)
 > +{
 > +       return pmu->pmu;
 > +}
 > +
 >  #endif
 > diff --git a/drm/nouveau/include/nvkm/subdev/pmu.h 
b/drm/nouveau/include/nvkm/subdev/pmu.h
 > index 7b86acc634a0..659b4e0ba02b 100644
 > --- a/drm/nouveau/include/nvkm/subdev/pmu.h
 > +++ b/drm/nouveau/include/nvkm/subdev/pmu.h
 > @@ -1,7 +1,20 @@
 >  #ifndef __NVKM_PMU_H__
 >  #define __NVKM_PMU_H__
 >  #include <core/subdev.h>
 > +#include <core/device.h>
 > +#include <subdev/mmu.h>
 > +#include <linux/debugfs.h>
 >
 > +struct pmu_buf_desc {
 > +       struct nvkm_gpuobj *pmubufobj;
 > +       struct nvkm_vma pmubufvma;

Your struct is already called "pmu_buf", so maybe call these members 
"obj" and "vma" simply.

 > +       size_t size;
 > +};
 > +struct pmu_priv_vm {
 > +       struct nvkm_gpuobj *mem;
 > +       struct nvkm_gpuobj *pgd;
 > +       struct nvkm_vm *vm;
 > +};
 >  struct nvkm_pmu {
 >         struct nvkm_subdev base;
 >
 > @@ -20,9 +33,20 @@ struct nvkm_pmu {
 >                 u32 message;
 >                 u32 data[2];
 >         } recv;
 > -
 > +       wait_queue_head_t init_wq;

This wq is initialized and never used.

 > +       bool gr_initialised;

Member only written once.

 > +       struct dentry *debugfs;
 > +       struct pmu_buf_desc *pg_buf;

This member is never used, and by transition neither is the pg_buf of 
struct pmu_desc.

 > +       struct pmu_priv_vm *pmuvm;
 >         int  (*message)(struct nvkm_pmu *, u32[2], u32, u32, u32, u32);
 >         void (*pgob)(struct nvkm_pmu *, bool);
 > +       int (*pmu_mutex_acquire)(struct nvkm_pmu *, u32 id, u32 *token);

Never used because you are calling the function you affect to the 
pointer directly in the code (which happens to also be called 
pmu_mutex_acquire!)

 > +       int (*pmu_mutex_release)(struct nvkm_pmu *, u32 id, u32 *token);

Same here.

 > +       int (*pmu_load_norm)(struct nvkm_pmu *pmu, u32 *load);
 > +       int (*pmu_load_update)(struct nvkm_pmu *pmu);
 > +       void (*pmu_reset_load_counters)(struct nvkm_pmu *pmu);
 > +       void (*pmu_get_load_counters)(struct nvkm_pmu *pmu, u32 
*busy_cycles,
 > +               u32 *total_cycles);

These four ones are never called. Introduce members and functions only 
when they become needed.

 >  };
 >
 >  static inline struct nvkm_pmu *
 > diff --git a/drm/nouveau/nvkm/subdev/pmu/base.c 
b/drm/nouveau/nvkm/subdev/pmu/base.c
 > index 054b2d2eec35..6afd389b9764 100644
 > --- a/drm/nouveau/nvkm/subdev/pmu/base.c
 > +++ b/drm/nouveau/nvkm/subdev/pmu/base.c
 > @@ -25,6 +25,114 @@
 >
 >  #include <subdev/timer.h>


 >
 > +/* init allocator struct */
 > +int nvkm_pmu_allocator_init(struct nvkm_pmu_allocator *allocator,
 > +               const char *name, u32 start, u32 len)
 > +{
 > +       memset(allocator, 0, sizeof(struct nvkm_pmu_allocator));
 > +
 > +       strncpy(allocator->name, name, 32);
 > +
 > +       allocator->base = start;
 > +       allocator->limit = start + len - 1;
 > +
 > +       allocator->bitmap = kcalloc(BITS_TO_LONGS(len), sizeof(long),
 > +                       GFP_KERNEL);
 > +       if (!allocator->bitmap)
 > +               return -ENOMEM;
 > +
 > +       allocator_dbg(allocator, "%s : base %d, limit %d",
 > +               allocator->name, allocator->base);
 > +
 > +       init_rwsem(&allocator->rw_sema);
 > +
 > +       allocator->alloc = nvkm_pmu_allocator_block_alloc;
 > +       allocator->free = nvkm_pmu_allocator_block_free;
 > +
 > +       return 0;
 > +}
 > +
 > +/* destroy allocator, free all remaining blocks if any */
 > +void nvkm_pmu_allocator_destroy(struct nvkm_pmu_allocator *allocator)
 > +{
 > +       down_write(&allocator->rw_sema);
 > +
 > +       kfree(allocator->bitmap);
 > +
 > +       memset(allocator, 0, sizeof(struct nvkm_pmu_allocator));
 > +}
 > +
 > +/*
 > + * *addr != ~0 for fixed address allocation. if *addr == 0, base addr is
 > + * returned to caller in *addr.
 > + *
 > + * contiguous allocation, which allocates one block of
 > + * contiguous address.
 > +*/
 > +int nvkm_pmu_allocator_block_alloc(struct nvkm_pmu_allocator *allocator,
 > +               u32 *addr, u32 len, u32 align)
 > +{
 > +       unsigned long _addr;
 > +
 > +       allocator_dbg(allocator, "[in] addr %d, len %d", *addr, len);
 > +
 > +       if ((*addr != 0 && *addr < allocator->base) || /* check addr 
range */
 > +           *addr + len > allocator->limit || /* check addr range */
 > +           *addr & (align - 1) || /* check addr alignment */
 > +            len == 0)                        /* check len */
 > +               return -EINVAL;
 > +
 > +       len = ALIGN(len, align);
 > +       if (!len)
 > +               return -ENOMEM;
 > +
 > +       down_write(&allocator->rw_sema);
 > +
 > +       _addr = bitmap_find_next_zero_area(allocator->bitmap,
 > +                       allocator->limit - allocator->base + 1,
 > +                       *addr ? (*addr - allocator->base) : 0,
 > +                       len,
 > +                       align - 1);
 > +       if ((_addr > allocator->limit - allocator->base + 1) ||
 > +           (*addr && *addr != (_addr + allocator->base))) {
 > +               up_write(&allocator->rw_sema);
 > +               return -ENOMEM;
 > +       }
 > +
 > +       bitmap_set(allocator->bitmap, _addr, len);
 > +       *addr = allocator->base + _addr;
 > +
 > +       up_write(&allocator->rw_sema);
 > +
 > +       allocator_dbg(allocator, "[out] addr %d, len %d", *addr, len);
 > +
 > +       return 0;
 > +}
 > +
 > +/* free all blocks between start and end */
 > +int nvkm_pmu_allocator_block_free(struct nvkm_pmu_allocator *allocator,
 > +               u32 addr, u32 len, u32 align)
 > +{
 > +       allocator_dbg(allocator, "[in] addr %d, len %d", addr, len);
 > +
 > +       if (addr + len > allocator->limit || /* check addr range */
 > +           addr < allocator->base ||
 > +           addr & (align - 1))   /* check addr alignment */
 > +               return -EINVAL;
 > +
 > +       len = ALIGN(len, align);
 > +       if (!len)
 > +               return -EINVAL;
 > +
 > +       down_write(&allocator->rw_sema);
 > +       bitmap_clear(allocator->bitmap, addr - allocator->base, len);
 > +       up_write(&allocator->rw_sema);
 > +
 > +       allocator_dbg(allocator, "[out] addr %d, len %d", addr, len);
 > +
 > +       return 0;
 > +}
 > +

So all this code should go away when you switch to nvkm_mm. It was 
out-of-place anyway: this is a standard address space allocator and has 
nothing specific to PMU.

That's a lot of things to fix already, so I will hold my review of 
pmu/gk20a.c for next time. Just a few remarks about the most obvious 
problems though:

The file is a mess. Functions appear without any logical order, so you 
end up making declarations that could be avoided if things were ordered 
a bit better. For instance, pmu_read_message() is only used by 
pmu_process_message(), but you have 3 functions between these two. A 
logical ordering of the code makes it much easier to read: "building 
blocks" functions first, more complex functions later. Ideally you would 
end up with a C file that has no forward-declarations.

Again, some functions are absolutely not used, sometimes in worrying 
ways. Examples are gk20a_pmu_destroy and gk20a_pmu_create_, but I 
suspect there are others.

For gk20a_pmu_create_, I don't even know why it is here in the first 
place. It seems like its code should be gk20a_pmu_ctor() instead, and it 
sets function pointers that are apparently never called because they are 
remaining NULL and things seem to go just fine?

This patch should definitely be split into different bits to allow a 
more pleasant review. Right now it is almost impossible to understand 
what it does. Suggestion for splitting:

1) Add firmware loading ability, bootstrap PMU (since these two tasks 
cannot be separated I guess)
2) Add message receiving/posting ability
3) DebugFS support

This should be a good beginning to make things more readable. There are 
other things to comment on, but let's start with this.

Keep in mind that upstreaming is more than just trying to make the 
downstream code fit as-is in the upstream kernel. You need to reshape 
things when it makes sense, and replace custom-built solutions with the 
ones that already exist. Also important is to make sure you uncover 
things in a logical way, in chunks as small as possible to the 
unfamiliar reader can understand them (for this particular series I 
don't think we can go with less-than-300-lines patches though). In other 
words, it is ok to send a 3000 lines patch series, if everything appears 
progressively and logically. A 3000 lines patch however is likely to be 
frown upon.

Looking forward to seeing v2 and hopefully diving deeper into this - 
good luck!

Alex.
_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2015-03-13 10:12 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-03-11  6:33 [PATCH] pmu/gk20a: PMU boot support Deepak Goyal
     [not found] ` <1426055631-1166-1-git-send-email-dgoyal-DDmLM1+adcrQT0dZR+AlfA@public.gmane.org>
2015-03-11 17:10   ` [Nouveau] " Ilia Mirkin
     [not found]     ` <CAKb7Uvj0xMvDWjKjGzbD6Tk0NArfkh4Vjvt4eRQ8XoHgR+7bsg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-03-12  5:20       ` Deepak Goyal
     [not found]         ` <25b5050176544f47b0ac74d4086f145c-7W72rfoJkVm6sJks/06JalaTQe2KTcn/@public.gmane.org>
2015-03-12 22:11           ` Ilia Mirkin
     [not found]             ` <CAKb7UvgDq-FzZwAZ8VwbhaVHi4B29jXL5qjOCQ47TWfqDBDQaA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-03-13  1:56               ` Alexandre Courbot
2015-03-13 10:12   ` Alexandre Courbot

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.