All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Christian König" <deathsimple@vodafone.de>
To: dri-devel@lists.freedesktop.org
Subject: [PATCH 02/10] drm/radeon: UVD bringup v7
Date: Wed,  3 Apr 2013 01:18:31 +0200	[thread overview]
Message-ID: <1364944719-5175-3-git-send-email-deathsimple@vodafone.de> (raw)
In-Reply-To: <1364944719-5175-1-git-send-email-deathsimple@vodafone.de>

Just everything needed to decode videos using UVD.

v6: just all the bugfixes and support for R7xx-SI merged in one patch
v7: UVD_CGC_GATE is a write only register, lockup detection fix

Signed-off-by: Christian König <deathsimple@vodafone.de>
---
 drivers/gpu/drm/radeon/Makefile        |    2 +-
 drivers/gpu/drm/radeon/evergreen.c     |   40 ++-
 drivers/gpu/drm/radeon/evergreend.h    |    7 +
 drivers/gpu/drm/radeon/ni.c            |   49 +++
 drivers/gpu/drm/radeon/nid.h           |    9 +
 drivers/gpu/drm/radeon/r600.c          |  291 ++++++++++++++++++
 drivers/gpu/drm/radeon/r600d.h         |   61 ++++
 drivers/gpu/drm/radeon/radeon.h        |   47 ++-
 drivers/gpu/drm/radeon/radeon_asic.c   |   63 ++++
 drivers/gpu/drm/radeon/radeon_asic.h   |   19 ++
 drivers/gpu/drm/radeon/radeon_cs.c     |   27 +-
 drivers/gpu/drm/radeon/radeon_fence.c  |   23 +-
 drivers/gpu/drm/radeon/radeon_kms.c    |    1 +
 drivers/gpu/drm/radeon/radeon_object.c |   12 +-
 drivers/gpu/drm/radeon/radeon_object.h |    2 +-
 drivers/gpu/drm/radeon/radeon_ring.c   |   24 +-
 drivers/gpu/drm/radeon/radeon_test.c   |   72 +++--
 drivers/gpu/drm/radeon/radeon_uvd.c    |  521 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/radeon/rv770.c         |  134 ++++++++
 drivers/gpu/drm/radeon/rv770d.h        |   14 +
 drivers/gpu/drm/radeon/si.c            |   32 ++
 drivers/gpu/drm/radeon/sid.h           |    6 +
 include/uapi/drm/radeon_drm.h          |    1 +
 23 files changed, 1400 insertions(+), 57 deletions(-)
 create mode 100644 drivers/gpu/drm/radeon/radeon_uvd.c

diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index bf17252..86c5e36 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -76,7 +76,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \
 	evergreen.o evergreen_cs.o evergreen_blit_shaders.o evergreen_blit_kms.o \
 	evergreen_hdmi.o radeon_trace_points.o ni.o cayman_blit_shaders.o \
 	atombios_encoders.o radeon_semaphore.o radeon_sa.o atombios_i2c.o si.o \
-	si_blit_shaders.o radeon_prime.o
+	si_blit_shaders.o radeon_prime.o radeon_uvd.o
 
 radeon-$(CONFIG_COMPAT) += radeon_ioc32.o
 radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 305a657..18b66ff 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -3360,6 +3360,13 @@ restart_ih:
 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
 				break;
 			}
+			/* terminate the preceding case so that it does not
+			 * fall through into the UVD handler below */
+			break;
+		case 124: /* UVD */
+			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
+			/* run fence processing for the UVD ring */
+			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
 			break;
 		case 146:
 		case 147:
@@ -3571,7 +3574,7 @@ int evergreen_copy_dma(struct radeon_device *rdev,
 
 static int evergreen_startup(struct radeon_device *rdev)
 {
-	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
+	struct radeon_ring *ring;
 	int r;
 
 	/* enable pcie gen2 link */
@@ -3638,6 +3641,17 @@ static int evergreen_startup(struct radeon_device *rdev)
 		return r;
 	}
 
+	r = rv770_uvd_resume(rdev);
+	if (!r) {
+		r = radeon_fence_driver_start_ring(rdev,
+						   R600_RING_TYPE_UVD_INDEX);
+		if (r)
+			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
+	}
+
+	if (r)
+		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
+
 	/* Enable IRQ */
 	r = r600_irq_init(rdev);
 	if (r) {
@@ -3647,6 +3661,7 @@ static int evergreen_startup(struct radeon_device *rdev)
 	}
 	evergreen_irq_set(rdev);
 
+	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
 			     R600_CP_RB_RPTR, R600_CP_RB_WPTR,
 			     0, 0xfffff, RADEON_CP_PACKET2);
@@ -3670,6 +3685,19 @@ static int evergreen_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+	if (ring->ring_size) {
+		r = radeon_ring_init(rdev, ring, ring->ring_size,
+				     R600_WB_UVD_RPTR_OFFSET,
+				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
+				     0, 0xfffff, RADEON_CP_PACKET2);
+		if (!r)
+			r = r600_uvd_init(rdev);
+
+		if (r)
+			DRM_ERROR("radeon: error initializing UVD (%d).\n", r);
+	}
+
 	r = radeon_ib_pool_init(rdev);
 	if (r) {
 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -3716,8 +3744,10 @@ int evergreen_resume(struct radeon_device *rdev)
 int evergreen_suspend(struct radeon_device *rdev)
 {
 	r600_audio_fini(rdev);
+	radeon_uvd_suspend(rdev);
 	r700_cp_stop(rdev);
 	r600_dma_stop(rdev);
+	r600_uvd_rbc_stop(rdev);
 	evergreen_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	evergreen_pcie_gart_disable(rdev);
@@ -3797,6 +3827,13 @@ int evergreen_init(struct radeon_device *rdev)
 	rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
 
+	r = radeon_uvd_init(rdev);
+	if (!r) {
+		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
+		r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX],
+			       4096);
+	}
+
 	rdev->ih.ring_obj = NULL;
 	r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -3843,6 +3880,7 @@ void evergreen_fini(struct radeon_device *rdev)
 	radeon_ib_pool_fini(rdev);
 	radeon_irq_kms_fini(rdev);
 	evergreen_pcie_gart_fini(rdev);
+	radeon_uvd_fini(rdev);
 	r600_vram_scratch_fini(rdev);
 	radeon_gem_fini(rdev);
 	radeon_fence_driver_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index 982d25a..c5d873e 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -992,6 +992,13 @@
 #       define TARGET_LINK_SPEED_MASK                     (0xf << 0)
 #       define SELECTABLE_DEEMPHASIS                      (1 << 6)
 
+
+/*
+ * UVD
+ */
+#define UVD_RBC_RB_RPTR					0xf690
+#define UVD_RBC_RB_WPTR					0xf694
+
 /*
  * PM4
  */
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 27769e7..ac944f5 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -931,6 +931,38 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 	radeon_ring_write(ring, 10); /* poll interval */
 }
 
+/**
+ * cayman_uvd_semaphore_emit - write a semaphore command to a UVD ring
+ *
+ * @rdev: radeon_device pointer (not referenced here)
+ * @ring: ring the packets are written to
+ * @semaphore: semaphore whose gpu_addr is programmed
+ * @emit_wait: sets bit 0 of the command dword when true
+ *
+ * Emits three register writes: the two parts of the semaphore address
+ * and the command.  Unlike r600_uvd_semaphore_emit() the command dword
+ * always has 0x80 set.
+ */
+void cayman_uvd_semaphore_emit(struct radeon_device *rdev,
+			       struct radeon_ring *ring,
+			       struct radeon_semaphore *semaphore,
+			       bool emit_wait)
+{
+	uint64_t addr = semaphore->gpu_addr;
+
+	/* address bits 3-22 */
+	radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0));
+	radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);
+
+	/* address bits 23-42 */
+	radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0));
+	radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);
+
+	/* NOTE(review): purpose of the 0x80 flag is not visible here - confirm */
+	radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
+	radeon_ring_write(ring, 0x80 | (emit_wait ? 1 : 0));
+}
+
 static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
 {
 	if (enable)
@@ -1682,6 +1699,16 @@ static int cayman_startup(struct radeon_device *rdev)
 		return r;
 	}
 
+	r = rv770_uvd_resume(rdev);
+	if (!r) {
+		r = radeon_fence_driver_start_ring(rdev,
+						   R600_RING_TYPE_UVD_INDEX);
+		if (r)
+			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
+	}
+	if (r)
+		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
+
 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
 	if (r) {
 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
@@ -1748,6 +1775,18 @@ static int cayman_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+	if (ring->ring_size) {
+		r = radeon_ring_init(rdev, ring, ring->ring_size,
+				     R600_WB_UVD_RPTR_OFFSET,
+				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
+				     0, 0xfffff, RADEON_CP_PACKET2);
+		if (!r)
+			r = r600_uvd_init(rdev);
+		if (r)
+			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
+	}
+
 	r = radeon_ib_pool_init(rdev);
 	if (r) {
 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -1794,6 +1833,8 @@ int cayman_suspend(struct radeon_device *rdev)
 	radeon_vm_manager_fini(rdev);
 	cayman_cp_enable(rdev, false);
 	cayman_dma_stop(rdev);
+	r600_uvd_rbc_stop(rdev);
+	radeon_uvd_suspend(rdev);
 	evergreen_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	cayman_pcie_gart_disable(rdev);
@@ -1868,6 +1909,13 @@ int cayman_init(struct radeon_device *rdev)
 	ring->ring_obj = NULL;
 	r600_ring_init(rdev, ring, 64 * 1024);
 
+	r = radeon_uvd_init(rdev);
+	if (!r) {
+		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+		ring->ring_obj = NULL;
+		r600_ring_init(rdev, ring, 4096);
+	}
+
 	rdev->ih.ring_obj = NULL;
 	r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -1919,6 +1967,7 @@ void cayman_fini(struct radeon_device *rdev)
 	radeon_vm_manager_fini(rdev);
 	radeon_ib_pool_fini(rdev);
 	radeon_irq_kms_fini(rdev);
+	radeon_uvd_fini(rdev);
 	cayman_pcie_gart_fini(rdev);
 	r600_vram_scratch_fini(rdev);
 	radeon_gem_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h
index 079dee2..3731f6c 100644
--- a/drivers/gpu/drm/radeon/nid.h
+++ b/drivers/gpu/drm/radeon/nid.h
@@ -486,6 +486,15 @@
 #       define CACHE_FLUSH_AND_INV_EVENT                        (0x16 << 0)
 
 /*
+ * UVD
+ */
+#define UVD_SEMA_ADDR_LOW				0xEF00
+#define UVD_SEMA_ADDR_HIGH				0xEF04
+#define UVD_SEMA_CMD					0xEF08
+#define UVD_RBC_RB_RPTR					0xF690
+#define UVD_RBC_RB_WPTR					0xF694
+
+/*
  * PM4
  */
 #define PACKET0(reg, n)	((RADEON_PACKET_TYPE0 << 30) |			\
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 0740db3..ca6117d 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2552,6 +2552,219 @@ void r600_dma_fini(struct radeon_device *rdev)
 }
 
 /*
+ * UVD
+ */
+/**
+ * r600_uvd_rbc_start - program and start the UVD ring buffer
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Programs the UVD_RBC_* registers for the UVD ring, runs the ring test
+ * and then emits the semaphore timeout setup on the ring.
+ * Returns 0 on success, -EINVAL if the ring and its rptr writeback
+ * address differ in their upper 32 bits, or the error from the ring
+ * test or ring lock.
+ */
+int r600_uvd_rbc_start(struct radeon_device *rdev)
+{
+	struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+	uint64_t rptr_addr;
+	uint32_t rb_bufsz, tmp;
+	int r;
+
+	rptr_addr = rdev->wb.gpu_addr + R600_WB_UVD_RPTR_OFFSET;
+
+	/* UVD_LMI_EXT40_ADDR below takes one upper half for both addresses */
+	if (upper_32_bits(rptr_addr) != upper_32_bits(ring->gpu_addr)) {
+		DRM_ERROR("UVD ring and rptr not in the same 4GB segment!\n");
+		return -EINVAL;
+	}
+
+	/* force RBC into idle state */
+	WREG32(UVD_RBC_RB_CNTL, 0x11010101);
+
+	/* Set the write pointer delay */
+	WREG32(UVD_RBC_RB_WPTR_CNTL, 0);
+
+	/* set the wb address */
+	WREG32(UVD_RBC_RB_RPTR_ADDR, rptr_addr >> 2);
+
+	/* program the 4GB memory segment for rptr and ring buffer */
+	WREG32(UVD_LMI_EXT40_ADDR, upper_32_bits(rptr_addr) |
+				   (0x7 << 16) | (0x1 << 31));
+
+	/* Initialize the ring buffer's read and write pointers */
+	WREG32(UVD_RBC_RB_RPTR, 0x0);
+
+	ring->wptr = ring->rptr = RREG32(UVD_RBC_RB_RPTR);
+	WREG32(UVD_RBC_RB_WPTR, ring->wptr);
+
+	/* set the ring address */
+	WREG32(UVD_RBC_RB_BASE, ring->gpu_addr);
+
+	/* Set ring buffer size */
+	rb_bufsz = drm_order(ring->ring_size);
+	rb_bufsz = (0x1 << 8) | rb_bufsz;
+	WREG32(UVD_RBC_RB_CNTL, rb_bufsz);
+
+	ring->ready = true;
+	r = radeon_ring_test(rdev, R600_RING_TYPE_UVD_INDEX, ring);
+	if (r) {
+		ring->ready = false;
+		return r;
+	}
+
+	r = radeon_ring_lock(rdev, ring, 10);
+	if (r) {
+		DRM_ERROR("radeon: ring failed to lock UVD ring (%d).\n", r);
+		return r;
+	}
+
+	/* set the three semaphore timeout registers to 0xFFFFF */
+	tmp = PACKET0(UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL, 0);
+	radeon_ring_write(ring, tmp);
+	radeon_ring_write(ring, 0xFFFFF);
+
+	tmp = PACKET0(UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL, 0);
+	radeon_ring_write(ring, tmp);
+	radeon_ring_write(ring, 0xFFFFF);
+
+	tmp = PACKET0(UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL, 0);
+	radeon_ring_write(ring, tmp);
+	radeon_ring_write(ring, 0xFFFFF);
+
+	/* Clear timeout status bits */
+	radeon_ring_write(ring, PACKET0(UVD_SEMA_TIMEOUT_STATUS, 0));
+	radeon_ring_write(ring, 0x8);
+
+	radeon_ring_write(ring, PACKET0(UVD_SEMA_CNTL, 0));
+	radeon_ring_write(ring, 1);
+
+	radeon_ring_unlock_commit(rdev, ring);
+
+	return 0;
+}
+
+/**
+ * r600_uvd_rbc_stop - stop the UVD ring buffer
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Forces the RBC into idle state with the same UVD_RBC_RB_CNTL value
+ * r600_uvd_rbc_start() writes first, and marks the ring as not ready.
+ */
+void r600_uvd_rbc_stop(struct radeon_device *rdev)
+{
+	struct radeon_ring *ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+
+	/* force RBC into idle state */
+	WREG32(UVD_RBC_RB_CNTL, 0x11010101);
+	ring->ready = false;
+}
+
+/**
+ * r600_uvd_init - reset and boot the UVD block
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Puts the UVD sub-blocks into reset, programs the memory interface,
+ * releases the VCPU and polls UVD_STATUS until bit 1 is set, resetting
+ * the VCPU between attempts.  On success interrupts are enabled and the
+ * ring is started with r600_uvd_rbc_start().
+ * Returns 0 on success, -1 if UVD_STATUS bit 1 never gets set, or the
+ * error from r600_uvd_rbc_start().
+ */
+int r600_uvd_init(struct radeon_device *rdev)
+{
+	int i, j, r;
+
+	/* disable clock gating */
+	WREG32(UVD_CGC_GATE, 0);
+
+	/* disable interrupt */
+	WREG32_P(UVD_MASTINT_EN, 0, ~(1 << 1));
+
+	/* put LMI, VCPU, RBC etc... into reset */
+	WREG32(UVD_SOFT_RESET, LMI_SOFT_RESET | VCPU_SOFT_RESET |
+	       LBSI_SOFT_RESET | RBC_SOFT_RESET | CSM_SOFT_RESET |
+	       CXW_SOFT_RESET | TAP_SOFT_RESET | LMI_UMC_SOFT_RESET);
+	mdelay(5);
+
+	/* take UVD block out of reset */
+	WREG32_P(SRBM_SOFT_RESET, 0, ~SOFT_RESET_UVD);
+	mdelay(5);
+
+	/* initialize UVD memory controller */
+	WREG32(UVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) |
+			     (1 << 21) | (1 << 9) | (1 << 20));
+
+	/* disable byte swapping */
+	WREG32(UVD_LMI_SWAP_CNTL, 0);
+	WREG32(UVD_MP_SWAP_CNTL, 0);
+
+	WREG32(UVD_MPC_SET_MUXA0, 0x40c2040);
+	WREG32(UVD_MPC_SET_MUXA1, 0x0);
+	WREG32(UVD_MPC_SET_MUXB0, 0x40c2040);
+	WREG32(UVD_MPC_SET_MUXB1, 0x0);
+	WREG32(UVD_MPC_SET_ALU, 0);
+	WREG32(UVD_MPC_SET_MUX, 0x88);
+
+	/* Stall UMC */
+	WREG32_P(UVD_LMI_CTRL2, 1 << 8, ~(1 << 8));
+	WREG32_P(UVD_RB_ARB_CTRL, 1 << 3, ~(1 << 3));
+
+	/* take all subblocks out of reset, except VCPU */
+	WREG32(UVD_SOFT_RESET, VCPU_SOFT_RESET);
+	mdelay(5);
+
+	/* enable VCPU clock */
+	WREG32(UVD_VCPU_CNTL,  1 << 9);
+
+	/* enable UMC */
+	WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8));
+
+	/* boot up the VCPU */
+	WREG32(UVD_SOFT_RESET, 0);
+	mdelay(10);
+
+	WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3));
+
+	/* up to 10 attempts, each polling UVD_STATUS up to 100 times */
+	for (i = 0; i < 10; ++i) {
+		uint32_t status;
+		for (j = 0; j < 100; ++j) {
+			status = RREG32(UVD_STATUS);
+			if (status & 2)
+				break;
+			mdelay(10);
+		}
+		r = 0;
+		if (status & 2)
+			break;
+
+		DRM_ERROR("UVD not responding, trying to reset the VCPU!!!\n");
+		WREG32_P(UVD_SOFT_RESET, VCPU_SOFT_RESET, ~VCPU_SOFT_RESET);
+		mdelay(10);
+		WREG32_P(UVD_SOFT_RESET, 0, ~VCPU_SOFT_RESET);
+		mdelay(10);
+		r = -1;
+	}
+	if (r) {
+		DRM_ERROR("UVD not responding, giving up!!!\n");
+		return r;
+	}
+	/* enable interrupt */
+	WREG32_P(UVD_MASTINT_EN, 3<<1, ~(3 << 1));
+
+	r = r600_uvd_rbc_start(rdev);
+	if (r)
+		return r;
+
+	DRM_INFO("UVD initialized successfully.\n");
+	return 0;
+}
+
+/*
  * GPU scratch registers helpers function.
  */
 void r600_scratch_init(struct radeon_device *rdev)
@@ -2660,6 +2839,52 @@ int r600_dma_ring_test(struct radeon_device *rdev,
 	return r;
 }
 
+/**
+ * r600_uvd_ring_test - register write test for the UVD ring
+ *
+ * @rdev: radeon_device pointer
+ * @ring: ring to test
+ *
+ * Writes 0xCAFEDEAD to UVD_CONTEXT_ID directly, emits a ring packet that
+ * writes 0xDEADBEEF to the same register and polls until that value
+ * reads back or rdev->usec_timeout polls have elapsed.
+ * Returns 0 on success, -EINVAL on timeout, or the ring lock error.
+ */
+int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	uint32_t tmp = 0;
+	unsigned i;
+	int r;
+
+	WREG32(UVD_CONTEXT_ID, 0xCAFEDEAD);
+	r = radeon_ring_lock(rdev, ring, 3);
+	if (r) {
+		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n",
+			  ring->idx, r);
+		return r;
+	}
+	radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0));
+	radeon_ring_write(ring, 0xDEADBEEF);
+	radeon_ring_unlock_commit(rdev, ring);
+	/* wait for the ring write to land in the register */
+	for (i = 0; i < rdev->usec_timeout; i++) {
+		tmp = RREG32(UVD_CONTEXT_ID);
+		if (tmp == 0xDEADBEEF)
+			break;
+		DRM_UDELAY(1);
+	}
+
+	if (i < rdev->usec_timeout) {
+		DRM_INFO("ring test on %d succeeded in %d usecs\n",
+			 ring->idx, i);
+	} else {
+		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
+			  ring->idx, tmp);
+		r = -EINVAL;
+	}
+	return r;
+}
+
 /*
  * CP fences/semaphores
  */
@@ -2711,6 +2924,40 @@ void r600_fence_ring_emit(struct radeon_device *rdev,
 	}
 }
 
+/**
+ * r600_uvd_fence_emit - emit a fence command on the UVD ring
+ *
+ * @rdev: radeon_device pointer
+ * @fence: fence to emit; selects the ring and supplies the sequence number
+ *
+ * Writes the sequence number to UVD_CONTEXT_ID, hands the fence address
+ * to the VCPU with command 0, then issues command 2 with both data
+ * registers zeroed.
+ */
+void r600_uvd_fence_emit(struct radeon_device *rdev,
+			 struct radeon_fence *fence)
+{
+	struct radeon_ring *ring = &rdev->ring[fence->ring];
+	/* keep all 64 bits, DATA1 below takes address bits 32-39 */
+	uint64_t addr = rdev->fence_drv[fence->ring].gpu_addr;
+
+	radeon_ring_write(ring, PACKET0(UVD_CONTEXT_ID, 0));
+	radeon_ring_write(ring, fence->seq);
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0));
+	radeon_ring_write(ring, addr & 0xffffffff);
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0));
+	radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0));
+	radeon_ring_write(ring, 0);
+
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0));
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0));
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0));
+	radeon_ring_write(ring, 2);
+}
+
 void r600_semaphore_ring_emit(struct radeon_device *rdev,
 			      struct radeon_ring *ring,
 			      struct radeon_semaphore *semaphore,
@@ -2780,6 +3017,36 @@ void r600_dma_semaphore_ring_emit(struct radeon_device *rdev,
 	radeon_ring_write(ring, upper_32_bits(addr) & 0xff);
 }
 
+/**
+ * r600_uvd_semaphore_emit - write a semaphore command to a UVD ring
+ *
+ * @rdev: radeon_device pointer (not referenced here)
+ * @ring: ring the packets are written to
+ * @semaphore: semaphore whose gpu_addr is programmed
+ * @emit_wait: command dword is 1 when true, 0 when false
+ *
+ * Emits three register writes: the two parts of the semaphore address
+ * and the command.
+ */
+void r600_uvd_semaphore_emit(struct radeon_device *rdev,
+			     struct radeon_ring *ring,
+			     struct radeon_semaphore *semaphore,
+			     bool emit_wait)
+{
+	uint64_t addr = semaphore->gpu_addr;
+
+	/* address bits 3-22 */
+	radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_LOW, 0));
+	radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);
+
+	/* address bits 23-42 */
+	radeon_ring_write(ring, PACKET0(UVD_SEMA_ADDR_HIGH, 0));
+	radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);
+
+	radeon_ring_write(ring, PACKET0(UVD_SEMA_CMD, 0));
+	radeon_ring_write(ring, emit_wait ? 1 : 0);
+}
+
 int r600_copy_blit(struct radeon_device *rdev,
 		   uint64_t src_offset,
 		   uint64_t dst_offset,
@@ -3183,6 +3437,25 @@ void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
 	radeon_ring_write(ring, ib->length_dw);
 }
 
+/**
+ * r600_uvd_ib_execute - schedule an indirect buffer on the UVD ring
+ *
+ * @rdev: radeon_device pointer
+ * @ib: indirect buffer; selects the ring and supplies address and length
+ *
+ * Writes ib->gpu_addr to UVD_RBC_IB_BASE and ib->length_dw to
+ * UVD_RBC_IB_SIZE through the ring.
+ */
+void r600_uvd_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+	struct radeon_ring *ring = &rdev->ring[ib->ring];
+
+	radeon_ring_write(ring, PACKET0(UVD_RBC_IB_BASE, 0));
+	radeon_ring_write(ring, ib->gpu_addr);
+	radeon_ring_write(ring, PACKET0(UVD_RBC_IB_SIZE, 0));
+	radeon_ring_write(ring, ib->length_dw);
+}
+
 int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 {
 	struct radeon_ib ib;
@@ -3300,6 +3564,44 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
 	return r;
 }
 
+/**
+ * r600_uvd_ib_test - test IB submission on the UVD ring
+ *
+ * @rdev: radeon_device pointer
+ * @ring: ring to test
+ *
+ * Submits a create message and a destroy message for handle 1 and waits
+ * for the fence of the destroy message.
+ * Returns 0 on success, error code on failure.
+ */
+int r600_uvd_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	struct radeon_fence *fence = NULL;
+	int r;
+
+	r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL);
+	if (r) {
+		DRM_ERROR("radeon: failed to get create msg (%d).\n", r);
+		return r;
+	}
+
+	r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, &fence);
+	if (r) {
+		DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r);
+		return r;
+	}
+
+	r = radeon_fence_wait(fence, false);
+	if (r)
+		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
+	else
+		DRM_INFO("ib test on ring %d succeeded\n",  ring->idx);
+
+	/* release the fence reference on success and on failure */
+	radeon_fence_unref(&fence);
+	return r;
+}
+
 /**
  * r600_dma_ring_ib_execute - Schedule an IB on the DMA engine
  *
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index a42ba11..441bdb8 100644
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -691,6 +691,7 @@
 #define SRBM_SOFT_RESET                                   0xe60
 #       define SOFT_RESET_DMA                             (1 << 12)
 #       define SOFT_RESET_RLC                             (1 << 13)
+#       define SOFT_RESET_UVD                             (1 << 18)
 #       define RV770_SOFT_RESET_DMA                       (1 << 20)
 
 #define CP_INT_CNTL                                       0xc124
@@ -1143,6 +1144,66 @@
 #       define AFMT_AZ_AUDIO_ENABLE_CHG_ACK  (1 << 30)
 
 /*
+ * UVD
+ */
+#define UVD_SEMA_ADDR_LOW				0xef00
+#define UVD_SEMA_ADDR_HIGH				0xef04
+#define UVD_SEMA_CMD					0xef08
+
+#define UVD_GPCOM_VCPU_CMD				0xef0c
+#define UVD_GPCOM_VCPU_DATA0				0xef10
+#define UVD_GPCOM_VCPU_DATA1				0xef14
+#define UVD_ENGINE_CNTL					0xef18
+
+#define UVD_SEMA_CNTL					0xf400
+#define UVD_RB_ARB_CTRL					0xf480
+
+#define UVD_LMI_EXT40_ADDR				0xf498
+#define UVD_CGC_GATE					0xf4a8
+#define UVD_LMI_CTRL2					0xf4f4
+#define UVD_MASTINT_EN					0xf500
+#define UVD_LMI_ADDR_EXT				0xf594
+#define UVD_LMI_CTRL					0xf598
+#define UVD_LMI_SWAP_CNTL				0xf5b4
+#define UVD_MP_SWAP_CNTL				0xf5bC
+#define UVD_MPC_CNTL					0xf5dC
+#define UVD_MPC_SET_MUXA0				0xf5e4
+#define UVD_MPC_SET_MUXA1				0xf5e8
+#define UVD_MPC_SET_MUXB0				0xf5eC
+#define UVD_MPC_SET_MUXB1				0xf5f0
+#define UVD_MPC_SET_MUX					0xf5f4
+#define UVD_MPC_SET_ALU					0xf5f8
+
+#define UVD_VCPU_CNTL					0xf660
+#define UVD_SOFT_RESET					0xf680
+#define		RBC_SOFT_RESET					(1<<0)
+#define		LBSI_SOFT_RESET					(1<<1)
+#define		LMI_SOFT_RESET					(1<<2)
+#define		VCPU_SOFT_RESET					(1<<3)
+#define		CSM_SOFT_RESET					(1<<5)
+#define		CXW_SOFT_RESET					(1<<6)
+#define		TAP_SOFT_RESET					(1<<7)
+#define		LMI_UMC_SOFT_RESET				(1<<13)
+#define UVD_RBC_IB_BASE					0xf684
+#define UVD_RBC_IB_SIZE					0xf688
+#define UVD_RBC_RB_BASE					0xf68c
+#define UVD_RBC_RB_RPTR					0xf690
+#define UVD_RBC_RB_WPTR					0xf694
+#define UVD_RBC_RB_WPTR_CNTL				0xf698
+
+#define UVD_STATUS					0xf6bc
+
+#define UVD_SEMA_TIMEOUT_STATUS				0xf6c0
+#define UVD_SEMA_WAIT_INCOMPLETE_TIMEOUT_CNTL		0xf6c4
+#define UVD_SEMA_WAIT_FAULT_TIMEOUT_CNTL		0xf6c8
+#define UVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL		0xf6cc
+
+#define UVD_RBC_RB_CNTL					0xf6a4
+#define UVD_RBC_RB_RPTR_ADDR				0xf6a8
+
+#define UVD_CONTEXT_ID					0xf6f4
+
+/*
  * PM4
  */
 #define PACKET0(reg, n)	((RADEON_PACKET_TYPE0 << 30) |			\
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 8263af3..3f5572d 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -109,24 +109,27 @@ extern int radeon_lockup_timeout;
 #define RADEON_BIOS_NUM_SCRATCH			8
 
 /* max number of rings */
-#define RADEON_NUM_RINGS			5
+#define RADEON_NUM_RINGS			6
 
 /* fence seq are set to this number when signaled */
 #define RADEON_FENCE_SIGNALED_SEQ		0LL
 
 /* internal ring indices */
 /* r1xx+ has gfx CP ring */
-#define RADEON_RING_TYPE_GFX_INDEX		0
+#define RADEON_RING_TYPE_GFX_INDEX	0
 
 /* cayman has 2 compute CP rings */
-#define CAYMAN_RING_TYPE_CP1_INDEX		1
-#define CAYMAN_RING_TYPE_CP2_INDEX		2
+#define CAYMAN_RING_TYPE_CP1_INDEX	1
+#define CAYMAN_RING_TYPE_CP2_INDEX	2
 
 /* R600+ has an async dma ring */
 #define R600_RING_TYPE_DMA_INDEX		3
 /* cayman add a second async dma ring */
 #define CAYMAN_RING_TYPE_DMA1_INDEX		4
 
+/* R600+ */
+#define R600_RING_TYPE_UVD_INDEX	5
+
 /* hardcode those limit for now */
 #define RADEON_VA_IB_OFFSET			(1 << 20)
 #define RADEON_VA_RESERVED_SIZE			(8 << 20)
@@ -357,8 +360,9 @@ struct radeon_bo_list {
 	struct ttm_validate_buffer tv;
 	struct radeon_bo	*bo;
 	uint64_t		gpu_offset;
-	unsigned		rdomain;
-	unsigned		wdomain;
+	bool			written;
+	unsigned		domain;
+	unsigned		alt_domain;
 	u32			tiling_flags;
 };
 
@@ -826,7 +830,6 @@ struct radeon_cs_reloc {
 	struct radeon_bo		*robj;
 	struct radeon_bo_list		lobj;
 	uint32_t			handle;
-	uint32_t			flags;
 };
 
 struct radeon_cs_chunk {
@@ -918,6 +921,7 @@ struct radeon_wb {
 #define R600_WB_DMA_RPTR_OFFSET   1792
 #define R600_WB_IH_WPTR_OFFSET   2048
 #define CAYMAN_WB_DMA1_RPTR_OFFSET   2304
+#define R600_WB_UVD_RPTR_OFFSET  2560
 #define R600_WB_EVENT_OFFSET     3072
 
 /**
@@ -1118,6 +1122,36 @@ struct radeon_pm {
 int radeon_pm_get_type_index(struct radeon_device *rdev,
 			     enum radeon_pm_state_type ps_type,
 			     int instance);
+/*
+ * UVD
+ */
+/* number of entries in the handles[] and filp[] arrays of struct radeon_uvd */
+#define RADEON_MAX_UVD_HANDLES	10
+#define RADEON_UVD_STACK_SIZE	(1024*1024)
+#define RADEON_UVD_HEAP_SIZE	(1024*1024)
+
+/* UVD state, embedded in struct radeon_device as the "uvd" member */
+struct radeon_uvd {
+	struct radeon_bo	*vcpu_bo;
+	void			*cpu_addr;
+	uint64_t		gpu_addr;
+	/* NOTE(review): presumably filp[i] owns handles[i] - confirm in radeon_uvd.c */
+	atomic_t		handles[RADEON_MAX_UVD_HANDLES];
+	struct drm_file		*filp[RADEON_MAX_UVD_HANDLES];
+};
+
+int radeon_uvd_init(struct radeon_device *rdev);
+void radeon_uvd_fini(struct radeon_device *rdev);
+int radeon_uvd_suspend(struct radeon_device *rdev);
+int radeon_uvd_resume(struct radeon_device *rdev);
+int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
+			      uint32_t handle, struct radeon_fence **fence);
+int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
+			       uint32_t handle, struct radeon_fence **fence);
+void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo);
+void radeon_uvd_free_handles(struct radeon_device *rdev,
+			     struct drm_file *filp);
+int radeon_uvd_cs_parse(struct radeon_cs_parser *parser);
 
 struct r600_audio {
 	int			channels;
@@ -1608,6 +1639,7 @@ struct radeon_device {
 	struct radeon_asic		*asic;
 	struct radeon_gem		gem;
 	struct radeon_pm		pm;
+	struct radeon_uvd		uvd;
 	uint32_t			bios_scratch[RADEON_BIOS_NUM_SCRATCH];
 	struct radeon_wb		wb;
 	struct radeon_dummy_page	dummy_page;
@@ -1621,6 +1653,7 @@ struct radeon_device {
 	const struct firmware *rlc_fw;	/* r6/700 RLC firmware */
 	const struct firmware *mc_fw;	/* NI MC firmware */
 	const struct firmware *ce_fw;	/* SI CE firmware */
+	const struct firmware *uvd_fw;	/* UVD firmware */
 	struct r600_blit r600_blit;
 	struct r600_vram_scratch vram_scratch;
 	int msi_enabled; /* msi enabled */
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index aba0a89..a7a7b2b 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -1130,6 +1130,15 @@ static struct radeon_asic rv770_asic = {
 			.ring_test = &r600_dma_ring_test,
 			.ib_test = &r600_dma_ib_test,
 			.is_lockup = &r600_dma_is_lockup,
+		},
+		[R600_RING_TYPE_UVD_INDEX] = {
+			.ib_execute = &r600_uvd_ib_execute,
+			.emit_fence = &r600_uvd_fence_emit,
+			.emit_semaphore = &r600_uvd_semaphore_emit,
+			.cs_parse = &radeon_uvd_cs_parse,
+			.ring_test = &r600_uvd_ring_test,
+			.ib_test = &r600_uvd_ib_test,
+			.is_lockup = &radeon_ring_test_lockup,
 		}
 	},
 	.irq = {
@@ -1216,6 +1225,15 @@ static struct radeon_asic evergreen_asic = {
 			.ring_test = &r600_dma_ring_test,
 			.ib_test = &r600_dma_ib_test,
 			.is_lockup = &evergreen_dma_is_lockup,
+		},
+		[R600_RING_TYPE_UVD_INDEX] = {
+			.ib_execute = &r600_uvd_ib_execute,
+			.emit_fence = &r600_uvd_fence_emit,
+			.emit_semaphore = &r600_uvd_semaphore_emit,
+			.cs_parse = &radeon_uvd_cs_parse,
+			.ring_test = &r600_uvd_ring_test,
+			.ib_test = &r600_uvd_ib_test,
+			.is_lockup = &radeon_ring_test_lockup,
 		}
 	},
 	.irq = {
@@ -1302,6 +1320,15 @@ static struct radeon_asic sumo_asic = {
 			.ring_test = &r600_dma_ring_test,
 			.ib_test = &r600_dma_ib_test,
 			.is_lockup = &evergreen_dma_is_lockup,
+		},
+		[R600_RING_TYPE_UVD_INDEX] = {
+			.ib_execute = &r600_uvd_ib_execute,
+			.emit_fence = &r600_uvd_fence_emit,
+			.emit_semaphore = &r600_uvd_semaphore_emit,
+			.cs_parse = &radeon_uvd_cs_parse,
+			.ring_test = &r600_uvd_ring_test,
+			.ib_test = &r600_uvd_ib_test,
+			.is_lockup = &radeon_ring_test_lockup,
 		}
 	},
 	.irq = {
@@ -1388,6 +1415,15 @@ static struct radeon_asic btc_asic = {
 			.ring_test = &r600_dma_ring_test,
 			.ib_test = &r600_dma_ib_test,
 			.is_lockup = &evergreen_dma_is_lockup,
+		},
+		[R600_RING_TYPE_UVD_INDEX] = {
+			.ib_execute = &r600_uvd_ib_execute,
+			.emit_fence = &r600_uvd_fence_emit,
+			.emit_semaphore = &r600_uvd_semaphore_emit,
+			.cs_parse = &radeon_uvd_cs_parse,
+			.ring_test = &r600_uvd_ring_test,
+			.ib_test = &r600_uvd_ib_test,
+			.is_lockup = &radeon_ring_test_lockup,
 		}
 	},
 	.irq = {
@@ -1517,6 +1553,15 @@ static struct radeon_asic cayman_asic = {
 			.ib_test = &r600_dma_ib_test,
 			.is_lockup = &cayman_dma_is_lockup,
 			.vm_flush = &cayman_dma_vm_flush,
+		},
+		[R600_RING_TYPE_UVD_INDEX] = {
+			.ib_execute = &r600_uvd_ib_execute,
+			.emit_fence = &r600_uvd_fence_emit,
+			.emit_semaphore = &cayman_uvd_semaphore_emit,
+			.cs_parse = &radeon_uvd_cs_parse,
+			.ring_test = &r600_uvd_ring_test,
+			.ib_test = &r600_uvd_ib_test,
+			.is_lockup = &radeon_ring_test_lockup,
 		}
 	},
 	.irq = {
@@ -1646,6 +1691,15 @@ static struct radeon_asic trinity_asic = {
 			.ib_test = &r600_dma_ib_test,
 			.is_lockup = &cayman_dma_is_lockup,
 			.vm_flush = &cayman_dma_vm_flush,
+		},
+		[R600_RING_TYPE_UVD_INDEX] = {
+			.ib_execute = &r600_uvd_ib_execute,
+			.emit_fence = &r600_uvd_fence_emit,
+			.emit_semaphore = &cayman_uvd_semaphore_emit,
+			.cs_parse = &radeon_uvd_cs_parse,
+			.ring_test = &r600_uvd_ring_test,
+			.ib_test = &r600_uvd_ib_test,
+			.is_lockup = &radeon_ring_test_lockup,
 		}
 	},
 	.irq = {
@@ -1775,6 +1829,15 @@ static struct radeon_asic si_asic = {
 			.ib_test = &r600_dma_ib_test,
 			.is_lockup = &si_dma_is_lockup,
 			.vm_flush = &si_dma_vm_flush,
+		},
+		[R600_RING_TYPE_UVD_INDEX] = {
+			.ib_execute = &r600_uvd_ib_execute,
+			.emit_fence = &r600_uvd_fence_emit,
+			.emit_semaphore = &cayman_uvd_semaphore_emit,
+			.cs_parse = &radeon_uvd_cs_parse,
+			.ring_test = &r600_uvd_ring_test,
+			.ib_test = &r600_uvd_ib_test,
+			.is_lockup = &radeon_ring_test_lockup,
 		}
 	},
 	.irq = {
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 3535f73..515db96 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -330,6 +330,7 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
 void r600_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
 int r600_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
 int r600_dma_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
+int r600_uvd_ring_test(struct radeon_device *rdev, struct radeon_ring *ring);
 int r600_copy_blit(struct radeon_device *rdev,
 		   uint64_t src_offset, uint64_t dst_offset,
 		   unsigned num_gpu_pages, struct radeon_fence **fence);
@@ -392,6 +393,19 @@ int r600_mc_wait_for_idle(struct radeon_device *rdev);
 u32 r600_get_xclk(struct radeon_device *rdev);
 uint64_t r600_get_gpu_clock_counter(struct radeon_device *rdev);
 
+/* uvd */
+int r600_uvd_init(struct radeon_device *rdev);
+int r600_uvd_rbc_start(struct radeon_device *rdev);
+void r600_uvd_rbc_stop(struct radeon_device *rdev);
+int r600_uvd_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
+void r600_uvd_fence_emit(struct radeon_device *rdev,
+			 struct radeon_fence *fence);
+void r600_uvd_semaphore_emit(struct radeon_device *rdev,
+			     struct radeon_ring *ring,
+			     struct radeon_semaphore *semaphore,
+			     bool emit_wait);
+void r600_uvd_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
+
 /*
  * rv770,rv730,rv710,rv740
  */
@@ -409,6 +423,7 @@ int rv770_copy_dma(struct radeon_device *rdev,
 		  unsigned num_gpu_pages,
 		   struct radeon_fence **fence);
 u32 rv770_get_xclk(struct radeon_device *rdev);
+int rv770_uvd_resume(struct radeon_device *rdev);
 
 /*
  * evergreen
@@ -465,6 +480,10 @@ int evergreen_copy_dma(struct radeon_device *rdev,
  */
 void cayman_fence_ring_emit(struct radeon_device *rdev,
 			    struct radeon_fence *fence);
+void cayman_uvd_semaphore_emit(struct radeon_device *rdev,
+			       struct radeon_ring *ring,
+			       struct radeon_semaphore *semaphore,
+			       bool emit_wait);
 void cayman_pcie_gart_tlb_flush(struct radeon_device *rdev);
 int cayman_init(struct radeon_device *rdev);
 void cayman_fini(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 7d66e01..532ff68 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -75,18 +75,34 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 			p->relocs_ptr[i] = &p->relocs[i];
 			p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);
 			p->relocs[i].lobj.bo = p->relocs[i].robj;
-			p->relocs[i].lobj.wdomain = r->write_domain;
-			p->relocs[i].lobj.rdomain = r->read_domains;
+			p->relocs[i].lobj.written = !!r->write_domain;
+
+			/* the first reloc of an UVD job is the
+			   msg and that must be in VRAM */
+			if (p->ring == R600_RING_TYPE_UVD_INDEX && i == 0) {
+
+				p->relocs[i].lobj.domain =
+					RADEON_GEM_DOMAIN_VRAM;
+
+				p->relocs[i].lobj.alt_domain =
+					RADEON_GEM_DOMAIN_VRAM;
+			} else {
+				uint32_t domain = r->write_domain ?
+					r->write_domain : r->read_domains;
+				p->relocs[i].lobj.domain = domain;
+				if (domain == RADEON_GEM_DOMAIN_VRAM)
+					domain |= RADEON_GEM_DOMAIN_GTT;
+				p->relocs[i].lobj.alt_domain = domain;
+			}
 			p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
 			p->relocs[i].handle = r->handle;
-			p->relocs[i].flags = r->flags;
 			radeon_bo_list_add_object(&p->relocs[i].lobj,
 						  &p->validated);
 
 		} else
 			p->relocs[i].handle = 0;
 	}
-	return radeon_bo_list_validate(&p->validated);
+	return radeon_bo_list_validate(&p->validated, p->ring);
 }
 
 static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
@@ -121,6 +137,9 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority
 			return -EINVAL;
 		}
 		break;
+	case RADEON_CS_RING_UVD:
+		p->ring = R600_RING_TYPE_UVD_INDEX;
+		break;
 	}
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 3435625..82fe183 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -31,9 +31,9 @@
 #include <linux/seq_file.h>
 #include <linux/atomic.h>
 #include <linux/wait.h>
-#include <linux/list.h>
 #include <linux/kref.h>
 #include <linux/slab.h>
+#include <linux/firmware.h>
 #include <drm/drmP.h>
 #include "radeon_reg.h"
 #include "radeon.h"
@@ -767,8 +767,21 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
 
 	radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
 	if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
-		rdev->fence_drv[ring].scratch_reg = 0;
-		index = R600_WB_EVENT_OFFSET + ring * 4;
+		if (ring != R600_RING_TYPE_UVD_INDEX) {
+			rdev->fence_drv[ring].scratch_reg = 0;
+			index = R600_WB_EVENT_OFFSET + ring * 4;
+			rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
+			rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr +
+							 index;
+
+		} else {
+			/* put fence directly behind firmware */
+			rdev->fence_drv[ring].cpu_addr = rdev->uvd.cpu_addr +
+							 rdev->uvd_fw->size;
+			rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr +
+							 rdev->uvd_fw->size;
+		}
+
 	} else {
 		r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
 		if (r) {
@@ -778,9 +791,9 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
 		index = RADEON_WB_SCRATCH_OFFSET +
 			rdev->fence_drv[ring].scratch_reg -
 			rdev->scratch.reg_base;
+		rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
+		rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
 	}
-	rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
-	rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
 	radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring);
 	rdev->fence_drv[ring].initialized = true;
 	dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n",
diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c
index c75cb2c..3019759 100644
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@@ -513,6 +513,7 @@ void radeon_driver_preclose_kms(struct drm_device *dev,
 		rdev->hyperz_filp = NULL;
 	if (rdev->cmask_filp == file_priv)
 		rdev->cmask_filp = NULL;
+	radeon_uvd_free_handles(rdev, file_priv);
 }
 
 /*
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index d3aface..0e34446 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -339,14 +339,14 @@ void radeon_bo_fini(struct radeon_device *rdev)
 void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
 				struct list_head *head)
 {
-	if (lobj->wdomain) {
+	if (lobj->written) {
 		list_add(&lobj->tv.head, head);
 	} else {
 		list_add_tail(&lobj->tv.head, head);
 	}
 }
 
-int radeon_bo_list_validate(struct list_head *head)
+int radeon_bo_list_validate(struct list_head *head, int ring)
 {
 	struct radeon_bo_list *lobj;
 	struct radeon_bo *bo;
@@ -360,15 +360,17 @@ int radeon_bo_list_validate(struct list_head *head)
 	list_for_each_entry(lobj, head, tv.head) {
 		bo = lobj->bo;
 		if (!bo->pin_count) {
-			domain = lobj->wdomain ? lobj->wdomain : lobj->rdomain;
+			domain = lobj->domain;
 			
 		retry:
 			radeon_ttm_placement_from_domain(bo, domain);
+			if (ring == R600_RING_TYPE_UVD_INDEX)
+				radeon_uvd_force_into_uvd_segment(bo);
 			r = ttm_bo_validate(&bo->tbo, &bo->placement,
 						true, false);
 			if (unlikely(r)) {
-				if (r != -ERESTARTSYS && domain == RADEON_GEM_DOMAIN_VRAM) {
-					domain |= RADEON_GEM_DOMAIN_GTT;
+				if (r != -ERESTARTSYS && domain != lobj->alt_domain) {
+					domain = lobj->alt_domain;
 					goto retry;
 				}
 				return r;
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index 5fc86b0..e2cb80a 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -128,7 +128,7 @@ extern int radeon_bo_init(struct radeon_device *rdev);
 extern void radeon_bo_fini(struct radeon_device *rdev);
 extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
 				struct list_head *head);
-extern int radeon_bo_list_validate(struct list_head *head);
+extern int radeon_bo_list_validate(struct list_head *head, int ring);
 extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo,
 				struct vm_area_struct *vma);
 extern int radeon_bo_set_tiling_flags(struct radeon_bo *bo,
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
index 8d58e26..31e47d8 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -368,7 +368,7 @@ void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *ring)
 {
 	u32 rptr;
 
-	if (rdev->wb.enabled)
+	if (rdev->wb.enabled && ring != &rdev->ring[R600_RING_TYPE_UVD_INDEX])
 		rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
 	else
 		rptr = RREG32(ring->rptr_reg);
@@ -821,18 +821,20 @@ static int radeon_debugfs_ring_info(struct seq_file *m, void *data)
 	return 0;
 }
 
-static int radeon_ring_type_gfx_index = RADEON_RING_TYPE_GFX_INDEX;
-static int cayman_ring_type_cp1_index = CAYMAN_RING_TYPE_CP1_INDEX;
-static int cayman_ring_type_cp2_index = CAYMAN_RING_TYPE_CP2_INDEX;
-static int radeon_ring_type_dma1_index = R600_RING_TYPE_DMA_INDEX;
-static int radeon_ring_type_dma2_index = CAYMAN_RING_TYPE_DMA1_INDEX;
+static int radeon_gfx_index = RADEON_RING_TYPE_GFX_INDEX;
+static int cayman_cp1_index = CAYMAN_RING_TYPE_CP1_INDEX;
+static int cayman_cp2_index = CAYMAN_RING_TYPE_CP2_INDEX;
+static int radeon_dma1_index = R600_RING_TYPE_DMA_INDEX;
+static int radeon_dma2_index = CAYMAN_RING_TYPE_DMA1_INDEX;
+static int r600_uvd_index = R600_RING_TYPE_UVD_INDEX;
 
 static struct drm_info_list radeon_debugfs_ring_info_list[] = {
-	{"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_ring_type_gfx_index},
-	{"radeon_ring_cp1", radeon_debugfs_ring_info, 0, &cayman_ring_type_cp1_index},
-	{"radeon_ring_cp2", radeon_debugfs_ring_info, 0, &cayman_ring_type_cp2_index},
-	{"radeon_ring_dma1", radeon_debugfs_ring_info, 0, &radeon_ring_type_dma1_index},
-	{"radeon_ring_dma2", radeon_debugfs_ring_info, 0, &radeon_ring_type_dma2_index},
+	{"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_gfx_index},
+	{"radeon_ring_cp1", radeon_debugfs_ring_info, 0, &cayman_cp1_index},
+	{"radeon_ring_cp2", radeon_debugfs_ring_info, 0, &cayman_cp2_index},
+	{"radeon_ring_dma1", radeon_debugfs_ring_info, 0, &radeon_dma1_index},
+	{"radeon_ring_dma2", radeon_debugfs_ring_info, 0, &radeon_dma2_index},
+	{"radeon_ring_uvd", radeon_debugfs_ring_info, 0, &r600_uvd_index},
 };
 
 static int radeon_debugfs_sa_info(struct seq_file *m, void *data)
diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c
index fda09c9..bbed4af 100644
--- a/drivers/gpu/drm/radeon/radeon_test.c
+++ b/drivers/gpu/drm/radeon/radeon_test.c
@@ -252,6 +252,36 @@ void radeon_test_moves(struct radeon_device *rdev)
 		radeon_do_test_moves(rdev, RADEON_TEST_COPY_BLIT);
 }
 
+static int radeon_test_create_and_emit_fence(struct radeon_device *rdev,
+					     struct radeon_ring *ring,
+					     struct radeon_fence **fence)
+{
+	int r;
+
+	if (ring->idx == R600_RING_TYPE_UVD_INDEX) {
+		r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL);
+		if (r) {
+			DRM_ERROR("Failed to get dummy create msg\n");
+			return r;
+		}
+
+		r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, fence);
+		if (r) {
+			DRM_ERROR("Failed to get dummy destroy msg\n");
+			return r;
+		}
+	} else {
+		r = radeon_ring_lock(rdev, ring, 64);
+		if (r) {
+			DRM_ERROR("Failed to lock ring A %d\n", ring->idx);
+			return r;
+		}
+		radeon_fence_emit(rdev, fence, ring->idx);
+		radeon_ring_unlock_commit(rdev, ring);
+	}
+	return 0;
+}
+
 void radeon_test_ring_sync(struct radeon_device *rdev,
 			   struct radeon_ring *ringA,
 			   struct radeon_ring *ringB)
@@ -272,21 +302,24 @@ void radeon_test_ring_sync(struct radeon_device *rdev,
 		goto out_cleanup;
 	}
 	radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
-	r = radeon_fence_emit(rdev, &fence1, ringA->idx);
-	if (r) {
-		DRM_ERROR("Failed to emit fence 1\n");
-		radeon_ring_unlock_undo(rdev, ringA);
+	radeon_ring_unlock_commit(rdev, ringA);
+
+	r = radeon_test_create_and_emit_fence(rdev, ringA, &fence1);
+	if (r)
 		goto out_cleanup;
-	}
-	radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
-	r = radeon_fence_emit(rdev, &fence2, ringA->idx);
+
+	r = radeon_ring_lock(rdev, ringA, 64);
 	if (r) {
-		DRM_ERROR("Failed to emit fence 2\n");
-		radeon_ring_unlock_undo(rdev, ringA);
+		DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
 		goto out_cleanup;
 	}
+	radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
 	radeon_ring_unlock_commit(rdev, ringA);
 
+	r = radeon_test_create_and_emit_fence(rdev, ringA, &fence2);
+	if (r)
+		goto out_cleanup;
+
 	mdelay(1000);
 
 	if (radeon_fence_signaled(fence1)) {
@@ -364,27 +397,22 @@ static void radeon_test_ring_sync2(struct radeon_device *rdev,
 		goto out_cleanup;
 	}
 	radeon_semaphore_emit_wait(rdev, ringA->idx, semaphore);
-	r = radeon_fence_emit(rdev, &fenceA, ringA->idx);
-	if (r) {
-		DRM_ERROR("Failed to emit sync fence 1\n");
-		radeon_ring_unlock_undo(rdev, ringA);
-		goto out_cleanup;
-	}
 	radeon_ring_unlock_commit(rdev, ringA);
 
+	r = radeon_test_create_and_emit_fence(rdev, ringA, &fenceA);
+	if (r)
+		goto out_cleanup;
+
 	r = radeon_ring_lock(rdev, ringB, 64);
 	if (r) {
 		DRM_ERROR("Failed to lock ring B %d\n", ringB->idx);
 		goto out_cleanup;
 	}
 	radeon_semaphore_emit_wait(rdev, ringB->idx, semaphore);
-	r = radeon_fence_emit(rdev, &fenceB, ringB->idx);
-	if (r) {
-		DRM_ERROR("Failed to create sync fence 2\n");
-		radeon_ring_unlock_undo(rdev, ringB);
-		goto out_cleanup;
-	}
 	radeon_ring_unlock_commit(rdev, ringB);
+	r = radeon_test_create_and_emit_fence(rdev, ringB, &fenceB);
+	if (r)
+		goto out_cleanup;
 
 	mdelay(1000);
 
@@ -393,7 +421,7 @@ static void radeon_test_ring_sync2(struct radeon_device *rdev,
 		goto out_cleanup;
 	}
 	if (radeon_fence_signaled(fenceB)) {
-		DRM_ERROR("Fence A signaled without waiting for semaphore.\n");
+		DRM_ERROR("Fence B signaled without waiting for semaphore.\n");
 		goto out_cleanup;
 	}
 
diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c
new file mode 100644
index 0000000..8ab7bb9
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@@ -0,0 +1,521 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ *    Christian König <deathsimple@vodafone.de>
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm.h>
+
+#include "radeon.h"
+#include "r600d.h"
+
+/* Firmware Names */
+#define FIRMWARE_RV770		"radeon/RV770_uvd.bin"
+#define FIRMWARE_RV710		"radeon/RV710_uvd.bin"
+#define FIRMWARE_CYPRESS	"radeon/CYPRESS_uvd.bin"
+#define FIRMWARE_SUMO		"radeon/SUMO_uvd.bin"
+#define FIRMWARE_TAHITI		"radeon/TAHITI_uvd.bin"
+
+MODULE_FIRMWARE(FIRMWARE_RV770);
+MODULE_FIRMWARE(FIRMWARE_RV710);
+MODULE_FIRMWARE(FIRMWARE_CYPRESS);
+MODULE_FIRMWARE(FIRMWARE_SUMO);
+MODULE_FIRMWARE(FIRMWARE_TAHITI);
+
+/**
+ * radeon_uvd_init - load the UVD firmware and set up the VCPU buffer
+ * @rdev: radeon device to initialize UVD for
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int radeon_uvd_init(struct radeon_device *rdev)
+{
+	struct platform_device *pdev;
+	unsigned long bo_size;
+	const char *fw_name;
+	int i, r;
+
+	/* pick the firmware first, so an unknown family can't leak pdev */
+	switch (rdev->family) {
+	case CHIP_RV770:
+		fw_name = FIRMWARE_RV770;
+		break;
+	case CHIP_RV710:
+	case CHIP_RV730:
+	case CHIP_RV740:
+		fw_name = FIRMWARE_RV710;
+		break;
+	case CHIP_CYPRESS:
+	case CHIP_JUNIPER:
+	case CHIP_REDWOOD:
+	case CHIP_CEDAR:
+		fw_name = FIRMWARE_CYPRESS;
+		break;
+	case CHIP_SUMO:
+	case CHIP_SUMO2:
+	case CHIP_PALM:
+	case CHIP_CAYMAN:
+	case CHIP_BARTS:
+	case CHIP_TURKS:
+	case CHIP_CAICOS:
+		fw_name = FIRMWARE_SUMO;
+		break;
+	case CHIP_TAHITI:
+	case CHIP_VERDE:
+	case CHIP_PITCAIRN:
+	case CHIP_ARUBA:
+		fw_name = FIRMWARE_TAHITI;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	pdev = platform_device_register_simple("radeon_uvd", 0, NULL, 0);
+	if (IS_ERR(pdev)) {
+		dev_err(rdev->dev, "radeon_uvd: Failed to register firmware\n");
+		return -EINVAL;
+	}
+
+	r = request_firmware(&rdev->uvd_fw, fw_name, &pdev->dev);
+	/* pdev is only needed for the request itself, drop it either way */
+	platform_device_unregister(pdev);
+	if (r) {
+		dev_err(rdev->dev, "radeon_uvd: Can't load firmware \"%s\"\n",
+			fw_name);
+		return r;
+	}
+
+	bo_size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) +
+		  RADEON_UVD_STACK_SIZE + RADEON_UVD_HEAP_SIZE;
+	r = radeon_bo_create(rdev, bo_size, PAGE_SIZE, true,
+			     RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->uvd.vcpu_bo);
+	if (r) {
+		dev_err(rdev->dev, "(%d) failed to allocate UVD bo\n", r);
+		return r;
+	}
+
+	r = radeon_uvd_resume(rdev);
+	if (r)
+		return r;
+
+	memset(rdev->uvd.cpu_addr, 0, bo_size);
+	memcpy(rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size);
+
+	r = radeon_uvd_suspend(rdev);
+	if (r)
+		return r;
+
+	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
+		atomic_set(&rdev->uvd.handles[i], 0);
+		rdev->uvd.filp[i] = NULL;
+	}
+
+	return 0;
+}
+
+void radeon_uvd_fini(struct radeon_device *rdev)
+{
+	radeon_uvd_suspend(rdev);
+	radeon_bo_unref(&rdev->uvd.vcpu_bo);
+}
+
+int radeon_uvd_suspend(struct radeon_device *rdev)
+{
+	int r;
+
+	if (rdev->uvd.vcpu_bo == NULL)
+		return 0;
+
+	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
+	if (!r) {
+		radeon_bo_kunmap(rdev->uvd.vcpu_bo);
+		radeon_bo_unpin(rdev->uvd.vcpu_bo);
+		radeon_bo_unreserve(rdev->uvd.vcpu_bo);
+	}
+	return r;
+}
+
+/* reserve, pin and map the VCPU bo; returns 0 or a negative error code */
+int radeon_uvd_resume(struct radeon_device *rdev)
+{
+	int r;
+
+	if (rdev->uvd.vcpu_bo == NULL)
+		return -EINVAL;
+
+	r = radeon_bo_reserve(rdev->uvd.vcpu_bo, false);
+	if (r) {
+		radeon_bo_unref(&rdev->uvd.vcpu_bo);
+		dev_err(rdev->dev, "(%d) failed to reserve UVD bo\n", r);
+		return r;
+	}
+
+	r = radeon_bo_pin(rdev->uvd.vcpu_bo, RADEON_GEM_DOMAIN_VRAM,
+			  &rdev->uvd.gpu_addr);
+	if (r) {
+		radeon_bo_unreserve(rdev->uvd.vcpu_bo);
+		radeon_bo_unref(&rdev->uvd.vcpu_bo);
+		dev_err(rdev->dev, "(%d) UVD bo pin failed\n", r);
+		return r;
+	}
+
+	r = radeon_bo_kmap(rdev->uvd.vcpu_bo, &rdev->uvd.cpu_addr);
+	if (r)
+		dev_err(rdev->dev, "(%d) UVD map failed\n", r);
+
+	/* release the reservation even when mapping failed */
+	radeon_bo_unreserve(rdev->uvd.vcpu_bo);
+
+	return r;
+}
+
+void radeon_uvd_force_into_uvd_segment(struct radeon_bo *rbo)
+{
+	rbo->placement.fpfn = 0 >> PAGE_SHIFT;
+	rbo->placement.lpfn = (256 * 1024 * 1024) >> PAGE_SHIFT;
+}
+
+void radeon_uvd_free_handles(struct radeon_device *rdev, struct drm_file *filp)
+{
+	int i, r;
+	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
+		if (rdev->uvd.filp[i] == filp) {
+			uint32_t handle = atomic_read(&rdev->uvd.handles[i]);
+			struct radeon_fence *fence;
+
+			r = radeon_uvd_get_destroy_msg(rdev,
+				R600_RING_TYPE_UVD_INDEX, handle, &fence);
+			if (r) {
+				DRM_ERROR("Error destroying UVD (%d)!\n", r);
+				continue;
+			}
+
+			radeon_fence_wait(fence, false);
+			radeon_fence_unref(&fence);
+
+			rdev->uvd.filp[i] = NULL;
+			atomic_set(&rdev->uvd.handles[i], 0);
+		}
+	}
+}
+
+static int radeon_uvd_cs_msg(struct radeon_cs_parser *p, struct radeon_bo *msg)
+{
+	uint32_t *map, msg_type, handle;
+	int i, r;
+
+	r = radeon_bo_kmap(msg, (void **)&map);
+	if (r)
+		return r;
+
+	msg_type = map[1];
+	handle = map[2];
+
+	radeon_bo_kunmap(msg);
+
+	if (handle == 0) {
+		DRM_ERROR("Invalid UVD handle!\n");
+		return -EINVAL;
+	}
+
+	if (msg_type == 2) {
+		/* it's a destroy msg, free the handle */
+		for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i)
+			atomic_cmpxchg(&p->rdev->uvd.handles[i], handle, 0);
+		return 0;
+	}
+
+	/* create or decode, validate the handle */
+	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
+		if (atomic_read(&p->rdev->uvd.handles[i]) == handle)
+			return 0;
+	}
+	/* handle not found try to alloc a new one */
+	for (i = 0; i < RADEON_MAX_UVD_HANDLES; ++i) {
+		if (!atomic_cmpxchg(&p->rdev->uvd.handles[i], 0, handle)) {
+			p->rdev->uvd.filp[i] = p->filp;
+			return 0;
+		}
+	}
+	DRM_ERROR("No more free UVD handles!\n");
+	return -EINVAL;
+}
+
+/* patch a reloc's address into the IB and validate its 256MB segment */
+static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p, int data0, int data1)
+{
+	struct radeon_cs_chunk *relocs_chunk;
+	struct radeon_cs_reloc *reloc;
+	unsigned idx, cmd;
+	uint64_t start, end;
+
+	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
+	idx = radeon_get_ib_value(p, data1);
+	if (idx >= relocs_chunk->length_dw) {
+		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
+			  idx, relocs_chunk->length_dw);
+		return -EINVAL;
+	}
+
+	reloc = p->relocs_ptr[(idx / 4)];
+	start = reloc->lobj.gpu_offset;
+	end = start + radeon_bo_size(reloc->robj);
+	start += radeon_get_ib_value(p, data0);
+
+	p->ib.ptr[data0] = start & 0xFFFFFFFF;
+	p->ib.ptr[data1] = start >> 32;
+
+	/* end is exclusive, so test the last byte (end - 1) below */
+	cmd = radeon_get_ib_value(p, p->idx);
+	if (cmd == 0) {
+		if ((end - 1) & 0xFFFFFFFFF0000000) {
+			DRM_ERROR("msg buffer %LX-%LX out of 256MB segment!\n",
+				  start, end);
+			return -EINVAL;
+		}
+
+		return radeon_uvd_cs_msg(p, reloc->robj);
+	}
+
+	if ((start & 0xFFFFFFFFF0000000) != ((end - 1) & 0xFFFFFFFFF0000000)) {
+		DRM_ERROR("reloc %LX-%LX crossing 256MB boundary!\n",
+			  start, end);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+int radeon_uvd_cs_parse(struct radeon_cs_parser *p)
+{
+	struct radeon_cs_packet pkt;
+	int i, r, data0 = 0, data1 = 0;
+
+	if (p->chunks[p->chunk_ib_idx].length_dw % 16) {
+		DRM_ERROR("UVD IB length (%d) not 16 dwords aligned!\n",
+			  p->chunks[p->chunk_ib_idx].length_dw);
+		return -EINVAL;
+	}
+
+	if (p->chunk_relocs_idx == -1) {
+		DRM_ERROR("No relocation chunk !\n");
+		return -EINVAL;
+	}
+
+
+	do {
+		r = radeon_cs_packet_parse(p, &pkt, p->idx);
+		if (r)
+			return r;
+		switch (pkt.type) {
+		case RADEON_PACKET_TYPE0:
+			p->idx++;
+			for (i = 0; i <= pkt.count; ++i) {
+				switch (pkt.reg + i*4) {
+				case UVD_GPCOM_VCPU_DATA0:
+					data0 = p->idx;
+					break;
+				case UVD_GPCOM_VCPU_DATA1:
+					data1 = p->idx;
+					break;
+				case UVD_GPCOM_VCPU_CMD:
+					r = radeon_uvd_cs_reloc(p, data0,
+								data1);
+					if (r)
+						return r;
+					break;
+				case UVD_ENGINE_CNTL:
+					break;
+				default:
+					DRM_ERROR("Invalid reg 0x%X!\n",
+						  pkt.reg + i*4);
+					return -EINVAL;
+				}
+				p->idx++;
+			}
+			break;
+		case RADEON_PACKET_TYPE2:
+			p->idx += pkt.count + 2;
+			break;
+		default:
+			DRM_ERROR("Unknown packet type %d !\n", pkt.type);
+			return -EINVAL;
+		}
+	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
+	return 0;
+}
+
+/* submit @bo as UVD msg; the bo reference is consumed even on failure */
+static int radeon_uvd_send_msg(struct radeon_device *rdev,
+			       int ring, struct radeon_bo *bo,
+			       struct radeon_fence **fence)
+{
+	struct ttm_validate_buffer tv;
+	struct list_head head;
+	struct radeon_ib ib;
+	uint64_t addr;
+	int i, r;
+
+	memset(&tv, 0, sizeof(tv));
+	tv.bo = &bo->tbo;
+	INIT_LIST_HEAD(&head);
+	list_add(&tv.head, &head);
+
+	r = ttm_eu_reserve_buffers(&head);
+	if (r)
+		goto err_unref;
+
+	radeon_ttm_placement_from_domain(bo, RADEON_GEM_DOMAIN_VRAM);
+	radeon_uvd_force_into_uvd_segment(bo);
+
+	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
+	if (r)
+		goto err_backoff;
+
+	r = radeon_ib_get(rdev, ring, &ib, NULL, 16);
+	if (r)
+		goto err_backoff;
+
+	addr = radeon_bo_gpu_offset(bo);
+	ib.ptr[0] = PACKET0(UVD_GPCOM_VCPU_DATA0, 0);
+	ib.ptr[1] = addr;
+	ib.ptr[2] = PACKET0(UVD_GPCOM_VCPU_DATA1, 0);
+	ib.ptr[3] = addr >> 32;
+	ib.ptr[4] = PACKET0(UVD_GPCOM_VCPU_CMD, 0);
+	ib.ptr[5] = 0;
+	for (i = 6; i < 16; ++i)
+		ib.ptr[i] = PACKET2(0);
+	ib.length_dw = 16;
+
+	r = radeon_ib_schedule(rdev, &ib, NULL);
+	if (r)
+		goto err_free;
+	ttm_eu_fence_buffer_objects(&head, ib.fence);
+	if (fence)
+		*fence = radeon_fence_ref(ib.fence);
+	radeon_ib_free(rdev, &ib);
+	radeon_bo_unref(&bo);
+	return 0;
+
+err_free:
+	radeon_ib_free(rdev, &ib);
+err_backoff:
+	ttm_eu_backoff_reservation(&head);
+err_unref:
+	radeon_bo_unref(&bo);
+	return r;
+}
+
+/* multiple fence commands without any stream commands in between can
+   crash the vcpu so just try to emit a dummy create/destroy msg to
+   avoid this */
+int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring,
+			      uint32_t handle, struct radeon_fence **fence)
+{
+	struct radeon_bo *bo;
+	uint32_t *msg;
+	int r, i;
+
+	r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true,
+			     RADEON_GEM_DOMAIN_VRAM, NULL, &bo);
+	if (r)
+		return r;
+
+	r = radeon_bo_reserve(bo, false);
+	if (r) {
+		radeon_bo_unref(&bo);
+		return r;
+	}
+
+	r = radeon_bo_kmap(bo, (void **)&msg);
+	if (r) {
+		radeon_bo_unreserve(bo);
+		radeon_bo_unref(&bo);
+		return r;
+	}
+
+	/* stitch together an UVD create msg */
+	msg[0] = 0x00000de4;
+	msg[1] = 0x00000000;
+	msg[2] = handle;
+	msg[3] = 0x00000000;
+	msg[4] = 0x00000000;
+	msg[5] = 0x00000000;
+	msg[6] = 0x00000000;
+	msg[7] = 0x00000780;
+	msg[8] = 0x00000440;
+	msg[9] = 0x00000000;
+	msg[10] = 0x01b37000;
+	for (i = 11; i < 1024; ++i)
+		msg[i] = 0x0;
+
+	radeon_bo_kunmap(bo);
+	radeon_bo_unreserve(bo);
+
+	return radeon_uvd_send_msg(rdev, ring, bo, fence);
+}
+
+int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring,
+			       uint32_t handle, struct radeon_fence **fence)
+{
+	struct radeon_bo *bo;
+	uint32_t *msg;
+	int r, i;
+
+	r = radeon_bo_create(rdev, 1024, PAGE_SIZE, true,
+			     RADEON_GEM_DOMAIN_VRAM, NULL, &bo);
+	if (r)
+		return r;
+
+	r = radeon_bo_reserve(bo, false);
+	if (r) {
+		radeon_bo_unref(&bo);
+		return r;
+	}
+
+	r = radeon_bo_kmap(bo, (void **)&msg);
+	if (r) {
+		radeon_bo_unreserve(bo);
+		radeon_bo_unref(&bo);
+		return r;
+	}
+
+	/* stitch together an UVD destroy msg */
+	msg[0] = 0x00000de4;
+	msg[1] = 0x00000002;
+	msg[2] = handle;
+	msg[3] = 0x00000000;
+	for (i = 4; i < 1024; ++i)
+		msg[i] = 0x0;
+
+	radeon_bo_kunmap(bo);
+	radeon_bo_unreserve(bo);
+
+	return radeon_uvd_send_msg(rdev, ring, bo, fence);
+}
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index d63fe1d..5a78cce 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -68,6 +68,107 @@ u32 rv770_get_xclk(struct radeon_device *rdev)
 	return reference_clock;
 }
 
+int rv770_uvd_resume(struct radeon_device *rdev)
+{
+	uint64_t addr;
+	uint32_t chip_id, size;
+	int r;
+
+	r = radeon_uvd_resume(rdev);
+	if (r)
+		return r;
+
+	/* program the VCPU memory controller bits 0-27 */
+	addr = rdev->uvd.gpu_addr >> 3;
+	size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
+	WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
+	WREG32(UVD_VCPU_CACHE_SIZE0, size);
+
+	addr += size;
+	size = RADEON_UVD_STACK_SIZE >> 3;
+	WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
+	WREG32(UVD_VCPU_CACHE_SIZE1, size);
+
+	addr += size;
+	size = RADEON_UVD_HEAP_SIZE >> 3;
+	WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
+	WREG32(UVD_VCPU_CACHE_SIZE2, size);
+
+	/* bits 28-31 */
+	addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
+	WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
+
+	/* bits 32-39 */
+	addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
+	WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
+
+	/* tell firmware which hardware it is running on */
+	switch (rdev->family) {
+	default:
+		return -EINVAL;
+	case CHIP_RV770:
+		chip_id = 0x01000004;
+		break;
+	case CHIP_RV710:
+		chip_id = 0x01000005;
+		break;
+	case CHIP_RV730:
+		chip_id = 0x01000006;
+		break;
+	case CHIP_RV740:
+		chip_id = 0x01000007;
+		break;
+	case CHIP_CYPRESS:
+		chip_id = 0x01000008;
+		break;
+	case CHIP_JUNIPER:
+		chip_id = 0x01000009;
+		break;
+	case CHIP_REDWOOD:
+		chip_id = 0x0100000a;
+		break;
+	case CHIP_CEDAR:
+		chip_id = 0x0100000b;
+		break;
+	case CHIP_SUMO:
+		chip_id = 0x0100000c;
+		break;
+	case CHIP_SUMO2:
+		chip_id = 0x0100000d;
+		break;
+	case CHIP_PALM:
+		chip_id = 0x0100000e;
+		break;
+	case CHIP_CAYMAN:
+		chip_id = 0x0100000f;
+		break;
+	case CHIP_BARTS:
+		chip_id = 0x01000010;
+		break;
+	case CHIP_TURKS:
+		chip_id = 0x01000011;
+		break;
+	case CHIP_CAICOS:
+		chip_id = 0x01000012;
+		break;
+	case CHIP_TAHITI:
+		chip_id = 0x01000014;
+		break;
+	case CHIP_VERDE:
+		chip_id = 0x01000015;
+		break;
+	case CHIP_PITCAIRN:
+		chip_id = 0x01000016;
+		break;
+	case CHIP_ARUBA:
+		chip_id = 0x01000017;
+		break;
+	}
+	WREG32(UVD_VCPU_CHIP_ID, chip_id);
+
+	return 0;
+}
+
 u32 rv770_page_flip(struct radeon_device *rdev, int crtc_id, u64 crtc_base)
 {
 	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
@@ -1040,6 +1141,17 @@ static int rv770_startup(struct radeon_device *rdev)
 		return r;
 	}
 
+	r = rv770_uvd_resume(rdev);
+	if (!r) {
+		r = radeon_fence_driver_start_ring(rdev,
+						   R600_RING_TYPE_UVD_INDEX);
+		if (r)
+			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
+	}
+
+	if (r)
+		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
+
 	/* Enable IRQ */
 	r = r600_irq_init(rdev);
 	if (r) {
@@ -1074,6 +1186,19 @@ static int rv770_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+	if (ring->ring_size) {
+		r = radeon_ring_init(rdev, ring, ring->ring_size,
+				     R600_WB_UVD_RPTR_OFFSET,
+				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
+				     0, 0xfffff, RADEON_CP_PACKET2);
+		if (!r)
+			r = r600_uvd_init(rdev);
+
+		if (r)
+			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
+	}
+
 	r = radeon_ib_pool_init(rdev);
 	if (r) {
 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -1115,6 +1240,7 @@ int rv770_resume(struct radeon_device *rdev)
 int rv770_suspend(struct radeon_device *rdev)
 {
 	r600_audio_fini(rdev);
+	radeon_uvd_suspend(rdev);
 	r700_cp_stop(rdev);
 	r600_dma_stop(rdev);
 	r600_irq_suspend(rdev);
@@ -1190,6 +1316,13 @@ int rv770_init(struct radeon_device *rdev)
 	rdev->ring[R600_RING_TYPE_DMA_INDEX].ring_obj = NULL;
 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX], 64 * 1024);
 
+	r = radeon_uvd_init(rdev);
+	if (!r) {
+		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
+		r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX],
+			       4096);
+	}
+
 	rdev->ih.ring_obj = NULL;
 	r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -1224,6 +1357,7 @@ void rv770_fini(struct radeon_device *rdev)
 	radeon_ib_pool_fini(rdev);
 	radeon_irq_kms_fini(rdev);
 	rv770_pcie_gart_fini(rdev);
+	radeon_uvd_fini(rdev);
 	r600_vram_scratch_fini(rdev);
 	radeon_gem_fini(rdev);
 	radeon_fence_driver_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h
index c55f950..da158b54 100644
--- a/drivers/gpu/drm/radeon/rv770d.h
+++ b/drivers/gpu/drm/radeon/rv770d.h
@@ -671,4 +671,18 @@
 #       define TARGET_LINK_SPEED_MASK                     (0xf << 0)
 #       define SELECTABLE_DEEMPHASIS                      (1 << 6)
 
+/* UVD */
+#define UVD_LMI_EXT40_ADDR				0xf498
+#define UVD_VCPU_CHIP_ID				0xf4d4
+#define UVD_VCPU_CACHE_OFFSET0				0xf4d8
+#define UVD_VCPU_CACHE_SIZE0				0xf4dc
+#define UVD_VCPU_CACHE_OFFSET1				0xf4e0
+#define UVD_VCPU_CACHE_SIZE1				0xf4e4
+#define UVD_VCPU_CACHE_OFFSET2				0xf4e8
+#define UVD_VCPU_CACHE_SIZE2				0xf4ec
+#define UVD_LMI_ADDR_EXT				0xf594
+
+#define UVD_RBC_RB_RPTR					0xf690
+#define UVD_RBC_RB_WPTR					0xf694
+
 #endif
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index bafbe32..cc9fe39 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -4372,6 +4372,16 @@ static int si_startup(struct radeon_device *rdev)
 		return r;
 	}
 
+	r = rv770_uvd_resume(rdev);
+	if (!r) {
+		r = radeon_fence_driver_start_ring(rdev,
+						   R600_RING_TYPE_UVD_INDEX);
+		if (r)
+			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
+	}
+	if (r)
+		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
+
 	/* Enable IRQ */
 	r = si_irq_init(rdev);
 	if (r) {
@@ -4429,6 +4439,18 @@ static int si_startup(struct radeon_device *rdev)
 	if (r)
 		return r;
 
+	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+	if (ring->ring_size) {
+		r = radeon_ring_init(rdev, ring, ring->ring_size,
+				     R600_WB_UVD_RPTR_OFFSET,
+				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
+				     0, 0xfffff, RADEON_CP_PACKET2);
+		if (!r)
+			r = r600_uvd_init(rdev);
+		if (r)
+			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
+	}
+
 	r = radeon_ib_pool_init(rdev);
 	if (r) {
 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@@ -4472,6 +4494,8 @@ int si_suspend(struct radeon_device *rdev)
 	radeon_vm_manager_fini(rdev);
 	si_cp_enable(rdev, false);
 	cayman_dma_stop(rdev);
+	r600_uvd_rbc_stop(rdev);
+	radeon_uvd_suspend(rdev);
 	si_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	si_pcie_gart_disable(rdev);
@@ -4557,6 +4581,13 @@ int si_init(struct radeon_device *rdev)
 	ring->ring_obj = NULL;
 	r600_ring_init(rdev, ring, 64 * 1024);
 
+	r = radeon_uvd_init(rdev);
+	if (!r) {
+		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+		ring->ring_obj = NULL;
+		r600_ring_init(rdev, ring, 4096);
+	}
+
 	rdev->ih.ring_obj = NULL;
 	r600_ih_ring_init(rdev, 64 * 1024);
 
@@ -4605,6 +4636,7 @@ void si_fini(struct radeon_device *rdev)
 	radeon_vm_manager_fini(rdev);
 	radeon_ib_pool_fini(rdev);
 	radeon_irq_kms_fini(rdev);
+	radeon_uvd_fini(rdev);
 	si_pcie_gart_fini(rdev);
 	r600_vram_scratch_fini(rdev);
 	radeon_gem_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h
index 23fc08f..759f682 100644
--- a/drivers/gpu/drm/radeon/sid.h
+++ b/drivers/gpu/drm/radeon/sid.h
@@ -798,6 +798,12 @@
 #       define THREAD_TRACE_FINISH                      (55 << 0)
 
 /*
+ * UVD
+ */
+#define UVD_RBC_RB_RPTR					0xF690
+#define UVD_RBC_RB_WPTR					0xF694
+
+/*
  * PM4
  */
 #define PACKET0(reg, n)	((RADEON_PACKET_TYPE0 << 30) |			\
diff --git a/include/uapi/drm/radeon_drm.h b/include/uapi/drm/radeon_drm.h
index eeda917..cd085d1 100644
--- a/include/uapi/drm/radeon_drm.h
+++ b/include/uapi/drm/radeon_drm.h
@@ -918,6 +918,7 @@ struct drm_radeon_gem_va {
 #define RADEON_CS_RING_GFX          0
 #define RADEON_CS_RING_COMPUTE      1
 #define RADEON_CS_RING_DMA          2
+#define RADEON_CS_RING_UVD          3
 /* The third dword of RADEON_CHUNK_ID_FLAGS is a sint32 that sets the priority */
 /* 0 = normal, + = higher priority, - = lower priority */
 
-- 
1.7.9.5

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel

  parent reply	other threads:[~2013-04-02 23:18 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-04-02 23:18 [PATCH] drm/radeon: UVD support for RV710-SI Christian König
2013-04-02 23:18 ` [PATCH 01/10] drm/radeon: UVD doesn't needs VM on SI Christian König
2013-04-03 14:42   ` Jerome Glisse
2013-04-03 15:29     ` Christian König
2013-04-02 23:18 ` Christian König [this message]
2013-04-03 14:53   ` [PATCH 02/10] drm/radeon: UVD bringup v7 Jerome Glisse
2013-04-03 15:53     ` Christian König
2013-04-03 17:10       ` Jerome Glisse
2013-04-04 13:26         ` Christian König
2013-04-04 16:57   ` Alex Deucher
2013-04-02 23:18 ` [PATCH 03/10] drm/radeon: add pm callback for setting uvd clocks Christian König
2013-04-02 23:18 ` [PATCH 04/10] drm/radeon: add radeon_atom_get_clock_dividers helper Christian König
2013-04-02 23:18 ` [PATCH 05/10] drm/radeon: add set_uvd_clocks callback for ON/LN/TN (v4) Christian König
2013-04-02 23:18 ` [PATCH 06/10] drm/radeon: add set_uvd_clocks callback for evergreen Christian König
2013-04-02 23:18 ` [PATCH 07/10] drm/radeon: add set_uvd_clocks callback for SI Christian König
2013-04-02 23:18 ` [PATCH 08/10] drm/radeon: add set_uvd_clocks callback for r7xx v3 Christian König
2013-04-02 23:18 ` [PATCH 09/10] drm/radeon: init UVD clocks to sane defaults Christian König
2013-04-02 23:18 ` [PATCH 10/10] drm/radeon: add UVD tiling addr config v2 Christian König
2013-04-02 23:34 ` [PATCH] drm/radeon: UVD support for RV710-SI Alex Deucher
2013-04-03 14:41 ` Jerome Glisse
2013-04-03 17:57 ` Andreas Boll
2013-04-04  8:41   ` Christian König
2013-04-04 12:06 ` Paul Menzel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1364944719-5175-3-git-send-email-deathsimple@vodafone.de \
    --to=deathsimple@vodafone.de \
    --cc=dri-devel@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.