* [PATCH split] Improve pipe split between amdgpu and amdkfd
@ 2017-04-13 21:35 Andres Rodriguez
From: Andres Rodriguez @ 2017-04-13 21:35 UTC
  To: amd-gfx@lists.freedesktop.org; +Cc: andresx7@gmail.com

This is a split of patches that are ready to land from the series:
Add support for high priority scheduling in amdgpu v8

I've incorporated Felix's and Alex's feedback from the thread above:
 * Split the MEC_HPD_SIZE rename into its own patch (patch 01)
 * Added a patch to fix the kgd_hqd_load bug Felix pointed out (patch 06)
 * Fixed various off-by-one errors
 * Switched to gfx_v8_0_deactivate_hqd

The only comment I didn't address was the suggestion to change the queue
allocation policy for gfx9 to match gfx7/8. See my inline reply in that
thread for details on why this was skipped.



* [PATCH 01/17] drm/amdgpu: clarify MEC_HPD_SIZE is specific to a gfx generation
From: Andres Rodriguez @ 2017-04-13 21:35 UTC
  To: amd-gfx@lists.freedesktop.org; +Cc: andresx7@gmail.com

Rename MEC_HPD_SIZE to GFXN_MEC_HPD_SIZE (where N is the gfx generation
number) to clarify that the value is specific to each gfx generation.

Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 11 +++++------
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 15 +++++++--------
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 17 ++++++++---------
 3 files changed, 20 insertions(+), 23 deletions(-)
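
A note on the EOP_CONTROL math touched below: the EOP_SIZE field is in
units of 2^(EOP_SIZE+1) dwords, so a 2048-byte HPD buffer programs
EOP_SIZE = 8 on every generation, even though gfx7 derives the field
from 8-byte units and gfx8/9 from dwords. A minimal userspace sketch of
that arithmetic (order_base_2() is modeled here rather than taken from
the kernel):

#include <stdio.h>

#define GFX7_MEC_HPD_SIZE 2048	/* bytes; per-generation constant from this patch */
#define GFX8_MEC_HPD_SIZE 2048

/* model of the kernel's order_base_2(): ceil(log2(n)) for n >= 1 */
static unsigned int order_base_2(unsigned int n)
{
	unsigned int order = 0;

	while ((1u << order) < n)
		order++;
	return order;
}

int main(void)
{
	/* gfx7 computes the field from 8-byte units (gfx_v7_0_cp_compute_resume) */
	unsigned int gfx7 = order_base_2(GFX7_MEC_HPD_SIZE / 8);
	/* gfx8/9 compute it from dwords, minus one (gfx_v8_0_mqd_init) */
	unsigned int gfx8 = order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1;

	/* both print 8: 2^(8+1) dwords = 512 dwords = 2048 bytes */
	printf("gfx7 EOP_SIZE=%u, gfx8 EOP_SIZE=%u\n", gfx7, gfx8);
	return 0;
}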

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index c930bb8..3b98162 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -32,40 +32,41 @@
 #include "clearstate_ci.h"
 
 #include "dce/dce_8_0_d.h"
 #include "dce/dce_8_0_sh_mask.h"
 
 #include "bif/bif_4_1_d.h"
 #include "bif/bif_4_1_sh_mask.h"
 
 #include "gca/gfx_7_0_d.h"
 #include "gca/gfx_7_2_enum.h"
 #include "gca/gfx_7_2_sh_mask.h"
 
 #include "gmc/gmc_7_0_d.h"
 #include "gmc/gmc_7_0_sh_mask.h"
 
 #include "oss/oss_2_0_d.h"
 #include "oss/oss_2_0_sh_mask.h"
 
 #define GFX7_NUM_GFX_RINGS     1
 #define GFX7_NUM_COMPUTE_RINGS 8
+#define GFX7_MEC_HPD_SIZE      2048
 
 static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
 static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
 static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);
 
 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
 MODULE_FIRMWARE("radeon/bonaire_me.bin");
 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
 
 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
 MODULE_FIRMWARE("radeon/hawaii_me.bin");
 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
 
 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
 MODULE_FIRMWARE("radeon/kaveri_me.bin");
 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
@@ -2804,90 +2805,88 @@ static void gfx_v7_0_cp_compute_fini(struct amdgpu_device *adev)
 		}
 	}
 }
 
 static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
 {
 	int r;
 
 	if (adev->gfx.mec.hpd_eop_obj) {
 		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 
 		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
 		adev->gfx.mec.hpd_eop_obj = NULL;
 	}
 }
 
-#define MEC_HPD_SIZE 2048
-
 static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 {
 	int r;
 	u32 *hpd;
 
 	/*
 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
 	 * Nonetheless, we assign only 1 pipe because all other pipes will
 	 * be handled by KFD
 	 */
 	adev->gfx.mec.num_mec = 1;
 	adev->gfx.mec.num_pipe = 1;
 	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
 
 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
 		r = amdgpu_bo_create(adev,
-				     adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
+				     adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * GFX7_MEC_HPD_SIZE * 2,
 				     PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 				     &adev->gfx.mec.hpd_eop_obj);
 		if (r) {
 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
 			return r;
 		}
 	}
 
 	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
 	if (unlikely(r != 0)) {
 		gfx_v7_0_mec_fini(adev);
 		return r;
 	}
 	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
 			  &adev->gfx.mec.hpd_eop_gpu_addr);
 	if (r) {
 		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
 		gfx_v7_0_mec_fini(adev);
 		return r;
 	}
 	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
 	if (r) {
 		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
 		gfx_v7_0_mec_fini(adev);
 		return r;
 	}
 
 	/* clear memory.  Not sure if this is required or not */
-	memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
+	memset(hpd, 0, adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * GFX7_MEC_HPD_SIZE * 2);
 
 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 
 	return 0;
 }
 
 struct hqd_registers
 {
 	u32 cp_mqd_base_addr;
 	u32 cp_mqd_base_addr_hi;
 	u32 cp_hqd_active;
 	u32 cp_hqd_vmid;
 	u32 cp_hqd_persistent_state;
 	u32 cp_hqd_pipe_priority;
 	u32 cp_hqd_queue_priority;
 	u32 cp_hqd_quantum;
 	u32 cp_hqd_pq_base;
 	u32 cp_hqd_pq_base_hi;
 	u32 cp_hqd_pq_rptr;
@@ -2961,55 +2960,55 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
 	bool use_doorbell = true;
 	u64 hqd_gpu_addr;
 	u64 mqd_gpu_addr;
 	u64 eop_gpu_addr;
 	u64 wb_gpu_addr;
 	u32 *buf;
 	struct bonaire_mqd *mqd;
 	struct amdgpu_ring *ring;
 
 	/* fix up chicken bits */
 	tmp = RREG32(mmCP_CPF_DEBUG);
 	tmp |= (1 << 23);
 	WREG32(mmCP_CPF_DEBUG, tmp);
 
 	/* init the pipes */
 	mutex_lock(&adev->srbm_mutex);
 	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
 		int me = (i < 4) ? 1 : 2;
 		int pipe = (i < 4) ? i : (i - 4);
 
-		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
+		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * GFX7_MEC_HPD_SIZE * 2);
 
 		cik_srbm_select(adev, me, pipe, 0, 0);
 
 		/* write the EOP addr */
 		WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
 		WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
 
 		/* set the VMID assigned */
 		WREG32(mmCP_HPD_EOP_VMID, 0);
 
 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
 		tmp = RREG32(mmCP_HPD_EOP_CONTROL);
 		tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
-		tmp |= order_base_2(MEC_HPD_SIZE / 8);
+		tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8);
 		WREG32(mmCP_HPD_EOP_CONTROL, tmp);
 	}
 	cik_srbm_select(adev, 0, 0, 0, 0);
 	mutex_unlock(&adev->srbm_mutex);
 
 	/* init the queues.  Just two for now. */
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		ring = &adev->gfx.compute_ring[i];
 
 		if (ring->mqd_obj == NULL) {
 			r = amdgpu_bo_create(adev,
 					     sizeof(struct bonaire_mqd),
 					     PAGE_SIZE, true,
 					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 					     &ring->mqd_obj);
 			if (r) {
 				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
 				return r;
 			}
 		}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index df50e34..fc94c3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -36,40 +36,41 @@
 #include "gmc/gmc_8_2_sh_mask.h"
 
 #include "oss/oss_3_0_d.h"
 #include "oss/oss_3_0_sh_mask.h"
 
 #include "bif/bif_5_0_d.h"
 #include "bif/bif_5_0_sh_mask.h"
 
 #include "gca/gfx_8_0_d.h"
 #include "gca/gfx_8_0_enum.h"
 #include "gca/gfx_8_0_sh_mask.h"
 #include "gca/gfx_8_0_enum.h"
 
 #include "dce/dce_10_0_d.h"
 #include "dce/dce_10_0_sh_mask.h"
 
 #include "smu/smu_7_1_3_d.h"
 
 #define GFX8_NUM_GFX_RINGS     1
 #define GFX8_NUM_COMPUTE_RINGS 8
+#define GFX8_MEC_HPD_SIZE 2048
 
 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
 
 #define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
 #define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
 #define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
 #define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
 #define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
 #define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
 #define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
 #define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
 #define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
 
 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
@@ -1396,116 +1397,114 @@ static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
 		ring->pipe = 1;
 	}
 
 	ring->queue = 0;
 	ring->eop_gpu_addr = kiq->eop_gpu_addr;
 	sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
 	r = amdgpu_ring_init(adev, ring, 1024,
 			     irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
 	if (r)
 		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
 
 	return r;
 }
 static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
 				   struct amdgpu_irq_src *irq)
 {
 	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
 	amdgpu_ring_fini(ring);
 }
 
-#define MEC_HPD_SIZE 2048
-
 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
 {
 	int r;
 	u32 *hpd;
 
 	/*
 	 * we assign only 1 pipe because all other pipes will
 	 * be handled by KFD
 	 */
 	adev->gfx.mec.num_mec = 1;
 	adev->gfx.mec.num_pipe = 1;
 	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
 
 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
 		r = amdgpu_bo_create(adev,
-				     adev->gfx.mec.num_queue * MEC_HPD_SIZE,
+				     adev->gfx.mec.num_queue * GFX8_MEC_HPD_SIZE,
 				     PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 				     &adev->gfx.mec.hpd_eop_obj);
 		if (r) {
 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
 			return r;
 		}
 	}
 
 	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
 	if (unlikely(r != 0)) {
 		gfx_v8_0_mec_fini(adev);
 		return r;
 	}
 	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
 			  &adev->gfx.mec.hpd_eop_gpu_addr);
 	if (r) {
 		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
 		gfx_v8_0_mec_fini(adev);
 		return r;
 	}
 	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
 	if (r) {
 		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
 		gfx_v8_0_mec_fini(adev);
 		return r;
 	}
 
-	memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);
+	memset(hpd, 0, adev->gfx.mec.num_queue * GFX8_MEC_HPD_SIZE);
 
 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 
 	return 0;
 }
 
 static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev)
 {
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 
 	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
 }
 
 static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
 {
 	int r;
 	u32 *hpd;
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 
-	r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
+	r = amdgpu_bo_create_kernel(adev, GFX8_MEC_HPD_SIZE, PAGE_SIZE,
 				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
 				    &kiq->eop_gpu_addr, (void **)&hpd);
 	if (r) {
 		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
 		return r;
 	}
 
-	memset(hpd, 0, MEC_HPD_SIZE);
+	memset(hpd, 0, GFX8_MEC_HPD_SIZE);
 
 	r = amdgpu_bo_reserve(kiq->eop_obj, false);
 	if (unlikely(r != 0))
 		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
 	amdgpu_bo_kunmap(kiq->eop_obj);
 	amdgpu_bo_unreserve(kiq->eop_obj);
 
 	return 0;
 }
 
 static const u32 vgpr_init_compute_shader[] =
 {
 	0x7e000209, 0x7e020208,
 	0x7e040207, 0x7e060206,
 	0x7e080205, 0x7e0a0204,
 	0x7e0c0203, 0x7e0e0202,
 	0x7e100201, 0x7e120200,
 	0x7e140209, 0x7e160208,
 	0x7e180207, 0x7e1a0206,
 	0x7e1c0205, 0x7e1e0204,
@@ -2138,41 +2137,41 @@ static int gfx_v8_0_sw_init(void *handle)
 		if (r)
 			return r;
 	}
 
 	/* set up the compute queues */
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		unsigned irq_type;
 
 		/* max 32 queues per MEC */
 		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
 			DRM_ERROR("Too many (%d) compute rings!\n", i);
 			break;
 		}
 		ring = &adev->gfx.compute_ring[i];
 		ring->ring_obj = NULL;
 		ring->use_doorbell = true;
 		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
 		ring->me = 1; /* first MEC */
 		ring->pipe = i / 8;
 		ring->queue = i % 8;
-		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
+		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * GFX8_MEC_HPD_SIZE);
 		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
 		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
 		/* type-2 packets are deprecated on MEC, use type-3 instead */
 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
 				     irq_type);
 		if (r)
 			return r;
 	}
 
 	r = gfx_v8_0_kiq_init(adev);
 	if (r) {
 		DRM_ERROR("Failed to init KIQ BOs!\n");
 		return r;
 	}
 
 	kiq = &adev->gfx.kiq;
 	r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
 	if (r)
 		return r;
 
@@ -4758,41 +4757,41 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 	struct amdgpu_device *adev = ring->adev;
 	struct vi_mqd *mqd = ring->mqd_ptr;
 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
 	uint32_t tmp;
 
 	mqd->header = 0xC0310800;
 	mqd->compute_pipelinestat_enable = 0x00000001;
 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
 	mqd->compute_misc_reserved = 0x00000003;
 
 	eop_base_addr = ring->eop_gpu_addr >> 8;
 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
 
 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
 	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
-			(order_base_2(MEC_HPD_SIZE / 4) - 1));
+			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
 
 	mqd->cp_hqd_eop_control = tmp;
 
 	/* enable doorbell? */
 	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
 			    CP_HQD_PQ_DOORBELL_CONTROL,
 			    DOORBELL_EN,
 			    ring->use_doorbell ? 1 : 0);
 
 	mqd->cp_hqd_pq_doorbell_control = tmp;
 
 	/* disable the queue if it's active */
 	mqd->cp_hqd_dequeue_request = 0;
 	mqd->cp_hqd_pq_rptr = 0;
 	mqd->cp_hqd_pq_wptr = 0;
 
 	/* set the pointer to the MQD */
 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 87596e4..0de7ff4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -23,40 +23,41 @@
 #include <linux/firmware.h>
 #include "drmP.h"
 #include "amdgpu.h"
 #include "amdgpu_gfx.h"
 #include "soc15.h"
 #include "soc15d.h"
 
 #include "vega10/soc15ip.h"
 #include "vega10/GC/gc_9_0_offset.h"
 #include "vega10/GC/gc_9_0_sh_mask.h"
 #include "vega10/vega10_enum.h"
 #include "vega10/HDP/hdp_4_0_offset.h"
 
 #include "soc15_common.h"
 #include "clearstate_gfx9.h"
 #include "v9_structs.h"
 
 #define GFX9_NUM_GFX_RINGS     1
 #define GFX9_NUM_COMPUTE_RINGS 8
 #define GFX9_NUM_SE		4
+#define GFX9_MEC_HPD_SIZE 2048
 #define RLCG_UCODE_LOADING_START_ADDRESS 0x2000
 
 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
 
 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
 {
 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
 		SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0)},
 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_SIZE),
 		SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID1), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID1)},
 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_SIZE),
 		SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID2), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID2)},
 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_SIZE),
 		SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID3), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID3)},
 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID4_SIZE),
@@ -457,63 +458,61 @@ static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 
 		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
 		adev->gfx.mec.hpd_eop_obj = NULL;
 	}
 	if (adev->gfx.mec.mec_fw_obj) {
 		r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, false);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve mec firmware bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.mec.mec_fw_obj);
 		amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
 
 		amdgpu_bo_unref(&adev->gfx.mec.mec_fw_obj);
 		adev->gfx.mec.mec_fw_obj = NULL;
 	}
 }
 
-#define MEC_HPD_SIZE 2048
-
 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
 {
 	int r;
 	u32 *hpd;
 	const __le32 *fw_data;
 	unsigned fw_size;
 	u32 *fw;
 
 	const struct gfx_firmware_header_v1_0 *mec_hdr;
 
 	/*
 	 * we assign only 1 pipe because all other pipes will
 	 * be handled by KFD
 	 */
 	adev->gfx.mec.num_mec = 1;
 	adev->gfx.mec.num_pipe = 1;
 	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
 
 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
 		r = amdgpu_bo_create(adev,
-				     adev->gfx.mec.num_queue * MEC_HPD_SIZE,
+				     adev->gfx.mec.num_queue * GFX9_MEC_HPD_SIZE,
 				     PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 				     &adev->gfx.mec.hpd_eop_obj);
 		if (r) {
 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
 			return r;
 		}
 	}
 
 	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
 	if (unlikely(r != 0)) {
 		gfx_v9_0_mec_fini(adev);
 		return r;
 	}
 	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
 			  &adev->gfx.mec.hpd_eop_gpu_addr);
 	if (r) {
 		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
 		gfx_v9_0_mec_fini(adev);
 		return r;
@@ -572,49 +571,49 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
 	amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
 	amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
 
 
 	return 0;
 }
 
 static void gfx_v9_0_kiq_fini(struct amdgpu_device *adev)
 {
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 
 	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
 }
 
 static int gfx_v9_0_kiq_init(struct amdgpu_device *adev)
 {
 	int r;
 	u32 *hpd;
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 
-	r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
+	r = amdgpu_bo_create_kernel(adev, GFX9_MEC_HPD_SIZE, PAGE_SIZE,
 				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
 				    &kiq->eop_gpu_addr, (void **)&hpd);
 	if (r) {
 		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
 		return r;
 	}
 
-	memset(hpd, 0, MEC_HPD_SIZE);
+	memset(hpd, 0, GFX9_MEC_HPD_SIZE);
 
 	r = amdgpu_bo_reserve(kiq->eop_obj, false);
 	if (unlikely(r != 0))
 		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
 	amdgpu_bo_kunmap(kiq->eop_obj);
 	amdgpu_bo_unreserve(kiq->eop_obj);
 
 	return 0;
 }
 
 static int gfx_v9_0_kiq_init_ring(struct amdgpu_device *adev,
 				  struct amdgpu_ring *ring,
 				  struct amdgpu_irq_src *irq)
 {
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 	int r = 0;
 
 	r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
 	if (r)
 		return r;
@@ -1078,41 +1077,41 @@ static int gfx_v9_0_sw_init(void *handle)
 		if (r)
 			return r;
 	}
 
 	/* set up the compute queues */
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		unsigned irq_type;
 
 		/* max 32 queues per MEC */
 		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
 			DRM_ERROR("Too many (%d) compute rings!\n", i);
 			break;
 		}
 		ring = &adev->gfx.compute_ring[i];
 		ring->ring_obj = NULL;
 		ring->use_doorbell = true;
 		ring->doorbell_index = (AMDGPU_DOORBELL64_MEC_RING0 + i) << 1;
 		ring->me = 1; /* first MEC */
 		ring->pipe = i / 8;
 		ring->queue = i % 8;
-		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
+		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * GFX9_MEC_HPD_SIZE);
 		sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
 		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
 		/* type-2 packets are deprecated on MEC, use type-3 instead */
 		r = amdgpu_ring_init(adev, ring, 1024,
 				     &adev->gfx.eop_irq, irq_type);
 		if (r)
 			return r;
 	}
 
 	if (amdgpu_sriov_vf(adev)) {
 		r = gfx_v9_0_kiq_init(adev);
 		if (r) {
 			DRM_ERROR("Failed to init KIQ BOs!\n");
 			return r;
 		}
 
 		kiq = &adev->gfx.kiq;
 		r = gfx_v9_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
 		if (r)
 			return r;
@@ -1883,41 +1882,41 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
 	struct amdgpu_device *adev = ring->adev;
 	struct v9_mqd *mqd = ring->mqd_ptr;
 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
 	uint32_t tmp;
 
 	mqd->header = 0xC0310800;
 	mqd->compute_pipelinestat_enable = 0x00000001;
 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
 	mqd->compute_misc_reserved = 0x00000003;
 
 	eop_base_addr = ring->eop_gpu_addr >> 8;
 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
 
 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
-			(order_base_2(MEC_HPD_SIZE / 4) - 1));
+			(order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
 
 	mqd->cp_hqd_eop_control = tmp;
 
 	/* enable doorbell? */
 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
 
 	if (ring->use_doorbell) {
 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
 				    DOORBELL_OFFSET, ring->doorbell_index);
 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
 				    DOORBELL_EN, 1);
 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
 				    DOORBELL_SOURCE, 0);
 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
 				    DOORBELL_HIT, 0);
 	}
 	else
 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
 					 DOORBELL_EN, 0);
 
@@ -3740,52 +3739,52 @@ static int gfx_v9_0_init_queue(struct amdgpu_ring *ring)
 	/* init the mqd struct */
 	memset(buf, 0, sizeof(struct v9_mqd));
 
 	mqd = (struct v9_mqd *)buf;
 	mqd->header = 0xC0310800;
 	mqd->compute_pipelinestat_enable = 0x00000001;
 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
 	mqd->compute_misc_reserved = 0x00000003;
 	mutex_lock(&adev->srbm_mutex);
 	soc15_grbm_select(adev, ring->me,
 			       ring->pipe,
 			       ring->queue, 0);
 	/* disable wptr polling */
 	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
 
 	/* write the EOP addr */
 	BUG_ON(ring->me != 1 || ring->pipe != 0); /* can't handle other cases eop address */
-	eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring->queue * MEC_HPD_SIZE);
+	eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring->queue * GFX9_MEC_HPD_SIZE);
 	eop_gpu_addr >>= 8;
 
 	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, lower_32_bits(eop_gpu_addr));
 	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
 	mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_gpu_addr);
 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_gpu_addr);
 
 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
-				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
+				    (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
 	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, tmp);
 
 	/* enable doorbell? */
 	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
 	if (use_doorbell)
 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
 	else
 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
 
 	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
 	mqd->cp_hqd_pq_doorbell_control = tmp;
 
 	/* disable the queue if it's active */
 	ring->wptr = 0;
 	mqd->cp_hqd_dequeue_request = 0;
 	mqd->cp_hqd_pq_rptr = 0;
 	mqd->cp_hqd_pq_wptr_lo = 0;
 	mqd->cp_hqd_pq_wptr_hi = 0;
 	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
 		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
-- 
2.9.3


* [PATCH 02/17] drm/amdgpu: refactor MQD/HQD initialization v3
From: Andres Rodriguez @ 2017-04-13 21:35 UTC
  To: amd-gfx@lists.freedesktop.org; +Cc: andresx7@gmail.com

The MQD programming sequence currently exists in three different places.
Refactor it to absorb all the duplicates.

The success path remains mostly identical, except for a slightly
different programming order in the non-KIQ case. This shouldn't matter
while the HQD is disabled.

The error handling paths have been updated to deal with the new code
structure.

v2: the non-kiq path for gfxv8 was dropped in the rebase
v3: split MEC_HPD_SIZE rename, dropped doorbell changes

Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Acked-by: Christian König <christian.koenig@amd.com>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 439 ++++++++++++++++++----------------
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c |  78 +++---
 2 files changed, 271 insertions(+), 246 deletions(-)
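
For orientation, the refactor splits the duplicated sequence into three
steps per queue: build the MQD image in memory, drain any active HQD,
then commit the image to the HQD registers. A compilable sketch of that
control flow (function names mirror the diff below; the bodies are
hypothetical stand-ins, not the real register programming):

#include <stdio.h>

struct mqd {
	int active;	/* stand-in for the real bonaire_mqd fields */
};

/* stand-in for gfx_v7_0_mqd_init(): fill the MQD image in memory */
static void mqd_init(struct mqd *m)
{
	m->active = 1;
}

/* stand-in for gfx_v7_0_mqd_deactivate(): drain the HQD, may time out */
static int mqd_deactivate(void)
{
	return 0;	/* the kernel returns -ETIMEDOUT on timeout */
}

/* stand-in for gfx_v7_0_mqd_commit(): write the MQD image to the HQD registers */
static void mqd_commit(const struct mqd *m)
{
	printf("committing MQD, active=%d\n", m->active);
}

int main(void)
{
	struct mqd m;

	/* same ordering as gfx_v7_0_compute_queue_init(), which runs this
	 * under srbm_mutex with the queue selected via cik_srbm_select() */
	mqd_init(&m);
	mqd_deactivate();	/* return value is checked starting with patch 03 (gfx8) */
	mqd_commit(&m);
	return 0;
}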

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 3b98162..4e6a60c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2927,281 +2927,316 @@ struct bonaire_mqd
 	u32 perf_counter_enable;
 	u32 pgm[2];
 	u32 tba[2];
 	u32 tma[2];
 	u32 pgm_rsrc[2];
 	u32 vmid;
 	u32 resource_limits;
 	u32 static_thread_mgmt01[2];
 	u32 tmp_ring_size;
 	u32 static_thread_mgmt23[2];
 	u32 restart[3];
 	u32 thread_trace_enable;
 	u32 reserved1;
 	u32 user_data[16];
 	u32 vgtcs_invoke_count[2];
 	struct hqd_registers queue_state;
 	u32 dequeue_cntr;
 	u32 interrupt_queue[64];
 };
 
-/**
- * gfx_v7_0_cp_compute_resume - setup the compute queue registers
- *
- * @adev: amdgpu_device pointer
- *
- * Program the compute queues and test them to make sure they
- * are working.
- * Returns 0 for success, error for failure.
- */
-static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
+static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev, int me, int pipe)
 {
-	int r, i, j;
-	u32 tmp;
-	bool use_doorbell = true;
-	u64 hqd_gpu_addr;
-	u64 mqd_gpu_addr;
 	u64 eop_gpu_addr;
-	u64 wb_gpu_addr;
-	u32 *buf;
-	struct bonaire_mqd *mqd;
-	struct amdgpu_ring *ring;
-
-	/* fix up chicken bits */
-	tmp = RREG32(mmCP_CPF_DEBUG);
-	tmp |= (1 << 23);
-	WREG32(mmCP_CPF_DEBUG, tmp);
+	u32 tmp;
+	size_t eop_offset = me * pipe * GFX7_MEC_HPD_SIZE * 2;
 
-	/* init the pipes */
 	mutex_lock(&adev->srbm_mutex);
-	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
-		int me = (i < 4) ? 1 : 2;
-		int pipe = (i < 4) ? i : (i - 4);
+	eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset;
 
-		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * GFX7_MEC_HPD_SIZE * 2);
+	cik_srbm_select(adev, me, pipe, 0, 0);
 
-		cik_srbm_select(adev, me, pipe, 0, 0);
+	/* write the EOP addr */
+	WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
+	WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
 
-		/* write the EOP addr */
-		WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
-		WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
+	/* set the VMID assigned */
+	WREG32(mmCP_HPD_EOP_VMID, 0);
 
-		/* set the VMID assigned */
-		WREG32(mmCP_HPD_EOP_VMID, 0);
+	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+	tmp = RREG32(mmCP_HPD_EOP_CONTROL);
+	tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
+	tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8);
+	WREG32(mmCP_HPD_EOP_CONTROL, tmp);
 
-		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-		tmp = RREG32(mmCP_HPD_EOP_CONTROL);
-		tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
-		tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8);
-		WREG32(mmCP_HPD_EOP_CONTROL, tmp);
-	}
 	cik_srbm_select(adev, 0, 0, 0, 0);
 	mutex_unlock(&adev->srbm_mutex);
+}
 
-	/* init the queues.  Just two for now. */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		ring = &adev->gfx.compute_ring[i];
+static int gfx_v7_0_mqd_deactivate(struct amdgpu_device *adev)
+{
+	int i;
 
-		if (ring->mqd_obj == NULL) {
-			r = amdgpu_bo_create(adev,
-					     sizeof(struct bonaire_mqd),
-					     PAGE_SIZE, true,
-					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
-					     &ring->mqd_obj);
-			if (r) {
-				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
-				return r;
-			}
+	/* disable the queue if it's active */
+	if (RREG32(mmCP_HQD_ACTIVE) & 1) {
+		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
+		for (i = 0; i < adev->usec_timeout; i++) {
+			if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
+				break;
+			udelay(1);
 		}
 
-		r = amdgpu_bo_reserve(ring->mqd_obj, false);
-		if (unlikely(r != 0)) {
-			gfx_v7_0_cp_compute_fini(adev);
-			return r;
-		}
-		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
-				  &mqd_gpu_addr);
-		if (r) {
-			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
-			gfx_v7_0_cp_compute_fini(adev);
-			return r;
-		}
-		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
-		if (r) {
-			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
-			gfx_v7_0_cp_compute_fini(adev);
-			return r;
-		}
+		if (i == adev->usec_timeout)
+			return -ETIMEDOUT;
 
-		/* init the mqd struct */
-		memset(buf, 0, sizeof(struct bonaire_mqd));
+		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
+		WREG32(mmCP_HQD_PQ_RPTR, 0);
+		WREG32(mmCP_HQD_PQ_WPTR, 0);
+	}
 
-		mqd = (struct bonaire_mqd *)buf;
-		mqd->header = 0xC0310800;
-		mqd->static_thread_mgmt01[0] = 0xffffffff;
-		mqd->static_thread_mgmt01[1] = 0xffffffff;
-		mqd->static_thread_mgmt23[0] = 0xffffffff;
-		mqd->static_thread_mgmt23[1] = 0xffffffff;
+	return 0;
+}
 
-		mutex_lock(&adev->srbm_mutex);
-		cik_srbm_select(adev, ring->me,
-				ring->pipe,
-				ring->queue, 0);
+static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
+			     struct bonaire_mqd *mqd,
+			     uint64_t mqd_gpu_addr,
+			     struct amdgpu_ring *ring)
+{
+	u64 hqd_gpu_addr;
+	u64 wb_gpu_addr;
 
-		/* disable wptr polling */
-		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
-		tmp &= ~CP_PQ_WPTR_POLL_CNTL__EN_MASK;
-		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
+	/* init the mqd struct */
+	memset(mqd, 0, sizeof(struct bonaire_mqd));
 
-		/* enable doorbell? */
-		mqd->queue_state.cp_hqd_pq_doorbell_control =
-			RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
-		if (use_doorbell)
-			mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
-		else
-			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
-		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
-		       mqd->queue_state.cp_hqd_pq_doorbell_control);
-
-		/* disable the queue if it's active */
-		mqd->queue_state.cp_hqd_dequeue_request = 0;
-		mqd->queue_state.cp_hqd_pq_rptr = 0;
-		mqd->queue_state.cp_hqd_pq_wptr= 0;
-		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
-			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
-			for (j = 0; j < adev->usec_timeout; j++) {
-				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
-					break;
-				udelay(1);
-			}
-			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
-			WREG32(mmCP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
-			WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
-		}
+	mqd->header = 0xC0310800;
+	mqd->static_thread_mgmt01[0] = 0xffffffff;
+	mqd->static_thread_mgmt01[1] = 0xffffffff;
+	mqd->static_thread_mgmt23[0] = 0xffffffff;
+	mqd->static_thread_mgmt23[1] = 0xffffffff;
 
-		/* set the pointer to the MQD */
-		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
-		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
-		WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
-		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
-		/* set MQD vmid to 0 */
-		mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
-		mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
-		WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
-
-		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
-		hqd_gpu_addr = ring->gpu_addr >> 8;
-		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
-		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
-		WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
-		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
-
-		/* set up the HQD, this is similar to CP_RB0_CNTL */
-		mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
-		mqd->queue_state.cp_hqd_pq_control &=
-			~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
-					CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
-
-		mqd->queue_state.cp_hqd_pq_control |=
-			order_base_2(ring->ring_size / 8);
-		mqd->queue_state.cp_hqd_pq_control |=
-			(order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
+	/* enable doorbell? */
+	mqd->queue_state.cp_hqd_pq_doorbell_control =
+		RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
+	if (ring->use_doorbell)
+		mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+	else
+		mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+
+	/* set the pointer to the MQD */
+	mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
+	mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
+
+	/* set MQD vmid to 0 */
+	mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
+	mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
+
+	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
+	hqd_gpu_addr = ring->gpu_addr >> 8;
+	mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
+	mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+
+	/* set up the HQD, this is similar to CP_RB0_CNTL */
+	mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
+	mqd->queue_state.cp_hqd_pq_control &=
+		~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
+				CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
+
+	mqd->queue_state.cp_hqd_pq_control |=
+		order_base_2(ring->ring_size / 8);
+	mqd->queue_state.cp_hqd_pq_control |=
+		(order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
 #ifdef __BIG_ENDIAN
-		mqd->queue_state.cp_hqd_pq_control |=
-			2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
+	mqd->queue_state.cp_hqd_pq_control |=
+		2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
 #endif
-		mqd->queue_state.cp_hqd_pq_control &=
-			~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
+	mqd->queue_state.cp_hqd_pq_control &=
+		~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
 				CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK |
 				CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK);
-		mqd->queue_state.cp_hqd_pq_control |=
-			CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
-			CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
-		WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
-
-		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
-		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
-		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
-		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
-		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
-		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
-
-		/* set the wb address wether it's enabled or not */
-		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
-		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
-		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
-			upper_32_bits(wb_gpu_addr) & 0xffff;
-		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
-		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
-		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
-		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
-
-		/* enable the doorbell if requested */
-		if (use_doorbell) {
-			mqd->queue_state.cp_hqd_pq_doorbell_control =
-				RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
-			mqd->queue_state.cp_hqd_pq_doorbell_control &=
-				~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
-			mqd->queue_state.cp_hqd_pq_doorbell_control |=
-				(ring->doorbell_index <<
-				 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
-			mqd->queue_state.cp_hqd_pq_doorbell_control |=
-				CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
-			mqd->queue_state.cp_hqd_pq_doorbell_control &=
-				~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
-				CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
+	mqd->queue_state.cp_hqd_pq_control |=
+		CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
+		CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
 
-		} else {
-			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
+	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
+	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+	mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
+	mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+
+	/* set the wb address whether it's enabled or not */
+	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
+	mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
+	mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
+		upper_32_bits(wb_gpu_addr) & 0xffff;
+
+	/* enable the doorbell if requested */
+	if (ring->use_doorbell) {
+		mqd->queue_state.cp_hqd_pq_doorbell_control =
+			RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
+		mqd->queue_state.cp_hqd_pq_doorbell_control &=
+			~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
+		mqd->queue_state.cp_hqd_pq_doorbell_control |=
+			(ring->doorbell_index <<
+			 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
+		mqd->queue_state.cp_hqd_pq_doorbell_control |=
+			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+		mqd->queue_state.cp_hqd_pq_doorbell_control &=
+			~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
+					CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
+
+	} else {
+		mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
+	}
+
+	/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
+	ring->wptr = 0;
+	mqd->queue_state.cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
+	mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
+
+	/* set the vmid for the queue */
+	mqd->queue_state.cp_hqd_vmid = 0;
+
+	/* activate the queue */
+	mqd->queue_state.cp_hqd_active = 1;
+}
+
+static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev,
+			       struct bonaire_mqd *mqd)
+{
+	u32 tmp;
+
+	/* disable wptr polling */
+	tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
+	tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
+	WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
+
+	/* program MQD field to HW */
+	WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
+	WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
+	WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
+	WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
+	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
+	WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
+	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
+	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
+	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, mqd->queue_state.cp_hqd_pq_rptr_report_addr);
+	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
+	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->queue_state.cp_hqd_pq_doorbell_control);
+	WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
+	WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
+
+	/* activate the HQD */
+	WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
+
+	return 0;
+}
+
+static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id)
+{
+	int r;
+	u64 mqd_gpu_addr;
+	struct bonaire_mqd *mqd;
+	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+	if (ring->mqd_obj == NULL) {
+		r = amdgpu_bo_create(adev,
+				sizeof(struct bonaire_mqd),
+				PAGE_SIZE, true,
+				AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
+				&ring->mqd_obj);
+		if (r) {
+			dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
+			return r;
 		}
-		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
-		       mqd->queue_state.cp_hqd_pq_doorbell_control);
+	}
 
-		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
-		ring->wptr = 0;
-		mqd->queue_state.cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
-		WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
-		mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
+	r = amdgpu_bo_reserve(ring->mqd_obj, false);
+	if (unlikely(r != 0))
+		goto out;
 
-		/* set the vmid for the queue */
-		mqd->queue_state.cp_hqd_vmid = 0;
-		WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
+	r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
+			&mqd_gpu_addr);
+	if (r) {
+		dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
+		goto out_unreserve;
+	}
+	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd);
+	if (r) {
+		dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
+		goto out_unreserve;
+	}
 
-		/* activate the queue */
-		mqd->queue_state.cp_hqd_active = 1;
-		WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
+	mutex_lock(&adev->srbm_mutex);
+	cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 
-		cik_srbm_select(adev, 0, 0, 0, 0);
-		mutex_unlock(&adev->srbm_mutex);
+	gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring);
+	gfx_v7_0_mqd_deactivate(adev);
+	gfx_v7_0_mqd_commit(adev, mqd);
 
-		amdgpu_bo_kunmap(ring->mqd_obj);
-		amdgpu_bo_unreserve(ring->mqd_obj);
+	cik_srbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
 
-		ring->ready = true;
+	amdgpu_bo_kunmap(ring->mqd_obj);
+out_unreserve:
+	amdgpu_bo_unreserve(ring->mqd_obj);
+out:
+	return 0;
+}
+
+/**
+ * gfx_v7_0_cp_compute_resume - setup the compute queue registers
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Program the compute queues and test them to make sure they
+ * are working.
+ * Returns 0 for success, error for failure.
+ */
+static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
+{
+	int r, i, j;
+	u32 tmp;
+	struct amdgpu_ring *ring;
+
+	/* fix up chicken bits */
+	tmp = RREG32(mmCP_CPF_DEBUG);
+	tmp |= (1 << 23);
+	WREG32(mmCP_CPF_DEBUG, tmp);
+
+	/* init the pipes */
+	for (i = 0; i < adev->gfx.mec.num_mec; i++)
+		for (j = 0; j < adev->gfx.mec.num_pipe; j++)
+			gfx_v7_0_compute_pipe_init(adev, i, j);
+
+	/* init the queues */
+	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+		r = gfx_v7_0_compute_queue_init(adev, i);
+		if (r) {
+			gfx_v7_0_cp_compute_fini(adev);
+			return r;
+		}
 	}
 
 	gfx_v7_0_cp_compute_enable(adev, true);
 
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		ring = &adev->gfx.compute_ring[i];
-
+		ring->ready = true;
 		r = amdgpu_ring_test_ring(ring);
 		if (r)
 			ring->ready = false;
 	}
 
 	return 0;
 }
 
 static void gfx_v7_0_cp_enable(struct amdgpu_device *adev, bool enable)
 {
 	gfx_v7_0_cp_gfx_enable(adev, enable);
 	gfx_v7_0_cp_compute_enable(adev, enable);
 }
 
 static int gfx_v7_0_cp_load_microcode(struct amdgpu_device *adev)
 {
 	int r;
 
 	r = gfx_v7_0_cp_gfx_load_microcode(adev);
 	if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index fc94c3a..b670302 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4735,79 +4735,98 @@ static int gfx_v8_0_kiq_kcq_disable(struct amdgpu_device *adev)
 	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
 	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
 	amdgpu_ring_commit(kiq_ring);
 
 	for (i = 0; i < adev->usec_timeout; i++) {
 		tmp = RREG32(scratch);
 		if (tmp == 0xDEADBEEF)
 			break;
 		DRM_UDELAY(1);
 	}
 	if (i >= adev->usec_timeout) {
 		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
 			  scratch, tmp);
 		r = -EINVAL;
 	}
 	amdgpu_gfx_scratch_free(adev, scratch);
 
 	return r;
 }
 
+static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
+{
+	int i, r = 0;
+
+	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
+		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
+		for (i = 0; i < adev->usec_timeout; i++) {
+			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
+				break;
+			udelay(1);
+		}
+		if (i == adev->usec_timeout)
+			r = -ETIMEDOUT;
+	}
+	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
+	WREG32(mmCP_HQD_PQ_RPTR, 0);
+	WREG32(mmCP_HQD_PQ_WPTR, 0);
+
+	return r;
+}
+
 static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct vi_mqd *mqd = ring->mqd_ptr;
 	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
 	uint32_t tmp;
 
+	/* init the mqd struct */
+	memset(mqd, 0, sizeof(struct vi_mqd));
+
 	mqd->header = 0xC0310800;
 	mqd->compute_pipelinestat_enable = 0x00000001;
 	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
 	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
 	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
 	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
 	mqd->compute_misc_reserved = 0x00000003;
 
 	eop_base_addr = ring->eop_gpu_addr >> 8;
 	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
 	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
 
 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
 	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
 			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
 
 	mqd->cp_hqd_eop_control = tmp;
 
 	/* enable doorbell? */
 	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
 			    CP_HQD_PQ_DOORBELL_CONTROL,
 			    DOORBELL_EN,
 			    ring->use_doorbell ? 1 : 0);
 
 	mqd->cp_hqd_pq_doorbell_control = tmp;
 
-	/* disable the queue if it's active */
-	mqd->cp_hqd_dequeue_request = 0;
-	mqd->cp_hqd_pq_rptr = 0;
-	mqd->cp_hqd_pq_wptr = 0;
-
 	/* set the pointer to the MQD */
 	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
 	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
 
 	/* set MQD vmid to 0 */
 	tmp = RREG32(mmCP_MQD_CONTROL);
 	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
 	mqd->cp_mqd_control = tmp;
 
 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
 	hqd_gpu_addr = ring->gpu_addr >> 8;
 	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
 	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
 
 	/* set up the HQD, this is similar to CP_RB0_CNTL */
 	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
 			    (order_base_2(ring->ring_size / 4) - 1));
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
 			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
@@ -4863,157 +4882,149 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 	/* set MTYPE */
 	tmp = RREG32(mmCP_HQD_IB_CONTROL);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
 	mqd->cp_hqd_ib_control = tmp;
 
 	tmp = RREG32(mmCP_HQD_IQ_TIMER);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
 	mqd->cp_hqd_iq_timer = tmp;
 
 	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
 	mqd->cp_hqd_ctx_save_control = tmp;
 
 	/* activate the queue */
 	mqd->cp_hqd_active = 1;
 
 	return 0;
 }
 
-static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring)
+static int gfx_v8_0_mqd_commit(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct vi_mqd *mqd = ring->mqd_ptr;
-	int j;
 
 	/* disable wptr polling */
 	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
 
 	WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
 	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);
 
 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
 	WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);
 
 	/* enable doorbell? */
 	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
 
-	/* disable the queue if it's active */
-	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
-		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
-		for (j = 0; j < adev->usec_timeout; j++) {
-			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
-				break;
-			udelay(1);
-		}
-		WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
-		WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
-		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
-	}
+	/* set pq read/write pointers */
+	WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
+	WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
+	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
 
 	/* set the pointer to the MQD */
 	WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
 	WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
 
 	/* set MQD vmid to 0 */
 	WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);
 
 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
 	WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
 	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
 
 	/* set up the HQD, this is similar to CP_RB0_CNTL */
 	WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
 
 	/* set the wb address whether it's enabled or not */
 	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
 				mqd->cp_hqd_pq_rptr_report_addr_lo);
 	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
 				mqd->cp_hqd_pq_rptr_report_addr_hi);
 
 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
 	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
 	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);
 
+	/* enable the doorbell if requested */
 	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
 
 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
 	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
 
 	/* set the vmid for the queue */
 	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
 
 	WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
 
 	/* activate the queue */
 	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
 
 	return 0;
 }
 
 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct vi_mqd *mqd = ring->mqd_ptr;
 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
 
 	gfx_v8_0_kiq_setting(ring);
 
 	if (adev->gfx.in_reset) { /* for GPU_RESET case */
 		/* reset MQD to a clean status */
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
 
 		/* reset ring buffer */
 		ring->wptr = 0;
 		amdgpu_ring_clear_ring(ring);
 
 		mutex_lock(&adev->srbm_mutex);
 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-		gfx_v8_0_kiq_init_register(ring);
+		gfx_v8_0_deactivate_hqd(adev, 1);
+		gfx_v8_0_mqd_commit(ring);
 		vi_srbm_select(adev, 0, 0, 0, 0);
 		mutex_unlock(&adev->srbm_mutex);
 	} else {
-		memset((void *)mqd, 0, sizeof(*mqd));
 		mutex_lock(&adev->srbm_mutex);
 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 		gfx_v8_0_mqd_init(ring);
-		gfx_v8_0_kiq_init_register(ring);
+		gfx_v8_0_deactivate_hqd(adev, 1);
+		gfx_v8_0_mqd_commit(ring);
 		vi_srbm_select(adev, 0, 0, 0, 0);
 		mutex_unlock(&adev->srbm_mutex);
 
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
 	}
 
 	return 0;
 }
 
 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct vi_mqd *mqd = ring->mqd_ptr;
 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
 
 	if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
-		memset((void *)mqd, 0, sizeof(*mqd));
 		mutex_lock(&adev->srbm_mutex);
 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 		gfx_v8_0_mqd_init(ring);
 		vi_srbm_select(adev, 0, 0, 0, 0);
 		mutex_unlock(&adev->srbm_mutex);
 
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
 	} else if (adev->gfx.in_reset) { /* for GPU_RESET case */
 		/* reset MQD to a clean status */
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
 
 		/* reset ring buffer */
 		ring->wptr = 0;
 		amdgpu_ring_clear_ring(ring);
 	}
 
 	return 0;
 }
@@ -5284,61 +5295,40 @@ static bool gfx_v8_0_check_soft_reset(void *handle)
 	/* SRBM_STATUS */
 	tmp = RREG32(mmSRBM_STATUS);
 	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
 						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
 	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
 		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
 						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
 
 	if (grbm_soft_reset || srbm_soft_reset) {
 		adev->gfx.grbm_soft_reset = grbm_soft_reset;
 		adev->gfx.srbm_soft_reset = srbm_soft_reset;
 		return true;
 	} else {
 		adev->gfx.grbm_soft_reset = 0;
 		adev->gfx.srbm_soft_reset = 0;
 		return false;
 	}
 }
 
-static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
-{
-	int i, r = 0;
-
-	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
-		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
-		for (i = 0; i < adev->usec_timeout; i++) {
-			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
-				break;
-			udelay(1);
-		}
-		if (i == adev->usec_timeout)
-			r = -ETIMEDOUT;
-	}
-	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
-	WREG32(mmCP_HQD_PQ_RPTR, 0);
-	WREG32(mmCP_HQD_PQ_WPTR, 0);
-
-	return r;
-}
-
 static int gfx_v8_0_pre_soft_reset(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
 
 	if ((!adev->gfx.grbm_soft_reset) &&
 	    (!adev->gfx.srbm_soft_reset))
 		return 0;
 
 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
 	srbm_soft_reset = adev->gfx.srbm_soft_reset;
 
 	/* stop the rlc */
 	gfx_v8_0_rlc_stop(adev);
 
 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
 		/* Disable GFX parsing/prefetching */
 		gfx_v8_0_cp_gfx_enable(adev, false);
 
-- 
2.9.3


* [PATCH 03/17] drm/amdgpu: detect timeout error when deactivating hqd
From: Andres Rodriguez @ 2017-04-13 21:35 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: andresx7-Re5JQEeQqe8AvxtiuMwx3w

Handle HQD deactivation timeouts instead of ignoring them.
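
For context, the deactivation helper issues a dequeue request and then
polls CP_HQD_ACTIVE until the queue goes idle. The sketch below is
condensed from gfx_v8_0_deactivate_hqd (register resets elided) and
shows where the -ETIMEDOUT that is now propagated comes from:

	/* condensed polling loop; r is what the callers now check */
	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
				break;
			udelay(1);
		}
		if (i == adev->usec_timeout)
			r = -ETIMEDOUT;
	}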

Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Acked-by: Christian König <christian.koenig@amd.com>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index b670302..cd1af26 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4947,75 +4947,89 @@ static int gfx_v8_0_mqd_commit(struct amdgpu_ring *ring)
 
 	/* enable the doorbell if requested */
 	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
 
 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
 	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
 
 	/* set the vmid for the queue */
 	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
 
 	WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
 
 	/* activate the queue */
 	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
 
 	return 0;
 }
 
 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
 {
+	int r = 0;
 	struct amdgpu_device *adev = ring->adev;
 	struct vi_mqd *mqd = ring->mqd_ptr;
 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
 
 	gfx_v8_0_kiq_setting(ring);
 
 	if (adev->gfx.in_reset) { /* for GPU_RESET case */
 		/* reset MQD to a clean status */
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
 
 		/* reset ring buffer */
 		ring->wptr = 0;
 		amdgpu_ring_clear_ring(ring);
 
 		mutex_lock(&adev->srbm_mutex);
 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
-		gfx_v8_0_deactivate_hqd(adev, 1);
+		r = gfx_v8_0_deactivate_hqd(adev, 1);
+		if (r) {
+			dev_err(adev->dev, "failed to deactivate ring %s\n", ring->name);
+			goto out_unlock;
+		}
 		gfx_v8_0_mqd_commit(ring);
 		vi_srbm_select(adev, 0, 0, 0, 0);
 		mutex_unlock(&adev->srbm_mutex);
 	} else {
 		mutex_lock(&adev->srbm_mutex);
 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 		gfx_v8_0_mqd_init(ring);
-		gfx_v8_0_deactivate_hqd(adev, 1);
+		r = gfx_v8_0_deactivate_hqd(adev, 1);
+		if (r) {
+			dev_err(adev->dev, "failed to deactivate ring %s\n", ring->name);
+			goto out_unlock;
+		}
 		gfx_v8_0_mqd_commit(ring);
 		vi_srbm_select(adev, 0, 0, 0, 0);
 		mutex_unlock(&adev->srbm_mutex);
 
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
 	}
 
-	return 0;
+	return r;
+
+out_unlock:
+	vi_srbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+	return r;
 }
 
 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct vi_mqd *mqd = ring->mqd_ptr;
 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
 
 	if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
 		mutex_lock(&adev->srbm_mutex);
 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 		gfx_v8_0_mqd_init(ring);
 		vi_srbm_select(adev, 0, 0, 0, 0);
 		mutex_unlock(&adev->srbm_mutex);
 
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
 	} else if (adev->gfx.in_reset) { /* for GPU_RESET case */
 		/* reset MQD to a clean status */
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
-- 
2.9.3


* [PATCH 04/17] drm/amdgpu: remove duplicate definition of cik_mqd
From: Andres Rodriguez @ 2017-04-13 21:35 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: andresx7-Re5JQEeQqe8AvxtiuMwx3w

The gfx v7 code contains a slightly different version of cik_mqd called
bonaire_mqd. Keeping two definitions of the same hardware structure can
introduce subtle bugs when a fix is applied to one copy but not the
other, so drop the duplicate and use cik_mqd everywhere.
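
To illustrate the divergence being removed, the same MQD fields were
spelled differently in the two structs (pairings taken from the hunk
below), so a fix to one spelling was easy to miss in the other:

	/* bonaire_mqd (duplicate)           cik_mqd (shared definition)
	 * queue_state.cp_mqd_base_addr  ->  cp_mqd_base_addr_lo
	 * static_thread_mgmt01[0]       ->  compute_static_thread_mgmt_se0
	 * queue_state.cp_hqd_pq_base    ->  cp_hqd_pq_base_lo
	 */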

Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Acked-by: Christian König <christian.koenig@amd.com>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 135 ++++++++++++++--------------------
 1 file changed, 54 insertions(+), 81 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 4e6a60c..c408af5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -10,40 +10,41 @@
  *
  * The above copyright notice and this permission notice shall be included in
  * all copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
 #include <linux/firmware.h>
 #include "drmP.h"
 #include "amdgpu.h"
 #include "amdgpu_ih.h"
 #include "amdgpu_gfx.h"
 #include "cikd.h"
 #include "cik.h"
+#include "cik_structs.h"
 #include "atom.h"
 #include "amdgpu_ucode.h"
 #include "clearstate_ci.h"
 
 #include "dce/dce_8_0_d.h"
 #include "dce/dce_8_0_sh_mask.h"
 
 #include "bif/bif_4_1_d.h"
 #include "bif/bif_4_1_sh_mask.h"
 
 #include "gca/gfx_7_0_d.h"
 #include "gca/gfx_7_2_enum.h"
 #include "gca/gfx_7_2_sh_mask.h"
 
 #include "gmc/gmc_7_0_d.h"
 #include "gmc/gmc_7_0_sh_mask.h"
 
 #include "oss/oss_2_0_d.h"
 #include "oss/oss_2_0_sh_mask.h"
 
@@ -2899,68 +2900,40 @@ struct hqd_registers
 	u32 cp_hqd_pq_control;
 	u32 cp_hqd_ib_base_addr;
 	u32 cp_hqd_ib_base_addr_hi;
 	u32 cp_hqd_ib_rptr;
 	u32 cp_hqd_ib_control;
 	u32 cp_hqd_iq_timer;
 	u32 cp_hqd_iq_rptr;
 	u32 cp_hqd_dequeue_request;
 	u32 cp_hqd_dma_offload;
 	u32 cp_hqd_sema_cmd;
 	u32 cp_hqd_msg_type;
 	u32 cp_hqd_atomic0_preop_lo;
 	u32 cp_hqd_atomic0_preop_hi;
 	u32 cp_hqd_atomic1_preop_lo;
 	u32 cp_hqd_atomic1_preop_hi;
 	u32 cp_hqd_hq_scheduler0;
 	u32 cp_hqd_hq_scheduler1;
 	u32 cp_mqd_control;
 };
 
-struct bonaire_mqd
-{
-	u32 header;
-	u32 dispatch_initiator;
-	u32 dimensions[3];
-	u32 start_idx[3];
-	u32 num_threads[3];
-	u32 pipeline_stat_enable;
-	u32 perf_counter_enable;
-	u32 pgm[2];
-	u32 tba[2];
-	u32 tma[2];
-	u32 pgm_rsrc[2];
-	u32 vmid;
-	u32 resource_limits;
-	u32 static_thread_mgmt01[2];
-	u32 tmp_ring_size;
-	u32 static_thread_mgmt23[2];
-	u32 restart[3];
-	u32 thread_trace_enable;
-	u32 reserved1;
-	u32 user_data[16];
-	u32 vgtcs_invoke_count[2];
-	struct hqd_registers queue_state;
-	u32 dequeue_cntr;
-	u32 interrupt_queue[64];
-};
-
 static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev, int me, int pipe)
 {
 	u64 eop_gpu_addr;
 	u32 tmp;
 	size_t eop_offset = me * pipe * GFX7_MEC_HPD_SIZE * 2;
 
 	mutex_lock(&adev->srbm_mutex);
 	eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset;
 
 	cik_srbm_select(adev, me, pipe, 0, 0);
 
 	/* write the EOP addr */
 	WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
 	WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
 
 	/* set the VMID assigned */
 	WREG32(mmCP_HPD_EOP_VMID, 0);
 
 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
 	tmp = RREG32(mmCP_HPD_EOP_CONTROL);
@@ -2980,182 +2953,182 @@ static int gfx_v7_0_mqd_deactivate(struct amdgpu_device *adev)
 	if (RREG32(mmCP_HQD_ACTIVE) & 1) {
 		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
 		for (i = 0; i < adev->usec_timeout; i++) {
 			if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
 				break;
 			udelay(1);
 		}
 
 		if (i == adev->usec_timeout)
 			return -ETIMEDOUT;
 
 		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
 		WREG32(mmCP_HQD_PQ_RPTR, 0);
 		WREG32(mmCP_HQD_PQ_WPTR, 0);
 	}
 
 	return 0;
 }
 
 static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
-			     struct bonaire_mqd *mqd,
+			     struct cik_mqd *mqd,
 			     uint64_t mqd_gpu_addr,
 			     struct amdgpu_ring *ring)
 {
 	u64 hqd_gpu_addr;
 	u64 wb_gpu_addr;
 
 	/* init the mqd struct */
-	memset(mqd, 0, sizeof(struct bonaire_mqd));
+	memset(mqd, 0, sizeof(struct cik_mqd));
 
 	mqd->header = 0xC0310800;
-	mqd->static_thread_mgmt01[0] = 0xffffffff;
-	mqd->static_thread_mgmt01[1] = 0xffffffff;
-	mqd->static_thread_mgmt23[0] = 0xffffffff;
-	mqd->static_thread_mgmt23[1] = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
+	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
 
 	/* enable doorbell? */
-	mqd->queue_state.cp_hqd_pq_doorbell_control =
+	mqd->cp_hqd_pq_doorbell_control =
 		RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
 	if (ring->use_doorbell)
-		mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+		mqd->cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
 	else
-		mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
+		mqd->cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
 
 	/* set the pointer to the MQD */
-	mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
-	mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
+	mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
+	mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
 
 	/* set MQD vmid to 0 */
-	mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
-	mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
+	mqd->cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
+	mqd->cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
 
 	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
 	hqd_gpu_addr = ring->gpu_addr >> 8;
-	mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
-	mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
+	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
+	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
 
 	/* set up the HQD, this is similar to CP_RB0_CNTL */
-	mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
-	mqd->queue_state.cp_hqd_pq_control &=
+	mqd->cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
+	mqd->cp_hqd_pq_control &=
 		~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
 				CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
 
-	mqd->queue_state.cp_hqd_pq_control |=
+	mqd->cp_hqd_pq_control |=
 		order_base_2(ring->ring_size / 8);
-	mqd->queue_state.cp_hqd_pq_control |=
+	mqd->cp_hqd_pq_control |=
 		(order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
 #ifdef __BIG_ENDIAN
-	mqd->queue_state.cp_hqd_pq_control |=
+	mqd->cp_hqd_pq_control |=
 		2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
 #endif
-	mqd->queue_state.cp_hqd_pq_control &=
+	mqd->cp_hqd_pq_control &=
 		~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
 				CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK |
 				CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK);
-	mqd->queue_state.cp_hqd_pq_control |=
+	mqd->cp_hqd_pq_control |=
 		CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
 		CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
 
 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
 	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-	mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
-	mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
+	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
+	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
 
 	/* set the wb address wether it's enabled or not */
 	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
-	mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
-	mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
+	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
+	mqd->cp_hqd_pq_rptr_report_addr_hi =
 		upper_32_bits(wb_gpu_addr) & 0xffff;
 
 	/* enable the doorbell if requested */
 	if (ring->use_doorbell) {
-		mqd->queue_state.cp_hqd_pq_doorbell_control =
+		mqd->cp_hqd_pq_doorbell_control =
 			RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
-		mqd->queue_state.cp_hqd_pq_doorbell_control &=
+		mqd->cp_hqd_pq_doorbell_control &=
 			~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
-		mqd->queue_state.cp_hqd_pq_doorbell_control |=
+		mqd->cp_hqd_pq_doorbell_control |=
 			(ring->doorbell_index <<
 			 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
-		mqd->queue_state.cp_hqd_pq_doorbell_control |=
+		mqd->cp_hqd_pq_doorbell_control |=
 			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
-		mqd->queue_state.cp_hqd_pq_doorbell_control &=
+		mqd->cp_hqd_pq_doorbell_control &=
 			~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
 					CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
 
 	} else {
-		mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
+		mqd->cp_hqd_pq_doorbell_control = 0;
 	}
 
 	/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
 	ring->wptr = 0;
-	mqd->queue_state.cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
-	mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
+	mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
+	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
 
 	/* set the vmid for the queue */
-	mqd->queue_state.cp_hqd_vmid = 0;
+	mqd->cp_hqd_vmid = 0;
 
 	/* activate the queue */
-	mqd->queue_state.cp_hqd_active = 1;
+	mqd->cp_hqd_active = 1;
 }
 
 static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev,
-			       struct bonaire_mqd *mqd)
+			       struct cik_mqd *mqd)
 {
 	u32 tmp;
 
 	/* disable wptr polling */
 	tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
 	tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
 	WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
 
 	/* program MQD field to HW */
-	WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
-	WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
-	WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
-	WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
-	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
-	WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
-	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
-	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, mqd->queue_state.cp_hqd_pq_rptr_report_addr);
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
-	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->queue_state.cp_hqd_pq_doorbell_control);
-	WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
-	WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
+	WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
+	WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
+	WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);
+	WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
+	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
+	WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
+	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
+	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);
+	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, mqd->cp_hqd_pq_rptr_report_addr_lo);
+	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, mqd->cp_hqd_pq_rptr_report_addr_hi);
+	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
+	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
+	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
 
 	/* activate the HQD */
-	WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
+	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
 
 	return 0;
 }
 
 static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id)
 {
 	int r;
 	u64 mqd_gpu_addr;
-	struct bonaire_mqd *mqd;
+	struct cik_mqd *mqd;
 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
 
 	if (ring->mqd_obj == NULL) {
 		r = amdgpu_bo_create(adev,
-				sizeof(struct bonaire_mqd),
+				sizeof(struct cik_mqd),
 				PAGE_SIZE, true,
 				AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 				&ring->mqd_obj);
 		if (r) {
 			dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
 			return r;
 		}
 	}
 
 	r = amdgpu_bo_reserve(ring->mqd_obj, false);
 	if (unlikely(r != 0))
 		goto out;
 
 	r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
 			&mqd_gpu_addr);
 	if (r) {
 		dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
 		goto out_unreserve;
 	}
 	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd);
-- 
2.9.3


* [PATCH 05/17] drm/amdgpu: unify MQD programming sequence for kfd and amdgpu v2
From: Andres Rodriguez @ 2017-04-13 21:35 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: andresx7-Re5JQEeQqe8AvxtiuMwx3w

Use the same gfx_*_mqd_commit function for kfd and amdgpu codepaths.

This removes the last duplicates of this programming sequence.

v2: fix cp_hqd_pq_wptr value
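
Both drivers now converge on a single register-programming helper. A
minimal sketch of the two call sites after this patch (SRBM selection
and error handling elided, names as in the hunks below):

	/* amdgpu compute ring bring-up */
	gfx_v8_0_mqd_init(ring);
	gfx_v8_0_mqd_commit(adev, ring->mqd_ptr);

	/* amdkfd user queue load */
	acquire_queue(kgd, pipe_id, queue_id);
	gfx_v8_0_mqd_commit(adev, get_mqd(mqd));
	release_queue(kgd);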

Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Acked-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 51 ++---------------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 49 ++--------------------
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c             | 38 ++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h             |  5 +++
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c             | 49 +++++++++++++++++++---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h             |  5 +++
 6 files changed, 97 insertions(+), 100 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 1a0a5f7..038b7ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -12,40 +12,41 @@
  * all copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #include <linux/fdtable.h>
 #include <linux/uaccess.h>
 #include <linux/firmware.h>
 #include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
 #include "cikd.h"
 #include "cik_sdma.h"
 #include "amdgpu_ucode.h"
+#include "gfx_v7_0.h"
 #include "gca/gfx_7_2_d.h"
 #include "gca/gfx_7_2_enum.h"
 #include "gca/gfx_7_2_sh_mask.h"
 #include "oss/oss_2_0_d.h"
 #include "oss/oss_2_0_sh_mask.h"
 #include "gmc/gmc_7_1_d.h"
 #include "gmc/gmc_7_1_sh_mask.h"
 #include "cik_structs.h"
 
 #define CIK_PIPE_PER_MEC	(4)
 
 enum {
 	MAX_TRAPID = 8,		/* 3 bits in the bitfield. */
 	MAX_WATCH_ADDRESSES = 4
 };
 
 enum {
 	ADDRESS_WATCH_REG_ADDR_HI = 0,
 	ADDRESS_WATCH_REG_ADDR_LO,
 	ADDRESS_WATCH_REG_CNTL,
@@ -292,89 +293,45 @@ static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
 static inline struct cik_mqd *get_mqd(void *mqd)
 {
 	return (struct cik_mqd *)mqd;
 }
 
 static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
 {
 	return (struct cik_sdma_rlc_registers *)mqd;
 }
 
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 			uint32_t queue_id, uint32_t __user *wptr)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	uint32_t wptr_shadow, is_wptr_shadow_valid;
 	struct cik_mqd *m;
 
 	m = get_mqd(mqd);
 
 	is_wptr_shadow_valid = !get_user(wptr_shadow, wptr);
-
-	acquire_queue(kgd, pipe_id, queue_id);
-	WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
-	WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
-	WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
-
-	WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
-	WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
-	WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
-
-	WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
-	WREG32(mmCP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo);
-	WREG32(mmCP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi);
-
-	WREG32(mmCP_HQD_IB_RPTR, m->cp_hqd_ib_rptr);
-
-	WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
-	WREG32(mmCP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd);
-	WREG32(mmCP_HQD_MSG_TYPE, m->cp_hqd_msg_type);
-
-	WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, m->cp_hqd_atomic0_preop_lo);
-	WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, m->cp_hqd_atomic0_preop_hi);
-	WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, m->cp_hqd_atomic1_preop_lo);
-	WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, m->cp_hqd_atomic1_preop_hi);
-
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
-			m->cp_hqd_pq_rptr_report_addr_hi);
-
-	WREG32(mmCP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr);
-
-	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, m->cp_hqd_pq_wptr_poll_addr_lo);
-	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, m->cp_hqd_pq_wptr_poll_addr_hi);
-
-	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);
-
-	WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
-
-	WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
-
-	WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
-	WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
-
-	WREG32(mmCP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr);
-
 	if (is_wptr_shadow_valid)
-		WREG32(mmCP_HQD_PQ_WPTR, wptr_shadow);
+		m->cp_hqd_pq_wptr = wptr_shadow;
 
-	WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
+	acquire_queue(kgd, pipe_id, queue_id);
+	gfx_v7_0_mqd_commit(adev, m);
 	release_queue(kgd);
 
 	return 0;
 }
 
 static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct cik_sdma_rlc_registers *m;
 	uint32_t sdma_base_addr;
 
 	m = get_sdma_mqd(mqd);
 	sdma_base_addr = get_sdma_base_addr(m);
 
 	WREG32(sdma_base_addr + mmSDMA0_RLC0_VIRTUAL_ADDR,
 			m->sdma_rlc_virtual_addr);
 
 	WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE,
 			m->sdma_rlc_rb_base);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 6697612..f9ad534 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -11,40 +11,41 @@
  * The above copyright notice and this permission notice shall be included in
  * all copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #include <linux/module.h>
 #include <linux/fdtable.h>
 #include <linux/uaccess.h>
 #include <linux/firmware.h>
 #include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_ucode.h"
+#include "gfx_v8_0.h"
 #include "gca/gfx_8_0_sh_mask.h"
 #include "gca/gfx_8_0_d.h"
 #include "gca/gfx_8_0_enum.h"
 #include "oss/oss_3_0_sh_mask.h"
 #include "oss/oss_3_0_d.h"
 #include "gmc/gmc_8_1_sh_mask.h"
 #include "gmc/gmc_8_1_d.h"
 #include "vi_structs.h"
 #include "vid.h"
 
 #define VI_PIPE_PER_MEC	(4)
 
 struct cik_sdma_rlc_registers;
 
 /*
  * Register access functions
  */
 
 static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
 		uint32_t sh_mem_config,
@@ -234,87 +235,45 @@ static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
 static inline struct vi_mqd *get_mqd(void *mqd)
 {
 	return (struct vi_mqd *)mqd;
 }
 
 static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
 {
 	return (struct cik_sdma_rlc_registers *)mqd;
 }
 
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 			uint32_t queue_id, uint32_t __user *wptr)
 {
 	struct vi_mqd *m;
 	uint32_t shadow_wptr, valid_wptr;
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 
 	m = get_mqd(mqd);
 
 	valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr));
-	acquire_queue(kgd, pipe_id, queue_id);
-
-	WREG32(mmCP_MQD_CONTROL, m->cp_mqd_control);
-	WREG32(mmCP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo);
-	WREG32(mmCP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi);
-
-	WREG32(mmCP_HQD_VMID, m->cp_hqd_vmid);
-	WREG32(mmCP_HQD_PERSISTENT_STATE, m->cp_hqd_persistent_state);
-	WREG32(mmCP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority);
-	WREG32(mmCP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority);
-	WREG32(mmCP_HQD_QUANTUM, m->cp_hqd_quantum);
-	WREG32(mmCP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo);
-	WREG32(mmCP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi);
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, m->cp_hqd_pq_rptr_report_addr_lo);
-	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
-			m->cp_hqd_pq_rptr_report_addr_hi);
-
 	if (valid_wptr > 0)
-		WREG32(mmCP_HQD_PQ_WPTR, shadow_wptr);
-
-	WREG32(mmCP_HQD_PQ_CONTROL, m->cp_hqd_pq_control);
-	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, m->cp_hqd_pq_doorbell_control);
-
-	WREG32(mmCP_HQD_EOP_BASE_ADDR, m->cp_hqd_eop_base_addr_lo);
-	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, m->cp_hqd_eop_base_addr_hi);
-	WREG32(mmCP_HQD_EOP_CONTROL, m->cp_hqd_eop_control);
-	WREG32(mmCP_HQD_EOP_RPTR, m->cp_hqd_eop_rptr);
-	WREG32(mmCP_HQD_EOP_WPTR, m->cp_hqd_eop_wptr);
-	WREG32(mmCP_HQD_EOP_EVENTS, m->cp_hqd_eop_done_events);
-
-	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO, m->cp_hqd_ctx_save_base_addr_lo);
-	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI, m->cp_hqd_ctx_save_base_addr_hi);
-	WREG32(mmCP_HQD_CTX_SAVE_CONTROL, m->cp_hqd_ctx_save_control);
-	WREG32(mmCP_HQD_CNTL_STACK_OFFSET, m->cp_hqd_cntl_stack_offset);
-	WREG32(mmCP_HQD_CNTL_STACK_SIZE, m->cp_hqd_cntl_stack_size);
-	WREG32(mmCP_HQD_WG_STATE_OFFSET, m->cp_hqd_wg_state_offset);
-	WREG32(mmCP_HQD_CTX_SAVE_SIZE, m->cp_hqd_ctx_save_size);
-
-	WREG32(mmCP_HQD_IB_CONTROL, m->cp_hqd_ib_control);
-
-	WREG32(mmCP_HQD_DEQUEUE_REQUEST, m->cp_hqd_dequeue_request);
-	WREG32(mmCP_HQD_ERROR, m->cp_hqd_error);
-	WREG32(mmCP_HQD_EOP_WPTR_MEM, m->cp_hqd_eop_wptr_mem);
-	WREG32(mmCP_HQD_EOP_DONES, m->cp_hqd_eop_dones);
-
-	WREG32(mmCP_HQD_ACTIVE, m->cp_hqd_active);
+		m->cp_hqd_pq_wptr = shadow_wptr;
 
+	acquire_queue(kgd, pipe_id, queue_id);
+	gfx_v8_0_mqd_commit(adev, mqd);
 	release_queue(kgd);
 
 	return 0;
 }
 
 static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
 {
 	return 0;
 }
 
 static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
 				uint32_t pipe_id, uint32_t queue_id)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	uint32_t act;
 	bool retval = false;
 	uint32_t low, high;
 
 	acquire_queue(kgd, pipe_id, queue_id);
 	act = RREG32(mmCP_HQD_ACTIVE);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index c408af5..35e5178 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -3050,69 +3050,103 @@ static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
 			(ring->doorbell_index <<
 			 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
 		mqd->cp_hqd_pq_doorbell_control |=
 			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
 		mqd->cp_hqd_pq_doorbell_control &=
 			~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
 					CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
 
 	} else {
 		mqd->cp_hqd_pq_doorbell_control = 0;
 	}
 
 	/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
 	ring->wptr = 0;
 	mqd->cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
 	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
 
 	/* set the vmid for the queue */
 	mqd->cp_hqd_vmid = 0;
 
+	/* defaults */
+	mqd->cp_hqd_ib_control = RREG32(mmCP_HQD_IB_CONTROL);
+	mqd->cp_hqd_ib_base_addr_lo = RREG32(mmCP_HQD_IB_BASE_ADDR);
+	mqd->cp_hqd_ib_base_addr_hi = RREG32(mmCP_HQD_IB_BASE_ADDR_HI);
+	mqd->cp_hqd_ib_rptr = RREG32(mmCP_HQD_IB_RPTR);
+	mqd->cp_hqd_persistent_state = RREG32(mmCP_HQD_PERSISTENT_STATE);
+	mqd->cp_hqd_sema_cmd = RREG32(mmCP_HQD_SEMA_CMD);
+	mqd->cp_hqd_msg_type = RREG32(mmCP_HQD_MSG_TYPE);
+	mqd->cp_hqd_atomic0_preop_lo = RREG32(mmCP_HQD_ATOMIC0_PREOP_LO);
+	mqd->cp_hqd_atomic0_preop_hi = RREG32(mmCP_HQD_ATOMIC0_PREOP_HI);
+	mqd->cp_hqd_atomic1_preop_lo = RREG32(mmCP_HQD_ATOMIC1_PREOP_LO);
+	mqd->cp_hqd_atomic1_preop_hi = RREG32(mmCP_HQD_ATOMIC1_PREOP_HI);
+	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
+	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
+	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
+	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
+	mqd->cp_hqd_iq_rptr = RREG32(mmCP_HQD_IQ_RPTR);
+
 	/* activate the queue */
 	mqd->cp_hqd_active = 1;
 }
 
-static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev,
-			       struct cik_mqd *mqd)
+int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd)
 {
 	u32 tmp;
 
 	/* disable wptr polling */
 	tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
 	tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
 	WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
 
 	/* program MQD field to HW */
 	WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
 	WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
 	WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);
 	WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
 	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
 	WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
 	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
 	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);
 	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, mqd->cp_hqd_pq_rptr_report_addr_lo);
 	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, mqd->cp_hqd_pq_rptr_report_addr_hi);
 	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
 	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
 	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
 
+	WREG32(mmCP_HQD_IB_CONTROL, mqd->cp_hqd_ib_control);
+	WREG32(mmCP_HQD_IB_BASE_ADDR, mqd->cp_hqd_ib_base_addr_lo);
+	WREG32(mmCP_HQD_IB_BASE_ADDR_HI, mqd->cp_hqd_ib_base_addr_hi);
+	WREG32(mmCP_HQD_IB_RPTR, mqd->cp_hqd_ib_rptr);
+	WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
+	WREG32(mmCP_HQD_SEMA_CMD, mqd->cp_hqd_sema_cmd);
+	WREG32(mmCP_HQD_MSG_TYPE, mqd->cp_hqd_msg_type);
+	WREG32(mmCP_HQD_ATOMIC0_PREOP_LO, mqd->cp_hqd_atomic0_preop_lo);
+	WREG32(mmCP_HQD_ATOMIC0_PREOP_HI, mqd->cp_hqd_atomic0_preop_hi);
+	WREG32(mmCP_HQD_ATOMIC1_PREOP_LO, mqd->cp_hqd_atomic1_preop_lo);
+	WREG32(mmCP_HQD_ATOMIC1_PREOP_HI, mqd->cp_hqd_atomic1_preop_hi);
+	WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
+	WREG32(mmCP_HQD_QUANTUM, mqd->cp_hqd_quantum);
+	WREG32(mmCP_HQD_PIPE_PRIORITY, mqd->cp_hqd_pipe_priority);
+	WREG32(mmCP_HQD_QUEUE_PRIORITY, mqd->cp_hqd_queue_priority);
+	WREG32(mmCP_HQD_IQ_RPTR, mqd->cp_hqd_iq_rptr);
+
 	/* activate the HQD */
 	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
 
 	return 0;
 }
 
 static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id)
 {
 	int r;
 	u64 mqd_gpu_addr;
 	struct cik_mqd *mqd;
 	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
 
 	if (ring->mqd_obj == NULL) {
 		r = amdgpu_bo_create(adev,
 				sizeof(struct cik_mqd),
 				PAGE_SIZE, true,
 				AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 				&ring->mqd_obj);
 		if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
index 2f5164c..6fb9c15 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
@@ -12,21 +12,26 @@
  * all copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
 
 #ifndef __GFX_V7_0_H__
 #define __GFX_V7_0_H__
 
 extern const struct amdgpu_ip_block_version gfx_v7_0_ip_block;
 extern const struct amdgpu_ip_block_version gfx_v7_1_ip_block;
 extern const struct amdgpu_ip_block_version gfx_v7_2_ip_block;
 extern const struct amdgpu_ip_block_version gfx_v7_3_ip_block;
 
+struct amdgpu_device;
+struct cik_mqd;
+
+int gfx_v7_0_mqd_commit(struct amdgpu_device *adev, struct cik_mqd *mqd);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index cd1af26..00b1eff 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4876,51 +4876,66 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 	mqd->cp_hqd_vmid = 0;
 
 	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
 	mqd->cp_hqd_persistent_state = tmp;
 
 	/* set MTYPE */
 	tmp = RREG32(mmCP_HQD_IB_CONTROL);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
 	mqd->cp_hqd_ib_control = tmp;
 
 	tmp = RREG32(mmCP_HQD_IQ_TIMER);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
 	mqd->cp_hqd_iq_timer = tmp;
 
 	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
 	mqd->cp_hqd_ctx_save_control = tmp;
 
+	/* defaults */
+	mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
+	mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
+	mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
+	mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
+	mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
+	mqd->cp_hqd_ctx_save_base_addr_lo = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
+	mqd->cp_hqd_ctx_save_base_addr_hi = RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
+	mqd->cp_hqd_cntl_stack_offset = RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
+	mqd->cp_hqd_cntl_stack_size = RREG32(mmCP_HQD_CNTL_STACK_SIZE);
+	mqd->cp_hqd_wg_state_offset = RREG32(mmCP_HQD_WG_STATE_OFFSET);
+	mqd->cp_hqd_ctx_save_size = RREG32(mmCP_HQD_CTX_SAVE_SIZE);
+	mqd->cp_hqd_eop_done_events = RREG32(mmCP_HQD_EOP_EVENTS);
+	mqd->cp_hqd_error = RREG32(mmCP_HQD_ERROR);
+	mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
+	mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
+
 	/* activate the queue */
 	mqd->cp_hqd_active = 1;
 
 	return 0;
 }
 
-static int gfx_v8_0_mqd_commit(struct amdgpu_ring *ring)
+int gfx_v8_0_mqd_commit(struct amdgpu_device *adev,
+			struct vi_mqd *mqd)
 {
-	struct amdgpu_device *adev = ring->adev;
-	struct vi_mqd *mqd = ring->mqd_ptr;
-
 	/* disable wptr polling */
 	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
 
 	WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
 	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);
 
 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
 	WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);
 
 	/* enable doorbell? */
 	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
 
 	/* set pq read/write pointers */
 	WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
 	WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
 	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
 
 	/* set the pointer to the MQD */
 	WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
 	WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
@@ -4933,90 +4948,112 @@ static int gfx_v8_0_mqd_commit(struct amdgpu_ring *ring)
 	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
 
 	/* set up the HQD, this is similar to CP_RB0_CNTL */
 	WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
 
 	/* set the wb address whether it's enabled or not */
 	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
 				mqd->cp_hqd_pq_rptr_report_addr_lo);
 	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
 				mqd->cp_hqd_pq_rptr_report_addr_hi);
 
 	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
 	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
 	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);
 
 	/* enable the doorbell if requested */
 	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
 
 	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
 	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
+	WREG32(mmCP_HQD_EOP_RPTR, mqd->cp_hqd_eop_rptr);
+	WREG32(mmCP_HQD_EOP_WPTR, mqd->cp_hqd_eop_wptr);
+
+	/* set the HQD priority */
+	WREG32(mmCP_HQD_PIPE_PRIORITY, mqd->cp_hqd_pipe_priority);
+	WREG32(mmCP_HQD_QUEUE_PRIORITY, mqd->cp_hqd_queue_priority);
+	WREG32(mmCP_HQD_QUANTUM, mqd->cp_hqd_quantum);
+
+	/* set cwsr save area */
+	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO, mqd->cp_hqd_ctx_save_base_addr_lo);
+	WREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI, mqd->cp_hqd_ctx_save_base_addr_hi);
+	WREG32(mmCP_HQD_CTX_SAVE_CONTROL, mqd->cp_hqd_ctx_save_control);
+	WREG32(mmCP_HQD_CNTL_STACK_OFFSET, mqd->cp_hqd_cntl_stack_offset);
+	WREG32(mmCP_HQD_CNTL_STACK_SIZE, mqd->cp_hqd_cntl_stack_size);
+	WREG32(mmCP_HQD_WG_STATE_OFFSET, mqd->cp_hqd_wg_state_offset);
+	WREG32(mmCP_HQD_CTX_SAVE_SIZE, mqd->cp_hqd_ctx_save_size);
+
+	WREG32(mmCP_HQD_IB_CONTROL, mqd->cp_hqd_ib_control);
+	WREG32(mmCP_HQD_EOP_EVENTS, mqd->cp_hqd_eop_done_events);
+	WREG32(mmCP_HQD_ERROR, mqd->cp_hqd_error);
+	WREG32(mmCP_HQD_EOP_WPTR_MEM, mqd->cp_hqd_eop_wptr_mem);
+	WREG32(mmCP_HQD_EOP_DONES, mqd->cp_hqd_eop_dones);
 
 	/* set the vmid for the queue */
 	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
 
 	WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
 
 	/* activate the queue */
 	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
 
 	return 0;
 }
 
 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
 {
 	int r = 0;
 	struct amdgpu_device *adev = ring->adev;
 	struct vi_mqd *mqd = ring->mqd_ptr;
 	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
 
 	gfx_v8_0_kiq_setting(ring);
 
 	if (adev->gfx.in_reset) { /* for GPU_RESET case */
 		/* reset MQD to a clean status */
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
 			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
 
 		/* reset ring buffer */
 		ring->wptr = 0;
 		amdgpu_ring_clear_ring(ring);
 
 		mutex_lock(&adev->srbm_mutex);
 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 		r = gfx_v8_0_deactivate_hqd(adev, 1);
 		if (r) {
 			dev_err(adev->dev, "failed to deactivate ring %s\n", ring->name);
 			goto out_unlock;
 		}
-		gfx_v8_0_mqd_commit(ring);
+		gfx_v8_0_mqd_commit(adev, mqd);
 		vi_srbm_select(adev, 0, 0, 0, 0);
 		mutex_unlock(&adev->srbm_mutex);
 	} else {
 		mutex_lock(&adev->srbm_mutex);
 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 		gfx_v8_0_mqd_init(ring);
 		r = gfx_v8_0_deactivate_hqd(adev, 1);
 		if (r) {
 			dev_err(adev->dev, "failed to deactivate ring %s\n", ring->name);
 			goto out_unlock;
 		}
-		gfx_v8_0_mqd_commit(ring);
+		gfx_v8_0_mqd_commit(adev, mqd);
 		vi_srbm_select(adev, 0, 0, 0, 0);
 		mutex_unlock(&adev->srbm_mutex);
 
 		if (adev->gfx.mec.mqd_backup[mqd_idx])
 			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
 	}
 
 	return r;
 
 out_unlock:
 	vi_srbm_select(adev, 0, 0, 0, 0);
 	mutex_unlock(&adev->srbm_mutex);
 	return r;
 }
 
 static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct vi_mqd *mqd = ring->mqd_ptr;
 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
index 788cc3a..ec3f11f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
@@ -10,21 +10,26 @@
  *
  * The above copyright notice and this permission notice shall be included in
  * all copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
 
 #ifndef __GFX_V8_0_H__
 #define __GFX_V8_0_H__
 
 extern const struct amdgpu_ip_block_version gfx_v8_0_ip_block;
 extern const struct amdgpu_ip_block_version gfx_v8_1_ip_block;
 
+struct amdgpu_device;
+struct vi_mqd;
+
+int gfx_v8_0_mqd_commit(struct amdgpu_device *adev, struct vi_mqd *mqd);
+
 #endif
-- 
2.9.3


* [PATCH 06/17] drm/amdgpu: fix kgd_hqd_load failing to update shadow_wptr
From: Andres Rodriguez @ 2017-04-13 21:35 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: andresx7-Re5JQEeQqe8AvxtiuMwx3w

The return value of copy_from_user() is 0 for the success case (it
returns the number of bytes that could not be copied), so the previous
check for a positive value only updated the wptr when the copy had
actually failed.
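
copy_from_user() returns the number of bytes it could not copy, so zero
indicates success. The corrected check (as in the hunk below):

	valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr));
	if (valid_wptr == 0)	/* 0 bytes left uncopied == success */
		m->cp_hqd_pq_wptr = shadow_wptr;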

Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index f9ad534..8af2975 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -235,41 +235,41 @@ static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
 static inline struct vi_mqd *get_mqd(void *mqd)
 {
 	return (struct vi_mqd *)mqd;
 }
 
 static inline struct cik_sdma_rlc_registers *get_sdma_mqd(void *mqd)
 {
 	return (struct cik_sdma_rlc_registers *)mqd;
 }
 
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 			uint32_t queue_id, uint32_t __user *wptr)
 {
 	struct vi_mqd *m;
 	uint32_t shadow_wptr, valid_wptr;
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 
 	m = get_mqd(mqd);
 
 	valid_wptr = copy_from_user(&shadow_wptr, wptr, sizeof(shadow_wptr));
-	if (valid_wptr > 0)
+	if (valid_wptr == 0)
 		m->cp_hqd_pq_wptr = shadow_wptr;
 
 	acquire_queue(kgd, pipe_id, queue_id);
 	gfx_v8_0_mqd_commit(adev, mqd);
 	release_queue(kgd);
 
 	return 0;
 }
 
 static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd)
 {
 	return 0;
 }
 
 static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
 				uint32_t pipe_id, uint32_t queue_id)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	uint32_t act;
 	bool retval = false;
-- 
2.9.3


* [PATCH 07/17] drm/amdgpu: rename rdev to adev
From: Andres Rodriguez @ 2017-04-13 21:35 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: andresx7-Re5JQEeQqe8AvxtiuMwx3w

Rename straggler instances of r(adeon)dev to a(mdgpu)dev

Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 70 +++++++++++++++---------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 14 +++---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c      |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c      |  2 +-
 4 files changed, 44 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index dba8a5b..3200ff9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -43,204 +43,204 @@ int amdgpu_amdkfd_init(void)
 		return -ENOENT;
 
 	ret = kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd);
 	if (ret) {
 		symbol_put(kgd2kfd_init);
 		kgd2kfd = NULL;
 	}
 
 #elif defined(CONFIG_HSA_AMD)
 	ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
 	if (ret)
 		kgd2kfd = NULL;
 
 #else
 	ret = -ENOENT;
 #endif
 
 	return ret;
 }
 
-bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev)
+bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev)
 {
-	switch (rdev->asic_type) {
+	switch (adev->asic_type) {
 #ifdef CONFIG_DRM_AMDGPU_CIK
 	case CHIP_KAVERI:
 		kfd2kgd = amdgpu_amdkfd_gfx_7_get_functions();
 		break;
 #endif
 	case CHIP_CARRIZO:
 		kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
 		break;
 	default:
 		return false;
 	}
 
 	return true;
 }
 
 void amdgpu_amdkfd_fini(void)
 {
 	if (kgd2kfd) {
 		kgd2kfd->exit();
 		symbol_put(kgd2kfd_init);
 	}
 }
 
-void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 {
 	if (kgd2kfd)
-		rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev,
-					rdev->pdev, kfd2kgd);
+		adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev,
+					adev->pdev, kfd2kgd);
 }
 
-void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
-	if (rdev->kfd) {
+	if (adev->kfd) {
 		struct kgd2kfd_shared_resources gpu_resources = {
 			.compute_vmid_bitmap = 0xFF00,
 
 			.first_compute_pipe = 1,
 			.compute_pipe_count = 4 - 1,
 		};
 
-		amdgpu_doorbell_get_kfd_info(rdev,
+		amdgpu_doorbell_get_kfd_info(adev,
 				&gpu_resources.doorbell_physical_address,
 				&gpu_resources.doorbell_aperture_size,
 				&gpu_resources.doorbell_start_offset);
 
-		kgd2kfd->device_init(rdev->kfd, &gpu_resources);
+		kgd2kfd->device_init(adev->kfd, &gpu_resources);
 	}
 }
 
-void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
 {
-	if (rdev->kfd) {
-		kgd2kfd->device_exit(rdev->kfd);
-		rdev->kfd = NULL;
+	if (adev->kfd) {
+		kgd2kfd->device_exit(adev->kfd);
+		adev->kfd = NULL;
 	}
 }
 
-void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev,
+void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
 		const void *ih_ring_entry)
 {
-	if (rdev->kfd)
-		kgd2kfd->interrupt(rdev->kfd, ih_ring_entry);
+	if (adev->kfd)
+		kgd2kfd->interrupt(adev->kfd, ih_ring_entry);
 }
 
-void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev)
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev)
 {
-	if (rdev->kfd)
-		kgd2kfd->suspend(rdev->kfd);
+	if (adev->kfd)
+		kgd2kfd->suspend(adev->kfd);
 }
 
-int amdgpu_amdkfd_resume(struct amdgpu_device *rdev)
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
 {
 	int r = 0;
 
-	if (rdev->kfd)
-		r = kgd2kfd->resume(rdev->kfd);
+	if (adev->kfd)
+		r = kgd2kfd->resume(adev->kfd);
 
 	return r;
 }
 
 int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 			void **mem_obj, uint64_t *gpu_addr,
 			void **cpu_ptr)
 {
-	struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 	struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
 	int r;
 
 	BUG_ON(kgd == NULL);
 	BUG_ON(gpu_addr == NULL);
 	BUG_ON(cpu_ptr == NULL);
 
 	*mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
 	if ((*mem) == NULL)
 		return -ENOMEM;
 
-	r = amdgpu_bo_create(rdev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
+	r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
 			     AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo);
 	if (r) {
-		dev_err(rdev->dev,
+		dev_err(adev->dev,
 			"failed to allocate BO for amdkfd (%d)\n", r);
 		return r;
 	}
 
 	/* map the buffer */
 	r = amdgpu_bo_reserve((*mem)->bo, true);
 	if (r) {
-		dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
+		dev_err(adev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
 		goto allocate_mem_reserve_bo_failed;
 	}
 
 	r = amdgpu_bo_pin((*mem)->bo, AMDGPU_GEM_DOMAIN_GTT,
 				&(*mem)->gpu_addr);
 	if (r) {
-		dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r);
+		dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
 		goto allocate_mem_pin_bo_failed;
 	}
 	*gpu_addr = (*mem)->gpu_addr;
 
 	r = amdgpu_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
 	if (r) {
-		dev_err(rdev->dev,
+		dev_err(adev->dev,
 			"(%d) failed to map bo to kernel for amdkfd\n", r);
 		goto allocate_mem_kmap_bo_failed;
 	}
 	*cpu_ptr = (*mem)->cpu_ptr;
 
 	amdgpu_bo_unreserve((*mem)->bo);
 
 	return 0;
 
 allocate_mem_kmap_bo_failed:
 	amdgpu_bo_unpin((*mem)->bo);
 allocate_mem_pin_bo_failed:
 	amdgpu_bo_unreserve((*mem)->bo);
 allocate_mem_reserve_bo_failed:
 	amdgpu_bo_unref(&(*mem)->bo);
 
 	return r;
 }
 
 void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
 {
 	struct kgd_mem *mem = (struct kgd_mem *) mem_obj;
 
 	BUG_ON(mem == NULL);
 
 	amdgpu_bo_reserve(mem->bo, true);
 	amdgpu_bo_kunmap(mem->bo);
 	amdgpu_bo_unpin(mem->bo);
 	amdgpu_bo_unreserve(mem->bo);
 	amdgpu_bo_unref(&(mem->bo));
 	kfree(mem);
 }
 
 uint64_t get_vmem_size(struct kgd_dev *kgd)
 {
-	struct amdgpu_device *rdev =
+	struct amdgpu_device *adev =
 		(struct amdgpu_device *)kgd;
 
 	BUG_ON(kgd == NULL);
 
-	return rdev->mc.real_vram_size;
+	return adev->mc.real_vram_size;
 }
 
 uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
 {
-	struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 
-	if (rdev->gfx.funcs->get_gpu_clock_counter)
-		return rdev->gfx.funcs->get_gpu_clock_counter(rdev);
+	if (adev->gfx.funcs->get_gpu_clock_counter)
+		return adev->gfx.funcs->get_gpu_clock_counter(adev);
 	return 0;
 }
 
 uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd)
 {
-	struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
 
 	/* The sclk is in quantas of 10kHz */
-	return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
+	return adev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index de530f68d..73f83a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -22,44 +22,44 @@
 
 /* amdgpu_amdkfd.h defines the private interface between amdgpu and amdkfd. */
 
 #ifndef AMDGPU_AMDKFD_H_INCLUDED
 #define AMDGPU_AMDKFD_H_INCLUDED
 
 #include <linux/types.h>
 #include <kgd_kfd_interface.h>
 
 struct amdgpu_device;
 
 struct kgd_mem {
 	struct amdgpu_bo *bo;
 	uint64_t gpu_addr;
 	void *cpu_ptr;
 };
 
 int amdgpu_amdkfd_init(void);
 void amdgpu_amdkfd_fini(void);
 
-bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev);
+bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev);
 
-void amdgpu_amdkfd_suspend(struct amdgpu_device *rdev);
-int amdgpu_amdkfd_resume(struct amdgpu_device *rdev);
-void amdgpu_amdkfd_interrupt(struct amdgpu_device *rdev,
+void amdgpu_amdkfd_suspend(struct amdgpu_device *adev);
+int amdgpu_amdkfd_resume(struct amdgpu_device *adev);
+void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
 			const void *ih_ring_entry);
-void amdgpu_amdkfd_device_probe(struct amdgpu_device *rdev);
-void amdgpu_amdkfd_device_init(struct amdgpu_device *rdev);
-void amdgpu_amdkfd_device_fini(struct amdgpu_device *rdev);
+void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
+void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
+void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
 
 /* Shared API */
 int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
 			void **mem_obj, uint64_t *gpu_addr,
 			void **cpu_ptr);
 void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
 uint64_t get_vmem_size(struct kgd_dev *kgd);
 uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
 
 uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
 
 #endif /* AMDGPU_AMDKFD_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 35e5178..75ad957 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -1822,41 +1822,41 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
 	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
 			     adev->gfx.config.max_shader_engines, 16);
 
 	gfx_v7_0_raster_config(adev, &raster_config, &raster_config_1);
 
 	if (!adev->gfx.config.backend_enable_mask ||
 			adev->gfx.config.num_rbs >= num_rb_pipes) {
 		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
 		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
 	} else {
 		gfx_v7_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
 							adev->gfx.config.backend_enable_mask,
 							num_rb_pipes);
 	}
 	mutex_unlock(&adev->grbm_idx_mutex);
 }
 
 /**
  * gmc_v7_0_init_compute_vmid - gart enable
  *
- * @rdev: amdgpu_device pointer
+ * @adev: amdgpu_device pointer
  *
  * Initialize compute vmid sh_mem registers
  *
  */
 #define DEFAULT_SH_MEM_BASES	(0x6000)
 #define FIRST_COMPUTE_VMID	(8)
 #define LAST_COMPUTE_VMID	(16)
 static void gmc_v7_0_init_compute_vmid(struct amdgpu_device *adev)
 {
 	int i;
 	uint32_t sh_mem_config;
 	uint32_t sh_mem_bases;
 
 	/*
 	 * Configure apertures:
 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
 	*/
 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 00b1eff..938b280 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -3789,41 +3789,41 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
 			adev->gfx.config.rb_config[i][j].rb_backend_disable =
 				RREG32(mmCC_RB_BACKEND_DISABLE);
 			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
 				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
 			adev->gfx.config.rb_config[i][j].raster_config =
 				RREG32(mmPA_SC_RASTER_CONFIG);
 			adev->gfx.config.rb_config[i][j].raster_config_1 =
 				RREG32(mmPA_SC_RASTER_CONFIG_1);
 		}
 	}
 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 }
 
 /**
  * gfx_v8_0_init_compute_vmid - gart enable
  *
- * @rdev: amdgpu_device pointer
+ * @adev: amdgpu_device pointer
  *
  * Initialize compute vmid sh_mem registers
  *
  */
 #define DEFAULT_SH_MEM_BASES	(0x6000)
 #define FIRST_COMPUTE_VMID	(8)
 #define LAST_COMPUTE_VMID	(16)
 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
 {
 	int i;
 	uint32_t sh_mem_config;
 	uint32_t sh_mem_bases;
 
 	/*
 	 * Configure apertures:
 	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
 	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
 	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
 	 */
 	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 08/17] drm/radeon: take ownership of pipe initialization
       [not found] ` <20170413213542.18802-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
                     ` (6 preceding siblings ...)
  2017-04-13 21:35   ` [PATCH 07/17] drm/amdgpu: rename rdev to adev Andres Rodriguez
@ 2017-04-13 21:35   ` Andres Rodriguez
  2017-04-13 21:35   ` [PATCH 09/17] drm/amdgpu: take ownership of per-pipe configuration v2 Andres Rodriguez
                     ` (9 subsequent siblings)
  17 siblings, 0 replies; 26+ messages in thread
From: Andres Rodriguez @ 2017-04-13 21:35 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: andresx7-Re5JQEeQqe8AvxtiuMwx3w

Take ownership of pipe initialization away from KFD.

Note that hpd_eop_gpu_addr was already large enough to accommodate all
pipes.
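
For reference, a minimal sketch of the sizing math behind this change
(a sketch only: pipe_eop_base()/eop_size_field() are hypothetical
helper names, and MEC_HPD_SIZE = 2048 is assumed to match the amdgpu
definition):

#include <linux/log2.h>
#include <linux/types.h>

#define MEC_HPD_SIZE 2048	/* per-pipe EOP bytes (assumed) */

/* base address of a pipe's slice of the shared EOP BO; the existing
 * allocation already reserves MEC_HPD_SIZE * 2 bytes for every pipe */
static u64 pipe_eop_base(u64 hpd_eop_gpu_addr, unsigned int pipe)
{
	return hpd_eop_gpu_addr + (u64)pipe * MEC_HPD_SIZE * 2;
}

/* CP_HPD_EOP_CONTROL encodes the ring as 2^(EOP_SIZE+1) dwords:
 * order_base_2(2048 / 8) = 8 -> 2^(8+1) = 512 dwords = 2048 bytes,
 * i.e. exactly one MEC_HPD_SIZE slice */
static u32 eop_size_field(void)
{
	return order_base_2(MEC_HPD_SIZE / 8);
}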

Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/radeon/cik.c        | 27 ++++++++++++++-------------
 drivers/gpu/drm/radeon/radeon_kfd.c | 13 +------------
 2 files changed, 15 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 53710dd..3d084c2 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -4563,57 +4563,58 @@ static int cik_cp_compute_resume(struct radeon_device *rdev)
 	bool use_doorbell = true;
 	u64 hqd_gpu_addr;
 	u64 mqd_gpu_addr;
 	u64 eop_gpu_addr;
 	u64 wb_gpu_addr;
 	u32 *buf;
 	struct bonaire_mqd *mqd;
 
 	r = cik_cp_compute_start(rdev);
 	if (r)
 		return r;
 
 	/* fix up chicken bits */
 	tmp = RREG32(CP_CPF_DEBUG);
 	tmp |= (1 << 23);
 	WREG32(CP_CPF_DEBUG, tmp);
 
 	/* init the pipes */
 	mutex_lock(&rdev->srbm_mutex);
 
-	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
+	for (i = 0; i < rdev->mec.num_pipe; ++i) {
+		cik_srbm_select(rdev, 0, i, 0, 0);
 
-	cik_srbm_select(rdev, 0, 0, 0, 0);
-
-	/* write the EOP addr */
-	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
-	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
+		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
+		/* write the EOP addr */
+		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
+		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
 
-	/* set the VMID assigned */
-	WREG32(CP_HPD_EOP_VMID, 0);
+		/* set the VMID assigned */
+		WREG32(CP_HPD_EOP_VMID, 0);
 
-	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-	tmp = RREG32(CP_HPD_EOP_CONTROL);
-	tmp &= ~EOP_SIZE_MASK;
-	tmp |= order_base_2(MEC_HPD_SIZE / 8);
-	WREG32(CP_HPD_EOP_CONTROL, tmp);
+		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
+		tmp = RREG32(CP_HPD_EOP_CONTROL);
+		tmp &= ~EOP_SIZE_MASK;
+		tmp |= order_base_2(MEC_HPD_SIZE / 8);
+		WREG32(CP_HPD_EOP_CONTROL, tmp);
 
+	}
 	mutex_unlock(&rdev->srbm_mutex);
 
 	/* init the queues.  Just two for now. */
 	for (i = 0; i < 2; i++) {
 		if (i == 0)
 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
 		else
 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
 
 		if (rdev->ring[idx].mqd_obj == NULL) {
 			r = radeon_bo_create(rdev,
 					     sizeof(struct bonaire_mqd),
 					     PAGE_SIZE, true,
 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
 					     NULL, &rdev->ring[idx].mqd_obj);
 			if (r) {
 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
 				return r;
 			}
 		}
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
index 87a9ebb..a06e3b1 100644
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -406,52 +406,41 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
 					ATC_VMID_PASID_MAPPING_VALID_MASK;
 
 	write_register(kgd, ATC_VMID0_PASID_MAPPING + vmid*sizeof(uint32_t),
 			pasid_mapping);
 
 	while (!(read_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS) &
 								(1U << vmid)))
 		cpu_relax();
 	write_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);
 
 	/* Mapping vmid to pasid also for IH block */
 	write_register(kgd, IH_VMID_0_LUT + vmid * sizeof(uint32_t),
 			pasid_mapping);
 
 	return 0;
 }
 
 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t hpd_size, uint64_t hpd_gpu_addr)
 {
-	uint32_t mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
-	uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
-
-	lock_srbm(kgd, mec, pipe, 0, 0);
-	write_register(kgd, CP_HPD_EOP_BASE_ADDR,
-			lower_32_bits(hpd_gpu_addr >> 8));
-	write_register(kgd, CP_HPD_EOP_BASE_ADDR_HI,
-			upper_32_bits(hpd_gpu_addr >> 8));
-	write_register(kgd, CP_HPD_EOP_VMID, 0);
-	write_register(kgd, CP_HPD_EOP_CONTROL, hpd_size);
-	unlock_srbm(kgd);
-
+	/* nothing to do here */
 	return 0;
 }
 
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 {
 	uint32_t mec;
 	uint32_t pipe;
 
 	mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
 	pipe = (pipe_id % CIK_PIPE_PER_MEC);
 
 	lock_srbm(kgd, mec, pipe, 0, 0);
 
 	write_register(kgd, CPC_INT_CNTL,
 			TIME_STAMP_INT_ENABLE | OPCODE_ERROR_INT_ENABLE);
 
 	unlock_srbm(kgd);
 
 	return 0;
 }
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 09/17] drm/amdgpu: take ownership of per-pipe configuration v2
       [not found] ` <20170413213542.18802-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
                     ` (7 preceding siblings ...)
  2017-04-13 21:35   ` [PATCH 08/17] drm/radeon: take ownership of pipe initialization Andres Rodriguez
@ 2017-04-13 21:35   ` Andres Rodriguez
       [not found]     ` <20170413213542.18802-10-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  2017-04-13 21:35   ` [PATCH 10/17] drm/amdgpu: allow split of queues with kfd at queue granularity v4 Andres Rodriguez
                     ` (8 subsequent siblings)
  17 siblings, 1 reply; 26+ messages in thread
From: Andres Rodriguez @ 2017-04-13 21:35 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: andresx7-Re5JQEeQqe8AvxtiuMwx3w

Make amdgpu the owner of all per-pipe state of the HQDs.

This change will allow us to split the queues between kfd and amdgpu
at queue granularity instead of pipe granularity.

This patch also fixes kfd's allocation of an HPD_EOP region for its 3
pipes, which went unused.

v2: support for gfx9
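
As a rough outline of what "per-pipe state" means here: it is the
CP_HPD_EOP_* block that the removed kgd_init_pipeline() used to
program, now written once for every pipe from the amdgpu side. A
sketch, assuming gfx7 names (compute_pipe_init below is a hypothetical
stand-in for the real gfx_v7_0_compute_pipe_init, which also
read-modify-writes CP_HPD_EOP_CONTROL instead of writing it outright):

static void compute_pipe_init(struct amdgpu_device *adev, int mec, int pipe)
{
	u64 eop = adev->gfx.mec.hpd_eop_gpu_addr
		+ (mec * adev->gfx.mec.num_pipe_per_mec + pipe)
		* GFX7_MEC_HPD_SIZE * 2;

	mutex_lock(&adev->srbm_mutex);
	cik_srbm_select(adev, mec + 1, pipe, 0, 0);	/* mec0 is me1 */

	WREG32(mmCP_HPD_EOP_BASE_ADDR, lower_32_bits(eop >> 8));
	WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop >> 8));
	WREG32(mmCP_HPD_EOP_VMID, 0);
	/* EOP ring size, 2^(EOP_SIZE+1) dwords */
	WREG32(mmCP_HPD_EOP_CONTROL, order_base_2(GFX7_MEC_HPD_SIZE / 8));

	cik_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}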

Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h                |  4 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c  | 13 +------
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c  |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c              | 28 ++++++++++----
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c              | 33 +++++++++++-----
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c              | 24 ++++++++----
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 45 ----------------------
 7 files changed, 65 insertions(+), 83 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 3abd2dc..6b294d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -880,43 +880,43 @@ struct amdgpu_rlc {
 	/* for firmware data */
 	u32 save_and_restore_offset;
 	u32 clear_state_descriptor_offset;
 	u32 avail_scratch_ram_locations;
 	u32 reg_restore_list_size;
 	u32 reg_list_format_start;
 	u32 reg_list_format_separate_start;
 	u32 starting_offsets_start;
 	u32 reg_list_format_size_bytes;
 	u32 reg_list_size_bytes;
 
 	u32 *register_list_format;
 	u32 *register_restore;
 };
 
 struct amdgpu_mec {
 	struct amdgpu_bo	*hpd_eop_obj;
 	u64			hpd_eop_gpu_addr;
 	struct amdgpu_bo	*mec_fw_obj;
 	u64			mec_fw_gpu_addr;
-	u32 num_pipe;
 	u32 num_mec;
-	u32 num_queue;
+	u32 num_pipe_per_mec;
+	u32 num_queue_per_pipe;
 	void			*mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1];
 };
 
 struct amdgpu_kiq {
 	u64			eop_gpu_addr;
 	struct amdgpu_bo	*eop_obj;
 	struct amdgpu_ring	ring;
 	struct amdgpu_irq_src	irq;
 };
 
 /*
  * GPU scratch registers structures, functions & helpers
  */
 struct amdgpu_scratch {
 	unsigned		num_reg;
 	uint32_t                reg_base;
 	uint32_t		free_mask;
 };
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 038b7ea..910f9d3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -227,52 +227,41 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
 	 * SW cleared it. So the protocol is to always wait & clear.
 	 */
 	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
 			ATC_VMID0_PASID_MAPPING__VALID_MASK;
 
 	WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping);
 
 	while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
 		cpu_relax();
 	WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);
 
 	/* Mapping vmid to pasid also for IH block */
 	WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);
 
 	return 0;
 }
 
 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t hpd_size, uint64_t hpd_gpu_addr)
 {
-	struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
-	uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
-	uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
-
-	lock_srbm(kgd, mec, pipe, 0, 0);
-	WREG32(mmCP_HPD_EOP_BASE_ADDR, lower_32_bits(hpd_gpu_addr >> 8));
-	WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(hpd_gpu_addr >> 8));
-	WREG32(mmCP_HPD_EOP_VMID, 0);
-	WREG32(mmCP_HPD_EOP_CONTROL, hpd_size);
-	unlock_srbm(kgd);
-
+	/* amdgpu owns the per-pipe state */
 	return 0;
 }
 
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	uint32_t mec;
 	uint32_t pipe;
 
 	mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
 	pipe = (pipe_id % CIK_PIPE_PER_MEC);
 
 	lock_srbm(kgd, mec, pipe, 0, 0);
 
 	WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
 			CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
 
 	unlock_srbm(kgd);
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 8af2975..6ba94e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -189,40 +189,41 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
 	 * So the protocol is to always wait & clear.
 	 */
 	uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
 			ATC_VMID0_PASID_MAPPING__VALID_MASK;
 
 	WREG32(mmATC_VMID0_PASID_MAPPING + vmid, pasid_mapping);
 
 	while (!(RREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS) & (1U << vmid)))
 		cpu_relax();
 	WREG32(mmATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid);
 
 	/* Mapping vmid to pasid also for IH block */
 	WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);
 
 	return 0;
 }
 
 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t hpd_size, uint64_t hpd_gpu_addr)
 {
+	/* amdgpu owns the per-pipe state */
 	return 0;
 }
 
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	uint32_t mec;
 	uint32_t pipe;
 
 	mec = (++pipe_id / VI_PIPE_PER_MEC) + 1;
 	pipe = (pipe_id % VI_PIPE_PER_MEC);
 
 	lock_srbm(kgd, mec, pipe, 0, 0);
 
 	WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK);
 
 	unlock_srbm(kgd);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 75ad957..41bda98 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2810,84 +2810,98 @@ static void gfx_v7_0_cp_compute_fini(struct amdgpu_device *adev)
 static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
 {
 	int r;
 
 	if (adev->gfx.mec.hpd_eop_obj) {
 		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 
 		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
 		adev->gfx.mec.hpd_eop_obj = NULL;
 	}
 }
 
 static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 {
 	int r;
 	u32 *hpd;
+	size_t mec_hpd_size;
 
 	/*
 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
 	 * Nonetheless, we assign only 1 pipe because all other pipes will
 	 * be handled by KFD
 	 */
-	adev->gfx.mec.num_mec = 1;
-	adev->gfx.mec.num_pipe = 1;
-	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
+	switch (adev->asic_type) {
+	case CHIP_KAVERI:
+		adev->gfx.mec.num_mec = 2;
+		break;
+	case CHIP_BONAIRE:
+	case CHIP_HAWAII:
+	case CHIP_KABINI:
+	case CHIP_MULLINS:
+	default:
+		adev->gfx.mec.num_mec = 1;
+		break;
+	}
+	adev->gfx.mec.num_pipe_per_mec = 4;
+	adev->gfx.mec.num_queue_per_pipe = 8;
 
+	mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec
+		* GFX7_MEC_HPD_SIZE * 2;
 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
 		r = amdgpu_bo_create(adev,
-				     adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * GFX7_MEC_HPD_SIZE * 2,
+				     mec_hpd_size,
 				     PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 				     &adev->gfx.mec.hpd_eop_obj);
 		if (r) {
 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
 			return r;
 		}
 	}
 
 	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
 	if (unlikely(r != 0)) {
 		gfx_v7_0_mec_fini(adev);
 		return r;
 	}
 	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
 			  &adev->gfx.mec.hpd_eop_gpu_addr);
 	if (r) {
 		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
 		gfx_v7_0_mec_fini(adev);
 		return r;
 	}
 	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
 	if (r) {
 		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
 		gfx_v7_0_mec_fini(adev);
 		return r;
 	}
 
 	/* clear memory.  Not sure if this is required or not */
-	memset(hpd, 0, adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * GFX7_MEC_HPD_SIZE * 2);
+	memset(hpd, 0, mec_hpd_size);
 
 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 
 	return 0;
 }
 
 struct hqd_registers
 {
 	u32 cp_mqd_base_addr;
 	u32 cp_mqd_base_addr_hi;
 	u32 cp_hqd_active;
 	u32 cp_hqd_vmid;
 	u32 cp_hqd_persistent_state;
 	u32 cp_hqd_pipe_priority;
 	u32 cp_hqd_queue_priority;
 	u32 cp_hqd_quantum;
 	u32 cp_hqd_pq_base;
 	u32 cp_hqd_pq_base_hi;
 	u32 cp_hqd_pq_rptr;
@@ -3191,43 +3205,43 @@ static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id)
 /**
  * gfx_v7_0_cp_compute_resume - setup the compute queue registers
  *
  * @adev: amdgpu_device pointer
  *
  * Program the compute queues and test them to make sure they
  * are working.
  * Returns 0 for success, error for failure.
  */
 static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
 {
 	int r, i, j;
 	u32 tmp;
 	struct amdgpu_ring *ring;
 
 	/* fix up chicken bits */
 	tmp = RREG32(mmCP_CPF_DEBUG);
 	tmp |= (1 << 23);
 	WREG32(mmCP_CPF_DEBUG, tmp);
 
-	/* init the pipes */
+	/* init all pipes (even the ones we don't own) */
 	for (i = 0; i < adev->gfx.mec.num_mec; i++)
-		for (j = 0; j < adev->gfx.mec.num_pipe; j++)
+		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++)
 			gfx_v7_0_compute_pipe_init(adev, i, j);
 
 	/* init the queues */
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		r = gfx_v7_0_compute_queue_init(adev, i);
 		if (r) {
 			gfx_v7_0_cp_compute_fini(adev);
 			return r;
 		}
 	}
 
 	gfx_v7_0_cp_compute_enable(adev, true);
 
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		ring = &adev->gfx.compute_ring[i];
 		ring->ready = true;
 		r = amdgpu_ring_test_ring(ring);
 		if (r)
 			ring->ready = false;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 938b280..29c487e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1401,81 +1401,96 @@ static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
 	ring->eop_gpu_addr = kiq->eop_gpu_addr;
 	sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
 	r = amdgpu_ring_init(adev, ring, 1024,
 			     irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
 	if (r)
 		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
 
 	return r;
 }
 static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
 				   struct amdgpu_irq_src *irq)
 {
 	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
 	amdgpu_ring_fini(ring);
 }
 
 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
 {
 	int r;
 	u32 *hpd;
+	size_t mec_hpd_size;
 
-	/*
-	 * we assign only 1 pipe because all other pipes will
-	 * be handled by KFD
-	 */
-	adev->gfx.mec.num_mec = 1;
-	adev->gfx.mec.num_pipe = 1;
-	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
+	switch (adev->asic_type) {
+	case CHIP_FIJI:
+	case CHIP_TONGA:
+	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
+	case CHIP_POLARIS10:
+	case CHIP_CARRIZO:
+		adev->gfx.mec.num_mec = 2;
+		break;
+	case CHIP_TOPAZ:
+	case CHIP_STONEY:
+	default:
+		adev->gfx.mec.num_mec = 1;
+		break;
+	}
+
+	adev->gfx.mec.num_pipe_per_mec = 4;
+	adev->gfx.mec.num_queue_per_pipe = 8;
+
+	/* only 1 pipe of the first MEC is owned by amdgpu */
+	mec_hpd_size = 1 * 1 * adev->gfx.mec.num_queue_per_pipe * GFX8_MEC_HPD_SIZE;
 
 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
 		r = amdgpu_bo_create(adev,
-				     adev->gfx.mec.num_queue * GFX8_MEC_HPD_SIZE,
+				     mec_hpd_size,
 				     PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 				     &adev->gfx.mec.hpd_eop_obj);
 		if (r) {
 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
 			return r;
 		}
 	}
 
 	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
 	if (unlikely(r != 0)) {
 		gfx_v8_0_mec_fini(adev);
 		return r;
 	}
 	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
 			  &adev->gfx.mec.hpd_eop_gpu_addr);
 	if (r) {
 		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
 		gfx_v8_0_mec_fini(adev);
 		return r;
 	}
 	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
 	if (r) {
 		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
 		gfx_v8_0_mec_fini(adev);
 		return r;
 	}
 
-	memset(hpd, 0, adev->gfx.mec.num_queue * GFX8_MEC_HPD_SIZE);
+	memset(hpd, 0, mec_hpd_size);
 
 	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
 	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 
 	return 0;
 }
 
 static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev)
 {
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 
 	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
 }
 
 static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
 {
 	int r;
 	u32 *hpd;
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 0de7ff4..d100b15 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -465,54 +465,62 @@ static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
 	}
 	if (adev->gfx.mec.mec_fw_obj) {
 		r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, false);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve mec firmware bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.mec.mec_fw_obj);
 		amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
 
 		amdgpu_bo_unref(&adev->gfx.mec.mec_fw_obj);
 		adev->gfx.mec.mec_fw_obj = NULL;
 	}
 }
 
 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
 {
 	int r;
 	u32 *hpd;
 	const __le32 *fw_data;
 	unsigned fw_size;
 	u32 *fw;
+	size_t mec_hpd_size;
 
 	const struct gfx_firmware_header_v1_0 *mec_hdr;
 
-	/*
-	 * we assign only 1 pipe because all other pipes will
-	 * be handled by KFD
-	 */
-	adev->gfx.mec.num_mec = 1;
-	adev->gfx.mec.num_pipe = 1;
-	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
+	switch (adev->asic_type) {
+	case CHIP_VEGA10:
+		adev->gfx.mec.num_mec = 2;
+		break;
+	default:
+		adev->gfx.mec.num_mec = 1;
+		break;
+	}
+
+	adev->gfx.mec.num_pipe_per_mec = 4;
+	adev->gfx.mec.num_queue_per_pipe = 8;
+
+	/* only 1 pipe of the first MEC is owned by amdgpu */
+	mec_hpd_size = 1 * 1 * adev->gfx.mec.num_queue_per_pipe * GFX9_MEC_HPD_SIZE;
 
 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
 		r = amdgpu_bo_create(adev,
-				     adev->gfx.mec.num_queue * GFX9_MEC_HPD_SIZE,
+				     mec_hpd_size,
 				     PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 				     &adev->gfx.mec.hpd_eop_obj);
 		if (r) {
 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
 			return r;
 		}
 	}
 
 	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
 	if (unlikely(r != 0)) {
 		gfx_v9_0_mec_fini(adev);
 		return r;
 	}
 	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
 			  &adev->gfx.mec.hpd_eop_gpu_addr);
 	if (r) {
 		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
 		gfx_v9_0_mec_fini(adev);
 		return r;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index f49c551..c064dea 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -455,89 +455,44 @@ static int unregister_process_nocpsch(struct device_queue_manager *dqm,
 }
 
 static int
 set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
 			unsigned int vmid)
 {
 	uint32_t pasid_mapping;
 
 	pasid_mapping = (pasid == 0) ? 0 :
 		(uint32_t)pasid |
 		ATC_VMID_PASID_MAPPING_VALID;
 
 	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
 						dqm->dev->kgd, pasid_mapping,
 						vmid);
 }
 
 int init_pipelines(struct device_queue_manager *dqm,
 			unsigned int pipes_num, unsigned int first_pipe)
 {
-	void *hpdptr;
-	struct mqd_manager *mqd;
-	unsigned int i, err, inx;
-	uint64_t pipe_hpd_addr;
-
 	BUG_ON(!dqm || !dqm->dev);
 
 	pr_debug("kfd: In func %s\n", __func__);
 
-	/*
-	 * Allocate memory for the HPDs. This is hardware-owned per-pipe data.
-	 * The driver never accesses this memory after zeroing it.
-	 * It doesn't even have to be saved/restored on suspend/resume
-	 * because it contains no data when there are no active queues.
-	 */
-
-	err = kfd_gtt_sa_allocate(dqm->dev, CIK_HPD_EOP_BYTES * pipes_num,
-					&dqm->pipeline_mem);
-
-	if (err) {
-		pr_err("kfd: error allocate vidmem num pipes: %d\n",
-			pipes_num);
-		return -ENOMEM;
-	}
-
-	hpdptr = dqm->pipeline_mem->cpu_ptr;
-	dqm->pipelines_addr = dqm->pipeline_mem->gpu_addr;
-
-	memset(hpdptr, 0, CIK_HPD_EOP_BYTES * pipes_num);
-
-	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
-	if (mqd == NULL) {
-		kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
-		return -ENOMEM;
-	}
-
-	for (i = 0; i < pipes_num; i++) {
-		inx = i + first_pipe;
-		/*
-		 * HPD buffer on GTT is allocated by amdkfd, no need to waste
-		 * space in GTT for pipelines we don't initialize
-		 */
-		pipe_hpd_addr = dqm->pipelines_addr + i * CIK_HPD_EOP_BYTES;
-		pr_debug("kfd: pipeline address %llX\n", pipe_hpd_addr);
-		/* = log2(bytes/4)-1 */
-		dqm->dev->kfd2kgd->init_pipeline(dqm->dev->kgd, inx,
-				CIK_HPD_EOP_BYTES_LOG2 - 3, pipe_hpd_addr);
-	}
-
 	return 0;
 }
 
 static void init_interrupts(struct device_queue_manager *dqm)
 {
 	unsigned int i;
 
 	BUG_ON(dqm == NULL);
 
 	for (i = 0 ; i < get_pipes_num(dqm) ; i++)
 		dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd,
 				i + get_first_pipe(dqm));
 }
 
 static int init_scheduler(struct device_queue_manager *dqm)
 {
 	int retval;
 
 	BUG_ON(!dqm);
 
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 10/17] drm/amdgpu: allow split of queues with kfd at queue granularity v4
       [not found] ` <20170413213542.18802-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
                     ` (8 preceding siblings ...)
  2017-04-13 21:35   ` [PATCH 09/17] drm/amdgpu: take ownership of per-pipe configuration v2 Andres Rodriguez
@ 2017-04-13 21:35   ` Andres Rodriguez
  2017-04-13 21:35   ` [PATCH 11/17] drm/amdgpu: teach amdgpu how to enable interrupts for any pipe v3 Andres Rodriguez
                     ` (7 subsequent siblings)
  17 siblings, 0 replies; 26+ messages in thread
From: Andres Rodriguez @ 2017-04-13 21:35 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: andresx7-Re5JQEeQqe8AvxtiuMwx3w

Previously the queue/pipe split with kfd operated at pipe
granularity. This patch allows amdgpu to take ownership of an arbitrary
set of queues.

It also consolidates the last few magic numbers in the compute
initialization process into mec_init.

v2: support for gfx9
v3: renamed AMDGPU_MAX_QUEUES to AMDGPU_MAX_COMPUTE_QUEUES
v4: fix off-by-one in num_mec checks in *_compute_queue_acquire
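
The ownership policy works on a flat queue index; the arithmetic below
(a sketch, decode_queue_bit is a hypothetical helper) is the inverse of
the ordering used by the new queue_bitmap and matches the new
*_compute_queue_acquire() functions:

static void decode_queue_bit(unsigned int bit,
			     unsigned int queues_per_pipe,
			     unsigned int pipes_per_mec,
			     unsigned int *mec, unsigned int *pipe,
			     unsigned int *queue)
{
	*queue = bit % queues_per_pipe;
	*pipe = (bit / queues_per_pipe) % pipes_per_mec;
	*mec = (bit / queues_per_pipe) / pipes_per_mec;
}

With the 8 queues/pipe and 4 pipes/MEC configured here, bit 13 decodes
to mec 0, pipe 1, queue 5; the "amdgpu owns all queues in the first
pipe" policy therefore sets only bits 0-7.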

Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h             |  7 +++
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c           | 82 +++++++++++++++++-------
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c           | 81 +++++++++++++++++++-----
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c           | 84 +++++++++++++++++++++++--
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h |  1 +
 5 files changed, 211 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 6b294d2..61990be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -29,40 +29,42 @@
 #define __AMDGPU_H__
 
 #include <linux/atomic.h>
 #include <linux/wait.h>
 #include <linux/list.h>
 #include <linux/kref.h>
 #include <linux/rbtree.h>
 #include <linux/hashtable.h>
 #include <linux/dma-fence.h>
 
 #include <ttm/ttm_bo_api.h>
 #include <ttm/ttm_bo_driver.h>
 #include <ttm/ttm_placement.h>
 #include <ttm/ttm_module.h>
 #include <ttm/ttm_execbuf_util.h>
 
 #include <drm/drmP.h>
 #include <drm/drm_gem.h>
 #include <drm/amdgpu_drm.h>
 
+#include <kgd_kfd_interface.h>
+
 #include "amd_shared.h"
 #include "amdgpu_mode.h"
 #include "amdgpu_ih.h"
 #include "amdgpu_irq.h"
 #include "amdgpu_ucode.h"
 #include "amdgpu_ttm.h"
 #include "amdgpu_psp.h"
 #include "amdgpu_gds.h"
 #include "amdgpu_sync.h"
 #include "amdgpu_ring.h"
 #include "amdgpu_vm.h"
 #include "amd_powerplay.h"
 #include "amdgpu_dpm.h"
 #include "amdgpu_acp.h"
 #include "amdgpu_uvd.h"
 #include "amdgpu_vce.h"
 
 #include "gpu_scheduler.h"
 #include "amdgpu_virt.h"
 
@@ -875,49 +877,54 @@ struct amdgpu_rlc {
 
 	/* safe mode for updating CG/PG state */
 	bool in_safe_mode;
 	const struct amdgpu_rlc_funcs *funcs;
 
 	/* for firmware data */
 	u32 save_and_restore_offset;
 	u32 clear_state_descriptor_offset;
 	u32 avail_scratch_ram_locations;
 	u32 reg_restore_list_size;
 	u32 reg_list_format_start;
 	u32 reg_list_format_separate_start;
 	u32 starting_offsets_start;
 	u32 reg_list_format_size_bytes;
 	u32 reg_list_size_bytes;
 
 	u32 *register_list_format;
 	u32 *register_restore;
 };
 
+#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
+
 struct amdgpu_mec {
 	struct amdgpu_bo	*hpd_eop_obj;
 	u64			hpd_eop_gpu_addr;
 	struct amdgpu_bo	*mec_fw_obj;
 	u64			mec_fw_gpu_addr;
 	u32 num_mec;
 	u32 num_pipe_per_mec;
 	u32 num_queue_per_pipe;
 	void			*mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1];
+
+	/* These are the resources for which amdgpu takes ownership */
+	DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
 };
 
 struct amdgpu_kiq {
 	u64			eop_gpu_addr;
 	struct amdgpu_bo	*eop_obj;
 	struct amdgpu_ring	ring;
 	struct amdgpu_irq_src	irq;
 };
 
 /*
  * GPU scratch registers structures, functions & helpers
  */
 struct amdgpu_scratch {
 	unsigned		num_reg;
 	uint32_t                reg_base;
 	uint32_t		free_mask;
 };
 
 /*
  * GFX configurations
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 41bda98..8520b4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -32,41 +32,40 @@
 #include "amdgpu_ucode.h"
 #include "clearstate_ci.h"
 
 #include "dce/dce_8_0_d.h"
 #include "dce/dce_8_0_sh_mask.h"
 
 #include "bif/bif_4_1_d.h"
 #include "bif/bif_4_1_sh_mask.h"
 
 #include "gca/gfx_7_0_d.h"
 #include "gca/gfx_7_2_enum.h"
 #include "gca/gfx_7_2_sh_mask.h"
 
 #include "gmc/gmc_7_0_d.h"
 #include "gmc/gmc_7_0_sh_mask.h"
 
 #include "oss/oss_2_0_d.h"
 #include "oss/oss_2_0_sh_mask.h"
 
 #define GFX7_NUM_GFX_RINGS     1
-#define GFX7_NUM_COMPUTE_RINGS 8
 #define GFX7_MEC_HPD_SIZE      2048
 
 static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev);
 static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev);
 static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev);
 
 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
 MODULE_FIRMWARE("radeon/bonaire_me.bin");
 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
 
 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
 MODULE_FIRMWARE("radeon/hawaii_me.bin");
 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
 
 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
 MODULE_FIRMWARE("radeon/kaveri_me.bin");
@@ -2806,67 +2805,98 @@ static void gfx_v7_0_cp_compute_fini(struct amdgpu_device *adev)
 		}
 	}
 }
 
 static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
 {
 	int r;
 
 	if (adev->gfx.mec.hpd_eop_obj) {
 		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 
 		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
 		adev->gfx.mec.hpd_eop_obj = NULL;
 	}
 }
 
+static void gfx_v7_0_compute_queue_acquire(struct amdgpu_device *adev)
+{
+	int i, queue, pipe, mec;
+
+	/* policy for amdgpu compute queue ownership */
+	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
+		queue = i % adev->gfx.mec.num_queue_per_pipe;
+		pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+			% adev->gfx.mec.num_pipe_per_mec;
+		mec = (i / adev->gfx.mec.num_queue_per_pipe)
+			/ adev->gfx.mec.num_pipe_per_mec;
+
+		/* we've run out of HW */
+		if (mec >= adev->gfx.mec.num_mec)
+			break;
+
+		/* policy: amdgpu owns all queues in the first pipe */
+		if (mec == 0 && pipe == 0)
+			set_bit(i, adev->gfx.mec.queue_bitmap);
+	}
+
+	/* update the number of active compute rings */
+	adev->gfx.num_compute_rings =
+		bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+	/* If you hit this case and edited the policy, you probably just
+	 * need to increase AMDGPU_MAX_COMPUTE_RINGS */
+	if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
+		adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+}
+
 static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 {
 	int r;
 	u32 *hpd;
 	size_t mec_hpd_size;
 
-	/*
-	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
-	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
-	 * Nonetheless, we assign only 1 pipe because all other pipes will
-	 * be handled by KFD
-	 */
+	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
 	switch (adev->asic_type) {
 	case CHIP_KAVERI:
 		adev->gfx.mec.num_mec = 2;
 		break;
 	case CHIP_BONAIRE:
 	case CHIP_HAWAII:
 	case CHIP_KABINI:
 	case CHIP_MULLINS:
 	default:
 		adev->gfx.mec.num_mec = 1;
 		break;
 	}
 	adev->gfx.mec.num_pipe_per_mec = 4;
 	adev->gfx.mec.num_queue_per_pipe = 8;
 
+	/* take ownership of the relevant compute queues */
+	gfx_v7_0_compute_queue_acquire(adev);
+
+	/* allocate space for ALL pipes (even the ones we don't own) */
 	mec_hpd_size = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec
 		* GFX7_MEC_HPD_SIZE * 2;
 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
 		r = amdgpu_bo_create(adev,
 				     mec_hpd_size,
 				     PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 				     &adev->gfx.mec.hpd_eop_obj);
 		if (r) {
 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
 			return r;
 		}
 	}
 
 	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
 	if (unlikely(r != 0)) {
 		gfx_v7_0_mec_fini(adev);
 		return r;
 	}
 	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
@@ -4511,41 +4541,41 @@ static void gfx_v7_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
 }
 
 static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = {
 	.get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter,
 	.select_se_sh = &gfx_v7_0_select_se_sh,
 	.read_wave_data = &gfx_v7_0_read_wave_data,
 	.read_wave_sgprs = &gfx_v7_0_read_wave_sgprs,
 };
 
 static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
 	.enter_safe_mode = gfx_v7_0_enter_rlc_safe_mode,
 	.exit_safe_mode = gfx_v7_0_exit_rlc_safe_mode
 };
 
 static int gfx_v7_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
-	adev->gfx.num_compute_rings = GFX7_NUM_COMPUTE_RINGS;
+	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
 	adev->gfx.funcs = &gfx_v7_0_gfx_funcs;
 	adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs;
 	gfx_v7_0_set_ring_funcs(adev);
 	gfx_v7_0_set_irq_funcs(adev);
 	gfx_v7_0_set_gds_init(adev);
 
 	return 0;
 }
 
 static int gfx_v7_0_late_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int r;
 
 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
 	if (r)
 		return r;
 
 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
 	if (r)
@@ -4707,41 +4737,41 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
 	gb_addr_config &= ~GB_ADDR_CONFIG__ROW_SIZE_MASK;
 	switch (adev->gfx.config.mem_row_size_in_kb) {
 	case 1:
 	default:
 		gb_addr_config |= (0 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
 		break;
 	case 2:
 		gb_addr_config |= (1 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
 		break;
 	case 4:
 		gb_addr_config |= (2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
 		break;
 	}
 	adev->gfx.config.gb_addr_config = gb_addr_config;
 }
 
 static int gfx_v7_0_sw_init(void *handle)
 {
 	struct amdgpu_ring *ring;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	int i, r;
+	int i, r, ring_id;
 
 	/* EOP Event */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
 	if (r)
 		return r;
 
 	/* Privileged reg */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
 			      &adev->gfx.priv_reg_irq);
 	if (r)
 		return r;
 
 	/* Privileged inst */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
 			      &adev->gfx.priv_inst_irq);
 	if (r)
 		return r;
 
 	gfx_v7_0_scratch_init(adev);
 
@@ -4758,62 +4788,72 @@ static int gfx_v7_0_sw_init(void *handle)
 	}
 
 	/* allocate mec buffers */
 	r = gfx_v7_0_mec_init(adev);
 	if (r) {
 		DRM_ERROR("Failed to init MEC BOs!\n");
 		return r;
 	}
 
 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 		ring = &adev->gfx.gfx_ring[i];
 		ring->ring_obj = NULL;
 		sprintf(ring->name, "gfx");
 		r = amdgpu_ring_init(adev, ring, 1024,
 				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
 		if (r)
 			return r;
 	}
 
 	/* set up the compute queues */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+	for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) {
 		unsigned irq_type;
 
-		/* max 32 queues per MEC */
-		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
-			DRM_ERROR("Too many (%d) compute rings!\n", i);
-			break;
-		}
-		ring = &adev->gfx.compute_ring[i];
+		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+			continue;
+
+		ring = &adev->gfx.compute_ring[ring_id];
+
+		/* mec0 is me1 */
+		ring->me = ((i / adev->gfx.mec.num_queue_per_pipe)
+				/ adev->gfx.mec.num_pipe_per_mec)
+				+ 1;
+		ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+				% adev->gfx.mec.num_pipe_per_mec;
+		ring->queue = i % adev->gfx.mec.num_queue_per_pipe;
+
 		ring->ring_obj = NULL;
 		ring->use_doorbell = true;
-		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
-		ring->me = 1; /* first MEC */
-		ring->pipe = i / 8;
-		ring->queue = i % 8;
+		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
 		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
-		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
+
+		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+			+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+			+ ring->pipe;
+
 		/* type-2 packets are deprecated on MEC, use type-3 instead */
 		r = amdgpu_ring_init(adev, ring, 1024,
 				     &adev->gfx.eop_irq, irq_type);
 		if (r)
 			return r;
+
+		ring_id++;
 	}
 
 	/* reserve GDS, GWS and OA resource for gfx */
 	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
 				    &adev->gds.gds_gfx_bo, NULL, NULL);
 	if (r)
 		return r;
 
 	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
 				    &adev->gds.gws_gfx_bo, NULL, NULL);
 	if (r)
 		return r;
 
 	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
 				    &adev->gds.oa_gfx_bo, NULL, NULL);
 	if (r)
 		return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 29c487e..fc94e2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -35,41 +35,40 @@
 #include "gmc/gmc_8_2_d.h"
 #include "gmc/gmc_8_2_sh_mask.h"
 
 #include "oss/oss_3_0_d.h"
 #include "oss/oss_3_0_sh_mask.h"
 
 #include "bif/bif_5_0_d.h"
 #include "bif/bif_5_0_sh_mask.h"
 
 #include "gca/gfx_8_0_d.h"
 #include "gca/gfx_8_0_enum.h"
 #include "gca/gfx_8_0_sh_mask.h"
 #include "gca/gfx_8_0_enum.h"
 
 #include "dce/dce_10_0_d.h"
 #include "dce/dce_10_0_sh_mask.h"
 
 #include "smu/smu_7_1_3_d.h"
 
 #define GFX8_NUM_GFX_RINGS     1
-#define GFX8_NUM_COMPUTE_RINGS 8
 #define GFX8_MEC_HPD_SIZE 2048
 
 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
 
 #define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
 #define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
 #define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
 #define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
 #define SAMPLE_SPLIT(x)					((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
 #define BANK_WIDTH(x)					((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
 #define BANK_HEIGHT(x)					((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
 #define MACRO_TILE_ASPECT(x)				((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
 #define NUM_BANKS(x)					((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
 
 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
@@ -1397,67 +1396,102 @@ static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
 		ring->pipe = 1;
 	}
 
 	ring->queue = 0;
 	ring->eop_gpu_addr = kiq->eop_gpu_addr;
 	sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
 	r = amdgpu_ring_init(adev, ring, 1024,
 			     irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
 	if (r)
 		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
 
 	return r;
 }
 static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
 				   struct amdgpu_irq_src *irq)
 {
 	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
 	amdgpu_ring_fini(ring);
 }
 
+static void gfx_v8_0_compute_queue_acquire(struct amdgpu_device *adev)
+{
+	int i, queue, pipe, mec;
+
+	/* policy for amdgpu compute queue ownership */
+	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
+		queue = i % adev->gfx.mec.num_queue_per_pipe;
+		pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+			% adev->gfx.mec.num_pipe_per_mec;
+		mec = (i / adev->gfx.mec.num_queue_per_pipe)
+			/ adev->gfx.mec.num_pipe_per_mec;
+
+		/* we've run out of HW */
+		if (mec >= adev->gfx.mec.num_mec)
+			break;
+
+		/* policy: amdgpu owns all queues in the first pipe */
+		if (mec == 0 && pipe == 0)
+			set_bit(i, adev->gfx.mec.queue_bitmap);
+	}
+
+	/* update the number of active compute rings */
+	adev->gfx.num_compute_rings =
+		bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+	/* If you hit this case and edited the policy, you probably just
+	 * need to increase AMDGPU_MAX_COMPUTE_RINGS */
+	if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
+		adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+}
+
 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
 {
 	int r;
 	u32 *hpd;
 	size_t mec_hpd_size;
 
+	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
 	switch (adev->asic_type) {
 	case CHIP_FIJI:
 	case CHIP_TONGA:
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
 	case CHIP_POLARIS10:
 	case CHIP_CARRIZO:
 		adev->gfx.mec.num_mec = 2;
 		break;
 	case CHIP_TOPAZ:
 	case CHIP_STONEY:
 	default:
 		adev->gfx.mec.num_mec = 1;
 		break;
 	}
 
 	adev->gfx.mec.num_pipe_per_mec = 4;
 	adev->gfx.mec.num_queue_per_pipe = 8;
 
-	/* only 1 pipe of the first MEC is owned by amdgpu */
-	mec_hpd_size = 1 * 1 * adev->gfx.mec.num_queue_per_pipe * GFX8_MEC_HPD_SIZE;
+	/* take ownership of the relevant compute queues */
+	gfx_v8_0_compute_queue_acquire(adev);
+
+	mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
 
 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
 		r = amdgpu_bo_create(adev,
 				     mec_hpd_size,
 				     PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 				     &adev->gfx.mec.hpd_eop_obj);
 		if (r) {
 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
 			return r;
 		}
 	}
 
 	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
 	if (unlikely(r != 0)) {
 		gfx_v8_0_mec_fini(adev);
 		return r;
 	}
 	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
 			  &adev->gfx.mec.hpd_eop_gpu_addr);
@@ -2070,41 +2104,41 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
 	/* fix up row size */
 	switch (adev->gfx.config.mem_row_size_in_kb) {
 	case 1:
 	default:
 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
 		break;
 	case 2:
 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
 		break;
 	case 4:
 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
 		break;
 	}
 	adev->gfx.config.gb_addr_config = gb_addr_config;
 
 	return 0;
 }
 
 static int gfx_v8_0_sw_init(void *handle)
 {
-	int i, r;
+	int i, r, ring_id;
 	struct amdgpu_ring *ring;
 	struct amdgpu_kiq *kiq;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	/* KIQ event */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
 	if (r)
 		return r;
 
 	/* EOP Event */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
 	if (r)
 		return r;
 
 	/* Privileged reg */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
 			      &adev->gfx.priv_reg_irq);
 	if (r)
 		return r;
 
@@ -2137,63 +2171,76 @@ static int gfx_v8_0_sw_init(void *handle)
 	}
 
 	/* set up the gfx ring */
 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 		ring = &adev->gfx.gfx_ring[i];
 		ring->ring_obj = NULL;
 		sprintf(ring->name, "gfx");
 		/* no gfx doorbells on iceland */
 		if (adev->asic_type != CHIP_TOPAZ) {
 			ring->use_doorbell = true;
 			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
 		}
 
 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
 				     AMDGPU_CP_IRQ_GFX_EOP);
 		if (r)
 			return r;
 	}
 
 	/* set up the compute queues */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+	for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) {
 		unsigned irq_type;
 
-		/* max 32 queues per MEC */
-		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
-			DRM_ERROR("Too many (%d) compute rings!\n", i);
+		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+			continue;
+
+		if (WARN_ON(ring_id >= AMDGPU_MAX_COMPUTE_RINGS))
 			break;
-		}
-		ring = &adev->gfx.compute_ring[i];
+
+		ring = &adev->gfx.compute_ring[ring_id];
+
+		/* mec0 is me1 */
+		ring->me = ((i / adev->gfx.mec.num_queue_per_pipe)
+				/ adev->gfx.mec.num_pipe_per_mec)
+				+ 1;
+		ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+				% adev->gfx.mec.num_pipe_per_mec;
+		ring->queue = i % adev->gfx.mec.num_queue_per_pipe;
+
 		ring->ring_obj = NULL;
 		ring->use_doorbell = true;
-		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
-		ring->me = 1; /* first MEC */
-		ring->pipe = i / 8;
-		ring->queue = i % 8;
-		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * GFX8_MEC_HPD_SIZE);
+		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX8_MEC_HPD_SIZE);
+		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
 		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
-		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
+
+		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+			+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+			+ ring->pipe;
+
 		/* type-2 packets are deprecated on MEC, use type-3 instead */
 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
 				     irq_type);
 		if (r)
 			return r;
+
+		ring_id++;
 	}
 
 	r = gfx_v8_0_kiq_init(adev);
 	if (r) {
 		DRM_ERROR("Failed to init KIQ BOs!\n");
 		return r;
 	}
 
 	kiq = &adev->gfx.kiq;
 	r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
 	if (r)
 		return r;
 
 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
 	r = gfx_v8_0_compute_mqd_sw_init(adev);
 	if (r)
 		return r;
 
 	/* reserve GDS, GWS and OA resource for gfx */
 	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
@@ -5642,41 +5689,41 @@ static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
 				     uint32_t size, uint32_t *dst)
 {
 	wave_read_regs(
 		adev, simd, wave, 0,
 		start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
 }
 
 
 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
 	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
 	.select_se_sh = &gfx_v8_0_select_se_sh,
 	.read_wave_data = &gfx_v8_0_read_wave_data,
 	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
 };
 
 static int gfx_v8_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
-	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
+	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
 	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
 	gfx_v8_0_set_ring_funcs(adev);
 	gfx_v8_0_set_irq_funcs(adev);
 	gfx_v8_0_set_gds_init(adev);
 	gfx_v8_0_set_rlc_funcs(adev);
 
 	return 0;
 }
 
 static int gfx_v8_0_late_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int r;
 
 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
 	if (r)
 		return r;
 
 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
 	if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index d100b15..fef0fbf 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -21,41 +21,40 @@
  *
  */
 #include <linux/firmware.h>
 #include "drmP.h"
 #include "amdgpu.h"
 #include "amdgpu_gfx.h"
 #include "soc15.h"
 #include "soc15d.h"
 
 #include "vega10/soc15ip.h"
 #include "vega10/GC/gc_9_0_offset.h"
 #include "vega10/GC/gc_9_0_sh_mask.h"
 #include "vega10/vega10_enum.h"
 #include "vega10/HDP/hdp_4_0_offset.h"
 
 #include "soc15_common.h"
 #include "clearstate_gfx9.h"
 #include "v9_structs.h"
 
 #define GFX9_NUM_GFX_RINGS     1
-#define GFX9_NUM_COMPUTE_RINGS 8
 #define GFX9_NUM_SE		4
 #define GFX9_MEC_HPD_SIZE 2048
 #define RLCG_UCODE_LOADING_START_ADDRESS 0x2000
 
 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
 
 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
 {
 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
 		SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0)},
 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID1_SIZE),
 		SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID1), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID1)},
 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID2_SIZE),
 		SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID2), SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID2)},
 	{SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_BASE), SOC15_REG_OFFSET(GC, 0, mmGDS_VMID3_SIZE),
@@ -458,65 +457,99 @@ static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 
 		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
 		adev->gfx.mec.hpd_eop_obj = NULL;
 	}
 	if (adev->gfx.mec.mec_fw_obj) {
 		r = amdgpu_bo_reserve(adev->gfx.mec.mec_fw_obj, false);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve mec firmware bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.mec.mec_fw_obj);
 		amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
 
 		amdgpu_bo_unref(&adev->gfx.mec.mec_fw_obj);
 		adev->gfx.mec.mec_fw_obj = NULL;
 	}
 }
 
+static void gfx_v9_0_compute_queue_acquire(struct amdgpu_device *adev)
+{
+	int i, queue, pipe, mec;
+
+	/* policy for amdgpu compute queue ownership */
+	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
+		queue = i % adev->gfx.mec.num_queue_per_pipe;
+		pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+			% adev->gfx.mec.num_pipe_per_mec;
+		mec = (i / adev->gfx.mec.num_queue_per_pipe)
+			/ adev->gfx.mec.num_pipe_per_mec;
+
+		/* we've run out of HW */
+		if (mec >= adev->gfx.mec.num_mec)
+			break;
+
+		/* policy: amdgpu owns all queues in the first pipe */
+		if (mec == 0 && pipe == 0)
+			set_bit(i, adev->gfx.mec.queue_bitmap);
+	}
+
+	/* update the number of active compute rings */
+	adev->gfx.num_compute_rings =
+		bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
+	/* If you hit this case and edited the policy, you probably just
+	 * need to increase AMDGPU_MAX_COMPUTE_RINGS */
+	if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
+		adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+}
+
 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
 {
 	int r;
 	u32 *hpd;
 	const __le32 *fw_data;
 	unsigned fw_size;
 	u32 *fw;
 	size_t mec_hpd_size;
 
 	const struct gfx_firmware_header_v1_0 *mec_hdr;
 
+	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
+
 	switch (adev->asic_type) {
 	case CHIP_VEGA10:
 		adev->gfx.mec.num_mec = 2;
 		break;
 	default:
 		adev->gfx.mec.num_mec = 1;
 		break;
 	}
 
 	adev->gfx.mec.num_pipe_per_mec = 4;
 	adev->gfx.mec.num_queue_per_pipe = 8;
 
-	/* only 1 pipe of the first MEC is owned by amdgpu */
-	mec_hpd_size = 1 * 1 * adev->gfx.mec.num_queue_per_pipe * GFX9_MEC_HPD_SIZE;
+	/* take ownership of the relevant compute queues */
+	gfx_v9_0_compute_queue_acquire(adev);
+	mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
 
 	if (adev->gfx.mec.hpd_eop_obj == NULL) {
 		r = amdgpu_bo_create(adev,
 				     mec_hpd_size,
 				     PAGE_SIZE, true,
 				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
 				     &adev->gfx.mec.hpd_eop_obj);
 		if (r) {
 			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
 			return r;
 		}
 	}
 
 	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
 	if (unlikely(r != 0)) {
 		gfx_v9_0_mec_fini(adev);
 		return r;
 	}
 	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
 			  &adev->gfx.mec.hpd_eop_gpu_addr);
@@ -1013,41 +1046,41 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
 	amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
 	amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
 				PACKET3_DMA_DATA_SRC_SEL(2)));
 	amdgpu_ring_write(ring, 0);
 	amdgpu_ring_write(ring, 0);
 	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
 	amdgpu_ring_write(ring, 0);
 	amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_size);
 
 
 	gfx_v9_0_write_data_to_reg(ring, 0, false,
 				   amdgpu_gds_reg_offset[0].mem_size, 0);
 
 	amdgpu_ring_commit(ring);
 
 	return 0;
 }
 
 static int gfx_v9_0_sw_init(void *handle)
 {
-	int i, r;
+	int i, r, ring_id;
 	struct amdgpu_ring *ring;
 	struct amdgpu_kiq *kiq;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	/* KIQ event */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 178, &adev->gfx.kiq.irq);
 	if (r)
 		return r;
 
 	/* EOP Event */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 181, &adev->gfx.eop_irq);
 	if (r)
 		return r;
 
 	/* Privileged reg */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_GRBM_CP, 184,
 			      &adev->gfx.priv_reg_irq);
 	if (r)
 		return r;
 
@@ -1070,44 +1103,58 @@ static int gfx_v9_0_sw_init(void *handle)
 	r = gfx_v9_0_mec_init(adev);
 	if (r) {
 		DRM_ERROR("Failed to init MEC BOs!\n");
 		return r;
 	}
 
 	/* set up the gfx ring */
 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 		ring = &adev->gfx.gfx_ring[i];
 		ring->ring_obj = NULL;
 		sprintf(ring->name, "gfx");
 		ring->use_doorbell = true;
 		ring->doorbell_index = AMDGPU_DOORBELL64_GFX_RING0 << 1;
 		r = amdgpu_ring_init(adev, ring, 1024,
 				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
 		if (r)
 			return r;
 	}
 
 	/* set up the compute queues */
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
+	for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) {
+		unsigned irq_type;
+
+		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+			continue;
+
+		if (WARN_ON(ring_id >= AMDGPU_MAX_COMPUTE_RINGS))
+			break;
+
+		ring = &adev->gfx.compute_ring[ring_id];
+
+		/* mec0 is me1 */
+		ring->me = ((i / adev->gfx.mec.num_queue_per_pipe)
+				/ adev->gfx.mec.num_pipe_per_mec)
+				+ 1;
+		ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe)
+				% adev->gfx.mec.num_pipe_per_mec;
+		ring->queue = i % adev->gfx.mec.num_queue_per_pipe;
+
+		ring->ring_obj = NULL;
+		ring->use_doorbell = true;
+		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX9_MEC_HPD_SIZE);
+		ring->doorbell_index = (AMDGPU_DOORBELL64_MEC_RING0 + ring_id) << 1;
+		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+			+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+			+ ring->pipe;
+
+		/* type-2 packets are deprecated on MEC, use type-3 instead */
+		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
+				     irq_type);
+		if (r)
+			return r;
+
+		ring_id++;
+	}
+
-		unsigned irq_type;
-
-		/* max 32 queues per MEC */
-		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
-			DRM_ERROR("Too many (%d) compute rings!\n", i);
-			break;
-		}
-		ring = &adev->gfx.compute_ring[i];
-		ring->ring_obj = NULL;
-		ring->use_doorbell = true;
-		ring->doorbell_index = (AMDGPU_DOORBELL64_MEC_RING0 + i) << 1;
-		ring->me = 1; /* first MEC */
-		ring->pipe = i / 8;
-		ring->queue = i % 8;
-		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * GFX9_MEC_HPD_SIZE);
-		sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
-		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
-		/* type-2 packets are deprecated on MEC, use type-3 instead */
-		r = amdgpu_ring_init(adev, ring, 1024,
-				     &adev->gfx.eop_irq, irq_type);
-		if (r)
-			return r;
-	}
@@ -2453,41 +2525,41 @@ static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
 	gfx_v9_0_write_data_to_reg(ring, 0, false,
 				   amdgpu_gds_reg_offset[vmid].mem_size,
 				   gds_size);
 
 	/* GWS */
 	gfx_v9_0_write_data_to_reg(ring, 0, false,
 				   amdgpu_gds_reg_offset[vmid].gws,
 				   gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
 
 	/* OA */
 	gfx_v9_0_write_data_to_reg(ring, 0, false,
 				   amdgpu_gds_reg_offset[vmid].oa,
 				   (1 << (oa_size + oa_base)) - (1 << oa_base));
 }
 
 static int gfx_v9_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
-	adev->gfx.num_compute_rings = GFX9_NUM_COMPUTE_RINGS;
+	adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
 	gfx_v9_0_set_ring_funcs(adev);
 	gfx_v9_0_set_irq_funcs(adev);
 	gfx_v9_0_set_gds_init(adev);
 	gfx_v9_0_set_rlc_funcs(adev);
 
 	return 0;
 }
 
 static int gfx_v9_0_late_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int r;
 
 	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
 	if (r)
 		return r;
 
 	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
 	if (r)
 		return r;
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index a09d9f3..67f6d19 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -16,40 +16,41 @@
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
 /*
  * This file defines the private interface between the
  * AMD kernel graphics drivers and the AMD KFD.
  */
 
 #ifndef KGD_KFD_INTERFACE_H_INCLUDED
 #define KGD_KFD_INTERFACE_H_INCLUDED
 
 #include <linux/types.h>
 
 struct pci_dev;
 
 #define KFD_INTERFACE_VERSION 1
+#define KGD_MAX_QUEUES 128
 
 struct kfd_dev;
 struct kgd_dev;
 
 struct kgd_mem;
 
 enum kgd_memory_pool {
 	KGD_POOL_SYSTEM_CACHEABLE = 1,
 	KGD_POOL_SYSTEM_WRITECOMBINE = 2,
 	KGD_POOL_FRAMEBUFFER = 3,
 };
 
 enum kgd_engine_type {
 	KGD_ENGINE_PFP = 1,
 	KGD_ENGINE_ME,
 	KGD_ENGINE_CE,
 	KGD_ENGINE_MEC1,
 	KGD_ENGINE_MEC2,
 	KGD_ENGINE_RLC,
 	KGD_ENGINE_SDMA1,
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 11/17] drm/amdgpu: teach amdgpu how to enable interrupts for any pipe v3
       [not found] ` <20170413213542.18802-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
                     ` (9 preceding siblings ...)
  2017-04-13 21:35   ` [PATCH 10/17] drm/amdgpu: allow split of queues with kfd at queue granularity v4 Andres Rodriguez
@ 2017-04-13 21:35   ` Andres Rodriguez
  2017-04-13 21:35   ` [PATCH 12/17] drm/amdkfd: allow split HQD on per-queue granularity v4 Andres Rodriguez
                     ` (6 subsequent siblings)
  17 siblings, 0 replies; 26+ messages in thread
From: Andres Rodriguez @ 2017-04-13 21:35 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: andresx7-Re5JQEeQqe8AvxtiuMwx3w

The current implementation is hardcoded to enable ME1/PIPE0 interrupts
only.

This patch allows amdgpu to enable interrupts for any pipe of ME1.

v2: added gfx9 support
v3: use soc15_grbm_select for gfx9
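
For illustration (not part of the patch), enabling EOP timestamp
interrupts on every pipe of ME1 now becomes a simple loop. A minimal
sketch against the gfx8 helper; the call site is hypothetical:

	int pipe;

	for (pipe = 0; pipe < adev->gfx.mec.num_pipe_per_mec; ++pipe)
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, pipe,
						AMDGPU_IRQ_STATE_ENABLE);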

Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 48 ++++++++++++---------------------
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 33 +++++++++++------------
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 50 +++++++++++------------------------
 3 files changed, 49 insertions(+), 82 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 8520b4b..8969c69 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -5047,76 +5047,62 @@ static void gfx_v7_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
 	switch (state) {
 	case AMDGPU_IRQ_STATE_DISABLE:
 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
 		cp_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
 		break;
 	case AMDGPU_IRQ_STATE_ENABLE:
 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
 		cp_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
 		break;
 	default:
 		break;
 	}
 }
 
 static void gfx_v7_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
 						     int me, int pipe,
 						     enum amdgpu_interrupt_state state)
 {
-	u32 mec_int_cntl, mec_int_cntl_reg;
-
-	/*
-	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
-	 * handles the setting of interrupts for this specific pipe. All other
-	 * pipes' interrupts are set by amdkfd.
+	/* ME 0 is for graphics and ME 2 is reserved for HW scheduling,
+	 * so we should only really be configuring ME 1, i.e. MEC0
 	 */
-
-	if (me == 1) {
-		switch (pipe) {
-		case 0:
-			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
-			break;
-		default:
-			DRM_DEBUG("invalid pipe %d\n", pipe);
-			return;
-		}
-	} else {
-		DRM_DEBUG("invalid me %d\n", me);
+	if (me != 1) {
+		DRM_ERROR("Ignoring request to enable interrupts for invalid me:%d\n", me);
 		return;
 	}
 
-	switch (state) {
-	case AMDGPU_IRQ_STATE_DISABLE:
-		mec_int_cntl = RREG32(mec_int_cntl_reg);
-		mec_int_cntl &= ~CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
-		WREG32(mec_int_cntl_reg, mec_int_cntl);
-		break;
-	case AMDGPU_IRQ_STATE_ENABLE:
-		mec_int_cntl = RREG32(mec_int_cntl_reg);
-		mec_int_cntl |= CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK;
-		WREG32(mec_int_cntl_reg, mec_int_cntl);
-		break;
-	default:
-		break;
+	if (pipe >= adev->gfx.mec.num_pipe_per_mec) {
+		DRM_ERROR("Ignoring request to enable interrupts for invalid "
+				"me:%d pipe:%d\n", me, pipe);
+		return;
 	}
+
+	mutex_lock(&adev->srbm_mutex);
+	cik_srbm_select(adev, me, pipe, 0, 0);
+
+	WREG32_FIELD(CPC_INT_CNTL, TIME_STAMP_INT_ENABLE,
+			state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
+
+	cik_srbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
 }
 
 static int gfx_v7_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
 					     struct amdgpu_irq_src *src,
 					     unsigned type,
 					     enum amdgpu_interrupt_state state)
 {
 	u32 cp_int_cntl;
 
 	switch (state) {
 	case AMDGPU_IRQ_STATE_DISABLE:
 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
 		cp_int_cntl &= ~CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK;
 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
 		break;
 	case AMDGPU_IRQ_STATE_ENABLE:
 		cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
 		cp_int_cntl |= CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK;
 		WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
 		break;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index fc94e2b..8cc9874 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6769,61 +6769,60 @@ static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
 				  uint32_t val)
 {
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
 	amdgpu_ring_write(ring, (1 << 16)); /* no inc addr */
 	amdgpu_ring_write(ring, reg);
 	amdgpu_ring_write(ring, 0);
 	amdgpu_ring_write(ring, val);
 }
 
 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
 						 enum amdgpu_interrupt_state state)
 {
 	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
 }
 
 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
 						     int me, int pipe,
 						     enum amdgpu_interrupt_state state)
 {
-	/*
-	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
-	 * handles the setting of interrupts for this specific pipe. All other
-	 * pipes' interrupts are set by amdkfd.
-	 */
+	/* Me 0 is reserved for graphics */
+	if (me < 1 || me > adev->gfx.mec.num_mec) {
+		DRM_ERROR("Ignoring request to enable interrupts for invalid me:%d\n", me);
+		return;
+	}
 
-	if (me == 1) {
-		switch (pipe) {
-		case 0:
-			break;
-		default:
-			DRM_DEBUG("invalid pipe %d\n", pipe);
-			return;
-		}
-	} else {
-		DRM_DEBUG("invalid me %d\n", me);
+	if (pipe >= adev->gfx.mec.num_pipe_per_mec) {
+		DRM_ERROR("Ignoring request to enable interrupts for invalid "
+				"me:%d pipe:%d\n", me, pipe);
 		return;
 	}
 
-	WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
-		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
+	mutex_lock(&adev->srbm_mutex);
+	vi_srbm_select(adev, me, pipe, 0, 0);
+
+	WREG32_FIELD(CPC_INT_CNTL, TIME_STAMP_INT_ENABLE,
+			state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
+
+	vi_srbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
 }
 
 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
 					     struct amdgpu_irq_src *source,
 					     unsigned type,
 					     enum amdgpu_interrupt_state state)
 {
 	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
 
 	return 0;
 }
 
 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
 					      struct amdgpu_irq_src *source,
 					      unsigned type,
 					      enum amdgpu_interrupt_state state)
 {
 	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
 		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index fef0fbf..6208493 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3257,78 +3257,60 @@ static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
 
 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
 						 enum amdgpu_interrupt_state state)
 {
 	switch (state) {
 	case AMDGPU_IRQ_STATE_DISABLE:
 	case AMDGPU_IRQ_STATE_ENABLE:
 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
 			       TIME_STAMP_INT_ENABLE,
 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
 		break;
 	default:
 		break;
 	}
 }
 
 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
 						     int me, int pipe,
 						     enum amdgpu_interrupt_state state)
 {
-	u32 mec_int_cntl, mec_int_cntl_reg;
-
-	/*
-	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
-	 * handles the setting of interrupts for this specific pipe. All other
-	 * pipes' interrupts are set by amdkfd.
-	 */
-
-	if (me == 1) {
-		switch (pipe) {
-		case 0:
-			mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
-			break;
-		default:
-			DRM_DEBUG("invalid pipe %d\n", pipe);
-			return;
-		}
-	} else {
-		DRM_DEBUG("invalid me %d\n", me);
+	/* Me 0 is reserved for graphics */
+	if (me < 1 || me > adev->gfx.mec.num_mec) {
+		DRM_ERROR("Ignoring request to enable interrupts for invalid me:%d\n", me);
 		return;
 	}
 
-	switch (state) {
-	case AMDGPU_IRQ_STATE_DISABLE:
-		mec_int_cntl = RREG32(mec_int_cntl_reg);
-		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
-					     TIME_STAMP_INT_ENABLE, 0);
-		WREG32(mec_int_cntl_reg, mec_int_cntl);
-		break;
-	case AMDGPU_IRQ_STATE_ENABLE:
-		mec_int_cntl = RREG32(mec_int_cntl_reg);
-		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
-					     TIME_STAMP_INT_ENABLE, 1);
-		WREG32(mec_int_cntl_reg, mec_int_cntl);
-		break;
-	default:
-		break;
+	if (pipe >= adev->gfx.mec.num_pipe_per_mec) {
+		DRM_ERROR("Ignoring request to enable interrupts for invalid "
+				"me:%d pipe:%d\n", me, pipe);
+		return;
 	}
+
+	mutex_lock(&adev->srbm_mutex);
+	soc15_grbm_select(adev, me, pipe, 0, 0);
+
+	WREG32_FIELD15(GC, 0, CPC_INT_CNTL, TIME_STAMP_INT_ENABLE,
+			state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
+
+	soc15_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
 }
 
 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
 					     struct amdgpu_irq_src *source,
 					     unsigned type,
 					     enum amdgpu_interrupt_state state)
 {
 	switch (state) {
 	case AMDGPU_IRQ_STATE_DISABLE:
 	case AMDGPU_IRQ_STATE_ENABLE:
 		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
 			       PRIV_REG_INT_ENABLE,
 			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
 		break;
 	default:
 		break;
 	}
 
 	return 0;
 }
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 12/17] drm/amdkfd: allow split HQD on per-queue granularity v4
       [not found] ` <20170413213542.18802-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
                     ` (10 preceding siblings ...)
  2017-04-13 21:35   ` [PATCH 11/17] drm/amdgpu: teach amdgpu how to enable interrupts for any pipe v3 Andres Rodriguez
@ 2017-04-13 21:35   ` Andres Rodriguez
  2017-04-13 21:35   ` [PATCH 13/17] drm/amdgpu: remove duplicate magic constants from amdgpu_amdkfd_gfx*.c Andres Rodriguez
                     ` (5 subsequent siblings)
  17 siblings, 0 replies; 26+ messages in thread
From: Andres Rodriguez @ 2017-04-13 21:35 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: andresx7-Re5JQEeQqe8AvxtiuMwx3w

Update the KGD to KFD interface to allow sharing pipes at queue
granularity instead of pipe granularity.

This allows for more interesting pipe/queue splits.

v2: fix overflow check for res.queue_mask
v3: fix shift overflow when setting res.queue_mask
v4: fix comment in is_pipeline_enabled()
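
As a rough sketch (hypothetical helper, not in this patch), the shared
bitmap is indexed queue-major within pipe within MEC, so ownership of a
specific HQD can be tested as:

	static inline bool kfd_owns_queue(
			const struct kgd2kfd_shared_resources *res,
			unsigned int mec, unsigned int pipe,
			unsigned int queue)
	{
		unsigned int bit = (mec * res->num_pipe_per_mec + pipe)
				* res->num_queue_per_pipe + queue;

		/* bit n == 1 means queue n is available for KFD */
		return test_bit(bit, res->queue_bitmap);
	}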

Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c         |  22 ++++-
 drivers/gpu/drm/amd/amdkfd/kfd_device.c            |   4 +
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 100 ++++++++++++++-------
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  |  10 +--
 .../drm/amd/amdkfd/kfd_device_queue_manager_cik.c  |   2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c    |   3 +-
 .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c |   2 +-
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h    |  17 ++--
 drivers/gpu/drm/radeon/radeon_kfd.c                |  21 ++++-
 9 files changed, 126 insertions(+), 55 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 3200ff9..8fc5aa3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -78,48 +78,64 @@ bool amdgpu_amdkfd_load_interface(struct amdgpu_device *adev)
 	return true;
 }
 
 void amdgpu_amdkfd_fini(void)
 {
 	if (kgd2kfd) {
 		kgd2kfd->exit();
 		symbol_put(kgd2kfd_init);
 	}
 }
 
 void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 {
 	if (kgd2kfd)
 		adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev,
 					adev->pdev, kfd2kgd);
 }
 
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
+	int i;
+	int last_valid_bit;
 	if (adev->kfd) {
 		struct kgd2kfd_shared_resources gpu_resources = {
 			.compute_vmid_bitmap = 0xFF00,
-
-			.first_compute_pipe = 1,
-			.compute_pipe_count = 4 - 1,
+			.num_mec = adev->gfx.mec.num_mec,
+			.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
+			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe
 		};
 
+		/* this is going to have a few of the MSBs set that we need to
+		 * clear */
+		bitmap_complement(gpu_resources.queue_bitmap,
+				  adev->gfx.mec.queue_bitmap,
+				  KGD_MAX_QUEUES);
+
+		/* According to linux/bitmap.h we shouldn't use bitmap_clear if
+	 * nbits is not a compile-time constant */
+		last_valid_bit = adev->gfx.mec.num_mec
+				* adev->gfx.mec.num_pipe_per_mec
+				* adev->gfx.mec.num_queue_per_pipe;
+		for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
+			clear_bit(i, gpu_resources.queue_bitmap);
+
 		amdgpu_doorbell_get_kfd_info(adev,
 				&gpu_resources.doorbell_physical_address,
 				&gpu_resources.doorbell_aperture_size,
 				&gpu_resources.doorbell_start_offset);
 
 		kgd2kfd->device_init(adev->kfd, &gpu_resources);
 	}
 }
 
 void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
 {
 	if (adev->kfd) {
 		kgd2kfd->device_exit(adev->kfd);
 		adev->kfd = NULL;
 	}
 }
 
 void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
 		const void *ih_ring_entry)
 {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 3f95f7c..88187bf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -209,40 +209,44 @@ static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid,
 			pasid,
 			address,
 			flags);
 
 	dev = kfd_device_by_pci_dev(pdev);
 	BUG_ON(dev == NULL);
 
 	kfd_signal_iommu_event(dev, pasid, address,
 			flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC);
 
 	return AMD_IOMMU_INV_PRI_RSP_INVALID;
 }
 
 bool kgd2kfd_device_init(struct kfd_dev *kfd,
 			 const struct kgd2kfd_shared_resources *gpu_resources)
 {
 	unsigned int size;
 
 	kfd->shared_resources = *gpu_resources;
 
+	/* We only use the first MEC */
+	if (kfd->shared_resources.num_mec > 1)
+		kfd->shared_resources.num_mec = 1;
+
 	/* calculate max size of mqds needed for queues */
 	size = max_num_of_queues_per_device *
 			kfd->device_info->mqd_size_aligned;
 
 	/*
 	 * calculate max size of runlist packet.
 	 * There can be only 2 packets at once
 	 */
 	size += (KFD_MAX_NUM_OF_PROCESSES * sizeof(struct pm4_map_process) +
 		max_num_of_queues_per_device *
 		sizeof(struct pm4_map_queues) + sizeof(struct pm4_runlist)) * 2;
 
 	/* Add size of HIQ & DIQ */
 	size += KFD_KERNEL_QUEUE_SIZE * 2;
 
 	/* add another 512KB for all other allocations on gart (HPD, fences) */
 	size += 512 * 1024;
 
 	if (kfd->kfd2kgd->init_gtt_mem_allocation(
 			kfd->kgd, size, &kfd->gtt_mem,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index c064dea..5f28720 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -46,55 +46,78 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
 
 static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
 static int destroy_queues_cpsch(struct device_queue_manager *dqm,
 				bool preempt_static_queues, bool lock);
 
 static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
 					struct queue *q,
 					struct qcm_process_device *qpd);
 
 static void deallocate_sdma_queue(struct device_queue_manager *dqm,
 				unsigned int sdma_queue_id);
 
 static inline
 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
 {
 	if (type == KFD_QUEUE_TYPE_SDMA)
 		return KFD_MQD_TYPE_SDMA;
 	return KFD_MQD_TYPE_CP;
 }
 
-unsigned int get_first_pipe(struct device_queue_manager *dqm)
+static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
+{
+	int i;
+	int pipe_offset = (mec * dqm->dev->shared_resources.num_pipe_per_mec
+			+ pipe) * dqm->dev->shared_resources.num_queue_per_pipe;
+
+	/* queue is available for KFD usage if bit is 1 */
+	for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
+		if (test_bit(pipe_offset + i,
+			      dqm->dev->shared_resources.queue_bitmap))
+			return true;
+	return false;
+}
+
+unsigned int get_mec_num(struct device_queue_manager *dqm)
+{
+	BUG_ON(!dqm || !dqm->dev);
+
+	return dqm->dev->shared_resources.num_mec;
+}
+
+unsigned int get_queues_num(struct device_queue_manager *dqm)
 {
 	BUG_ON(!dqm || !dqm->dev);
-	return dqm->dev->shared_resources.first_compute_pipe;
+	return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
+				KGD_MAX_QUEUES);
 }
 
-unsigned int get_pipes_num(struct device_queue_manager *dqm)
+unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
 {
 	BUG_ON(!dqm || !dqm->dev);
-	return dqm->dev->shared_resources.compute_pipe_count;
+	return dqm->dev->shared_resources.num_queue_per_pipe;
 }
 
-static inline unsigned int get_pipes_num_cpsch(void)
+unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
 {
-	return PIPE_PER_ME_CP_SCHEDULING;
+	BUG_ON(!dqm || !dqm->dev);
+	return dqm->dev->shared_resources.num_pipe_per_mec;
 }
 
 void program_sh_mem_settings(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd)
 {
 	return dqm->dev->kfd2kgd->program_sh_mem_settings(
 						dqm->dev->kgd, qpd->vmid,
 						qpd->sh_mem_config,
 						qpd->sh_mem_ape1_base,
 						qpd->sh_mem_ape1_limit,
 						qpd->sh_mem_bases);
 }
 
 static int allocate_vmid(struct device_queue_manager *dqm,
 			struct qcm_process_device *qpd,
 			struct queue *q)
 {
 	int bit, allocated_vmid;
 
 	if (dqm->vmid_bitmap == 0)
@@ -183,63 +206,67 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
 
 	/*
 	 * Unconditionally increment this counter, regardless of the queue's
 	 * type or whether the queue is active.
 	 */
 	dqm->total_queue_count++;
 	pr_debug("Total of %d queues are accountable so far\n",
 			dqm->total_queue_count);
 
 	mutex_unlock(&dqm->lock);
 	return 0;
 }
 
 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
 {
 	bool set;
 	int pipe, bit, i;
 
 	set = false;
 
-	for (pipe = dqm->next_pipe_to_allocate, i = 0; i < get_pipes_num(dqm);
-			pipe = ((pipe + 1) % get_pipes_num(dqm)), ++i) {
+	for (pipe = dqm->next_pipe_to_allocate, i = 0; i < get_pipes_per_mec(dqm);
+			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {
+
+		if (!is_pipe_enabled(dqm, 0, pipe))
+			continue;
+
 		if (dqm->allocated_queues[pipe] != 0) {
 			bit = find_first_bit(
 				(unsigned long *)&dqm->allocated_queues[pipe],
-				QUEUES_PER_PIPE);
+				get_queues_per_pipe(dqm));
 
 			clear_bit(bit,
 				(unsigned long *)&dqm->allocated_queues[pipe]);
 			q->pipe = pipe;
 			q->queue = bit;
 			set = true;
 			break;
 		}
 	}
 
 	if (!set)
 		return -EBUSY;
 
 	pr_debug("kfd: DQM %s hqd slot - pipe (%d) queue(%d)\n",
 				__func__, q->pipe, q->queue);
 	/* horizontal hqd allocation */
-	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_num(dqm);
+	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);
 
 	return 0;
 }
 
 static inline void deallocate_hqd(struct device_queue_manager *dqm,
 				struct queue *q)
 {
 	set_bit(q->queue, (unsigned long *)&dqm->allocated_queues[q->pipe]);
 }
 
 static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
 					struct queue *q,
 					struct qcm_process_device *qpd)
 {
 	int retval;
 	struct mqd_manager *mqd;
 
 	BUG_ON(!dqm || !q || !qpd);
 
 	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
@@ -452,95 +479,84 @@ static int unregister_process_nocpsch(struct device_queue_manager *dqm,
 out:
 	mutex_unlock(&dqm->lock);
 	return retval;
 }
 
 static int
 set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
 			unsigned int vmid)
 {
 	uint32_t pasid_mapping;
 
 	pasid_mapping = (pasid == 0) ? 0 :
 		(uint32_t)pasid |
 		ATC_VMID_PASID_MAPPING_VALID;
 
 	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
 						dqm->dev->kgd, pasid_mapping,
 						vmid);
 }
 
-int init_pipelines(struct device_queue_manager *dqm,
-			unsigned int pipes_num, unsigned int first_pipe)
-{
-	BUG_ON(!dqm || !dqm->dev);
-
-	pr_debug("kfd: In func %s\n", __func__);
-
-	return 0;
-}
-
 static void init_interrupts(struct device_queue_manager *dqm)
 {
 	unsigned int i;
 
 	BUG_ON(dqm == NULL);
 
-	for (i = 0 ; i < get_pipes_num(dqm) ; i++)
-		dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd,
-				i + get_first_pipe(dqm));
+	for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
+		if (is_pipe_enabled(dqm, 0, i))
+			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
 }
 
 static int init_scheduler(struct device_queue_manager *dqm)
 {
-	int retval;
+	int retval = 0;
 
 	BUG_ON(!dqm);
 
 	pr_debug("kfd: In %s\n", __func__);
 
-	retval = init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm));
 	return retval;
 }
 
 static int initialize_nocpsch(struct device_queue_manager *dqm)
 {
 	int i;
 
 	BUG_ON(!dqm);
 
 	pr_debug("kfd: In func %s num of pipes: %d\n",
-			__func__, get_pipes_num(dqm));
+			__func__, get_pipes_per_mec(dqm));
 
 	mutex_init(&dqm->lock);
 	INIT_LIST_HEAD(&dqm->queues);
 	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
 	dqm->sdma_queue_count = 0;
-	dqm->allocated_queues = kcalloc(get_pipes_num(dqm),
+	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
 					sizeof(unsigned int), GFP_KERNEL);
 	if (!dqm->allocated_queues) {
 		mutex_destroy(&dqm->lock);
 		return -ENOMEM;
 	}
 
-	for (i = 0; i < get_pipes_num(dqm); i++)
-		dqm->allocated_queues[i] = (1 << QUEUES_PER_PIPE) - 1;
+	for (i = 0; i < get_pipes_per_mec(dqm); i++)
+		dqm->allocated_queues[i] = (1 << get_queues_per_pipe(dqm)) - 1;
 
 	dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;
 	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;
 
 	init_scheduler(dqm);
 	return 0;
 }
 
 static void uninitialize_nocpsch(struct device_queue_manager *dqm)
 {
 	int i;
 
 	BUG_ON(!dqm);
 
 	BUG_ON(dqm->queue_count > 0 || dqm->processes_count > 0);
 
 	kfree(dqm->allocated_queues);
 	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
 		kfree(dqm->mqds[i]);
 	mutex_destroy(&dqm->lock);
@@ -613,71 +629,87 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
 		return retval;
 	}
 
 	retval = mqd->load_mqd(mqd, q->mqd, 0,
 				0, NULL);
 	if (retval != 0) {
 		deallocate_sdma_queue(dqm, q->sdma_id);
 		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
 		return retval;
 	}
 
 	return 0;
 }
 
 /*
  * Device Queue Manager implementation for cp scheduler
  */
 
 static int set_sched_resources(struct device_queue_manager *dqm)
 {
+	int i;
 	struct scheduling_resources res;
-	unsigned int queue_num, queue_mask;
 
 	BUG_ON(!dqm);
 
 	pr_debug("kfd: In func %s\n", __func__);
 
-	queue_num = get_pipes_num_cpsch() * QUEUES_PER_PIPE;
-	queue_mask = (1 << queue_num) - 1;
 	res.vmid_mask = (1 << VMID_PER_DEVICE) - 1;
 	res.vmid_mask <<= KFD_VMID_START_OFFSET;
-	res.queue_mask = queue_mask << (get_first_pipe(dqm) * QUEUES_PER_PIPE);
+
+	/* Avoid touching the internal representation of queue_bitmap directly.
+	 * Even though doing a simple memcpy might sound tempting, it would
+	 * silently break if the implementation of bitmaps is changed */
+	res.queue_mask = 0;
+	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
+		if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
+			continue;
+
+		/* This situation may be hit in the future if a new HW
+		 * generation exposes more than 64 queues. If so, the
+		 * definition of res.queue_mask needs updating */
+		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
+			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
+			break;
+		}
+
+		res.queue_mask |= (1ull << i);
+	}
 	res.gws_mask = res.oac_mask = res.gds_heap_base =
 						res.gds_heap_size = 0;
 
 	pr_debug("kfd: scheduling resources:\n"
 			"      vmid mask: 0x%8X\n"
 			"      queue mask: 0x%8llX\n",
 			res.vmid_mask, res.queue_mask);
 
 	return pm_send_set_resources(&dqm->packets, &res);
 }
 
 static int initialize_cpsch(struct device_queue_manager *dqm)
 {
 	int retval;
 
 	BUG_ON(!dqm);
 
 	pr_debug("kfd: In func %s num of pipes: %d\n",
-			__func__, get_pipes_num_cpsch());
+			__func__, get_pipes_per_mec(dqm));
 
 	mutex_init(&dqm->lock);
 	INIT_LIST_HEAD(&dqm->queues);
 	dqm->queue_count = dqm->processes_count = 0;
 	dqm->sdma_queue_count = 0;
 	dqm->active_runlist = false;
 	retval = dqm->ops_asic_specific.initialize(dqm);
 	if (retval != 0)
 		goto fail_init_pipelines;
 
 	return 0;
 
 fail_init_pipelines:
 	mutex_destroy(&dqm->lock);
 	return retval;
 }
 
 static int start_cpsch(struct device_queue_manager *dqm)
 {
 	struct device_process_node *node;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index a625b91..66b9615 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -13,42 +13,40 @@
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  *
  */
 
 #ifndef KFD_DEVICE_QUEUE_MANAGER_H_
 #define KFD_DEVICE_QUEUE_MANAGER_H_
 
 #include <linux/rwsem.h>
 #include <linux/list.h>
 #include "kfd_priv.h"
 #include "kfd_mqd_manager.h"
 
 #define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS	(500)
-#define QUEUES_PER_PIPE				(8)
-#define PIPE_PER_ME_CP_SCHEDULING		(3)
 #define CIK_VMID_NUM				(8)
 #define KFD_VMID_START_OFFSET			(8)
 #define VMID_PER_DEVICE				CIK_VMID_NUM
 #define KFD_DQM_FIRST_PIPE			(0)
 #define CIK_SDMA_QUEUES				(4)
 #define CIK_SDMA_QUEUES_PER_ENGINE		(2)
 #define CIK_SDMA_ENGINE_NUM			(2)
 
 struct device_process_node {
 	struct qcm_process_device *qpd;
 	struct list_head list;
 };
 
 /**
  * struct device_queue_manager_ops
  *
  * @create_queue: Queue creation routine.
  *
  * @destroy_queue: Queue destruction routine.
  *
@@ -165,37 +163,37 @@ struct device_queue_manager {
 	unsigned int		processes_count;
 	unsigned int		queue_count;
 	unsigned int		sdma_queue_count;
 	unsigned int		total_queue_count;
 	unsigned int		next_pipe_to_allocate;
 	unsigned int		*allocated_queues;
 	unsigned int		sdma_bitmap;
 	unsigned int		vmid_bitmap;
 	uint64_t		pipelines_addr;
 	struct kfd_mem_obj	*pipeline_mem;
 	uint64_t		fence_gpu_addr;
 	unsigned int		*fence_addr;
 	struct kfd_mem_obj	*fence_mem;
 	bool			active_runlist;
 };
 
 void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops);
 void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops);
 void program_sh_mem_settings(struct device_queue_manager *dqm,
 					struct qcm_process_device *qpd);
-int init_pipelines(struct device_queue_manager *dqm,
-		unsigned int pipes_num, unsigned int first_pipe);
-unsigned int get_first_pipe(struct device_queue_manager *dqm);
-unsigned int get_pipes_num(struct device_queue_manager *dqm);
+unsigned int get_mec_num(struct device_queue_manager *dqm);
+unsigned int get_queues_num(struct device_queue_manager *dqm);
+unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);
+unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);
 
 static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
 {
 	return (pdd->lds_base >> 16) & 0xFF;
 }
 
 static inline unsigned int
 get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd)
 {
 	return (pdd->lds_base >> 60) & 0x0E;
 }
 
 #endif /* KFD_DEVICE_QUEUE_MANAGER_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index c6f435a..48dc056 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -134,22 +134,22 @@ static int register_process_cik(struct device_queue_manager *dqm,
 }
 
 static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
 				struct qcm_process_device *qpd)
 {
 	uint32_t value = (1 << SDMA0_RLC0_VIRTUAL_ADDR__ATC__SHIFT);
 
 	if (q->process->is_32bit_user_mode)
 		value |= (1 << SDMA0_RLC0_VIRTUAL_ADDR__PTR32__SHIFT) |
 				get_sh_mem_bases_32(qpd_to_pdd(qpd));
 	else
 		value |= ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) <<
 				SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) &
 				SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK;
 
 	q->properties.sdma_vm_addr = value;
 }
 
 static int initialize_cpsch_cik(struct device_queue_manager *dqm)
 {
-	return init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm));
+	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index ca8c093..7131998 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -48,42 +48,41 @@ static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size)
 	header.count = packet_size/sizeof(uint32_t) - 2;
 	header.type = PM4_TYPE_3;
 
 	return header.u32all;
 }
 
 static void pm_calc_rlib_size(struct packet_manager *pm,
 				unsigned int *rlib_size,
 				bool *over_subscription)
 {
 	unsigned int process_count, queue_count;
 	unsigned int map_queue_size;
 
 	BUG_ON(!pm || !rlib_size || !over_subscription);
 
 	process_count = pm->dqm->processes_count;
 	queue_count = pm->dqm->queue_count;
 
 	/* check if there is over subscription*/
 	*over_subscription = false;
-	if ((process_count > 1) ||
-		queue_count > PIPE_PER_ME_CP_SCHEDULING * QUEUES_PER_PIPE) {
+	if ((process_count > 1) || queue_count > get_queues_num(pm->dqm)) {
 		*over_subscription = true;
 		pr_debug("kfd: over subscribed runlist\n");
 	}
 
 	map_queue_size =
 		(pm->dqm->dev->device_info->asic_family == CHIP_CARRIZO) ?
 		sizeof(struct pm4_mes_map_queues) :
 		sizeof(struct pm4_map_queues);
 	/* calculate run list ib allocation size */
 	*rlib_size = process_count * sizeof(struct pm4_map_process) +
 		     queue_count * map_queue_size;
 
 	/*
 	 * Increase the allocation size in case we need a chained run list
 	 * when over subscription
 	 */
 	if (*over_subscription)
 		*rlib_size += sizeof(struct pm4_runlist);
 
 	pr_debug("kfd: runlist ib size %d\n", *rlib_size);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index e1fb40b..32cdf2b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -192,41 +192,41 @@ int pqm_create_queue(struct process_queue_manager *pqm,
 	if (retval != 0)
 		return retval;
 
 	if (list_empty(&pqm->queues)) {
 		pdd->qpd.pqm = pqm;
 		dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);
 	}
 
 	pqn = kzalloc(sizeof(struct process_queue_node), GFP_KERNEL);
 	if (!pqn) {
 		retval = -ENOMEM;
 		goto err_allocate_pqn;
 	}
 
 	switch (type) {
 	case KFD_QUEUE_TYPE_SDMA:
 	case KFD_QUEUE_TYPE_COMPUTE:
 		/* check if there is over subscription */
 		if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
 		((dev->dqm->processes_count >= VMID_PER_DEVICE) ||
-		(dev->dqm->queue_count >= PIPE_PER_ME_CP_SCHEDULING * QUEUES_PER_PIPE))) {
+		(dev->dqm->queue_count >= get_queues_num(dev->dqm)))) {
 			pr_err("kfd: over-subscription is not allowed in radeon_kfd.sched_policy == 1\n");
 			retval = -EPERM;
 			goto err_create_queue;
 		}
 
 		retval = create_cp_queue(pqm, dev, &q, &q_properties, f, *qid);
 		if (retval != 0)
 			goto err_create_queue;
 		pqn->q = q;
 		pqn->kq = NULL;
 		retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd,
 						&q->properties.vmid);
 		pr_debug("DQM returned %d for create_queue\n", retval);
 		print_queue(q);
 		break;
 	case KFD_QUEUE_TYPE_DIQ:
 		kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ);
 		if (kq == NULL) {
 			retval = -ENOMEM;
 			goto err_create_queue;
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 67f6d19..91ef148 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -12,78 +12,85 @@
  * all copies or substantial portions of the Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
 /*
  * This file defines the private interface between the
  * AMD kernel graphics drivers and the AMD KFD.
  */
 
 #ifndef KGD_KFD_INTERFACE_H_INCLUDED
 #define KGD_KFD_INTERFACE_H_INCLUDED
 
 #include <linux/types.h>
+#include <linux/bitmap.h>
 
 struct pci_dev;
 
-#define KFD_INTERFACE_VERSION 1
+#define KFD_INTERFACE_VERSION 2
 #define KGD_MAX_QUEUES 128
 
 struct kfd_dev;
 struct kgd_dev;
 
 struct kgd_mem;
 
 enum kgd_memory_pool {
 	KGD_POOL_SYSTEM_CACHEABLE = 1,
 	KGD_POOL_SYSTEM_WRITECOMBINE = 2,
 	KGD_POOL_FRAMEBUFFER = 3,
 };
 
 enum kgd_engine_type {
 	KGD_ENGINE_PFP = 1,
 	KGD_ENGINE_ME,
 	KGD_ENGINE_CE,
 	KGD_ENGINE_MEC1,
 	KGD_ENGINE_MEC2,
 	KGD_ENGINE_RLC,
 	KGD_ENGINE_SDMA1,
 	KGD_ENGINE_SDMA2,
 	KGD_ENGINE_MAX
 };
 
 struct kgd2kfd_shared_resources {
 	/* Bit n == 1 means VMID n is available for KFD. */
 	unsigned int compute_vmid_bitmap;
 
-	/* Compute pipes are counted starting from MEC0/pipe0 as 0. */
-	unsigned int first_compute_pipe;
+	/* number of mec available from the hardware */
+	uint32_t num_mec;
 
-	/* Number of MEC pipes available for KFD. */
-	unsigned int compute_pipe_count;
+	/* number of pipes per mec */
+	uint32_t num_pipe_per_mec;
+
+	/* number of queues per pipe */
+	uint32_t num_queue_per_pipe;
+
+	/* Bit n == 1 means Queue n is available for KFD */
+	DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES);
 
 	/* Base address of doorbell aperture. */
 	phys_addr_t doorbell_physical_address;
 
 	/* Size in bytes of doorbell aperture. */
 	size_t doorbell_aperture_size;
 
 	/* Number of bytes at start of aperture reserved for KGD. */
 	size_t doorbell_start_offset;
 };
 
 /**
  * struct kfd2kgd_calls
  *
  * @init_gtt_mem_allocation: Allocate a buffer on the gart aperture.
  * The buffer can be used for mqds, hpds, kernel queue, fence and runlists
  *
  * @free_gtt_mem: Frees a buffer that was allocated on the gart aperture
  *
  * @get_vmem_size: Retrieves (physical) size of VRAM
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
index a06e3b1..699fe7f 100644
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -162,48 +162,63 @@ int radeon_kfd_init(void)
 	return ret;
 }
 
 void radeon_kfd_fini(void)
 {
 	if (kgd2kfd) {
 		kgd2kfd->exit();
 		symbol_put(kgd2kfd_init);
 	}
 }
 
 void radeon_kfd_device_probe(struct radeon_device *rdev)
 {
 	if (kgd2kfd)
 		rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev,
 			rdev->pdev, &kfd2kgd);
 }
 
 void radeon_kfd_device_init(struct radeon_device *rdev)
 {
+	int i, queue, pipe, mec;
+
 	if (rdev->kfd) {
 		struct kgd2kfd_shared_resources gpu_resources = {
 			.compute_vmid_bitmap = 0xFF00,
-
-			.first_compute_pipe = 1,
-			.compute_pipe_count = 4 - 1,
+			.num_mec = 1,
+			.num_pipe_per_mec = 4,
+			.num_queue_per_pipe = 8
 		};
 
+		bitmap_zero(gpu_resources.queue_bitmap, KGD_MAX_QUEUES);
+
+		for (i = 0; i < KGD_MAX_QUEUES; ++i) {
+			queue = i % gpu_resources.num_queue_per_pipe;
+			pipe = (i / gpu_resources.num_queue_per_pipe)
+				% gpu_resources.num_pipe_per_mec;
+			mec = (i / gpu_resources.num_queue_per_pipe)
+				/ gpu_resources.num_pipe_per_mec;
+
+			if (mec == 0 && pipe > 0)
+				set_bit(i, gpu_resources.queue_bitmap);
+		}
+
 		radeon_doorbell_get_kfd_info(rdev,
 				&gpu_resources.doorbell_physical_address,
 				&gpu_resources.doorbell_aperture_size,
 				&gpu_resources.doorbell_start_offset);
 
 		kgd2kfd->device_init(rdev->kfd, &gpu_resources);
 	}
 }
 
 void radeon_kfd_device_fini(struct radeon_device *rdev)
 {
 	if (rdev->kfd) {
 		kgd2kfd->device_exit(rdev->kfd);
 		rdev->kfd = NULL;
 	}
 }
 
 void radeon_kfd_interrupt(struct radeon_device *rdev, const void *ih_ring_entry)
 {
 	if (rdev->kfd)
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 13/17] drm/amdgpu: remove duplicate magic constants from amdgpu_amdkfd_gfx*.c
       [not found] ` <20170413213542.18802-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
                     ` (11 preceding siblings ...)
  2017-04-13 21:35   ` [PATCH 12/17] drm/amdkfd: allow split HQD on per-queue granularity v4 Andres Rodriguez
@ 2017-04-13 21:35   ` Andres Rodriguez
  2017-04-13 21:35   ` [PATCH 14/17] drm/amdgpu: allocate queues horizontally across pipes Andres Rodriguez
                     ` (4 subsequent siblings)
  17 siblings, 0 replies; 26+ messages in thread
From: Andres Rodriguez @ 2017-04-13 21:35 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: andresx7-Re5JQEeQqe8AvxtiuMwx3w

This information is already available in adev.
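
For example (sketch only), the CIK path goes from a per-file constant
to the topology stored in adev; the math is unchanged:

	/* before */
	mec  = (pipe_id / CIK_PIPE_PER_MEC) + 1;
	pipe = (pipe_id % CIK_PIPE_PER_MEC);

	/* after */
	mec  = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);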

Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 12 ++++++------
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 12 ++++++------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 910f9d3..5254562 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -22,42 +22,40 @@
 
 #include <linux/fdtable.h>
 #include <linux/uaccess.h>
 #include <linux/firmware.h>
 #include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
 #include "cikd.h"
 #include "cik_sdma.h"
 #include "amdgpu_ucode.h"
 #include "gfx_v7_0.h"
 #include "gca/gfx_7_2_d.h"
 #include "gca/gfx_7_2_enum.h"
 #include "gca/gfx_7_2_sh_mask.h"
 #include "oss/oss_2_0_d.h"
 #include "oss/oss_2_0_sh_mask.h"
 #include "gmc/gmc_7_1_d.h"
 #include "gmc/gmc_7_1_sh_mask.h"
 #include "cik_structs.h"
 
-#define CIK_PIPE_PER_MEC	(4)
-
 enum {
 	MAX_TRAPID = 8,		/* 3 bits in the bitfield. */
 	MAX_WATCH_ADDRESSES = 4
 };
 
 enum {
 	ADDRESS_WATCH_REG_ADDR_HI = 0,
 	ADDRESS_WATCH_REG_ADDR_LO,
 	ADDRESS_WATCH_REG_CNTL,
 	ADDRESS_WATCH_REG_MAX
 };
 
 /*  not defined in the CI/KV reg file  */
 enum {
 	ADDRESS_WATCH_REG_CNTL_ATC_BIT = 0x10000000UL,
 	ADDRESS_WATCH_REG_CNTL_DEFAULT_MASK = 0x00FFFFFF,
 	ADDRESS_WATCH_REG_ADDLOW_MASK_EXTENSION = 0x03000000,
 	/* extend the mask to 26 bits to match the low address field */
 	ADDRESS_WATCH_REG_ADDLOW_SHIFT = 6,
 	ADDRESS_WATCH_REG_ADDHIGH_MASK = 0xFFFF
@@ -169,42 +167,44 @@ static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
 			uint32_t queue, uint32_t vmid)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);
 
 	mutex_lock(&adev->srbm_mutex);
 	WREG32(mmSRBM_GFX_CNTL, value);
 }
 
 static void unlock_srbm(struct kgd_dev *kgd)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 
 	WREG32(mmSRBM_GFX_CNTL, 0);
 	mutex_unlock(&adev->srbm_mutex);
 }
 
 static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t queue_id)
 {
-	uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1;
-	uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC);
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
 	lock_srbm(kgd, mec, pipe, queue_id, 0);
 }
 
 static void release_queue(struct kgd_dev *kgd)
 {
 	unlock_srbm(kgd);
 }
 
 static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
 					uint32_t sh_mem_config,
 					uint32_t sh_mem_ape1_base,
 					uint32_t sh_mem_ape1_limit,
 					uint32_t sh_mem_bases)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 
 	lock_srbm(kgd, 0, 0, 0, vmid);
 
 	WREG32(mmSH_MEM_CONFIG, sh_mem_config);
@@ -237,42 +237,42 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
 
 	/* Mapping vmid to pasid also for IH block */
 	WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);
 
 	return 0;
 }
 
 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t hpd_size, uint64_t hpd_gpu_addr)
 {
 	/* amdgpu owns the per-pipe state */
 	return 0;
 }
 
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	uint32_t mec;
 	uint32_t pipe;
 
-	mec = (pipe_id / CIK_PIPE_PER_MEC) + 1;
-	pipe = (pipe_id % CIK_PIPE_PER_MEC);
+	mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
 	lock_srbm(kgd, mec, pipe, 0, 0);
 
 	WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
 			CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
 
 	unlock_srbm(kgd);
 
 	return 0;
 }
 
 static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
 {
 	uint32_t retval;
 
 	retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
 			m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
 
 	pr_debug("kfd: sdma base address: 0x%x\n", retval);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 6ba94e9..133d066 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -22,42 +22,40 @@
 
 #include <linux/module.h>
 #include <linux/fdtable.h>
 #include <linux/uaccess.h>
 #include <linux/firmware.h>
 #include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_ucode.h"
 #include "gfx_v8_0.h"
 #include "gca/gfx_8_0_sh_mask.h"
 #include "gca/gfx_8_0_d.h"
 #include "gca/gfx_8_0_enum.h"
 #include "oss/oss_3_0_sh_mask.h"
 #include "oss/oss_3_0_d.h"
 #include "gmc/gmc_8_1_sh_mask.h"
 #include "gmc/gmc_8_1_d.h"
 #include "vi_structs.h"
 #include "vid.h"
 
-#define VI_PIPE_PER_MEC	(4)
-
 struct cik_sdma_rlc_registers;
 
 /*
  * Register access functions
  */
 
 static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
 		uint32_t sh_mem_config,
 		uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
 		uint32_t sh_mem_bases);
 static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
 		unsigned int vmid);
 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
 		uint32_t hpd_size, uint64_t hpd_gpu_addr);
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
 static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
 		uint32_t queue_id, uint32_t __user *wptr);
 static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd);
 static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
 		uint32_t pipe_id, uint32_t queue_id);
@@ -130,42 +128,44 @@ static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
 			uint32_t queue, uint32_t vmid)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue);
 
 	mutex_lock(&adev->srbm_mutex);
 	WREG32(mmSRBM_GFX_CNTL, value);
 }
 
 static void unlock_srbm(struct kgd_dev *kgd)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 
 	WREG32(mmSRBM_GFX_CNTL, 0);
 	mutex_unlock(&adev->srbm_mutex);
 }
 
 static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t queue_id)
 {
-	uint32_t mec = (++pipe_id / VI_PIPE_PER_MEC) + 1;
-	uint32_t pipe = (pipe_id % VI_PIPE_PER_MEC);
+	struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+	uint32_t mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
 	lock_srbm(kgd, mec, pipe, queue_id, 0);
 }
 
 static void release_queue(struct kgd_dev *kgd)
 {
 	unlock_srbm(kgd);
 }
 
 static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
 					uint32_t sh_mem_config,
 					uint32_t sh_mem_ape1_base,
 					uint32_t sh_mem_ape1_limit,
 					uint32_t sh_mem_bases)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 
 	lock_srbm(kgd, 0, 0, 0, vmid);
 
 	WREG32(mmSH_MEM_CONFIG, sh_mem_config);
@@ -199,42 +199,42 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
 
 	/* Mapping vmid to pasid also for IH block */
 	WREG32(mmIH_VMID_0_LUT + vmid, pasid_mapping);
 
 	return 0;
 }
 
 static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
 				uint32_t hpd_size, uint64_t hpd_gpu_addr)
 {
 	/* amdgpu owns the per-pipe state */
 	return 0;
 }
 
 static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	uint32_t mec;
 	uint32_t pipe;
 
-	mec = (++pipe_id / VI_PIPE_PER_MEC) + 1;
-	pipe = (pipe_id % VI_PIPE_PER_MEC);
+	mec = (++pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+	pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
 
 	lock_srbm(kgd, mec, pipe, 0, 0);
 
 	WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK);
 
 	unlock_srbm(kgd);
 
 	return 0;
 }
 
 static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
 {
 	return 0;
 }
 
 static inline struct vi_mqd *get_mqd(void *mqd)
 {
 	return (struct vi_mqd *)mqd;
 }
 
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 14/17] drm/amdgpu: allocate queues horizontally across pipes
       [not found] ` <20170413213542.18802-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
                     ` (12 preceding siblings ...)
  2017-04-13 21:35   ` [PATCH 13/17] drm/amdgpu: remove duplicate magic constants from amdgpu_amdkfd_gfx*.c Andres Rodriguez
@ 2017-04-13 21:35   ` Andres Rodriguez
  2017-04-13 21:35   ` [PATCH 15/17] drm/amdgpu: remove hardcoded queue_mask in PACKET3_SET_RESOURCES Andres Rodriguez
                     ` (3 subsequent siblings)
  17 siblings, 0 replies; 26+ messages in thread
From: Andres Rodriguez @ 2017-04-13 21:35 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: andresx7-Re5JQEeQqe8AvxtiuMwx3w

Pipes provide better concurrency than queues, so we want to make sure
that apps use queues from different pipes whenever possible.

Optimize for the trivial case where an app consumes rings in order: we
don't want adjacent rings to belong to the same pipe. See the sketch
below.
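
A sketch of the idea (illustrative only, mec 0 shown; ring_id assumed
declared by the caller): keep the queue index in the outer loop and the
pipe index in the inner loop, so consecutive ring ids land on distinct
pipes:

	for (queue = 0; queue < adev->gfx.mec.num_queue_per_pipe; ++queue)
		for (pipe = 0; pipe < adev->gfx.mec.num_pipe_per_mec; ++pipe)
			if (amdgpu_is_mec_queue_enabled(adev, 0, pipe, queue))
				gfx_v7_0_compute_ring_init(adev, ring_id++,
							0, pipe, queue);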

Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h   | 13 ++++++
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 83 +++++++++++++++++++--------------
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 86 +++++++++++++++++++++--------------
 3 files changed, 113 insertions(+), 69 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 61990be..0583396 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1762,40 +1762,53 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *sr
 		ring->count_dw -= count_dw;
 	}
 }
 
 static inline struct amdgpu_sdma_instance *
 amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 	int i;
 
 	for (i = 0; i < adev->sdma.num_instances; i++)
 		if (&adev->sdma.instance[i].ring == ring)
 			break;
 
 	if (i < AMDGPU_MAX_SDMA_INSTANCES)
 		return &adev->sdma.instance[i];
 	else
 		return NULL;
 }
 
+static inline bool amdgpu_is_mec_queue_enabled(struct amdgpu_device *adev,
+						int mec, int pipe, int queue)
+{
+	int bit = 0;
+
+	bit += mec * adev->gfx.mec.num_pipe_per_mec
+		* adev->gfx.mec.num_queue_per_pipe;
+	bit += pipe * adev->gfx.mec.num_queue_per_pipe;
+	bit += queue;
+
+	return test_bit(bit, adev->gfx.mec.queue_bitmap);
+}
+
 /*
  * ASICs macro.
  */
 #define amdgpu_asic_set_vga_state(adev, state) (adev)->asic_funcs->set_vga_state((adev), (state))
 #define amdgpu_asic_reset(adev) (adev)->asic_funcs->reset((adev))
 #define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev))
 #define amdgpu_asic_set_uvd_clocks(adev, v, d) (adev)->asic_funcs->set_uvd_clocks((adev), (v), (d))
 #define amdgpu_asic_set_vce_clocks(adev, ev, ec) (adev)->asic_funcs->set_vce_clocks((adev), (ev), (ec))
 #define amdgpu_get_pcie_lanes(adev) (adev)->asic_funcs->get_pcie_lanes((adev))
 #define amdgpu_set_pcie_lanes(adev, l) (adev)->asic_funcs->set_pcie_lanes((adev), (l))
 #define amdgpu_asic_get_gpu_clock_counter(adev) (adev)->asic_funcs->get_gpu_clock_counter((adev))
 #define amdgpu_asic_read_disabled_bios(adev) (adev)->asic_funcs->read_disabled_bios((adev))
 #define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l))
 #define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v)))
 #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
 #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid))
 #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
 #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
 #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
 #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 8969c69..684f053 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -4733,45 +4733,76 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
 	adev->gfx.config.num_gpus = 1;
 	adev->gfx.config.multi_gpu_tile_size = 64;
 
 	/* fix up row size */
 	gb_addr_config &= ~GB_ADDR_CONFIG__ROW_SIZE_MASK;
 	switch (adev->gfx.config.mem_row_size_in_kb) {
 	case 1:
 	default:
 		gb_addr_config |= (0 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
 		break;
 	case 2:
 		gb_addr_config |= (1 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
 		break;
 	case 4:
 		gb_addr_config |= (2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
 		break;
 	}
 	adev->gfx.config.gb_addr_config = gb_addr_config;
 }
 
+static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+					int mec, int pipe, int queue)
+{
+	int r;
+	unsigned irq_type;
+	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+	/* mec0 is me1 */
+	ring->me = mec + 1;
+	ring->pipe = pipe;
+	ring->queue = queue;
+
+	ring->ring_obj = NULL;
+	ring->use_doorbell = true;
+	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
+	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+		+ ring->pipe;
+
+	/* type-2 packets are deprecated on MEC, use type-3 instead */
+	r = amdgpu_ring_init(adev, ring, 1024,
+			&adev->gfx.eop_irq, irq_type);
+	if (r)
+		return r;
+
+	return 0;
+}
+
 static int gfx_v7_0_sw_init(void *handle)
 {
 	struct amdgpu_ring *ring;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	int i, r, ring_id;
+	int i, j, k, r, ring_id;
 
 	/* EOP Event */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
 	if (r)
 		return r;
 
 	/* Privileged reg */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
 			      &adev->gfx.priv_reg_irq);
 	if (r)
 		return r;
 
 	/* Privileged inst */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185,
 			      &adev->gfx.priv_inst_irq);
 	if (r)
 		return r;
 
 	gfx_v7_0_scratch_init(adev);
 
@@ -4787,73 +4818,57 @@ static int gfx_v7_0_sw_init(void *handle)
 		return r;
 	}
 
 	/* allocate mec buffers */
 	r = gfx_v7_0_mec_init(adev);
 	if (r) {
 		DRM_ERROR("Failed to init MEC BOs!\n");
 		return r;
 	}
 
 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 		ring = &adev->gfx.gfx_ring[i];
 		ring->ring_obj = NULL;
 		sprintf(ring->name, "gfx");
 		r = amdgpu_ring_init(adev, ring, 1024,
 				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP);
 		if (r)
 			return r;
 	}
 
-	/* set up the compute queues */
-	for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) {
-		unsigned irq_type;
-
-		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
-			continue;
-
-		ring = &adev->gfx.compute_ring[ring_id];
-
-		/* mec0 is me1 */
-		ring->me = ((i / adev->gfx.mec.num_queue_per_pipe)
-				/ adev->gfx.mec.num_pipe_per_mec)
-				+ 1;
-		ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe)
-				% adev->gfx.mec.num_pipe_per_mec;
-		ring->queue = i % adev->gfx.mec.num_queue_per_pipe;
-
-		ring->ring_obj = NULL;
-		ring->use_doorbell = true;
-		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
-		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
-
-		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
-			+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
-			+ ring->pipe;
-
-		/* type-2 packets are deprecated on MEC, use type-3 instead */
-		r = amdgpu_ring_init(adev, ring, 1024,
-				     &adev->gfx.eop_irq, irq_type);
-		if (r)
-			return r;
-
-		ring_id++;
+	/* set up the compute queues - allocate horizontally across pipes */
+	ring_id = 0;
+	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+				if (!amdgpu_is_mec_queue_enabled(adev, i, k, j))
+					continue;
+
+				r = gfx_v7_0_compute_ring_init(adev,
+								ring_id,
+								i, k, j);
+				if (r)
+					return r;
+
+				ring_id++;
+			}
+		}
 	}
 
 	/* reserve GDS, GWS and OA resource for gfx */
 	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
 				    &adev->gds.gds_gfx_bo, NULL, NULL);
 	if (r)
 		return r;
 
 	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
 				    &adev->gds.gws_gfx_bo, NULL, NULL);
 	if (r)
 		return r;
 
 	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
 				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
 				    &adev->gds.oa_gfx_bo, NULL, NULL);
 	if (r)
 		return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 8cc9874..90e1dd3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -2102,43 +2102,78 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
 	adev->gfx.config.multi_gpu_tile_size = 64;
 
 	/* fix up row size */
 	switch (adev->gfx.config.mem_row_size_in_kb) {
 	case 1:
 	default:
 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
 		break;
 	case 2:
 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
 		break;
 	case 4:
 		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
 		break;
 	}
 	adev->gfx.config.gb_addr_config = gb_addr_config;
 
 	return 0;
 }
 
+static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
+					int mec, int pipe, int queue)
+{
+	int r;
+	unsigned irq_type;
+	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
+
+	/* mec0 is me1 */
+	ring->me = mec + 1;
+	ring->pipe = pipe;
+	ring->queue = queue;
+
+	ring->ring_obj = NULL;
+	ring->use_doorbell = true;
+	ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
+	ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
+				+ (ring_id * GFX8_MEC_HPD_SIZE);
+	sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
+
+	irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
+		+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
+		+ ring->pipe;
+
+	/* type-2 packets are deprecated on MEC, use type-3 instead */
+	r = amdgpu_ring_init(adev, ring, 1024,
+			&adev->gfx.eop_irq, irq_type);
+	if (r)
+		return r;
+
+	return 0;
+}
+
 static int gfx_v8_0_sw_init(void *handle)
 {
-	int i, r, ring_id;
+	int i, j, k, r, ring_id;
 	struct amdgpu_ring *ring;
 	struct amdgpu_kiq *kiq;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	/* KIQ event */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 178, &adev->gfx.kiq.irq);
 	if (r)
 		return r;
 
 	/* EOP Event */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq);
 	if (r)
 		return r;
 
 	/* Privileged reg */
 	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184,
 			      &adev->gfx.priv_reg_irq);
 	if (r)
 		return r;
 
@@ -2170,77 +2205,58 @@ static int gfx_v8_0_sw_init(void *handle)
 		return r;
 	}
 
 	/* set up the gfx ring */
 	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
 		ring = &adev->gfx.gfx_ring[i];
 		ring->ring_obj = NULL;
 		sprintf(ring->name, "gfx");
 		/* no gfx doorbells on iceland */
 		if (adev->asic_type != CHIP_TOPAZ) {
 			ring->use_doorbell = true;
 			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
 		}
 
 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
 				     AMDGPU_CP_IRQ_GFX_EOP);
 		if (r)
 			return r;
 	}
 
-	/* set up the compute queues */
-	for (i = 0, ring_id = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; i++) {
-		unsigned irq_type;
-
-		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
-			continue;
-
-		if (WARN_ON(ring_id >= AMDGPU_MAX_COMPUTE_RINGS))
-			break;
-
-		ring = &adev->gfx.compute_ring[ring_id];
-
-		/* mec0 is me1 */
-		ring->me = ((i / adev->gfx.mec.num_queue_per_pipe)
-				/ adev->gfx.mec.num_pipe_per_mec)
-				+ 1;
-		ring->pipe = (i / adev->gfx.mec.num_queue_per_pipe)
-				% adev->gfx.mec.num_pipe_per_mec;
-		ring->queue = i % adev->gfx.mec.num_queue_per_pipe;
-
-		ring->ring_obj = NULL;
-		ring->use_doorbell = true;
-		ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX8_MEC_HPD_SIZE);
-		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id;
-		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
 
-		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
-			+ ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
-			+ ring->pipe;
+	/* set up the compute queues - allocate horizontally across pipes */
+	ring_id = 0;
+	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
+		for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
+			for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
+				if (!amdgpu_is_mec_queue_enabled(adev, i, k, j))
+					continue;
 
-		/* type-2 packets are deprecated on MEC, use type-3 instead */
-		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
-				     irq_type);
-		if (r)
-			return r;
+				r = gfx_v8_0_compute_ring_init(adev,
+								ring_id,
+								i, k, j);
+				if (r)
+					return r;
 
-		ring_id++;
+				ring_id++;
+			}
+		}
 	}
 
 	r = gfx_v8_0_kiq_init(adev);
 	if (r) {
 		DRM_ERROR("Failed to init KIQ BOs!\n");
 		return r;
 	}
 
 	kiq = &adev->gfx.kiq;
 	r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
 	if (r)
 		return r;
 
 	/* create MQD for all compute queues as well as KIQ for SRIOV case */
 	r = gfx_v8_0_compute_mqd_sw_init(adev);
 	if (r)
 		return r;
 
 	/* reserve GDS, GWS and OA resource for gfx */
 	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 15/17] drm/amdgpu: remove hardcoded queue_mask in PACKET3_SET_RESOURCES
       [not found] ` <20170413213542.18802-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
                     ` (13 preceding siblings ...)
  2017-04-13 21:35   ` [PATCH 14/17] drm/amdgpu: allocate queues horizontally across pipes Andres Rodriguez
@ 2017-04-13 21:35   ` Andres Rodriguez
  2017-04-13 21:35   ` [PATCH 16/17] drm/amdgpu: avoid KIQ clashing with compute or KFD queues v2 Andres Rodriguez
                     ` (2 subsequent siblings)
  17 siblings, 0 replies; 26+ messages in thread
From: Andres Rodriguez @ 2017-04-13 21:35 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: andresx7-Re5JQEeQqe8AvxtiuMwx3w

The assumption that we are only using the first pipe no longer holds.
Instead, calculate the queue_mask from the queue_bitmap.
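
As a standalone sketch of the mask computation (illustrative only; the
example bitmap is made up, and the kernel uses test_bit() on
adev->gfx.mec.queue_bitmap instead):

	#include <stdint.h>
	#include <stdio.h>

	#define MAX_QUEUES 64

	int main(void)
	{
		/* pretend queues 0, 1, 8 and 9 are owned by amdgpu */
		int owned[MAX_QUEUES] = { [0] = 1, [1] = 1, [8] = 1, [9] = 1 };
		uint64_t queue_mask = 0;
		int i;

		for (i = 0; i < MAX_QUEUES; ++i)
			if (owned[i])
				queue_mask |= 1ull << i;

		/* same lo/hi split the PACKET3_SET_RESOURCES dwords expect */
		printf("queue mask lo: 0x%08x\n", (unsigned)(queue_mask & 0xffffffff));
		printf("queue mask hi: 0x%08x\n", (unsigned)(queue_mask >> 32));
		return 0;
	}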

Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 20 ++++++++++++++++++--
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 23 +++++++++++++++++++++--
 2 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 90e1dd3..ff77351 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4697,60 +4697,76 @@ static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
 
 /* KIQ functions */
 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
 {
 	uint32_t tmp;
 	struct amdgpu_device *adev = ring->adev;
 
 	/* tell RLC which is KIQ queue */
 	tmp = RREG32(mmRLC_CP_SCHEDULERS);
 	tmp &= 0xffffff00;
 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
 	WREG32(mmRLC_CP_SCHEDULERS, tmp);
 	tmp |= 0x80;
 	WREG32(mmRLC_CP_SCHEDULERS, tmp);
 }
 
 static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
 {
 	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
 	uint32_t scratch, tmp = 0;
+	uint64_t queue_mask = 0;
 	int r, i;
 
+	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
+		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+			continue;
+
+		/* This situation may be hit in the future if a new HW
+		 * generation exposes more than 64 queues. If so, the
+		 * definition of queue_mask needs updating */
+		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
+			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
+			break;
+		}
+
+		queue_mask |= (1ull << i);
+	}
+
 	r = amdgpu_gfx_scratch_get(adev, &scratch);
 	if (r) {
 		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
 		return r;
 	}
 	WREG32(scratch, 0xCAFEDEAD);
 
 	r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11);
 	if (r) {
 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
 		amdgpu_gfx_scratch_free(adev, scratch);
 		return r;
 	}
 	/* set resources */
 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
 	amdgpu_ring_write(kiq_ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
-	amdgpu_ring_write(kiq_ring, 0x000000FF);	/* queue mask lo */
-	amdgpu_ring_write(kiq_ring, 0);	/* queue mask hi */
+	amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask));	/* queue mask lo */
+	amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask));	/* queue mask hi */
 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask lo */
 	amdgpu_ring_write(kiq_ring, 0);	/* gws mask hi */
 	amdgpu_ring_write(kiq_ring, 0);	/* oac mask */
 	amdgpu_ring_write(kiq_ring, 0);	/* gds heap base:0, gds heap size:0 */
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
 		uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
 		uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
 
 		/* map queues */
 		amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
 		/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
 		amdgpu_ring_write(kiq_ring,
 				  PACKET3_MAP_QUEUES_NUM_QUEUES(1));
 		amdgpu_ring_write(kiq_ring,
 				  PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index) |
 				  PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
 				  PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
 				  PACKET3_MAP_QUEUES_ME(ring->me == 1 ? 0 : 1)); /* doorbell */
 		amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 6208493..5a5ff47 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1895,46 +1895,65 @@ static int gfx_v9_0_cp_compute_resume(struct amdgpu_device *adev)
 	return 0;
 }
 
 /* KIQ functions */
 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
 {
 	uint32_t tmp;
 	struct amdgpu_device *adev = ring->adev;
 
 	/* tell RLC which is KIQ queue */
 	tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
 	tmp &= 0xffffff00;
 	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
 	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
 	tmp |= 0x80;
 	WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
 }
 
 static void gfx_v9_0_kiq_enable(struct amdgpu_ring *ring)
 {
+	int i;
+	struct amdgpu_device *adev = ring->adev;
+	uint64_t queue_mask = 0;
+
+	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
+		if (!test_bit(i, adev->gfx.mec.queue_bitmap))
+			continue;
+
+		/* This situation may be hit in the future if a new HW
+		 * generation exposes more than 64 queues. If so, the
+		 * definition of queue_mask needs updating */
+		if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
+			DRM_ERROR("Invalid KCQ enabled: %d\n", i);
+			break;
+		}
+
+		queue_mask |= (1ull << i);
+	}
+
 	amdgpu_ring_alloc(ring, 8);
 	/* set resources */
 	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6));
 	amdgpu_ring_write(ring, 0);	/* vmid_mask:0 queue_type:0 (KIQ) */
-	amdgpu_ring_write(ring, 0x000000FF);	/* queue mask lo */
-	amdgpu_ring_write(ring, 0);	/* queue mask hi */
+	amdgpu_ring_write(ring, lower_32_bits(queue_mask));	/* queue mask lo */
+	amdgpu_ring_write(ring, upper_32_bits(queue_mask));	/* queue mask hi */
 	amdgpu_ring_write(ring, 0);	/* gws mask lo */
 	amdgpu_ring_write(ring, 0);	/* gws mask hi */
 	amdgpu_ring_write(ring, 0);	/* oac mask */
 	amdgpu_ring_write(ring, 0);	/* gds heap base:0, gds heap size:0 */
 	amdgpu_ring_commit(ring);
 	udelay(50);
 }
 
 static void gfx_v9_0_map_queue_enable(struct amdgpu_ring *kiq_ring,
 				   struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = kiq_ring->adev;
 	uint64_t mqd_addr, wptr_addr;
 
 	mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
 	wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
 	amdgpu_ring_alloc(kiq_ring, 8);
 
 	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
 	/* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 16/17] drm/amdgpu: avoid KIQ clashing with compute or KFD queues v2
       [not found] ` <20170413213542.18802-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
                     ` (14 preceding siblings ...)
  2017-04-13 21:35   ` [PATCH 15/17] drm/amdgpu: remove hardcoded queue_mask in PACKET3_SET_RESOURCES Andres Rodriguez
@ 2017-04-13 21:35   ` Andres Rodriguez
  2017-04-13 21:35   ` [PATCH 17/17] drm/amdgpu: new queue policy, take first 2 queues of each pipe v2 Andres Rodriguez
  2017-04-13 21:39   ` [PATCH split] Improve pipe split between amdgpu and amdkfd Andres Rodriguez
  17 siblings, 0 replies; 26+ messages in thread
From: Andres Rodriguez @ 2017-04-13 21:35 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: andresx7-Re5JQEeQqe8AvxtiuMwx3w

Instead of picking an arbitrary queue for KIQ, search for one according
to policy. The queue must be unused.

Also report the KIQ as an unavailable resource to KFD.

In testing I ran into KCQ initialization issues when using pipes 2/3 of
MEC2 for the KIQ. Therefore the policy disallows grabbing one of these.

v2: fix (ring.me + 1) to (ring.me - 1) in amdgpu_amdkfd_device_init
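
For reference, the bit <-> (mec, pipe, queue) mapping used to find and
reserve the KIQ slot round-trips like this (a standalone sketch; the
4-pipe, 8-queue topology is assumed purely for illustration):

	#include <stdio.h>

	#define NUM_PIPE_PER_MEC	4
	#define NUM_QUEUE_PER_PIPE	8

	static int queue_to_bit(int mec, int pipe, int queue)
	{
		return (mec * NUM_PIPE_PER_MEC + pipe) * NUM_QUEUE_PER_PIPE + queue;
	}

	static void bit_to_queue(int bit, int *mec, int *pipe, int *queue)
	{
		*queue = bit % NUM_QUEUE_PER_PIPE;
		*pipe = (bit / NUM_QUEUE_PER_PIPE) % NUM_PIPE_PER_MEC;
		*mec = (bit / NUM_QUEUE_PER_PIPE) / NUM_PIPE_PER_MEC;
	}

	int main(void)
	{
		int mec, pipe, queue;

		bit_to_queue(queue_to_bit(1, 2, 3), &mec, &pipe, &queue);
		/* prints: bit 51 -> mec 1 pipe 2 queue 3 */
		printf("bit %d -> mec %d pipe %d queue %d\n",
		       queue_to_bit(1, 2, 3), mec, pipe, queue);
		return 0;
	}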

Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        | 23 +++++++++++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |  8 ++++++
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c      | 43 ++++++++++++++++++++++++------
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c      | 42 ++++++++++++++++++++++++-----
 4 files changed, 98 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 0583396..0a58575 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1762,51 +1762,68 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, void *sr
 		ring->count_dw -= count_dw;
 	}
 }
 
 static inline struct amdgpu_sdma_instance *
 amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 	int i;
 
 	for (i = 0; i < adev->sdma.num_instances; i++)
 		if (&adev->sdma.instance[i].ring == ring)
 			break;
 
 	if (i < AMDGPU_MAX_SDMA_INSTANCES)
 		return &adev->sdma.instance[i];
 	else
 		return NULL;
 }
 
-static inline bool amdgpu_is_mec_queue_enabled(struct amdgpu_device *adev,
-						int mec, int pipe, int queue)
+static inline int amdgpu_queue_to_bit(struct amdgpu_device *adev,
+				      int mec, int pipe, int queue)
 {
 	int bit = 0;
 
 	bit += mec * adev->gfx.mec.num_pipe_per_mec
 		* adev->gfx.mec.num_queue_per_pipe;
 	bit += pipe * adev->gfx.mec.num_queue_per_pipe;
 	bit += queue;
 
-	return test_bit(bit, adev->gfx.mec.queue_bitmap);
+	return bit;
+}
+
+static inline void amdgpu_bit_to_queue(struct amdgpu_device *adev, int bit,
+				       int *mec, int *pipe, int *queue)
+{
+	*queue = bit % adev->gfx.mec.num_queue_per_pipe;
+	*pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
+		% adev->gfx.mec.num_pipe_per_mec;
+	*mec = (bit / adev->gfx.mec.num_queue_per_pipe)
+	       / adev->gfx.mec.num_pipe_per_mec;
+}
+
+static inline bool amdgpu_is_mec_queue_enabled(struct amdgpu_device *adev,
+					       int mec, int pipe, int queue)
+{
+	return test_bit(amdgpu_queue_to_bit(adev, mec, pipe, queue),
+			adev->gfx.mec.queue_bitmap);
 }
 
 /*
  * ASICs macro.
  */
 #define amdgpu_asic_set_vga_state(adev, state) (adev)->asic_funcs->set_vga_state((adev), (state))
 #define amdgpu_asic_reset(adev) (adev)->asic_funcs->reset((adev))
 #define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev))
 #define amdgpu_asic_set_uvd_clocks(adev, v, d) (adev)->asic_funcs->set_uvd_clocks((adev), (v), (d))
 #define amdgpu_asic_set_vce_clocks(adev, ev, ec) (adev)->asic_funcs->set_vce_clocks((adev), (ev), (ec))
 #define amdgpu_get_pcie_lanes(adev) (adev)->asic_funcs->get_pcie_lanes((adev))
 #define amdgpu_set_pcie_lanes(adev, l) (adev)->asic_funcs->set_pcie_lanes((adev), (l))
 #define amdgpu_asic_get_gpu_clock_counter(adev) (adev)->asic_funcs->get_gpu_clock_counter((adev))
 #define amdgpu_asic_read_disabled_bios(adev) (adev)->asic_funcs->read_disabled_bios((adev))
 #define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l))
 #define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v)))
 #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev))
 #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid))
 #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
 #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 8fc5aa3..339e8cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -94,40 +94,48 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 }
 
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
 	int i;
 	int last_valid_bit;
 	if (adev->kfd) {
 		struct kgd2kfd_shared_resources gpu_resources = {
 			.compute_vmid_bitmap = 0xFF00,
 			.num_mec = adev->gfx.mec.num_mec,
 			.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
 			.num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe
 		};
 
 		/* this is going to have a few of the MSBs set that we need to
 		 * clear */
 		bitmap_complement(gpu_resources.queue_bitmap,
 				  adev->gfx.mec.queue_bitmap,
 				  KGD_MAX_QUEUES);
 
+		/* remove the KIQ bit as well */
+		if (adev->gfx.kiq.ring.ready)
+			clear_bit(amdgpu_queue_to_bit(adev,
+						      adev->gfx.kiq.ring.me - 1,
+						      adev->gfx.kiq.ring.pipe,
+						      adev->gfx.kiq.ring.queue),
+				  gpu_resources.queue_bitmap);
+
 		/* According to linux/bitmap.h we shouldn't use bitmap_clear if
 		 * nbits is not compile time constant */
 		last_valid_bit = adev->gfx.mec.num_mec
 				* adev->gfx.mec.num_pipe_per_mec
 				* adev->gfx.mec.num_queue_per_pipe;
 		for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
 			clear_bit(i, gpu_resources.queue_bitmap);
 
 		amdgpu_doorbell_get_kfd_info(adev,
 				&gpu_resources.doorbell_physical_address,
 				&gpu_resources.doorbell_aperture_size,
 				&gpu_resources.doorbell_start_offset);
 
 		kgd2kfd->device_init(adev->kfd, &gpu_resources);
 	}
 }
 
 void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
 {
 	if (adev->kfd) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index ff77351..2178611 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1356,64 +1356,91 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
 	}
 
 	return 0;
 }
 
 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
 {
 	int r;
 
 	if (adev->gfx.mec.hpd_eop_obj) {
 		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
 		if (unlikely(r != 0))
 			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
 		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
 		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
 		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
 		adev->gfx.mec.hpd_eop_obj = NULL;
 	}
 }
 
+static int gfx_v8_0_kiq_acquire(struct amdgpu_device *adev,
+				 struct amdgpu_ring *ring)
+{
+	int queue_bit;
+	int mec, pipe, queue;
+
+	queue_bit = adev->gfx.mec.num_mec
+		    * adev->gfx.mec.num_pipe_per_mec
+		    * adev->gfx.mec.num_queue_per_pipe;
+
+	while (queue_bit-- > 0) {
+		if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
+			continue;
+
+		amdgpu_bit_to_queue(adev, queue_bit, &mec, &pipe, &queue);
+
+		/* Using pipes 2/3 from MEC 2 seems to cause problems */
+		if (mec == 1 && pipe > 1)
+			continue;
+
+		ring->me = mec + 1;
+		ring->pipe = pipe;
+		ring->queue = queue;
+
+		return 0;
+	}
+
+	dev_err(adev->dev, "Failed to find a queue for KIQ\n");
+	return -EINVAL;
+}
+
 static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
 				  struct amdgpu_ring *ring,
 				  struct amdgpu_irq_src *irq)
 {
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 	int r = 0;
 
 	r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
 	if (r)
 		return r;
 
 	ring->adev = NULL;
 	ring->ring_obj = NULL;
 	ring->use_doorbell = true;
 	ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
-	if (adev->gfx.mec2_fw) {
-		ring->me = 2;
-		ring->pipe = 0;
-	} else {
-		ring->me = 1;
-		ring->pipe = 1;
-	}
 
-	ring->queue = 0;
+	r = gfx_v8_0_kiq_acquire(adev, ring);
+	if (r)
+		return r;
+
 	ring->eop_gpu_addr = kiq->eop_gpu_addr;
 	sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
 	r = amdgpu_ring_init(adev, ring, 1024,
 			     irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
 	if (r)
 		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
 
 	return r;
 }
 static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
 				   struct amdgpu_irq_src *irq)
 {
 	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
 	amdgpu_ring_fini(ring);
 }
 
 static void gfx_v8_0_compute_queue_acquire(struct amdgpu_device *adev)
 {
 	int i, queue, pipe, mec;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 5a5ff47..57511cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -631,62 +631,90 @@ static int gfx_v9_0_kiq_init(struct amdgpu_device *adev)
 
 	r = amdgpu_bo_create_kernel(adev, GFX9_MEC_HPD_SIZE, PAGE_SIZE,
 				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
 				    &kiq->eop_gpu_addr, (void **)&hpd);
 	if (r) {
 		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
 		return r;
 	}
 
 	memset(hpd, 0, GFX9_MEC_HPD_SIZE);
 
 	r = amdgpu_bo_reserve(kiq->eop_obj, false);
 	if (unlikely(r != 0))
 		dev_warn(adev->dev, "(%d) reserve kiq eop bo failed\n", r);
 	amdgpu_bo_kunmap(kiq->eop_obj);
 	amdgpu_bo_unreserve(kiq->eop_obj);
 
 	return 0;
 }
 
+static int gfx_v9_0_kiq_acquire(struct amdgpu_device *adev,
+				 struct amdgpu_ring *ring)
+{
+	int queue_bit;
+	int mec, pipe, queue;
+
+	queue_bit = adev->gfx.mec.num_mec
+		    * adev->gfx.mec.num_pipe_per_mec
+		    * adev->gfx.mec.num_queue_per_pipe;
+
+	while (queue_bit-- > 0) {
+		if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
+			continue;
+
+		amdgpu_bit_to_queue(adev, queue_bit, &mec, &pipe, &queue);
+
+		/* Using pipes 2/3 from MEC 2 seems to cause problems */
+		if (mec == 1 && pipe > 1)
+			continue;
+
+		ring->me = mec + 1;
+		ring->pipe = pipe;
+		ring->queue = queue;
+
+		return 0;
+	}
+
+	dev_err(adev->dev, "Failed to find a queue for KIQ\n");
+	return -EINVAL;
+}
+
 static int gfx_v9_0_kiq_init_ring(struct amdgpu_device *adev,
 				  struct amdgpu_ring *ring,
 				  struct amdgpu_irq_src *irq)
 {
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 	int r = 0;
 
 	r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
 	if (r)
 		return r;
 
 	ring->adev = NULL;
 	ring->ring_obj = NULL;
 	ring->use_doorbell = true;
 	ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
-	if (adev->gfx.mec2_fw) {
-		ring->me = 2;
-		ring->pipe = 0;
-	} else {
-		ring->me = 1;
-		ring->pipe = 1;
-	}
+
+	r = gfx_v9_0_kiq_acquire(adev, ring);
+	if (r)
+		return r;
 
 	irq->data = ring;
-	ring->queue = 0;
 	ring->eop_gpu_addr = kiq->eop_gpu_addr;
 	sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
 	r = amdgpu_ring_init(adev, ring, 1024,
 			     irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
 	if (r)
 		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
 
 	return r;
 }
 static void gfx_v9_0_kiq_free_ring(struct amdgpu_ring *ring,
 				   struct amdgpu_irq_src *irq)
 {
 	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
 	amdgpu_ring_fini(ring);
 	irq->data = NULL;
 }
 
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 17/17] drm/amdgpu: new queue policy, take first 2 queues of each pipe v2
       [not found] ` <20170413213542.18802-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
                     ` (15 preceding siblings ...)
  2017-04-13 21:35   ` [PATCH 16/17] drm/amdgpu: avoid KIQ clashing with compute or KFD queues v2 Andres Rodriguez
@ 2017-04-13 21:35   ` Andres Rodriguez
  2017-04-13 21:39   ` [PATCH split] Improve pipe split between amdgpu and amdkfd Andres Rodriguez
  17 siblings, 0 replies; 26+ messages in thread
From: Andres Rodriguez @ 2017-04-13 21:35 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: andresx7-Re5JQEeQqe8AvxtiuMwx3w

Instead of taking the first pipe and giving the rest to kfd, take the
first 2 queues of each pipe.

Effectively, amdgpu and amdkfd own the same number of queues. But
because the queues are spread over multiple pipes the hardware will be
able to better handle concurrent compute workloads.

amdgpu goes from 1 pipe to 4 pipes, i.e. from 1 compute thread to 4
amdkfd goes from 3 pipes to 4 pipes, i.e. from 3 compute threads to 4

v2: fix policy comment
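
A standalone sketch of the resulting split (illustrative only; assumes a
single MEC visible here with 4 pipes and 8 queues per pipe):

	#include <stdio.h>

	#define NUM_MEC			1
	#define NUM_PIPE_PER_MEC	4
	#define NUM_QUEUE_PER_PIPE	8

	int main(void)
	{
		int i, mec, pipe, queue;

		for (i = 0; i < NUM_MEC * NUM_PIPE_PER_MEC * NUM_QUEUE_PER_PIPE; ++i) {
			queue = i % NUM_QUEUE_PER_PIPE;
			pipe = (i / NUM_QUEUE_PER_PIPE) % NUM_PIPE_PER_MEC;
			mec = (i / NUM_QUEUE_PER_PIPE) / NUM_PIPE_PER_MEC;

			/* policy: amdgpu takes queues 0 and 1 of every pipe */
			printf("mec %d pipe %d queue %d -> %s\n", mec, pipe, queue,
			       (mec == 0 && queue < 2) ? "amdgpu" : "amdkfd");
		}
		return 0;
	}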

Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 4 ++--
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 684f053..c0844a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2821,42 +2821,42 @@ static void gfx_v7_0_mec_fini(struct amdgpu_device *adev)
 		adev->gfx.mec.hpd_eop_obj = NULL;
 	}
 }
 
 static void gfx_v7_0_compute_queue_acquire(struct amdgpu_device *adev)
 {
 	int i, queue, pipe, mec;
 
 	/* policy for amdgpu compute queue ownership */
 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
 		queue = i % adev->gfx.mec.num_queue_per_pipe;
 		pipe = (i / adev->gfx.mec.num_queue_per_pipe)
 			% adev->gfx.mec.num_pipe_per_mec;
 		mec = (i / adev->gfx.mec.num_queue_per_pipe)
 			/ adev->gfx.mec.num_pipe_per_mec;
 
 		/* we've run out of HW */
 		if (mec >= adev->gfx.mec.num_mec)
 			break;
 
-		/* policy: amdgpu owns all queues in the first pipe */
-		if (mec == 0 && pipe == 0)
+		/* policy: amdgpu owns the first two queues of each pipe on the first MEC */
+		if (mec == 0 && queue < 2)
 			set_bit(i, adev->gfx.mec.queue_bitmap);
 	}
 
 	/* update the number of active compute rings */
 	adev->gfx.num_compute_rings =
 		bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
 
 	/* If you hit this case and edited the policy, you probably just
 	 * need to increase AMDGPU_MAX_COMPUTE_RINGS */
 	if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
 		adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
 }
 
 static int gfx_v7_0_mec_init(struct amdgpu_device *adev)
 {
 	int r;
 	u32 *hpd;
 	size_t mec_hpd_size;
 
 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 2178611..a5ba48b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1439,42 +1439,42 @@ static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
 	amdgpu_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
 	amdgpu_ring_fini(ring);
 }
 
 static void gfx_v8_0_compute_queue_acquire(struct amdgpu_device *adev)
 {
 	int i, queue, pipe, mec;
 
 	/* policy for amdgpu compute queue ownership */
 	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
 		queue = i % adev->gfx.mec.num_queue_per_pipe;
 		pipe = (i / adev->gfx.mec.num_queue_per_pipe)
 			% adev->gfx.mec.num_pipe_per_mec;
 		mec = (i / adev->gfx.mec.num_queue_per_pipe)
 			/ adev->gfx.mec.num_pipe_per_mec;
 
 		/* we've run out of HW */
 		if (mec >= adev->gfx.mec.num_mec)
 			break;
 
-		/* policy: amdgpu owns all queues in the first pipe */
-		if (mec == 0 && pipe == 0)
+		/* policy: amdgpu owns the first two queues of each pipe on the first MEC */
+		if (mec == 0 && queue < 2)
 			set_bit(i, adev->gfx.mec.queue_bitmap);
 	}
 
 	/* update the number of active compute rings */
 	adev->gfx.num_compute_rings =
 		bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
 
 	/* If you hit this case and edited the policy, you probably just
 	 * need to increase AMDGPU_MAX_COMPUTE_RINGS */
 	if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
 		adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
 }
 
 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
 {
 	int r;
 	u32 *hpd;
 	size_t mec_hpd_size;
 
 	bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH split] Improve pipe split between amdgpu and amdkfd
       [not found] ` <20170413213542.18802-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
                     ` (16 preceding siblings ...)
  2017-04-13 21:35   ` [PATCH 17/17] drm/amdgpu: new queue policy, take first 2 queues of each pipe v2 Andres Rodriguez
@ 2017-04-13 21:39   ` Andres Rodriguez
  17 siblings, 0 replies; 26+ messages in thread
From: Andres Rodriguez @ 2017-04-13 21:39 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Forgot to mention:
   * Re-ordered some patches as suggested by Felix
   * Included "drm: Fix get_property logic fumble" during testing;
otherwise the system boots to a black screen.

Regards,
Andres

On 2017-04-13 05:35 PM, Andres Rodriguez wrote:
> This is a split of patches that are ready to land from the series:
> Add support for high priority scheduling in amdgpu v8
>
> I've included Felix and Alex's feedback from the thread above. This includes:
>  * Separate MEC_HPD_SIZE rename into a separate patch (patch 01)
>  * Added a patch to fix the kgd_hqd_load bug Felix pointed out (patch 06)
>  * Fixes for various off-by-one errors
>  * Use gfx_v8_0_deactivate_hqd
>
> Only comment I didn't address was changing the queue allocation policy for
> gfx9 (similar to gfx7/8). See inline reply in that thread for more details
> on why this was skipped.
>
>
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 09/17] drm/amdgpu: take ownership of per-pipe configuration v2
       [not found]     ` <20170413213542.18802-10-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-04-15 12:37       ` Oded Gabbay
       [not found]         ` <CAFCwf12haVgYLxxcqMqTd-GX7KjxtZXD1Stuqj87HAP32TdvXQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: Oded Gabbay @ 2017-04-15 12:37 UTC (permalink / raw)
  To: Andres Rodriguez; +Cc: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Fri, Apr 14, 2017 at 12:35 AM, Andres Rodriguez <andresx7@gmail.com> wrote:
> Make amdgpu the owner of all per-pipe state of the HQDs.
>
> This change will allow us to split the queues between kfd and amdgpu
> with a queue granularity instead of pipe granularity.
>
> This patch fixes kfd allocating an HDP_EOP region for its 3 pipes which
> goes unused.
>
> v2: support for gfx9
>
> Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
> Acked-by: Christian König <christian.koenig@amd.com>
> Signed-off-by: Andres Rodriguez <andresx7@gmail.com>

Hi Andres,
FYI, with your patchset rebased on alex's drm-next-4.12-wip, my Kaveri
machine won't boot the OS (Ubuntu 16.04).
I bisected your patch-set and the first bad patch is this one.

Unfortunately, I don't have the means to dump kernel crashes before
the system boots.

Thx,
Oded
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 09/17] drm/amdgpu: take ownership of per-pipe configuration v2
       [not found]         ` <CAFCwf12haVgYLxxcqMqTd-GX7KjxtZXD1Stuqj87HAP32TdvXQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2017-04-16  0:12           ` Andres Rodriguez
       [not found]             ` <CAFQ_0eED3HxUN8Sz0buefckSmVWvMwBD0mFk_GA1XeLABdsTnw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: Andres Rodriguez @ 2017-04-16  0:12 UTC (permalink / raw)
  To: Oded Gabbay; +Cc: amd-gfx list

Hey Oded,

I've had problems with 4.12-wip on its own. Just wanted to confirm if
you picked up this fix for a bug in drm-next:
"drm: Fix get_property logic fumble"

Regards,
Andres

On Sat, Apr 15, 2017 at 8:37 AM, Oded Gabbay <oded.gabbay@gmail.com> wrote:
> On Fri, Apr 14, 2017 at 12:35 AM, Andres Rodriguez <andresx7@gmail.com> wrote:
>> Make amdgpu the owner of all per-pipe state of the HQDs.
>>
>> This change will allow us to split the queues between kfd and amdgpu
>> with a queue granularity instead of pipe granularity.
>>
>> This patch fixes kfd allocating an HDP_EOP region for its 3 pipes which
>> goes unused.
>>
>> v2: support for gfx9
>>
>> Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>> Acked-by: Christian König <christian.koenig@amd.com>
>> Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
>
> Hi Andres,
> FYI, with your patchset rebased on alex's drm-next-4.12-wip, my Kaveri
> machine won't boot the OS (Ubuntu 16.04).
> I bisected your patch-set and the first bad patch is this one.
>
> Unfortunately, I don't have the means to dump kernel crashes before
> the system boots.
>
> Thx,
> Oded
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 09/17] drm/amdgpu: take ownership of per-pipe configuration v2
       [not found]             ` <CAFQ_0eED3HxUN8Sz0buefckSmVWvMwBD0mFk_GA1XeLABdsTnw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2017-04-16  5:25               ` Oded Gabbay
       [not found]                 ` <CAFQ_0eE1O+SS_ucS-8Rjqm3S4aDjZq1r0OfzZhTKE=9Z12xMZQ@mail.gmail.com>
       [not found]                 ` <CAFCwf12mXeEHd9pb9L84wibaKC0zrvO6dHggQ-ePP7odN=oj6A-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 2 replies; 26+ messages in thread
From: Oded Gabbay @ 2017-04-16  5:25 UTC (permalink / raw)
  To: Andres Rodriguez; +Cc: amd-gfx list

Hey Andres,

Yes, that patch indeed fixed the black screen on Alex's 4.12-wip, but
it still wouldn't boot with your entire patch-set. It would boot only
with patches 1-8.

I'll try to get a USB-to-serial dongle so I can try to dump kernel
messages during boot, but it will probably take some time.

Oded

On Sun, Apr 16, 2017 at 3:12 AM, Andres Rodriguez <andresx7@gmail.com> wrote:
> Hey Oded,
>
> I've had problems with 4.12-wip on its own. Just wanted to confirm if
> you picked up this fix for a bug in drm-next:
> "drm: Fix get_property logic fumble"
>
> Regards,
> Andres
>
> On Sat, Apr 15, 2017 at 8:37 AM, Oded Gabbay <oded.gabbay@gmail.com> wrote:
>> On Fri, Apr 14, 2017 at 12:35 AM, Andres Rodriguez <andresx7@gmail.com> wrote:
>>> Make amdgpu the owner of all per-pipe state of the HQDs.
>>>
>>> This change will allow us to split the queues between kfd and amdgpu
>>> with a queue granularity instead of pipe granularity.
>>>
>>> This patch fixes kfd allocating an HDP_EOP region for its 3 pipes which
>>> goes unused.
>>>
>>> v2: support for gfx9
>>>
>>> Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
>>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>> Acked-by: Christian König <christian.koenig@amd.com>
>>> Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
>>
>> Hi Andres,
>> FYI, with your patchset rebased on alex's drm-next-4.12-wip, my Kaveri
>> machine won't boot the OS (Ubuntu 16.04).
>> I bisected your patch-set and the first bad patch is this one.
>>
>> Unfortunately, I don't have the means to dump kernel crashes before
>> the system boots.
>>
>> Thx,
>> Oded
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 09/17] drm/amdgpu: take ownership of per-pipe configuration v2
       [not found]                   ` <CAFQ_0eE1O+SS_ucS-8Rjqm3S4aDjZq1r0OfzZhTKE=9Z12xMZQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2017-04-16 14:51                     ` Andres Rodriguez
  0 siblings, 0 replies; 26+ messages in thread
From: Andres Rodriguez @ 2017-04-16 14:51 UTC (permalink / raw)
  To: Oded Gabbay; +Cc: amd-gfx list


Thanks for the info.

I'll test it as well on Monday.

Regards,
Andres

On Apr 16, 2017 1:26 AM, "Oded Gabbay" <oded.gabbay-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:

Hey Andres,

Yes, that patch indeed fixed the black screen on Alex's 4.12-wip, but
it still wouldn't boot with your entire patch-set. It would boot only
with patches 1-8.

I'll try to get a USB-to-serial dongle so I can try to dump kernel
messages during boot, but it will probably take some time.

Oded

On Sun, Apr 16, 2017 at 3:12 AM, Andres Rodriguez <andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
wrote:
> Hey Oded,
>
> I've had problems with 4.12-wip on its own. Just wanted to confirm if
> you picked up this fix for a bug in drm-next:
> "drm: Fix get_property logic fumble"
>
> Regards,
> Andres
>
> On Sat, Apr 15, 2017 at 8:37 AM, Oded Gabbay <oded.gabbay-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
wrote:
>> On Fri, Apr 14, 2017 at 12:35 AM, Andres Rodriguez <andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
wrote:
>>> Make amdgpu the owner of all per-pipe state of the HQDs.
>>>
>>> This change will allow us to split the queues between kfd and amdgpu
>>> with a queue granularity instead of pipe granularity.
>>>
>>> This patch fixes kfd allocating an HDP_EOP region for its 3 pipes which
>>> goes unused.
>>>
>>> v2: support for gfx9
>>>
>>> Reviewed-by: Edward O'Callaghan <funfunctor-dczkZgxz+BNUPWh3PAxdjQ@public.gmane.org>
>>> Reviewed-by: Felix Kuehling <Felix.Kuehling-5C7GfCeVMHo@public.gmane.org>
>>> Acked-by: Christian König <christian.koenig-5C7GfCeVMHo@public.gmane.org>
>>> Signed-off-by: Andres Rodriguez <andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
>>
>> Hi Andres,
>> FYI, with your patchset rebased on alex's drm-next-4.12-wip, my Kaveri
>> machine won't boot the OS (Ubuntu 16.04).
>> I bisected your patch-set and the first bad patch is this one.
>>
>> Unfortunately, I don't have the means to dump kernel crashes before
>> the system boots.
>>
>> Thx,
>> Oded

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 02/17] drm/amdgpu: refactor MQD/HQD initialization v3
       [not found]     ` <20170413213542.18802-3-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-04-17 20:13       ` Andres Rodriguez
  0 siblings, 0 replies; 26+ messages in thread
From: Andres Rodriguez @ 2017-04-17 20:13 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW, Oded Gabbay



On 2017-04-13 05:35 PM, Andres Rodriguez wrote:
> The MQD programming sequence currently exists in 3 different places.
> Refactor it to absorb all the duplicates.
>
> The success path remains mostly identical except for a slightly
> different order in the non-kiq case. This shouldn't matter if the HQD
> is disabled.
>
> The error handling paths have been updated to deal with the new code
> structure.
>
> v2: the non-kiq path for gfxv8 was dropped in the rebase
> v3: split MEC_HPD_SIZE rename, dropped doorbell changes
>
> Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
> Acked-by: Christian König <christian.koenig@amd.com>
> Acked-by: Felix Kuehling <Felix.Kuehling@amd.com>
> Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 439 ++++++++++++++++++----------------
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c |  78 +++---
>  2 files changed, 271 insertions(+), 246 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> index 3b98162..4e6a60c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> @@ -2927,281 +2927,316 @@ struct bonaire_mqd
>  	u32 perf_counter_enable;
>  	u32 pgm[2];
>  	u32 tba[2];
>  	u32 tma[2];
>  	u32 pgm_rsrc[2];
>  	u32 vmid;
>  	u32 resource_limits;
>  	u32 static_thread_mgmt01[2];
>  	u32 tmp_ring_size;
>  	u32 static_thread_mgmt23[2];
>  	u32 restart[3];
>  	u32 thread_trace_enable;
>  	u32 reserved1;
>  	u32 user_data[16];
>  	u32 vgtcs_invoke_count[2];
>  	struct hqd_registers queue_state;
>  	u32 dequeue_cntr;
>  	u32 interrupt_queue[64];
>  };
>
> -/**
> - * gfx_v7_0_cp_compute_resume - setup the compute queue registers
> - *
> - * @adev: amdgpu_device pointer
> - *
> - * Program the compute queues and test them to make sure they
> - * are working.
> - * Returns 0 for success, error for failure.
> - */
> -static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
> +static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev, int me, int pipe)
>  {
> -	int r, i, j;
> -	u32 tmp;
> -	bool use_doorbell = true;
> -	u64 hqd_gpu_addr;
> -	u64 mqd_gpu_addr;
>  	u64 eop_gpu_addr;
> -	u64 wb_gpu_addr;
> -	u32 *buf;
> -	struct bonaire_mqd *mqd;
> -	struct amdgpu_ring *ring;
> -
> -	/* fix up chicken bits */
> -	tmp = RREG32(mmCP_CPF_DEBUG);
> -	tmp |= (1 << 23);
> -	WREG32(mmCP_CPF_DEBUG, tmp);
> +	u32 tmp;
> +	size_t eop_offset = me * pipe * GFX7_MEC_HPD_SIZE * 2;

The offset here is incorrect, it should be:

(mec * num_pipe_per_mec + pipe) * ...
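
i.e. something along these lines (untested sketch, assuming a zero-based
mec index here):

	size_t eop_offset = (mec * adev->gfx.mec.num_pipe_per_mec + pipe)
			    * GFX7_MEC_HPD_SIZE * 2;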


>
> -	/* init the pipes */
>  	mutex_lock(&adev->srbm_mutex);
> -	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
> -		int me = (i < 4) ? 1 : 2;
> -		int pipe = (i < 4) ? i : (i - 4);
> +	eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset;
>
> -		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * GFX7_MEC_HPD_SIZE * 2);
> +	cik_srbm_select(adev, me, pipe, 0, 0);
>
> -		cik_srbm_select(adev, me, pipe, 0, 0);
> +	/* write the EOP addr */
> +	WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
> +	WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
>
> -		/* write the EOP addr */
> -		WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
> -		WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
> +	/* set the VMID assigned */
> +	WREG32(mmCP_HPD_EOP_VMID, 0);
>
> -		/* set the VMID assigned */
> -		WREG32(mmCP_HPD_EOP_VMID, 0);
> +	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
> +	tmp = RREG32(mmCP_HPD_EOP_CONTROL);
> +	tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
> +	tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8);
> +	WREG32(mmCP_HPD_EOP_CONTROL, tmp);
>
> -		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
> -		tmp = RREG32(mmCP_HPD_EOP_CONTROL);
> -		tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
> -		tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8);
> -		WREG32(mmCP_HPD_EOP_CONTROL, tmp);
> -	}
>  	cik_srbm_select(adev, 0, 0, 0, 0);
>  	mutex_unlock(&adev->srbm_mutex);
> +}
>
> -	/* init the queues.  Just two for now. */
> -	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
> -		ring = &adev->gfx.compute_ring[i];
> +static int gfx_v7_0_mqd_deactivate(struct amdgpu_device *adev)
> +{
> +	int i;
>
> -		if (ring->mqd_obj == NULL) {
> -			r = amdgpu_bo_create(adev,
> -					     sizeof(struct bonaire_mqd),
> -					     PAGE_SIZE, true,
> -					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
> -					     &ring->mqd_obj);
> -			if (r) {
> -				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
> -				return r;
> -			}
> +	/* disable the queue if it's active */
> +	if (RREG32(mmCP_HQD_ACTIVE) & 1) {
> +		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
> +		for (i = 0; i < adev->usec_timeout; i++) {
> +			if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
> +				break;
> +			udelay(1);
>  		}
>
> -		r = amdgpu_bo_reserve(ring->mqd_obj, false);
> -		if (unlikely(r != 0)) {
> -			gfx_v7_0_cp_compute_fini(adev);
> -			return r;
> -		}
> -		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
> -				  &mqd_gpu_addr);
> -		if (r) {
> -			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
> -			gfx_v7_0_cp_compute_fini(adev);
> -			return r;
> -		}
> -		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
> -		if (r) {
> -			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
> -			gfx_v7_0_cp_compute_fini(adev);
> -			return r;
> -		}
> +		if (i == adev->usec_timeout)
> +			return -ETIMEDOUT;
>
> -		/* init the mqd struct */
> -		memset(buf, 0, sizeof(struct bonaire_mqd));
> +		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
> +		WREG32(mmCP_HQD_PQ_RPTR, 0);
> +		WREG32(mmCP_HQD_PQ_WPTR, 0);
> +	}
>
> -		mqd = (struct bonaire_mqd *)buf;
> -		mqd->header = 0xC0310800;
> -		mqd->static_thread_mgmt01[0] = 0xffffffff;
> -		mqd->static_thread_mgmt01[1] = 0xffffffff;
> -		mqd->static_thread_mgmt23[0] = 0xffffffff;
> -		mqd->static_thread_mgmt23[1] = 0xffffffff;
> +	return 0;
> +}
>
> -		mutex_lock(&adev->srbm_mutex);
> -		cik_srbm_select(adev, ring->me,
> -				ring->pipe,
> -				ring->queue, 0);
> +static void gfx_v7_0_mqd_init(struct amdgpu_device *adev,
> +			     struct bonaire_mqd *mqd,
> +			     uint64_t mqd_gpu_addr,
> +			     struct amdgpu_ring *ring)
> +{
> +	u64 hqd_gpu_addr;
> +	u64 wb_gpu_addr;
>
> -		/* disable wptr polling */
> -		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
> -		tmp &= ~CP_PQ_WPTR_POLL_CNTL__EN_MASK;
> -		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
> +	/* init the mqd struct */
> +	memset(mqd, 0, sizeof(struct bonaire_mqd));
>
> -		/* enable doorbell? */
> -		mqd->queue_state.cp_hqd_pq_doorbell_control =
> -			RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
> -		if (use_doorbell)
> -			mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
> -		else
> -			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
> -		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
> -		       mqd->queue_state.cp_hqd_pq_doorbell_control);
> -
> -		/* disable the queue if it's active */
> -		mqd->queue_state.cp_hqd_dequeue_request = 0;
> -		mqd->queue_state.cp_hqd_pq_rptr = 0;
> -		mqd->queue_state.cp_hqd_pq_wptr= 0;
> -		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
> -			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
> -			for (j = 0; j < adev->usec_timeout; j++) {
> -				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
> -					break;
> -				udelay(1);
> -			}
> -			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
> -			WREG32(mmCP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
> -			WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
> -		}
> +	mqd->header = 0xC0310800;
> +	mqd->static_thread_mgmt01[0] = 0xffffffff;
> +	mqd->static_thread_mgmt01[1] = 0xffffffff;
> +	mqd->static_thread_mgmt23[0] = 0xffffffff;
> +	mqd->static_thread_mgmt23[1] = 0xffffffff;
>
> -		/* set the pointer to the MQD */
> -		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
> -		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
> -		WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
> -		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
> -		/* set MQD vmid to 0 */
> -		mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
> -		mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
> -		WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
> -
> -		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
> -		hqd_gpu_addr = ring->gpu_addr >> 8;
> -		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
> -		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
> -		WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
> -		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
> -
> -		/* set up the HQD, this is similar to CP_RB0_CNTL */
> -		mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
> -		mqd->queue_state.cp_hqd_pq_control &=
> -			~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
> -					CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
> -
> -		mqd->queue_state.cp_hqd_pq_control |=
> -			order_base_2(ring->ring_size / 8);
> -		mqd->queue_state.cp_hqd_pq_control |=
> -			(order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
> +	/* enable doorbell? */
> +	mqd->queue_state.cp_hqd_pq_doorbell_control =
> +		RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
> +	if (ring->use_doorbell)
> +		mqd->queue_state.cp_hqd_pq_doorbell_control |= CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
> +	else
> +		mqd->queue_state.cp_hqd_pq_doorbell_control &= ~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
> +
> +	/* set the pointer to the MQD */
> +	mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
> +	mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
> +
> +	/* set MQD vmid to 0 */
> +	mqd->queue_state.cp_mqd_control = RREG32(mmCP_MQD_CONTROL);
> +	mqd->queue_state.cp_mqd_control &= ~CP_MQD_CONTROL__VMID_MASK;
> +
> +	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
> +	hqd_gpu_addr = ring->gpu_addr >> 8;
> +	mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
> +	mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
> +
> +	/* set up the HQD, this is similar to CP_RB0_CNTL */
> +	mqd->queue_state.cp_hqd_pq_control = RREG32(mmCP_HQD_PQ_CONTROL);
> +	mqd->queue_state.cp_hqd_pq_control &=
> +		~(CP_HQD_PQ_CONTROL__QUEUE_SIZE_MASK |
> +				CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE_MASK);
> +
> +	mqd->queue_state.cp_hqd_pq_control |=
> +		order_base_2(ring->ring_size / 8);
> +	mqd->queue_state.cp_hqd_pq_control |=
> +		(order_base_2(AMDGPU_GPU_PAGE_SIZE/8) << 8);
>  #ifdef __BIG_ENDIAN
> -		mqd->queue_state.cp_hqd_pq_control |=
> -			2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
> +	mqd->queue_state.cp_hqd_pq_control |=
> +		2 << CP_HQD_PQ_CONTROL__ENDIAN_SWAP__SHIFT;
>  #endif
> -		mqd->queue_state.cp_hqd_pq_control &=
> -			~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
> +	mqd->queue_state.cp_hqd_pq_control &=
> +		~(CP_HQD_PQ_CONTROL__UNORD_DISPATCH_MASK |
>  				CP_HQD_PQ_CONTROL__ROQ_PQ_IB_FLIP_MASK |
>  				CP_HQD_PQ_CONTROL__PQ_VOLATILE_MASK);
> -		mqd->queue_state.cp_hqd_pq_control |=
> -			CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
> -			CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
> -		WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
> -
> -		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
> -		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
> -		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
> -		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
> -		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
> -		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
> -		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
> -
> -		/* set the wb address wether it's enabled or not */
> -		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
> -		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
> -		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
> -			upper_32_bits(wb_gpu_addr) & 0xffff;
> -		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
> -		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
> -		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
> -		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
> -
> -		/* enable the doorbell if requested */
> -		if (use_doorbell) {
> -			mqd->queue_state.cp_hqd_pq_doorbell_control =
> -				RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
> -			mqd->queue_state.cp_hqd_pq_doorbell_control &=
> -				~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
> -			mqd->queue_state.cp_hqd_pq_doorbell_control |=
> -				(ring->doorbell_index <<
> -				 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
> -			mqd->queue_state.cp_hqd_pq_doorbell_control |=
> -				CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
> -			mqd->queue_state.cp_hqd_pq_doorbell_control &=
> -				~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
> -				CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
> +	mqd->queue_state.cp_hqd_pq_control |=
> +		CP_HQD_PQ_CONTROL__PRIV_STATE_MASK |
> +		CP_HQD_PQ_CONTROL__KMD_QUEUE_MASK; /* assuming kernel queue control */
>
> -		} else {
> -			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
> +	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
> +	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
> +	mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
> +	mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
> +
> +	/* set the wb address whether it's enabled or not */
> +	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
> +	mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
> +	mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
> +		upper_32_bits(wb_gpu_addr) & 0xffff;
> +
> +	/* enable the doorbell if requested */
> +	if (ring->use_doorbell) {
> +		mqd->queue_state.cp_hqd_pq_doorbell_control =
> +			RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
> +		mqd->queue_state.cp_hqd_pq_doorbell_control &=
> +			~CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET_MASK;
> +		mqd->queue_state.cp_hqd_pq_doorbell_control |=
> +			(ring->doorbell_index <<
> +			 CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT);
> +		mqd->queue_state.cp_hqd_pq_doorbell_control |=
> +			CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_EN_MASK;
> +		mqd->queue_state.cp_hqd_pq_doorbell_control &=
> +			~(CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_SOURCE_MASK |
> +					CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_HIT_MASK);
> +
> +	} else {
> +		mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
> +	}
> +
> +	/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
> +	ring->wptr = 0;
> +	mqd->queue_state.cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
> +	mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
> +
> +	/* set the vmid for the queue */
> +	mqd->queue_state.cp_hqd_vmid = 0;
> +
> +	/* activate the queue */
> +	mqd->queue_state.cp_hqd_active = 1;
> +}
> +
> +static int gfx_v7_0_mqd_commit(struct amdgpu_device *adev,
> +			       struct bonaire_mqd *mqd)
> +{
> +	u32 tmp;
> +
> +	/* disable wptr polling */
> +	tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
> +	tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
> +	WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
> +
> +	/* program MQD field to HW */
> +	WREG32(mmCP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
> +	WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
> +	WREG32(mmCP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
> +	WREG32(mmCP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
> +	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
> +	WREG32(mmCP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
> +	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
> +	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
> +	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR, mqd->queue_state.cp_hqd_pq_rptr_report_addr);
> +	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI, mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
> +	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->queue_state.cp_hqd_pq_doorbell_control);
> +	WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
> +	WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
> +
> +	/* activate the HQD */
> +	WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
> +
> +	return 0;
> +}
> +
> +static int gfx_v7_0_compute_queue_init(struct amdgpu_device *adev, int ring_id)
> +{
> +	int r;
> +	u64 mqd_gpu_addr;
> +	struct bonaire_mqd *mqd;
> +	struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
> +
> +	if (ring->mqd_obj == NULL) {
> +		r = amdgpu_bo_create(adev,
> +				sizeof(struct bonaire_mqd),
> +				PAGE_SIZE, true,
> +				AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
> +				&ring->mqd_obj);
> +		if (r) {
> +			dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
> +			return r;
>  		}
> -		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
> -		       mqd->queue_state.cp_hqd_pq_doorbell_control);
> +	}
>
> -		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
> -		ring->wptr = 0;
> -		mqd->queue_state.cp_hqd_pq_wptr = lower_32_bits(ring->wptr);
> -		WREG32(mmCP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
> -		mqd->queue_state.cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
> +	r = amdgpu_bo_reserve(ring->mqd_obj, false);
> +	if (unlikely(r != 0))
> +		goto out;
>
> -		/* set the vmid for the queue */
> -		mqd->queue_state.cp_hqd_vmid = 0;
> -		WREG32(mmCP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
> +	r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
> +			&mqd_gpu_addr);
> +	if (r) {
> +		dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
> +		goto out_unreserve;
> +	}
> +	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&mqd);
> +	if (r) {
> +		dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
> +		goto out_unreserve;
> +	}
>
> -		/* activate the queue */
> -		mqd->queue_state.cp_hqd_active = 1;
> -		WREG32(mmCP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
> +	mutex_lock(&adev->srbm_mutex);
> +	cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
>
> -		cik_srbm_select(adev, 0, 0, 0, 0);
> -		mutex_unlock(&adev->srbm_mutex);
> +	gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring);
> +	gfx_v7_0_mqd_deactivate(adev);
> +	gfx_v7_0_mqd_commit(adev, mqd);
>
> -		amdgpu_bo_kunmap(ring->mqd_obj);
> -		amdgpu_bo_unreserve(ring->mqd_obj);
> +	cik_srbm_select(adev, 0, 0, 0, 0);
> +	mutex_unlock(&adev->srbm_mutex);
>
> -		ring->ready = true;
> +	amdgpu_bo_kunmap(ring->mqd_obj);
> +out_unreserve:
> +	amdgpu_bo_unreserve(ring->mqd_obj);
> +out:
> +	return r;
> +}
> +
> +/**
> + * gfx_v7_0_cp_compute_resume - setup the compute queue registers
> + *
> + * @adev: amdgpu_device pointer
> + *
> + * Program the compute queues and test them to make sure they
> + * are working.
> + * Returns 0 for success, error for failure.
> + */
> +static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
> +{
> +	int r, i, j;
> +	u32 tmp;
> +	struct amdgpu_ring *ring;
> +
> +	/* fix up chicken bits */
> +	tmp = RREG32(mmCP_CPF_DEBUG);
> +	tmp |= (1 << 23);
> +	WREG32(mmCP_CPF_DEBUG, tmp);
> +
> +	/* init the pipes */
> +	for (i = 0; i < adev->gfx.mec.num_mec; i++)
> +		for (j = 0; j < adev->gfx.mec.num_pipe; j++)
> +			gfx_v7_0_compute_pipe_init(adev, i, j);
> +
> +	/* init the queues */
> +	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
> +		r = gfx_v7_0_compute_queue_init(adev, i);
> +		if (r) {
> +			gfx_v7_0_cp_compute_fini(adev);
> +			return r;
> +		}
>  	}
>
>  	gfx_v7_0_cp_compute_enable(adev, true);
>
>  	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
>  		ring = &adev->gfx.compute_ring[i];
> -
> +		ring->ready = true;
>  		r = amdgpu_ring_test_ring(ring);
>  		if (r)
>  			ring->ready = false;
>  	}
>
>  	return 0;
>  }
>
>  static void gfx_v7_0_cp_enable(struct amdgpu_device *adev, bool enable)
>  {
>  	gfx_v7_0_cp_gfx_enable(adev, enable);
>  	gfx_v7_0_cp_compute_enable(adev, enable);
>  }
>
>  static int gfx_v7_0_cp_load_microcode(struct amdgpu_device *adev)
>  {
>  	int r;
>
>  	r = gfx_v7_0_cp_gfx_load_microcode(adev);
>  	if (r)
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index fc94c3a..b670302 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -4735,79 +4735,98 @@ static int gfx_v8_0_kiq_kcq_disable(struct amdgpu_device *adev)
>  	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
>  	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
>  	amdgpu_ring_commit(kiq_ring);
>
>  	for (i = 0; i < adev->usec_timeout; i++) {
>  		tmp = RREG32(scratch);
>  		if (tmp == 0xDEADBEEF)
>  			break;
>  		DRM_UDELAY(1);
>  	}
>  	if (i >= adev->usec_timeout) {
>  		DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n",
>  			  scratch, tmp);
>  		r = -EINVAL;
>  	}
>  	amdgpu_gfx_scratch_free(adev, scratch);
>
>  	return r;
>  }
>
> +static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
> +{
> +	int i, r = 0;
> +
> +	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
> +		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
> +		for (i = 0; i < adev->usec_timeout; i++) {
> +			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
> +				break;
> +			udelay(1);
> +		}
> +		if (i == adev->usec_timeout)
> +			r = -ETIMEDOUT;
> +	}
> +	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
> +	WREG32(mmCP_HQD_PQ_RPTR, 0);
> +	WREG32(mmCP_HQD_PQ_WPTR, 0);
> +
> +	return r;
> +}
> +
>  static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
>  {
>  	struct amdgpu_device *adev = ring->adev;
>  	struct vi_mqd *mqd = ring->mqd_ptr;
>  	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
>  	uint32_t tmp;
>
> +	/* init the mqd struct */
> +	memset(mqd, 0, sizeof(struct vi_mqd));
> +
>  	mqd->header = 0xC0310800;
>  	mqd->compute_pipelinestat_enable = 0x00000001;
>  	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
>  	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
>  	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
>  	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
>  	mqd->compute_misc_reserved = 0x00000003;
>
>  	eop_base_addr = ring->eop_gpu_addr >> 8;
>  	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
>  	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
>
>  	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
>  	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
>  	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
>  			(order_base_2(GFX8_MEC_HPD_SIZE / 4) - 1));
>
>  	mqd->cp_hqd_eop_control = tmp;
>
>  	/* enable doorbell? */
>  	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
>  			    CP_HQD_PQ_DOORBELL_CONTROL,
>  			    DOORBELL_EN,
>  			    ring->use_doorbell ? 1 : 0);
>
>  	mqd->cp_hqd_pq_doorbell_control = tmp;
>
> -	/* disable the queue if it's active */
> -	mqd->cp_hqd_dequeue_request = 0;
> -	mqd->cp_hqd_pq_rptr = 0;
> -	mqd->cp_hqd_pq_wptr = 0;
> -
>  	/* set the pointer to the MQD */
>  	mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
>  	mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
>
>  	/* set MQD vmid to 0 */
>  	tmp = RREG32(mmCP_MQD_CONTROL);
>  	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
>  	mqd->cp_mqd_control = tmp;
>
>  	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
>  	hqd_gpu_addr = ring->gpu_addr >> 8;
>  	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
>  	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
>
>  	/* set up the HQD, this is similar to CP_RB0_CNTL */
>  	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
>  	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
>  			    (order_base_2(ring->ring_size / 4) - 1));
>  	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
>  			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
> @@ -4863,157 +4882,149 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
>  	/* set MTYPE */
>  	tmp = RREG32(mmCP_HQD_IB_CONTROL);
>  	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
>  	tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MTYPE, 3);
>  	mqd->cp_hqd_ib_control = tmp;
>
>  	tmp = RREG32(mmCP_HQD_IQ_TIMER);
>  	tmp = REG_SET_FIELD(tmp, CP_HQD_IQ_TIMER, MTYPE, 3);
>  	mqd->cp_hqd_iq_timer = tmp;
>
>  	tmp = RREG32(mmCP_HQD_CTX_SAVE_CONTROL);
>  	tmp = REG_SET_FIELD(tmp, CP_HQD_CTX_SAVE_CONTROL, MTYPE, 3);
>  	mqd->cp_hqd_ctx_save_control = tmp;
>
>  	/* activate the queue */
>  	mqd->cp_hqd_active = 1;
>
>  	return 0;
>  }
>
> -static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring)
> +static int gfx_v8_0_mqd_commit(struct amdgpu_ring *ring)
>  {
>  	struct amdgpu_device *adev = ring->adev;
>  	struct vi_mqd *mqd = ring->mqd_ptr;
> -	int j;
>
>  	/* disable wptr polling */
>  	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
>
>  	WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
>  	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);
>
>  	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
>  	WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);
>
>  	/* enable doorbell? */
>  	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
>
> -	/* disable the queue if it's active */
> -	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
> -		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
> -		for (j = 0; j < adev->usec_timeout; j++) {
> -			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
> -				break;
> -			udelay(1);
> -		}
> -		WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
> -		WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
> -		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
> -	}
> +	/* set pq read/write pointers */
> +	WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
> +	WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
> +	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
>
>  	/* set the pointer to the MQD */
>  	WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
>  	WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
>
>  	/* set MQD vmid to 0 */
>  	WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);
>
>  	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
>  	WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
>  	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
>
>  	/* set up the HQD, this is similar to CP_RB0_CNTL */
>  	WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
>
>  	/* set the wb address whether it's enabled or not */
>  	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
>  				mqd->cp_hqd_pq_rptr_report_addr_lo);
>  	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
>  				mqd->cp_hqd_pq_rptr_report_addr_hi);
>
>  	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
>  	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
>  	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);
>
> +	/* enable the doorbell if requested */
>  	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
>
>  	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
>  	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
>
>  	/* set the vmid for the queue */
>  	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
>
>  	WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
>
>  	/* activate the queue */
>  	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
>
>  	return 0;
>  }
>
>  static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
>  {
>  	struct amdgpu_device *adev = ring->adev;
>  	struct vi_mqd *mqd = ring->mqd_ptr;
>  	int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
>
>  	gfx_v8_0_kiq_setting(ring);
>
>  	if (adev->gfx.in_reset) { /* for GPU_RESET case */
>  		/* reset MQD to a clean status */
>  		if (adev->gfx.mec.mqd_backup[mqd_idx])
>  			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
>
>  		/* reset ring buffer */
>  		ring->wptr = 0;
>  		amdgpu_ring_clear_ring(ring);
>
>  		mutex_lock(&adev->srbm_mutex);
>  		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
> -		gfx_v8_0_kiq_init_register(ring);
> +		gfx_v8_0_deactivate_hqd(adev, 1);
> +		gfx_v8_0_mqd_commit(ring);
>  		vi_srbm_select(adev, 0, 0, 0, 0);
>  		mutex_unlock(&adev->srbm_mutex);
>  	} else {
> -		memset((void *)mqd, 0, sizeof(*mqd));
>  		mutex_lock(&adev->srbm_mutex);
>  		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
>  		gfx_v8_0_mqd_init(ring);
> -		gfx_v8_0_kiq_init_register(ring);
> +		gfx_v8_0_deactivate_hqd(adev, 1);
> +		gfx_v8_0_mqd_commit(ring);
>  		vi_srbm_select(adev, 0, 0, 0, 0);
>  		mutex_unlock(&adev->srbm_mutex);
>
>  		if (adev->gfx.mec.mqd_backup[mqd_idx])
>  			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
>  	}
>
>  	return 0;
>  }
>
>  static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
>  {
>  	struct amdgpu_device *adev = ring->adev;
>  	struct vi_mqd *mqd = ring->mqd_ptr;
>  	int mqd_idx = ring - &adev->gfx.compute_ring[0];
>
>  	if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
> -		memset((void *)mqd, 0, sizeof(*mqd));
>  		mutex_lock(&adev->srbm_mutex);
>  		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
>  		gfx_v8_0_mqd_init(ring);
>  		vi_srbm_select(adev, 0, 0, 0, 0);
>  		mutex_unlock(&adev->srbm_mutex);
>
>  		if (adev->gfx.mec.mqd_backup[mqd_idx])
>  			memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd));
>  	} else if (adev->gfx.in_reset) { /* for GPU_RESET case */
>  		/* reset MQD to a clean status */
>  		if (adev->gfx.mec.mqd_backup[mqd_idx])
>  			memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd));
>
>  		/* reset ring buffer */
>  		ring->wptr = 0;
>  		amdgpu_ring_clear_ring(ring);
>  	}
>
>  	return 0;
>  }
> @@ -5284,61 +5295,40 @@ static bool gfx_v8_0_check_soft_reset(void *handle)
>  	/* SRBM_STATUS */
>  	tmp = RREG32(mmSRBM_STATUS);
>  	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
>  		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
>  						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
>  	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
>  		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
>  						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
>
>  	if (grbm_soft_reset || srbm_soft_reset) {
>  		adev->gfx.grbm_soft_reset = grbm_soft_reset;
>  		adev->gfx.srbm_soft_reset = srbm_soft_reset;
>  		return true;
>  	} else {
>  		adev->gfx.grbm_soft_reset = 0;
>  		adev->gfx.srbm_soft_reset = 0;
>  		return false;
>  	}
>  }
>
> -static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
> -{
> -	int i, r = 0;
> -
> -	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
> -		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, req);
> -		for (i = 0; i < adev->usec_timeout; i++) {
> -			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
> -				break;
> -			udelay(1);
> -		}
> -		if (i == adev->usec_timeout)
> -			r = -ETIMEDOUT;
> -	}
> -	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
> -	WREG32(mmCP_HQD_PQ_RPTR, 0);
> -	WREG32(mmCP_HQD_PQ_WPTR, 0);
> -
> -	return r;
> -}
> -
>  static int gfx_v8_0_pre_soft_reset(void *handle)
>  {
>  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>  	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
>
>  	if ((!adev->gfx.grbm_soft_reset) &&
>  	    (!adev->gfx.srbm_soft_reset))
>  		return 0;
>
>  	grbm_soft_reset = adev->gfx.grbm_soft_reset;
>  	srbm_soft_reset = adev->gfx.srbm_soft_reset;
>
>  	/* stop the rlc */
>  	gfx_v8_0_rlc_stop(adev);
>
>  	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
>  	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
>  		/* Disable GFX parsing/prefetching */
>  		gfx_v8_0_cp_gfx_enable(adev, false);
>
>
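
Net effect of the refactor quoted above: the monolithic per-queue setup is
split into an init/deactivate/commit sequence, so compute queue bring-up
now reads as below (assembled from the gfx7 hunks; the gfx8 path is the
analogous gfx_v8_0_mqd_init / gfx_v8_0_deactivate_hqd /
gfx_v8_0_mqd_commit sequence):

	mutex_lock(&adev->srbm_mutex);
	cik_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);

	gfx_v7_0_mqd_init(adev, mqd, mqd_gpu_addr, ring); /* fill the MQD image */
	gfx_v7_0_mqd_deactivate(adev);  /* drain the HQD if it is still active */
	gfx_v7_0_mqd_commit(adev, mqd); /* program the MQD into the HQD registers */

	cik_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

mqd_init computes the MQD contents without activating anything, and
mqd_commit is the only step that touches the HQD registers, which is what
lets kfd and amdgpu later share a single MQD programming sequence (patch
05 of this series).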

* Re: [PATCH 09/17] drm/amdgpu: take ownership of per-pipe configuration v2
       [not found]                 ` <CAFCwf12mXeEHd9pb9L84wibaKC0zrvO6dHggQ-ePP7odN=oj6A-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2017-04-17 22:46                   ` Andres Rodriguez
       [not found]                     ` <b145c07f-311b-5002-ce1d-37871d7a6968-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 26+ messages in thread
From: Andres Rodriguez @ 2017-04-17 22:46 UTC (permalink / raw)
  To: Oded Gabbay; +Cc: amd-gfx list

Sending an updated patch with 2 bugs fixed (rough sketch of both fixes
below):

1. the offset into the HPD bo was incorrect
2. the compute_pipe_init function had an mec/me mismatch, so registers
were being set for the wrong ME
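
For illustration, this is approximately what the corrected gfx7 pipe init
ends up looking like. Treat it as a sketch: the names follow this series
(GFX7_MEC_HPD_SIZE, hpd_eop_gpu_addr, num_pipe), but the exact hunk is in
the updated patch.

	static void gfx_v7_0_compute_pipe_init(struct amdgpu_device *adev,
					       int mec, int pipe)
	{
		u64 eop_gpu_addr;
		u32 tmp;
		/* fix 1: index the HPD/EOP bo by (mec, pipe), not by pipe alone */
		size_t eop_offset = (mec * adev->gfx.mec.num_pipe + pipe)
				    * GFX7_MEC_HPD_SIZE * 2;

		mutex_lock(&adev->srbm_mutex);
		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + eop_offset;

		/* fix 2: ME 0 is the gfx pipe, so compute MECs start at ME 1 */
		cik_srbm_select(adev, mec + 1, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(mmCP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
		WREG32(mmCP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

		/* set the VMID assigned */
		WREG32(mmCP_HPD_EOP_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HPD_EOP_CONTROL);
		tmp &= ~CP_HPD_EOP_CONTROL__EOP_SIZE_MASK;
		tmp |= order_base_2(GFX7_MEC_HPD_SIZE / 8);
		WREG32(mmCP_HPD_EOP_CONTROL, tmp);

		cik_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	}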

Regards,
Andres

On 2017-04-16 01:25 AM, Oded Gabbay wrote:
> Hey Andres,
>
> Yes, that patch indeed fixed the black screen on Alex's 4.12-wip, but
> it still wouldn't boot with your entire patch-set. It would boot only
> with patches 1-8.
>
> I'll try to get a USB-to-serial dongle so I can try to dump kernel
> messages during boot but it will probably take some time.
>
> Oded
>
> On Sun, Apr 16, 2017 at 3:12 AM, Andres Rodriguez <andresx7@gmail.com> wrote:
>> Hey Oded,
>>
>> I've had problems with 4.12-wip on its own. Just wanted to confirm if
>> you picked up this fix for a bug in drm-next:
>> "drm: Fix get_property logic fumble"
>>
>> Regards,
>> Andres
>>
>> On Sat, Apr 15, 2017 at 8:37 AM, Oded Gabbay <oded.gabbay@gmail.com> wrote:
>>> On Fri, Apr 14, 2017 at 12:35 AM, Andres Rodriguez <andresx7@gmail.com> wrote:
>>>> Make amdgpu the owner of all per-pipe state of the HQDs.
>>>>
>>>> This change will allow us to split the queues between kfd and amdgpu
>>>> with a queue granularity instead of pipe granularity.
>>>>
>> This patch fixes kfd allocating an HPD_EOP region for its 3 pipes which
>>>> goes unused.
>>>>
>>>> v2: support for gfx9
>>>>
>>>> Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
>>>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>>> Acked-by: Christian König <christian.koenig@amd.com>
>>>> Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
>>>
>>> Hi Andres,
>>> FYI, with your patchset rebased on alex's drm-next-4.12-wip, my Kaveri
>>> machine won't boot the OS (Ubuntu 16.04).
>>> I bisected your patch-set and the first bad patch is this one.
>>>
>>> Unfortunately, I don't have the means to dump kernel crashes before
>>> the system boots.
>>>
>>> Thx,
>>> Oded
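
The "queue granularity" mentioned in the quoted commit message is
expressed later in the series as a bitmap of compute queues owned by
amdgpu, with every unclaimed queue left to amdkfd. A rough sketch of the
final allocation policy (patch 17); the field names here are approximate,
not necessarily the exact ones in the patches:

	/* amdgpu claims the first two queues of each pipe on the first MEC;
	 * every bit left clear in queue_bitmap stays available to amdkfd. */
	static void gfx_compute_queue_acquire(struct amdgpu_device *adev)
	{
		int i, queue, pipe, mec;

		for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
			queue = i % adev->gfx.mec.num_queue_per_pipe;
			pipe = (i / adev->gfx.mec.num_queue_per_pipe)
				% adev->gfx.mec.num_pipe_per_mec;
			mec = (i / adev->gfx.mec.num_queue_per_pipe)
				/ adev->gfx.mec.num_pipe_per_mec;

			if (mec == 0 && queue < 2)
				set_bit(i, adev->gfx.mec.queue_bitmap);
		}
	}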

* Re: [PATCH 09/17] drm/amdgpu: take ownership of per-pipe configuration v2
       [not found]                     ` <b145c07f-311b-5002-ce1d-37871d7a6968-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-04-18 19:41                       ` Oded Gabbay
  0 siblings, 0 replies; 26+ messages in thread
From: Oded Gabbay @ 2017-04-18 19:41 UTC (permalink / raw)
  To: Andres Rodriguez; +Cc: amd-gfx list

With the new patch-set, I get the following error (on Kaveri) when
running KFDEventTest.SignalEvent (some other tests produce the same
result, e.g. KFDQMTest.OverSubscribeCpQueues):
dmesg:
[  276.528643] kfd: qcm fence wait loop timeout expired
[  276.528649] kfd: the cp might be in an unrecoverable state due to
an unsuccessful queues preemption
[  276.528864] traps: kfdtest[5963] general protection ip:7f1c0dd23e5d
sp:7fffd1a419b0 error:0
[  276.528869]  in libhsakmt-1.so.0.0.1[7f1c0dd20000+8000]

Oded

On Tue, Apr 18, 2017 at 1:46 AM, Andres Rodriguez <andresx7@gmail.com> wrote:
> Sending an updated patch with 2 bugs fixed:
>
> 1. the offset into the HPD bo was incorrect
> 2. the compute_pipe_init function had an mec/me mismatch, so registers were
> being set for the wrong ME
>
> Regards,
> Andres
>
> On 2017-04-16 01:25 AM, Oded Gabbay wrote:
>>
>> Hey Andres,
>>
>> Yes, that patch indeed fixed the black screen on Alex's 4.12-wip, but
>> it still wouldn't boot with your entire patch-set. It would boot only
>> with patches 1-8.
>>
>> I'll try to get a USB-to-serial dongle so I can try to dump kernel
>> messages during boot but it will probably take some time.
>>
>> Oded
>>
>> On Sun, Apr 16, 2017 at 3:12 AM, Andres Rodriguez <andresx7@gmail.com>
>> wrote:
>>>
>>> Hey Oded,
>>>
>>> I've had problems with 4.12-wip on its own. Just wanted to confirm if
>>> you picked up this fix for a bug in drm-next:
>>> "drm: Fix get_property logic fumble"
>>>
>>> Regards,
>>> Andres
>>>
>>> On Sat, Apr 15, 2017 at 8:37 AM, Oded Gabbay <oded.gabbay@gmail.com>
>>> wrote:
>>>>
>>>> On Fri, Apr 14, 2017 at 12:35 AM, Andres Rodriguez <andresx7@gmail.com>
>>>> wrote:
>>>>>
>>>>> Make amdgpu the owner of all per-pipe state of the HQDs.
>>>>>
>>>>> This change will allow us to split the queues between kfd and amdgpu
>>>>> with a queue granularity instead of pipe granularity.
>>>>>
>>>>> This patch fixes kfd allocating an HPD_EOP region for its 3 pipes which
>>>>> goes unused.
>>>>>
>>>>> v2: support for gfx9
>>>>>
>>>>> Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net>
>>>>> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>>>> Acked-by: Christian König <christian.koenig@amd.com>
>>>>> Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
>>>>
>>>>
>>>> Hi Andres,
>>>> FYI, with your patchset rebased on alex's drm-next-4.12-wip, my Kaveri
>>>> machine won't boot the OS (Ubuntu 16.04).
>>>> I bisected your patch-set and the first bad patch is this one.
>>>>
>>>> Unfortunately, I don't have the means to dump kernel crashes before
>>>> the system boots.
>>>>
>>>> Thx,
>>>> Oded

Thread overview: 26+ messages
2017-04-13 21:35 [PATCH split] Improve pipe split between amdgpu and amdkfd Andres Rodriguez
     [not found] ` <20170413213542.18802-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-04-13 21:35   ` [PATCH 01/17] drm/amdgpu: clarify MEC_HPD_SIZE is specific to a gfx generation Andres Rodriguez
2017-04-13 21:35   ` [PATCH 02/17] drm/amdgpu: refactor MQD/HQD initialization v3 Andres Rodriguez
     [not found]     ` <20170413213542.18802-3-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-04-17 20:13       ` Andres Rodriguez
2017-04-13 21:35   ` [PATCH 03/17] drm/amdgpu: detect timeout error when deactivating hqd Andres Rodriguez
2017-04-13 21:35   ` [PATCH 04/17] drm/amdgpu: remove duplicate definition of cik_mqd Andres Rodriguez
2017-04-13 21:35   ` [PATCH 05/17] drm/amdgpu: unify MQD programming sequence for kfd and amdgpu v2 Andres Rodriguez
2017-04-13 21:35   ` [PATCH 06/17] drm/amdgpu: fix kgd_hqd_load failing to update shadow_wptr Andres Rodriguez
2017-04-13 21:35   ` [PATCH 07/17] drm/amdgpu: rename rdev to adev Andres Rodriguez
2017-04-13 21:35   ` [PATCH 08/17] drm/radeon: take ownership of pipe initialization Andres Rodriguez
2017-04-13 21:35   ` [PATCH 09/17] drm/amdgpu: take ownership of per-pipe configuration v2 Andres Rodriguez
     [not found]     ` <20170413213542.18802-10-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-04-15 12:37       ` Oded Gabbay
     [not found]         ` <CAFCwf12haVgYLxxcqMqTd-GX7KjxtZXD1Stuqj87HAP32TdvXQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-04-16  0:12           ` Andres Rodriguez
     [not found]             ` <CAFQ_0eED3HxUN8Sz0buefckSmVWvMwBD0mFk_GA1XeLABdsTnw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-04-16  5:25               ` Oded Gabbay
     [not found]                 ` <CAFQ_0eE1O+SS_ucS-8Rjqm3S4aDjZq1r0OfzZhTKE=9Z12xMZQ@mail.gmail.com>
     [not found]                   ` <CAFQ_0eE1O+SS_ucS-8Rjqm3S4aDjZq1r0OfzZhTKE=9Z12xMZQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-04-16 14:51                     ` Andres Rodriguez
     [not found]                 ` <CAFCwf12mXeEHd9pb9L84wibaKC0zrvO6dHggQ-ePP7odN=oj6A-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-04-17 22:46                   ` Andres Rodriguez
     [not found]                     ` <b145c07f-311b-5002-ce1d-37871d7a6968-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-04-18 19:41                       ` Oded Gabbay
2017-04-13 21:35   ` [PATCH 10/17] drm/amdgpu: allow split of queues with kfd at queue granularity v4 Andres Rodriguez
2017-04-13 21:35   ` [PATCH 11/17] drm/amdgpu: teach amdgpu how to enable interrupts for any pipe v3 Andres Rodriguez
2017-04-13 21:35   ` [PATCH 12/17] drm/amdkfd: allow split HQD on per-queue granularity v4 Andres Rodriguez
2017-04-13 21:35   ` [PATCH 13/17] drm/amdgpu: remove duplicate magic constants from amdgpu_amdkfd_gfx*.c Andres Rodriguez
2017-04-13 21:35   ` [PATCH 14/17] drm/amdgpu: allocate queues horizontally across pipes Andres Rodriguez
2017-04-13 21:35   ` [PATCH 15/17] drm/amdgpu: remove hardcoded queue_mask in PACKET3_SET_RESOURCES Andres Rodriguez
2017-04-13 21:35   ` [PATCH 16/17] drm/amdgpu: avoid KIQ clashing with compute or KFD queues v2 Andres Rodriguez
2017-04-13 21:35   ` [PATCH 17/17] drm/amdgpu: new queue policy, take first 2 queues of each pipe v2 Andres Rodriguez
2017-04-13 21:39   ` [PATCH split] Improve pipe split between amdgpu and amdkfd Andres Rodriguez
