All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andres Rodriguez <andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
Cc: andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org
Subject: [PATCH 22/26] drm/amdgpu: implement ring set_priority for gfx_v8 compute v5
Date: Thu,  6 Apr 2017 02:21:35 -0400	[thread overview]
Message-ID: <20170406062139.3335-23-andresx7@gmail.com> (raw)
In-Reply-To: <20170406062139.3335-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>

Programming CP_HQD_QUEUE_PRIORITY enables a queue to take priority over
other queues on the same pipe. Multiple queues on a pipe are timesliced
so this gives us full precedence over other queues.

Programming CP_HQD_PIPE_PRIORITY changes the SPI_ARB_PRIORITY of the
wave as follows:
        0x2: CS_H
        0x1: CS_M
        0x0: CS_L

The SPI block will then dispatch work according to the policy set by
SPI_ARB_PRIORITY. In the current policy CS_H is higher priority than
gfx.

To keep CUs from bouncing back and forth between GFX and high priority
compute, which would introduce further latency, we reserve CUs 2 and up
for high priority compute on demand.

v2: fix srbm_select to ring->queue and use ring->funcs->type
v3: use AMD_SCHED_PRIORITY_* instead of AMDGPU_CTX_PRIORITY_*
v4: switch int to enum amd_sched_priority
v5: corresponding changes for srbm_lock

Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  3 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  1 +
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c      | 96 +++++++++++++++++++++++++++++-
 3 files changed, 99 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b9a4161..c56a884 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1044,20 +1044,23 @@ struct amdgpu_gfx {
 	uint32_t			me_feature_version;
 	uint32_t			ce_feature_version;
 	uint32_t			pfp_feature_version;
 	uint32_t			rlc_feature_version;
 	uint32_t			mec_feature_version;
 	uint32_t			mec2_feature_version;
 	struct amdgpu_ring		gfx_ring[AMDGPU_MAX_GFX_RINGS];
 	unsigned			num_gfx_rings;
 	struct amdgpu_ring		compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
 	unsigned			num_compute_rings;
+	spinlock_t			cu_reserve_lock;
+	uint32_t			cu_reserve_pipe_mask;
+	uint32_t			cu_reserve_queue_mask[AMDGPU_MAX_COMPUTE_RINGS];
 	struct amdgpu_irq_src		eop_irq;
 	struct amdgpu_irq_src		priv_reg_irq;
 	struct amdgpu_irq_src		priv_inst_irq;
 	/* gfx status */
 	uint32_t			gfx_current_status;
 	/* ce ram size*/
 	unsigned			ce_ram_size;
 	struct amdgpu_cu_info		cu_info;
 	const struct amdgpu_gfx_funcs	*funcs;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 07f16b4..29b45bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1874,20 +1874,21 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	/* Registers mapping */
 	/* TODO: block userspace mapping of io register */
 	spin_lock_init(&adev->mmio_idx_lock);
 	spin_lock_init(&adev->smc_idx_lock);
 	spin_lock_init(&adev->pcie_idx_lock);
 	spin_lock_init(&adev->uvd_ctx_idx_lock);
 	spin_lock_init(&adev->didt_idx_lock);
 	spin_lock_init(&adev->gc_cac_idx_lock);
 	spin_lock_init(&adev->audio_endpt_idx_lock);
 	spin_lock_init(&adev->mm_stats.lock);
+	spin_lock_init(&adev->gfx.cu_reserve_lock);
 
 	INIT_LIST_HEAD(&adev->shadow_list);
 	mutex_init(&adev->shadow_list_lock);
 
 	INIT_LIST_HEAD(&adev->gtt_list);
 	spin_lock_init(&adev->gtt_list_lock);
 
 	INIT_LIST_HEAD(&adev->ring_lru_list);
 	spin_lock_init(&adev->ring_lru_list_lock);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 3cfe3c0..f94d532 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -46,21 +46,24 @@
 #include "gca/gfx_8_0_sh_mask.h"
 #include "gca/gfx_8_0_enum.h"
 
 #include "dce/dce_10_0_d.h"
 #include "dce/dce_10_0_sh_mask.h"
 
 #include "smu/smu_7_1_3_d.h"
 
 #define GFX8_NUM_GFX_RINGS     1
 #define GFX8_MEC_HPD_SIZE 2048
-
+#define GFX8_CU_RESERVE_RESOURCES 0x45888 /* written to SPI_RESOURCE_RESERVE_CU_n while reserved */
+#define GFX8_CU_NUM 8 /* exclusive upper bound of the CU indices programmed */
+#define GFX8_UNRESERVED_CU_NUM 2 /* the first CUs are left for general processing */
+#define GFX8_CU_RESERVE_PIPE_SHIFT 7 /* compute pipe 0 position in the reserve type mask */
 
 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
 
 #define ARRAY_MODE(x)					((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
 #define PIPE_CONFIG(x)					((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
 #define TILE_SPLIT(x)					((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
 #define MICRO_TILE_MODE_NEW(x)				((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
@@ -6710,20 +6713,110 @@ static u64 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
 
 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 
 	/* XXX check if swapping is necessary on BE */
 	adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
 	WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
 }
 
+/*
+ * Reserve (acquire) or release CUs for high priority compute on @ring.
+ * The per-pipe queue masks and the pipe mask in adev->gfx are updated under
+ * cu_reserve_lock, then SPI_RESOURCE_RESERVE_{,EN_}CU_n is rewritten for every
+ * CU in [GFX8_UNRESERVED_CU_NUM, GFX8_CU_NUM). Non-compute rings are ignored.
+ */
+static void gfx_v8_0_cu_reserve(struct amdgpu_device *adev,
+				struct amdgpu_ring *ring, bool acquire)
+{
+	int i, resources;
+	int tmp = 0, queue_mask = 0, type_mask = 0;
+
+	/* gfx_v8_0_cu_reserve only supports compute path */
+	if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
+		return;
+
+	spin_lock(&adev->gfx.cu_reserve_lock);
+	if (acquire) {
+		adev->gfx.cu_reserve_pipe_mask |= (1 << ring->pipe);
+		adev->gfx.cu_reserve_queue_mask[ring->pipe] |= (1 << ring->queue);
+	} else {
+		adev->gfx.cu_reserve_queue_mask[ring->pipe] &= ~(1 << ring->queue);
+		/* keep the pipe reserved while any of its queues still is */
+		if (!adev->gfx.cu_reserve_queue_mask[ring->pipe])
+			adev->gfx.cu_reserve_pipe_mask &= ~(1 << ring->pipe);
+	}
+
+	/* compute pipe 0 starts at GFX8_CU_RESERVE_PIPE_SHIFT */
+	type_mask = (adev->gfx.cu_reserve_pipe_mask << GFX8_CU_RESERVE_PIPE_SHIFT);
+
+	/* HW only has one register for queue mask, so we collapse them */
+	for (i = 0; i < AMDGPU_MAX_COMPUTE_RINGS; i++)
+		queue_mask |= adev->gfx.cu_reserve_queue_mask[i];
+
+	/* the enable value is identical for every CU, so build it once */
+	resources = queue_mask ? GFX8_CU_RESERVE_RESOURCES : 0;
+	tmp = REG_SET_FIELD(tmp, SPI_RESOURCE_RESERVE_EN_CU_0,
+			    TYPE_MASK, type_mask);
+	tmp = REG_SET_FIELD(tmp, SPI_RESOURCE_RESERVE_EN_CU_0,
+			    QUEUE_MASK, queue_mask);
+	tmp = REG_SET_FIELD(tmp, SPI_RESOURCE_RESERVE_EN_CU_0,
+			    EN, queue_mask ? 1 : 0);
+
+	/* leave the first CUs for general processing */
+	for (i = GFX8_UNRESERVED_CU_NUM; i < GFX8_CU_NUM; i++) {
+		/* assumes the per-CU registers are consecutive - TODO confirm */
+		WREG32(mmSPI_RESOURCE_RESERVE_CU_0 + i, resources);
+		WREG32(mmSPI_RESOURCE_RESERVE_EN_CU_0 + i, tmp);
+	}
+
+	spin_unlock(&adev->gfx.cu_reserve_lock);
+}
+
+/*
+ * Set CP_HQD_PIPE_PRIORITY/CP_HQD_QUEUE_PRIORITY for @ring under srbm_lock:
+ * NORMAL writes 0x0/0x0, HIGH writes 0x2 (CS_H)/0xf, anything else only WARNs.
+ */
+static void gfx_v8_0_set_spi_priority(struct amdgpu_device *adev,
+				      struct amdgpu_ring *ring,
+				      enum amd_sched_priority priority)
+{
+	spin_lock(&adev->srbm_lock);
+	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+	if (priority == AMD_SCHED_PRIORITY_NORMAL) {
+		WREG32(mmCP_HQD_PIPE_PRIORITY, 0x0);
+		WREG32(mmCP_HQD_QUEUE_PRIORITY, 0x0);
+	} else if (priority == AMD_SCHED_PRIORITY_HIGH) {
+		WREG32(mmCP_HQD_PIPE_PRIORITY, 0x2);
+		WREG32(mmCP_HQD_QUEUE_PRIORITY, 0xf);
+	} else {
+		WARN(1, "Attempt to set invalid SPI priority:%d for ring:%d\n",
+				priority, ring->idx);
+	}
+
+	vi_srbm_select(adev, 0, 0, 0, 0);
+	spin_unlock(&adev->srbm_lock);
+}
+
+/* .set_priority hook: program HQD priority, then (un)reserve CUs for @ring */
+static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring,
+					       enum amd_sched_priority priority)
+{
+	if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
+		gfx_v8_0_set_spi_priority(ring->adev, ring, priority);
+		gfx_v8_0_cu_reserve(ring->adev, ring,
+				    priority == AMD_SCHED_PRIORITY_HIGH);
+	}
+}
+
 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
 					     u64 addr, u64 seq,
 					     unsigned flags)
 {
 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
 
 	/* RELEASE_MEM - flush caches, send int */
 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
 	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
@@ -7140,20 +7233,21 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
 	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
 	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
 	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
 	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
 	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
 	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
 	.test_ring = gfx_v8_0_ring_test_ring,
 	.test_ib = gfx_v8_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.set_priority = gfx_v8_0_ring_set_priority_compute,
 };
 
 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
 	.type = AMDGPU_RING_TYPE_KIQ,
 	.align_mask = 0xff,
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = false,
 	.get_rptr = gfx_v8_0_ring_get_rptr,
 	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
 	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
-- 
2.9.3

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

  parent reply	other threads:[~2017-04-06  6:21 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-04-06  6:21 [PATCH] Add support for high priority scheduling in amdgpu v8 Andres Rodriguez
     [not found] ` <20170406062139.3335-1-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-04-06  6:21   ` [PATCH 01/26] drm/amdgpu: refactor MQD/HQD initialization v2 Andres Rodriguez
     [not found]     ` <20170406062139.3335-2-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-04-11 22:08       ` Alex Deucher
     [not found]         ` <CADnq5_PAth5UD1zq2dYvmFXgqE4s_OKY_VtHuKj467JDuyCHhA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-04-12 21:06           ` Andres Rodriguez
2017-04-11 22:18       ` Alex Deucher
     [not found]         ` <CADnq5_OdNy8hW8=WZouJBTtGjd261E9Boz6zL2L1o-GYegqP8Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-04-12 19:35           ` Andres Rodriguez
2017-04-12 21:17           ` Andres Rodriguez
2017-04-06  6:21   ` [PATCH 02/26] drm/amdgpu: doorbell registers need only be set once v2 Andres Rodriguez
     [not found]     ` <20170406062139.3335-3-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-04-11 21:46       ` Alex Deucher
2017-04-06  6:21   ` [PATCH 03/26] drm/amdgpu: detect timeout error when deactivating hqd Andres Rodriguez
2017-04-06  6:21   ` [PATCH 04/26] drm/amdgpu: remove duplicate definition of cik_mqd Andres Rodriguez
2017-04-06  6:21   ` [PATCH 05/26] drm/amdgpu: unify MQD programming sequence for kfd and amdgpu Andres Rodriguez
     [not found]     ` <20170406062139.3335-6-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-04-11 20:35       ` Felix Kuehling
2017-04-06  6:21   ` [PATCH 06/26] drm/amdgpu: rename rdev to adev Andres Rodriguez
2017-04-06  6:21   ` [PATCH 07/26] drm/amdgpu: take ownership of per-pipe configuration v2 Andres Rodriguez
2017-04-06  6:21   ` [PATCH 08/26] drm/radeon: take ownership of pipe initialization Andres Rodriguez
2017-04-06  6:21   ` [PATCH 09/26] drm/amdgpu: allow split of queues with kfd at queue granularity v3 Andres Rodriguez
     [not found]     ` <20170406062139.3335-10-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-04-11 21:36       ` Felix Kuehling
2017-04-06  6:21   ` [PATCH 10/26] drm/amdgpu: teach amdgpu how to enable interrupts for any pipe v3 Andres Rodriguez
2017-04-06  6:21   ` [PATCH 11/26] drm/amdkfd: allow split HQD on per-queue granularity v4 Andres Rodriguez
2017-04-06  6:21   ` [PATCH 12/26] drm/amdgpu: remove duplicate magic constants from amdgpu_amdkfd_gfx*.c Andres Rodriguez
2017-04-06  6:21   ` [PATCH 13/26] drm/amdgpu: allocate queues horizontally across pipes Andres Rodriguez
2017-04-06  6:21   ` [PATCH 14/26] drm/amdgpu: remove hardcoded queue_mask in PACKET3_SET_RESOURCES v2 Andres Rodriguez
2017-04-06  6:21   ` [PATCH 15/26] drm/amdgpu: avoid KIQ clashing with compute or KFD queues Andres Rodriguez
     [not found]     ` <20170406062139.3335-16-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-04-11 22:24       ` Felix Kuehling
2017-04-06  6:21   ` [PATCH 16/26] drm/amdgpu: new queue policy, take first 2 queues of each pipe Andres Rodriguez
     [not found]     ` <20170406062139.3335-17-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-04-11 22:30       ` Felix Kuehling
     [not found]         ` <de2e972f-df8c-f3d2-1bc0-7336be3e67a8-5C7GfCeVMHo@public.gmane.org>
2017-04-12 19:06           ` Andres Rodriguez
2017-04-06  6:21   ` [PATCH 17/26] drm/amdgpu: untie user ring ids from kernel ring ids v5 Andres Rodriguez
2017-04-06  6:21   ` [PATCH 18/26] drm/amdgpu: implement lru amdgpu_queue_mgr policy for compute v4 Andres Rodriguez
2017-04-06  6:21   ` [PATCH 19/26] drm/amdgpu: add parameter to allocate high priority contexts v7 Andres Rodriguez
2017-04-06  6:21   ` [PATCH 20/26] drm/amdgpu: add framework for HW specific priority settings v6 Andres Rodriguez
2017-04-06  6:21   ` [PATCH 21/26] drm/amdgpu: convert srbm lock to a spinlock v2 Andres Rodriguez
     [not found]     ` <20170406062139.3335-22-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-04-11 22:19       ` Alex Deucher
     [not found]         ` <CADnq5_OLYK+NzN9CZC4SDMnTKDxQpqH9MnT6jvNih3hVbOFBQw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2017-04-12  8:15           ` Christian König
2017-04-06  6:21   ` Andres Rodriguez [this message]
     [not found]     ` <20170406062139.3335-23-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-04-11 22:35       ` [PATCH 22/26] drm/amdgpu: implement ring set_priority for gfx_v8 compute v5 Alex Deucher
2017-04-06  6:21   ` [PATCH 23/26] drm/amdgpu: condense mqd programming sequence Andres Rodriguez
2017-04-06  6:21   ` [PATCH 24/26] drm/amdgpu: workaround tonga HW bug in HQD " Andres Rodriguez
2017-04-06  6:21   ` [PATCH 25/26] drm/amdgpu: guarantee bijective mapping of ring ids for LRU v3 Andres Rodriguez
     [not found]     ` <20170406062139.3335-26-andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-04-12 17:59       ` Felix Kuehling
2017-04-06  6:21   ` [PATCH 26/26] drm/amdgpu: use LRU mapping policy for SDMA engines Andres Rodriguez
2017-04-11 22:31   ` [PATCH] Add support for high priority scheduling in amdgpu v8 Felix Kuehling
     [not found]     ` <65647f25-dfd4-a831-6808-cfb25f40d1c8-5C7GfCeVMHo@public.gmane.org>
2017-04-12 18:29       ` Andres Rodriguez

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170406062139.3335-23-andresx7@gmail.com \
    --to=andresx7-re5jqeeqqe8avxtiumwx3w@public.gmane.org \
    --cc=amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.