All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] drm/amdgpu: add GC 10.3 NOALLOC registers
@ 2020-10-20 20:26 Alex Deucher
  2020-10-20 20:26 ` [PATCH 2/3] drm/amdgpu: add support to configure MALL for sienna_cichlid (v2) Alex Deucher
  2020-10-20 20:26 ` [PATCH 3/3] drm/amdgpu/display: add MALL support Alex Deucher
  0 siblings, 2 replies; 7+ messages in thread
From: Alex Deucher @ 2020-10-20 20:26 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Deucher

This adds the NOALLOC registers.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../include/asic_reg/gc/gc_10_3_0_default.h   |  1 +
 .../include/asic_reg/gc/gc_10_3_0_offset.h    |  2 ++
 .../include/asic_reg/gc/gc_10_3_0_sh_mask.h   | 33 +++++++++++++++++++
 3 files changed, 36 insertions(+)

diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_default.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_default.h
index e245e912535e..21d2f7d1debc 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_default.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_default.h
@@ -1859,6 +1859,7 @@
 #define mmGCMC_SHARED_VIRT_RESET_REQ2_DEFAULT                                    0x00000000
 #define mmGCMC_VM_XGMI_LFB_CNTL_DEFAULT                                          0x00000000
 #define mmGCMC_VM_XGMI_LFB_SIZE_DEFAULT                                          0x00000000
+#define mmGCMC_VM_FB_NOALLOC_CNTL_DEFAULT                                        0x00000000
 #define mmGCUTCL2_HARVEST_BYPASS_GROUPS_DEFAULT                                  0x00000000
 
 
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h
index 66a4151fa676..0102487a2c5f 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_offset.h
@@ -3661,6 +3661,8 @@
 #define mmGCMC_VM_XGMI_LFB_CNTL_BASE_IDX                                                               0
 #define mmGCMC_VM_XGMI_LFB_SIZE                                                                        0x16f8
 #define mmGCMC_VM_XGMI_LFB_SIZE_BASE_IDX                                                               0
+#define mmGCMC_VM_FB_NOALLOC_CNTL                                                                      0x16f9
+#define mmGCMC_VM_FB_NOALLOC_CNTL_BASE_IDX                                                             0
 #define mmGCUTCL2_HARVEST_BYPASS_GROUPS                                                                0x16fa
 #define mmGCUTCL2_HARVEST_BYPASS_GROUPS_BASE_IDX                                                       0
 
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h
index aed799d9a0e8..4d2a1432c121 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gc/gc_10_3_0_sh_mask.h
@@ -612,6 +612,7 @@
 #define SDMA0_UTCL1_PAGE__DMA_PAGE_SIZE__SHIFT                                                                0x10
 #define SDMA0_UTCL1_PAGE__USE_BC__SHIFT                                                                       0x16
 #define SDMA0_UTCL1_PAGE__ADDR_IS_PA__SHIFT                                                                   0x17
+#define SDMA0_UTCL1_PAGE__LLC_NOALLOC__SHIFT                                                                  0x18
 #define SDMA0_UTCL1_PAGE__VM_HOLE_MASK                                                                        0x00000001L
 #define SDMA0_UTCL1_PAGE__REQ_TYPE_MASK                                                                       0x0000001EL
 #define SDMA0_UTCL1_PAGE__USE_MTYPE_MASK                                                                      0x000003C0L
@@ -622,6 +623,7 @@
 #define SDMA0_UTCL1_PAGE__DMA_PAGE_SIZE_MASK                                                                  0x003F0000L
 #define SDMA0_UTCL1_PAGE__USE_BC_MASK                                                                         0x00400000L
 #define SDMA0_UTCL1_PAGE__ADDR_IS_PA_MASK                                                                     0x00800000L
+#define SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK                                                                    0x01000000L
 //SDMA0_RELAX_ORDERING_LUT
 #define SDMA0_RELAX_ORDERING_LUT__RESERVED0__SHIFT                                                            0x0
 #define SDMA0_RELAX_ORDERING_LUT__COPY__SHIFT                                                                 0x1
@@ -3484,6 +3486,7 @@
 #define SDMA1_UTCL1_PAGE__DMA_PAGE_SIZE__SHIFT                                                                0x10
 #define SDMA1_UTCL1_PAGE__USE_BC__SHIFT                                                                       0x16
 #define SDMA1_UTCL1_PAGE__ADDR_IS_PA__SHIFT                                                                   0x17
+#define SDMA1_UTCL1_PAGE__LLC_NOALLOC__SHIFT                                                                  0x18
 #define SDMA1_UTCL1_PAGE__VM_HOLE_MASK                                                                        0x00000001L
 #define SDMA1_UTCL1_PAGE__REQ_TYPE_MASK                                                                       0x0000001EL
 #define SDMA1_UTCL1_PAGE__USE_MTYPE_MASK                                                                      0x000003C0L
@@ -3494,6 +3497,7 @@
 #define SDMA1_UTCL1_PAGE__DMA_PAGE_SIZE_MASK                                                                  0x003F0000L
 #define SDMA1_UTCL1_PAGE__USE_BC_MASK                                                                         0x00400000L
 #define SDMA1_UTCL1_PAGE__ADDR_IS_PA_MASK                                                                     0x00800000L
+#define SDMA1_UTCL1_PAGE__LLC_NOALLOC_MASK                                                                    0x01000000L
 //SDMA1_RELAX_ORDERING_LUT
 #define SDMA1_RELAX_ORDERING_LUT__RESERVED0__SHIFT                                                            0x0
 #define SDMA1_RELAX_ORDERING_LUT__COPY__SHIFT                                                                 0x1
@@ -7284,6 +7288,7 @@
 #define WD_UTCL1_CNTL__FRAG_LIMIT_MODE__SHIFT                                                                 0x1b
 #define WD_UTCL1_CNTL__FORCE_SNOOP__SHIFT                                                                     0x1c
 #define WD_UTCL1_CNTL__MTYPE_OVERRIDE__SHIFT                                                                  0x1d
+#define WD_UTCL1_CNTL__LLC_NOALLOC_OVERRIDE__SHIFT                                                            0x1e
 #define WD_UTCL1_CNTL__XNACK_REDO_TIMER_CNT_MASK                                                              0x000FFFFFL
 #define WD_UTCL1_CNTL__VMID_RESET_MODE_MASK                                                                   0x00800000L
 #define WD_UTCL1_CNTL__DROP_MODE_MASK                                                                         0x01000000L
@@ -7292,6 +7297,7 @@
 #define WD_UTCL1_CNTL__FRAG_LIMIT_MODE_MASK                                                                   0x08000000L
 #define WD_UTCL1_CNTL__FORCE_SNOOP_MASK                                                                       0x10000000L
 #define WD_UTCL1_CNTL__MTYPE_OVERRIDE_MASK                                                                    0x20000000L
+#define WD_UTCL1_CNTL__LLC_NOALLOC_OVERRIDE_MASK                                                              0x40000000L
 //WD_UTCL1_STATUS
 #define WD_UTCL1_STATUS__FAULT_DETECTED__SHIFT                                                                0x0
 #define WD_UTCL1_STATUS__RETRY_DETECTED__SHIFT                                                                0x1
@@ -7321,6 +7327,7 @@
 #define IA_UTCL1_CNTL__FRAG_LIMIT_MODE__SHIFT                                                                 0x1b
 #define IA_UTCL1_CNTL__FORCE_SNOOP__SHIFT                                                                     0x1c
 #define IA_UTCL1_CNTL__MTYPE_OVERRIDE__SHIFT                                                                  0x1d
+#define IA_UTCL1_CNTL__LLC_NOALLOC_OVERRIDE__SHIFT                                                            0x1e
 #define IA_UTCL1_CNTL__XNACK_REDO_TIMER_CNT_MASK                                                              0x000FFFFFL
 #define IA_UTCL1_CNTL__VMID_RESET_MODE_MASK                                                                   0x00800000L
 #define IA_UTCL1_CNTL__DROP_MODE_MASK                                                                         0x01000000L
@@ -7329,6 +7336,7 @@
 #define IA_UTCL1_CNTL__FRAG_LIMIT_MODE_MASK                                                                   0x08000000L
 #define IA_UTCL1_CNTL__FORCE_SNOOP_MASK                                                                       0x10000000L
 #define IA_UTCL1_CNTL__MTYPE_OVERRIDE_MASK                                                                    0x20000000L
+#define IA_UTCL1_CNTL__LLC_NOALLOC_OVERRIDE_MASK                                                              0x40000000L
 //IA_UTCL1_STATUS
 #define IA_UTCL1_STATUS__FAULT_DETECTED__SHIFT                                                                0x0
 #define IA_UTCL1_STATUS__RETRY_DETECTED__SHIFT                                                                0x1
@@ -13584,6 +13592,13 @@
 //GCMC_VM_XGMI_LFB_SIZE
 #define GCMC_VM_XGMI_LFB_SIZE__PF_LFB_SIZE__SHIFT                                                             0x0
 #define GCMC_VM_XGMI_LFB_SIZE__PF_LFB_SIZE_MASK                                                               0x0001FFFFL
+//GCMC_VM_FB_NOALLOC_CNTL
+#define GCMC_VM_FB_NOALLOC_CNTL__LOCAL_FB_NOALLOC_NOPTE__SHIFT                                                0x0
+#define GCMC_VM_FB_NOALLOC_CNTL__REMOTE_FB_NOALLOC_NOPTE__SHIFT                                               0x1
+#define GCMC_VM_FB_NOALLOC_CNTL__FB_NOALLOC_WALKER_FETCH__SHIFT                                               0x2
+#define GCMC_VM_FB_NOALLOC_CNTL__LOCAL_FB_NOALLOC_NOPTE_MASK                                                  0x00000001L
+#define GCMC_VM_FB_NOALLOC_CNTL__REMOTE_FB_NOALLOC_NOPTE_MASK                                                 0x00000002L
+#define GCMC_VM_FB_NOALLOC_CNTL__FB_NOALLOC_WALKER_FETCH_MASK                                                 0x00000004L
 //GCUTCL2_HARVEST_BYPASS_GROUPS
 #define GCUTCL2_HARVEST_BYPASS_GROUPS__BYPASS_GROUPS__SHIFT                                                   0x0
 #define GCUTCL2_HARVEST_BYPASS_GROUPS__BYPASS_GROUPS_MASK                                                     0xFFFFFFFFL
@@ -20063,6 +20078,10 @@
 #define DB_RMI_L2_CACHE_CONTROL__HTILE_RD_POLICY__SHIFT                                                       0x14
 #define DB_RMI_L2_CACHE_CONTROL__Z_BIG_PAGE__SHIFT                                                            0x18
 #define DB_RMI_L2_CACHE_CONTROL__S_BIG_PAGE__SHIFT                                                            0x19
+#define DB_RMI_L2_CACHE_CONTROL__Z_NOALLOC__SHIFT                                                             0x1a
+#define DB_RMI_L2_CACHE_CONTROL__S_NOALLOC__SHIFT                                                             0x1b
+#define DB_RMI_L2_CACHE_CONTROL__HTILE_NOALLOC__SHIFT                                                         0x1c
+#define DB_RMI_L2_CACHE_CONTROL__ZPCPSD_NOALLOC__SHIFT                                                        0x1d
 #define DB_RMI_L2_CACHE_CONTROL__Z_WR_POLICY_MASK                                                             0x00000003L
 #define DB_RMI_L2_CACHE_CONTROL__S_WR_POLICY_MASK                                                             0x0000000CL
 #define DB_RMI_L2_CACHE_CONTROL__HTILE_WR_POLICY_MASK                                                         0x00000030L
@@ -20072,6 +20091,10 @@
 #define DB_RMI_L2_CACHE_CONTROL__HTILE_RD_POLICY_MASK                                                         0x00300000L
 #define DB_RMI_L2_CACHE_CONTROL__Z_BIG_PAGE_MASK                                                              0x01000000L
 #define DB_RMI_L2_CACHE_CONTROL__S_BIG_PAGE_MASK                                                              0x02000000L
+#define DB_RMI_L2_CACHE_CONTROL__Z_NOALLOC_MASK                                                               0x04000000L
+#define DB_RMI_L2_CACHE_CONTROL__S_NOALLOC_MASK                                                               0x08000000L
+#define DB_RMI_L2_CACHE_CONTROL__HTILE_NOALLOC_MASK                                                           0x10000000L
+#define DB_RMI_L2_CACHE_CONTROL__ZPCPSD_NOALLOC_MASK                                                          0x20000000L
 //TA_BC_BASE_ADDR
 #define TA_BC_BASE_ADDR__ADDRESS__SHIFT                                                                       0x0
 #define TA_BC_BASE_ADDR__ADDRESS_MASK                                                                         0xFFFFFFFFL
@@ -32705,6 +32728,8 @@
 #define RLC_SPM_MC_CNTL__RLC_SPM_VOL__SHIFT                                                                   0xe
 #define RLC_SPM_MC_CNTL__RLC_SPM_NOFILL__SHIFT                                                                0xf
 #define RLC_SPM_MC_CNTL__RESERVED_3__SHIFT                                                                    0x10
+#define RLC_SPM_MC_CNTL__RLC_SPM_LLC_NOALLOC__SHIFT                                                           0x12
+#define RLC_SPM_MC_CNTL__RLC_SPM_LLC_NOALLOC_OVER__SHIFT                                                      0x13
 #define RLC_SPM_MC_CNTL__RESERVED__SHIFT                                                                      0x14
 #define RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK                                                                    0x0000000FL
 #define RLC_SPM_MC_CNTL__RLC_SPM_POLICY_MASK                                                                  0x00000030L
@@ -32717,6 +32742,8 @@
 #define RLC_SPM_MC_CNTL__RLC_SPM_VOL_MASK                                                                     0x00004000L
 #define RLC_SPM_MC_CNTL__RLC_SPM_NOFILL_MASK                                                                  0x00008000L
 #define RLC_SPM_MC_CNTL__RESERVED_3_MASK                                                                      0x00030000L
+#define RLC_SPM_MC_CNTL__RLC_SPM_LLC_NOALLOC_MASK                                                             0x00040000L
+#define RLC_SPM_MC_CNTL__RLC_SPM_LLC_NOALLOC_OVER_MASK                                                        0x00080000L
 #define RLC_SPM_MC_CNTL__RESERVED_MASK                                                                        0xFFF00000L
 //RLC_SPM_INT_CNTL
 #define RLC_SPM_INT_CNTL__RLC_SPM_INT_CNTL__SHIFT                                                             0x0
@@ -37158,6 +37185,7 @@
 #define GCUTC_GPUVA_VMID_TRANSLATION_ASSIST_RESPONSE_HI__MTYPE__SHIFT                                         0x12
 #define GCUTC_GPUVA_VMID_TRANSLATION_ASSIST_RESPONSE_HI__MEMLOG__SHIFT                                        0x15
 #define GCUTC_GPUVA_VMID_TRANSLATION_ASSIST_RESPONSE_HI__NACK__SHIFT                                          0x16
+#define GCUTC_GPUVA_VMID_TRANSLATION_ASSIST_RESPONSE_HI__LLC_NOALLOC__SHIFT                                   0x18
 #define GCUTC_GPUVA_VMID_TRANSLATION_ASSIST_RESPONSE_HI__ACK__SHIFT                                           0x1f
 #define GCUTC_GPUVA_VMID_TRANSLATION_ASSIST_RESPONSE_HI__ADDR_MASK                                            0x0000000FL
 #define GCUTC_GPUVA_VMID_TRANSLATION_ASSIST_RESPONSE_HI__PERMS_MASK                                           0x00000070L
@@ -37169,6 +37197,7 @@
 #define GCUTC_GPUVA_VMID_TRANSLATION_ASSIST_RESPONSE_HI__MTYPE_MASK                                           0x001C0000L
 #define GCUTC_GPUVA_VMID_TRANSLATION_ASSIST_RESPONSE_HI__MEMLOG_MASK                                          0x00200000L
 #define GCUTC_GPUVA_VMID_TRANSLATION_ASSIST_RESPONSE_HI__NACK_MASK                                            0x00C00000L
+#define GCUTC_GPUVA_VMID_TRANSLATION_ASSIST_RESPONSE_HI__LLC_NOALLOC_MASK                                     0x01000000L
 #define GCUTC_GPUVA_VMID_TRANSLATION_ASSIST_RESPONSE_HI__ACK_MASK                                             0x80000000L
 
 
@@ -37761,6 +37790,7 @@
 #define SDMA2_UTCL1_PAGE__DMA_PAGE_SIZE__SHIFT                                                                0x10
 #define SDMA2_UTCL1_PAGE__USE_BC__SHIFT                                                                       0x16
 #define SDMA2_UTCL1_PAGE__ADDR_IS_PA__SHIFT                                                                   0x17
+#define SDMA2_UTCL1_PAGE__LLC_NOALLOC__SHIFT                                                                  0x18
 #define SDMA2_UTCL1_PAGE__VM_HOLE_MASK                                                                        0x00000001L
 #define SDMA2_UTCL1_PAGE__REQ_TYPE_MASK                                                                       0x0000001EL
 #define SDMA2_UTCL1_PAGE__USE_MTYPE_MASK                                                                      0x000003C0L
@@ -37771,6 +37801,7 @@
 #define SDMA2_UTCL1_PAGE__DMA_PAGE_SIZE_MASK                                                                  0x003F0000L
 #define SDMA2_UTCL1_PAGE__USE_BC_MASK                                                                         0x00400000L
 #define SDMA2_UTCL1_PAGE__ADDR_IS_PA_MASK                                                                     0x00800000L
+#define SDMA2_UTCL1_PAGE__LLC_NOALLOC_MASK                                                                    0x01000000L
 //SDMA2_RELAX_ORDERING_LUT
 #define SDMA2_RELAX_ORDERING_LUT__RESERVED0__SHIFT                                                            0x0
 #define SDMA2_RELAX_ORDERING_LUT__COPY__SHIFT                                                                 0x1
@@ -40633,6 +40664,7 @@
 #define SDMA3_UTCL1_PAGE__DMA_PAGE_SIZE__SHIFT                                                                0x10
 #define SDMA3_UTCL1_PAGE__USE_BC__SHIFT                                                                       0x16
 #define SDMA3_UTCL1_PAGE__ADDR_IS_PA__SHIFT                                                                   0x17
+#define SDMA3_UTCL1_PAGE__LLC_NOALLOC__SHIFT                                                                  0x18
 #define SDMA3_UTCL1_PAGE__VM_HOLE_MASK                                                                        0x00000001L
 #define SDMA3_UTCL1_PAGE__REQ_TYPE_MASK                                                                       0x0000001EL
 #define SDMA3_UTCL1_PAGE__USE_MTYPE_MASK                                                                      0x000003C0L
@@ -40643,6 +40675,7 @@
 #define SDMA3_UTCL1_PAGE__DMA_PAGE_SIZE_MASK                                                                  0x003F0000L
 #define SDMA3_UTCL1_PAGE__USE_BC_MASK                                                                         0x00400000L
 #define SDMA3_UTCL1_PAGE__ADDR_IS_PA_MASK                                                                     0x00800000L
+#define SDMA3_UTCL1_PAGE__LLC_NOALLOC_MASK                                                                    0x01000000L
 //SDMA3_RELAX_ORDERING_LUT
 #define SDMA3_RELAX_ORDERING_LUT__RESERVED0__SHIFT                                                            0x0
 #define SDMA3_RELAX_ORDERING_LUT__COPY__SHIFT                                                                 0x1
-- 
2.25.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/3] drm/amdgpu: add support to configure MALL for sienna_cichlid (v2)
  2020-10-20 20:26 [PATCH 1/3] drm/amdgpu: add GC 10.3 NOALLOC registers Alex Deucher
@ 2020-10-20 20:26 ` Alex Deucher
  2020-10-21 11:01   ` Christian König
  2020-10-26 23:11   ` Luben Tuikov
  2020-10-20 20:26 ` [PATCH 3/3] drm/amdgpu/display: add MALL support Alex Deucher
  1 sibling, 2 replies; 7+ messages in thread
From: Alex Deucher @ 2020-10-20 20:26 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Deucher, Likun Gao, Hawking Zhang

From: Likun Gao <Likun.Gao@amd.com>

Enable Memory Access at Last Level (MALL) feature for sienna_cichlid.

v2: drop module option.  We need to add UAPI so userspace can
request MALL per buffer.

Signed-off-by: Likun Gao <Likun.Gao@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 +++
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 3 ++-
 drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 2 +-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index ffea3b89b9da..929d7cb92dc0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -76,6 +76,9 @@ struct amdgpu_bo_list_entry;
 /* PTE is handled as PDE for VEGA10 (Translate Further) */
 #define AMDGPU_PTE_TF		(1ULL << 56)
 
+/* MALL noalloc for sienna_cichlid; reserved on older ASICs */
+#define AMDGPU_PTE_NOALLOC	(1ULL << 58)
+
 /* PDE Block Fragment Size for VEGA10 */
 #define AMDGPU_PDE_BFS(a)	((uint64_t)a << 59)
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 40af17610207..ef385a529013 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -486,7 +486,8 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid
 /*
  * PTE format on NAVI 10:
  * 63:59 reserved
- * 58:57 reserved
+ * 58 reserved; used for MALL noalloc on sienna_cichlid
+ * 57 reserved
  * 56 F
  * 55 L
  * 54 reserved
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 18eca0d4dbcc..ae6158456094 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -707,7 +707,7 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
 		temp &= 0xFF0FFF;
 		temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
 			 (CACHE_WRITE_POLICY_L2__DEFAULT << 14) |
-			 0x01000000);
+			 SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK);
 		WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
 
 		if (!amdgpu_sriov_vf(adev)) {
-- 
2.25.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/3] drm/amdgpu/display: add MALL support
  2020-10-20 20:26 [PATCH 1/3] drm/amdgpu: add GC 10.3 NOALLOC registers Alex Deucher
  2020-10-20 20:26 ` [PATCH 2/3] drm/amdgpu: add support to configure MALL for sienna_cichlid (v2) Alex Deucher
@ 2020-10-20 20:26 ` Alex Deucher
  2020-10-26 15:51   ` Bas Nieuwenhuizen
  1 sibling, 1 reply; 7+ messages in thread
From: Alex Deucher @ 2020-10-20 20:26 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Deucher, Bhawanpreet Lakha

From: Bhawanpreet Lakha <Bhawanpreet.Lakha@amd.com>

Enable Memory Access at Last Level (MALL) feature for display.

Signed-off-by: Bhawanpreet Lakha <Bhawanpreet.Lakha@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 .../drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h |  1 +
 .../display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c  | 10 +++
 .../dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c  |  9 +++
 .../dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.h  |  2 +
 .../drm/amd/display/dc/dcn30/dcn30_hwseq.c    | 65 +++++++++++++++++++
 .../drm/amd/display/dc/dcn30/dcn30_resource.c |  2 +-
 .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h   | 20 ++++++
 7 files changed, 108 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h
index 5ed03287aaaf..fa09c594fd36 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h
@@ -53,6 +53,7 @@
 #define DALSMC_MSG_GetDcModeMaxDpmFreq            0xC
 #define DALSMC_MSG_SetMinDeepSleepDcefclk         0xD
 #define DALSMC_MSG_NumOfDisplays                  0xE
+#define DALSMC_MSG_SetDisplayRefreshFromMall      0xF
 #define DALSMC_MSG_SetExternalClientDfCstateAllow 0x10
 #define DALSMC_MSG_BacoAudioD3PME                 0x11
 #define DALSMC_Message_Count                      0x12
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
index b0e9b0509568..7bad73b2d146 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
@@ -145,6 +145,16 @@ static void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr)
 	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz;
 	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF;
 
+	/* Set D - MALL - SR enter and exit times adjusted for MALL */
+//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].valid = true;
+//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = pstate_latency_us;
+//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = 2;
+//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = 4;
+//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL;
+//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = 0;
+//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF;
+//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz;
+//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF;
 }
 
 void dcn3_init_clocks(struct clk_mgr *clk_mgr_base)
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c
index 7ee3ec5a8af8..8ecc708bcd9e 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c
@@ -297,6 +297,15 @@ void dcn30_smu_set_num_of_displays(struct clk_mgr_internal *clk_mgr, uint32_t nu
 			DALSMC_MSG_NumOfDisplays, num_displays, NULL);
 }
 
+void dcn30_smu_set_display_refresh_from_mall(struct clk_mgr_internal *clk_mgr, bool enable, uint8_t cache_timer_delay, uint8_t cache_timer_scale)
+{
+	/* bits 8:7: cache timer scale, bits 6:1: cache timer delay, bit 0: 1 = enable, 0 = disable */
+	uint32_t param = (cache_timer_scale << 7) | (cache_timer_delay << 1) | (enable ? 1 : 0);
+
+	dcn30_smu_send_msg_with_param(clk_mgr,
+			DALSMC_MSG_SetDisplayRefreshFromMall, param, NULL);
+}
+
 void dcn30_smu_set_external_client_df_cstate_allow(struct clk_mgr_internal *clk_mgr, bool enable)
 {
 	smu_print("SMU Set external client df cstate allow: enable = %d\n", enable);
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.h
index 236f20ec90d4..dd2640a3ce5d 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.h
@@ -70,6 +70,7 @@ typedef enum {
 typedef enum {
 	WATERMARKS_CLOCK_RANGE = 0,
 	WATERMARKS_DUMMY_PSTATE,
+	WATERMARKS_MALL,
 	WATERMARKS_COUNT,
 } WATERMARKS_FLAGS_e;
 
@@ -102,6 +103,7 @@ unsigned int dcn30_smu_get_dpm_freq_by_index(struct clk_mgr_internal *clk_mgr, P
 unsigned int dcn30_smu_get_dc_mode_max_dpm_freq(struct clk_mgr_internal *clk_mgr, PPCLK_e clk);
 void         dcn30_smu_set_min_deep_sleep_dcef_clk(struct clk_mgr_internal *clk_mgr, uint32_t freq_mhz);
 void         dcn30_smu_set_num_of_displays(struct clk_mgr_internal *clk_mgr, uint32_t num_displays);
+void         dcn30_smu_set_display_refresh_from_mall(struct clk_mgr_internal *clk_mgr, bool enable, uint8_t cache_timer_delay, uint8_t cache_timer_scale);
 void         dcn30_smu_set_external_client_df_cstate_allow(struct clk_mgr_internal *clk_mgr, bool enable);
 void         dcn30_smu_set_pme_workaround(struct clk_mgr_internal *clk_mgr);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
index 8eb8e13e1130..a06f6d19e38e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
@@ -696,6 +696,10 @@ void dcn30_program_dmdata_engine(struct pipe_ctx *pipe_ctx)
 
 bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
 {
+	union dmub_rb_cmd cmd;
+	unsigned int surface_size, refresh_hz, denom;
+	uint32_t tmr_delay = 0, tmr_scale = 0;
+
 	if (!dc->ctx->dmub_srv)
 		return false;
 
@@ -710,12 +714,73 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
 					/* Fail eligibility on a visible stream */
 					break;
 			}
+
+			// TODO: remove hard-coded size
+			if (surface_size < 128 * 1024 * 1024) {
+				refresh_hz = (unsigned long long) dc->current_state->streams[0]->timing.pix_clk_100hz * 100LL /
+						(dc->current_state->streams[0]->timing.v_total * dc->current_state->streams[0]->timing.h_total);
+
+				/*
+				 * Delay_Us = 65.28 * (64 + MallFrameCacheTmrDly) * 2^MallFrameCacheTmrScale
+				 * Delay_Us / 65.28 = (64 + MallFrameCacheTmrDly) * 2^MallFrameCacheTmrScale
+				 * (Delay_Us / 65.28) / 2^MallFrameCacheTmrScale = 64 + MallFrameCacheTmrDly
+				 * MallFrameCacheTmrDly = ((Delay_Us / 65.28) / 2^MallFrameCacheTmrScale) - 64
+				 *                      = (1000000 / refresh) / 65.28 / 2^MallFrameCacheTmrScale - 64
+				 *                      = 1000000 / (refresh * 65.28 * 2^MallFrameCacheTmrScale) - 64
+				 *                      = (1000000 * 100) / (refresh * 6528 * 2^MallFrameCacheTmrScale) - 64
+				 *
+				 * need to round up the result of the division before the subtraction
+				 */
+				denom = refresh_hz * 6528;
+				tmr_delay = (100000000LL + denom - 1) / denom - 64LL;
+
+				/* scale should be increased until it fits into 6 bits */
+				while (tmr_delay & ~0x3F) {
+					tmr_scale++;
+
+					if (tmr_scale > 3) {
+						/* The delay exceeds the range of the hysteresis timer */
+						ASSERT(false);
+						return false;
+					}
+
+					denom *= 2;
+					tmr_delay = (100000000LL + denom - 1) / denom - 64LL;
+				}
+
+				/* Enable MALL */
+				memset(&cmd, 0, sizeof(cmd));
+				cmd.mall.header.type = DMUB_CMD__MALL;
+				cmd.mall.header.sub_type =
+					DMUB_CMD__MALL_ACTION_ALLOW;
+				cmd.mall.header.payload_bytes =
+					sizeof(cmd.mall) -
+					sizeof(cmd.mall.header);
+				cmd.mall.tmr_delay = tmr_delay;
+				cmd.mall.tmr_scale = tmr_scale;
+
+				dc_dmub_srv_cmd_queue(dc->ctx->dmub_srv, &cmd);
+				dc_dmub_srv_cmd_execute(dc->ctx->dmub_srv);
+
+				return true;
+			}
 		}
 
 		/* No applicable optimizations */
 		return false;
 	}
 
+	/* Disable MALL */
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.mall.header.type = DMUB_CMD__MALL;
+	cmd.mall.header.sub_type = DMUB_CMD__MALL_ACTION_DISALLOW;
+	cmd.mall.header.payload_bytes =
+		sizeof(cmd.mall) - sizeof(cmd.mall.header);
+
+	dc_dmub_srv_cmd_queue(dc->ctx->dmub_srv, &cmd);
+	dc_dmub_srv_cmd_execute(dc->ctx->dmub_srv);
+	dc_dmub_srv_wait_idle(dc->ctx->dmub_srv);
+
 	return true;
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
index 783a1d7ae7d3..b132bb7f6704 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
@@ -2247,7 +2247,7 @@ void dcn30_calculate_wm_and_dlg(
 	/* Set D:
 	 * DCFCLK: Min Required
 	 * FCLK(proportional to UCLK): 1GHz or Max
-	 * sr_enter_exit = 4, sr_exit = 2us
+	 * MALL stutter, sr_enter_exit = 4, sr_exit = 2us
 	 */
 	/*
 	if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) {
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index 26a4c6caf606..6e5be1fdb4bb 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -298,6 +298,7 @@ enum dmub_cmd_type {
 	DMUB_CMD__REG_REG_WAIT = 4,
 	DMUB_CMD__PLAT_54186_WA = 5,
 	DMUB_CMD__PSR = 64,
+	DMUB_CMD__MALL = 65,
 	DMUB_CMD__ABM = 66,
 	DMUB_CMD__HW_LOCK = 69,
 	DMUB_CMD__DP_AUX_ACCESS = 70,
@@ -425,6 +426,18 @@ struct dmub_rb_cmd_PLAT_54186_wa {
 	struct dmub_cmd_PLAT_54186_wa flip;
 };
 
+struct dmub_rb_cmd_mall {
+	struct dmub_cmd_header header;
+	union dmub_addr cursor_copy_src;
+	union dmub_addr cursor_copy_dst;
+	uint32_t tmr_delay;
+	uint32_t tmr_scale;
+	uint16_t cursor_width;
+	uint16_t cursor_pitch;
+	uint16_t cursor_height;
+	uint8_t cursor_bpp;
+};
+
 struct dmub_cmd_digx_encoder_control_data {
 	union dig_encoder_control_parameters_v1_5 dig;
 };
@@ -556,6 +569,12 @@ enum psr_version {
 	PSR_VERSION_UNSUPPORTED			= 0xFFFFFFFF,
 };
 
+enum dmub_cmd_mall_type {
+	DMUB_CMD__MALL_ACTION_ALLOW = 0,
+	DMUB_CMD__MALL_ACTION_DISALLOW = 1,
+	DMUB_CMD__MALL_ACTION_COPY_CURSOR = 2,
+};
+
 struct dmub_cmd_psr_copy_settings_data {
 	union dmub_psr_debug_flags debug;
 	uint16_t psr_level;
@@ -761,6 +780,7 @@ union dmub_rb_cmd {
 	struct dmub_rb_cmd_psr_enable psr_enable;
 	struct dmub_rb_cmd_psr_set_level psr_set_level;
 	struct dmub_rb_cmd_PLAT_54186_wa PLAT_54186_wa;
+	struct dmub_rb_cmd_mall mall;
 	struct dmub_rb_cmd_abm_set_pipe abm_set_pipe;
 	struct dmub_rb_cmd_abm_set_backlight abm_set_backlight;
 	struct dmub_rb_cmd_abm_set_level abm_set_level;
-- 
2.25.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/3] drm/amdgpu: add support to configure MALL for sienna_cichlid (v2)
  2020-10-20 20:26 ` [PATCH 2/3] drm/amdgpu: add support to configure MALL for sienna_cichlid (v2) Alex Deucher
@ 2020-10-21 11:01   ` Christian König
  2020-10-26 23:11   ` Luben Tuikov
  1 sibling, 0 replies; 7+ messages in thread
From: Christian König @ 2020-10-21 11:01 UTC (permalink / raw)
  To: Alex Deucher, amd-gfx; +Cc: Alex Deucher, Likun Gao, Hawking Zhang

Am 20.10.20 um 22:26 schrieb Alex Deucher:
> From: Likun Gao <Likun.Gao@amd.com>
>
> Enable Memory Access at Last Level (MALL) feature for sienna_cichlid.
>
> v2: drop module option.  We need to add UAPI so userspace can
> request MALL per buffer.
>
> Signed-off-by: Likun Gao <Likun.Gao@amd.com>
> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 +++
>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 3 ++-
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 2 +-
>   3 files changed, 6 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index ffea3b89b9da..929d7cb92dc0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -76,6 +76,9 @@ struct amdgpu_bo_list_entry;
>   /* PTE is handled as PDE for VEGA10 (Translate Further) */
>   #define AMDGPU_PTE_TF		(1ULL << 56)
>   
> +/* MALL noalloc for sienna_cichlid, reserved for older ASICs  */
> +#define AMDGPU_PTE_NOALLOC	(1ULL << 58)
> +
>   /* PDE Block Fragment Size for VEGA10 */
>   #define AMDGPU_PDE_BFS(a)	((uint64_t)a << 59)
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 40af17610207..ef385a529013 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -486,7 +486,8 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid
>   /*
>    * PTE format on NAVI 10:
>    * 63:59 reserved
> - * 58:57 reserved
> + * 58 reserved and for sienna_cichlid is used for MALL noalloc
> + * 57 reserved
>    * 56 F
>    * 55 L
>    * 54 reserved
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> index 18eca0d4dbcc..ae6158456094 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> @@ -707,7 +707,7 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
>   		temp &= 0xFF0FFF;
>   		temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
>   			 (CACHE_WRITE_POLICY_L2__DEFAULT << 14) |
> -			 0x01000000);
> +			 SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK);
>   		WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
>   
>   		if (!amdgpu_sriov_vf(adev)) {

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 3/3] drm/amdgpu/display: add MALL support
  2020-10-20 20:26 ` [PATCH 3/3] drm/amdgpu/display: add MALL support Alex Deucher
@ 2020-10-26 15:51   ` Bas Nieuwenhuizen
  2020-10-26 16:19     ` Alex Deucher
  0 siblings, 1 reply; 7+ messages in thread
From: Bas Nieuwenhuizen @ 2020-10-26 15:51 UTC (permalink / raw)
  To: Alex Deucher; +Cc: Alex Deucher, Bhawanpreet Lakha, amd-gfx mailing list

On Tue, Oct 20, 2020 at 10:26 PM Alex Deucher <alexdeucher@gmail.com> wrote:
>
> From: Bhawanpreet Lakha <Bhawanpreet.Lakha@amd.com>
>
> Enable Memory Access at Last Level (MALL) feature for display.
>
> Signed-off-by: Bhawanpreet Lakha <Bhawanpreet.Lakha@amd.com>
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> ---
>  .../drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h |  1 +
>  .../display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c  | 10 +++
>  .../dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c  |  9 +++
>  .../dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.h  |  2 +
>  .../drm/amd/display/dc/dcn30/dcn30_hwseq.c    | 65 +++++++++++++++++++
>  .../drm/amd/display/dc/dcn30/dcn30_resource.c |  2 +-
>  .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h   | 20 ++++++
>  7 files changed, 108 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h
> index 5ed03287aaaf..fa09c594fd36 100644
> --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h
> +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h
> @@ -53,6 +53,7 @@
>  #define DALSMC_MSG_GetDcModeMaxDpmFreq            0xC
>  #define DALSMC_MSG_SetMinDeepSleepDcefclk         0xD
>  #define DALSMC_MSG_NumOfDisplays                  0xE
> +#define DALSMC_MSG_SetDisplayRefreshFromMall      0xF
>  #define DALSMC_MSG_SetExternalClientDfCstateAllow 0x10
>  #define DALSMC_MSG_BacoAudioD3PME                 0x11
>  #define DALSMC_Message_Count                      0x12
> diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
> index b0e9b0509568..7bad73b2d146 100644
> --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
> +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
> @@ -145,6 +145,16 @@ static void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr)
>         clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz;
>         clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF;
>
> +       /* Set D - MALL - SR enter and exit times adjusted for MALL */
> +//     clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].valid = true;
> +//     clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = pstate_latency_us;
> +//     clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = 2;
> +//     clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = 4;
> +//     clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL;
> +//     clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = 0;
> +//     clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF;
> +//     clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz;
> +//     clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF;
>  }
>
>  void dcn3_init_clocks(struct clk_mgr *clk_mgr_base)
> diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c
> index 7ee3ec5a8af8..8ecc708bcd9e 100644
> --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c
> +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c
> @@ -297,6 +297,15 @@ void dcn30_smu_set_num_of_displays(struct clk_mgr_internal *clk_mgr, uint32_t nu
>                         DALSMC_MSG_NumOfDisplays, num_displays, NULL);
>  }
>
> +void dcn30_smu_set_display_refresh_from_mall(struct clk_mgr_internal *clk_mgr, bool enable, uint8_t cache_timer_delay, uint8_t cache_timer_scale)
> +{
> +       /* bits 8:7 for cache timer scale, bits 6:1 for cache timer delay, bit 0 = 1 for enable, = 0 for disable */
> +       uint32_t param = (cache_timer_scale << 7) | (cache_timer_delay << 1) | (enable ? 1 : 0);
> +
> +       dcn30_smu_send_msg_with_param(clk_mgr,
> +                       DALSMC_MSG_SetDisplayRefreshFromMall, param, NULL);
> +}
> +
>  void dcn30_smu_set_external_client_df_cstate_allow(struct clk_mgr_internal *clk_mgr, bool enable)
>  {
>         smu_print("SMU Set external client df cstate allow: enable = %d\n", enable);
> diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.h
> index 236f20ec90d4..dd2640a3ce5d 100644
> --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.h
> +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.h
> @@ -70,6 +70,7 @@ typedef enum {
>  typedef enum {
>         WATERMARKS_CLOCK_RANGE = 0,
>         WATERMARKS_DUMMY_PSTATE,
> +       WATERMARKS_MALL,
>         WATERMARKS_COUNT,
>  } WATERMARKS_FLAGS_e;
>
> @@ -102,6 +103,7 @@ unsigned int dcn30_smu_get_dpm_freq_by_index(struct clk_mgr_internal *clk_mgr, P
>  unsigned int dcn30_smu_get_dc_mode_max_dpm_freq(struct clk_mgr_internal *clk_mgr, PPCLK_e clk);
>  void         dcn30_smu_set_min_deep_sleep_dcef_clk(struct clk_mgr_internal *clk_mgr, uint32_t freq_mhz);
>  void         dcn30_smu_set_num_of_displays(struct clk_mgr_internal *clk_mgr, uint32_t num_displays);
> +void         dcn30_smu_set_display_refresh_from_mall(struct clk_mgr_internal *clk_mgr, bool enable, uint8_t cache_timer_delay, uint8_t cache_timer_scale);
>  void         dcn30_smu_set_external_client_df_cstate_allow(struct clk_mgr_internal *clk_mgr, bool enable);
>  void         dcn30_smu_set_pme_workaround(struct clk_mgr_internal *clk_mgr);
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
> index 8eb8e13e1130..a06f6d19e38e 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
> @@ -696,6 +696,10 @@ void dcn30_program_dmdata_engine(struct pipe_ctx *pipe_ctx)
>
>  bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
>  {
> +       union dmub_rb_cmd cmd;
> +       unsigned int surface_size, refresh_hz, denom;
> +       uint32_t tmr_delay = 0, tmr_scale = 0;
> +
>         if (!dc->ctx->dmub_srv)
>                 return false;
>
> @@ -710,12 +714,73 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
>                                         /* Fail eligibility on a visible stream */
>                                         break;
>                         }
> +
> +                       // TODO: remove hard code size
> +                       if (surface_size < 128 * 1024 * 1024) {

I think surface_size is uninitialized here?

> +                               refresh_hz = (unsigned long long) dc->current_state->streams[0]->timing.pix_clk_100hz * 100LL /
> +                                               (dc->current_state->streams[0]->timing.v_total * dc->current_state->streams[0]->timing.h_total);
> +
> +                               /*
> +                                * Delay_Us = 65.28 * (64 + MallFrameCacheTmrDly) * 2^MallFrameCacheTmrScale
> +                                * Delay_Us / 65.28 = (64 + MallFrameCacheTmrDly) * 2^MallFrameCacheTmrScale
> +                                * (Delay_Us / 65.28) / 2^MallFrameCacheTmrScale = 64 + MallFrameCacheTmrDly
> +                                * MallFrameCacheTmrDly = ((Delay_Us / 65.28) / 2^MallFrameCacheTmrScale) - 64
> +                                *                      = (1000000 / refresh) / 65.28 / 2^MallFrameCacheTmrScale - 64
> +                                *                      = 1000000 / (refresh * 65.28 * 2^MallFrameCacheTmrScale) - 64
> +                                *                      = (1000000 * 100) / (refresh * 6528 * 2^MallFrameCacheTmrScale) - 64
> +                                *
> +                                * need to round up the result of the division before the subtraction
> +                                */
> +                               denom = refresh_hz * 6528;
> +                               tmr_delay = (100000000LL + denom - 1) / denom - 64LL;
> +
> +                               /* scale should be increased until it fits into 6 bits */
> +                               while (tmr_delay & ~0x3F) {
> +                                       tmr_scale++;
> +
> +                                       if (tmr_scale > 3) {
> +                                               /* The delay exceeds the range of the hysteresis timer */
> +                                               ASSERT(false);
> +                                               return false;
> +                                       }
> +
> +                                       denom *= 2;
> +                                       tmr_delay = (100000000LL + denom - 1) / denom - 64LL;
> +                               }
> +
> +                               /* Enable MALL */
> +                               memset(&cmd, 0, sizeof(cmd));
> +                               cmd.mall.header.type = DMUB_CMD__MALL;
> +                               cmd.mall.header.sub_type =
> +                                       DMUB_CMD__MALL_ACTION_ALLOW;
> +                               cmd.mall.header.payload_bytes =
> +                                       sizeof(cmd.mall) -
> +                                       sizeof(cmd.mall.header);
> +                               cmd.mall.tmr_delay = tmr_delay;
> +                               cmd.mall.tmr_scale = tmr_scale;
> +
> +                               dc_dmub_srv_cmd_queue(dc->ctx->dmub_srv, &cmd);
> +                               dc_dmub_srv_cmd_execute(dc->ctx->dmub_srv);
> +
> +                               return true;
> +                       }
>                 }
>
>                 /* No applicable optimizations */
>                 return false;
>         }
>
> +       /* Disable MALL */
> +       memset(&cmd, 0, sizeof(cmd));
> +       cmd.mall.header.type = DMUB_CMD__MALL;
> +       cmd.mall.header.sub_type = DMUB_CMD__MALL_ACTION_DISALLOW;
> +       cmd.mall.header.payload_bytes =
> +               sizeof(cmd.mall) - sizeof(cmd.mall.header);
> +
> +       dc_dmub_srv_cmd_queue(dc->ctx->dmub_srv, &cmd);
> +       dc_dmub_srv_cmd_execute(dc->ctx->dmub_srv);
> +       dc_dmub_srv_wait_idle(dc->ctx->dmub_srv);
> +
>         return true;
>  }
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
> index 783a1d7ae7d3..b132bb7f6704 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
> @@ -2247,7 +2247,7 @@ void dcn30_calculate_wm_and_dlg(
>         /* Set D:
>          * DCFCLK: Min Required
>          * FCLK(proportional to UCLK): 1GHz or Max
> -        * sr_enter_exit = 4, sr_exit = 2us
> +        * MALL stutter, sr_enter_exit = 4, sr_exit = 2us
>          */
>         /*
>         if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) {
> diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
> index 26a4c6caf606..6e5be1fdb4bb 100644
> --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
> +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
> @@ -298,6 +298,7 @@ enum dmub_cmd_type {
>         DMUB_CMD__REG_REG_WAIT = 4,
>         DMUB_CMD__PLAT_54186_WA = 5,
>         DMUB_CMD__PSR = 64,
> +       DMUB_CMD__MALL = 65,
>         DMUB_CMD__ABM = 66,
>         DMUB_CMD__HW_LOCK = 69,
>         DMUB_CMD__DP_AUX_ACCESS = 70,
> @@ -425,6 +426,18 @@ struct dmub_rb_cmd_PLAT_54186_wa {
>         struct dmub_cmd_PLAT_54186_wa flip;
>  };
>
> +struct dmub_rb_cmd_mall {
> +       struct dmub_cmd_header header;
> +       union dmub_addr cursor_copy_src;
> +       union dmub_addr cursor_copy_dst;
> +       uint32_t tmr_delay;
> +       uint32_t tmr_scale;
> +       uint16_t cursor_width;
> +       uint16_t cursor_pitch;
> +       uint16_t cursor_height;
> +       uint8_t cursor_bpp;
> +};
> +
>  struct dmub_cmd_digx_encoder_control_data {
>         union dig_encoder_control_parameters_v1_5 dig;
>  };
> @@ -556,6 +569,12 @@ enum psr_version {
>         PSR_VERSION_UNSUPPORTED                 = 0xFFFFFFFF,
>  };
>
> +enum dmub_cmd_mall_type {
> +       DMUB_CMD__MALL_ACTION_ALLOW = 0,
> +       DMUB_CMD__MALL_ACTION_DISALLOW = 1,
> +       DMUB_CMD__MALL_ACTION_COPY_CURSOR = 2,
> +};
> +
>  struct dmub_cmd_psr_copy_settings_data {
>         union dmub_psr_debug_flags debug;
>         uint16_t psr_level;
> @@ -761,6 +780,7 @@ union dmub_rb_cmd {
>         struct dmub_rb_cmd_psr_enable psr_enable;
>         struct dmub_rb_cmd_psr_set_level psr_set_level;
>         struct dmub_rb_cmd_PLAT_54186_wa PLAT_54186_wa;
> +       struct dmub_rb_cmd_mall mall;
>         struct dmub_rb_cmd_abm_set_pipe abm_set_pipe;
>         struct dmub_rb_cmd_abm_set_backlight abm_set_backlight;
>         struct dmub_rb_cmd_abm_set_level abm_set_level;
> --
> 2.25.4
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 3/3] drm/amdgpu/display: add MALL support
  2020-10-26 15:51   ` Bas Nieuwenhuizen
@ 2020-10-26 16:19     ` Alex Deucher
  0 siblings, 0 replies; 7+ messages in thread
From: Alex Deucher @ 2020-10-26 16:19 UTC (permalink / raw)
  To: Bas Nieuwenhuizen; +Cc: Alex Deucher, Bhawanpreet Lakha, amd-gfx mailing list

On Mon, Oct 26, 2020 at 11:51 AM Bas Nieuwenhuizen
<bas@basnieuwenhuizen.nl> wrote:
>
> On Tue, Oct 20, 2020 at 10:26 PM Alex Deucher <alexdeucher@gmail.com> wrote:
> >
> > From: Bhawanpreet Lakha <Bhawanpreet.Lakha@amd.com>
> >
> > Enable Memory Access at Last Level (MALL) feature for display.
> >
> > Signed-off-by: Bhawanpreet Lakha <Bhawanpreet.Lakha@amd.com>
> > Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> > ---
> >  .../drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h |  1 +
> >  .../display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c  | 10 +++
> >  .../dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c  |  9 +++
> >  .../dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.h  |  2 +
> >  .../drm/amd/display/dc/dcn30/dcn30_hwseq.c    | 65 +++++++++++++++++++
> >  .../drm/amd/display/dc/dcn30/dcn30_resource.c |  2 +-
> >  .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h   | 20 ++++++
> >  7 files changed, 108 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h
> > index 5ed03287aaaf..fa09c594fd36 100644
> > --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h
> > +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dalsmc.h
> > @@ -53,6 +53,7 @@
> >  #define DALSMC_MSG_GetDcModeMaxDpmFreq            0xC
> >  #define DALSMC_MSG_SetMinDeepSleepDcefclk         0xD
> >  #define DALSMC_MSG_NumOfDisplays                  0xE
> > +#define DALSMC_MSG_SetDisplayRefreshFromMall      0xF
> >  #define DALSMC_MSG_SetExternalClientDfCstateAllow 0x10
> >  #define DALSMC_MSG_BacoAudioD3PME                 0x11
> >  #define DALSMC_Message_Count                      0x12
> > diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
> > index b0e9b0509568..7bad73b2d146 100644
> > --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
> > +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
> > @@ -145,6 +145,16 @@ static void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr)
> >         clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz;
> >         clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF;
> >
> > +       /* Set D - MALL - SR enter and exit times adjusted for MALL */
> > +//     clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].valid = true;
> > +//     clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = pstate_latency_us;
> > +//     clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = 2;
> > +//     clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = 4;
> > +//     clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL;
> > +//     clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = 0;
> > +//     clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF;
> > +//     clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz;
> > +//     clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF;
> >  }
> >
> >  void dcn3_init_clocks(struct clk_mgr *clk_mgr_base)
> > diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c
> > index 7ee3ec5a8af8..8ecc708bcd9e 100644
> > --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c
> > +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.c
> > @@ -297,6 +297,15 @@ void dcn30_smu_set_num_of_displays(struct clk_mgr_internal *clk_mgr, uint32_t nu
> >                         DALSMC_MSG_NumOfDisplays, num_displays, NULL);
> >  }
> >
> > +void dcn30_smu_set_display_refresh_from_mall(struct clk_mgr_internal *clk_mgr, bool enable, uint8_t cache_timer_delay, uint8_t cache_timer_scale)
> > +{
> > +       /* bits 8:7 for cache timer scale, bits 6:1 for cache timer delay, bit 0 = 1 for enable, = 0 for disable */
> > +       uint32_t param = (cache_timer_scale << 7) | (cache_timer_delay << 1) | (enable ? 1 : 0);
> > +
> > +       dcn30_smu_send_msg_with_param(clk_mgr,
> > +                       DALSMC_MSG_SetDisplayRefreshFromMall, param, NULL);
> > +}
> > +
> >  void dcn30_smu_set_external_client_df_cstate_allow(struct clk_mgr_internal *clk_mgr, bool enable)
> >  {
> >         smu_print("SMU Set external client df cstate allow: enable = %d\n", enable);
> > diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.h
> > index 236f20ec90d4..dd2640a3ce5d 100644
> > --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.h
> > +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr_smu_msg.h
> > @@ -70,6 +70,7 @@ typedef enum {
> >  typedef enum {
> >         WATERMARKS_CLOCK_RANGE = 0,
> >         WATERMARKS_DUMMY_PSTATE,
> > +       WATERMARKS_MALL,
> >         WATERMARKS_COUNT,
> >  } WATERMARKS_FLAGS_e;
> >
> > @@ -102,6 +103,7 @@ unsigned int dcn30_smu_get_dpm_freq_by_index(struct clk_mgr_internal *clk_mgr, P
> >  unsigned int dcn30_smu_get_dc_mode_max_dpm_freq(struct clk_mgr_internal *clk_mgr, PPCLK_e clk);
> >  void         dcn30_smu_set_min_deep_sleep_dcef_clk(struct clk_mgr_internal *clk_mgr, uint32_t freq_mhz);
> >  void         dcn30_smu_set_num_of_displays(struct clk_mgr_internal *clk_mgr, uint32_t num_displays);
> > +void         dcn30_smu_set_display_refresh_from_mall(struct clk_mgr_internal *clk_mgr, bool enable, uint8_t cache_timer_delay, uint8_t cache_timer_scale);
> >  void         dcn30_smu_set_external_client_df_cstate_allow(struct clk_mgr_internal *clk_mgr, bool enable);
> >  void         dcn30_smu_set_pme_workaround(struct clk_mgr_internal *clk_mgr);
> >
> > diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
> > index 8eb8e13e1130..a06f6d19e38e 100644
> > --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
> > +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
> > @@ -696,6 +696,10 @@ void dcn30_program_dmdata_engine(struct pipe_ctx *pipe_ctx)
> >
> >  bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
> >  {
> > +       union dmub_rb_cmd cmd;
> > +       unsigned int surface_size, refresh_hz, denom;
> > +       uint32_t tmr_delay = 0, tmr_scale = 0;
> > +
> >         if (!dc->ctx->dmub_srv)
> >                 return false;
> >
> > @@ -710,12 +714,73 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable)
> >                                         /* Fail eligibility on a visible stream */
> >                                         break;
> >                         }
> > +
> > +                       // TODO: remove hard code size
> > +                       if (surface_size < 128 * 1024 * 1024) {
>
> I think surface_size is uninitialized here?

Whoops, looks like this crossed with another patch when I rebased and
we lost the surface_size calculation.  Fix sent.

Alex


>
> > +                               refresh_hz = (unsigned long long) dc->current_state->streams[0]->timing.pix_clk_100hz * 100LL /
> > +                                               (dc->current_state->streams[0]->timing.v_total * dc->current_state->streams[0]->timing.h_total);
> > +
> > +                               /*
> > +                                * Delay_Us = 65.28 * (64 + MallFrameCacheTmrDly) * 2^MallFrameCacheTmrScale
> > +                                * Delay_Us / 65.28 = (64 + MallFrameCacheTmrDly) * 2^MallFrameCacheTmrScale
> > +                                * (Delay_Us / 65.28) / 2^MallFrameCacheTmrScale = 64 + MallFrameCacheTmrDly
> > +                                * MallFrameCacheTmrDly = ((Delay_Us / 65.28) / 2^MallFrameCacheTmrScale) - 64
> > +                                *                      = (1000000 / refresh) / 65.28 / 2^MallFrameCacheTmrScale - 64
> > +                                *                      = 1000000 / (refresh * 65.28 * 2^MallFrameCacheTmrScale) - 64
> > +                                *                      = (1000000 * 100) / (refresh * 6528 * 2^MallFrameCacheTmrScale) - 64
> > +                                *
> > +                                * need to round up the result of the division before the subtraction
> > +                                */
> > +                               denom = refresh_hz * 6528;
> > +                               tmr_delay = (100000000LL + denom - 1) / denom - 64LL;
> > +
> > +                               /* scale should be increased until it fits into 6 bits */
> > +                               while (tmr_delay & ~0x3F) {
> > +                                       tmr_scale++;
> > +
> > +                                       if (tmr_scale > 3) {
> > +                                               /* The delay exceeds the range of the hysteresis timer */
> > +                                               ASSERT(false);
> > +                                               return false;
> > +                                       }
> > +
> > +                                       denom *= 2;
> > +                                       tmr_delay = (100000000LL + denom - 1) / denom - 64LL;
> > +                               }
> > +
> > +                               /* Enable MALL */
> > +                               memset(&cmd, 0, sizeof(cmd));
> > +                               cmd.mall.header.type = DMUB_CMD__MALL;
> > +                               cmd.mall.header.sub_type =
> > +                                       DMUB_CMD__MALL_ACTION_ALLOW;
> > +                               cmd.mall.header.payload_bytes =
> > +                                       sizeof(cmd.mall) -
> > +                                       sizeof(cmd.mall.header);
> > +                               cmd.mall.tmr_delay = tmr_delay;
> > +                               cmd.mall.tmr_scale = tmr_scale;
> > +
> > +                               dc_dmub_srv_cmd_queue(dc->ctx->dmub_srv, &cmd);
> > +                               dc_dmub_srv_cmd_execute(dc->ctx->dmub_srv);
> > +
> > +                               return true;
> > +                       }
> >                 }
> >
> >                 /* No applicable optimizations */
> >                 return false;
> >         }
> >
> > +       /* Disable MALL */
> > +       memset(&cmd, 0, sizeof(cmd));
> > +       cmd.mall.header.type = DMUB_CMD__MALL;
> > +       cmd.mall.header.sub_type = DMUB_CMD__MALL_ACTION_DISALLOW;
> > +       cmd.mall.header.payload_bytes =
> > +               sizeof(cmd.mall) - sizeof(cmd.mall.header);
> > +
> > +       dc_dmub_srv_cmd_queue(dc->ctx->dmub_srv, &cmd);
> > +       dc_dmub_srv_cmd_execute(dc->ctx->dmub_srv);
> > +       dc_dmub_srv_wait_idle(dc->ctx->dmub_srv);
> > +
> >         return true;
> >  }
> >
> > diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
> > index 783a1d7ae7d3..b132bb7f6704 100644
> > --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
> > +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
> > @@ -2247,7 +2247,7 @@ void dcn30_calculate_wm_and_dlg(
> >         /* Set D:
> >          * DCFCLK: Min Required
> >          * FCLK(proportional to UCLK): 1GHz or Max
> > -        * sr_enter_exit = 4, sr_exit = 2us
> > +        * MALL stutter, sr_enter_exit = 4, sr_exit = 2us
> >          */
> >         /*
> >         if (dc->clk_mgr->bw_params->wm_table.nv_entries[WM_D].valid) {
> > diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
> > index 26a4c6caf606..6e5be1fdb4bb 100644
> > --- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
> > +++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
> > @@ -298,6 +298,7 @@ enum dmub_cmd_type {
> >         DMUB_CMD__REG_REG_WAIT = 4,
> >         DMUB_CMD__PLAT_54186_WA = 5,
> >         DMUB_CMD__PSR = 64,
> > +       DMUB_CMD__MALL = 65,
> >         DMUB_CMD__ABM = 66,
> >         DMUB_CMD__HW_LOCK = 69,
> >         DMUB_CMD__DP_AUX_ACCESS = 70,
> > @@ -425,6 +426,18 @@ struct dmub_rb_cmd_PLAT_54186_wa {
> >         struct dmub_cmd_PLAT_54186_wa flip;
> >  };
> >
> > +struct dmub_rb_cmd_mall {
> > +       struct dmub_cmd_header header;
> > +       union dmub_addr cursor_copy_src;
> > +       union dmub_addr cursor_copy_dst;
> > +       uint32_t tmr_delay;
> > +       uint32_t tmr_scale;
> > +       uint16_t cursor_width;
> > +       uint16_t cursor_pitch;
> > +       uint16_t cursor_height;
> > +       uint8_t cursor_bpp;
> > +};
> > +
> >  struct dmub_cmd_digx_encoder_control_data {
> >         union dig_encoder_control_parameters_v1_5 dig;
> >  };
> > @@ -556,6 +569,12 @@ enum psr_version {
> >         PSR_VERSION_UNSUPPORTED                 = 0xFFFFFFFF,
> >  };
> >
> > +enum dmub_cmd_mall_type {
> > +       DMUB_CMD__MALL_ACTION_ALLOW = 0,
> > +       DMUB_CMD__MALL_ACTION_DISALLOW = 1,
> > +       DMUB_CMD__MALL_ACTION_COPY_CURSOR = 2,
> > +};
> > +
> >  struct dmub_cmd_psr_copy_settings_data {
> >         union dmub_psr_debug_flags debug;
> >         uint16_t psr_level;
> > @@ -761,6 +780,7 @@ union dmub_rb_cmd {
> >         struct dmub_rb_cmd_psr_enable psr_enable;
> >         struct dmub_rb_cmd_psr_set_level psr_set_level;
> >         struct dmub_rb_cmd_PLAT_54186_wa PLAT_54186_wa;
> > +       struct dmub_rb_cmd_mall mall;
> >         struct dmub_rb_cmd_abm_set_pipe abm_set_pipe;
> >         struct dmub_rb_cmd_abm_set_backlight abm_set_backlight;
> >         struct dmub_rb_cmd_abm_set_level abm_set_level;
> > --
> > 2.25.4
> >
> > _______________________________________________
> > amd-gfx mailing list
> > amd-gfx@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2/3] drm/amdgpu: add support to configure MALL for sienna_cichlid (v2)
  2020-10-20 20:26 ` [PATCH 2/3] drm/amdgpu: add support to configure MALL for sienna_cichlid (v2) Alex Deucher
  2020-10-21 11:01   ` Christian König
@ 2020-10-26 23:11   ` Luben Tuikov
  1 sibling, 0 replies; 7+ messages in thread
From: Luben Tuikov @ 2020-10-26 23:11 UTC (permalink / raw)
  To: Alex Deucher, amd-gfx; +Cc: Alex Deucher, Likun Gao, Hawking Zhang

On 2020-10-20 4:26 p.m., Alex Deucher wrote:
> From: Likun Gao <Likun.Gao@amd.com>
> 
> Enable Memory Access at Last Level (MALL) feature for sienna_cichlid.
> 
> v2: drop module option.  We need to add UAPI so userspace can
> request MALL per buffer.
> 
> Signed-off-by: Likun Gao <Likun.Gao@amd.com>
> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 +++
>  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 3 ++-
>  drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 2 +-
>  3 files changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index ffea3b89b9da..929d7cb92dc0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -76,6 +76,9 @@ struct amdgpu_bo_list_entry;
>  /* PTE is handled as PDE for VEGA10 (Translate Further) */
>  #define AMDGPU_PTE_TF		(1ULL << 56)
>  
> +/* MALL noalloc for sienna_cichlid, reserved for older ASICs  */
> +#define AMDGPU_PTE_NOALLOC	(1ULL << 58)
> +

It would have been good to spell out "MALL" (Memory Access
at Last Level) here in the comment on this macro, as is
done in the commit message above.

Otherwise, the meaning of "MALL" is lost unless
one runs git-blame and finds the commit that
introduced it.

Regards,
Luben

>  /* PDE Block Fragment Size for VEGA10 */
>  #define AMDGPU_PDE_BFS(a)	((uint64_t)a << 59)
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 40af17610207..ef385a529013 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -486,7 +486,8 @@ static void gmc_v10_0_emit_pasid_mapping(struct amdgpu_ring *ring, unsigned vmid
>  /*
>   * PTE format on NAVI 10:
>   * 63:59 reserved
> - * 58:57 reserved
> + * 58 reserved and for sienna_cichlid is used for MALL noalloc
> + * 57 reserved
>   * 56 F
>   * 55 L
>   * 54 reserved
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> index 18eca0d4dbcc..ae6158456094 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> @@ -707,7 +707,7 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
>  		temp &= 0xFF0FFF;
>  		temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) |
>  			 (CACHE_WRITE_POLICY_L2__DEFAULT << 14) |
> -			 0x01000000);
> +			 SDMA0_UTCL1_PAGE__LLC_NOALLOC_MASK);
>  		WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_UTCL1_PAGE), temp);
>  
>  		if (!amdgpu_sriov_vf(adev)) {
> 

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2020-10-26 23:12 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-20 20:26 [PATCH 1/3] drm/amdgpu: add GC 10.3 NOALLOC registers Alex Deucher
2020-10-20 20:26 ` [PATCH 2/3] drm/amdgpu: add support to configure MALL for sienna_cichlid (v2) Alex Deucher
2020-10-21 11:01   ` Christian König
2020-10-26 23:11   ` Luben Tuikov
2020-10-20 20:26 ` [PATCH 3/3] drm/amdgpu/display: add MALL support Alex Deucher
2020-10-26 15:51   ` Bas Nieuwenhuizen
2020-10-26 16:19     ` Alex Deucher

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.