* [PATCH] drm/amd/display: move remaining FPU code to dml folder
@ 2022-10-21 0:05 Ao Zhong
2022-10-21 0:31 ` [PATCH RESEND] " Ao Zhong
0 siblings, 1 reply; 18+ messages in thread
From: Ao Zhong @ 2022-10-21 0:05 UTC (permalink / raw)
To: harry.wentland, sunpeng.li, Rodrigo.Siqueira; +Cc: amd-gfx
Subject: [PATCH] drm/amd/display: move remaining FPU code to dml folder
Move remaining FPU code to dml folder
in preparation for enabling aarch64 support.
Signed-off-by: Ao Zhong <hacc1225@gmail.com>
---
.../drm/amd/display/dc/dcn10/dcn10_resource.c | 44 +------------------
.../drm/amd/display/dc/dcn32/dcn32_resource.c | 5 ++-
.../drm/amd/display/dc/dml/dcn10/dcn10_fpu.c | 40 +++++++++++++++++
.../drm/amd/display/dc/dml/dcn10/dcn10_fpu.h | 3 ++
.../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 8 ++++
.../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 3 ++
6 files changed, 59 insertions(+), 44 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index 56d30baf12df..6bfac8088ab0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
@@ -1295,47 +1295,6 @@ static uint32_t read_pipe_fuses(struct dc_context
*ctx)
return value;
}
-/*
- * Some architectures don't support soft-float (e.g. aarch64), on those
- * this function has to be called with hardfloat enabled, make sure not
- * to inline it so whatever fp stuff is done stays inside
- */
-static noinline void dcn10_resource_construct_fp(
- struct dc *dc)
-{
- if (dc->ctx->dce_version == DCN_VERSION_1_01) {
- struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
- struct dcn_ip_params *dcn_ip = dc->dcn_ip;
- struct display_mode_lib *dml = &dc->dml;
-
- dml->ip.max_num_dpp = 3;
- /* TODO how to handle 23.84? */
- dcn_soc->dram_clock_change_latency = 23;
- dcn_ip->max_num_dpp = 3;
- }
- if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
- dc->dcn_soc->urgent_latency = 3;
- dc->debug.disable_dmcu = true;
- dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
- }
-
-
- dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width /
ddr4_dram_width;
- ASSERT(dc->dcn_soc->number_of_channels < 3);
- if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
- dc->dcn_soc->number_of_channels = 2;
-
- if (dc->dcn_soc->number_of_channels == 1) {
- dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
- dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
- dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
- dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
- if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
- dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
- }
- }
-}
-
static bool verify_clock_values(struct dm_pp_clock_levels_with_voltage
*clks)
{
int i;
@@ -1510,8 +1469,9 @@ static bool dcn10_resource_construct(
memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults));
memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults));
- /* Other architectures we build for build this with soft-float */
+ DC_FP_START();
dcn10_resource_construct_fp(dc);
+ DC_FP_END();
if (!dc->config.is_vmin_only_asic)
if (ASICREV_IS_RAVEN2(dc->ctx->asic_id.hw_internal_rev))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index a88dd7b3d1c1..287b7fa9bf41 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -1918,8 +1918,9 @@ int dcn32_populate_dml_pipes_from_context(
timing = &pipe->stream->timing;
pipes[pipe_cnt].pipe.src.gpuvm = true;
- pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
- pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+ DC_FP_START();
+ dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
+ DC_FP_END();
pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; //
according to spreadsheet
pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
index 99644d896222..0495cecaf1df 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
@@ -27,6 +27,8 @@
#include "dcn10/dcn10_resource.h"
#include "dcn10_fpu.h"
+#include "resource.h"
+#include "amdgpu_dm/dc_fpu.h"
/**
* DOC: DCN10 FPU manipulation Overview
@@ -121,3 +123,41 @@ struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc = {
.writeback_dram_clock_change_latency_us = 23.0,
.return_bus_width_bytes = 64,
};
+
+void dcn10_resource_construct_fp(
+ struct dc *dc)
+{
+ dc_assert_fp_enabled();
+
+ if (dc->ctx->dce_version == DCN_VERSION_1_01) {
+ struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
+ struct dcn_ip_params *dcn_ip = dc->dcn_ip;
+ struct display_mode_lib *dml = &dc->dml;
+
+ dml->ip.max_num_dpp = 3;
+ /* TODO how to handle 23.84? */
+ dcn_soc->dram_clock_change_latency = 23;
+ dcn_ip->max_num_dpp = 3;
+ }
+ if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
+ dc->dcn_soc->urgent_latency = 3;
+ dc->debug.disable_dmcu = true;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
+ }
+
+
+ dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width /
ddr4_dram_width;
+ ASSERT(dc->dcn_soc->number_of_channels < 3);
+ if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
+ dc->dcn_soc->number_of_channels = 2;
+
+ if (dc->dcn_soc->number_of_channels == 1) {
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
+ if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
+ }
+ }
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
index e74ed4b4ce5b..dcbfb73b0afd 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
@@ -27,4 +27,7 @@
#ifndef __DCN10_FPU_H__
#define __DCN10_FPU_H__
+void dcn10_resource_construct_fp(
+ struct dc *dc);
+
#endif /* __DCN20_FPU_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 819de0f11012..58772fce6437 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -2521,3 +2521,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc
*dc, struct clk_bw_params *bw_pa
}
}
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+ int pipe_cnt)
+{
+ dc_assert_fp_enabled();
+
+ pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
+ pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
index 3a3dc2ce4c73..ab010e7e840b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
@@ -73,4 +73,7 @@ int
dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+ int pipe_cnt);
+
#endif
--
2.37.4
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH RESEND] drm/amd/display: move remaining FPU code to dml folder
2022-10-21 0:05 [PATCH] drm/amd/display: move remaining FPU code to dml folder Ao Zhong
@ 2022-10-21 0:31 ` Ao Zhong
2022-10-21 4:31 ` [PATCH] drm/amd/display: add DCN support for ARM64 Ao Zhong
2022-10-25 15:42 ` [PATCH RESEND] " Rodrigo Siqueira
0 siblings, 2 replies; 18+ messages in thread
From: Ao Zhong @ 2022-10-21 0:31 UTC (permalink / raw)
To: Harry Wentland, Leo Li, Rodrigo Siqueira; +Cc: Ao Zhong, amd-gfx
Move remaining FPU code to dml folder
in preparation for enabling aarch64 support.
Signed-off-by: Ao Zhong <hacc1225@gmail.com>
---
.../drm/amd/display/dc/dcn10/dcn10_resource.c | 44 +------------------
.../drm/amd/display/dc/dcn32/dcn32_resource.c | 5 ++-
.../drm/amd/display/dc/dml/dcn10/dcn10_fpu.c | 40 +++++++++++++++++
.../drm/amd/display/dc/dml/dcn10/dcn10_fpu.h | 3 ++
.../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 8 ++++
.../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 3 ++
6 files changed, 59 insertions(+), 44 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index 56d30baf12df..6bfac8088ab0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
@@ -1295,47 +1295,6 @@ static uint32_t read_pipe_fuses(struct dc_context *ctx)
return value;
}
-/*
- * Some architectures don't support soft-float (e.g. aarch64), on those
- * this function has to be called with hardfloat enabled, make sure not
- * to inline it so whatever fp stuff is done stays inside
- */
-static noinline void dcn10_resource_construct_fp(
- struct dc *dc)
-{
- if (dc->ctx->dce_version == DCN_VERSION_1_01) {
- struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
- struct dcn_ip_params *dcn_ip = dc->dcn_ip;
- struct display_mode_lib *dml = &dc->dml;
-
- dml->ip.max_num_dpp = 3;
- /* TODO how to handle 23.84? */
- dcn_soc->dram_clock_change_latency = 23;
- dcn_ip->max_num_dpp = 3;
- }
- if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
- dc->dcn_soc->urgent_latency = 3;
- dc->debug.disable_dmcu = true;
- dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
- }
-
-
- dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
- ASSERT(dc->dcn_soc->number_of_channels < 3);
- if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
- dc->dcn_soc->number_of_channels = 2;
-
- if (dc->dcn_soc->number_of_channels == 1) {
- dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
- dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
- dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
- dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
- if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
- dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
- }
- }
-}
-
static bool verify_clock_values(struct dm_pp_clock_levels_with_voltage *clks)
{
int i;
@@ -1510,8 +1469,9 @@ static bool dcn10_resource_construct(
memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults));
memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults));
- /* Other architectures we build for build this with soft-float */
+ DC_FP_START();
dcn10_resource_construct_fp(dc);
+ DC_FP_END();
if (!dc->config.is_vmin_only_asic)
if (ASICREV_IS_RAVEN2(dc->ctx->asic_id.hw_internal_rev))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index a88dd7b3d1c1..287b7fa9bf41 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -1918,8 +1918,9 @@ int dcn32_populate_dml_pipes_from_context(
timing = &pipe->stream->timing;
pipes[pipe_cnt].pipe.src.gpuvm = true;
- pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
- pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+ DC_FP_START();
+ dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
+ DC_FP_END();
pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet
pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
index 99644d896222..0495cecaf1df 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
@@ -27,6 +27,8 @@
#include "dcn10/dcn10_resource.h"
#include "dcn10_fpu.h"
+#include "resource.h"
+#include "amdgpu_dm/dc_fpu.h"
/**
* DOC: DCN10 FPU manipulation Overview
@@ -121,3 +123,41 @@ struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc = {
.writeback_dram_clock_change_latency_us = 23.0,
.return_bus_width_bytes = 64,
};
+
+void dcn10_resource_construct_fp(
+ struct dc *dc)
+{
+ dc_assert_fp_enabled();
+
+ if (dc->ctx->dce_version == DCN_VERSION_1_01) {
+ struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
+ struct dcn_ip_params *dcn_ip = dc->dcn_ip;
+ struct display_mode_lib *dml = &dc->dml;
+
+ dml->ip.max_num_dpp = 3;
+ /* TODO how to handle 23.84? */
+ dcn_soc->dram_clock_change_latency = 23;
+ dcn_ip->max_num_dpp = 3;
+ }
+ if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
+ dc->dcn_soc->urgent_latency = 3;
+ dc->debug.disable_dmcu = true;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
+ }
+
+
+ dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
+ ASSERT(dc->dcn_soc->number_of_channels < 3);
+ if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
+ dc->dcn_soc->number_of_channels = 2;
+
+ if (dc->dcn_soc->number_of_channels == 1) {
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
+ if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
+ }
+ }
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
index e74ed4b4ce5b..dcbfb73b0afd 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
@@ -27,4 +27,7 @@
#ifndef __DCN10_FPU_H__
#define __DCN10_FPU_H__
+void dcn10_resource_construct_fp(
+ struct dc *dc);
+
#endif /* __DCN20_FPU_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 819de0f11012..58772fce6437 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -2521,3 +2521,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
}
}
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+ int pipe_cnt)
+{
+ dc_assert_fp_enabled();
+
+ pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
+ pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
index 3a3dc2ce4c73..ab010e7e840b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
@@ -73,4 +73,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+ int pipe_cnt);
+
#endif
--
2.37.4
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH] drm/amd/display: add DCN support for ARM64
2022-10-21 0:31 ` [PATCH RESEND] " Ao Zhong
@ 2022-10-21 4:31 ` Ao Zhong
2022-10-25 15:48 ` Rodrigo Siqueira
2022-10-25 15:42 ` [PATCH RESEND] " Rodrigo Siqueira
1 sibling, 1 reply; 18+ messages in thread
From: Ao Zhong @ 2022-10-21 4:31 UTC (permalink / raw)
To: Harry Wentland, Leo Li, Rodrigo Siqueira; +Cc: Ao Zhong, amd-gfx
After moving all FPU code to the DML folder, we can enable DCN support
for the ARM64 platform. Remove the -mgeneral-regs-only CFLAG from the
code in the DML folder that needs to use hardware FPU, and add a control
mechanism for ARM Neon.
Signed-off-by: Ao Zhong <hacc1225@gmail.com>
---
drivers/gpu/drm/amd/display/Kconfig | 2 +-
.../gpu/drm/amd/display/amdgpu_dm/dc_fpu.c | 6 ++
drivers/gpu/drm/amd/display/dc/dml/Makefile | 64 ++++++++++++-------
3 files changed, 49 insertions(+), 23 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
index 0142affcdaa3..a7f1c4e51719 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -6,7 +6,7 @@ config DRM_AMD_DC
bool "AMD DC - Enable new display engine"
default y
select SND_HDA_COMPONENT if SND_HDA_CORE
- select DRM_AMD_DC_DCN if (X86 || PPC64)
+ select DRM_AMD_DC_DCN if (X86 || PPC64 || (ARM64 && KERNEL_MODE_NEON))
help
Choose this option if you want to use the new display engine
support for AMDGPU. This adds required support for Vega and
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
index ab0c6d191038..1743ca0a3641 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
@@ -31,6 +31,8 @@
#elif defined(CONFIG_PPC64)
#include <asm/switch_to.h>
#include <asm/cputable.h>
+#elif defined(CONFIG_ARM64)
+#include <asm/neon.h>
#endif
/**
@@ -99,6 +101,8 @@ void dc_fpu_begin(const char *function_name, const int line)
preempt_disable();
enable_kernel_fp();
}
+#elif defined(CONFIG_ARM64)
+ kernel_neon_begin();
#endif
}
@@ -136,6 +140,8 @@ void dc_fpu_end(const char *function_name, const int line)
disable_kernel_fp();
preempt_enable();
}
+#elif defined(CONFIG_ARM64)
+ kernel_neon_end();
#endif
}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index d0c6cf61c676..3cdd109189e0 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -33,6 +33,12 @@ ifdef CONFIG_PPC64
dml_ccflags := -mhard-float -maltivec
endif
+ifdef CONFIG_ARM64
+ifdef CONFIG_DRM_AMD_DC_DCN
+dml_rcflags_arm64 := -mgeneral-regs-only
+endif
+endif
+
ifdef CONFIG_CC_IS_GCC
ifeq ($(call cc-ifversion, -lt, 0701, y), y)
IS_OLD_GCC = 1
@@ -87,32 +93,46 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_ccflags) -Wno-tautological-compare
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags) $(dml_rcflags_arm64)
CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn2x/dcn2x.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_rcflags) $(dml_rcflags_arm64)
+ifdef CONFIG_ARM64
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_mode_vba_314.o := $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_rq_dlg_calc_314.o := $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/dcn314_fpu.o := $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn321/dcn321_fpu.o := $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn303/dcn303_fpu.o := $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_rcflags_arm64)
+endif
endif
CFLAGS_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_ccflags)
CFLAGS_$(AMDDALPATH)/dc/dml/display_rq_dlg_helpers.o := $(dml_ccflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_rq_dlg_helpers.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_rq_dlg_helpers.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_rcflags) $(dml_rcflags_arm64)
DML = calcs/dce_calcs.o calcs/custom_float.o calcs/bw_fixed.o
--
2.37.4
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH RESEND] drm/amd/display: move remaining FPU code to dml folder
2022-10-21 0:31 ` [PATCH RESEND] " Ao Zhong
2022-10-21 4:31 ` [PATCH] drm/amd/display: add DCN support for ARM64 Ao Zhong
@ 2022-10-25 15:42 ` Rodrigo Siqueira
2022-10-26 11:02 ` [PATCH v2 1/2] " Ao Zhong
` (3 more replies)
1 sibling, 4 replies; 18+ messages in thread
From: Rodrigo Siqueira @ 2022-10-25 15:42 UTC (permalink / raw)
To: Ao Zhong; +Cc: Leo Li, Harry Wentland, amd-gfx
Hi Ao,
First of all, thanks a lot for this patch.
On 10/20/22 20:31, Ao Zhong wrote:
> Move remaining FPU code to dml folder
> in preparation for enabling aarch64 support.
I guess you found some of the issues here after you tried enabling the
arm64 compilation, right? If so, could you expand the commit message to
describe it better?
>
> Signed-off-by: Ao Zhong <hacc1225@gmail.com>
> ---
> .../drm/amd/display/dc/dcn10/dcn10_resource.c | 44 +------------------
> .../drm/amd/display/dc/dcn32/dcn32_resource.c | 5 ++-
> .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.c | 40 +++++++++++++++++
> .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.h | 3 ++
> .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 8 ++++
> .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 3 ++
Could you split this commit into two parts?
One for DCN10 and another one for DCN32.
> 6 files changed, 59 insertions(+), 44 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
> index 56d30baf12df..6bfac8088ab0 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
> @@ -1295,47 +1295,6 @@ static uint32_t read_pipe_fuses(struct dc_context *ctx)
> return value;
> }
>
> -/*
> - * Some architectures don't support soft-float (e.g. aarch64), on those
> - * this function has to be called with hardfloat enabled, make sure not
> - * to inline it so whatever fp stuff is done stays inside
> - */
> -static noinline void dcn10_resource_construct_fp(
> - struct dc *dc)
> -{
> - if (dc->ctx->dce_version == DCN_VERSION_1_01) {
> - struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
> - struct dcn_ip_params *dcn_ip = dc->dcn_ip;
> - struct display_mode_lib *dml = &dc->dml;
> -
> - dml->ip.max_num_dpp = 3;
> - /* TODO how to handle 23.84? */
> - dcn_soc->dram_clock_change_latency = 23;
> - dcn_ip->max_num_dpp = 3;
> - }
> - if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
> - dc->dcn_soc->urgent_latency = 3;
> - dc->debug.disable_dmcu = true;
> - dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
> - }
> -
> -
> - dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
> - ASSERT(dc->dcn_soc->number_of_channels < 3);
> - if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
> - dc->dcn_soc->number_of_channels = 2;
> -
> - if (dc->dcn_soc->number_of_channels == 1) {
> - dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
> - dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
> - dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
> - dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
> - if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
> - dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
> - }
> - }
> -}
> -
> static bool verify_clock_values(struct dm_pp_clock_levels_with_voltage *clks)
> {
> int i;
> @@ -1510,8 +1469,9 @@ static bool dcn10_resource_construct(
> memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults));
> memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults));
>
> - /* Other architectures we build for build this with soft-float */
> + DC_FP_START();
> dcn10_resource_construct_fp(dc);
> + DC_FP_END();
>
> if (!dc->config.is_vmin_only_asic)
> if (ASICREV_IS_RAVEN2(dc->ctx->asic_id.hw_internal_rev))
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
> index a88dd7b3d1c1..287b7fa9bf41 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
> @@ -1918,8 +1918,9 @@ int dcn32_populate_dml_pipes_from_context(
> timing = &pipe->stream->timing;
>
> pipes[pipe_cnt].pipe.src.gpuvm = true;
> - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
> - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
> + DC_FP_START();
> + dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
> + DC_FP_END();
> pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
> pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet
> pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
> index 99644d896222..0495cecaf1df 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
> @@ -27,6 +27,8 @@
> #include "dcn10/dcn10_resource.h"
>
> #include "dcn10_fpu.h"
> +#include "resource.h"
> +#include "amdgpu_dm/dc_fpu.h"
>
> /**
> * DOC: DCN10 FPU manipulation Overview
> @@ -121,3 +123,41 @@ struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc = {
> .writeback_dram_clock_change_latency_us = 23.0,
> .return_bus_width_bytes = 64,
> };
> +
> +void dcn10_resource_construct_fp(
> + struct dc *dc)
Since this is a small function signature, could you add the dc parameter
on the same line as the function name? Same idea for the header file.
> +{
> + dc_assert_fp_enabled();
> +
Drop the extra space in the above line.
Thanks
Siqueira
> + if (dc->ctx->dce_version == DCN_VERSION_1_01) {
> + struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
> + struct dcn_ip_params *dcn_ip = dc->dcn_ip;
> + struct display_mode_lib *dml = &dc->dml;
> +
> + dml->ip.max_num_dpp = 3;
> + /* TODO how to handle 23.84? */
> + dcn_soc->dram_clock_change_latency = 23;
> + dcn_ip->max_num_dpp = 3;
> + }
> + if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
> + dc->dcn_soc->urgent_latency = 3;
> + dc->debug.disable_dmcu = true;
> + dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
> + }
> +
> +
> + dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
> + ASSERT(dc->dcn_soc->number_of_channels < 3);
> + if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
> + dc->dcn_soc->number_of_channels = 2;
> +
> + if (dc->dcn_soc->number_of_channels == 1) {
> + dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
> + dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
> + dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
> + dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
> + if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
> + dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
> + }
> + }
> +}
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
> index e74ed4b4ce5b..dcbfb73b0afd 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
> @@ -27,4 +27,7 @@
> #ifndef __DCN10_FPU_H__
> #define __DCN10_FPU_H__
>
> +void dcn10_resource_construct_fp(
> + struct dc *dc);
> +
> #endif /* __DCN20_FPU_H__ */
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
> index 819de0f11012..58772fce6437 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
> @@ -2521,3 +2521,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
> }
> }
>
> +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
> + int pipe_cnt)
> +{
> + dc_assert_fp_enabled();
> +
> + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
> + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
> +}
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
> index 3a3dc2ce4c73..ab010e7e840b 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
> @@ -73,4 +73,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
>
> void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
>
> +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
> + int pipe_cnt);
> +
> #endif
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH] drm/amd/display: add DCN support for ARM64
2022-10-21 4:31 ` [PATCH] drm/amd/display: add DCN support for ARM64 Ao Zhong
@ 2022-10-25 15:48 ` Rodrigo Siqueira
2022-10-25 21:17 ` [PATCH v2 1/2] drm/amd/display: move remaining FPU code to dml folder Ao Zhong
2022-10-25 21:17 ` [PATCH v2 2/2] " Ao Zhong
0 siblings, 2 replies; 18+ messages in thread
From: Rodrigo Siqueira @ 2022-10-25 15:48 UTC (permalink / raw)
To: Ao Zhong, Harry Wentland, Leo Li; +Cc: amd-gfx
On 10/21/22 00:31, Ao Zhong wrote:
> After moving all FPU code to the DML folder, we can enable DCN support
> for the ARM64 platform. Remove the -mgeneral-regs-only CFLAG from the
> code in the DML folder that needs to use hardware FPU, and add a control
> mechanism for ARM Neon.
>
> Signed-off-by: Ao Zhong <hacc1225@gmail.com>
> ---
> drivers/gpu/drm/amd/display/Kconfig | 2 +-
> .../gpu/drm/amd/display/amdgpu_dm/dc_fpu.c | 6 ++
> drivers/gpu/drm/amd/display/dc/dml/Makefile | 64 ++++++++++++-------
> 3 files changed, 49 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
> index 0142affcdaa3..a7f1c4e51719 100644
> --- a/drivers/gpu/drm/amd/display/Kconfig
> +++ b/drivers/gpu/drm/amd/display/Kconfig
> @@ -6,7 +6,7 @@ config DRM_AMD_DC
> bool "AMD DC - Enable new display engine"
> default y
> select SND_HDA_COMPONENT if SND_HDA_CORE
> - select DRM_AMD_DC_DCN if (X86 || PPC64)
> + select DRM_AMD_DC_DCN if (X86 || PPC64 || (ARM64 && KERNEL_MODE_NEON))
> help
> Choose this option if you want to use the new display engine
> support for AMDGPU. This adds required support for Vega and
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
> index ab0c6d191038..1743ca0a3641 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
> @@ -31,6 +31,8 @@
> #elif defined(CONFIG_PPC64)
> #include <asm/switch_to.h>
> #include <asm/cputable.h>
> +#elif defined(CONFIG_ARM64)
> +#include <asm/neon.h>
> #endif
>
> /**
> @@ -99,6 +101,8 @@ void dc_fpu_begin(const char *function_name, const int line)
> preempt_disable();
> enable_kernel_fp();
> }
> +#elif defined(CONFIG_ARM64)
> + kernel_neon_begin();
> #endif
> }
>
> @@ -136,6 +140,8 @@ void dc_fpu_end(const char *function_name, const int line)
> disable_kernel_fp();
> preempt_enable();
> }
> +#elif defined(CONFIG_ARM64)
> + kernel_neon_end();
> #endif
> }
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
> index d0c6cf61c676..3cdd109189e0 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
> +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
> @@ -33,6 +33,12 @@ ifdef CONFIG_PPC64
> dml_ccflags := -mhard-float -maltivec
> endif
>
> +ifdef CONFIG_ARM64
> +ifdef CONFIG_DRM_AMD_DC_DCN
> +dml_rcflags_arm64 := -mgeneral-regs-only
> +endif
> +endif
> +
> ifdef CONFIG_CC_IS_GCC
> ifeq ($(call cc-ifversion, -lt, 0701, y), y)
> IS_OLD_GCC = 1
> @@ -87,32 +93,46 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_ccflags)
> CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_ccflags)
> CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_ccflags)
> CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_ccflags) -Wno-tautological-compare
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags) $(dml_rcflags_arm64)
> CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn2x/dcn2x.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_rcflags)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +ifdef CONFIG_ARM64
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_mode_vba_314.o := $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_rq_dlg_calc_314.o := $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/dcn314_fpu.o := $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn321/dcn321_fpu.o := $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn303/dcn303_fpu.o := $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_rcflags_arm64)
> +endif
> endif
> CFLAGS_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_ccflags)
> CFLAGS_$(AMDDALPATH)/dc/dml/display_rq_dlg_helpers.o := $(dml_ccflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_rq_dlg_helpers.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_rcflags)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_rq_dlg_helpers.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_rcflags) $(dml_rcflags_arm64)
>
> DML = calcs/dce_calcs.o calcs/custom_float.o calcs/bw_fixed.o
>
Hi Ao,
This patch lgtm, but let's focus on the FPU isolation for DCN10/32 first.
After we get it merged, resend this patch, but this time also add Arnd
Bergmann, Nathan Chancellor, Stephen Rothwell, and Alex Deucher.
Thanks
Siqueira
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH v2 1/2] drm/amd/display: move remaining FPU code to dml folder
2022-10-25 15:48 ` Rodrigo Siqueira
@ 2022-10-25 21:17 ` Ao Zhong
2022-10-26 7:19 ` Christian König
2022-10-25 21:17 ` [PATCH v2 2/2] " Ao Zhong
1 sibling, 1 reply; 18+ messages in thread
From: Ao Zhong @ 2022-10-25 21:17 UTC (permalink / raw)
To: Rodrigo Siqueira; +Cc: Leo Li, Ao Zhong, Harry Wentland, amd-gfx
In the process of enabling DCN support for arm64, I found that the
dcn10_resource_construct_fp function in dcn10/dcn10_resource.c still
needs to use FPU. This will cause compilation to fail on ARM64 platforms
because -mgeneral-regs-only is enabled by default to disable the
hardware FPU. So move dcn10_resource_construct_fp from dcn10 folder to
dml/dcn10 folder to enable hardware FPU for that function.
Signed-off-by: Ao Zhong <hacc1225@gmail.com>
---
.../drm/amd/display/dc/dcn10/dcn10_resource.c | 44 +------------------
.../drm/amd/display/dc/dml/dcn10/dcn10_fpu.c | 38 ++++++++++++++++
.../drm/amd/display/dc/dml/dcn10/dcn10_fpu.h | 2 +
3 files changed, 42 insertions(+), 42 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index 56d30baf12df..6bfac8088ab0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
@@ -1295,47 +1295,6 @@ static uint32_t read_pipe_fuses(struct dc_context *ctx)
return value;
}
-/*
- * Some architectures don't support soft-float (e.g. aarch64), on those
- * this function has to be called with hardfloat enabled, make sure not
- * to inline it so whatever fp stuff is done stays inside
- */
-static noinline void dcn10_resource_construct_fp(
- struct dc *dc)
-{
- if (dc->ctx->dce_version == DCN_VERSION_1_01) {
- struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
- struct dcn_ip_params *dcn_ip = dc->dcn_ip;
- struct display_mode_lib *dml = &dc->dml;
-
- dml->ip.max_num_dpp = 3;
- /* TODO how to handle 23.84? */
- dcn_soc->dram_clock_change_latency = 23;
- dcn_ip->max_num_dpp = 3;
- }
- if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
- dc->dcn_soc->urgent_latency = 3;
- dc->debug.disable_dmcu = true;
- dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
- }
-
-
- dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
- ASSERT(dc->dcn_soc->number_of_channels < 3);
- if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
- dc->dcn_soc->number_of_channels = 2;
-
- if (dc->dcn_soc->number_of_channels == 1) {
- dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
- dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
- dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
- dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
- if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
- dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
- }
- }
-}
-
static bool verify_clock_values(struct dm_pp_clock_levels_with_voltage *clks)
{
int i;
@@ -1510,8 +1469,9 @@ static bool dcn10_resource_construct(
memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults));
memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults));
- /* Other architectures we build for build this with soft-float */
+ DC_FP_START();
dcn10_resource_construct_fp(dc);
+ DC_FP_END();
if (!dc->config.is_vmin_only_asic)
if (ASICREV_IS_RAVEN2(dc->ctx->asic_id.hw_internal_rev))
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
index 99644d896222..8b5e6fff5444 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
@@ -27,6 +27,8 @@
#include "dcn10/dcn10_resource.h"
#include "dcn10_fpu.h"
+#include "resource.h"
+#include "amdgpu_dm/dc_fpu.h"
/**
* DOC: DCN10 FPU manipulation Overview
@@ -121,3 +123,39 @@ struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc = {
.writeback_dram_clock_change_latency_us = 23.0,
.return_bus_width_bytes = 64,
};
+
+void dcn10_resource_construct_fp(struct dc *dc)
+{
+ dc_assert_fp_enabled();
+ if (dc->ctx->dce_version == DCN_VERSION_1_01) {
+ struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
+ struct dcn_ip_params *dcn_ip = dc->dcn_ip;
+ struct display_mode_lib *dml = &dc->dml;
+
+ dml->ip.max_num_dpp = 3;
+ /* TODO how to handle 23.84? */
+ dcn_soc->dram_clock_change_latency = 23;
+ dcn_ip->max_num_dpp = 3;
+ }
+ if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
+ dc->dcn_soc->urgent_latency = 3;
+ dc->debug.disable_dmcu = true;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
+ }
+
+
+ dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
+ ASSERT(dc->dcn_soc->number_of_channels < 3);
+ if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
+ dc->dcn_soc->number_of_channels = 2;
+
+ if (dc->dcn_soc->number_of_channels == 1) {
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
+ if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
+ }
+ }
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
index e74ed4b4ce5b..63219ecd8478 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
@@ -27,4 +27,6 @@
#ifndef __DCN10_FPU_H__
#define __DCN10_FPU_H__
+void dcn10_resource_construct_fp(struct dc *dc);
+
#endif /* __DCN20_FPU_H__ */
--
2.37.4
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 2/2] drm/amd/display: move remaining FPU code to dml folder
2022-10-25 15:48 ` Rodrigo Siqueira
2022-10-25 21:17 ` [PATCH v2 1/2] drm/amd/display: move remaining FPU code to dml folder Ao Zhong
@ 2022-10-25 21:17 ` Ao Zhong
1 sibling, 0 replies; 18+ messages in thread
From: Ao Zhong @ 2022-10-25 21:17 UTC (permalink / raw)
To: Rodrigo Siqueira; +Cc: Leo Li, Ao Zhong, Harry Wentland, amd-gfx
pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
These two operations in dcn32/dcn32_resource.c still need to use the FPU.
This will cause compilation to fail on ARM64 platforms because
-mgeneral-regs-only is enabled by default to disable the hardware FPU.
Therefore, imitate the dcn31_zero_pipe_dcc_fraction function in
dml/dcn31/dcn31_fpu.c, define the dcn32_zero_pipe_dcc_fraction function
in dcn32_fpu.c, and move the above two operations into this function.
Signed-off-by: Ao Zhong <hacc1225@gmail.com>
---
drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 5 +++--
drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 8 ++++++++
drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 3 +++
3 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index a88dd7b3d1c1..287b7fa9bf41 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -1918,8 +1918,9 @@ int dcn32_populate_dml_pipes_from_context(
timing = &pipe->stream->timing;
pipes[pipe_cnt].pipe.src.gpuvm = true;
- pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
- pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+ DC_FP_START();
+ dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
+ DC_FP_END();
pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet
pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 819de0f11012..58772fce6437 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -2521,3 +2521,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
}
}
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+ int pipe_cnt)
+{
+ dc_assert_fp_enabled();
+
+ pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
+ pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
index 3a3dc2ce4c73..ab010e7e840b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
@@ -73,4 +73,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+ int pipe_cnt);
+
#endif
--
2.37.4
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH v2 1/2] drm/amd/display: move remaining FPU code to dml folder
2022-10-25 21:17 ` [PATCH v2 1/2] drm/amd/display: move remaining FPU code to dml folder Ao Zhong
@ 2022-10-26 7:19 ` Christian König
2022-10-26 10:41 ` Ao Zhong
0 siblings, 1 reply; 18+ messages in thread
From: Christian König @ 2022-10-26 7:19 UTC (permalink / raw)
To: Ao Zhong, Rodrigo Siqueira; +Cc: Leo Li, Harry Wentland, amd-gfx
Am 25.10.22 um 23:17 schrieb Ao Zhong:
> In the process of enabling DCN support for arm64, I found that the
> dcn10_resource_construct_fp function in dcn10/dcn10_resource.c still
> needs to use FPU. This will cause compilation to fail on ARM64 platforms
> because -mgeneral-regs-only is enabled by default to disable the
> hardware FPU. So move dcn10_resource_construct_fp from dcn10 folder to
> dml/dcn10 folder to enable hardware FPU for that function.
Offhand, that looks good to me, but our display team needs to take a look.
Feel free to add an Acked-by: Christian König <christian.koenig@amd.com>
for the series.
While at it, could you make sure that checkpatch.pl doesn't have anything
to complain about in the moved code?
Thanks for the help,
Christian.
>
> Signed-off-by: Ao Zhong <hacc1225@gmail.com>
> ---
> .../drm/amd/display/dc/dcn10/dcn10_resource.c | 44 +------------------
> .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.c | 38 ++++++++++++++++
> .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.h | 2 +
> 3 files changed, 42 insertions(+), 42 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
> index 56d30baf12df..6bfac8088ab0 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
> @@ -1295,47 +1295,6 @@ static uint32_t read_pipe_fuses(struct dc_context *ctx)
> return value;
> }
>
> -/*
> - * Some architectures don't support soft-float (e.g. aarch64), on those
> - * this function has to be called with hardfloat enabled, make sure not
> - * to inline it so whatever fp stuff is done stays inside
> - */
> -static noinline void dcn10_resource_construct_fp(
> - struct dc *dc)
> -{
> - if (dc->ctx->dce_version == DCN_VERSION_1_01) {
> - struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
> - struct dcn_ip_params *dcn_ip = dc->dcn_ip;
> - struct display_mode_lib *dml = &dc->dml;
> -
> - dml->ip.max_num_dpp = 3;
> - /* TODO how to handle 23.84? */
> - dcn_soc->dram_clock_change_latency = 23;
> - dcn_ip->max_num_dpp = 3;
> - }
> - if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
> - dc->dcn_soc->urgent_latency = 3;
> - dc->debug.disable_dmcu = true;
> - dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
> - }
> -
> -
> - dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
> - ASSERT(dc->dcn_soc->number_of_channels < 3);
> - if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
> - dc->dcn_soc->number_of_channels = 2;
> -
> - if (dc->dcn_soc->number_of_channels == 1) {
> - dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
> - dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
> - dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
> - dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
> - if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
> - dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
> - }
> - }
> -}
> -
> static bool verify_clock_values(struct dm_pp_clock_levels_with_voltage *clks)
> {
> int i;
> @@ -1510,8 +1469,9 @@ static bool dcn10_resource_construct(
> memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults));
> memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults));
>
> - /* Other architectures we build for build this with soft-float */
> + DC_FP_START();
> dcn10_resource_construct_fp(dc);
> + DC_FP_END();
>
> if (!dc->config.is_vmin_only_asic)
> if (ASICREV_IS_RAVEN2(dc->ctx->asic_id.hw_internal_rev))
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
> index 99644d896222..8b5e6fff5444 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
> @@ -27,6 +27,8 @@
> #include "dcn10/dcn10_resource.h"
>
> #include "dcn10_fpu.h"
> +#include "resource.h"
> +#include "amdgpu_dm/dc_fpu.h"
>
> /**
> * DOC: DCN10 FPU manipulation Overview
> @@ -121,3 +123,39 @@ struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc = {
> .writeback_dram_clock_change_latency_us = 23.0,
> .return_bus_width_bytes = 64,
> };
> +
> +void dcn10_resource_construct_fp(struct dc *dc)
> +{
> + dc_assert_fp_enabled();
> + if (dc->ctx->dce_version == DCN_VERSION_1_01) {
> + struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
> + struct dcn_ip_params *dcn_ip = dc->dcn_ip;
> + struct display_mode_lib *dml = &dc->dml;
> +
> + dml->ip.max_num_dpp = 3;
> + /* TODO how to handle 23.84? */
> + dcn_soc->dram_clock_change_latency = 23;
> + dcn_ip->max_num_dpp = 3;
> + }
> + if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
> + dc->dcn_soc->urgent_latency = 3;
> + dc->debug.disable_dmcu = true;
> + dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
> + }
> +
> +
> + dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
> + ASSERT(dc->dcn_soc->number_of_channels < 3);
> + if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
> + dc->dcn_soc->number_of_channels = 2;
> +
> + if (dc->dcn_soc->number_of_channels == 1) {
> + dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
> + dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
> + dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
> + dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
> + if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
> + dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
> + }
> + }
> +}
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
> index e74ed4b4ce5b..63219ecd8478 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
> @@ -27,4 +27,6 @@
> #ifndef __DCN10_FPU_H__
> #define __DCN10_FPU_H__
>
> +void dcn10_resource_construct_fp(struct dc *dc);
> +
> #endif /* __DCN20_FPU_H__ */
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v2 1/2] drm/amd/display: move remaining FPU code to dml folder
2022-10-26 7:19 ` Christian König
@ 2022-10-26 10:41 ` Ao Zhong
0 siblings, 0 replies; 18+ messages in thread
From: Ao Zhong @ 2022-10-26 10:41 UTC (permalink / raw)
To: Christian König; +Cc: Leo Li, Harry Wentland, Rodrigo Siqueira, amd-gfx
Hello Christian,
Thank you for your review. I got a warning when checking the first patch with checkpatch.pl.
I'll fix it in the next version.
---------------------------------------------------------------
0001-drm-amd-display-move-remaining-FPU-code-to-dml-folde.patch
---------------------------------------------------------------
WARNING:braces {} are not necessary for single statement blocks
#131: FILE: drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c:157:
+ if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
+ }
total: 0 errors, 1 warnings, 110 lines checked
NOTE: For some of the reported defects, checkpatch may be able to
mechanically convert to the typical style using --fix or --fix-inplace.
0001-drm-amd-display-move-remaining-FPU-code-to-dml-folde.patch has style problems, please review.
---------------------------------------------------------------
0002-drm-amd-display-move-remaining-FPU-code-to-dml-folde.patch
---------------------------------------------------------------
total: 0 errors, 0 warnings, 29 lines checked
0002-drm-amd-display-move-remaining-FPU-code-to-dml-folde.patch has no obvious style problems and is ready for submission.
NOTE: If any of the errors are false positives, please report
them to the maintainer, see CHECKPATCH in MAINTAINERS.
Am 26.10.22 um 09:19 schrieb Christian König:
> Am 25.10.22 um 23:17 schrieb Ao Zhong:
>> In the process of enabling DCN support for arm64, I found that the
>> dcn10_resource_construct_fp function in dcn10/dcn10_resource.c still
>> needs to use FPU. This will cause compilation to fail on ARM64 platforms
>> because -mgeneral-regs-only is enabled by default to disable the
>> hardware FPU. So move dcn10_resource_construct_fp from dcn10 folder to
>> dml/dcn10 folder to enable hardware FPU for that function.
>
> Offhand, that looks good to me, but our display team needs to take a look.
>
> Feel free to add an Acked-by: Christian König <christian.koenig@amd.com> for the series.
>
> While at it, could you make sure that checkpatch.pl doesn't have anything to complain about in the moved code?
>
> Thanks for the help,
> Christian.
>
>>
>> Signed-off-by: Ao Zhong <hacc1225@gmail.com>
>> ---
>> .../drm/amd/display/dc/dcn10/dcn10_resource.c | 44 +------------------
>> .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.c | 38 ++++++++++++++++
>> .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.h | 2 +
>> 3 files changed, 42 insertions(+), 42 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
>> index 56d30baf12df..6bfac8088ab0 100644
>> --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
>> +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
>> @@ -1295,47 +1295,6 @@ static uint32_t read_pipe_fuses(struct dc_context *ctx)
>> return value;
>> }
>> -/*
>> - * Some architectures don't support soft-float (e.g. aarch64), on those
>> - * this function has to be called with hardfloat enabled, make sure not
>> - * to inline it so whatever fp stuff is done stays inside
>> - */
>> -static noinline void dcn10_resource_construct_fp(
>> - struct dc *dc)
>> -{
>> - if (dc->ctx->dce_version == DCN_VERSION_1_01) {
>> - struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
>> - struct dcn_ip_params *dcn_ip = dc->dcn_ip;
>> - struct display_mode_lib *dml = &dc->dml;
>> -
>> - dml->ip.max_num_dpp = 3;
>> - /* TODO how to handle 23.84? */
>> - dcn_soc->dram_clock_change_latency = 23;
>> - dcn_ip->max_num_dpp = 3;
>> - }
>> - if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
>> - dc->dcn_soc->urgent_latency = 3;
>> - dc->debug.disable_dmcu = true;
>> - dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
>> - }
>> -
>> -
>> - dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
>> - ASSERT(dc->dcn_soc->number_of_channels < 3);
>> - if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
>> - dc->dcn_soc->number_of_channels = 2;
>> -
>> - if (dc->dcn_soc->number_of_channels == 1) {
>> - dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
>> - dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
>> - dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
>> - dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
>> - if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
>> - dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
>> - }
>> - }
>> -}
>> -
>> static bool verify_clock_values(struct dm_pp_clock_levels_with_voltage *clks)
>> {
>> int i;
>> @@ -1510,8 +1469,9 @@ static bool dcn10_resource_construct(
>> memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults));
>> memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults));
>> - /* Other architectures we build for build this with soft-float */
>> + DC_FP_START();
>> dcn10_resource_construct_fp(dc);
>> + DC_FP_END();
>> if (!dc->config.is_vmin_only_asic)
>> if (ASICREV_IS_RAVEN2(dc->ctx->asic_id.hw_internal_rev))
>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
>> index 99644d896222..8b5e6fff5444 100644
>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
>> @@ -27,6 +27,8 @@
>> #include "dcn10/dcn10_resource.h"
>> #include "dcn10_fpu.h"
>> +#include "resource.h"
>> +#include "amdgpu_dm/dc_fpu.h"
>> /**
>> * DOC: DCN10 FPU manipulation Overview
>> @@ -121,3 +123,39 @@ struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc = {
>> .writeback_dram_clock_change_latency_us = 23.0,
>> .return_bus_width_bytes = 64,
>> };
>> +
>> +void dcn10_resource_construct_fp(struct dc *dc)
>> +{
>> + dc_assert_fp_enabled();
>> + if (dc->ctx->dce_version == DCN_VERSION_1_01) {
>> + struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
>> + struct dcn_ip_params *dcn_ip = dc->dcn_ip;
>> + struct display_mode_lib *dml = &dc->dml;
>> +
>> + dml->ip.max_num_dpp = 3;
>> + /* TODO how to handle 23.84? */
>> + dcn_soc->dram_clock_change_latency = 23;
>> + dcn_ip->max_num_dpp = 3;
>> + }
>> + if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
>> + dc->dcn_soc->urgent_latency = 3;
>> + dc->debug.disable_dmcu = true;
>> + dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
>> + }
>> +
>> +
>> + dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
>> + ASSERT(dc->dcn_soc->number_of_channels < 3);
>> + if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
>> + dc->dcn_soc->number_of_channels = 2;
>> +
>> + if (dc->dcn_soc->number_of_channels == 1) {
>> + dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
>> + dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
>> + dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
>> + dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
>> + if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
>> + dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
>> + }
>> + }
>> +}
>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
>> index e74ed4b4ce5b..63219ecd8478 100644
>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
>> @@ -27,4 +27,6 @@
>> #ifndef __DCN10_FPU_H__
>> #define __DCN10_FPU_H__
>> +void dcn10_resource_construct_fp(struct dc *dc);
>> +
>> #endif /* __DCN20_FPU_H__ */
>
^ permalink raw reply [flat|nested] 18+ messages in thread
* [PATCH v2 1/2] drm/amd/display: move remaining FPU code to dml folder
2022-10-25 15:42 ` [PATCH RESEND] " Rodrigo Siqueira
@ 2022-10-26 11:02 ` Ao Zhong
2022-10-26 11:02 ` [PATCH v2 2/2] " Ao Zhong
` (2 subsequent siblings)
3 siblings, 0 replies; 18+ messages in thread
From: Ao Zhong @ 2022-10-26 11:02 UTC (permalink / raw)
To: Rodrigo Siqueira
Cc: Leo Li, Ao Zhong, Harry Wentland, Christian König, amd-gfx
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain, Size: 5470 bytes --]
In the process of enabling DCN support for arm64, I found that the
dcn10_resource_construct_fp function in dcn10/dcn10_resource.c still
needs to use FPU. This will cause compilation to fail on ARM64 platforms
because -mgeneral-regs-only is enabled by default to disable the
hardware FPU. So move dcn10_resource_construct_fp from dcn10 folder to
dml/dcn10 folder to enable hardware FPU for that function.
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Ao Zhong <hacc1225@gmail.com>
---
.../drm/amd/display/dc/dcn10/dcn10_resource.c | 44 +------------------
.../drm/amd/display/dc/dml/dcn10/dcn10_fpu.c | 37 ++++++++++++++++
.../drm/amd/display/dc/dml/dcn10/dcn10_fpu.h | 2 +
3 files changed, 41 insertions(+), 42 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index 56d30baf12df..6bfac8088ab0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
@@ -1295,47 +1295,6 @@ static uint32_t read_pipe_fuses(struct dc_context *ctx)
return value;
}
-/*
- * Some architectures don't support soft-float (e.g. aarch64), on those
- * this function has to be called with hardfloat enabled, make sure not
- * to inline it so whatever fp stuff is done stays inside
- */
-static noinline void dcn10_resource_construct_fp(
- struct dc *dc)
-{
- if (dc->ctx->dce_version == DCN_VERSION_1_01) {
- struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
- struct dcn_ip_params *dcn_ip = dc->dcn_ip;
- struct display_mode_lib *dml = &dc->dml;
-
- dml->ip.max_num_dpp = 3;
- /* TODO how to handle 23.84? */
- dcn_soc->dram_clock_change_latency = 23;
- dcn_ip->max_num_dpp = 3;
- }
- if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
- dc->dcn_soc->urgent_latency = 3;
- dc->debug.disable_dmcu = true;
- dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
- }
-
-
- dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
- ASSERT(dc->dcn_soc->number_of_channels < 3);
- if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
- dc->dcn_soc->number_of_channels = 2;
-
- if (dc->dcn_soc->number_of_channels == 1) {
- dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
- dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
- dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
- dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
- if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
- dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
- }
- }
-}
-
static bool verify_clock_values(struct dm_pp_clock_levels_with_voltage *clks)
{
int i;
@@ -1510,8 +1469,9 @@ static bool dcn10_resource_construct(
memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults));
memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults));
- /* Other architectures we build for build this with soft-float */
+ DC_FP_START();
dcn10_resource_construct_fp(dc);
+ DC_FP_END();
if (!dc->config.is_vmin_only_asic)
if (ASICREV_IS_RAVEN2(dc->ctx->asic_id.hw_internal_rev))
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
index 99644d896222..340636f1de9f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
@@ -27,6 +27,8 @@
#include "dcn10/dcn10_resource.h"
#include "dcn10_fpu.h"
+#include "resource.h"
+#include "amdgpu_dm/dc_fpu.h"
/**
* DOC: DCN10 FPU manipulation Overview
@@ -121,3 +123,38 @@ struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc = {
.writeback_dram_clock_change_latency_us = 23.0,
.return_bus_width_bytes = 64,
};
+
+void dcn10_resource_construct_fp(struct dc *dc)
+{
+ dc_assert_fp_enabled();
+ if (dc->ctx->dce_version == DCN_VERSION_1_01) {
+ struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
+ struct dcn_ip_params *dcn_ip = dc->dcn_ip;
+ struct display_mode_lib *dml = &dc->dml;
+
+ dml->ip.max_num_dpp = 3;
+ /* TODO how to handle 23.84? */
+ dcn_soc->dram_clock_change_latency = 23;
+ dcn_ip->max_num_dpp = 3;
+ }
+ if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
+ dc->dcn_soc->urgent_latency = 3;
+ dc->debug.disable_dmcu = true;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
+ }
+
+
+ dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
+ ASSERT(dc->dcn_soc->number_of_channels < 3);
+ if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
+ dc->dcn_soc->number_of_channels = 2;
+
+ if (dc->dcn_soc->number_of_channels == 1) {
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
+ if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev))
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
+ }
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
index e74ed4b4ce5b..63219ecd8478 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
@@ -27,4 +27,6 @@
#ifndef __DCN10_FPU_H__
#define __DCN10_FPU_H__
+void dcn10_resource_construct_fp(struct dc *dc);
+
#endif /* __DCN20_FPU_H__ */
--
2.37.4
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v2 2/2] drm/amd/display: move remaining FPU code to dml folder
2022-10-25 15:42 ` [PATCH RESEND] " Rodrigo Siqueira
2022-10-26 11:02 ` [PATCH v2 1/2] " Ao Zhong
@ 2022-10-26 11:02 ` Ao Zhong
2022-10-26 11:13 ` [PATCH v3 1/2] " Ao Zhong
2022-10-26 11:13 ` [PATCH v3 2/2] " Ao Zhong
3 siblings, 0 replies; 18+ messages in thread
From: Ao Zhong @ 2022-10-26 11:02 UTC (permalink / raw)
To: Rodrigo Siqueira
Cc: Leo Li, Ao Zhong, Harry Wentland, Christian König, amd-gfx
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain, Size: 2916 bytes --]
pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
these two operations in dcn32/dcn32_resource.c still need to use FPU,
This will cause compilation to fail on ARM64 platforms because
-mgeneral-regs-only is enabled by default to disable the hardware FPU.
Therefore, imitate the dcn31_zero_pipe_dcc_fraction function in
dml/dcn31/dcn31_fpu.c, declare the dcn32_zero_pipe_dcc_fraction function
in dcn32_fpu.c, and move above two operations into this function.
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Ao Zhong <hacc1225@gmail.com>
---
drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 5 +++--
drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 8 ++++++++
drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 3 +++
3 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index a88dd7b3d1c1..287b7fa9bf41 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -1918,8 +1918,9 @@ int dcn32_populate_dml_pipes_from_context(
timing = &pipe->stream->timing;
pipes[pipe_cnt].pipe.src.gpuvm = true;
- pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
- pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+ DC_FP_START();
+ dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
+ DC_FP_END();
pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet
pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 819de0f11012..58772fce6437 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -2521,3 +2521,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
}
}
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+ int pipe_cnt)
+{
+ dc_assert_fp_enabled();
+
+ pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
+ pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
index 3a3dc2ce4c73..ab010e7e840b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
@@ -73,4 +73,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+ int pipe_cnt);
+
#endif
--
2.37.4
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v3 1/2] drm/amd/display: move remaining FPU code to dml folder
2022-10-25 15:42 ` [PATCH RESEND] " Rodrigo Siqueira
2022-10-26 11:02 ` [PATCH v2 1/2] " Ao Zhong
2022-10-26 11:02 ` [PATCH v2 2/2] " Ao Zhong
@ 2022-10-26 11:13 ` Ao Zhong
2022-10-26 11:13 ` [PATCH v3 2/2] " Ao Zhong
3 siblings, 0 replies; 18+ messages in thread
From: Ao Zhong @ 2022-10-26 11:13 UTC (permalink / raw)
To: Rodrigo Siqueira
Cc: Leo Li, Ao Zhong, Harry Wentland, Christian König, amd-gfx
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain, Size: 5470 bytes --]
In the process of enabling DCN support for arm64, I found that the
dcn10_resource_construct_fp function in dcn10/dcn10_resource.c still
needs to use FPU. This will cause compilation to fail on ARM64 platforms
because -mgeneral-regs-only is enabled by default to disable the
hardware FPU. So move dcn10_resource_construct_fp from dcn10 folder to
dml/dcn10 folder to enable hardware FPU for that function.
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Ao Zhong <hacc1225@gmail.com>
---
.../drm/amd/display/dc/dcn10/dcn10_resource.c | 44 +------------------
.../drm/amd/display/dc/dml/dcn10/dcn10_fpu.c | 37 ++++++++++++++++
.../drm/amd/display/dc/dml/dcn10/dcn10_fpu.h | 2 +
3 files changed, 41 insertions(+), 42 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index 56d30baf12df..6bfac8088ab0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
@@ -1295,47 +1295,6 @@ static uint32_t read_pipe_fuses(struct dc_context *ctx)
return value;
}
-/*
- * Some architectures don't support soft-float (e.g. aarch64), on those
- * this function has to be called with hardfloat enabled, make sure not
- * to inline it so whatever fp stuff is done stays inside
- */
-static noinline void dcn10_resource_construct_fp(
- struct dc *dc)
-{
- if (dc->ctx->dce_version == DCN_VERSION_1_01) {
- struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
- struct dcn_ip_params *dcn_ip = dc->dcn_ip;
- struct display_mode_lib *dml = &dc->dml;
-
- dml->ip.max_num_dpp = 3;
- /* TODO how to handle 23.84? */
- dcn_soc->dram_clock_change_latency = 23;
- dcn_ip->max_num_dpp = 3;
- }
- if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
- dc->dcn_soc->urgent_latency = 3;
- dc->debug.disable_dmcu = true;
- dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
- }
-
-
- dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
- ASSERT(dc->dcn_soc->number_of_channels < 3);
- if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
- dc->dcn_soc->number_of_channels = 2;
-
- if (dc->dcn_soc->number_of_channels == 1) {
- dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
- dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
- dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
- dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
- if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
- dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
- }
- }
-}
-
static bool verify_clock_values(struct dm_pp_clock_levels_with_voltage *clks)
{
int i;
@@ -1510,8 +1469,9 @@ static bool dcn10_resource_construct(
memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults));
memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults));
- /* Other architectures we build for build this with soft-float */
+ DC_FP_START();
dcn10_resource_construct_fp(dc);
+ DC_FP_END();
if (!dc->config.is_vmin_only_asic)
if (ASICREV_IS_RAVEN2(dc->ctx->asic_id.hw_internal_rev))
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
index 99644d896222..340636f1de9f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
@@ -27,6 +27,8 @@
#include "dcn10/dcn10_resource.h"
#include "dcn10_fpu.h"
+#include "resource.h"
+#include "amdgpu_dm/dc_fpu.h"
/**
* DOC: DCN10 FPU manipulation Overview
@@ -121,3 +123,38 @@ struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc = {
.writeback_dram_clock_change_latency_us = 23.0,
.return_bus_width_bytes = 64,
};
+
+void dcn10_resource_construct_fp(struct dc *dc)
+{
+ dc_assert_fp_enabled();
+ if (dc->ctx->dce_version == DCN_VERSION_1_01) {
+ struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
+ struct dcn_ip_params *dcn_ip = dc->dcn_ip;
+ struct display_mode_lib *dml = &dc->dml;
+
+ dml->ip.max_num_dpp = 3;
+ /* TODO how to handle 23.84? */
+ dcn_soc->dram_clock_change_latency = 23;
+ dcn_ip->max_num_dpp = 3;
+ }
+ if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
+ dc->dcn_soc->urgent_latency = 3;
+ dc->debug.disable_dmcu = true;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
+ }
+
+
+ dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
+ ASSERT(dc->dcn_soc->number_of_channels < 3);
+ if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
+ dc->dcn_soc->number_of_channels = 2;
+
+ if (dc->dcn_soc->number_of_channels == 1) {
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
+ if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev))
+ dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
+ }
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
index e74ed4b4ce5b..63219ecd8478 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
@@ -27,4 +27,6 @@
#ifndef __DCN10_FPU_H__
#define __DCN10_FPU_H__
+void dcn10_resource_construct_fp(struct dc *dc);
+
#endif /* __DCN20_FPU_H__ */
--
2.37.4
^ permalink raw reply related [flat|nested] 18+ messages in thread
* [PATCH v3 2/2] drm/amd/display: move remaining FPU code to dml folder
2022-10-25 15:42 ` [PATCH RESEND] " Rodrigo Siqueira
` (2 preceding siblings ...)
2022-10-26 11:13 ` [PATCH v3 1/2] " Ao Zhong
@ 2022-10-26 11:13 ` Ao Zhong
2022-10-26 16:12 ` Rodrigo Siqueira
3 siblings, 1 reply; 18+ messages in thread
From: Ao Zhong @ 2022-10-26 11:13 UTC (permalink / raw)
To: Rodrigo Siqueira
Cc: Leo Li, Ao Zhong, Harry Wentland, Christian König, amd-gfx
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain, Size: 2916 bytes --]
pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
these two operations in dcn32/dcn32_resource.c still need to use FPU,
This will cause compilation to fail on ARM64 platforms because
-mgeneral-regs-only is enabled by default to disable the hardware FPU.
Therefore, imitate the dcn31_zero_pipe_dcc_fraction function in
dml/dcn31/dcn31_fpu.c, declare the dcn32_zero_pipe_dcc_fraction function
in dcn32_fpu.c, and move above two operations into this function.
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Ao Zhong <hacc1225@gmail.com>
---
drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 5 +++--
drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 8 ++++++++
drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 3 +++
3 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index a88dd7b3d1c1..287b7fa9bf41 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -1918,8 +1918,9 @@ int dcn32_populate_dml_pipes_from_context(
timing = &pipe->stream->timing;
pipes[pipe_cnt].pipe.src.gpuvm = true;
- pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
- pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+ DC_FP_START();
+ dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
+ DC_FP_END();
pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet
pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 819de0f11012..58772fce6437 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -2521,3 +2521,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
}
}
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+ int pipe_cnt)
+{
+ dc_assert_fp_enabled();
+
+ pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
+ pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
index 3a3dc2ce4c73..ab010e7e840b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
@@ -73,4 +73,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+ int pipe_cnt);
+
#endif
--
2.37.4
^ permalink raw reply related [flat|nested] 18+ messages in thread
* Re: [PATCH v3 2/2] drm/amd/display: move remaining FPU code to dml folder
2022-10-26 11:13 ` [PATCH v3 2/2] " Ao Zhong
@ 2022-10-26 16:12 ` Rodrigo Siqueira
2022-10-26 21:17 ` Ao Zhong
0 siblings, 1 reply; 18+ messages in thread
From: Rodrigo Siqueira @ 2022-10-26 16:12 UTC (permalink / raw)
To: Ao Zhong; +Cc: Leo Li, Harry Wentland, Christian König, amd-gfx
On 10/26/22 07:13, Ao Zhong wrote:
> pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
> pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
> these two operations in dcn32/dcn32_resource.c still need to use FPU,
> This will cause compilation to fail on ARM64 platforms because
> -mgeneral-regs-only is enabled by default to disable the hardware FPU.
> Therefore, imitate the dcn31_zero_pipe_dcc_fraction function in
> dml/dcn31/dcn31_fpu.c, declare the dcn32_zero_pipe_dcc_fraction function
> in dcn32_fpu.c, and move above two operations into this function.
>
> Acked-by: Christian König <christian.koenig@amd.com>
> Signed-off-by: Ao Zhong <hacc1225@gmail.com>
> ---
> drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 5 +++--
> drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 8 ++++++++
> drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 3 +++
> 3 files changed, 14 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
> index a88dd7b3d1c1..287b7fa9bf41 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
> @@ -1918,8 +1918,9 @@ int dcn32_populate_dml_pipes_from_context(
> timing = &pipe->stream->timing;
>
> pipes[pipe_cnt].pipe.src.gpuvm = true;
> - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
> - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
> + DC_FP_START();
> + dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
> + DC_FP_END();
> pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
> pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet
> pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
> index 819de0f11012..58772fce6437 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
> @@ -2521,3 +2521,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
> }
> }
>
> +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
> + int pipe_cnt)
> +{
> + dc_assert_fp_enabled();
> +
> + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
> + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
> +}
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
> index 3a3dc2ce4c73..ab010e7e840b 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
> @@ -73,4 +73,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
>
> void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
>
> +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
> + int pipe_cnt);
> +
> #endif
Hi Ao,
First of all, thanks a lot for your patchset.
For both patches:
Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
And I also applied them to amd-staging-drm-next.
Btw, if you are using git-send-email for sending patches, I recommend
the following options:
git send-email --annotate --cover-letter --thread --no-chain-reply-to
--to="EMAILS" --cc="mailing@list.com" <SHA>
Always add a cover letter, it makes it easier to follow the patchset,
and you can also describe each change in the cover letter.
When you send that other patch enabling ARM64, please add as many
details as possible in the cover letter. Keep in mind that we have been
working on isolating the FPU code in a way that does not regress
any of our ASICs, which means that every change was well-tested on
multiple devices. Anyway, maybe you can refer to this cover letter when
writing the commit message:
https://patchwork.freedesktop.org/series/93042/
Finally, do you have a use case for this change? I mean, ARM64 + AMD dGPU.
Thanks again!
Siqueira
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v3 2/2] drm/amd/display: move remaining FPU code to dml folder
2022-10-26 16:12 ` Rodrigo Siqueira
@ 2022-10-26 21:17 ` Ao Zhong
2022-10-27 15:38 ` Rodrigo Siqueira
0 siblings, 1 reply; 18+ messages in thread
From: Ao Zhong @ 2022-10-26 21:17 UTC (permalink / raw)
To: Rodrigo Siqueira; +Cc: amd-gfx
Hi Rodrigo,
Thanks for your review! This is my first time submitting a patch to the kernel.
I'm not very good at using these tools yet. 😂
Recently I got a Huawei Qingyun W510 (擎云 W510) ARM workstation
from the second-hand market in China. It's SBSA and has a Kunpeng 920 (3211k) SoC
with 24 Huawei-customized TSV110 cores. Since it's SFF form factor, and my machine
supports PCIe 4.0 (it looks like some W510 units have it disabled), I installed an RX 6400 in it
and use it as my daily driver. It has decent performance. I uploaded a benchmark result to Geekbench.
Link: https://browser.geekbench.com/v5/cpu/18237269
Ao
Am 26.10.22 um 18:12 schrieb Rodrigo Siqueira:
>
>
> On 10/26/22 07:13, Ao Zhong wrote:
>> pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
>> pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
>> these two operations in dcn32/dcn32_resource.c still need to use FPU,
>> This will cause compilation to fail on ARM64 platforms because
>> -mgeneral-regs-only is enabled by default to disable the hardware FPU.
>> Therefore, imitate the dcn31_zero_pipe_dcc_fraction function in
>> dml/dcn31/dcn31_fpu.c, declare the dcn32_zero_pipe_dcc_fraction function
>> in dcn32_fpu.c, and move above two operations into this function.
>>
>> Acked-by: Christian König <christian.koenig@amd.com>
>> Signed-off-by: Ao Zhong <hacc1225@gmail.com>
>> ---
>> drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 5 +++--
>> drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 8 ++++++++
>> drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 3 +++
>> 3 files changed, 14 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
>> index a88dd7b3d1c1..287b7fa9bf41 100644
>> --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
>> +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
>> @@ -1918,8 +1918,9 @@ int dcn32_populate_dml_pipes_from_context(
>> timing = &pipe->stream->timing;
>> pipes[pipe_cnt].pipe.src.gpuvm = true;
>> - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
>> - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
>> + DC_FP_START();
>> + dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
>> + DC_FP_END();
>> pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
>> pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet
>> pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
>> index 819de0f11012..58772fce6437 100644
>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
>> @@ -2521,3 +2521,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
>> }
>> }
>> +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
>> + int pipe_cnt)
>> +{
>> + dc_assert_fp_enabled();
>> +
>> + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
>> + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
>> +}
>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
>> index 3a3dc2ce4c73..ab010e7e840b 100644
>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
>> @@ -73,4 +73,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
>> void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
>> +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
>> + int pipe_cnt);
>> +
>> #endif
>
> Hi Ao,
>
> First of all, thanks a lot for your patchset.
>
> For both patches:
>
> Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
>
> And I also applied them to amd-staging-drm-next.
>
> Btw, if you are using git-send-email for sending patches, I recommend the following options:
>
> git send-email --annotate --cover-letter --thread --no-chain-reply-to --to="EMAILS" --cc="mailing@list.com" <SHA>
>
> Always add a cover letter, it makes it easier to follow the patchset, and you can also describe each change in the cover letter.
>
> When you send that other patch enabling ARM64, please add as many details as possible in the cover letter. Keep in mind that we have been working for isolating those FPU codes in a way that we do not regress any of our ASICs, which means that every change was well-tested on multiple devices. Anyway, maybe you can refer to this cover letter to write down the commit message:
>
> https://patchwork.freedesktop.org/series/93042/
>
> Finally, do you have a use case for this change? I mean, ARM64 + AMD dGPU.
>
> Thanks again!
> Siqueira
>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v3 2/2] drm/amd/display: move remaining FPU code to dml folder
2022-10-26 21:17 ` Ao Zhong
@ 2022-10-27 15:38 ` Rodrigo Siqueira
2022-10-27 16:48 ` Ao Zhong
2022-10-27 16:51 ` Ao Zhong
0 siblings, 2 replies; 18+ messages in thread
From: Rodrigo Siqueira @ 2022-10-27 15:38 UTC (permalink / raw)
To: Ao Zhong; +Cc: amd-gfx
Hi Ao,
Could you share a link that describes your workstation?
Thanks
On 10/26/22 17:17, Ao Zhong wrote:
> Hi Rodrigo,
>
> Thanks for your review! This is my first time submitting a patch to the kernel.
>
> I'm not very good at using these tools yet. 😂
>
> Recently I got a Huawei Qingyun W510 (擎云 W510) ARM workstation
>
> from the second-hand market in China. It's SBSA and has a Kunpeng 920 (3211k) SoC
>
> with 24 Huawei-customized TSV110 cores. Since it's SFF form factor, and my machine
>
> supports PCIe 4.0 (looks like some W510 have it disabled), I installed an RX 6400 on it
>
> as my daily drive machine. It has decent performance. I uploaded a benchmark result on Geekbench.
>
> Link: https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fbrowser.geekbench.com%2Fv5%2Fcpu%2F18237269&data=05%7C01%7CRodrigo.Siqueira%40amd.com%7Cdaa18df14f004d2d621d08dab7977866%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C638024158436988558%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&sdata=Iyq4tWJL%2FfXuKB9xAUaVTQQmJQ0GRZ2rH%2F%2BXPTT%2F2tc%3D&reserved=0
>
> Ao
>
> Am 26.10.22 um 18:12 schrieb Rodrigo Siqueira:
>>
>>
>> On 10/26/22 07:13, Ao Zhong wrote:
>>> pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
>>> pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
>>> these two operations in dcn32/dcn32_resource.c still need to use FPU,
>>> This will cause compilation to fail on ARM64 platforms because
>>> -mgeneral-regs-only is enabled by default to disable the hardware FPU.
>>> Therefore, imitate the dcn31_zero_pipe_dcc_fraction function in
>>> dml/dcn31/dcn31_fpu.c, declare the dcn32_zero_pipe_dcc_fraction function
>>> in dcn32_fpu.c, and move above two operations into this function.
>>>
>>> Acked-by: Christian König <christian.koenig@amd.com>
>>> Signed-off-by: Ao Zhong <hacc1225@gmail.com>
>>> ---
>>> drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 5 +++--
>>> drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 8 ++++++++
>>> drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 3 +++
>>> 3 files changed, 14 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
>>> index a88dd7b3d1c1..287b7fa9bf41 100644
>>> --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
>>> +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
>>> @@ -1918,8 +1918,9 @@ int dcn32_populate_dml_pipes_from_context(
>>> timing = &pipe->stream->timing;
>>> pipes[pipe_cnt].pipe.src.gpuvm = true;
>>> - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
>>> - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
>>> + DC_FP_START();
>>> + dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
>>> + DC_FP_END();
>>> pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
>>> pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet
>>> pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
>>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
>>> index 819de0f11012..58772fce6437 100644
>>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
>>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
>>> @@ -2521,3 +2521,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
>>> }
>>> }
>>> +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
>>> + int pipe_cnt)
>>> +{
>>> + dc_assert_fp_enabled();
>>> +
>>> + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
>>> + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
>>> +}
>>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
>>> index 3a3dc2ce4c73..ab010e7e840b 100644
>>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
>>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
>>> @@ -73,4 +73,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
>>> void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
>>> +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
>>> + int pipe_cnt);
>>> +
>>> #endif
>>
>> Hi Ao,
>>
>> First of all, thanks a lot for your patchset.
>>
>> For both patches:
>>
>> Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
>>
>> And I also applied them to amd-staging-drm-next.
>>
>> Btw, if you are using git-send-email for sending patches, I recommend the following options:
>>
>> git send-email --annotate --cover-letter --thread --no-chain-reply-to --to="EMAILS" --cc="mailing@list.com" <SHA>
>>
>> Always add a cover letter, it makes it easier to follow the patchset, and you can also describe each change in the cover letter.
>>
>> When you send that other patch enabling ARM64, please add as many details as possible in the cover letter. Keep in mind that we have been working for isolating those FPU codes in a way that we do not regress any of our ASICs, which means that every change was well-tested on multiple devices. Anyway, maybe you can refer to this cover letter to write down the commit message:
>>
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fpatchwork.freedesktop.org%2Fseries%2F93042%2F&data=05%7C01%7CRodrigo.Siqueira%40amd.com%7Cdaa18df14f004d2d621d08dab7977866%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C638024158436988558%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&sdata=0GMN1Uj9iuQv2ZjipDHnl29V0UvWk6IL4XwlehdPNLA%3D&reserved=0
>>
>> Finally, do you have a use case for this change? I mean, ARM64 + AMD dGPU.
>>
>> Thanks again!
>> Siqueira
>>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v3 2/2] drm/amd/display: move remaining FPU code to dml folder
2022-10-27 15:38 ` Rodrigo Siqueira
@ 2022-10-27 16:48 ` Ao Zhong
2022-10-27 16:51 ` Ao Zhong
1 sibling, 0 replies; 18+ messages in thread
From: Ao Zhong @ 2022-10-27 16:48 UTC (permalink / raw)
To: Rodrigo Siqueira; +Cc: amd-gfx
There isn't much information on the internet about the Qingyun W510, as this
is not a retail machine. But I'm happy to provide any details about
this machine.
The Qingyun W510 is powered by Huawei's server SoC Kunpeng 920, it's
SBSA compatible.
Information about Kunpeng 920 can be found here.
Link: https://en.wikichip.org/wiki/hisilicon/kunpeng/920-6426
But not all the functions provided by the Kunpeng 920 can be used on the
Qingyun W510, such as the SMMU (IOMMU on ARM), SAS controller or Encryption
Acceleration Engine. This machine has an SFF form factor; it has only two
SO-DIMM memory slots and doesn't support ECC (some Kunpeng desktop
motherboards support that), plus 1x PCIe x4, 1x PCIe x16 and 2x M.2 slots
(PCIe x4).
It also has 2 SATA 3.0 ports, one for an optical drive and the other for
the HDD. This machine will be shipped with AMD's RX550 or Jingjia
Micro JM7201 GPU. My machine comes with JM7201, it's a GPU
independently developed by China. Unfortunately, since there is no
open source driver, I can only use EFI framebuffer with mainline
kernel. Qingyun W510 also has a Huawei's Hi1103LPC WiFi/Bluetooth
module, and a power button with a Goodix fingerprint sensor. Since
none of them have open source drivers, I can't use them with mainline
kernel.
There are also two similar-looking machines, Qingyun W515 and Qingyun
W525, which use HiSilicon Kirin 990 SoC and HiSilicon Pangu M900 SoC,
which are based on mobile platforms.
My workstation should be a DVT-stage product, because Huawei only
allows users to use PCIe 3.0 in the release version of the Qingyun W510.
Some machines may not be able to install more than 32G of memory due
to firmware.
Am Do., 27. Okt. 2022 um 17:38 Uhr schrieb Rodrigo Siqueira
<Rodrigo.Siqueira@amd.com>:
>
> Hi Ao,
>
> Could you share a link that describe your workstation?
>
> Thanks
>
> On 10/26/22 17:17, Ao Zhong wrote:
> > Hi Rodrigo,
> >
> > Thanks for your review! This is my first time submitting a patch to the kernel.
> >
> > I'm not very good at using these tools yet. 😂
> >
> > Recently I got a Huawei Qingyun W510 (擎云 W510) ARM workstation
> >
> > from the second-hand market in China. It's SBSA and has a Kunpeng 920 (3211k) SoC
> >
> > with 24 Huawei-customized TSV110 cores. Since it's SFF form factor, and my machine
> >
> > supports PCIe 4.0 (looks like some W510 have it disabled), I installed an RX 6400 on it
> >
> > as my daily drive machine. It has decent performance. I uploaded a benchmark result on Geekbench.
> >
> > Link: https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fbrowser.geekbench.com%2Fv5%2Fcpu%2F18237269&data=05%7C01%7CRodrigo.Siqueira%40amd.com%7Cdaa18df14f004d2d621d08dab7977866%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C638024158436988558%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&sdata=Iyq4tWJL%2FfXuKB9xAUaVTQQmJQ0GRZ2rH%2F%2BXPTT%2F2tc%3D&reserved=0
> >
> > Ao
> >
> > Am 26.10.22 um 18:12 schrieb Rodrigo Siqueira:
> >>
> >>
> >> On 10/26/22 07:13, Ao Zhong wrote:
> >>> pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
> >>> pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
> >>> these two operations in dcn32/dcn32_resource.c still need to use FPU,
> >>> This will cause compilation to fail on ARM64 platforms because
> >>> -mgeneral-regs-only is enabled by default to disable the hardware FPU.
> >>> Therefore, imitate the dcn31_zero_pipe_dcc_fraction function in
> >>> dml/dcn31/dcn31_fpu.c, declare the dcn32_zero_pipe_dcc_fraction function
> >>> in dcn32_fpu.c, and move above two operations into this function.
> >>>
> >>> Acked-by: Christian König <christian.koenig@amd.com>
> >>> Signed-off-by: Ao Zhong <hacc1225@gmail.com>
> >>> ---
> >>> drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 5 +++--
> >>> drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 8 ++++++++
> >>> drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 3 +++
> >>> 3 files changed, 14 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
> >>> index a88dd7b3d1c1..287b7fa9bf41 100644
> >>> --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
> >>> +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
> >>> @@ -1918,8 +1918,9 @@ int dcn32_populate_dml_pipes_from_context(
> >>> timing = &pipe->stream->timing;
> >>> pipes[pipe_cnt].pipe.src.gpuvm = true;
> >>> - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
> >>> - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
> >>> + DC_FP_START();
> >>> + dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
> >>> + DC_FP_END();
> >>> pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
> >>> pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet
> >>> pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
> >>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
> >>> index 819de0f11012..58772fce6437 100644
> >>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
> >>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
> >>> @@ -2521,3 +2521,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
> >>> }
> >>> }
> >>> +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
> >>> + int pipe_cnt)
> >>> +{
> >>> + dc_assert_fp_enabled();
> >>> +
> >>> + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
> >>> + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
> >>> +}
> >>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
> >>> index 3a3dc2ce4c73..ab010e7e840b 100644
> >>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
> >>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
> >>> @@ -73,4 +73,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
> >>> void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
> >>> +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
> >>> + int pipe_cnt);
> >>> +
> >>> #endif
> >>
> >> Hi Ao,
> >>
> >> First of all, thanks a lot for your patchset.
> >>
> >> For both patches:
> >>
> >> Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
> >>
> >> And I also applied them to amd-staging-drm-next.
> >>
> >> Btw, if you are using git-send-email for sending patches, I recommend the following options:
> >>
> >> git send-email --annotate --cover-letter --thread --no-chain-reply-to --to="EMAILS" --cc="mailing@list.com" <SHA>
> >>
> >> Always add a cover letter, it makes it easier to follow the patchset, and you can also describe each change in the cover letter.
> >>
> >> When you send that other patch enabling ARM64, please add as many details as possible in the cover letter. Keep in mind that we have been working for isolating those FPU codes in a way that we do not regress any of our ASICs, which means that every change was well-tested on multiple devices. Anyway, maybe you can refer to this cover letter to write down the commit message:
> >>
> >> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fpatchwork.freedesktop.org%2Fseries%2F93042%2F&data=05%7C01%7CRodrigo.Siqueira%40amd.com%7Cdaa18df14f004d2d621d08dab7977866%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C638024158436988558%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&sdata=0GMN1Uj9iuQv2ZjipDHnl29V0UvWk6IL4XwlehdPNLA%3D&reserved=0
> >>
> >> Finally, do you have a use case for this change? I mean, ARM64 + AMD dGPU.
> >>
> >> Thanks again!
> >> Siqueira
> >>
^ permalink raw reply [flat|nested] 18+ messages in thread
* Re: [PATCH v3 2/2] drm/amd/display: move remaining FPU code to dml folder
2022-10-27 15:38 ` Rodrigo Siqueira
2022-10-27 16:48 ` Ao Zhong
@ 2022-10-27 16:51 ` Ao Zhong
1 sibling, 0 replies; 18+ messages in thread
From: Ao Zhong @ 2022-10-27 16:51 UTC (permalink / raw)
To: Rodrigo Siqueira; +Cc: amd-gfx
Here is some lshw information:
hacc-arm64-pc
description: Desktop Computer
product: HUAWEIPGU-WBY0 (C233)
vendor: HUAWEI
version: D1060
serial:
width: 64 bits
capabilities: smbios-3.2.0 dmi-3.2.0 smp cp15_barrier setend swp tagged_addr_disabled
configuration: chassis=desktop family=HUAWEI sku=C233 uuid=
*-core
description: Motherboard
product: HUAWEIPGU-WBY0-PCB
vendor: HUAWEI
physical id: 0
version: D1060
serial:
slot: Null
*-firmware
description: BIOS
vendor: Byosoft
physical id: 2
version: 1.11
date: 02/07/2020
size: 128KiB
capabilities: pci pnp upgrade cdboot bootselect edd acpi biosbootspecification uefi
*-cache:0
description: L1 cache
physical id: 5
slot: L1 Instruction Cache
size: 1536KiB
capacity: 1536KiB
capabilities: synchronous internal write-back instruction
configuration: level=1
*-cache:1
description: L1 cache
physical id: 6
slot: L1 Data Cache
size: 1536KiB
capacity: 1536KiB
capabilities: synchronous internal write-back data
configuration: level=1
*-cache:2
description: L2 cache
physical id: 7
slot: L2 Cache
size: 12MiB
capacity: 12MiB
capabilities: synchronous internal varies unified
configuration: level=2
*-cache:3
description: L3 cache
physical id: 8
slot: L3 Cache
size: 24MiB
capacity: 24MiB
capabilities: synchronous internal varies unified
configuration: level=3
*-cpu
description: CPU
product: ARM (NULL)
vendor: HISILICON
physical id: 9
bus info: cpu@0
version: HUAWEI Kunpeng920 3211K
serial: NULL
slot: CPU0
size: 2600MHz
capacity: 2600MHz
clock: 100MHz
capabilities: lm
configuration: cores=24 enabledcores=24 threads=24
*-memory
description: System Memory
physical id: a
slot: System board or motherboard
size: 64GiB
*-bank:0
description: DIMM DDR4 Synchronous 3200 MHz (0,3 ns)
product: F4-3200C22-32GRS
vendor: Unknown
physical id: 0
serial:
slot: SODIMM_B
size: 32GiB
width: 64 bits
clock: 3200MHz (0.3ns)
*-bank:1
description: DIMM DDR4 Synchronous 3200 MHz (0,3 ns)
product: F4-3200C22-32GRS
vendor: Unknown
physical id: 1
serial:
slot: SODIMM_A
size: 32GiB
width: 64 bits
clock: 3200MHz (0.3ns)
*-pci:0
description: PCI bridge
product: HiSilicon PCIe Root Port with Gen4
vendor: Huawei Technologies Co., Ltd.
physical id: 100
bus info: pci@0000:00:00.0
version: 21
width: 32 bits
clock: 33MHz
capabilities: pci pciexpress msi pm normal_decode bus_master cap_list
configuration: driver=pcieport
resources: irq:30 ioport:1000(size=4096) memory:e0c00000-e0efffff ioport:80080000000(size=6442450944)
*-pci
description: PCI bridge
product: Navi 10 XL Upstream Port of PCI Express Switch
vendor: Advanced Micro Devices, Inc. [AMD/ATI]
physical id: 0
bus info: pci@0000:01:00.0
version: c7
width: 32 bits
clock: 33MHz
capabilities: pci pm pciexpress msi normal_decode bus_master cap_list
configuration: driver=pcieport
resources: irq:29 memory:e0e00000-e0e03fff ioport:1000(size=4096) memory:e0c00000-e0dfffff ioport:80080000000(size=6442450944)
*-pci
description: PCI bridge
product: Navi 10 XL Downstream Port of PCI Express Switch
vendor: Advanced Micro Devices, Inc. [AMD/ATI]
physical id: 0
bus info: pci@0000:02:00.0
version: 00
width: 32 bits
clock: 33MHz
capabilities: pci pm pciexpress msi normal_decode bus_master cap_list
configuration: driver=pcieport
resources: irq:37 ioport:1000(size=4096) memory:e0c00000-e0dfffff ioport:80080000000(size=6442450944)
*-display
description: VGA compatible controller
product: Navi 24 [Radeon RX 6400 / 6500 XT]
vendor: Advanced Micro Devices, Inc. [AMD/ATI]
physical id: 0
bus info: pci@0000:03:00.0
logical name: /dev/fb0
version: c7
width: 64 bits
clock: 33MHz
capabilities: pm pciexpress msi vga_controller bus_master cap_list rom fb
configuration: depth=32 driver=amdgpu latency=0 mode=3440x1440 resolution=3440,1440 visual=truecolor xres=3440 yres=1440
resources: iomemory:8010-800f iomemory:8000-7fff irq:250 memory:80100000000-801ffffffff memory:80080000000-800801fffff ioport:1000(size=256) memory:e0c00000-e0cfffff memory:e0d00000-e0d1ffff
*-multimedia
description: Audio device
product: Navi 21/23 HDMI/DP Audio Controller
vendor: Advanced Micro Devices, Inc. [AMD/ATI]
physical id: 0.1
bus info: pci@0000:03:00.1
logical name: card0
logical name: /dev/snd/controlC0
logical name: /dev/snd/hwC0D0
logical name: /dev/snd/pcmC0D3p
logical name: /dev/snd/pcmC0D7p
version: 00
width: 32 bits
clock: 33MHz
capabilities: pm pciexpress msi bus_master cap_list
configuration: driver=snd_hda_intel latency=0
resources: irq:248 memory:e0d20000-e0d23fff
*-input:0
product: HDA ATI HDMI HDMI/DP,pcm=3
physical id: 0
logical name: input4
logical name: /dev/input/event4
*-input:1
product: HDA ATI HDMI HDMI/DP,pcm=7
physical id: 1
logical name: input5
logical name: /dev/input/event5
*-pci:1
description: PCI bridge
product: HiSilicon PCIe Root Port with Gen4
vendor: Huawei Technologies Co., Ltd.
physical id: 101
bus info: pci@0000:00:08.0
version: 21
width: 32 bits
clock: 33MHz
capabilities: pci pciexpress msi pm normal_decode bus_master cap_list
configuration: driver=pcieport
resources: irq:31 ioport:2000(size=4096) memory:e0f00000-e10fffff ioport:80018000000(size=2097152)
*-nvme
description: NVMe device
product: KINGSTON SNV2S1000G
vendor: Kingston Technology Company, Inc.
physical id: 0
bus info: pci@0000:04:00.0
logical name: /dev/nvme0
version: SBI02102
serial:
width: 64 bits
clock: 33MHz
capabilities: nvme pm msi pciexpress msix nvm_express bus_master cap_list
configuration: driver=nvme latency=0 nqn=nqn.2021-03.com.kingston:nvme:nvm-subsystem-sn- state=live
resources: irq:29 memory:e0f00000-e0f03fff
*-namespace:0
description: NVMe disk
physical id: 0
logical name: hwmon1
*-namespace:1
description: NVMe disk
physical id: 2
logical name: /dev/ng0n1
*-namespace:2
description: NVMe disk
physical id: 1
bus info: nvme@0:1
logical name: /dev/nvme0n1
size: 931GiB (1TB)
capabilities: gpt-1.00 partitioned partitioned:gpt
configuration: guid= logicalsectorsize=512 sectorsize=512 wwid=eui.00000000000000000026b7784e21fbc5
*-volume:0 UNCLAIMED
description: Windows FAT volume
vendor: mkfs.fat
physical id: 1
bus info: nvme@0:1,1
version: FAT32
serial:
size: 98MiB
capacity: 99MiB
capabilities: boot fat initialized
configuration: FATs=2 filesystem=fat
*-volume:1
description: EFI partition
physical id: 2
bus info: nvme@0:1,2
logical name: /dev/nvme0n1p2
logical name: /
logical name: /home
serial:
capacity: 866GiB
configuration: mount.fstype=btrfs mount.options=rw,relatime,compress=zstd:1,ssd,discard=async,space_cache=v2,subvolid=257,subvol=/@home state=mounted
*-volume:2
description: EFI partition
physical id: 3
bus info: nvme@0:1,3
logical name: /dev/nvme0n1p3
logical name: /boot
serial:
capacity: 1023MiB
configuration: mount.fstype=xfs mount.options=rw,relatime,attr2,discard,inode64,logbufs=8,logbsize=32k,noquota state=mounted
*-volume:3
description: Linux swap volume
vendor: Linux
physical id: 4
bus info: nvme@0:1,4
logical name: /dev/nvme0n1p4
version: 1
serial:
size: 64GiB
capacity: 64GiB
capabilities: nofs swap initialized
configuration: filesystem=swap pagesize=4096
*-pci:2
description: PCI bridge
product: HiSilicon PCIe Root Port with Gen4
vendor: Huawei Technologies Co., Ltd.
physical id: 102
bus info: pci@0000:00:0a.0
version: 21
width: 32 bits
clock: 33MHz
capabilities: pci pciexpress msi pm normal_decode bus_master cap_list
configuration: driver=pcieport
resources: irq:32 ioport:3000(size=4096) memory:e1100000-e12fffff ioport:80018200000(size=2097152)
*-nvme
description: NVMe device
product: KXG60ZNV512G TOSHIBA
vendor: Toshiba Corporation
physical id: 0
bus info: pci@0000:05:00.0
logical name: /dev/nvme1
version: AGXA4103
serial:
width: 64 bits
clock: 33MHz
capabilities: nvme pciexpress pm msi msix nvm_express bus_master cap_list
configuration: driver=nvme latency=0 nqn=nqn.2017-03.jp.co.toshiba:KXG60ZNV512G TOSHIBA: state=live
resources: irq:29 memory:e1100000-e1103fff
*-namespace:0
description: NVMe disk
physical id: 0
logical name: hwmon0
*-namespace:1
description: NVMe disk
physical id: 2
logical name: /dev/ng1n1
*-namespace:2
description: NVMe disk
physical id: 1
bus info: nvme@1:1
logical name: /dev/nvme1n1
size: 476GiB (512GB)
capabilities: gpt-1.00 partitioned partitioned:gpt
configuration: guid= logicalsectorsize=512 sectorsize=512 wwid=eui.00000000000000018ce38e0300176ffe
*-volume
description: EFI partition
physical id: 1
bus info: nvme@1:1,1
logical name: /dev/nvme1n1p1
logical name: /mnt/SSD2
serial:
capacity: 476GiB
configuration: mount.fstype=btrfs mount.options=rw,relatime,compress=zstd:1,ssd,discard=async,space_cache=v2,subvolid=5,subvol=/ state=mounted
*-pci:3
description: PCI bridge
product: HiSilicon PCIe Root Port with Gen4
vendor: Huawei Technologies Co., Ltd.
physical id: c
bus info: pci@0000:00:0c.0
version: 21
width: 32 bits
clock: 33MHz
capabilities: pci pciexpress msi pm normal_decode bus_master cap_list
configuration: driver=pcieport
resources: irq:33 ioport:4000(size=4096) memory:e0000000-e0bfffff ioport:80018400000(size=2097152)
*-network UNCLAIMED
description: Network controller
product: Huawei Technologies Co., Ltd.
vendor: Huawei Technologies Co., Ltd.
physical id: 0
bus info: pci@0000:06:00.0
version: 02
width: 64 bits
clock: 33MHz
capabilities: pm msi pciexpress cap_list
configuration: latency=0
resources: memory:e0000000-e07fffff memory:e0800000-e0803fff
*-pci:4
description: PCI bridge
product: HiSilicon PCIe Root Port with Gen4
vendor: Huawei Technologies Co., Ltd.
physical id: d
bus info: pci@0000:00:0d.0
version: 21
width: 32 bits
clock: 33MHz
capabilities: pci pciexpress msi pm normal_decode bus_master cap_list
configuration: driver=pcieport
resources: irq:34 ioport:5000(size=4096) memory:e1300000-e14fffff ioport:80018600000(size=2097152)
*-usb
description: USB controller
product: uPD720202 USB 3.0 Host Controller
vendor: Renesas Technology Corp.
physical id: 0
bus info: pci@0000:07:00.0
version: 02
width: 64 bits
clock: 33MHz
capabilities: pm msi msix pciexpress xhci bus_master cap_list
configuration: driver=xhci_hcd latency=0
resources: irq:29 memory:e1300000-e1301fff
*-usbhost:0
product: xHCI Host Controller
vendor: Linux 6.0.5-gentoo-arm64 xhci-hcd
physical id: 0
bus info: usb@2
logical name: usb2
version: 6.00
capabilities: usb-2.00
configuration: driver=hub slots=2 speed=480Mbit/s
*-usb:0
description: USB hub
product: 4-Port USB 2.1 Hub
vendor: Generic
physical id: 1
bus info: usb@2:1
version: 1.01
capabilities: usb-2.10
configuration: driver=hub slots=4 speed=480Mbit/s
*-usb:1 UNCLAIMED
description: Communication device
product: Goodix Fingerprint Device
vendor: Shenzhen Goodix Technology Co.,Ltd.
physical id: 2
bus info: usb@2:2
version: 2.00
capabilities: usb-2.00
configuration: maxpower=100mA speed=12Mbit/s
*-usbhost:1
product: xHCI Host Controller
vendor: Linux 6.0.5-gentoo-arm64 xhci-hcd
physical id: 1
bus info: usb@3
logical name: usb3
version: 6.00
capabilities: usb-3.00
configuration: driver=hub slots=2 speed=5000Mbit/s
*-usb
description: USB hub
product: 4-Port USB 3.1 Hub
vendor: Generic
physical id: 1
bus info: usb@3:1
version: 1.01
capabilities: usb-3.10
configuration: driver=hub slots=4 speed=5000Mbit/s
*-pci:5
description: PCI bridge
product: HiSilicon PCIe Root Port with Gen4
vendor: Huawei Technologies Co., Ltd.
physical id: e
bus info: pci@0000:00:0e.0
version: 21
width: 32 bits
clock: 33MHz
capabilities: pci pciexpress msi pm normal_decode bus_master cap_list
configuration: driver=pcieport
resources: irq:35 ioport:6000(size=4096) memory:e1500000-e16fffff ioport:80018800000(size=2097152)
*-pci:6
description: PCI bridge
product: HiSilicon PCIe Root Port with Gen4
vendor: Huawei Technologies Co., Ltd.
physical id: f
bus info: pci@0000:00:0f.0
version: 21
width: 32 bits
clock: 33MHz
capabilities: pci pciexpress msi pm normal_decode bus_master cap_list
configuration: driver=pcieport
resources: irq:36 ioport:7000(size=4096) memory:e1700000-e18fffff ioport:80018a00000(size=2097152)
*-pci:7
description: PCI bridge
product: HiSilicon PCI-PCI Bridge
vendor: Huawei Technologies Co., Ltd.
physical id: 103
bus info: pci@0000:74:00.0
version: 20
width: 64 bits
clock: 33MHz
capabilities: pci pciexpress pm normal_decode bus_master cap_list
configuration: driver=pcieport
resources: iomemory:1010-100f irq:0
*-pci:8
description: PCI bridge
product: HiSilicon PCI-PCI Bridge
vendor: Huawei Technologies Co., Ltd.
physical id: 104
bus info: pci@0000:74:01.0
version: 20
width: 64 bits
clock: 33MHz
capabilities: pci pciexpress pm normal_decode bus_master cap_list
configuration: driver=pcieport
resources: iomemory:1010-100f irq:0 ioport:141000000(size=8388608)
*-generic UNCLAIMED
description: Unassigned class
product: SafeNet (wrong ID)
vendor: SafeNet (wrong ID)
physical id: 0
bus info: pci@0000:76:00.0
version: ff
width: 32 bits
clock: 66MHz
capabilities: bus_master vga_palette cap_list
configuration: latency=255 maxlatency=255 mingnt=255
resources: memory:141000000-1413fffff memory:141400000-1417effff
*-sas:0 UNCLAIMED
description: Serial Attached SCSI controller
product: HiSilicon SAS 3.0 HBA
vendor: Huawei Technologies Co., Ltd.
physical id: b
bus info: pci@0000:74:02.0
version: 21
width: 32 bits
clock: 33MHz
capabilities: sas pciexpress msi pm cap_list
configuration: latency=0
resources: memory:a2000000-a2007fff
*-sata
description: SATA controller
product: HiSilicon AHCI HBA
vendor: Huawei Technologies Co., Ltd.
physical id: 3
bus info: pci@0000:74:03.0
version: 21
width: 32 bits
clock: 33MHz
capabilities: sata pciexpress msi pm ahci_1.0 bus_master cap_list
configuration: driver=ahci latency=0
resources: irq:235 memory:a2010000-a2010fff
*-sas:1 UNCLAIMED
description: Serial Attached SCSI controller
product: HiSilicon SAS 3.0 HBA
vendor: Huawei Technologies Co., Ltd.
physical id: 4
bus info: pci@0000:74:04.0
version: 21
width: 32 bits
clock: 33MHz
capabilities: sas pciexpress msi pm cap_list
configuration: latency=0
resources: memory:a2008000-a200ffff
*-pci:9
description: PCI bridge
product: HiSilicon PCI-PCI Bridge
vendor: Huawei Technologies Co., Ltd.
physical id: 105
bus info: pci@0000:78:00.0
version: 20
width: 32 bits
clock: 33MHz
capabilities: pci pciexpress pm normal_decode bus_master cap_list
configuration: driver=pcieport
resources: irq:0
*-raid UNCLAIMED
description: RAID bus controller
product: HiSilicon RDE Engine
vendor: Huawei Technologies Co., Ltd.
physical id: 10
bus info: pci@0000:78:01.0
version: 21
width: 64 bits
clock: 33MHz
capabilities: raid pciexpress msi pm cap_list
configuration: latency=0
resources: iomemory:20-1f memory:208000000-2083fffff
*-usb:0
description: USB controller
product: HiSilicon USB 1.1 Host Controller
vendor: Huawei Technologies Co., Ltd.
physical id: 11
bus info: pci@0000:7a:00.0
version: 21
width: 64 bits
clock: 33MHz
capabilities: pciexpress msi pm ohci bus_master cap_list
configuration: driver=ohci-pci latency=0
resources: iomemory:20-1f irq:249 memory:20c100000-20c100fff
*-usbhost
product: OHCI PCI host controller
vendor: Linux 6.0.5-gentoo-arm64 ohci_hcd
physical id: 1
bus info: usb@6
logical name: usb6
version: 6.00
capabilities: usb-1.10
configuration: driver=hub slots=2 speed=12Mbit/s
*-usb:1
description: USB controller
product: HiSilicon USB 2.0 2-port Host Controller
vendor: Huawei Technologies Co., Ltd.
physical id: 1
bus info: pci@0000:7a:01.0
version: 21
width: 64 bits
clock: 33MHz
capabilities: pciexpress msi pm ehci bus_master cap_list
configuration: driver=ehci-pci latency=0
resources: iomemory:20-1f irq:237 memory:20c101000-20c101fff
*-usbhost
product: EHCI Host Controller
vendor: Linux 6.0.5-gentoo-arm64 ehci_hcd
physical id: 1
bus info: usb@1
logical name: usb1
version: 6.00
capabilities: usb-2.00
configuration: driver=hub slots=2 speed=480Mbit/s
*-usb
description: Audio device
product: Generic USB Audio
vendor: Generic
physical id: 1
bus info: usb@1:1
logical name: card1
logical name: /dev/snd/controlC1
logical name: /dev/snd/pcmC1D0c
logical name: /dev/snd/pcmC1D0p
logical name: /dev/snd/pcmC1D1c
logical name: /dev/snd/pcmC1D1p
logical name: /dev/snd/pcmC1D2c
logical name: /dev/snd/pcmC1D2p
logical name: input1
logical name: /dev/input/event1
version: 0.13
capabilities: usb-2.00 audio-control usb
configuration: driver=usbhid maxpower=100mA speed=480Mbit/s
*-usb:2
description: USB controller
product: HiSilicon USB 3.0 Host Controller
vendor: Huawei Technologies Co., Ltd.
physical id: 12
bus info: pci@0000:7a:02.0
version: 21
width: 64 bits
clock: 33MHz
capabilities: pciexpress msi pm xhci bus_master cap_list
configuration: driver=xhci_hcd latency=0
resources: iomemory:20-1f irq:246 memory:20c000000-20c0fffff
*-usbhost:0
product: xHCI Host Controller
vendor: Linux 6.0.5-gentoo-arm64 xhci-hcd
physical id: 0
bus info: usb@4
logical name: usb4
version: 6.00
capabilities: usb-2.00
configuration: driver=hub slots=1 speed=480Mbit/s
*-usb
description: USB hub
product: 4-Port USB 2.1 Hub
vendor: Generic
physical id: 1
bus info: usb@4:1
version: 1.01
capabilities: usb-2.10
configuration: driver=hub slots=4 speed=480Mbit/s
*-usb:0
description: USB hub
product: USB2.1 Hub
vendor: GenesysLogic
physical id: 1
bus info: usb@4:1.1
version: 6.63
capabilities: usb-2.10
configuration: driver=hub maxpower=100mA slots=4 speed=480Mbit/s
*-usb:0
description: Bluetooth wireless interface
product: Bluetooth Radio
vendor: Realtek
physical id: 1
bus info: usb@4:1.1.1
version: 2.00
serial:
capabilities: bluetooth usb-1.10
configuration: driver=btusb maxpower=500mA speed=12Mbit/s
*-usb:1
description: Bluetooth wireless interface
product: Bluetooth Radio
vendor: Realtek
physical id: 2
bus info: usb@4:1.1.2
version: 2.00
serial:
capabilities: bluetooth usb-1.10
configuration: driver=btusb maxpower=500mA speed=12Mbit/s
*-usb:2
description: Video
product: FHD Camera Microphone: FHD Came
vendor: SunplusIT Inc
physical id: 3
bus info: usb@4:1.1.3
logical name: card3
logical name: /dev/snd/controlC3
logical name: /dev/snd/pcmC3D0c
logical name: input6
logical name: /dev/input/event6
version: 10.14
serial: 01.00.00
capabilities: usb-2.00 usb
configuration: driver=snd-usb-audio maxpower=500mA speed=480Mbit/s
*-usb:3
description: USB hub
product: HighSpeed Hub
vendor: NEC Corp.
physical id: 4
bus info: usb@4:1.1.4
version: 1.00
capabilities: usb-2.00
configuration: driver=hub maxpower=100mA slots=3 speed=480Mbit/s
*-usb
description: Keyboard
product: Topre Corporation HHKB Professional
vendor: Topre Corporation
physical id: 1
bus info: usb@4:1.1.4.1
logical name: input3
logical name: /dev/input/event3
logical name: input3::capslock
logical name: input3::compose
logical name: input3::kana
logical name: input3::numlock
logical name: input3::scrolllock
version: 1.02
capabilities: usb-1.10 usb
configuration: driver=usbhid maxpower=100mA speed=12Mbit/s
*-usb:1
description: Human interface device
product: SAVITECH Bravo-X USB Audio
vendor: SAVITECH
physical id: 2
bus info: usb@4:1.2
logical name: card2
logical name: /dev/snd/controlC2
logical name: /dev/snd/pcmC2D0p
logical name: /dev/snd/pcmC2D1p
logical name: input2
logical name: /dev/input/event2
version: 0.01
capabilities: usb-1.10 usb
configuration: driver=snd-usb-audio maxpower=100mA speed=12Mbit/s
*-usbhost:1
product: xHCI Host Controller
vendor: Linux 6.0.5-gentoo-arm64 xhci-hcd
physical id: 1
bus info: usb@5
logical name: usb5
version: 6.00
capabilities: usb-3.00
configuration: driver=hub slots=1 speed=5000Mbit/s
*-usb
description: USB hub
product: 4-Port USB 3.1 Hub
vendor: Generic
physical id: 1
bus info: usb@5:1
version: 1.01
capabilities: usb-3.10
configuration: driver=hub slots=4 speed=5000Mbit/s
*-usb
description: USB hub
product: USB3.1 Hub
vendor: GenesysLogic
physical id: 1
bus info: usb@5:1.1
version: 6.63
capabilities: usb-3.20
configuration: driver=hub slots=4 speed=5000Mbit/s
*-generic
description: System peripheral
product: HiSilicon Embedded DMA Engine
vendor: Huawei Technologies Co., Ltd.
physical id: 13
bus info: pci@0000:7b:00.0
version: 21
width: 64 bits
clock: 33MHz
capabilities: pciexpress msi pm bus_master cap_list
configuration: driver=hisi_dma latency=0
resources: iomemory:10-f irq:39 memory:148800000-148803fff
*-pci:10
description: PCI bridge
product: HiSilicon PCI-PCI Bridge
vendor: Huawei Technologies Co., Ltd.
physical id: 0
bus info: pci@0000:7c:00.0
version: 20
width: 64 bits
clock: 33MHz
capabilities: pci pciexpress pm normal_decode bus_master cap_list
configuration: driver=pcieport
resources: iomemory:1010-100f irq:0 ioport:120000000(size=2097152)
*-network
description: Ethernet interface
product: HNS GE/10GE/25GE RDMA Network Controller
vendor: Huawei Technologies Co., Ltd.
physical id: 0
bus info: pci@0000:7d:00.0
logical name: enp125s0f0
version: 21
serial:
size: 1Gbit/s
capacity: 1Gbit/s
width: 64 bits
clock: 33MHz
capabilities: pciexpress msix pm bus_master cap_list ethernet physical tp 10bt 10bt-fd 100bt 100bt-fd 1000bt-fd autonegotiation
configuration: autonegotiation=on broadcast=yes driver=hns3 driverversion=6.0.5-gentoo-arm64 duplex=full firmware=1.8.15.0 ip=192.168.1.150 latency=0 link=yes multicast=yes port=twisted pair speed=1Gbit/s
resources: iomemory:10-f iomemory:10-f irq:0 memory:120100000-12010ffff memory:120000000-1200fffff
*-pnp00:00
product: 16550A-compatible COM port
physical id: 14
capabilities: pnp
configuration: driver=serial
*-input:0
product: Power Button
physical id: 1
logical name: input0
logical name: /dev/input/event0
capabilities: platform
*-input:1
product: MX Vertical Mouse
physical id: 2
logical name: input7
logical name: /dev/input/event7
logical name: /dev/input/mouse0
capabilities: bluetooth
Am 27.10.22 um 17:38 schrieb Rodrigo Siqueira:
> Hi Ao,
>
> Could you share a link that describe your workstation?
>
> Thanks
>
> On 10/26/22 17:17, Ao Zhong wrote:
>> Hi Rodrigo,
>>
>> Thanks for your review! This is my first time submitting a patch to the kernel.
>>
>> I'm not very good at using these tools yet. 😂
>>
>> Recently I got a Huawei Qingyun W510 (擎云 W510) ARM workstation
>>
>> from the second-hand market in China. It's SBSA and has a Kunpeng 920 (3211k) SoC
>>
>> with 24 Huawei-customized TSV110 cores. Since it's SFF form factor, and my machine
>>
>> supports PCIe 4.0 (looks like some W510 have it disabled), I installed an RX 6400 on it
>>
>> as my daily drive machine. It has decent performance. I uploaded a benchmark result on Geekbench.
>>
>> Link: https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fbrowser.geekbench.com%2Fv5%2Fcpu%2F18237269&data=05%7C01%7CRodrigo.Siqueira%40amd.com%7Cdaa18df14f004d2d621d08dab7977866%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C638024158436988558%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&sdata=Iyq4tWJL%2FfXuKB9xAUaVTQQmJQ0GRZ2rH%2F%2BXPTT%2F2tc%3D&reserved=0
>>
>> Ao
>>
>> Am 26.10.22 um 18:12 schrieb Rodrigo Siqueira:
>>>
>>>
>>> On 10/26/22 07:13, Ao Zhong wrote:
>>>> pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
>>>> pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
>>>> these two operations in dcn32/dcn32_resource.c still need to use FPU,
>>>> This will cause compilation to fail on ARM64 platforms because
>>>> -mgeneral-regs-only is enabled by default to disable the hardware FPU.
>>>> Therefore, imitate the dcn31_zero_pipe_dcc_fraction function in
>>>> dml/dcn31/dcn31_fpu.c, declare the dcn32_zero_pipe_dcc_fraction function
>>>> in dcn32_fpu.c, and move above two operations into this function.
>>>>
>>>> Acked-by: Christian König <christian.koenig@amd.com>
>>>> Signed-off-by: Ao Zhong <hacc1225@gmail.com>
>>>> ---
>>>> drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 5 +++--
>>>> drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c | 8 ++++++++
>>>> drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h | 3 +++
>>>> 3 files changed, 14 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
>>>> index a88dd7b3d1c1..287b7fa9bf41 100644
>>>> --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
>>>> +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
>>>> @@ -1918,8 +1918,9 @@ int dcn32_populate_dml_pipes_from_context(
>>>> timing = &pipe->stream->timing;
>>>> pipes[pipe_cnt].pipe.src.gpuvm = true;
>>>> - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
>>>> - pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
>>>> + DC_FP_START();
>>>> + dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
>>>> + DC_FP_END();
>>>> pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
>>>> pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet
>>>> pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
>>>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
>>>> index 819de0f11012..58772fce6437 100644
>>>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
>>>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
>>>> @@ -2521,3 +2521,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
>>>> }
>>>> }
>>>> +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
>>>> + int pipe_cnt)
>>>> +{
>>>> + dc_assert_fp_enabled();
>>>> +
>>>> + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
>>>> + pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
>>>> +}
>>>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
>>>> index 3a3dc2ce4c73..ab010e7e840b 100644
>>>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
>>>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
>>>> @@ -73,4 +73,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
>>>> void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
>>>> +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
>>>> + int pipe_cnt);
>>>> +
>>>> #endif
>>>
>>> Hi Ao,
>>>
>>> First of all, thanks a lot for your patchset.
>>>
>>> For both patches:
>>>
>>> Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
>>>
>>> And I also applied them to amd-staging-drm-next.
>>>
>>> Btw, if you are using git-send-email for sending patches, I recommend the following options:
>>>
>>> git send-email --annotate --cover-letter --thread --no-chain-reply-to --to="EMAILS" --cc="mailing@list.com" <SHA>
>>>
>>> Always add a cover letter; it makes it easier to follow the patchset, and you can also describe each change in the cover letter.
>>>
>>> When you send that other patch enabling ARM64, please add as many details as possible in the cover letter. Keep in mind that we have been working on isolating the FPU code in a way that does not regress any of our ASICs, which means that every change was well-tested on multiple devices. Anyway, maybe you can refer to this cover letter when writing the commit message:
>>>
>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fpatchwork.freedesktop.org%2Fseries%2F93042%2F&data=05%7C01%7CRodrigo.Siqueira%40amd.com%7Cdaa18df14f004d2d621d08dab7977866%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C638024158436988558%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&sdata=0GMN1Uj9iuQv2ZjipDHnl29V0UvWk6IL4XwlehdPNLA%3D&reserved=0
>>>
>>> Finally, do you have a use case for this change? I mean, ARM64 + AMD dGPU.
>>>
>>> Thanks again!
>>> Siqueira
>>>
^ permalink raw reply [flat|nested] 18+ messages in thread
end of thread, other threads:[~2022-10-27 21:54 UTC | newest]
Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-21 0:05 [PATCH] drm/amd/display: move remaining FPU code to dml folder Ao Zhong
2022-10-21 0:31 ` [PATCH RESEND] " Ao Zhong
2022-10-21 4:31 ` [PATCH] drm/amd/display: add DCN support for ARM64 Ao Zhong
2022-10-25 15:48 ` Rodrigo Siqueira
2022-10-25 21:17 ` [PATCH v2 1/2] drm/amd/display: move remaining FPU code to dml folder Ao Zhong
2022-10-26 7:19 ` Christian König
2022-10-26 10:41 ` Ao Zhong
2022-10-25 21:17 ` [PATCH v2 2/2] " Ao Zhong
2022-10-25 15:42 ` [PATCH RESEND] " Rodrigo Siqueira
2022-10-26 11:02 ` [PATCH v2 1/2] " Ao Zhong
2022-10-26 11:02 ` [PATCH v2 2/2] " Ao Zhong
2022-10-26 11:13 ` [PATCH v3 1/2] " Ao Zhong
2022-10-26 11:13 ` [PATCH v3 2/2] " Ao Zhong
2022-10-26 16:12 ` Rodrigo Siqueira
2022-10-26 21:17 ` Ao Zhong
2022-10-27 15:38 ` Rodrigo Siqueira
2022-10-27 16:48 ` Ao Zhong
2022-10-27 16:51 ` Ao Zhong
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.