All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amd/display: move remaining FPU code to dml folder
@ 2022-10-21  0:05 Ao Zhong
  2022-10-21  0:31 ` [PATCH RESEND] " Ao Zhong
  0 siblings, 1 reply; 18+ messages in thread
From: Ao Zhong @ 2022-10-21  0:05 UTC (permalink / raw)
  To: harry.wentland, sunpeng.li, Rodrigo.Siqueira; +Cc: amd-gfx

Subject: [PATCH] drm/amd/display: move remaining FPU code to dml folder

Move remaining FPU code to dml folder
in preparation for enabling aarch64 support.

Signed-off-by: Ao Zhong <hacc1225@gmail.com>
---
  .../drm/amd/display/dc/dcn10/dcn10_resource.c | 44 +------------------
  .../drm/amd/display/dc/dcn32/dcn32_resource.c |  5 ++-
  .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.c  | 40 +++++++++++++++++
  .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.h  |  3 ++
  .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  |  8 ++++
  .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  |  3 ++
  6 files changed, 59 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index 56d30baf12df..6bfac8088ab0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
@@ -1295,47 +1295,6 @@ static uint32_t read_pipe_fuses(struct dc_context 
*ctx)
      return value;
  }

-/*
- * Some architectures don't support soft-float (e.g. aarch64), on those
- * this function has to be called with hardfloat enabled, make sure not
- * to inline it so whatever fp stuff is done stays inside
- */
-static noinline void dcn10_resource_construct_fp(
-    struct dc *dc)
-{
-    if (dc->ctx->dce_version == DCN_VERSION_1_01) {
-        struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
-        struct dcn_ip_params *dcn_ip = dc->dcn_ip;
-        struct display_mode_lib *dml = &dc->dml;
-
-        dml->ip.max_num_dpp = 3;
-        /* TODO how to handle 23.84? */
-        dcn_soc->dram_clock_change_latency = 23;
-        dcn_ip->max_num_dpp = 3;
-    }
-    if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
-        dc->dcn_soc->urgent_latency = 3;
-        dc->debug.disable_dmcu = true;
-        dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
-    }
-
-
-    dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / 
ddr4_dram_width;
-    ASSERT(dc->dcn_soc->number_of_channels < 3);
-    if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
-        dc->dcn_soc->number_of_channels = 2;
-
-    if (dc->dcn_soc->number_of_channels == 1) {
-        dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
-        dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
-        dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
-        dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
-        if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
-            dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
-        }
-    }
-}
-
  static bool verify_clock_values(struct dm_pp_clock_levels_with_voltage 
*clks)
  {
      int i;
@@ -1510,8 +1469,9 @@ static bool dcn10_resource_construct(
      memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults));
      memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults));

-    /* Other architectures we build for build this with soft-float */
+    DC_FP_START();
      dcn10_resource_construct_fp(dc);
+    DC_FP_END();

      if (!dc->config.is_vmin_only_asic)
          if (ASICREV_IS_RAVEN2(dc->ctx->asic_id.hw_internal_rev))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c 
b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index a88dd7b3d1c1..287b7fa9bf41 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -1918,8 +1918,9 @@ int dcn32_populate_dml_pipes_from_context(
          timing = &pipe->stream->timing;

          pipes[pipe_cnt].pipe.src.gpuvm = true;
-        pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
-        pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+        DC_FP_START();
+        dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
+        DC_FP_END();
          pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
          pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // 
according to spreadsheet
          pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
index 99644d896222..0495cecaf1df 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
@@ -27,6 +27,8 @@
  #include "dcn10/dcn10_resource.h"

  #include "dcn10_fpu.h"
+#include "resource.h"
+#include "amdgpu_dm/dc_fpu.h"

  /**
   * DOC: DCN10 FPU manipulation Overview
@@ -121,3 +123,41 @@ struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc = {
      .writeback_dram_clock_change_latency_us = 23.0,
      .return_bus_width_bytes = 64,
  };
+
+void dcn10_resource_construct_fp(
+    struct dc *dc)
+{
+    dc_assert_fp_enabled();
+
+    if (dc->ctx->dce_version == DCN_VERSION_1_01) {
+        struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
+        struct dcn_ip_params *dcn_ip = dc->dcn_ip;
+        struct display_mode_lib *dml = &dc->dml;
+
+        dml->ip.max_num_dpp = 3;
+        /* TODO how to handle 23.84? */
+        dcn_soc->dram_clock_change_latency = 23;
+        dcn_ip->max_num_dpp = 3;
+    }
+    if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
+        dc->dcn_soc->urgent_latency = 3;
+        dc->debug.disable_dmcu = true;
+        dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
+    }
+
+
+    dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / 
ddr4_dram_width;
+    ASSERT(dc->dcn_soc->number_of_channels < 3);
+    if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
+        dc->dcn_soc->number_of_channels = 2;
+
+    if (dc->dcn_soc->number_of_channels == 1) {
+        dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
+        dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
+        dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
+        dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
+        if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
+            dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
+        }
+    }
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h 
b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
index e74ed4b4ce5b..dcbfb73b0afd 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
@@ -27,4 +27,7 @@
  #ifndef __DCN10_FPU_H__
  #define __DCN10_FPU_H__

+void dcn10_resource_construct_fp(
+    struct dc *dc);
+
  #endif /* __DCN20_FPU_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 819de0f11012..58772fce6437 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -2521,3 +2521,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc 
*dc, struct clk_bw_params *bw_pa
      }
  }

+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+                  int pipe_cnt)
+{
+    dc_assert_fp_enabled();
+
+    pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
+    pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h 
b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
index 3a3dc2ce4c73..ab010e7e840b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
@@ -73,4 +73,7 @@ int 
dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,

  void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);

+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+                  int pipe_cnt);
+
  #endif
-- 
2.37.4


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH RESEND] drm/amd/display: move remaining FPU code to dml folder
  2022-10-21  0:05 [PATCH] drm/amd/display: move remaining FPU code to dml folder Ao Zhong
@ 2022-10-21  0:31 ` Ao Zhong
  2022-10-21  4:31   ` [PATCH] drm/amd/display: add DCN support for ARM64 Ao Zhong
  2022-10-25 15:42   ` [PATCH RESEND] " Rodrigo Siqueira
  0 siblings, 2 replies; 18+ messages in thread
From: Ao Zhong @ 2022-10-21  0:31 UTC (permalink / raw)
  To: Harry Wentland, Leo Li, Rodrigo Siqueira; +Cc: Ao Zhong, amd-gfx

Move remaining FPU code to dml folder
in preparation for enabling aarch64 support.

Signed-off-by: Ao Zhong <hacc1225@gmail.com>
---
 .../drm/amd/display/dc/dcn10/dcn10_resource.c | 44 +------------------
 .../drm/amd/display/dc/dcn32/dcn32_resource.c |  5 ++-
 .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.c  | 40 +++++++++++++++++
 .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.h  |  3 ++
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  |  8 ++++
 .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  |  3 ++
 6 files changed, 59 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index 56d30baf12df..6bfac8088ab0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
@@ -1295,47 +1295,6 @@ static uint32_t read_pipe_fuses(struct dc_context *ctx)
 	return value;
 }
 
-/*
- * Some architectures don't support soft-float (e.g. aarch64), on those
- * this function has to be called with hardfloat enabled, make sure not
- * to inline it so whatever fp stuff is done stays inside
- */
-static noinline void dcn10_resource_construct_fp(
-	struct dc *dc)
-{
-	if (dc->ctx->dce_version == DCN_VERSION_1_01) {
-		struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
-		struct dcn_ip_params *dcn_ip = dc->dcn_ip;
-		struct display_mode_lib *dml = &dc->dml;
-
-		dml->ip.max_num_dpp = 3;
-		/* TODO how to handle 23.84? */
-		dcn_soc->dram_clock_change_latency = 23;
-		dcn_ip->max_num_dpp = 3;
-	}
-	if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
-		dc->dcn_soc->urgent_latency = 3;
-		dc->debug.disable_dmcu = true;
-		dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
-	}
-
-
-	dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
-	ASSERT(dc->dcn_soc->number_of_channels < 3);
-	if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
-		dc->dcn_soc->number_of_channels = 2;
-
-	if (dc->dcn_soc->number_of_channels == 1) {
-		dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
-		dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
-		dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
-		dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
-		if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
-			dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
-		}
-	}
-}
-
 static bool verify_clock_values(struct dm_pp_clock_levels_with_voltage *clks)
 {
 	int i;
@@ -1510,8 +1469,9 @@ static bool dcn10_resource_construct(
 	memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults));
 	memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults));
 
-	/* Other architectures we build for build this with soft-float */
+	DC_FP_START();
 	dcn10_resource_construct_fp(dc);
+	DC_FP_END();
 
 	if (!dc->config.is_vmin_only_asic)
 		if (ASICREV_IS_RAVEN2(dc->ctx->asic_id.hw_internal_rev))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index a88dd7b3d1c1..287b7fa9bf41 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -1918,8 +1918,9 @@ int dcn32_populate_dml_pipes_from_context(
 		timing = &pipe->stream->timing;
 
 		pipes[pipe_cnt].pipe.src.gpuvm = true;
-		pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
-		pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+		DC_FP_START();
+		dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
+		DC_FP_END();
 		pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
 		pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet
 		pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
index 99644d896222..0495cecaf1df 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
@@ -27,6 +27,8 @@
 #include "dcn10/dcn10_resource.h"
 
 #include "dcn10_fpu.h"
+#include "resource.h"
+#include "amdgpu_dm/dc_fpu.h"
 
 /**
  * DOC: DCN10 FPU manipulation Overview
@@ -121,3 +123,41 @@ struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc = {
 	.writeback_dram_clock_change_latency_us = 23.0,
 	.return_bus_width_bytes = 64,
 };
+
+void dcn10_resource_construct_fp(
+	struct dc *dc)
+{
+	dc_assert_fp_enabled();
+	
+	if (dc->ctx->dce_version == DCN_VERSION_1_01) {
+		struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
+		struct dcn_ip_params *dcn_ip = dc->dcn_ip;
+		struct display_mode_lib *dml = &dc->dml;
+
+		dml->ip.max_num_dpp = 3;
+		/* TODO how to handle 23.84? */
+		dcn_soc->dram_clock_change_latency = 23;
+		dcn_ip->max_num_dpp = 3;
+	}
+	if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
+		dc->dcn_soc->urgent_latency = 3;
+		dc->debug.disable_dmcu = true;
+		dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
+	}
+
+
+	dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
+	ASSERT(dc->dcn_soc->number_of_channels < 3);
+	if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
+		dc->dcn_soc->number_of_channels = 2;
+
+	if (dc->dcn_soc->number_of_channels == 1) {
+		dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
+		dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
+		dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
+		dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
+		if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
+			dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
+		}
+	}
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
index e74ed4b4ce5b..dcbfb73b0afd 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
@@ -27,4 +27,7 @@
 #ifndef __DCN10_FPU_H__
 #define __DCN10_FPU_H__
 
+void dcn10_resource_construct_fp(
+	struct dc *dc);
+
 #endif /* __DCN20_FPU_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 819de0f11012..58772fce6437 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -2521,3 +2521,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
 	}
 }
 
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+				  int pipe_cnt)
+{
+	dc_assert_fp_enabled();
+
+	pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
+	pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
index 3a3dc2ce4c73..ab010e7e840b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
@@ -73,4 +73,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
 
 void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
 
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+				  int pipe_cnt);
+
 #endif
-- 
2.37.4


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH] drm/amd/display: add DCN support for ARM64
  2022-10-21  0:31 ` [PATCH RESEND] " Ao Zhong
@ 2022-10-21  4:31   ` Ao Zhong
  2022-10-25 15:48     ` Rodrigo Siqueira
  2022-10-25 15:42   ` [PATCH RESEND] " Rodrigo Siqueira
  1 sibling, 1 reply; 18+ messages in thread
From: Ao Zhong @ 2022-10-21  4:31 UTC (permalink / raw)
  To: Harry Wentland, Leo Li, Rodrigo Siqueira; +Cc: Ao Zhong, amd-gfx

After moving all FPU code to the DML folder, we can enable DCN support
for the ARM64 platform. Remove the -mgeneral-regs-only CFLAG from the
code in the DML folder that needs to use hardware FPU, and add a control
mechanism for ARM Neon.

Signed-off-by: Ao Zhong <hacc1225@gmail.com>
---
 drivers/gpu/drm/amd/display/Kconfig           |  2 +-
 .../gpu/drm/amd/display/amdgpu_dm/dc_fpu.c    |  6 ++
 drivers/gpu/drm/amd/display/dc/dml/Makefile   | 64 ++++++++++++-------
 3 files changed, 49 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
index 0142affcdaa3..a7f1c4e51719 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -6,7 +6,7 @@ config DRM_AMD_DC
 	bool "AMD DC - Enable new display engine"
 	default y
 	select SND_HDA_COMPONENT if SND_HDA_CORE
-	select DRM_AMD_DC_DCN if (X86 || PPC64)
+	select DRM_AMD_DC_DCN if (X86 || PPC64 || (ARM64 && KERNEL_MODE_NEON))
 	help
 	  Choose this option if you want to use the new display engine
 	  support for AMDGPU. This adds required support for Vega and
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
index ab0c6d191038..1743ca0a3641 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
@@ -31,6 +31,8 @@
 #elif defined(CONFIG_PPC64)
 #include <asm/switch_to.h>
 #include <asm/cputable.h>
+#elif defined(CONFIG_ARM64)
+#include <asm/neon.h>
 #endif
 
 /**
@@ -99,6 +101,8 @@ void dc_fpu_begin(const char *function_name, const int line)
 			preempt_disable();
 			enable_kernel_fp();
 		}
+#elif defined(CONFIG_ARM64)
+		kernel_neon_begin();
 #endif
 	}
 
@@ -136,6 +140,8 @@ void dc_fpu_end(const char *function_name, const int line)
 			disable_kernel_fp();
 			preempt_enable();
 		}
+#elif defined(CONFIG_ARM64)
+		kernel_neon_end();
 #endif
 	}
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index d0c6cf61c676..3cdd109189e0 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -33,6 +33,12 @@ ifdef CONFIG_PPC64
 dml_ccflags := -mhard-float -maltivec
 endif
 
+ifdef CONFIG_ARM64
+ifdef CONFIG_DRM_AMD_DC_DCN
+dml_rcflags_arm64 := -mgeneral-regs-only
+endif
+endif
+
 ifdef CONFIG_CC_IS_GCC
 ifeq ($(call cc-ifversion, -lt, 0701, y), y)
 IS_OLD_GCC = 1
@@ -87,32 +93,46 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_ccflags) -Wno-tautological-compare
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags) $(dml_rcflags_arm64)
 CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn2x/dcn2x.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o  := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o  := $(dml_rcflags) $(dml_rcflags_arm64)
+ifdef CONFIG_ARM64
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_mode_vba_314.o := $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_rq_dlg_calc_314.o := $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/dcn314_fpu.o := $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn321/dcn321_fpu.o := $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn303/dcn303_fpu.o := $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_rcflags_arm64)
+endif
 endif
 CFLAGS_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/display_rq_dlg_helpers.o := $(dml_ccflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_rq_dlg_helpers.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_rq_dlg_helpers.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_rcflags) $(dml_rcflags_arm64)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_rcflags) $(dml_rcflags_arm64)
 
 DML = calcs/dce_calcs.o calcs/custom_float.o calcs/bw_fixed.o
 
-- 
2.37.4


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH RESEND] drm/amd/display: move remaining FPU code to dml folder
  2022-10-21  0:31 ` [PATCH RESEND] " Ao Zhong
  2022-10-21  4:31   ` [PATCH] drm/amd/display: add DCN support for ARM64 Ao Zhong
@ 2022-10-25 15:42   ` Rodrigo Siqueira
  2022-10-26 11:02     ` [PATCH v2 1/2] " Ao Zhong
                       ` (3 more replies)
  1 sibling, 4 replies; 18+ messages in thread
From: Rodrigo Siqueira @ 2022-10-25 15:42 UTC (permalink / raw)
  To: Ao Zhong; +Cc: Leo Li, Harry Wentland, amd-gfx

Hi Ao,

First of all, thanks a lot for this patch.

On 10/20/22 20:31, Ao Zhong wrote:
> Move remaining FPU code to dml folder
> in preparation for enabling aarch64 support.

I guess you found some of the issues here after you tried enabling the 
arm64 compilation, right? If so, could you expand the commit message to 
describe it better?

> 
> Signed-off-by: Ao Zhong <hacc1225@gmail.com>
> ---
>   .../drm/amd/display/dc/dcn10/dcn10_resource.c | 44 +------------------
>   .../drm/amd/display/dc/dcn32/dcn32_resource.c |  5 ++-
>   .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.c  | 40 +++++++++++++++++
>   .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.h  |  3 ++
>   .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  |  8 ++++
>   .../drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  |  3 ++

Could you split this commit into two parts?
One for DCN10 and another one for DCN32.

>   6 files changed, 59 insertions(+), 44 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
> index 56d30baf12df..6bfac8088ab0 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
> @@ -1295,47 +1295,6 @@ static uint32_t read_pipe_fuses(struct dc_context *ctx)
>   	return value;
>   }
>   
> -/*
> - * Some architectures don't support soft-float (e.g. aarch64), on those
> - * this function has to be called with hardfloat enabled, make sure not
> - * to inline it so whatever fp stuff is done stays inside
> - */
> -static noinline void dcn10_resource_construct_fp(
> -	struct dc *dc)
> -{
> -	if (dc->ctx->dce_version == DCN_VERSION_1_01) {
> -		struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
> -		struct dcn_ip_params *dcn_ip = dc->dcn_ip;
> -		struct display_mode_lib *dml = &dc->dml;
> -
> -		dml->ip.max_num_dpp = 3;
> -		/* TODO how to handle 23.84? */
> -		dcn_soc->dram_clock_change_latency = 23;
> -		dcn_ip->max_num_dpp = 3;
> -	}
> -	if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
> -		dc->dcn_soc->urgent_latency = 3;
> -		dc->debug.disable_dmcu = true;
> -		dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
> -	}
> -
> -
> -	dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
> -	ASSERT(dc->dcn_soc->number_of_channels < 3);
> -	if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
> -		dc->dcn_soc->number_of_channels = 2;
> -
> -	if (dc->dcn_soc->number_of_channels == 1) {
> -		dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
> -		dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
> -		dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
> -		dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
> -		if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
> -			dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
> -		}
> -	}
> -}
> -
>   static bool verify_clock_values(struct dm_pp_clock_levels_with_voltage *clks)
>   {
>   	int i;
> @@ -1510,8 +1469,9 @@ static bool dcn10_resource_construct(
>   	memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults));
>   	memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults));
>   
> -	/* Other architectures we build for build this with soft-float */
> +	DC_FP_START();
>   	dcn10_resource_construct_fp(dc);
> +	DC_FP_END();
>   
>   	if (!dc->config.is_vmin_only_asic)
>   		if (ASICREV_IS_RAVEN2(dc->ctx->asic_id.hw_internal_rev))
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
> index a88dd7b3d1c1..287b7fa9bf41 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
> @@ -1918,8 +1918,9 @@ int dcn32_populate_dml_pipes_from_context(
>   		timing = &pipe->stream->timing;
>   
>   		pipes[pipe_cnt].pipe.src.gpuvm = true;
> -		pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
> -		pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
> +		DC_FP_START();
> +		dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
> +		DC_FP_END();
>   		pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
>   		pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet
>   		pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
> index 99644d896222..0495cecaf1df 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
> @@ -27,6 +27,8 @@
>   #include "dcn10/dcn10_resource.h"
>   
>   #include "dcn10_fpu.h"
> +#include "resource.h"
> +#include "amdgpu_dm/dc_fpu.h"
>   
>   /**
>    * DOC: DCN10 FPU manipulation Overview
> @@ -121,3 +123,41 @@ struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc = {
>   	.writeback_dram_clock_change_latency_us = 23.0,
>   	.return_bus_width_bytes = 64,
>   };
> +
> +void dcn10_resource_construct_fp(
> +	struct dc *dc)

Since this is a small function signature, could you put the dc parameter 
on the same line as the function name? Same idea for the header file.

> +{
> +	dc_assert_fp_enabled();
> +	

Drop the trailing whitespace on the line above.

Thanks
Siqueira

> +	if (dc->ctx->dce_version == DCN_VERSION_1_01) {
> +		struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
> +		struct dcn_ip_params *dcn_ip = dc->dcn_ip;
> +		struct display_mode_lib *dml = &dc->dml;
> +
> +		dml->ip.max_num_dpp = 3;
> +		/* TODO how to handle 23.84? */
> +		dcn_soc->dram_clock_change_latency = 23;
> +		dcn_ip->max_num_dpp = 3;
> +	}
> +	if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
> +		dc->dcn_soc->urgent_latency = 3;
> +		dc->debug.disable_dmcu = true;
> +		dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
> +	}
> +
> +
> +	dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
> +	ASSERT(dc->dcn_soc->number_of_channels < 3);
> +	if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
> +		dc->dcn_soc->number_of_channels = 2;
> +
> +	if (dc->dcn_soc->number_of_channels == 1) {
> +		dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
> +		dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
> +		dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
> +		dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
> +		if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
> +			dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
> +		}
> +	}
> +}
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
> index e74ed4b4ce5b..dcbfb73b0afd 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
> @@ -27,4 +27,7 @@
>   #ifndef __DCN10_FPU_H__
>   #define __DCN10_FPU_H__
>   
> +void dcn10_resource_construct_fp(
> +	struct dc *dc);
> +
>   #endif /* __DCN20_FPU_H__ */
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
> index 819de0f11012..58772fce6437 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
> @@ -2521,3 +2521,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
>   	}
>   }
>   
> +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
> +				  int pipe_cnt)
> +{
> +	dc_assert_fp_enabled();
> +
> +	pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
> +	pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
> +}
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
> index 3a3dc2ce4c73..ab010e7e840b 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
> @@ -73,4 +73,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
>   
>   void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
>   
> +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
> +				  int pipe_cnt);
> +
>   #endif

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] drm/amd/display: add DCN support for ARM64
  2022-10-21  4:31   ` [PATCH] drm/amd/display: add DCN support for ARM64 Ao Zhong
@ 2022-10-25 15:48     ` Rodrigo Siqueira
  2022-10-25 21:17       ` [PATCH v2 1/2] drm/amd/display: move remaining FPU code to dml folder Ao Zhong
  2022-10-25 21:17       ` [PATCH v2 2/2] " Ao Zhong
  0 siblings, 2 replies; 18+ messages in thread
From: Rodrigo Siqueira @ 2022-10-25 15:48 UTC (permalink / raw)
  To: Ao Zhong, Harry Wentland, Leo Li; +Cc: amd-gfx



On 10/21/22 00:31, Ao Zhong wrote:
> After moving all FPU code to the DML folder, we can enable DCN support
> for the ARM64 platform. Remove the -mgeneral-regs-only CFLAG from the
> code in the DML folder that needs to use hardware FPU, and add a control
> mechanism for ARM Neon.
> 
> Signed-off-by: Ao Zhong <hacc1225@gmail.com>
> ---
>   drivers/gpu/drm/amd/display/Kconfig           |  2 +-
>   .../gpu/drm/amd/display/amdgpu_dm/dc_fpu.c    |  6 ++
>   drivers/gpu/drm/amd/display/dc/dml/Makefile   | 64 ++++++++++++-------
>   3 files changed, 49 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
> index 0142affcdaa3..a7f1c4e51719 100644
> --- a/drivers/gpu/drm/amd/display/Kconfig
> +++ b/drivers/gpu/drm/amd/display/Kconfig
> @@ -6,7 +6,7 @@ config DRM_AMD_DC
>   	bool "AMD DC - Enable new display engine"
>   	default y
>   	select SND_HDA_COMPONENT if SND_HDA_CORE
> -	select DRM_AMD_DC_DCN if (X86 || PPC64)
> +	select DRM_AMD_DC_DCN if (X86 || PPC64 || (ARM64 && KERNEL_MODE_NEON))
>   	help
>   	  Choose this option if you want to use the new display engine
>   	  support for AMDGPU. This adds required support for Vega and
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
> index ab0c6d191038..1743ca0a3641 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
> @@ -31,6 +31,8 @@
>   #elif defined(CONFIG_PPC64)
>   #include <asm/switch_to.h>
>   #include <asm/cputable.h>
> +#elif defined(CONFIG_ARM64)
> +#include <asm/neon.h>
>   #endif
>   
>   /**
> @@ -99,6 +101,8 @@ void dc_fpu_begin(const char *function_name, const int line)
>   			preempt_disable();
>   			enable_kernel_fp();
>   		}
> +#elif defined(CONFIG_ARM64)
> +		kernel_neon_begin();
>   #endif
>   	}
>   
> @@ -136,6 +140,8 @@ void dc_fpu_end(const char *function_name, const int line)
>   			disable_kernel_fp();
>   			preempt_enable();
>   		}
> +#elif defined(CONFIG_ARM64)
> +		kernel_neon_end();
>   #endif
>   	}
>   
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
> index d0c6cf61c676..3cdd109189e0 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
> +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
> @@ -33,6 +33,12 @@ ifdef CONFIG_PPC64
>   dml_ccflags := -mhard-float -maltivec
>   endif
>   
> +ifdef CONFIG_ARM64
> +ifdef CONFIG_DRM_AMD_DC_DCN
> +dml_rcflags_arm64 := -mgeneral-regs-only
> +endif
> +endif
> +
>   ifdef CONFIG_CC_IS_GCC
>   ifeq ($(call cc-ifversion, -lt, 0701, y), y)
>   IS_OLD_GCC = 1
> @@ -87,32 +93,46 @@ CFLAGS_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_ccflags)
>   CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_ccflags)
>   CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_ccflags)
>   CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_ccflags) -Wno-tautological-compare
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags) $(dml_rcflags_arm64)
>   CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn2x/dcn2x.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o  := $(dml_rcflags)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20v2.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_mode_vba_21.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn21/display_rq_dlg_calc_21.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_mode_vba_30.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/display_rq_dlg_calc_30.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_mode_vba_31.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o  := $(dml_rcflags) $(dml_rcflags_arm64)
> +ifdef CONFIG_ARM64
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_mode_vba_314.o := $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_rq_dlg_calc_314.o := $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/dcn314_fpu.o := $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn321/dcn321_fpu.o := $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn303/dcn303_fpu.o := $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_rcflags_arm64)
> +endif
>   endif
>   CFLAGS_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_ccflags)
>   CFLAGS_$(AMDDALPATH)/dc/dml/display_rq_dlg_helpers.o := $(dml_ccflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_rq_dlg_helpers.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_rcflags)
> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_rcflags)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_rq_dlg_helpers.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_rcflags) $(dml_rcflags_arm64)
> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_rcflags) $(dml_rcflags_arm64)
>   
>   DML = calcs/dce_calcs.o calcs/custom_float.o calcs/bw_fixed.o
>   

Hi Ao,

This patch lgtm, but let's focus on the FPU isolation for DCN10/32 first.

After we get it merged, resend this patch, but this time also add Arnd 
Bergmann, Nathan Chancellor, Stephen Rothwell, and Alex Deucher.

Thanks
Siqueira

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v2 1/2] drm/amd/display: move remaining FPU code to dml folder
  2022-10-25 15:48     ` Rodrigo Siqueira
@ 2022-10-25 21:17       ` Ao Zhong
  2022-10-26  7:19         ` Christian König
  2022-10-25 21:17       ` [PATCH v2 2/2] " Ao Zhong
  1 sibling, 1 reply; 18+ messages in thread
From: Ao Zhong @ 2022-10-25 21:17 UTC (permalink / raw)
  To: Rodrigo Siqueira; +Cc: Leo Li, Ao Zhong, Harry Wentland, amd-gfx

In the process of enabling DCN support for arm64, I found that the
dcn10_resource_construct_fp function in dcn10/dcn10_resource.c still
needs to use FPU. This will cause compilation to fail on ARM64 platforms
because -mgeneral-regs-only is enabled by default to disable the
hardware FPU. So move dcn10_resource_construct_fp from dcn10 folder to
dml/dcn10 folder to enable hardware FPU for that function.

Signed-off-by: Ao Zhong <hacc1225@gmail.com>
---
 .../drm/amd/display/dc/dcn10/dcn10_resource.c | 44 +------------------
 .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.c  | 38 ++++++++++++++++
 .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.h  |  2 +
 3 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index 56d30baf12df..6bfac8088ab0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
@@ -1295,47 +1295,6 @@ static uint32_t read_pipe_fuses(struct dc_context *ctx)
 	return value;
 }
 
-/*
- * Some architectures don't support soft-float (e.g. aarch64), on those
- * this function has to be called with hardfloat enabled, make sure not
- * to inline it so whatever fp stuff is done stays inside
- */
-static noinline void dcn10_resource_construct_fp(
-	struct dc *dc)
-{
-	if (dc->ctx->dce_version == DCN_VERSION_1_01) {
-		struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
-		struct dcn_ip_params *dcn_ip = dc->dcn_ip;
-		struct display_mode_lib *dml = &dc->dml;
-
-		dml->ip.max_num_dpp = 3;
-		/* TODO how to handle 23.84? */
-		dcn_soc->dram_clock_change_latency = 23;
-		dcn_ip->max_num_dpp = 3;
-	}
-	if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
-		dc->dcn_soc->urgent_latency = 3;
-		dc->debug.disable_dmcu = true;
-		dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
-	}
-
-
-	dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
-	ASSERT(dc->dcn_soc->number_of_channels < 3);
-	if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
-		dc->dcn_soc->number_of_channels = 2;
-
-	if (dc->dcn_soc->number_of_channels == 1) {
-		dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
-		dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
-		dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
-		dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
-		if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
-			dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
-		}
-	}
-}
-
 static bool verify_clock_values(struct dm_pp_clock_levels_with_voltage *clks)
 {
 	int i;
@@ -1510,8 +1469,9 @@ static bool dcn10_resource_construct(
 	memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults));
 	memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults));
 
-	/* Other architectures we build for build this with soft-float */
+	DC_FP_START();
 	dcn10_resource_construct_fp(dc);
+	DC_FP_END();
 
 	if (!dc->config.is_vmin_only_asic)
 		if (ASICREV_IS_RAVEN2(dc->ctx->asic_id.hw_internal_rev))
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
index 99644d896222..8b5e6fff5444 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
@@ -27,6 +27,8 @@
 #include "dcn10/dcn10_resource.h"
 
 #include "dcn10_fpu.h"
+#include "resource.h"
+#include "amdgpu_dm/dc_fpu.h"
 
 /**
  * DOC: DCN10 FPU manipulation Overview
@@ -121,3 +123,39 @@ struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc = {
 	.writeback_dram_clock_change_latency_us = 23.0,
 	.return_bus_width_bytes = 64,
 };
+
+void dcn10_resource_construct_fp(struct dc *dc)
+{
+	dc_assert_fp_enabled();
+	if (dc->ctx->dce_version == DCN_VERSION_1_01) {
+		struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
+		struct dcn_ip_params *dcn_ip = dc->dcn_ip;
+		struct display_mode_lib *dml = &dc->dml;
+
+		dml->ip.max_num_dpp = 3;
+		/* TODO how to handle 23.84? */
+		dcn_soc->dram_clock_change_latency = 23;
+		dcn_ip->max_num_dpp = 3;
+	}
+	if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
+		dc->dcn_soc->urgent_latency = 3;
+		dc->debug.disable_dmcu = true;
+		dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
+	}
+
+
+	dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
+	ASSERT(dc->dcn_soc->number_of_channels < 3);
+	if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
+		dc->dcn_soc->number_of_channels = 2;
+
+	if (dc->dcn_soc->number_of_channels == 1) {
+		dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
+		dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
+		dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
+		dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
+		if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
+			dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
+		}
+	}
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
index e74ed4b4ce5b..63219ecd8478 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
@@ -27,4 +27,6 @@
 #ifndef __DCN10_FPU_H__
 #define __DCN10_FPU_H__
 
+void dcn10_resource_construct_fp(struct dc *dc);
+
 #endif /* __DCN20_FPU_H__ */
-- 
2.37.4


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 2/2] drm/amd/display: move remaining FPU code to dml folder
  2022-10-25 15:48     ` Rodrigo Siqueira
  2022-10-25 21:17       ` [PATCH v2 1/2] drm/amd/display: move remaining FPU code to dml folder Ao Zhong
@ 2022-10-25 21:17       ` Ao Zhong
  1 sibling, 0 replies; 18+ messages in thread
From: Ao Zhong @ 2022-10-25 21:17 UTC (permalink / raw)
  To: Rodrigo Siqueira; +Cc: Leo Li, Ao Zhong, Harry Wentland, amd-gfx

pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
These two operations in dcn32/dcn32_resource.c still need to use the FPU.
This will cause compilation to fail on ARM64 platforms because
-mgeneral-regs-only is enabled by default to disable the hardware FPU.
Therefore, following the dcn31_zero_pipe_dcc_fraction function in
dml/dcn31/dcn31_fpu.c, define the dcn32_zero_pipe_dcc_fraction function
in dcn32_fpu.c, and move the above two operations into this function.

Signed-off-by: Ao Zhong <hacc1225@gmail.com>
---
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 5 +++--
 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 8 ++++++++
 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  | 3 +++
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index a88dd7b3d1c1..287b7fa9bf41 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -1918,8 +1918,9 @@ int dcn32_populate_dml_pipes_from_context(
 		timing = &pipe->stream->timing;
 
 		pipes[pipe_cnt].pipe.src.gpuvm = true;
-		pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
-		pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+		DC_FP_START();
+		dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
+		DC_FP_END();
 		pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
 		pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet
 		pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 819de0f11012..58772fce6437 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -2521,3 +2521,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
 	}
 }
 
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+				  int pipe_cnt)
+{
+	dc_assert_fp_enabled();
+
+	pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
+	pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
index 3a3dc2ce4c73..ab010e7e840b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
@@ -73,4 +73,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
 
 void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
 
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+				  int pipe_cnt);
+
 #endif
-- 
2.37.4


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 1/2] drm/amd/display: move remaining FPU code to dml folder
  2022-10-25 21:17       ` [PATCH v2 1/2] drm/amd/display: move remaining FPU code to dml folder Ao Zhong
@ 2022-10-26  7:19         ` Christian König
  2022-10-26 10:41           ` Ao Zhong
  0 siblings, 1 reply; 18+ messages in thread
From: Christian König @ 2022-10-26  7:19 UTC (permalink / raw)
  To: Ao Zhong, Rodrigo Siqueira; +Cc: Leo Li, Harry Wentland, amd-gfx

Am 25.10.22 um 23:17 schrieb Ao Zhong:
> In the process of enabling DCN support for arm64, I found that the
> dcn10_resource_construct_fp function in dcn10/dcn10_resource.c still
> needs to use FPU. This will cause compilation to fail on ARM64 platforms
> because -mgeneral-regs-only is enabled by default to disable the
> hardware FPU. So move dcn10_resource_construct_fp from dcn10 folder to
> dml/dcn10 folder to enable hardware FPU for that function.

Offhand, that looks good to me, but our display team needs to take a look.

Feel free to add an Acked-by: Christian König <christian.koenig@amd.com> 
for the series.

While at it, could you make sure that checkpatch.pl doesn't have anything 
to complain about in the moved code?

Thanks for the help,
Christian.

>
> Signed-off-by: Ao Zhong <hacc1225@gmail.com>
> ---
>   .../drm/amd/display/dc/dcn10/dcn10_resource.c | 44 +------------------
>   .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.c  | 38 ++++++++++++++++
>   .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.h  |  2 +
>   3 files changed, 42 insertions(+), 42 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
> index 56d30baf12df..6bfac8088ab0 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
> @@ -1295,47 +1295,6 @@ static uint32_t read_pipe_fuses(struct dc_context *ctx)
>   	return value;
>   }
>   
> -/*
> - * Some architectures don't support soft-float (e.g. aarch64), on those
> - * this function has to be called with hardfloat enabled, make sure not
> - * to inline it so whatever fp stuff is done stays inside
> - */
> -static noinline void dcn10_resource_construct_fp(
> -	struct dc *dc)
> -{
> -	if (dc->ctx->dce_version == DCN_VERSION_1_01) {
> -		struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
> -		struct dcn_ip_params *dcn_ip = dc->dcn_ip;
> -		struct display_mode_lib *dml = &dc->dml;
> -
> -		dml->ip.max_num_dpp = 3;
> -		/* TODO how to handle 23.84? */
> -		dcn_soc->dram_clock_change_latency = 23;
> -		dcn_ip->max_num_dpp = 3;
> -	}
> -	if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
> -		dc->dcn_soc->urgent_latency = 3;
> -		dc->debug.disable_dmcu = true;
> -		dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
> -	}
> -
> -
> -	dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
> -	ASSERT(dc->dcn_soc->number_of_channels < 3);
> -	if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
> -		dc->dcn_soc->number_of_channels = 2;
> -
> -	if (dc->dcn_soc->number_of_channels == 1) {
> -		dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
> -		dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
> -		dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
> -		dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
> -		if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
> -			dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
> -		}
> -	}
> -}
> -
>   static bool verify_clock_values(struct dm_pp_clock_levels_with_voltage *clks)
>   {
>   	int i;
> @@ -1510,8 +1469,9 @@ static bool dcn10_resource_construct(
>   	memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults));
>   	memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults));
>   
> -	/* Other architectures we build for build this with soft-float */
> +	DC_FP_START();
>   	dcn10_resource_construct_fp(dc);
> +	DC_FP_END();
>   
>   	if (!dc->config.is_vmin_only_asic)
>   		if (ASICREV_IS_RAVEN2(dc->ctx->asic_id.hw_internal_rev))
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
> index 99644d896222..8b5e6fff5444 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
> @@ -27,6 +27,8 @@
>   #include "dcn10/dcn10_resource.h"
>   
>   #include "dcn10_fpu.h"
> +#include "resource.h"
> +#include "amdgpu_dm/dc_fpu.h"
>   
>   /**
>    * DOC: DCN10 FPU manipulation Overview
> @@ -121,3 +123,39 @@ struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc = {
>   	.writeback_dram_clock_change_latency_us = 23.0,
>   	.return_bus_width_bytes = 64,
>   };
> +
> +void dcn10_resource_construct_fp(struct dc *dc)
> +{
> +	dc_assert_fp_enabled();
> +	if (dc->ctx->dce_version == DCN_VERSION_1_01) {
> +		struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
> +		struct dcn_ip_params *dcn_ip = dc->dcn_ip;
> +		struct display_mode_lib *dml = &dc->dml;
> +
> +		dml->ip.max_num_dpp = 3;
> +		/* TODO how to handle 23.84? */
> +		dcn_soc->dram_clock_change_latency = 23;
> +		dcn_ip->max_num_dpp = 3;
> +	}
> +	if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
> +		dc->dcn_soc->urgent_latency = 3;
> +		dc->debug.disable_dmcu = true;
> +		dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
> +	}
> +
> +
> +	dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
> +	ASSERT(dc->dcn_soc->number_of_channels < 3);
> +	if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
> +		dc->dcn_soc->number_of_channels = 2;
> +
> +	if (dc->dcn_soc->number_of_channels == 1) {
> +		dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
> +		dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
> +		dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
> +		dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
> +		if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
> +			dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
> +		}
> +	}
> +}
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
> index e74ed4b4ce5b..63219ecd8478 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
> @@ -27,4 +27,6 @@
>   #ifndef __DCN10_FPU_H__
>   #define __DCN10_FPU_H__
>   
> +void dcn10_resource_construct_fp(struct dc *dc);
> +
>   #endif /* __DCN20_FPU_H__ */


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v2 1/2] drm/amd/display: move remaining FPU code to dml folder
  2022-10-26  7:19         ` Christian König
@ 2022-10-26 10:41           ` Ao Zhong
  0 siblings, 0 replies; 18+ messages in thread
From: Ao Zhong @ 2022-10-26 10:41 UTC (permalink / raw)
  To: Christian König; +Cc: Leo Li, Harry Wentland, Rodrigo Siqueira, amd-gfx

Hello Christian,

thank you for your review. I got a warning when checking the first patch with checkpatch.pl.

I'll fix it in the next version.

---------------------------------------------------------------
0001-drm-amd-display-move-remaining-FPU-code-to-dml-folde.patch
---------------------------------------------------------------
WARNING:braces {} are not necessary for single statement blocks
#131: FILE: drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c:157:
+               if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
+                       dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
+               }

total: 0 errors, 1 warnings, 110 lines checked

NOTE: For some of the reported defects, checkpatch may be able to
     mechanically convert to the typical style using --fix or --fix-inplace.

0001-drm-amd-display-move-remaining-FPU-code-to-dml-folde.patch has style problems, please review.
---------------------------------------------------------------
0002-drm-amd-display-move-remaining-FPU-code-to-dml-folde.patch
---------------------------------------------------------------
total: 0 errors, 0 warnings, 29 lines checked

0002-drm-amd-display-move-remaining-FPU-code-to-dml-folde.patch has no obvious style problems and is ready for submission.

NOTE: If any of the errors are false positives, please report
     them to the maintainer, see CHECKPATCH in MAINTAINERS.

Am 26.10.22 um 09:19 schrieb Christian König:
> Am 25.10.22 um 23:17 schrieb Ao Zhong:
>> In the process of enabling DCN support for arm64, I found that the
>> dcn10_resource_construct_fp function in dcn10/dcn10_resource.c still
>> needs to use FPU. This will cause compilation to fail on ARM64 platforms
>> because -mgeneral-regs-only is enabled by default to disable the
>> hardware FPU. So move dcn10_resource_construct_fp from dcn10 folder to
>> dml/dcn10 folder to enable hardware FPU for that function.
>
> Offhand, that looks good to me, but our display team needs to take a look.
>
> Feel free to add an Acked-by: Christian König <christian.koenig@amd.com> for the series.
>
> While at it, could you make sure that checkpatch.pl doesn't have anything to complain about in the moved code?
>
> Thanks for the help,
> Christian.
>
>>
>> Signed-off-by: Ao Zhong <hacc1225@gmail.com>
>> ---
>>   .../drm/amd/display/dc/dcn10/dcn10_resource.c | 44 +------------------
>>   .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.c  | 38 ++++++++++++++++
>>   .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.h  |  2 +
>>   3 files changed, 42 insertions(+), 42 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
>> index 56d30baf12df..6bfac8088ab0 100644
>> --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
>> +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
>> @@ -1295,47 +1295,6 @@ static uint32_t read_pipe_fuses(struct dc_context *ctx)
>>       return value;
>>   }
>>   -/*
>> - * Some architectures don't support soft-float (e.g. aarch64), on those
>> - * this function has to be called with hardfloat enabled, make sure not
>> - * to inline it so whatever fp stuff is done stays inside
>> - */
>> -static noinline void dcn10_resource_construct_fp(
>> -    struct dc *dc)
>> -{
>> -    if (dc->ctx->dce_version == DCN_VERSION_1_01) {
>> -        struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
>> -        struct dcn_ip_params *dcn_ip = dc->dcn_ip;
>> -        struct display_mode_lib *dml = &dc->dml;
>> -
>> -        dml->ip.max_num_dpp = 3;
>> -        /* TODO how to handle 23.84? */
>> -        dcn_soc->dram_clock_change_latency = 23;
>> -        dcn_ip->max_num_dpp = 3;
>> -    }
>> -    if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
>> -        dc->dcn_soc->urgent_latency = 3;
>> -        dc->debug.disable_dmcu = true;
>> -        dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
>> -    }
>> -
>> -
>> -    dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
>> -    ASSERT(dc->dcn_soc->number_of_channels < 3);
>> -    if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
>> -        dc->dcn_soc->number_of_channels = 2;
>> -
>> -    if (dc->dcn_soc->number_of_channels == 1) {
>> -        dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
>> -        dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
>> -        dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
>> -        dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
>> -        if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
>> -            dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
>> -        }
>> -    }
>> -}
>> -
>>   static bool verify_clock_values(struct dm_pp_clock_levels_with_voltage *clks)
>>   {
>>       int i;
>> @@ -1510,8 +1469,9 @@ static bool dcn10_resource_construct(
>>       memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults));
>>       memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults));
>>   -    /* Other architectures we build for build this with soft-float */
>> +    DC_FP_START();
>>       dcn10_resource_construct_fp(dc);
>> +    DC_FP_END();
>>         if (!dc->config.is_vmin_only_asic)
>>           if (ASICREV_IS_RAVEN2(dc->ctx->asic_id.hw_internal_rev))
>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
>> index 99644d896222..8b5e6fff5444 100644
>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
>> @@ -27,6 +27,8 @@
>>   #include "dcn10/dcn10_resource.h"
>>     #include "dcn10_fpu.h"
>> +#include "resource.h"
>> +#include "amdgpu_dm/dc_fpu.h"
>>     /**
>>    * DOC: DCN10 FPU manipulation Overview
>> @@ -121,3 +123,39 @@ struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc = {
>>       .writeback_dram_clock_change_latency_us = 23.0,
>>       .return_bus_width_bytes = 64,
>>   };
>> +
>> +void dcn10_resource_construct_fp(struct dc *dc)
>> +{
>> +    dc_assert_fp_enabled();
>> +    if (dc->ctx->dce_version == DCN_VERSION_1_01) {
>> +        struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
>> +        struct dcn_ip_params *dcn_ip = dc->dcn_ip;
>> +        struct display_mode_lib *dml = &dc->dml;
>> +
>> +        dml->ip.max_num_dpp = 3;
>> +        /* TODO how to handle 23.84? */
>> +        dcn_soc->dram_clock_change_latency = 23;
>> +        dcn_ip->max_num_dpp = 3;
>> +    }
>> +    if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
>> +        dc->dcn_soc->urgent_latency = 3;
>> +        dc->debug.disable_dmcu = true;
>> +        dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
>> +    }
>> +
>> +
>> +    dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
>> +    ASSERT(dc->dcn_soc->number_of_channels < 3);
>> +    if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
>> +        dc->dcn_soc->number_of_channels = 2;
>> +
>> +    if (dc->dcn_soc->number_of_channels == 1) {
>> +        dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
>> +        dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
>> +        dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
>> +        dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
>> +        if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
>> +            dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
>> +        }
>> +    }
>> +}
>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
>> index e74ed4b4ce5b..63219ecd8478 100644
>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
>> @@ -27,4 +27,6 @@
>>   #ifndef __DCN10_FPU_H__
>>   #define __DCN10_FPU_H__
>>   +void dcn10_resource_construct_fp(struct dc *dc);
>> +
>>   #endif /* __DCN20_FPU_H__ */
>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH v2 1/2] drm/amd/display: move remaining FPU code to dml folder
  2022-10-25 15:42   ` [PATCH RESEND] " Rodrigo Siqueira
@ 2022-10-26 11:02     ` Ao Zhong
  2022-10-26 11:02     ` [PATCH v2 2/2] " Ao Zhong
                       ` (2 subsequent siblings)
  3 siblings, 0 replies; 18+ messages in thread
From: Ao Zhong @ 2022-10-26 11:02 UTC (permalink / raw)
  To: Rodrigo Siqueira
  Cc: Leo Li, Ao Zhong, Harry Wentland, Christian König, amd-gfx

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain, Size: 5470 bytes --]

In the process of enabling DCN support for arm64, I found that the
dcn10_resource_construct_fp function in dcn10/dcn10_resource.c still
needs to use FPU. This will cause compilation to fail on ARM64 platforms
because -mgeneral-regs-only is enabled by default to disable the
hardware FPU. So move dcn10_resource_construct_fp from dcn10 folder to
dml/dcn10 folder to enable hardware FPU for that function.

Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Ao Zhong <hacc1225@gmail.com>
---
 .../drm/amd/display/dc/dcn10/dcn10_resource.c | 44 +------------------
 .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.c  | 37 ++++++++++++++++
 .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.h  |  2 +
 3 files changed, 41 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index 56d30baf12df..6bfac8088ab0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
@@ -1295,47 +1295,6 @@ static uint32_t read_pipe_fuses(struct dc_context *ctx)
 	return value;
 }
 
-/*
- * Some architectures don't support soft-float (e.g. aarch64), on those
- * this function has to be called with hardfloat enabled, make sure not
- * to inline it so whatever fp stuff is done stays inside
- */
-static noinline void dcn10_resource_construct_fp(
-	struct dc *dc)
-{
-	if (dc->ctx->dce_version == DCN_VERSION_1_01) {
-		struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
-		struct dcn_ip_params *dcn_ip = dc->dcn_ip;
-		struct display_mode_lib *dml = &dc->dml;
-
-		dml->ip.max_num_dpp = 3;
-		/* TODO how to handle 23.84? */
-		dcn_soc->dram_clock_change_latency = 23;
-		dcn_ip->max_num_dpp = 3;
-	}
-	if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
-		dc->dcn_soc->urgent_latency = 3;
-		dc->debug.disable_dmcu = true;
-		dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
-	}
-
-
-	dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
-	ASSERT(dc->dcn_soc->number_of_channels < 3);
-	if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
-		dc->dcn_soc->number_of_channels = 2;
-
-	if (dc->dcn_soc->number_of_channels == 1) {
-		dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
-		dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
-		dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
-		dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
-		if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
-			dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
-		}
-	}
-}
-
 static bool verify_clock_values(struct dm_pp_clock_levels_with_voltage *clks)
 {
 	int i;
@@ -1510,8 +1469,9 @@ static bool dcn10_resource_construct(
 	memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults));
 	memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults));
 
-	/* Other architectures we build for build this with soft-float */
+	DC_FP_START();
 	dcn10_resource_construct_fp(dc);
+	DC_FP_END();
 
 	if (!dc->config.is_vmin_only_asic)
 		if (ASICREV_IS_RAVEN2(dc->ctx->asic_id.hw_internal_rev))
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
index 99644d896222..340636f1de9f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
@@ -27,6 +27,8 @@
 #include "dcn10/dcn10_resource.h"
 
 #include "dcn10_fpu.h"
+#include "resource.h"
+#include "amdgpu_dm/dc_fpu.h"
 
 /**
  * DOC: DCN10 FPU manipulation Overview
@@ -121,3 +123,38 @@ struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc = {
 	.writeback_dram_clock_change_latency_us = 23.0,
 	.return_bus_width_bytes = 64,
 };
+
+void dcn10_resource_construct_fp(struct dc *dc)
+{
+	dc_assert_fp_enabled();
+	if (dc->ctx->dce_version == DCN_VERSION_1_01) {
+		struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
+		struct dcn_ip_params *dcn_ip = dc->dcn_ip;
+		struct display_mode_lib *dml = &dc->dml;
+
+		dml->ip.max_num_dpp = 3;
+		/* TODO how to handle 23.84? */
+		dcn_soc->dram_clock_change_latency = 23;
+		dcn_ip->max_num_dpp = 3;
+	}
+	if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
+		dc->dcn_soc->urgent_latency = 3;
+		dc->debug.disable_dmcu = true;
+		dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
+	}
+
+
+	dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
+	ASSERT(dc->dcn_soc->number_of_channels < 3);
+	if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
+		dc->dcn_soc->number_of_channels = 2;
+
+	if (dc->dcn_soc->number_of_channels == 1) {
+		dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
+		dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
+		dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
+		dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
+		if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev))
+			dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
+	}
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
index e74ed4b4ce5b..63219ecd8478 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
@@ -27,4 +27,6 @@
 #ifndef __DCN10_FPU_H__
 #define __DCN10_FPU_H__
 
+void dcn10_resource_construct_fp(struct dc *dc);
+
 #endif /* __DCN20_FPU_H__ */
-- 
2.37.4


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v2 2/2] drm/amd/display: move remaining FPU code to dml folder
  2022-10-25 15:42   ` [PATCH RESEND] " Rodrigo Siqueira
  2022-10-26 11:02     ` [PATCH v2 1/2] " Ao Zhong
@ 2022-10-26 11:02     ` Ao Zhong
  2022-10-26 11:13     ` [PATCH v3 1/2] " Ao Zhong
  2022-10-26 11:13     ` [PATCH v3 2/2] " Ao Zhong
  3 siblings, 0 replies; 18+ messages in thread
From: Ao Zhong @ 2022-10-26 11:02 UTC (permalink / raw)
  To: Rodrigo Siqueira
  Cc: Leo Li, Ao Zhong, Harry Wentland, Christian König, amd-gfx

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain, Size: 2916 bytes --]

pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
These two operations in dcn32/dcn32_resource.c still need to use the FPU.
This causes compilation to fail on ARM64 platforms because
-mgeneral-regs-only is enabled by default to disable the hardware FPU.
Therefore, following the dcn31_zero_pipe_dcc_fraction function in
dml/dcn31/dcn31_fpu.c, define the dcn32_zero_pipe_dcc_fraction function
in dcn32_fpu.c and move the above two operations into this function.

Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Ao Zhong <hacc1225@gmail.com>
---
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 5 +++--
 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 8 ++++++++
 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  | 3 +++
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index a88dd7b3d1c1..287b7fa9bf41 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -1918,8 +1918,9 @@ int dcn32_populate_dml_pipes_from_context(
 		timing = &pipe->stream->timing;
 
 		pipes[pipe_cnt].pipe.src.gpuvm = true;
-		pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
-		pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+		DC_FP_START();
+		dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
+		DC_FP_END();
 		pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
 		pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet
 		pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 819de0f11012..58772fce6437 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -2521,3 +2521,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
 	}
 }
 
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+				  int pipe_cnt)
+{
+	dc_assert_fp_enabled();
+
+	pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
+	pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
index 3a3dc2ce4c73..ab010e7e840b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
@@ -73,4 +73,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
 
 void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
 
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+				  int pipe_cnt);
+
 #endif
-- 
2.37.4


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v3 1/2] drm/amd/display: move remaining FPU code to dml folder
  2022-10-25 15:42   ` [PATCH RESEND] " Rodrigo Siqueira
  2022-10-26 11:02     ` [PATCH v2 1/2] " Ao Zhong
  2022-10-26 11:02     ` [PATCH v2 2/2] " Ao Zhong
@ 2022-10-26 11:13     ` Ao Zhong
  2022-10-26 11:13     ` [PATCH v3 2/2] " Ao Zhong
  3 siblings, 0 replies; 18+ messages in thread
From: Ao Zhong @ 2022-10-26 11:13 UTC (permalink / raw)
  To: Rodrigo Siqueira
  Cc: Leo Li, Ao Zhong, Harry Wentland, Christian König, amd-gfx

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain, Size: 5470 bytes --]

In the process of enabling DCN support for arm64, I found that the
dcn10_resource_construct_fp function in dcn10/dcn10_resource.c still
needs to use FPU. This will cause compilation to fail on ARM64 platforms
because -mgeneral-regs-only is enabled by default to disable the
hardware FPU. So move dcn10_resource_construct_fp from dcn10 folder to
dml/dcn10 folder to enable hardware FPU for that function.

Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Ao Zhong <hacc1225@gmail.com>
---
 .../drm/amd/display/dc/dcn10/dcn10_resource.c | 44 +------------------
 .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.c  | 37 ++++++++++++++++
 .../drm/amd/display/dc/dml/dcn10/dcn10_fpu.h  |  2 +
 3 files changed, 41 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index 56d30baf12df..6bfac8088ab0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
@@ -1295,47 +1295,6 @@ static uint32_t read_pipe_fuses(struct dc_context *ctx)
 	return value;
 }
 
-/*
- * Some architectures don't support soft-float (e.g. aarch64), on those
- * this function has to be called with hardfloat enabled, make sure not
- * to inline it so whatever fp stuff is done stays inside
- */
-static noinline void dcn10_resource_construct_fp(
-	struct dc *dc)
-{
-	if (dc->ctx->dce_version == DCN_VERSION_1_01) {
-		struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
-		struct dcn_ip_params *dcn_ip = dc->dcn_ip;
-		struct display_mode_lib *dml = &dc->dml;
-
-		dml->ip.max_num_dpp = 3;
-		/* TODO how to handle 23.84? */
-		dcn_soc->dram_clock_change_latency = 23;
-		dcn_ip->max_num_dpp = 3;
-	}
-	if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
-		dc->dcn_soc->urgent_latency = 3;
-		dc->debug.disable_dmcu = true;
-		dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
-	}
-
-
-	dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
-	ASSERT(dc->dcn_soc->number_of_channels < 3);
-	if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
-		dc->dcn_soc->number_of_channels = 2;
-
-	if (dc->dcn_soc->number_of_channels == 1) {
-		dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
-		dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
-		dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
-		dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
-		if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
-			dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
-		}
-	}
-}
-
 static bool verify_clock_values(struct dm_pp_clock_levels_with_voltage *clks)
 {
 	int i;
@@ -1510,8 +1469,9 @@ static bool dcn10_resource_construct(
 	memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults));
 	memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults));
 
-	/* Other architectures we build for build this with soft-float */
+	DC_FP_START();
 	dcn10_resource_construct_fp(dc);
+	DC_FP_END();
 
 	if (!dc->config.is_vmin_only_asic)
 		if (ASICREV_IS_RAVEN2(dc->ctx->asic_id.hw_internal_rev))
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
index 99644d896222..340636f1de9f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
@@ -27,6 +27,8 @@
 #include "dcn10/dcn10_resource.h"
 
 #include "dcn10_fpu.h"
+#include "resource.h"
+#include "amdgpu_dm/dc_fpu.h"
 
 /**
  * DOC: DCN10 FPU manipulation Overview
@@ -121,3 +123,38 @@ struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc = {
 	.writeback_dram_clock_change_latency_us = 23.0,
 	.return_bus_width_bytes = 64,
 };
+
+void dcn10_resource_construct_fp(struct dc *dc)
+{
+	dc_assert_fp_enabled();
+	if (dc->ctx->dce_version == DCN_VERSION_1_01) {
+		struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
+		struct dcn_ip_params *dcn_ip = dc->dcn_ip;
+		struct display_mode_lib *dml = &dc->dml;
+
+		dml->ip.max_num_dpp = 3;
+		/* TODO how to handle 23.84? */
+		dcn_soc->dram_clock_change_latency = 23;
+		dcn_ip->max_num_dpp = 3;
+	}
+	if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
+		dc->dcn_soc->urgent_latency = 3;
+		dc->debug.disable_dmcu = true;
+		dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
+	}
+
+
+	dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
+	ASSERT(dc->dcn_soc->number_of_channels < 3);
+	if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
+		dc->dcn_soc->number_of_channels = 2;
+
+	if (dc->dcn_soc->number_of_channels == 1) {
+		dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
+		dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
+		dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
+		dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
+		if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev))
+			dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
+	}
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
index e74ed4b4ce5b..63219ecd8478 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
@@ -27,4 +27,6 @@
 #ifndef __DCN10_FPU_H__
 #define __DCN10_FPU_H__
 
+void dcn10_resource_construct_fp(struct dc *dc);
+
 #endif /* __DCN20_FPU_H__ */
-- 
2.37.4


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH v3 2/2] drm/amd/display: move remaining FPU code to dml folder
  2022-10-25 15:42   ` [PATCH RESEND] " Rodrigo Siqueira
                       ` (2 preceding siblings ...)
  2022-10-26 11:13     ` [PATCH v3 1/2] " Ao Zhong
@ 2022-10-26 11:13     ` Ao Zhong
  2022-10-26 16:12       ` Rodrigo Siqueira
  3 siblings, 1 reply; 18+ messages in thread
From: Ao Zhong @ 2022-10-26 11:13 UTC (permalink / raw)
  To: Rodrigo Siqueira
  Cc: Leo Li, Ao Zhong, Harry Wentland, Christian König, amd-gfx

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain, Size: 2916 bytes --]

pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
These two operations in dcn32/dcn32_resource.c still need to use the FPU.
This causes compilation to fail on ARM64 platforms because
-mgeneral-regs-only is enabled by default to disable the hardware FPU.
Therefore, following the dcn31_zero_pipe_dcc_fraction function in
dml/dcn31/dcn31_fpu.c, define the dcn32_zero_pipe_dcc_fraction function
in dcn32_fpu.c and move the above two operations into this function.

Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Ao Zhong <hacc1225@gmail.com>
---
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 5 +++--
 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 8 ++++++++
 drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  | 3 +++
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index a88dd7b3d1c1..287b7fa9bf41 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -1918,8 +1918,9 @@ int dcn32_populate_dml_pipes_from_context(
 		timing = &pipe->stream->timing;
 
 		pipes[pipe_cnt].pipe.src.gpuvm = true;
-		pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
-		pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+		DC_FP_START();
+		dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
+		DC_FP_END();
 		pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
 		pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet
 		pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 819de0f11012..58772fce6437 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -2521,3 +2521,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
 	}
 }
 
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+				  int pipe_cnt)
+{
+	dc_assert_fp_enabled();
+
+	pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
+	pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
index 3a3dc2ce4c73..ab010e7e840b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
@@ -73,4 +73,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
 
 void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
 
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+				  int pipe_cnt);
+
 #endif
-- 
2.37.4


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH v3 2/2] drm/amd/display: move remaining FPU code to dml folder
  2022-10-26 11:13     ` [PATCH v3 2/2] " Ao Zhong
@ 2022-10-26 16:12       ` Rodrigo Siqueira
  2022-10-26 21:17         ` Ao Zhong
  0 siblings, 1 reply; 18+ messages in thread
From: Rodrigo Siqueira @ 2022-10-26 16:12 UTC (permalink / raw)
  To: Ao Zhong; +Cc: Leo Li, Harry Wentland, Christian König, amd-gfx



On 10/26/22 07:13, Ao Zhong wrote:
> pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
> pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
> these two operations in dcn32/dcn32_resource.c still need to use FPU,
> This will cause compilation to fail on ARM64 platforms because
> -mgeneral-regs-only is enabled by default to disable the hardware FPU.
> Therefore, imitate the dcn31_zero_pipe_dcc_fraction function in
> dml/dcn31/dcn31_fpu.c, declare the dcn32_zero_pipe_dcc_fraction function
> in dcn32_fpu.c, and move above two operations into this function.
> 
> Acked-by: Christian König <christian.koenig@amd.com>
> Signed-off-by: Ao Zhong <hacc1225@gmail.com>
> ---
>   drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 5 +++--
>   drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 8 ++++++++
>   drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  | 3 +++
>   3 files changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
> index a88dd7b3d1c1..287b7fa9bf41 100644
> --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
> +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
> @@ -1918,8 +1918,9 @@ int dcn32_populate_dml_pipes_from_context(
>   		timing = &pipe->stream->timing;
>   
>   		pipes[pipe_cnt].pipe.src.gpuvm = true;
> -		pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
> -		pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
> +		DC_FP_START();
> +		dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
> +		DC_FP_END();
>   		pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
>   		pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet
>   		pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
> index 819de0f11012..58772fce6437 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
> @@ -2521,3 +2521,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
>   	}
>   }
>   
> +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
> +				  int pipe_cnt)
> +{
> +	dc_assert_fp_enabled();
> +
> +	pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
> +	pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
> +}
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
> index 3a3dc2ce4c73..ab010e7e840b 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
> @@ -73,4 +73,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
>   
>   void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
>   
> +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
> +				  int pipe_cnt);
> +
>   #endif

Hi Ao,

First of all, thanks a lot for your patchset.

For both patches:

Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>

And I also applied them to amd-staging-drm-next.

Btw, if you are using git-send-email for sending patches, I recommend 
the following options:

git send-email --annotate --cover-letter --thread --no-chain-reply-to 
--to="EMAILS" --cc="mailing@list.com" <SHA>

Always add a cover letter; it makes it easier to follow the patchset, 
and you can also describe each change in the cover letter.

When you send that other patch enabling ARM64, please add as many 
details as possible in the cover letter. Keep in mind that we have been 
working on isolating the FPU code in a way that does not regress 
any of our ASICs, which means that every change was well-tested on 
multiple devices. Anyway, maybe you can refer to this cover letter when 
writing the commit message:

https://patchwork.freedesktop.org/series/93042/

Finally, do you have a use case for this change? I mean, ARM64 + AMD dGPU.

Thanks again!
Siqueira


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3 2/2] drm/amd/display: move remaining FPU code to dml folder
  2022-10-26 16:12       ` Rodrigo Siqueira
@ 2022-10-26 21:17         ` Ao Zhong
  2022-10-27 15:38           ` Rodrigo Siqueira
  0 siblings, 1 reply; 18+ messages in thread
From: Ao Zhong @ 2022-10-26 21:17 UTC (permalink / raw)
  To: Rodrigo Siqueira; +Cc: amd-gfx

Hi Rodrigo,

Thanks for your review! This is my first time submitting a patch to the kernel.

I'm not very good at using these tools yet. 😂

Recently I got a Huawei Qingyun W510 (擎云 W510) ARM workstation

from the second-hand market in China. It's SBSA and has a Kunpeng 920 (3211k) SoC

with 24 Huawei-customized TSV110 cores. Since it has an SFF form factor and my machine

supports PCIe 4.0 (it looks like some W510 units have it disabled), I installed an RX 6400 in it

to use as my daily driver. It has decent performance. I uploaded a benchmark result to Geekbench.

Link: https://browser.geekbench.com/v5/cpu/18237269

Ao

Am 26.10.22 um 18:12 schrieb Rodrigo Siqueira:
>
>
> On 10/26/22 07:13, Ao Zhong wrote:
>> pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
>> pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
>> these two operations in dcn32/dcn32_resource.c still need to use FPU,
>> This will cause compilation to fail on ARM64 platforms because
>> -mgeneral-regs-only is enabled by default to disable the hardware FPU.
>> Therefore, imitate the dcn31_zero_pipe_dcc_fraction function in
>> dml/dcn31/dcn31_fpu.c, declare the dcn32_zero_pipe_dcc_fraction function
>> in dcn32_fpu.c, and move above two operations into this function.
>>
>> Acked-by: Christian König <christian.koenig@amd.com>
>> Signed-off-by: Ao Zhong <hacc1225@gmail.com>
>> ---
>>   drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 5 +++--
>>   drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 8 ++++++++
>>   drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  | 3 +++
>>   3 files changed, 14 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
>> index a88dd7b3d1c1..287b7fa9bf41 100644
>> --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
>> +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
>> @@ -1918,8 +1918,9 @@ int dcn32_populate_dml_pipes_from_context(
>>           timing = &pipe->stream->timing;
>>             pipes[pipe_cnt].pipe.src.gpuvm = true;
>> -        pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
>> -        pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
>> +        DC_FP_START();
>> +        dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
>> +        DC_FP_END();
>>           pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
>>           pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet
>>           pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
>> index 819de0f11012..58772fce6437 100644
>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
>> @@ -2521,3 +2521,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
>>       }
>>   }
>>   +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
>> +                  int pipe_cnt)
>> +{
>> +    dc_assert_fp_enabled();
>> +
>> +    pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
>> +    pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
>> +}
>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
>> index 3a3dc2ce4c73..ab010e7e840b 100644
>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
>> @@ -73,4 +73,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
>>     void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
>>   +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
>> +                  int pipe_cnt);
>> +
>>   #endif
>
> Hi Ao,
>
> First of all, thanks a lot for your patchset.
>
> For both patches:
>
> Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
>
> And I also applied them to amd-staging-drm-next.
>
> Btw, if you are using git-send-email for sending patches, I recommend the following options:
>
> git send-email --annotate --cover-letter --thread --no-chain-reply-to --to="EMAILS" --cc="mailing@list.com" <SHA>
>
> Always add a cover letter, it makes it easier to follow the patchset, and you can also describe each change in the cover letter.
>
> When you send that other patch enabling ARM64, please add as many details as possible in the cover letter. Keep in mind that we have been working for isolating those FPU codes in a way that we do not regress any of our ASICs, which means that every change was well-tested on multiple devices. Anyway, maybe you can refer to this cover letter to write down the commit message:
>
> https://patchwork.freedesktop.org/series/93042/
>
> Finally, do you have a use case for this change? I mean, ARM64 + AMD dGPU.
>
> Thanks again!
> Siqueira
>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3 2/2] drm/amd/display: move remaining FPU code to dml folder
  2022-10-26 21:17         ` Ao Zhong
@ 2022-10-27 15:38           ` Rodrigo Siqueira
  2022-10-27 16:48             ` Ao Zhong
  2022-10-27 16:51             ` Ao Zhong
  0 siblings, 2 replies; 18+ messages in thread
From: Rodrigo Siqueira @ 2022-10-27 15:38 UTC (permalink / raw)
  To: Ao Zhong; +Cc: amd-gfx

Hi Ao,

Could you share a link that describes your workstation?

Thanks

On 10/26/22 17:17, Ao Zhong wrote:
> Hi Rodrigo,
> 
> Thanks for your review! This is my first time submitting a patch to the kernel.
> 
> I'm not very good at using these tools yet. 😂
> 
> Recently I got a Huawei Qingyun W510 (擎云 W510) ARM workstation
> 
> from the second-hand market in China. It's SBSA and has a Kunpeng 920 (3211k) SoC
> 
> with 24 Huawei-customized TSV110 cores. Since it's SFF form factor, and my machine
> 
> supports PCIe 4.0 (looks like some W510 have it disabled), I installed an RX 6400 on it
> 
> as my daily drive machine. It has decent performance. I uploaded a benchmark result on Geekbench.
> 
> Link: https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fbrowser.geekbench.com%2Fv5%2Fcpu%2F18237269&amp;data=05%7C01%7CRodrigo.Siqueira%40amd.com%7Cdaa18df14f004d2d621d08dab7977866%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C638024158436988558%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&amp;sdata=Iyq4tWJL%2FfXuKB9xAUaVTQQmJQ0GRZ2rH%2F%2BXPTT%2F2tc%3D&amp;reserved=0
> 
> Ao
> 
> Am 26.10.22 um 18:12 schrieb Rodrigo Siqueira:
>>
>>
>> On 10/26/22 07:13, Ao Zhong wrote:
>>> pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
>>> pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
>>> these two operations in dcn32/dcn32_resource.c still need to use FPU,
>>> This will cause compilation to fail on ARM64 platforms because
>>> -mgeneral-regs-only is enabled by default to disable the hardware FPU.
>>> Therefore, imitate the dcn31_zero_pipe_dcc_fraction function in
>>> dml/dcn31/dcn31_fpu.c, declare the dcn32_zero_pipe_dcc_fraction function
>>> in dcn32_fpu.c, and move above two operations into this function.
>>>
>>> Acked-by: Christian König <christian.koenig@amd.com>
>>> Signed-off-by: Ao Zhong <hacc1225@gmail.com>
>>> ---
>>>    drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 5 +++--
>>>    drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 8 ++++++++
>>>    drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  | 3 +++
>>>    3 files changed, 14 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
>>> index a88dd7b3d1c1..287b7fa9bf41 100644
>>> --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
>>> +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
>>> @@ -1918,8 +1918,9 @@ int dcn32_populate_dml_pipes_from_context(
>>>            timing = &pipe->stream->timing;
>>>              pipes[pipe_cnt].pipe.src.gpuvm = true;
>>> -        pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
>>> -        pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
>>> +        DC_FP_START();
>>> +        dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
>>> +        DC_FP_END();
>>>            pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
>>>            pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet
>>>            pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
>>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
>>> index 819de0f11012..58772fce6437 100644
>>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
>>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
>>> @@ -2521,3 +2521,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
>>>        }
>>>    }
>>>    +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
>>> +                  int pipe_cnt)
>>> +{
>>> +    dc_assert_fp_enabled();
>>> +
>>> +    pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
>>> +    pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
>>> +}
>>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
>>> index 3a3dc2ce4c73..ab010e7e840b 100644
>>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
>>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
>>> @@ -73,4 +73,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
>>>      void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
>>>    +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
>>> +                  int pipe_cnt);
>>> +
>>>    #endif
>>
>> Hi Ao,
>>
>> First of all, thanks a lot for your patchset.
>>
>> For both patches:
>>
>> Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
>>
>> And I also applied them to amd-staging-drm-next.
>>
>> Btw, if you are using git-send-email for sending patches, I recommend the following options:
>>
>> git send-email --annotate --cover-letter --thread --no-chain-reply-to --to="EMAILS" --cc="mailing@list.com" <SHA>
>>
>> Always add a cover letter, it makes it easier to follow the patchset, and you can also describe each change in the cover letter.
>>
>> When you send that other patch enabling ARM64, please add as many details as possible in the cover letter. Keep in mind that we have been working for isolating those FPU codes in a way that we do not regress any of our ASICs, which means that every change was well-tested on multiple devices. Anyway, maybe you can refer to this cover letter to write down the commit message:
>>
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fpatchwork.freedesktop.org%2Fseries%2F93042%2F&amp;data=05%7C01%7CRodrigo.Siqueira%40amd.com%7Cdaa18df14f004d2d621d08dab7977866%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C638024158436988558%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&amp;sdata=0GMN1Uj9iuQv2ZjipDHnl29V0UvWk6IL4XwlehdPNLA%3D&amp;reserved=0
>>
>> Finally, do you have a use case for this change? I mean, ARM64 + AMD dGPU.
>>
>> Thanks again!
>> Siqueira
>>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3 2/2] drm/amd/display: move remaining FPU code to dml folder
  2022-10-27 15:38           ` Rodrigo Siqueira
@ 2022-10-27 16:48             ` Ao Zhong
  2022-10-27 16:51             ` Ao Zhong
  1 sibling, 0 replies; 18+ messages in thread
From: Ao Zhong @ 2022-10-27 16:48 UTC (permalink / raw)
  To: Rodrigo Siqueira; +Cc: amd-gfx

There isn't much information on the internet for Qingyun W510 as this
is not a retail machine. But I'm happy to provide any details about
this machine.

The Qingyun W510 is powered by Huawei's Kunpeng 920 server SoC and is
SBSA compatible.
Information about Kunpeng 920 can be found here.
Link: https://en.wikichip.org/wiki/hisilicon/kunpeng/920-6426
But not all the functions provided by the Kunpeng 920 can be used on
the Qingyun W510, such as the SMMU (the IOMMU on ARM), the SAS
controller, or the Encryption Acceleration Engine. This machine has an
SFF form factor. It has only two SO-DIMM memory slots and doesn't
support ECC (some Kunpeng desktop motherboards support that). It also
provides 1x PCIe x4, 1x PCIe x16, and 2x M.2 slots (PCIe x4).
It also has 2 SATA 3.0 ports, one for the optical drive and the other
for the HDD. This machine is shipped with either AMD's RX 550 or a
Jingjia Micro JM7201 GPU. My machine comes with the JM7201, a GPU
independently developed in China. Unfortunately, since there is no
open source driver, I can only use the EFI framebuffer with the mainline
kernel. The Qingyun W510 also has a Huawei Hi1103LPC WiFi/Bluetooth
module, and a power button with a Goodix fingerprint sensor. Since
none of them have open source drivers, I can't use them with the
mainline kernel.

There are also two similar-looking machines, the Qingyun W515 and the
Qingyun W525, which use the HiSilicon Kirin 990 and HiSilicon Pangu M900
SoCs; both SoCs are based on mobile platforms.

My workstation is probably a DVT-stage unit, because Huawei only
allows users to use PCIe 3.0 in the release version of the Qingyun W510.
Some machines may not be able to use more than 32 GB of memory due
to firmware limitations.


Am Do., 27. Okt. 2022 um 17:38 Uhr schrieb Rodrigo Siqueira
<Rodrigo.Siqueira@amd.com>:
>
> Hi Ao,
>
> Could you share a link that describe your workstation?
>
> Thanks
>
> On 10/26/22 17:17, Ao Zhong wrote:
> > Hi Rodrigo,
> >
> > Thanks for your review! This is my first time submitting a patch to the kernel.
> >
> > I'm not very good at using these tools yet. 😂
> >
> > Recently I got a Huawei Qingyun W510 (擎云 W510) ARM workstation
> >
> > from the second-hand market in China. It's SBSA and has a Kunpeng 920 (3211k) SoC
> >
> > with 24 Huawei-customized TSV110 cores. Since it's SFF form factor, and my machine
> >
> > supports PCIe 4.0 (looks like some W510 have it disabled), I installed an RX 6400 on it
> >
> > as my daily drive machine. It has decent performance. I uploaded a benchmark result on Geekbench.
> >
> > Link: https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fbrowser.geekbench.com%2Fv5%2Fcpu%2F18237269&amp;data=05%7C01%7CRodrigo.Siqueira%40amd.com%7Cdaa18df14f004d2d621d08dab7977866%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C638024158436988558%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&amp;sdata=Iyq4tWJL%2FfXuKB9xAUaVTQQmJQ0GRZ2rH%2F%2BXPTT%2F2tc%3D&amp;reserved=0
> >
> > Ao
> >
> > Am 26.10.22 um 18:12 schrieb Rodrigo Siqueira:
> >>
> >>
> >> On 10/26/22 07:13, Ao Zhong wrote:
> >>> pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
> >>> pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
> >>> these two operations in dcn32/dcn32_resource.c still need to use FPU,
> >>> This will cause compilation to fail on ARM64 platforms because
> >>> -mgeneral-regs-only is enabled by default to disable the hardware FPU.
> >>> Therefore, imitate the dcn31_zero_pipe_dcc_fraction function in
> >>> dml/dcn31/dcn31_fpu.c, declare the dcn32_zero_pipe_dcc_fraction function
> >>> in dcn32_fpu.c, and move above two operations into this function.
> >>>
> >>> Acked-by: Christian König <christian.koenig@amd.com>
> >>> Signed-off-by: Ao Zhong <hacc1225@gmail.com>
> >>> ---
> >>>    drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 5 +++--
> >>>    drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 8 ++++++++
> >>>    drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  | 3 +++
> >>>    3 files changed, 14 insertions(+), 2 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
> >>> index a88dd7b3d1c1..287b7fa9bf41 100644
> >>> --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
> >>> +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
> >>> @@ -1918,8 +1918,9 @@ int dcn32_populate_dml_pipes_from_context(
> >>>            timing = &pipe->stream->timing;
> >>>              pipes[pipe_cnt].pipe.src.gpuvm = true;
> >>> -        pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
> >>> -        pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
> >>> +        DC_FP_START();
> >>> +        dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
> >>> +        DC_FP_END();
> >>>            pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
> >>>            pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet
> >>>            pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
> >>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
> >>> index 819de0f11012..58772fce6437 100644
> >>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
> >>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
> >>> @@ -2521,3 +2521,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
> >>>        }
> >>>    }
> >>>    +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
> >>> +                  int pipe_cnt)
> >>> +{
> >>> +    dc_assert_fp_enabled();
> >>> +
> >>> +    pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
> >>> +    pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
> >>> +}
> >>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
> >>> index 3a3dc2ce4c73..ab010e7e840b 100644
> >>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
> >>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
> >>> @@ -73,4 +73,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
> >>>      void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
> >>>    +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
> >>> +                  int pipe_cnt);
> >>> +
> >>>    #endif
> >>
> >> Hi Ao,
> >>
> >> First of all, thanks a lot for your patchset.
> >>
> >> For both patches:
> >>
> >> Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
> >>
> >> And I also applied them to amd-staging-drm-next.
> >>
> >> Btw, if you are using git-send-email for sending patches, I recommend the following options:
> >>
> >> git send-email --annotate --cover-letter --thread --no-chain-reply-to --to="EMAILS" --cc="mailing@list.com" <SHA>
> >>
> >> Always add a cover letter, it makes it easier to follow the patchset, and you can also describe each change in the cover letter.
> >>
> >> When you send that other patch enabling ARM64, please add as many details as possible in the cover letter. Keep in mind that we have been working for isolating those FPU codes in a way that we do not regress any of our ASICs, which means that every change was well-tested on multiple devices. Anyway, maybe you can refer to this cover letter to write down the commit message:
> >>
> >> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fpatchwork.freedesktop.org%2Fseries%2F93042%2F&amp;data=05%7C01%7CRodrigo.Siqueira%40amd.com%7Cdaa18df14f004d2d621d08dab7977866%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C638024158436988558%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&amp;sdata=0GMN1Uj9iuQv2ZjipDHnl29V0UvWk6IL4XwlehdPNLA%3D&amp;reserved=0
> >>
> >> Finally, do you have a use case for this change? I mean, ARM64 + AMD dGPU.
> >>
> >> Thanks again!
> >> Siqueira
> >>

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH v3 2/2] drm/amd/display: move remaining FPU code to dml folder
  2022-10-27 15:38           ` Rodrigo Siqueira
  2022-10-27 16:48             ` Ao Zhong
@ 2022-10-27 16:51             ` Ao Zhong
  1 sibling, 0 replies; 18+ messages in thread
From: Ao Zhong @ 2022-10-27 16:51 UTC (permalink / raw)
  To: Rodrigo Siqueira; +Cc: amd-gfx

Here is some lshw information:

hacc-arm64-pc
    description: Desktop Computer
    product: HUAWEIPGU-WBY0 (C233)
    vendor: HUAWEI
    version: D1060
    serial:
    width: 64 bits
    capabilities: smbios-3.2.0 dmi-3.2.0 smp cp15_barrier setend swp tagged_addr_disabled
    configuration: chassis=desktop family=HUAWEI sku=C233 uuid=
  *-core
       description: Motherboard
       product: HUAWEIPGU-WBY0-PCB
       vendor: HUAWEI
       physical id: 0
       version: D1060
       serial:
       slot: Null
     *-firmware
          description: BIOS
          vendor: Byosoft
          physical id: 2
          version: 1.11
          date: 02/07/2020
          size: 128KiB
          capabilities: pci pnp upgrade cdboot bootselect edd acpi biosbootspecification uefi
     *-cache:0
          description: L1 cache
          physical id: 5
          slot: L1 Instruction Cache
          size: 1536KiB
          capacity: 1536KiB
          capabilities: synchronous internal write-back instruction
          configuration: level=1
     *-cache:1
          description: L1 cache
          physical id: 6
          slot: L1 Data Cache
          size: 1536KiB
          capacity: 1536KiB
          capabilities: synchronous internal write-back data
          configuration: level=1
     *-cache:2
          description: L2 cache
          physical id: 7
          slot: L2 Cache
          size: 12MiB
          capacity: 12MiB
          capabilities: synchronous internal varies unified
          configuration: level=2
     *-cache:3
          description: L3 cache
          physical id: 8
          slot: L3 Cache
          size: 24MiB
          capacity: 24MiB
          capabilities: synchronous internal varies unified
          configuration: level=3
     *-cpu
          description: CPU
          product: ARM (NULL)
          vendor: HISILICON
          physical id: 9
          bus info: cpu@0
          version: HUAWEI Kunpeng920 3211K
          serial: NULL
          slot: CPU0
          size: 2600MHz
          capacity: 2600MHz
          clock: 100MHz
          capabilities: lm
          configuration: cores=24 enabledcores=24 threads=24
     *-memory
          description: System Memory
          physical id: a
          slot: System board or motherboard
          size: 64GiB
        *-bank:0
             description: DIMM DDR4 Synchronous 3200 MHz (0,3 ns)
             product: F4-3200C22-32GRS
             vendor: Unknown
             physical id: 0
             serial:
             slot: SODIMM_B
             size: 32GiB
             width: 64 bits
             clock: 3200MHz (0.3ns)
        *-bank:1
             description: DIMM DDR4 Synchronous 3200 MHz (0,3 ns)
             product: F4-3200C22-32GRS
             vendor: Unknown
             physical id: 1
             serial:
             slot: SODIMM_A
             size: 32GiB
             width: 64 bits
             clock: 3200MHz (0.3ns)
     *-pci:0
          description: PCI bridge
          product: HiSilicon PCIe Root Port with Gen4
          vendor: Huawei Technologies Co., Ltd.
          physical id: 100
          bus info: pci@0000:00:00.0
          version: 21
          width: 32 bits
          clock: 33MHz
          capabilities: pci pciexpress msi pm normal_decode bus_master cap_list
          configuration: driver=pcieport
          resources: irq:30 ioport:1000(size=4096) memory:e0c00000-e0efffff ioport:80080000000(size=6442450944)
        *-pci
             description: PCI bridge
             product: Navi 10 XL Upstream Port of PCI Express Switch
             vendor: Advanced Micro Devices, Inc. [AMD/ATI]
             physical id: 0
             bus info: pci@0000:01:00.0
             version: c7
             width: 32 bits
             clock: 33MHz
             capabilities: pci pm pciexpress msi normal_decode bus_master cap_list
             configuration: driver=pcieport
             resources: irq:29 memory:e0e00000-e0e03fff ioport:1000(size=4096) memory:e0c00000-e0dfffff ioport:80080000000(size=6442450944)
           *-pci
                description: PCI bridge
                product: Navi 10 XL Downstream Port of PCI Express Switch
                vendor: Advanced Micro Devices, Inc. [AMD/ATI]
                physical id: 0
                bus info: pci@0000:02:00.0
                version: 00
                width: 32 bits
                clock: 33MHz
                capabilities: pci pm pciexpress msi normal_decode bus_master cap_list
                configuration: driver=pcieport
                resources: irq:37 ioport:1000(size=4096) memory:e0c00000-e0dfffff ioport:80080000000(size=6442450944)
              *-display
                   description: VGA compatible controller
                   product: Navi 24 [Radeon RX 6400 / 6500 XT]
                   vendor: Advanced Micro Devices, Inc. [AMD/ATI]
                   physical id: 0
                   bus info: pci@0000:03:00.0
                   logical name: /dev/fb0
                   version: c7
                   width: 64 bits
                   clock: 33MHz
                   capabilities: pm pciexpress msi vga_controller bus_master cap_list rom fb
                   configuration: depth=32 driver=amdgpu latency=0 mode=3440x1440 resolution=3440,1440 visual=truecolor xres=3440 yres=1440
                   resources: iomemory:8010-800f iomemory:8000-7fff irq:250 memory:80100000000-801ffffffff memory:80080000000-800801fffff ioport:1000(size=256) memory:e0c00000-e0cfffff memory:e0d00000-e0d1ffff
              *-multimedia
                   description: Audio device
                   product: Navi 21/23 HDMI/DP Audio Controller
                   vendor: Advanced Micro Devices, Inc. [AMD/ATI]
                   physical id: 0.1
                   bus info: pci@0000:03:00.1
                   logical name: card0
                   logical name: /dev/snd/controlC0
                   logical name: /dev/snd/hwC0D0
                   logical name: /dev/snd/pcmC0D3p
                   logical name: /dev/snd/pcmC0D7p
                   version: 00
                   width: 32 bits
                   clock: 33MHz
                   capabilities: pm pciexpress msi bus_master cap_list
                   configuration: driver=snd_hda_intel latency=0
                   resources: irq:248 memory:e0d20000-e0d23fff
                 *-input:0
                      product: HDA ATI HDMI HDMI/DP,pcm=3
                      physical id: 0
                      logical name: input4
                      logical name: /dev/input/event4
                 *-input:1
                      product: HDA ATI HDMI HDMI/DP,pcm=7
                      physical id: 1
                      logical name: input5
                      logical name: /dev/input/event5
     *-pci:1
          description: PCI bridge
          product: HiSilicon PCIe Root Port with Gen4
          vendor: Huawei Technologies Co., Ltd.
          physical id: 101
          bus info: pci@0000:00:08.0
          version: 21
          width: 32 bits
          clock: 33MHz
          capabilities: pci pciexpress msi pm normal_decode bus_master cap_list
          configuration: driver=pcieport
          resources: irq:31 ioport:2000(size=4096) memory:e0f00000-e10fffff ioport:80018000000(size=2097152)
        *-nvme
             description: NVMe device
             product: KINGSTON SNV2S1000G
             vendor: Kingston Technology Company, Inc.
             physical id: 0
             bus info: pci@0000:04:00.0
             logical name: /dev/nvme0
             version: SBI02102
             serial:
             width: 64 bits
             clock: 33MHz
             capabilities: nvme pm msi pciexpress msix nvm_express bus_master cap_list
             configuration: driver=nvme latency=0 nqn=nqn.2021-03.com.kingston:nvme:nvm-subsystem-sn- state=live
             resources: irq:29 memory:e0f00000-e0f03fff
           *-namespace:0
                description: NVMe disk
                physical id: 0
                logical name: hwmon1
           *-namespace:1
                description: NVMe disk
                physical id: 2
                logical name: /dev/ng0n1
           *-namespace:2
                description: NVMe disk
                physical id: 1
                bus info: nvme@0:1
                logical name: /dev/nvme0n1
                size: 931GiB (1TB)
                capabilities: gpt-1.00 partitioned partitioned:gpt
                configuration: guid= logicalsectorsize=512 sectorsize=512 wwid=eui.00000000000000000026b7784e21fbc5
              *-volume:0 UNCLAIMED
                   description: Windows FAT volume
                   vendor: mkfs.fat
                   physical id: 1
                   bus info: nvme@0:1,1
                   version: FAT32
                   serial:
                   size: 98MiB
                   capacity: 99MiB
                   capabilities: boot fat initialized
                   configuration: FATs=2 filesystem=fat
              *-volume:1
                   description: EFI partition
                   physical id: 2
                   bus info: nvme@0:1,2
                   logical name: /dev/nvme0n1p2
                   logical name: /
                   logical name: /home
                   serial:
                   capacity: 866GiB
                   configuration: mount.fstype=btrfs mount.options=rw,relatime,compress=zstd:1,ssd,discard=async,space_cache=v2,subvolid=257,subvol=/@home state=mounted
              *-volume:2
                   description: EFI partition
                   physical id: 3
                   bus info: nvme@0:1,3
                   logical name: /dev/nvme0n1p3
                   logical name: /boot
                   serial:
                   capacity: 1023MiB
                   configuration: mount.fstype=xfs mount.options=rw,relatime,attr2,discard,inode64,logbufs=8,logbsize=32k,noquota state=mounted
              *-volume:3
                   description: Linux swap volume
                   vendor: Linux
                   physical id: 4
                   bus info: nvme@0:1,4
                   logical name: /dev/nvme0n1p4
                   version: 1
                   serial:
                   size: 64GiB
                   capacity: 64GiB
                   capabilities: nofs swap initialized
                   configuration: filesystem=swap pagesize=4096
     *-pci:2
          description: PCI bridge
          product: HiSilicon PCIe Root Port with Gen4
          vendor: Huawei Technologies Co., Ltd.
          physical id: 102
          bus info: pci@0000:00:0a.0
          version: 21
          width: 32 bits
          clock: 33MHz
          capabilities: pci pciexpress msi pm normal_decode bus_master cap_list
          configuration: driver=pcieport
          resources: irq:32 ioport:3000(size=4096) memory:e1100000-e12fffff ioport:80018200000(size=2097152)
        *-nvme
             description: NVMe device
             product: KXG60ZNV512G TOSHIBA
             vendor: Toshiba Corporation
             physical id: 0
             bus info: pci@0000:05:00.0
             logical name: /dev/nvme1
             version: AGXA4103
             serial:
             width: 64 bits
             clock: 33MHz
             capabilities: nvme pciexpress pm msi msix nvm_express bus_master cap_list
             configuration: driver=nvme latency=0 nqn=nqn.2017-03.jp.co.toshiba:KXG60ZNV512G TOSHIBA: state=live
             resources: irq:29 memory:e1100000-e1103fff
           *-namespace:0
                description: NVMe disk
                physical id: 0
                logical name: hwmon0
           *-namespace:1
                description: NVMe disk
                physical id: 2
                logical name: /dev/ng1n1
           *-namespace:2
                description: NVMe disk
                physical id: 1
                bus info: nvme@1:1
                logical name: /dev/nvme1n1
                size: 476GiB (512GB)
                capabilities: gpt-1.00 partitioned partitioned:gpt
                configuration: guid= logicalsectorsize=512 sectorsize=512 wwid=eui.00000000000000018ce38e0300176ffe
              *-volume
                   description: EFI partition
                   physical id: 1
                   bus info: nvme@1:1,1
                   logical name: /dev/nvme1n1p1
                   logical name: /mnt/SSD2
                   serial:
                   capacity: 476GiB
                   configuration: mount.fstype=btrfs mount.options=rw,relatime,compress=zstd:1,ssd,discard=async,space_cache=v2,subvolid=5,subvol=/ state=mounted
     *-pci:3
          description: PCI bridge
          product: HiSilicon PCIe Root Port with Gen4
          vendor: Huawei Technologies Co., Ltd.
          physical id: c
          bus info: pci@0000:00:0c.0
          version: 21
          width: 32 bits
          clock: 33MHz
          capabilities: pci pciexpress msi pm normal_decode bus_master cap_list
          configuration: driver=pcieport
          resources: irq:33 ioport:4000(size=4096) memory:e0000000-e0bfffff ioport:80018400000(size=2097152)
        *-network UNCLAIMED
             description: Network controller
             product: Huawei Technologies Co., Ltd.
             vendor: Huawei Technologies Co., Ltd.
             physical id: 0
             bus info: pci@0000:06:00.0
             version: 02
             width: 64 bits
             clock: 33MHz
             capabilities: pm msi pciexpress cap_list
             configuration: latency=0
             resources: memory:e0000000-e07fffff memory:e0800000-e0803fff
     *-pci:4
          description: PCI bridge
          product: HiSilicon PCIe Root Port with Gen4
          vendor: Huawei Technologies Co., Ltd.
          physical id: d
          bus info: pci@0000:00:0d.0
          version: 21
          width: 32 bits
          clock: 33MHz
          capabilities: pci pciexpress msi pm normal_decode bus_master cap_list
          configuration: driver=pcieport
          resources: irq:34 ioport:5000(size=4096) memory:e1300000-e14fffff ioport:80018600000(size=2097152)
        *-usb
             description: USB controller
             product: uPD720202 USB 3.0 Host Controller
             vendor: Renesas Technology Corp.
             physical id: 0
             bus info: pci@0000:07:00.0
             version: 02
             width: 64 bits
             clock: 33MHz
             capabilities: pm msi msix pciexpress xhci bus_master cap_list
             configuration: driver=xhci_hcd latency=0
             resources: irq:29 memory:e1300000-e1301fff
           *-usbhost:0
                product: xHCI Host Controller
                vendor: Linux 6.0.5-gentoo-arm64 xhci-hcd
                physical id: 0
                bus info: usb@2
                logical name: usb2
                version: 6.00
                capabilities: usb-2.00
                configuration: driver=hub slots=2 speed=480Mbit/s
              *-usb:0
                   description: USB hub
                   product: 4-Port USB 2.1 Hub
                   vendor: Generic
                   physical id: 1
                   bus info: usb@2:1
                   version: 1.01
                   capabilities: usb-2.10
                   configuration: driver=hub slots=4 speed=480Mbit/s
              *-usb:1 UNCLAIMED
                   description: Communication device
                   product: Goodix Fingerprint Device
                   vendor: Shenzhen Goodix Technology Co.,Ltd.
                   physical id: 2
                   bus info: usb@2:2
                   version: 2.00
                   capabilities: usb-2.00
                   configuration: maxpower=100mA speed=12Mbit/s
           *-usbhost:1
                product: xHCI Host Controller
                vendor: Linux 6.0.5-gentoo-arm64 xhci-hcd
                physical id: 1
                bus info: usb@3
                logical name: usb3
                version: 6.00
                capabilities: usb-3.00
                configuration: driver=hub slots=2 speed=5000Mbit/s
              *-usb
                   description: USB hub
                   product: 4-Port USB 3.1 Hub
                   vendor: Generic
                   physical id: 1
                   bus info: usb@3:1
                   version: 1.01
                   capabilities: usb-3.10
                   configuration: driver=hub slots=4 speed=5000Mbit/s
     *-pci:5
          description: PCI bridge
          product: HiSilicon PCIe Root Port with Gen4
          vendor: Huawei Technologies Co., Ltd.
          physical id: e
          bus info: pci@0000:00:0e.0
          version: 21
          width: 32 bits
          clock: 33MHz
          capabilities: pci pciexpress msi pm normal_decode bus_master cap_list
          configuration: driver=pcieport
          resources: irq:35 ioport:6000(size=4096) memory:e1500000-e16fffff ioport:80018800000(size=2097152)
     *-pci:6
          description: PCI bridge
          product: HiSilicon PCIe Root Port with Gen4
          vendor: Huawei Technologies Co., Ltd.
          physical id: f
          bus info: pci@0000:00:0f.0
          version: 21
          width: 32 bits
          clock: 33MHz
          capabilities: pci pciexpress msi pm normal_decode bus_master cap_list
          configuration: driver=pcieport
          resources: irq:36 ioport:7000(size=4096) memory:e1700000-e18fffff ioport:80018a00000(size=2097152)
     *-pci:7
          description: PCI bridge
          product: HiSilicon PCI-PCI Bridge
          vendor: Huawei Technologies Co., Ltd.
          physical id: 103
          bus info: pci@0000:74:00.0
          version: 20
          width: 64 bits
          clock: 33MHz
          capabilities: pci pciexpress pm normal_decode bus_master cap_list
          configuration: driver=pcieport
          resources: iomemory:1010-100f irq:0
     *-pci:8
          description: PCI bridge
          product: HiSilicon PCI-PCI Bridge
          vendor: Huawei Technologies Co., Ltd.
          physical id: 104
          bus info: pci@0000:74:01.0
          version: 20
          width: 64 bits
          clock: 33MHz
          capabilities: pci pciexpress pm normal_decode bus_master cap_list
          configuration: driver=pcieport
          resources: iomemory:1010-100f irq:0 ioport:141000000(size=8388608)
        *-generic UNCLAIMED
             description: Unassigned class
             product: SafeNet (wrong ID)
             vendor: SafeNet (wrong ID)
             physical id: 0
             bus info: pci@0000:76:00.0
             version: ff
             width: 32 bits
             clock: 66MHz
             capabilities: bus_master vga_palette cap_list
             configuration: latency=255 maxlatency=255 mingnt=255
             resources: memory:141000000-1413fffff memory:141400000-1417effff
     *-sas:0 UNCLAIMED
          description: Serial Attached SCSI controller
          product: HiSilicon SAS 3.0 HBA
          vendor: Huawei Technologies Co., Ltd.
          physical id: b
          bus info: pci@0000:74:02.0
          version: 21
          width: 32 bits
          clock: 33MHz
          capabilities: sas pciexpress msi pm cap_list
          configuration: latency=0
          resources: memory:a2000000-a2007fff
     *-sata
          description: SATA controller
          product: HiSilicon AHCI HBA
          vendor: Huawei Technologies Co., Ltd.
          physical id: 3
          bus info: pci@0000:74:03.0
          version: 21
          width: 32 bits
          clock: 33MHz
          capabilities: sata pciexpress msi pm ahci_1.0 bus_master cap_list
          configuration: driver=ahci latency=0
          resources: irq:235 memory:a2010000-a2010fff
     *-sas:1 UNCLAIMED
          description: Serial Attached SCSI controller
          product: HiSilicon SAS 3.0 HBA
          vendor: Huawei Technologies Co., Ltd.
          physical id: 4
          bus info: pci@0000:74:04.0
          version: 21
          width: 32 bits
          clock: 33MHz
          capabilities: sas pciexpress msi pm cap_list
          configuration: latency=0
          resources: memory:a2008000-a200ffff
     *-pci:9
          description: PCI bridge
          product: HiSilicon PCI-PCI Bridge
          vendor: Huawei Technologies Co., Ltd.
          physical id: 105
          bus info: pci@0000:78:00.0
          version: 20
          width: 32 bits
          clock: 33MHz
          capabilities: pci pciexpress pm normal_decode bus_master cap_list
          configuration: driver=pcieport
          resources: irq:0
     *-raid UNCLAIMED
          description: RAID bus controller
          product: HiSilicon RDE Engine
          vendor: Huawei Technologies Co., Ltd.
          physical id: 10
          bus info: pci@0000:78:01.0
          version: 21
          width: 64 bits
          clock: 33MHz
          capabilities: raid pciexpress msi pm cap_list
          configuration: latency=0
          resources: iomemory:20-1f memory:208000000-2083fffff
     *-usb:0
          description: USB controller
          product: HiSilicon USB 1.1 Host Controller
          vendor: Huawei Technologies Co., Ltd.
          physical id: 11
          bus info: pci@0000:7a:00.0
          version: 21
          width: 64 bits
          clock: 33MHz
          capabilities: pciexpress msi pm ohci bus_master cap_list
          configuration: driver=ohci-pci latency=0
          resources: iomemory:20-1f irq:249 memory:20c100000-20c100fff
        *-usbhost
             product: OHCI PCI host controller
             vendor: Linux 6.0.5-gentoo-arm64 ohci_hcd
             physical id: 1
             bus info: usb@6
             logical name: usb6
             version: 6.00
             capabilities: usb-1.10
             configuration: driver=hub slots=2 speed=12Mbit/s
     *-usb:1
          description: USB controller
          product: HiSilicon USB 2.0 2-port Host Controller
          vendor: Huawei Technologies Co., Ltd.
          physical id: 1
          bus info: pci@0000:7a:01.0
          version: 21
          width: 64 bits
          clock: 33MHz
          capabilities: pciexpress msi pm ehci bus_master cap_list
          configuration: driver=ehci-pci latency=0
          resources: iomemory:20-1f irq:237 memory:20c101000-20c101fff
        *-usbhost
             product: EHCI Host Controller
             vendor: Linux 6.0.5-gentoo-arm64 ehci_hcd
             physical id: 1
             bus info: usb@1
             logical name: usb1
             version: 6.00
             capabilities: usb-2.00
             configuration: driver=hub slots=2 speed=480Mbit/s
           *-usb
                description: Audio device
                product: Generic USB Audio
                vendor: Generic
                physical id: 1
                bus info: usb@1:1
                logical name: card1
                logical name: /dev/snd/controlC1
                logical name: /dev/snd/pcmC1D0c
                logical name: /dev/snd/pcmC1D0p
                logical name: /dev/snd/pcmC1D1c
                logical name: /dev/snd/pcmC1D1p
                logical name: /dev/snd/pcmC1D2c
                logical name: /dev/snd/pcmC1D2p
                logical name: input1
                logical name: /dev/input/event1
                version: 0.13
                capabilities: usb-2.00 audio-control usb
                configuration: driver=usbhid maxpower=100mA speed=480Mbit/s
     *-usb:2
          description: USB controller
          product: HiSilicon USB 3.0 Host Controller
          vendor: Huawei Technologies Co., Ltd.
          physical id: 12
          bus info: pci@0000:7a:02.0
          version: 21
          width: 64 bits
          clock: 33MHz
          capabilities: pciexpress msi pm xhci bus_master cap_list
          configuration: driver=xhci_hcd latency=0
          resources: iomemory:20-1f irq:246 memory:20c000000-20c0fffff
        *-usbhost:0
             product: xHCI Host Controller
             vendor: Linux 6.0.5-gentoo-arm64 xhci-hcd
             physical id: 0
             bus info: usb@4
             logical name: usb4
             version: 6.00
             capabilities: usb-2.00
             configuration: driver=hub slots=1 speed=480Mbit/s
           *-usb
                description: USB hub
                product: 4-Port USB 2.1 Hub
                vendor: Generic
                physical id: 1
                bus info: usb@4:1
                version: 1.01
                capabilities: usb-2.10
                configuration: driver=hub slots=4 speed=480Mbit/s
              *-usb:0
                   description: USB hub
                   product: USB2.1 Hub
                   vendor: GenesysLogic
                   physical id: 1
                   bus info: usb@4:1.1
                   version: 6.63
                   capabilities: usb-2.10
                   configuration: driver=hub maxpower=100mA slots=4 speed=480Mbit/s
                 *-usb:0
                      description: Bluetooth wireless interface
                      product: Bluetooth Radio
                      vendor: Realtek
                      physical id: 1
                      bus info: usb@4:1.1.1
                      version: 2.00
                      serial:
                      capabilities: bluetooth usb-1.10
                      configuration: driver=btusb maxpower=500mA speed=12Mbit/s
                 *-usb:1
                      description: Bluetooth wireless interface
                      product: Bluetooth Radio
                      vendor: Realtek
                      physical id: 2
                      bus info: usb@4:1.1.2
                      version: 2.00
                      serial:
                      capabilities: bluetooth usb-1.10
                      configuration: driver=btusb maxpower=500mA speed=12Mbit/s
                 *-usb:2
                      description: Video
                      product: FHD Camera Microphone: FHD Came
                      vendor: SunplusIT Inc
                      physical id: 3
                      bus info: usb@4:1.1.3
                      logical name: card3
                      logical name: /dev/snd/controlC3
                      logical name: /dev/snd/pcmC3D0c
                      logical name: input6
                      logical name: /dev/input/event6
                      version: 10.14
                      serial: 01.00.00
                      capabilities: usb-2.00 usb
                      configuration: driver=snd-usb-audio maxpower=500mA speed=480Mbit/s
                 *-usb:3
                      description: USB hub
                      product: HighSpeed Hub
                      vendor: NEC Corp.
                      physical id: 4
                      bus info: usb@4:1.1.4
                      version: 1.00
                      capabilities: usb-2.00
                      configuration: driver=hub maxpower=100mA slots=3 speed=480Mbit/s
                    *-usb
                         description: Keyboard
                         product: Topre Corporation HHKB Professional
                         vendor: Topre Corporation
                         physical id: 1
                         bus info: usb@4:1.1.4.1
                         logical name: input3
                         logical name: /dev/input/event3
                         logical name: input3::capslock
                         logical name: input3::compose
                         logical name: input3::kana
                         logical name: input3::numlock
                         logical name: input3::scrolllock
                         version: 1.02
                         capabilities: usb-1.10 usb
                         configuration: driver=usbhid maxpower=100mA speed=12Mbit/s
              *-usb:1
                   description: Human interface device
                   product: SAVITECH Bravo-X USB Audio
                   vendor: SAVITECH
                   physical id: 2
                   bus info: usb@4:1.2
                   logical name: card2
                   logical name: /dev/snd/controlC2
                   logical name: /dev/snd/pcmC2D0p
                   logical name: /dev/snd/pcmC2D1p
                   logical name: input2
                   logical name: /dev/input/event2
                   version: 0.01
                   capabilities: usb-1.10 usb
                   configuration: driver=snd-usb-audio maxpower=100mA speed=12Mbit/s
        *-usbhost:1
             product: xHCI Host Controller
             vendor: Linux 6.0.5-gentoo-arm64 xhci-hcd
             physical id: 1
             bus info: usb@5
             logical name: usb5
             version: 6.00
             capabilities: usb-3.00
             configuration: driver=hub slots=1 speed=5000Mbit/s
           *-usb
                description: USB hub
                product: 4-Port USB 3.1 Hub
                vendor: Generic
                physical id: 1
                bus info: usb@5:1
                version: 1.01
                capabilities: usb-3.10
                configuration: driver=hub slots=4 speed=5000Mbit/s
              *-usb
                   description: USB hub
                   product: USB3.1 Hub
                   vendor: GenesysLogic
                   physical id: 1
                   bus info: usb@5:1.1
                   version: 6.63
                   capabilities: usb-3.20
                   configuration: driver=hub slots=4 speed=5000Mbit/s
     *-generic
          description: System peripheral
          product: HiSilicon Embedded DMA Engine
          vendor: Huawei Technologies Co., Ltd.
          physical id: 13
          bus info: pci@0000:7b:00.0
          version: 21
          width: 64 bits
          clock: 33MHz
          capabilities: pciexpress msi pm bus_master cap_list
          configuration: driver=hisi_dma latency=0
          resources: iomemory:10-f irq:39 memory:148800000-148803fff
     *-pci:10
          description: PCI bridge
          product: HiSilicon PCI-PCI Bridge
          vendor: Huawei Technologies Co., Ltd.
          physical id: 0
          bus info: pci@0000:7c:00.0
          version: 20
          width: 64 bits
          clock: 33MHz
          capabilities: pci pciexpress pm normal_decode bus_master cap_list
          configuration: driver=pcieport
          resources: iomemory:1010-100f irq:0 ioport:120000000(size=2097152)
        *-network
             description: Ethernet interface
             product: HNS GE/10GE/25GE RDMA Network Controller
             vendor: Huawei Technologies Co., Ltd.
             physical id: 0
             bus info: pci@0000:7d:00.0
             logical name: enp125s0f0
             version: 21
             serial:
             size: 1Gbit/s
             capacity: 1Gbit/s
             width: 64 bits
             clock: 33MHz
             capabilities: pciexpress msix pm bus_master cap_list ethernet physical tp 10bt 10bt-fd 100bt 100bt-fd 1000bt-fd autonegotiation
             configuration: autonegotiation=on broadcast=yes driver=hns3 driverversion=6.0.5-gentoo-arm64 duplex=full firmware=1.8.15.0 ip=192.168.1.150 latency=0 link=yes multicast=yes port=twisted pair speed=1Gbit/s
             resources: iomemory:10-f iomemory:10-f irq:0 memory:120100000-12010ffff memory:120000000-1200fffff
     *-pnp00:00
          product: 16550A-compatible COM port
          physical id: 14
          capabilities: pnp
          configuration: driver=serial
  *-input:0
       product: Power Button
       physical id: 1
       logical name: input0
       logical name: /dev/input/event0
       capabilities: platform
  *-input:1
       product: MX Vertical Mouse
       physical id: 2
       logical name: input7
       logical name: /dev/input/event7
       logical name: /dev/input/mouse0
       capabilities: bluetooth

Am 27.10.22 um 17:38 schrieb Rodrigo Siqueira:
> Hi Ao,
>
> Could you share a link that describes your workstation?
>
> Thanks
>
> On 10/26/22 17:17, Ao Zhong wrote:
>> Hi Rodrigo,
>>
>> Thanks for your review! This is my first time submitting a patch to the kernel.
>>
>> I'm not very good at using these tools yet. 😂
>>
>> Recently I got a Huawei Qingyun W510 (擎云 W510) ARM workstation
>>
>> from the second-hand market in China. It's SBSA and has a Kunpeng 920 (3211k) SoC
>>
>> with 24 Huawei-customized TSV110 cores. Since it's SFF form factor, and my machine
>>
>> supports PCIe 4.0 (looks like some W510 have it disabled), I installed an RX 6400 on it
>>
>> as my daily driver. It has decent performance. I uploaded a benchmark result on Geekbench.
>>
>> Link: https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fbrowser.geekbench.com%2Fv5%2Fcpu%2F18237269&data=05%7C01%7CRodrigo.Siqueira%40amd.com%7Cdaa18df14f004d2d621d08dab7977866%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C638024158436988558%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&sdata=Iyq4tWJL%2FfXuKB9xAUaVTQQmJQ0GRZ2rH%2F%2BXPTT%2F2tc%3D&reserved=0
>>
>> Ao
>>
>> Am 26.10.22 um 18:12 schrieb Rodrigo Siqueira:
>>>
>>>
>>> On 10/26/22 07:13, Ao Zhong wrote:
>>>> pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
>>>> pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
>>>> these two operations in dcn32/dcn32_resource.c still need to use the FPU.
>>>> This will cause compilation to fail on ARM64 platforms because
>>>> -mgeneral-regs-only is enabled by default to disable the hardware FPU.
>>>> Therefore, imitate the dcn31_zero_pipe_dcc_fraction function in
>>>> dml/dcn31/dcn31_fpu.c, declare the dcn32_zero_pipe_dcc_fraction function
>>>> in dcn32_fpu.c, and move the above two operations into this function.
>>>>
>>>> Acked-by: Christian König <christian.koenig@amd.com>
>>>> Signed-off-by: Ao Zhong <hacc1225@gmail.com>
>>>> ---
>>>>    drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 5 +++--
>>>>    drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c  | 8 ++++++++
>>>>    drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h  | 3 +++
>>>>    3 files changed, 14 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
>>>> index a88dd7b3d1c1..287b7fa9bf41 100644
>>>> --- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
>>>> +++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
>>>> @@ -1918,8 +1918,9 @@ int dcn32_populate_dml_pipes_from_context(
>>>>            timing = &pipe->stream->timing;
>>>>              pipes[pipe_cnt].pipe.src.gpuvm = true;
>>>> -        pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
>>>> -        pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
>>>> +        DC_FP_START();
>>>> +        dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
>>>> +        DC_FP_END();
>>>>            pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
>>>>            pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet
>>>>            pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
>>>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
>>>> index 819de0f11012..58772fce6437 100644
>>>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
>>>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
>>>> @@ -2521,3 +2521,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
>>>>        }
>>>>    }
>>>>    +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
>>>> +                  int pipe_cnt)
>>>> +{
>>>> +    dc_assert_fp_enabled();
>>>> +
>>>> +    pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
>>>> +    pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
>>>> +}
>>>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
>>>> index 3a3dc2ce4c73..ab010e7e840b 100644
>>>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
>>>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
>>>> @@ -73,4 +73,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
>>>>      void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
>>>>    +void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
>>>> +                  int pipe_cnt);
>>>> +
>>>>    #endif
>>>
>>> Hi Ao,
>>>
>>> First of all, thanks a lot for your patchset.
>>>
>>> For both patches:
>>>
>>> Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
>>>
>>> And I also applied them to amd-staging-drm-next.
>>>
>>> Btw, if you are using git-send-email for sending patches, I recommend the following options:
>>>
>>> git send-email --annotate --cover-letter --thread --no-chain-reply-to --to="EMAILS" --cc="mailing@list.com" <SHA>
>>>
>>> Always add a cover letter, it makes it easier to follow the patchset, and you can also describe each change in the cover letter.
>>>
>>> When you send that other patch enabling ARM64, please add as many details as possible in the cover letter. Keep in mind that we have been working on isolating the FPU code in such a way that we do not regress any of our ASICs, which means that every change was well-tested on multiple devices. Anyway, maybe you can refer to this cover letter to write down the commit message:
>>>
>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fpatchwork.freedesktop.org%2Fseries%2F93042%2F&data=05%7C01%7CRodrigo.Siqueira%40amd.com%7Cdaa18df14f004d2d621d08dab7977866%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C638024158436988558%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&sdata=0GMN1Uj9iuQv2ZjipDHnl29V0UvWk6IL4XwlehdPNLA%3D&reserved=0
>>>
>>> Finally, do you have a use case for this change? I mean, ARM64 + AMD dGPU.
>>>
>>> Thanks again!
>>> Siqueira
>>>

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2022-10-27 21:54 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-21  0:05 [PATCH] drm/amd/display: move remaining FPU code to dml folder Ao Zhong
2022-10-21  0:31 ` [PATCH RESEND] " Ao Zhong
2022-10-21  4:31   ` [PATCH] drm/amd/display: add DCN support for ARM64 Ao Zhong
2022-10-25 15:48     ` Rodrigo Siqueira
2022-10-25 21:17       ` [PATCH v2 1/2] drm/amd/display: move remaining FPU code to dml folder Ao Zhong
2022-10-26  7:19         ` Christian König
2022-10-26 10:41           ` Ao Zhong
2022-10-25 21:17       ` [PATCH v2 2/2] " Ao Zhong
2022-10-25 15:42   ` [PATCH RESEND] " Rodrigo Siqueira
2022-10-26 11:02     ` [PATCH v2 1/2] " Ao Zhong
2022-10-26 11:02     ` [PATCH v2 2/2] " Ao Zhong
2022-10-26 11:13     ` [PATCH v3 1/2] " Ao Zhong
2022-10-26 11:13     ` [PATCH v3 2/2] " Ao Zhong
2022-10-26 16:12       ` Rodrigo Siqueira
2022-10-26 21:17         ` Ao Zhong
2022-10-27 15:38           ` Rodrigo Siqueira
2022-10-27 16:48             ` Ao Zhong
2022-10-27 16:51             ` Ao Zhong

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.