Re: [PATCH v3 1/1] drm/amd/display: add DCN support for ARM64

From: Rodrigo Siqueira Jordao <Rodrigo.Siqueira@amd.com>
To: Nathan Chancellor <nathan@kernel.org>, Ao Zhong <hacc1225@gmail.com>
Cc: "Stephen Rothwell" <sfr@canb.auug.org.au>,
	"Arnd Bergmann" <arnd@arndb.de>, "Leo Li" <sunpeng.li@amd.com>,
	amd-gfx@lists.freedesktop.org,
	"Alex Deucher" <alexander.deucher@amd.com>,
	"Harry Wentland" <harry.wentland@amd.com>,
	"Christian König" <christian.koenig@amd.com>,
	linux-arm-kernel@lists.infradead.org
Subject: Re: [PATCH v3 1/1] drm/amd/display: add DCN support for ARM64
Date: Fri, 28 Oct 2022 11:35:32 -0400	[thread overview]
Message-ID: <f31b8573-336a-dd0b-1628-d60066ce533c@amd.com> (raw)
In-Reply-To: <Y1vwk3J3HPGugBJO@dev-arch.thelio-3990X>

On 2022-10-28 11:09, Nathan Chancellor wrote:
> Hi Ao,
> 
> On Thu, Oct 27, 2022 at 09:52:29PM +0200, Ao Zhong wrote:
>> After moving all FPU code to the DML folder, we can enable DCN support
>> for the ARM64 platform. Remove the -mgeneral-regs-only CFLAG from the
>> code in the DML folder that needs to use hardware FPU, and add a control
>> mechanism for ARM Neon.
>>
>> Signed-off-by: Ao Zhong <hacc1225@gmail.com>
>> ---
>>   drivers/gpu/drm/amd/display/Kconfig           |  2 +-
>>   .../gpu/drm/amd/display/amdgpu_dm/dc_fpu.c    |  6 ++++++
>>   drivers/gpu/drm/amd/display/dc/dml/Makefile   | 20 +++++++++++++++----
>>   3 files changed, 23 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
>> index 0142affcdaa3..a7f1c4e51719 100644
>> --- a/drivers/gpu/drm/amd/display/Kconfig
>> +++ b/drivers/gpu/drm/amd/display/Kconfig
>> @@ -6,7 +6,7 @@ config DRM_AMD_DC
>>   	bool "AMD DC - Enable new display engine"
>>   	default y
>>   	select SND_HDA_COMPONENT if SND_HDA_CORE
>> -	select DRM_AMD_DC_DCN if (X86 || PPC64)
>> +	select DRM_AMD_DC_DCN if (X86 || PPC64 || (ARM64 && KERNEL_MODE_NEON))
>>   	help
>>   	  Choose this option if you want to use the new display engine
>>   	  support for AMDGPU. This adds required support for Vega and
>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
>> index ab0c6d191038..1743ca0a3641 100644
>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
>> @@ -31,6 +31,8 @@
>>   #elif defined(CONFIG_PPC64)
>>   #include <asm/switch_to.h>
>>   #include <asm/cputable.h>
>> +#elif defined(CONFIG_ARM64)
>> +#include <asm/neon.h>
>>   #endif
>>   
>>   /**
>> @@ -99,6 +101,8 @@ void dc_fpu_begin(const char *function_name, const int line)
>>   			preempt_disable();
>>   			enable_kernel_fp();
>>   		}
>> +#elif defined(CONFIG_ARM64)
>> +		kernel_neon_begin();
>>   #endif
>>   	}
>>   
>> @@ -136,6 +140,8 @@ void dc_fpu_end(const char *function_name, const int line)
>>   			disable_kernel_fp();
>>   			preempt_enable();
>>   		}
>> +#elif defined(CONFIG_ARM64)
>> +		kernel_neon_end();
>>   #endif
>>   	}
>>   
>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
>> index d0c6cf61c676..d4e93bed1c8e 100644
>> --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
>> +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
>> @@ -33,6 +33,10 @@ ifdef CONFIG_PPC64
>>   dml_ccflags := -mhard-float -maltivec
>>   endif
>>   
>> +ifdef CONFIG_ARM64
>> +dml_rcflags := -mgeneral-regs-only
>> +endif
>> +
>>   ifdef CONFIG_CC_IS_GCC
>>   ifeq ($(call cc-ifversion, -lt, 0701, y), y)
>>   IS_OLD_GCC = 1
>> @@ -55,8 +59,6 @@ frame_warn_flag := -Wframe-larger-than=2048
>>   endif
>>   
>>   CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)
>> -
>> -ifdef CONFIG_DRM_AMD_DC_DCN
>>   CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags)
>>   CFLAGS_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_ccflags)
>>   CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_ccflags)
>> @@ -88,7 +90,6 @@ CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_ccflags)
>>   CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_ccflags)
>>   CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_ccflags) -Wno-tautological-compare
>>   CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags)
>> -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn2x/dcn2x.o := $(dml_rcflags)
>>   CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_rcflags)
>>   CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_rcflags)
>>   CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_rcflags)
>> @@ -105,7 +106,18 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_rcf
>>   CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_rcflags)
>>   CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags)
>>   CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o  := $(dml_rcflags)
>> -endif
>> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_rcflags)
>> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_rcflags)
>> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_mode_vba_314.o := $(dml_rcflags)
>> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_rq_dlg_calc_314.o := $(dml_rcflags)
>> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/dcn314_fpu.o := $(dml_rcflags)
>> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_rcflags)
>> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_rcflags)
>> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn321/dcn321_fpu.o := $(dml_rcflags)
>> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_rcflags)
>> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_rcflags)
>> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn303/dcn303_fpu.o := $(dml_rcflags)
>> +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_rcflags)
>>   CFLAGS_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_ccflags)
>>   CFLAGS_$(AMDDALPATH)/dc/dml/display_rq_dlg_helpers.o := $(dml_ccflags)
>>   CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_rcflags)
>> -- 
>> 2.37.4
>>
> 
> Enabling this code for arm64 reveals the following warnings when
> building allmodconfig with clang. A very recent change in LLVM added the
> variable and spill information so that will not be seen with older
> versions but I also saw the warnings with clang 15.0.0 from Fedora.
> 
>    drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn30/display_mode_vba_30.c:1852:13: error: stack frame size (2112) exceeds limit (2048) in 'DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation' [-Werror,-Wframe-larger-than]
>    static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
>                ^
>    1152/2112 (54.55%) spills, 960/2112 (45.45%) variables
> 
>    drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn30/display_mode_vba_30.c:3542:6: error: stack frame size (2240) exceeds limit (2048) in 'dml30_ModeSupportAndSystemConfigurationFull' [-Werror,-Wframe-larger-than]
>    void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
>        ^
>    1451/2240 (64.78%) spills, 789/2240 (35.22%) variables
> 
>    drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/display_mode_vba_31.c:3791:6: error: stack frame size (2736) exceeds limit (2048) in 'dml31_ModeSupportAndSystemConfigurationFull' [-Werror,-Wframe-larger-than]
>    void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
>        ^
>    1342/2736 (49.05%) spills, 1394/2736 (50.95%) variables
> 
>    drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn314/display_mode_vba_314.c:3890:6: error: stack frame size (2720) exceeds limit (2048) in 'dml314_ModeSupportAndSystemConfigurationFull' [-Werror,-Wframe-larger-than]
>    void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
>        ^
>    1325/2720 (48.71%) spills, 1395/2720 (51.29%) variables
> 
>    drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:59:13: error: stack frame size (2208) exceeds limit (2048) in 'DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation' [-Werror,-Wframe-larger-than]
>    static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(
>                ^
>    1397/2208 (63.27%) spills, 811/2208 (36.73%) variables
> 
>    drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn32/display_mode_vba_32.c:1681:6: error: stack frame size (2496) exceeds limit (2048) in 'dml32_ModeSupportAndSystemConfigurationFull' [-Werror,-Wframe-larger-than]
>    void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
>        ^
>    1504/2496 (60.26%) spills, 992/2496 (39.74%) variables
> 
> I can try to take a look at these next week, as I have prior obligations
> today, unless others wanted to help me out ;)

Hi Nathan,

Thanks a lot for checking this.

About those stack frame issues, I guess they are generic, right? I mean,
I don't see why we would only see them when arm64 is enabled.

> Additionally, I see the following errors with GCC 12.2.1 from Fedora
> when building allmodconfig. Seems like some $(dml_rcflags) might be
> missing.
> 
>    drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_resource.c: In function ‘dcn10_resource_construct_fp’:
>    drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_resource.c:1313:52: error: ‘-mgeneral-regs-only’ is incompatible with the use of floating-point types
>    1313 |                 dcn_soc->dram_clock_change_latency = 23;
>          |                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~
>    drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_resource.c:1317:45: error: ‘-mgeneral-regs-only’ is incompatible with the use of floating-point types
>    1317 |                 dc->dcn_soc->urgent_latency = 3;
>          |                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~
>    drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_resource.c:1319:64: error: ‘-mgeneral-regs-only’ is incompatible with the use of floating-point types
>    1319 |                 dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
>          |                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~
>    drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_resource.c:1329:64: error: ‘-mgeneral-regs-only’ is incompatible with the use of floating-point types
>    1329 |                 dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
>          |                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~
>    drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_resource.c:1330:64: error: ‘-mgeneral-regs-only’ is incompatible with the use of floating-point types
>    1330 |                 dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
>          |                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~
>    drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_resource.c:1331:65: error: ‘-mgeneral-regs-only’ is incompatible with the use of floating-point types
>    1331 |                 dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
>          |                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~
>    drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_resource.c:1332:65: error: ‘-mgeneral-regs-only’ is incompatible with the use of floating-point types
>    1332 |                 dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
>          |                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~
>    drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_resource.c:1334:72: error: ‘-mgeneral-regs-only’ is incompatible with the use of floating-point types
>    1334 |                         dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
>          |                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~
>    make[6]: *** [scripts/Makefile.build:250: drivers/gpu/drm/amd/amdgpu/../display/dc/dcn10/dcn10_resource.o] Error 1
>    drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_resource.c: In function ‘dcn32_populate_dml_pipes_from_context’:
>    drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_resource.c:1921:70: error: ‘-mgeneral-regs-only’ is incompatible with the use of floating-point types
>    1921 |                 pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
>          |                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~
>    drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_resource.c:1922:72: error: ‘-mgeneral-regs-only’ is incompatible with the use of floating-point types
>    1922 |                 pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
>          |                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~
>    make[6]: *** [scripts/Makefile.build:250: drivers/gpu/drm/amd/amdgpu/../display/dc/dcn32/dcn32_resource.o] Error 1

Could you check if you have this commit in your branch?

drm/amd/display: move remaining FPU code to dml folder

IIRC, I merged it around Tuesday:
https://gitlab.freedesktop.org/agd5f/linux/-/commits/amd-staging-drm-next

Thanks
Siqueira

> 
> Cheers,
> Nathan