linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Rodrigo Siqueira Jordao <Rodrigo.Siqueira@amd.com>
To: Melissa Wen <mwen@igalia.com>,
	harry.wentland@amd.com, sunpeng.li@amd.com,
	alexander.deucher@amd.com, christian.koenig@amd.com,
	Xinhui.Pan@amd.com, airlied@linux.ie, daniel@ffwll.ch
Cc: "Guenter Roeck" <linux@roeck-us.net>,
	"Maíra Canal" <mairacanal@riseup.net>,
	kernel-dev@igalia.com, amd-gfx@lists.freedesktop.org,
	dri-devel@lists.freedesktop.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH 4/5] drm/amd/display: move FPU code from dcn30 clk mgr to DML folder
Date: Thu, 21 Jul 2022 14:58:41 -0400	[thread overview]
Message-ID: <cf4fb10d-f0f0-3cd2-3ccb-1f1cb0594295@amd.com> (raw)
In-Reply-To: <20220720193208.1131493-5-mwen@igalia.com>



On 2022-07-20 15:32, Melissa Wen wrote:
> The -mno-gnu-attribute option in the clk mgr Makefile for dcn30 hides a
> soft-float vs hard-float ABI mismatch on powerpc. After removing this
> flag, we can see that some FPU code remains there:
> 
> gcc-11.3.0-nolibc/powerpc64-linux/bin/powerpc64-linux-ld:
> drivers/gpu/drm/amd/amdgpu/../display/dc/dml/display_mode_lib.o uses
> hard float,
> drivers/gpu/drm/amd/amdgpu/../display/dc/clk_mgr/dcn30/dcn30_clk_mgr.o
> uses soft float
> 
> Therefore, remove the -mno-gnu-attribute flag for dcn30/powerpc and move
> FPU-associated code to DML folder.
> 
> Signed-off-by: Melissa Wen <mwen@igalia.com>
> ---
>   .../gpu/drm/amd/display/dc/clk_mgr/Makefile   |  6 --
>   .../display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c  | 63 ++-----------------
>   .../drm/amd/display/dc/dml/dcn30/dcn30_fpu.c  | 63 ++++++++++++++++++-
>   .../drm/amd/display/dc/dml/dcn30/dcn30_fpu.h  |  1 +
>   4 files changed, 68 insertions(+), 65 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
> index 66dc02c426e9..15b660a951a5 100644
> --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
> +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
> @@ -115,12 +115,6 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21)
>   ###############################################################################
>   CLK_MGR_DCN30 = dcn30_clk_mgr.o dcn30_clk_mgr_smu_msg.o
>   
> -# prevent build errors regarding soft-float vs hard-float FP ABI tags
> -# this code is currently unused on ppc64, as it applies to VanGogh APUs only
> -ifdef CONFIG_PPC64
> -CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn30/dcn30_clk_mgr.o := $(call cc-option,-mno-gnu-attribute)
> -endif
> -
>   AMD_DAL_CLK_MGR_DCN30 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn30/,$(CLK_MGR_DCN30))
>   
>   AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN30)
> diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
> index 914708cefc79..3ce0ee0d012f 100644
> --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
> +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_clk_mgr.c
> @@ -29,6 +29,7 @@
>   #include "dcn20/dcn20_clk_mgr.h"
>   #include "dce100/dce_clk_mgr.h"
>   #include "dcn30/dcn30_clk_mgr.h"
> +#include "dml/dcn30/dcn30_fpu.h"
>   #include "reg_helper.h"
>   #include "core_types.h"
>   #include "dm_helpers.h"
> @@ -97,65 +98,11 @@ static void dcn3_init_single_clock(struct clk_mgr_internal *clk_mgr, uint32_t cl
>   	}
>   }
>   
> -static noinline void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr)
> +static void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr)
>   {
> -	/* defaults */
> -	double pstate_latency_us = clk_mgr->base.ctx->dc->dml.soc.dram_clock_change_latency_us;
> -	double sr_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_exit_time_us;
> -	double sr_enter_plus_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_enter_plus_exit_time_us;
> -	uint16_t min_uclk_mhz = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz;
> -
> -	/* Set A - Normal - default values*/
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].valid = true;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us = pstate_latency_us;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = sr_exit_time_us;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = 0;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF;
> -
> -	/* Set B - Performance - higher minimum clocks */
> -//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].valid = true;
> -//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us = pstate_latency_us;
> -//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = sr_exit_time_us;
> -//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
> -//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE;
> -//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = TUNED VALUE;
> -//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 0xFFFF;
> -//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = TUNED VALUE;
> -//	clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 0xFFFF;
> -
> -	/* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].valid = true;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 0;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_dcfclk = 0;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF;
> -	clk_mgr->base.bw_params->dummy_pstate_table[0].dram_speed_mts = 1600;
> -	clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38;
> -	clk_mgr->base.bw_params->dummy_pstate_table[1].dram_speed_mts = 8000;
> -	clk_mgr->base.bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9;
> -	clk_mgr->base.bw_params->dummy_pstate_table[2].dram_speed_mts = 10000;
> -	clk_mgr->base.bw_params->dummy_pstate_table[2].dummy_pstate_latency_us = 8;
> -	clk_mgr->base.bw_params->dummy_pstate_table[3].dram_speed_mts = 16000;
> -	clk_mgr->base.bw_params->dummy_pstate_table[3].dummy_pstate_latency_us = 5;
> -
> -	/* Set D - MALL - SR enter and exit times adjusted for MALL */
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].valid = true;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = pstate_latency_us;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = 2;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = 4;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = 0;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz;
> -	clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF;
> +	DC_FP_START();
> +	dcn3_fpu_build_wm_range_table(&clk_mgr->base);
> +	DC_FP_END();
>   }
>   
>   void dcn3_init_clocks(struct clk_mgr *clk_mgr_base)
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
> index a8db1306750e..c00f759fdded 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
> @@ -29,7 +29,7 @@
>   #include "dcn20/dcn20_resource.h"
>   #include "dcn30/dcn30_resource.h"
>   
> -
> +#include "clk_mgr/dcn30/dcn30_smu11_driver_if.h"
>   #include "display_mode_vba_30.h"
>   #include "dcn30_fpu.h"
>   
> @@ -616,4 +616,65 @@ void dcn30_fpu_update_bw_bounding_box(struct dc *dc,
>   
>   }
>   
> +void dcn3_fpu_build_wm_range_table(struct clk_mgr *base)
> +{
> +	/* defaults */
> +	double pstate_latency_us = base->ctx->dc->dml.soc.dram_clock_change_latency_us;
> +	double sr_exit_time_us = base->ctx->dc->dml.soc.sr_exit_time_us;
> +	double sr_enter_plus_exit_time_us = base->ctx->dc->dml.soc.sr_enter_plus_exit_time_us;
> +	uint16_t min_uclk_mhz = base->bw_params->clk_table.entries[0].memclk_mhz;
>   
> +	dc_assert_fp_enabled();
> +
> +	/* Set A - Normal - default values*/
> +	base->bw_params->wm_table.nv_entries[WM_A].valid = true;
> +	base->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us = pstate_latency_us;
> +	base->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = sr_exit_time_us;
> +	base->bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
> +	base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE;
> +	base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = 0;
> +	base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF;
> +	base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz;
> +	base->bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF;
> +
> +	/* Set B - Performance - higher minimum clocks */
> +//	base->bw_params->wm_table.nv_entries[WM_B].valid = true;
> +//	base->bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us = pstate_latency_us;
> +//	base->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = sr_exit_time_us;
> +//	base->bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
> +//	base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE;
> +//	base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = TUNED VALUE;
> +//	base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 0xFFFF;
> +//	base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = TUNED VALUE;
> +//	base->bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 0xFFFF;
> +
> +	/* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */
> +	base->bw_params->wm_table.nv_entries[WM_C].valid = true;
> +	base->bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 0;
> +	base->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us;
> +	base->bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
> +	base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE;
> +	base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_dcfclk = 0;
> +	base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF;
> +	base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz;
> +	base->bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF;
> +	base->bw_params->dummy_pstate_table[0].dram_speed_mts = 1600;
> +	base->bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38;
> +	base->bw_params->dummy_pstate_table[1].dram_speed_mts = 8000;
> +	base->bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9;
> +	base->bw_params->dummy_pstate_table[2].dram_speed_mts = 10000;
> +	base->bw_params->dummy_pstate_table[2].dummy_pstate_latency_us = 8;
> +	base->bw_params->dummy_pstate_table[3].dram_speed_mts = 16000;
> +	base->bw_params->dummy_pstate_table[3].dummy_pstate_latency_us = 5;
> +
> +	/* Set D - MALL - SR enter and exit times adjusted for MALL */
> +	base->bw_params->wm_table.nv_entries[WM_D].valid = true;
> +	base->bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = pstate_latency_us;
> +	base->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = 2;
> +	base->bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = 4;
> +	base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL;
> +	base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = 0;
> +	base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF;
> +	base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz;
> +	base->bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF;
> +}
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h
> index dedfe7b5f173..c2024052a497 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.h
> @@ -63,5 +63,6 @@ void dcn30_fpu_update_bw_bounding_box(struct dc *dc,
>   	unsigned int *dcfclk_mhz,
>   	unsigned int *dram_speed_mts);
>   
> +void dcn3_fpu_build_wm_range_table(struct clk_mgr *base);
>   
>   #endif /* __DCN30_FPU_H__*/

Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>


  reply	other threads:[~2022-07-21 18:58 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-20 19:32 [PATCH 0/5] drm/amd/display: FPU cleanup in clk_mgr files for powerpc Melissa Wen
2022-07-20 19:32 ` [PATCH 1/5] drm/amd/display: fix soft-fp vs hard-fp on DCN 3.1 family " Melissa Wen
2022-07-21 18:54   ` Rodrigo Siqueira Jordao
2022-07-20 19:32 ` [PATCH 2/5] drm/amd/display: remove useless FPU protection wrapper from dcn31_resource file Melissa Wen
2022-07-21 18:55   ` Rodrigo Siqueira Jordao
2022-07-20 19:32 ` [PATCH 3/5] drm/amd/display: move FPU code on dcn21 clk_mgr Melissa Wen
2022-07-21 18:57   ` Rodrigo Siqueira Jordao
2022-07-20 19:32 ` [PATCH 4/5] drm/amd/display: move FPU code from dcn30 clk mgr to DML folder Melissa Wen
2022-07-21 18:58   ` Rodrigo Siqueira Jordao [this message]
2022-07-20 19:32 ` [PATCH 5/5] drm/amd/display: move FPU code from dcn301 " Melissa Wen
2022-07-21 17:26   ` Maíra Canal
2022-07-21 18:59   ` Rodrigo Siqueira Jordao
2022-07-21 19:07 ` [PATCH 0/5] drm/amd/display: FPU cleanup in clk_mgr files for powerpc Rodrigo Siqueira Jordao

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=cf4fb10d-f0f0-3cd2-3ccb-1f1cb0594295@amd.com \
    --to=rodrigo.siqueira@amd.com \
    --cc=Xinhui.Pan@amd.com \
    --cc=airlied@linux.ie \
    --cc=alexander.deucher@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=christian.koenig@amd.com \
    --cc=daniel@ffwll.ch \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=harry.wentland@amd.com \
    --cc=kernel-dev@igalia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux@roeck-us.net \
    --cc=mairacanal@riseup.net \
    --cc=mwen@igalia.com \
    --cc=sunpeng.li@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).