All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/amd/display: Reduce stack size for dml31_ModeSupportAndSystemConfigurationFull
@ 2021-12-09 16:46 Michel Dänzer
  2021-12-09 16:46 ` [PATCH 2/2] drm/amd/display: Reduce stack size for dml31 UseMinimumDCFCLK Michel Dänzer
  2021-12-11 12:20 ` [PATCH 1/2] drm/amd/display: Reduce stack size for dml31_ModeSupportAndSystemConfigurationFull Rodrigo Siqueira Jordao
  0 siblings, 2 replies; 6+ messages in thread
From: Michel Dänzer @ 2021-12-09 16:46 UTC (permalink / raw)
  To: Harry Wentland, Leo Li, Rodrigo Siqueira, Nicholas Kazlauskas
  Cc: dri-devel, amd-gfx

From: Michel Dänzer <mdaenzer@redhat.com>

Move code using the Pipe struct to a new helper function.

Works around[0] this warning (resulting in failure to build a RHEL debug
kernel with Werror enabled):

../drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/display_mode_vba_31.c: In function ‘dml31_ModeSupportAndSystemConfigurationFull’:
../drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/display_mode_vba_31.c:5740:1: warning: the frame size of 2144 bytes is larger than 2048 bytes [-Wframe-larger-than=]

The culprit seems to be the Pipe struct, so pull the relevant block out
into its own sub-function. (This is porting
a62427ef9b55 "drm/amd/display: Reduce stack size for dml21_ModeSupportAndSystemConfigurationFull"
from dml21 to dml31)

[0] AFAICT this doesn't actually reduce the total amount of stack which
can be used, just moves some of it from
dml31_ModeSupportAndSystemConfigurationFull to the new helper function,
so the former happens to no longer exceed the limit for a single
function.

Signed-off-by: Michel Dänzer <mdaenzer@redhat.com>
---
 .../dc/dml/dcn31/display_mode_vba_31.c        | 185 ++++++++++--------
 1 file changed, 99 insertions(+), 86 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
index 7e937bdcea00..8965f9af9d0a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
@@ -3949,6 +3949,102 @@ static double TruncToValidBPP(
 	return BPP_INVALID;
 }
 
+static noinline void CalculatePrefetchSchedulePerPlane(
+		struct display_mode_lib *mode_lib,
+		double HostVMInefficiencyFactor,
+		int i,
+		unsigned j,
+		unsigned k)
+{
+	struct vba_vars_st *v = &mode_lib->vba;
+	Pipe myPipe;
+
+	myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
+	myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
+	myPipe.PixelClock = v->PixelClock[k];
+	myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
+	myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
+	myPipe.ScalerEnabled = v->ScalerEnabled[k];
+	myPipe.SourceScan = v->SourceScan[k];
+	myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
+	myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
+	myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
+	myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
+	myPipe.InterlaceEnable = v->Interlace[k];
+	myPipe.NumberOfCursors = v->NumberOfCursors[k];
+	myPipe.VBlank = v->VTotal[k] - v->VActive[k];
+	myPipe.HTotal = v->HTotal[k];
+	myPipe.DCCEnable = v->DCCEnable[k];
+	myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
+		|| v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
+	myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
+	myPipe.BytePerPixelY = v->BytePerPixelY[k];
+	myPipe.BytePerPixelC = v->BytePerPixelC[k];
+	myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
+	v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
+		mode_lib,
+		HostVMInefficiencyFactor,
+		&myPipe,
+		v->DSCDelayPerState[i][k],
+		v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
+		v->DPPCLKDelaySCL,
+		v->DPPCLKDelaySCLLBOnly,
+		v->DPPCLKDelayCNVCCursor,
+		v->DISPCLKDelaySubtotal,
+		v->SwathWidthYThisState[k] / v->HRatio[k],
+		v->OutputFormat[k],
+		v->MaxInterDCNTileRepeaters,
+		dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
+		v->MaximumVStartup[i][j][k],
+		v->GPUVMMaxPageTableLevels,
+		v->GPUVMEnable,
+		v->HostVMEnable,
+		v->HostVMMaxNonCachedPageTableLevels,
+		v->HostVMMinPageSize,
+		v->DynamicMetadataEnable[k],
+		v->DynamicMetadataVMEnabled,
+		v->DynamicMetadataLinesBeforeActiveRequired[k],
+		v->DynamicMetadataTransmittedBytes[k],
+		v->UrgLatency[i],
+		v->ExtraLatency,
+		v->TimeCalc,
+		v->PDEAndMetaPTEBytesPerFrame[i][j][k],
+		v->MetaRowBytes[i][j][k],
+		v->DPTEBytesPerRow[i][j][k],
+		v->PrefetchLinesY[i][j][k],
+		v->SwathWidthYThisState[k],
+		v->PrefillY[k],
+		v->MaxNumSwY[k],
+		v->PrefetchLinesC[i][j][k],
+		v->SwathWidthCThisState[k],
+		v->PrefillC[k],
+		v->MaxNumSwC[k],
+		v->swath_width_luma_ub_this_state[k],
+		v->swath_width_chroma_ub_this_state[k],
+		v->SwathHeightYThisState[k],
+		v->SwathHeightCThisState[k],
+		v->TWait,
+		&v->DSTXAfterScaler[k],
+		&v->DSTYAfterScaler[k],
+		&v->LineTimesForPrefetch[k],
+		&v->PrefetchBW[k],
+		&v->LinesForMetaPTE[k],
+		&v->LinesForMetaAndDPTERow[k],
+		&v->VRatioPreY[i][j][k],
+		&v->VRatioPreC[i][j][k],
+		&v->RequiredPrefetchPixelDataBWLuma[i][j][k],
+		&v->RequiredPrefetchPixelDataBWChroma[i][j][k],
+		&v->NoTimeForDynamicMetadata[i][j][k],
+		&v->Tno_bw[k],
+		&v->prefetch_vmrow_bw[k],
+		&v->dummy7[k],
+		&v->dummy8[k],
+		&v->dummy13[k],
+		&v->VUpdateOffsetPix[k],
+		&v->VUpdateWidthPix[k],
+		&v->VReadyOffsetPix[k]);
+}
+
 void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
 {
 	struct vba_vars_st *v = &mode_lib->vba;
@@ -5276,92 +5372,9 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 						v->SREnterPlusExitTime);
 
 				for (k = 0; k < v->NumberOfActivePlanes; k++) {
-					Pipe myPipe;
-
-					myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
-					myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
-					myPipe.PixelClock = v->PixelClock[k];
-					myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
-					myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
-					myPipe.ScalerEnabled = v->ScalerEnabled[k];
-					myPipe.SourceScan = v->SourceScan[k];
-					myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
-					myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
-					myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
-					myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
-					myPipe.InterlaceEnable = v->Interlace[k];
-					myPipe.NumberOfCursors = v->NumberOfCursors[k];
-					myPipe.VBlank = v->VTotal[k] - v->VActive[k];
-					myPipe.HTotal = v->HTotal[k];
-					myPipe.DCCEnable = v->DCCEnable[k];
-					myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
-							|| v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
-					myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
-					myPipe.BytePerPixelY = v->BytePerPixelY[k];
-					myPipe.BytePerPixelC = v->BytePerPixelC[k];
-					myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
-					v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
-							mode_lib,
-							HostVMInefficiencyFactor,
-							&myPipe,
-							v->DSCDelayPerState[i][k],
-							v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
-							v->DPPCLKDelaySCL,
-							v->DPPCLKDelaySCLLBOnly,
-							v->DPPCLKDelayCNVCCursor,
-							v->DISPCLKDelaySubtotal,
-							v->SwathWidthYThisState[k] / v->HRatio[k],
-							v->OutputFormat[k],
-							v->MaxInterDCNTileRepeaters,
-							dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
-							v->MaximumVStartup[i][j][k],
-							v->GPUVMMaxPageTableLevels,
-							v->GPUVMEnable,
-							v->HostVMEnable,
-							v->HostVMMaxNonCachedPageTableLevels,
-							v->HostVMMinPageSize,
-							v->DynamicMetadataEnable[k],
-							v->DynamicMetadataVMEnabled,
-							v->DynamicMetadataLinesBeforeActiveRequired[k],
-							v->DynamicMetadataTransmittedBytes[k],
-							v->UrgLatency[i],
-							v->ExtraLatency,
-							v->TimeCalc,
-							v->PDEAndMetaPTEBytesPerFrame[i][j][k],
-							v->MetaRowBytes[i][j][k],
-							v->DPTEBytesPerRow[i][j][k],
-							v->PrefetchLinesY[i][j][k],
-							v->SwathWidthYThisState[k],
-							v->PrefillY[k],
-							v->MaxNumSwY[k],
-							v->PrefetchLinesC[i][j][k],
-							v->SwathWidthCThisState[k],
-							v->PrefillC[k],
-							v->MaxNumSwC[k],
-							v->swath_width_luma_ub_this_state[k],
-							v->swath_width_chroma_ub_this_state[k],
-							v->SwathHeightYThisState[k],
-							v->SwathHeightCThisState[k],
-							v->TWait,
-							&v->DSTXAfterScaler[k],
-							&v->DSTYAfterScaler[k],
-							&v->LineTimesForPrefetch[k],
-							&v->PrefetchBW[k],
-							&v->LinesForMetaPTE[k],
-							&v->LinesForMetaAndDPTERow[k],
-							&v->VRatioPreY[i][j][k],
-							&v->VRatioPreC[i][j][k],
-							&v->RequiredPrefetchPixelDataBWLuma[i][j][k],
-							&v->RequiredPrefetchPixelDataBWChroma[i][j][k],
-							&v->NoTimeForDynamicMetadata[i][j][k],
-							&v->Tno_bw[k],
-							&v->prefetch_vmrow_bw[k],
-							&v->dummy7[k],
-							&v->dummy8[k],
-							&v->dummy13[k],
-							&v->VUpdateOffsetPix[k],
-							&v->VUpdateWidthPix[k],
-							&v->VReadyOffsetPix[k]);
+					CalculatePrefetchSchedulePerPlane(mode_lib,
+									  HostVMInefficiencyFactor,
+									  i, j,	k);
 				}
 
 				for (k = 0; k < v->NumberOfActivePlanes; k++) {
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/2] drm/amd/display: Reduce stack size for dml31 UseMinimumDCFCLK
  2021-12-09 16:46 [PATCH 1/2] drm/amd/display: Reduce stack size for dml31_ModeSupportAndSystemConfigurationFull Michel Dänzer
@ 2021-12-09 16:46 ` Michel Dänzer
  2021-12-11 12:20 ` [PATCH 1/2] drm/amd/display: Reduce stack size for dml31_ModeSupportAndSystemConfigurationFull Rodrigo Siqueira Jordao
  1 sibling, 0 replies; 6+ messages in thread
From: Michel Dänzer @ 2021-12-09 16:46 UTC (permalink / raw)
  To: Harry Wentland, Leo Li, Rodrigo Siqueira, Nicholas Kazlauskas
  Cc: dri-devel, amd-gfx

From: Michel Dänzer <mdaenzer@redhat.com>

Use the struct display_mode_lib pointer instead of passing lots of large
arrays as parameters by value.

Addresses this warning (resulting in failure to build a RHEL debug kernel
with Werror enabled):

../drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/display_mode_vba_31.c: In function ‘UseMinimumDCFCLK’:
../drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/display_mode_vba_31.c:7478:1: warning: the frame size of 2128 bytes is larger than 2048 bytes [-Wframe-larger-than=]

NOTE: AFAICT this function previously had no observable effect, since it
only modified parameters passed by value and doesn't return anything.
Now it may modify some values in struct display_mode_lib passed in by
reference.

Signed-off-by: Michel Dänzer <mdaenzer@redhat.com>
---
 .../dc/dml/dcn31/display_mode_vba_31.c        | 304 ++++--------------
 1 file changed, 69 insertions(+), 235 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
index 8965f9af9d0a..6feb23432f8d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
@@ -422,62 +422,8 @@ static void CalculateUrgentBurstFactor(
 
 static void UseMinimumDCFCLK(
 		struct display_mode_lib *mode_lib,
-		int MaxInterDCNTileRepeaters,
 		int MaxPrefetchMode,
-		double FinalDRAMClockChangeLatency,
-		double SREnterPlusExitTime,
-		int ReturnBusWidth,
-		int RoundTripPingLatencyCycles,
-		int ReorderingBytes,
-		int PixelChunkSizeInKByte,
-		int MetaChunkSize,
-		bool GPUVMEnable,
-		int GPUVMMaxPageTableLevels,
-		bool HostVMEnable,
-		int NumberOfActivePlanes,
-		double HostVMMinPageSize,
-		int HostVMMaxNonCachedPageTableLevels,
-		bool DynamicMetadataVMEnabled,
-		enum immediate_flip_requirement ImmediateFlipRequirement,
-		bool ProgressiveToInterlaceUnitInOPP,
-		double MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation,
-		double PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency,
-		int VTotal[],
-		int VActive[],
-		int DynamicMetadataTransmittedBytes[],
-		int DynamicMetadataLinesBeforeActiveRequired[],
-		bool Interlace[],
-		double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
-		double RequiredDISPCLK[][2],
-		double UrgLatency[],
-		unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
-		double ProjectedDCFCLKDeepSleep[][2],
-		double MaximumVStartup[][2][DC__NUM_DPP__MAX],
-		double TotalVActivePixelBandwidth[][2],
-		double TotalVActiveCursorBandwidth[][2],
-		double TotalMetaRowBandwidth[][2],
-		double TotalDPTERowBandwidth[][2],
-		unsigned int TotalNumberOfActiveDPP[][2],
-		unsigned int TotalNumberOfDCCActiveDPP[][2],
-		int dpte_group_bytes[],
-		double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
-		double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
-		int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
-		int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
-		int BytePerPixelY[],
-		int BytePerPixelC[],
-		int HTotal[],
-		double PixelClock[],
-		double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
-		double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
-		double MetaRowBytes[][2][DC__NUM_DPP__MAX],
-		bool DynamicMetadataEnable[],
-		double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
-		double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
-		double ReadBandwidthLuma[],
-		double ReadBandwidthChroma[],
-		double DCFCLKPerState[],
-		double DCFCLKState[][2]);
+		int ReorderingBytes);
 
 static void CalculatePixelDeliveryTimes(
 		unsigned int NumberOfActivePlanes,
@@ -5175,66 +5121,8 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 		}
 	}
 
-	if (v->UseMinimumRequiredDCFCLK == true) {
-		UseMinimumDCFCLK(
-				mode_lib,
-				v->MaxInterDCNTileRepeaters,
-				MaxPrefetchMode,
-				v->DRAMClockChangeLatency,
-				v->SREnterPlusExitTime,
-				v->ReturnBusWidth,
-				v->RoundTripPingLatencyCycles,
-				ReorderingBytes,
-				v->PixelChunkSizeInKByte,
-				v->MetaChunkSize,
-				v->GPUVMEnable,
-				v->GPUVMMaxPageTableLevels,
-				v->HostVMEnable,
-				v->NumberOfActivePlanes,
-				v->HostVMMinPageSize,
-				v->HostVMMaxNonCachedPageTableLevels,
-				v->DynamicMetadataVMEnabled,
-				v->ImmediateFlipRequirement[0],
-				v->ProgressiveToInterlaceUnitInOPP,
-				v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation,
-				v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency,
-				v->VTotal,
-				v->VActive,
-				v->DynamicMetadataTransmittedBytes,
-				v->DynamicMetadataLinesBeforeActiveRequired,
-				v->Interlace,
-				v->RequiredDPPCLK,
-				v->RequiredDISPCLK,
-				v->UrgLatency,
-				v->NoOfDPP,
-				v->ProjectedDCFCLKDeepSleep,
-				v->MaximumVStartup,
-				v->TotalVActivePixelBandwidth,
-				v->TotalVActiveCursorBandwidth,
-				v->TotalMetaRowBandwidth,
-				v->TotalDPTERowBandwidth,
-				v->TotalNumberOfActiveDPP,
-				v->TotalNumberOfDCCActiveDPP,
-				v->dpte_group_bytes,
-				v->PrefetchLinesY,
-				v->PrefetchLinesC,
-				v->swath_width_luma_ub_all_states,
-				v->swath_width_chroma_ub_all_states,
-				v->BytePerPixelY,
-				v->BytePerPixelC,
-				v->HTotal,
-				v->PixelClock,
-				v->PDEAndMetaPTEBytesPerFrame,
-				v->DPTEBytesPerRow,
-				v->MetaRowBytes,
-				v->DynamicMetadataEnable,
-				v->VActivePixelBandwidth,
-				v->VActiveCursorBandwidth,
-				v->ReadBandwidthLuma,
-				v->ReadBandwidthChroma,
-				v->DCFCLKPerState,
-				v->DCFCLKState);
-	}
+	if (v->UseMinimumRequiredDCFCLK == true)
+		UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes);
 
 	for (i = 0; i < v->soc.num_states; ++i) {
 		for (j = 0; j <= 1; ++j) {
@@ -7262,69 +7150,15 @@ static double CalculateUrgentLatency(
 
 static void UseMinimumDCFCLK(
 		struct display_mode_lib *mode_lib,
-		int MaxInterDCNTileRepeaters,
 		int MaxPrefetchMode,
-		double FinalDRAMClockChangeLatency,
-		double SREnterPlusExitTime,
-		int ReturnBusWidth,
-		int RoundTripPingLatencyCycles,
-		int ReorderingBytes,
-		int PixelChunkSizeInKByte,
-		int MetaChunkSize,
-		bool GPUVMEnable,
-		int GPUVMMaxPageTableLevels,
-		bool HostVMEnable,
-		int NumberOfActivePlanes,
-		double HostVMMinPageSize,
-		int HostVMMaxNonCachedPageTableLevels,
-		bool DynamicMetadataVMEnabled,
-		enum immediate_flip_requirement ImmediateFlipRequirement,
-		bool ProgressiveToInterlaceUnitInOPP,
-		double MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation,
-		double PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency,
-		int VTotal[],
-		int VActive[],
-		int DynamicMetadataTransmittedBytes[],
-		int DynamicMetadataLinesBeforeActiveRequired[],
-		bool Interlace[],
-		double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
-		double RequiredDISPCLK[][2],
-		double UrgLatency[],
-		unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
-		double ProjectedDCFCLKDeepSleep[][2],
-		double MaximumVStartup[][2][DC__NUM_DPP__MAX],
-		double TotalVActivePixelBandwidth[][2],
-		double TotalVActiveCursorBandwidth[][2],
-		double TotalMetaRowBandwidth[][2],
-		double TotalDPTERowBandwidth[][2],
-		unsigned int TotalNumberOfActiveDPP[][2],
-		unsigned int TotalNumberOfDCCActiveDPP[][2],
-		int dpte_group_bytes[],
-		double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
-		double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
-		int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
-		int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
-		int BytePerPixelY[],
-		int BytePerPixelC[],
-		int HTotal[],
-		double PixelClock[],
-		double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
-		double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
-		double MetaRowBytes[][2][DC__NUM_DPP__MAX],
-		bool DynamicMetadataEnable[],
-		double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
-		double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
-		double ReadBandwidthLuma[],
-		double ReadBandwidthChroma[],
-		double DCFCLKPerState[],
-		double DCFCLKState[][2])
+		int ReorderingBytes)
 {
 	struct vba_vars_st *v = &mode_lib->vba;
 	int dummy1, i, j, k;
 	double NormalEfficiency,  dummy2, dummy3;
 	double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];
 
-	NormalEfficiency = PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
+	NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
 	for (i = 0; i < v->soc.num_states; ++i) {
 		for (j = 0; j <= 1; ++j) {
 			double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
@@ -7342,61 +7176,61 @@ static void UseMinimumDCFCLK(
 			double MinimumTvmPlus2Tr0;
 
 			TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
-			for (k = 0; k < NumberOfActivePlanes; ++k) {
+			for (k = 0; k < v->NumberOfActivePlanes; ++k) {
 				TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
-						+ NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] / (15.75 * HTotal[k] / PixelClock[k]);
+						+ v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
 			}
 
-			for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
-				NoOfDPPState[k] = NoOfDPP[i][j][k];
+			for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
+				NoOfDPPState[k] = v->NoOfDPP[i][j][k];
 			}
 
-			MinimumTWait = CalculateTWait(MaxPrefetchMode, FinalDRAMClockChangeLatency, UrgLatency[i], SREnterPlusExitTime);
-			NonDPTEBandwidth = TotalVActivePixelBandwidth[i][j] + TotalVActiveCursorBandwidth[i][j] + TotalMetaRowBandwidth[i][j];
-			DPTEBandwidth = (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) ?
-					TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : TotalDPTERowBandwidth[i][j];
+			MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
+			NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
+			DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
+					TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
 			DCFCLKRequiredForAverageBandwidth = dml_max3(
-					ProjectedDCFCLKDeepSleep[i][j],
-					(NonDPTEBandwidth + TotalDPTERowBandwidth[i][j]) / ReturnBusWidth
-							/ (MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
-					(NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / ReturnBusWidth);
+					v->ProjectedDCFCLKDeepSleep[i][j],
+					(NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
+							/ (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
+					(NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth);
 
 			ExtraLatencyBytes = CalculateExtraLatencyBytes(
 					ReorderingBytes,
-					TotalNumberOfActiveDPP[i][j],
-					PixelChunkSizeInKByte,
-					TotalNumberOfDCCActiveDPP[i][j],
-					MetaChunkSize,
-					GPUVMEnable,
-					HostVMEnable,
-					NumberOfActivePlanes,
+					v->TotalNumberOfActiveDPP[i][j],
+					v->PixelChunkSizeInKByte,
+					v->TotalNumberOfDCCActiveDPP[i][j],
+					v->MetaChunkSize,
+					v->GPUVMEnable,
+					v->HostVMEnable,
+					v->NumberOfActivePlanes,
 					NoOfDPPState,
-					dpte_group_bytes,
+					v->dpte_group_bytes,
 					1,
-					HostVMMinPageSize,
-					HostVMMaxNonCachedPageTableLevels);
-			ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
-			for (k = 0; k < NumberOfActivePlanes; ++k) {
+					v->HostVMMinPageSize,
+					v->HostVMMaxNonCachedPageTableLevels);
+			ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
+			for (k = 0; k < v->NumberOfActivePlanes; ++k) {
 				double DCFCLKCyclesRequiredInPrefetch;
 				double ExpectedPrefetchBWAcceleration;
 				double PrefetchTime;
 
-				PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
-						+ PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] * BytePerPixelC[k]) / NormalEfficiency / ReturnBusWidth;
+				PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
+						+ v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
 				DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
-						+ PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / ReturnBusWidth * (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
-						+ 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / ReturnBusWidth
-						+ 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
-				PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) * HTotal[k] / PixelClock[k];
-				ExpectedPrefetchBWAcceleration = (VActivePixelBandwidth[i][j][k] + VActiveCursorBandwidth[i][j][k])
-						/ (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
+						+ v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
+						+ 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth
+						+ 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
+				PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
+				ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
+						/ (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
 				DynamicMetadataVMExtraLatency[k] =
-						(GPUVMEnable == true && DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
-								UrgLatency[i] * GPUVMMaxPageTableLevels * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
-				PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - MinimumTWait
-						- UrgLatency[i]
-								* ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels : GPUVMMaxPageTableLevels - 2)
-										* (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
+						(v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
+								v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
+				PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
+						- v->UrgLatency[i]
+								* ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2)
+										* (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
 						- DynamicMetadataVMExtraLatency[k];
 
 				if (PrefetchTime > 0) {
@@ -7405,14 +7239,14 @@ static void UseMinimumDCFCLK(
 							/ (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
 					DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
 							* dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
-					if (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) {
+					if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
 						DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
-								+ NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / ReturnBusWidth;
+								+ NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth;
 					}
 				} else {
-					DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
+					DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
 				}
-				if (DynamicMetadataEnable[k] == true) {
+				if (v->DynamicMetadataEnable[k] == true) {
 					double TSetupPipe;
 					double TdmbfPipe;
 					double TdmsksPipe;
@@ -7420,17 +7254,17 @@ static void UseMinimumDCFCLK(
 					double AllowedTimeForUrgentExtraLatency;
 
 					CalculateVupdateAndDynamicMetadataParameters(
-							MaxInterDCNTileRepeaters,
-							RequiredDPPCLK[i][j][k],
-							RequiredDISPCLK[i][j],
-							ProjectedDCFCLKDeepSleep[i][j],
-							PixelClock[k],
-							HTotal[k],
-							VTotal[k] - VActive[k],
-							DynamicMetadataTransmittedBytes[k],
-							DynamicMetadataLinesBeforeActiveRequired[k],
-							Interlace[k],
-							ProgressiveToInterlaceUnitInOPP,
+							v->MaxInterDCNTileRepeaters,
+							v->RequiredDPPCLK[i][j][k],
+							v->RequiredDISPCLK[i][j],
+							v->ProjectedDCFCLKDeepSleep[i][j],
+							v->PixelClock[k],
+							v->HTotal[k],
+							v->VTotal[k] - v->VActive[k],
+							v->DynamicMetadataTransmittedBytes[k],
+							v->DynamicMetadataLinesBeforeActiveRequired[k],
+							v->Interlace[k],
+							v->ProgressiveToInterlaceUnitInOPP,
 							&TSetupPipe,
 							&TdmbfPipe,
 							&TdmecPipe,
@@ -7438,31 +7272,31 @@ static void UseMinimumDCFCLK(
 							&dummy1,
 							&dummy2,
 							&dummy3);
-					AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
+					AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
 							- TdmsksPipe - DynamicMetadataVMExtraLatency[k];
 					if (AllowedTimeForUrgentExtraLatency > 0) {
 						DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
 								DCFCLKRequiredForPeakBandwidthPerPlane[k],
 								ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
 					} else {
-						DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
+						DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
 					}
 				}
 			}
 			DCFCLKRequiredForPeakBandwidth = 0;
-			for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
+			for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) {
 				DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
 			}
-			MinimumTvmPlus2Tr0 = UrgLatency[i]
-					* (GPUVMEnable == true ?
-							(HostVMEnable == true ?
-									(GPUVMMaxPageTableLevels + 2) * (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) :
+			MinimumTvmPlus2Tr0 = v->UrgLatency[i]
+					* (v->GPUVMEnable == true ?
+							(v->HostVMEnable == true ?
+									(v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) :
 							0);
-			for (k = 0; k < NumberOfActivePlanes; ++k) {
+			for (k = 0; k < v->NumberOfActivePlanes; ++k) {
 				double MaximumTvmPlus2Tr0PlusTsw;
-				MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
+				MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
 				if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
-					DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
+					DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
 				} else {
 					DCFCLKRequiredForPeakBandwidth = dml_max3(
 							DCFCLKRequiredForPeakBandwidth,
@@ -7470,7 +7304,7 @@ static void UseMinimumDCFCLK(
 							(2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
 				}
 			}
-			DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
+			v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
 		}
 	}
 }
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] drm/amd/display: Reduce stack size for dml31_ModeSupportAndSystemConfigurationFull
  2021-12-09 16:46 [PATCH 1/2] drm/amd/display: Reduce stack size for dml31_ModeSupportAndSystemConfigurationFull Michel Dänzer
  2021-12-09 16:46 ` [PATCH 2/2] drm/amd/display: Reduce stack size for dml31 UseMinimumDCFCLK Michel Dänzer
@ 2021-12-11 12:20 ` Rodrigo Siqueira Jordao
  2021-12-13  9:46   ` Michel Dänzer
  1 sibling, 1 reply; 6+ messages in thread
From: Rodrigo Siqueira Jordao @ 2021-12-11 12:20 UTC (permalink / raw)
  To: Michel Dänzer, Harry Wentland, Leo Li, Rodrigo Siqueira,
	Nicholas Kazlauskas
  Cc: dri-devel, amd-gfx



On 2021-12-09 11:46 a.m., Michel Dänzer wrote:
> From: Michel Dänzer <mdaenzer@redhat.com>
> 
> Move code using the Pipe struct to a new helper function.
> 
> Works around[0] this warning (resulting in failure to build a RHEL debug
> kernel with Werror enabled):
> 
> ../drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/display_mode_vba_31.c: In function ‘dml31_ModeSupportAndSystemConfigurationFull’:
> ../drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/display_mode_vba_31.c:5740:1: warning: the frame size of 2144 bytes is larger than 2048 bytes [-Wframe-larger-than=]
> 
> The culprit seems to be the Pipe struct, so pull the relevant block out
> into its own sub-function. (This is porting
> a62427ef9b55 "drm/amd/display: Reduce stack size for dml21_ModeSupportAndSystemConfigurationFull"
> from dml21 to dml31)
> 
> [0] AFAICT this doesn't actually reduce the total amount of stack which
> can be used, just moves some of it from
> dml31_ModeSupportAndSystemConfigurationFull to the new helper function,
> so the former happens to no longer exceed the limit for a single
> function.
> 
> Signed-off-by: Michel Dänzer <mdaenzer@redhat.com>
> ---
>   .../dc/dml/dcn31/display_mode_vba_31.c        | 185 ++++++++++--------
>   1 file changed, 99 insertions(+), 86 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
> index 7e937bdcea00..8965f9af9d0a 100644
> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
> @@ -3949,6 +3949,102 @@ static double TruncToValidBPP(
>   	return BPP_INVALID;
>   }
>   
> +static noinline void CalculatePrefetchSchedulePerPlane(
> +		struct display_mode_lib *mode_lib,
> +		double HostVMInefficiencyFactor,
> +		int i,
> +		unsigned j,
> +		unsigned k)
> +{
> +	struct vba_vars_st *v = &mode_lib->vba;
> +	Pipe myPipe;
> +
> +	myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
> +	myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
> +	myPipe.PixelClock = v->PixelClock[k];
> +	myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
> +	myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
> +	myPipe.ScalerEnabled = v->ScalerEnabled[k];
> +	myPipe.SourceScan = v->SourceScan[k];
> +	myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
> +	myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
> +	myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
> +	myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
> +	myPipe.InterlaceEnable = v->Interlace[k];
> +	myPipe.NumberOfCursors = v->NumberOfCursors[k];
> +	myPipe.VBlank = v->VTotal[k] - v->VActive[k];
> +	myPipe.HTotal = v->HTotal[k];
> +	myPipe.DCCEnable = v->DCCEnable[k];
> +	myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
> +		|| v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
> +	myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
> +	myPipe.BytePerPixelY = v->BytePerPixelY[k];
> +	myPipe.BytePerPixelC = v->BytePerPixelC[k];
> +	myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
> +	v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
> +		mode_lib,
> +		HostVMInefficiencyFactor,
> +		&myPipe,
> +		v->DSCDelayPerState[i][k],
> +		v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
> +		v->DPPCLKDelaySCL,
> +		v->DPPCLKDelaySCLLBOnly,
> +		v->DPPCLKDelayCNVCCursor,
> +		v->DISPCLKDelaySubtotal,
> +		v->SwathWidthYThisState[k] / v->HRatio[k],
> +		v->OutputFormat[k],
> +		v->MaxInterDCNTileRepeaters,
> +		dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
> +		v->MaximumVStartup[i][j][k],
> +		v->GPUVMMaxPageTableLevels,
> +		v->GPUVMEnable,
> +		v->HostVMEnable,
> +		v->HostVMMaxNonCachedPageTableLevels,
> +		v->HostVMMinPageSize,
> +		v->DynamicMetadataEnable[k],
> +		v->DynamicMetadataVMEnabled,
> +		v->DynamicMetadataLinesBeforeActiveRequired[k],
> +		v->DynamicMetadataTransmittedBytes[k],
> +		v->UrgLatency[i],
> +		v->ExtraLatency,
> +		v->TimeCalc,
> +		v->PDEAndMetaPTEBytesPerFrame[i][j][k],
> +		v->MetaRowBytes[i][j][k],
> +		v->DPTEBytesPerRow[i][j][k],
> +		v->PrefetchLinesY[i][j][k],
> +		v->SwathWidthYThisState[k],
> +		v->PrefillY[k],
> +		v->MaxNumSwY[k],
> +		v->PrefetchLinesC[i][j][k],
> +		v->SwathWidthCThisState[k],
> +		v->PrefillC[k],
> +		v->MaxNumSwC[k],
> +		v->swath_width_luma_ub_this_state[k],
> +		v->swath_width_chroma_ub_this_state[k],
> +		v->SwathHeightYThisState[k],
> +		v->SwathHeightCThisState[k],
> +		v->TWait,
> +		&v->DSTXAfterScaler[k],
> +		&v->DSTYAfterScaler[k],
> +		&v->LineTimesForPrefetch[k],
> +		&v->PrefetchBW[k],
> +		&v->LinesForMetaPTE[k],
> +		&v->LinesForMetaAndDPTERow[k],
> +		&v->VRatioPreY[i][j][k],
> +		&v->VRatioPreC[i][j][k],
> +		&v->RequiredPrefetchPixelDataBWLuma[i][j][k],
> +		&v->RequiredPrefetchPixelDataBWChroma[i][j][k],
> +		&v->NoTimeForDynamicMetadata[i][j][k],
> +		&v->Tno_bw[k],
> +		&v->prefetch_vmrow_bw[k],
> +		&v->dummy7[k],
> +		&v->dummy8[k],
> +		&v->dummy13[k],
> +		&v->VUpdateOffsetPix[k],
> +		&v->VUpdateWidthPix[k],
> +		&v->VReadyOffsetPix[k]);
> +}
> +
>   void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
>   {
>   	struct vba_vars_st *v = &mode_lib->vba;
> @@ -5276,92 +5372,9 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
>   						v->SREnterPlusExitTime);
>   
>   				for (k = 0; k < v->NumberOfActivePlanes; k++) {
> -					Pipe myPipe;
> -
> -					myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
> -					myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
> -					myPipe.PixelClock = v->PixelClock[k];
> -					myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
> -					myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
> -					myPipe.ScalerEnabled = v->ScalerEnabled[k];
> -					myPipe.SourceScan = v->SourceScan[k];
> -					myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
> -					myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
> -					myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
> -					myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
> -					myPipe.InterlaceEnable = v->Interlace[k];
> -					myPipe.NumberOfCursors = v->NumberOfCursors[k];
> -					myPipe.VBlank = v->VTotal[k] - v->VActive[k];
> -					myPipe.HTotal = v->HTotal[k];
> -					myPipe.DCCEnable = v->DCCEnable[k];
> -					myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
> -							|| v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
> -					myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
> -					myPipe.BytePerPixelY = v->BytePerPixelY[k];
> -					myPipe.BytePerPixelC = v->BytePerPixelC[k];
> -					myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
> -					v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
> -							mode_lib,
> -							HostVMInefficiencyFactor,
> -							&myPipe,
> -							v->DSCDelayPerState[i][k],
> -							v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
> -							v->DPPCLKDelaySCL,
> -							v->DPPCLKDelaySCLLBOnly,
> -							v->DPPCLKDelayCNVCCursor,
> -							v->DISPCLKDelaySubtotal,
> -							v->SwathWidthYThisState[k] / v->HRatio[k],
> -							v->OutputFormat[k],
> -							v->MaxInterDCNTileRepeaters,
> -							dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
> -							v->MaximumVStartup[i][j][k],
> -							v->GPUVMMaxPageTableLevels,
> -							v->GPUVMEnable,
> -							v->HostVMEnable,
> -							v->HostVMMaxNonCachedPageTableLevels,
> -							v->HostVMMinPageSize,
> -							v->DynamicMetadataEnable[k],
> -							v->DynamicMetadataVMEnabled,
> -							v->DynamicMetadataLinesBeforeActiveRequired[k],
> -							v->DynamicMetadataTransmittedBytes[k],
> -							v->UrgLatency[i],
> -							v->ExtraLatency,
> -							v->TimeCalc,
> -							v->PDEAndMetaPTEBytesPerFrame[i][j][k],
> -							v->MetaRowBytes[i][j][k],
> -							v->DPTEBytesPerRow[i][j][k],
> -							v->PrefetchLinesY[i][j][k],
> -							v->SwathWidthYThisState[k],
> -							v->PrefillY[k],
> -							v->MaxNumSwY[k],
> -							v->PrefetchLinesC[i][j][k],
> -							v->SwathWidthCThisState[k],
> -							v->PrefillC[k],
> -							v->MaxNumSwC[k],
> -							v->swath_width_luma_ub_this_state[k],
> -							v->swath_width_chroma_ub_this_state[k],
> -							v->SwathHeightYThisState[k],
> -							v->SwathHeightCThisState[k],
> -							v->TWait,
> -							&v->DSTXAfterScaler[k],
> -							&v->DSTYAfterScaler[k],
> -							&v->LineTimesForPrefetch[k],
> -							&v->PrefetchBW[k],
> -							&v->LinesForMetaPTE[k],
> -							&v->LinesForMetaAndDPTERow[k],
> -							&v->VRatioPreY[i][j][k],
> -							&v->VRatioPreC[i][j][k],
> -							&v->RequiredPrefetchPixelDataBWLuma[i][j][k],
> -							&v->RequiredPrefetchPixelDataBWChroma[i][j][k],
> -							&v->NoTimeForDynamicMetadata[i][j][k],
> -							&v->Tno_bw[k],
> -							&v->prefetch_vmrow_bw[k],
> -							&v->dummy7[k],
> -							&v->dummy8[k],
> -							&v->dummy13[k],
> -							&v->VUpdateOffsetPix[k],
> -							&v->VUpdateWidthPix[k],
> -							&v->VReadyOffsetPix[k]);
> +					CalculatePrefetchSchedulePerPlane(mode_lib,
> +									  HostVMInefficiencyFactor,
> +									  i, j,	k);
>   				}
>   
>   				for (k = 0; k < v->NumberOfActivePlanes; k++) {
> 

Hi Michel,

Overall, I think this series is good. I also ran it in our internal CI
and everything looks fine.

Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>

And applied to amd-staging-drm-next.

Thanks

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] drm/amd/display: Reduce stack size for dml31_ModeSupportAndSystemConfigurationFull
  2021-12-11 12:20 ` [PATCH 1/2] drm/amd/display: Reduce stack size for dml31_ModeSupportAndSystemConfigurationFull Rodrigo Siqueira Jordao
@ 2021-12-13  9:46   ` Michel Dänzer
  2021-12-13 14:22     ` Rodrigo Siqueira Jordao
  0 siblings, 1 reply; 6+ messages in thread
From: Michel Dänzer @ 2021-12-13  9:46 UTC (permalink / raw)
  To: Rodrigo Siqueira Jordao, Harry Wentland, Leo Li,
	Rodrigo Siqueira, Nicholas Kazlauskas
  Cc: amd-gfx, dri-devel

On 2021-12-11 13:20, Rodrigo Siqueira Jordao wrote:
> 
> 
> On 2021-12-09 11:46 a.m., Michel Dänzer wrote:
>> From: Michel Dänzer <mdaenzer@redhat.com>
>>
>> Move code using the Pipe struct to a new helper function.
>>
>> Works around[0] this warning (resulting in failure to build a RHEL debug
>> kernel with Werror enabled):
>>
>> ../drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/display_mode_vba_31.c: In function ‘dml31_ModeSupportAndSystemConfigurationFull’:
>> ../drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/display_mode_vba_31.c:5740:1: warning: the frame size of 2144 bytes is larger than 2048 bytes [-Wframe-larger-than=]
>>
>> The culprit seems to be the Pipe struct, so pull the relevant block out
>> into its own sub-function. (This is porting
>> a62427ef9b55 "drm/amd/display: Reduce stack size for dml21_ModeSupportAndSystemConfigurationFull"
>> from dml21 to dml31)
>>
>> [0] AFAICT this doesn't actually reduce the total amount of stack which
>> can be used, just moves some of it from
>> dml31_ModeSupportAndSystemConfigurationFull to the new helper function,
>> so the former happens to no longer exceed the limit for a single
>> function.
>>
>> Signed-off-by: Michel Dänzer <mdaenzer@redhat.com>
>> ---
>>   .../dc/dml/dcn31/display_mode_vba_31.c        | 185 ++++++++++--------
>>   1 file changed, 99 insertions(+), 86 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
>> index 7e937bdcea00..8965f9af9d0a 100644
>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
>> @@ -3949,6 +3949,102 @@ static double TruncToValidBPP(
>>       return BPP_INVALID;
>>   }
>>   +static noinline void CalculatePrefetchSchedulePerPlane(
>> +        struct display_mode_lib *mode_lib,
>> +        double HostVMInefficiencyFactor,
>> +        int i,
>> +        unsigned j,
>> +        unsigned k)
>> +{
>> +    struct vba_vars_st *v = &mode_lib->vba;
>> +    Pipe myPipe;
>> +
>> +    myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
>> +    myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
>> +    myPipe.PixelClock = v->PixelClock[k];
>> +    myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
>> +    myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
>> +    myPipe.ScalerEnabled = v->ScalerEnabled[k];
>> +    myPipe.SourceScan = v->SourceScan[k];
>> +    myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
>> +    myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
>> +    myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
>> +    myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
>> +    myPipe.InterlaceEnable = v->Interlace[k];
>> +    myPipe.NumberOfCursors = v->NumberOfCursors[k];
>> +    myPipe.VBlank = v->VTotal[k] - v->VActive[k];
>> +    myPipe.HTotal = v->HTotal[k];
>> +    myPipe.DCCEnable = v->DCCEnable[k];
>> +    myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
>> +        || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
>> +    myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
>> +    myPipe.BytePerPixelY = v->BytePerPixelY[k];
>> +    myPipe.BytePerPixelC = v->BytePerPixelC[k];
>> +    myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
>> +    v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
>> +        mode_lib,
>> +        HostVMInefficiencyFactor,
>> +        &myPipe,
>> +        v->DSCDelayPerState[i][k],
>> +        v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
>> +        v->DPPCLKDelaySCL,
>> +        v->DPPCLKDelaySCLLBOnly,
>> +        v->DPPCLKDelayCNVCCursor,
>> +        v->DISPCLKDelaySubtotal,
>> +        v->SwathWidthYThisState[k] / v->HRatio[k],
>> +        v->OutputFormat[k],
>> +        v->MaxInterDCNTileRepeaters,
>> +        dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
>> +        v->MaximumVStartup[i][j][k],
>> +        v->GPUVMMaxPageTableLevels,
>> +        v->GPUVMEnable,
>> +        v->HostVMEnable,
>> +        v->HostVMMaxNonCachedPageTableLevels,
>> +        v->HostVMMinPageSize,
>> +        v->DynamicMetadataEnable[k],
>> +        v->DynamicMetadataVMEnabled,
>> +        v->DynamicMetadataLinesBeforeActiveRequired[k],
>> +        v->DynamicMetadataTransmittedBytes[k],
>> +        v->UrgLatency[i],
>> +        v->ExtraLatency,
>> +        v->TimeCalc,
>> +        v->PDEAndMetaPTEBytesPerFrame[i][j][k],
>> +        v->MetaRowBytes[i][j][k],
>> +        v->DPTEBytesPerRow[i][j][k],
>> +        v->PrefetchLinesY[i][j][k],
>> +        v->SwathWidthYThisState[k],
>> +        v->PrefillY[k],
>> +        v->MaxNumSwY[k],
>> +        v->PrefetchLinesC[i][j][k],
>> +        v->SwathWidthCThisState[k],
>> +        v->PrefillC[k],
>> +        v->MaxNumSwC[k],
>> +        v->swath_width_luma_ub_this_state[k],
>> +        v->swath_width_chroma_ub_this_state[k],
>> +        v->SwathHeightYThisState[k],
>> +        v->SwathHeightCThisState[k],
>> +        v->TWait,
>> +        &v->DSTXAfterScaler[k],
>> +        &v->DSTYAfterScaler[k],
>> +        &v->LineTimesForPrefetch[k],
>> +        &v->PrefetchBW[k],
>> +        &v->LinesForMetaPTE[k],
>> +        &v->LinesForMetaAndDPTERow[k],
>> +        &v->VRatioPreY[i][j][k],
>> +        &v->VRatioPreC[i][j][k],
>> +        &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
>> +        &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
>> +        &v->NoTimeForDynamicMetadata[i][j][k],
>> +        &v->Tno_bw[k],
>> +        &v->prefetch_vmrow_bw[k],
>> +        &v->dummy7[k],
>> +        &v->dummy8[k],
>> +        &v->dummy13[k],
>> +        &v->VUpdateOffsetPix[k],
>> +        &v->VUpdateWidthPix[k],
>> +        &v->VReadyOffsetPix[k]);
>> +}
>> +
>>   void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
>>   {
>>       struct vba_vars_st *v = &mode_lib->vba;
>> @@ -5276,92 +5372,9 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
>>                           v->SREnterPlusExitTime);
>>                     for (k = 0; k < v->NumberOfActivePlanes; k++) {
>> -                    Pipe myPipe;
>> -
>> -                    myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
>> -                    myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
>> -                    myPipe.PixelClock = v->PixelClock[k];
>> -                    myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
>> -                    myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
>> -                    myPipe.ScalerEnabled = v->ScalerEnabled[k];
>> -                    myPipe.SourceScan = v->SourceScan[k];
>> -                    myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
>> -                    myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
>> -                    myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
>> -                    myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
>> -                    myPipe.InterlaceEnable = v->Interlace[k];
>> -                    myPipe.NumberOfCursors = v->NumberOfCursors[k];
>> -                    myPipe.VBlank = v->VTotal[k] - v->VActive[k];
>> -                    myPipe.HTotal = v->HTotal[k];
>> -                    myPipe.DCCEnable = v->DCCEnable[k];
>> -                    myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
>> -                            || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
>> -                    myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
>> -                    myPipe.BytePerPixelY = v->BytePerPixelY[k];
>> -                    myPipe.BytePerPixelC = v->BytePerPixelC[k];
>> -                    myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
>> -                    v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
>> -                            mode_lib,
>> -                            HostVMInefficiencyFactor,
>> -                            &myPipe,
>> -                            v->DSCDelayPerState[i][k],
>> -                            v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
>> -                            v->DPPCLKDelaySCL,
>> -                            v->DPPCLKDelaySCLLBOnly,
>> -                            v->DPPCLKDelayCNVCCursor,
>> -                            v->DISPCLKDelaySubtotal,
>> -                            v->SwathWidthYThisState[k] / v->HRatio[k],
>> -                            v->OutputFormat[k],
>> -                            v->MaxInterDCNTileRepeaters,
>> -                            dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
>> -                            v->MaximumVStartup[i][j][k],
>> -                            v->GPUVMMaxPageTableLevels,
>> -                            v->GPUVMEnable,
>> -                            v->HostVMEnable,
>> -                            v->HostVMMaxNonCachedPageTableLevels,
>> -                            v->HostVMMinPageSize,
>> -                            v->DynamicMetadataEnable[k],
>> -                            v->DynamicMetadataVMEnabled,
>> -                            v->DynamicMetadataLinesBeforeActiveRequired[k],
>> -                            v->DynamicMetadataTransmittedBytes[k],
>> -                            v->UrgLatency[i],
>> -                            v->ExtraLatency,
>> -                            v->TimeCalc,
>> -                            v->PDEAndMetaPTEBytesPerFrame[i][j][k],
>> -                            v->MetaRowBytes[i][j][k],
>> -                            v->DPTEBytesPerRow[i][j][k],
>> -                            v->PrefetchLinesY[i][j][k],
>> -                            v->SwathWidthYThisState[k],
>> -                            v->PrefillY[k],
>> -                            v->MaxNumSwY[k],
>> -                            v->PrefetchLinesC[i][j][k],
>> -                            v->SwathWidthCThisState[k],
>> -                            v->PrefillC[k],
>> -                            v->MaxNumSwC[k],
>> -                            v->swath_width_luma_ub_this_state[k],
>> -                            v->swath_width_chroma_ub_this_state[k],
>> -                            v->SwathHeightYThisState[k],
>> -                            v->SwathHeightCThisState[k],
>> -                            v->TWait,
>> -                            &v->DSTXAfterScaler[k],
>> -                            &v->DSTYAfterScaler[k],
>> -                            &v->LineTimesForPrefetch[k],
>> -                            &v->PrefetchBW[k],
>> -                            &v->LinesForMetaPTE[k],
>> -                            &v->LinesForMetaAndDPTERow[k],
>> -                            &v->VRatioPreY[i][j][k],
>> -                            &v->VRatioPreC[i][j][k],
>> -                            &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
>> -                            &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
>> -                            &v->NoTimeForDynamicMetadata[i][j][k],
>> -                            &v->Tno_bw[k],
>> -                            &v->prefetch_vmrow_bw[k],
>> -                            &v->dummy7[k],
>> -                            &v->dummy8[k],
>> -                            &v->dummy13[k],
>> -                            &v->VUpdateOffsetPix[k],
>> -                            &v->VUpdateWidthPix[k],
>> -                            &v->VReadyOffsetPix[k]);
>> +                    CalculatePrefetchSchedulePerPlane(mode_lib,
>> +                                      HostVMInefficiencyFactor,
>> +                                      i, j,    k);
>>                   }
>>                     for (k = 0; k < v->NumberOfActivePlanes; k++) {
>>
> 
> Hi Michel,
> 
> Overall, I think this series is good. I also ran it in our internal CI and everything looks fine.
> 
> Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
> 
> And applied to amd-staging-drm-next.

Thanks!

Are there any plans for actually reducing the combined amount of stack used by ModeSupportAndSystemConfigurationFull + CalculatePrefetchSchedulePerPlane?


Also, did you check that UseMinimumDCFCLK now modifying mode_lib->vba.DCFCLKState[i][j] and possibly other values in mode_lib->vba makes sense?


-- 
Earthling Michel Dänzer            |                  https://redhat.com
Libre software enthusiast          |         Mesa and Xwayland developer

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] drm/amd/display: Reduce stack size for dml31_ModeSupportAndSystemConfigurationFull
  2021-12-13  9:46   ` Michel Dänzer
@ 2021-12-13 14:22     ` Rodrigo Siqueira Jordao
  2021-12-13 15:08       ` Michel Dänzer
  0 siblings, 1 reply; 6+ messages in thread
From: Rodrigo Siqueira Jordao @ 2021-12-13 14:22 UTC (permalink / raw)
  To: michel.daenzer, Harry Wentland, Leo Li, Rodrigo Siqueira,
	Nicholas Kazlauskas
  Cc: dri-devel, amd-gfx



On 2021-12-13 4:46 a.m., Michel Dänzer wrote:
> On 2021-12-11 13:20, Rodrigo Siqueira Jordao wrote:
>>
>>
>> On 2021-12-09 11:46 a.m., Michel Dänzer wrote:
>>> From: Michel Dänzer <mdaenzer@redhat.com>
>>>
>>> Move code using the Pipe struct to a new helper function.
>>>
>>> Works around[0] this warning (resulting in failure to build a RHEL debug
>>> kernel with Werror enabled):
>>>
>>> ../drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/display_mode_vba_31.c: In function ‘dml31_ModeSupportAndSystemConfigurationFull’:
>>> ../drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/display_mode_vba_31.c:5740:1: warning: the frame size of 2144 bytes is larger than 2048 bytes [-Wframe-larger-than=]
>>>
>>> The culprit seems to be the Pipe struct, so pull the relevant block out
>>> into its own sub-function. (This is porting
>>> a62427ef9b55 "drm/amd/display: Reduce stack size for dml21_ModeSupportAndSystemConfigurationFull"
>>> from dml21 to dml31)
>>>
>>> [0] AFAICT this doesn't actually reduce the total amount of stack which
>>> can be used, just moves some of it from
>>> dml31_ModeSupportAndSystemConfigurationFull to the new helper function,
>>> so the former happens to no longer exceed the limit for a single
>>> function.
>>>
>>> Signed-off-by: Michel Dänzer <mdaenzer@redhat.com>
>>> ---
>>>    .../dc/dml/dcn31/display_mode_vba_31.c        | 185 ++++++++++--------
>>>    1 file changed, 99 insertions(+), 86 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
>>> index 7e937bdcea00..8965f9af9d0a 100644
>>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
>>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
>>> @@ -3949,6 +3949,102 @@ static double TruncToValidBPP(
>>>        return BPP_INVALID;
>>>    }
>>>    +static noinline void CalculatePrefetchSchedulePerPlane(
>>> +        struct display_mode_lib *mode_lib,
>>> +        double HostVMInefficiencyFactor,
>>> +        int i,
>>> +        unsigned j,
>>> +        unsigned k)
>>> +{
>>> +    struct vba_vars_st *v = &mode_lib->vba;
>>> +    Pipe myPipe;
>>> +
>>> +    myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
>>> +    myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
>>> +    myPipe.PixelClock = v->PixelClock[k];
>>> +    myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
>>> +    myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
>>> +    myPipe.ScalerEnabled = v->ScalerEnabled[k];
>>> +    myPipe.SourceScan = v->SourceScan[k];
>>> +    myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
>>> +    myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
>>> +    myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
>>> +    myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
>>> +    myPipe.InterlaceEnable = v->Interlace[k];
>>> +    myPipe.NumberOfCursors = v->NumberOfCursors[k];
>>> +    myPipe.VBlank = v->VTotal[k] - v->VActive[k];
>>> +    myPipe.HTotal = v->HTotal[k];
>>> +    myPipe.DCCEnable = v->DCCEnable[k];
>>> +    myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
>>> +        || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
>>> +    myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
>>> +    myPipe.BytePerPixelY = v->BytePerPixelY[k];
>>> +    myPipe.BytePerPixelC = v->BytePerPixelC[k];
>>> +    myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
>>> +    v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
>>> +        mode_lib,
>>> +        HostVMInefficiencyFactor,
>>> +        &myPipe,
>>> +        v->DSCDelayPerState[i][k],
>>> +        v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
>>> +        v->DPPCLKDelaySCL,
>>> +        v->DPPCLKDelaySCLLBOnly,
>>> +        v->DPPCLKDelayCNVCCursor,
>>> +        v->DISPCLKDelaySubtotal,
>>> +        v->SwathWidthYThisState[k] / v->HRatio[k],
>>> +        v->OutputFormat[k],
>>> +        v->MaxInterDCNTileRepeaters,
>>> +        dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
>>> +        v->MaximumVStartup[i][j][k],
>>> +        v->GPUVMMaxPageTableLevels,
>>> +        v->GPUVMEnable,
>>> +        v->HostVMEnable,
>>> +        v->HostVMMaxNonCachedPageTableLevels,
>>> +        v->HostVMMinPageSize,
>>> +        v->DynamicMetadataEnable[k],
>>> +        v->DynamicMetadataVMEnabled,
>>> +        v->DynamicMetadataLinesBeforeActiveRequired[k],
>>> +        v->DynamicMetadataTransmittedBytes[k],
>>> +        v->UrgLatency[i],
>>> +        v->ExtraLatency,
>>> +        v->TimeCalc,
>>> +        v->PDEAndMetaPTEBytesPerFrame[i][j][k],
>>> +        v->MetaRowBytes[i][j][k],
>>> +        v->DPTEBytesPerRow[i][j][k],
>>> +        v->PrefetchLinesY[i][j][k],
>>> +        v->SwathWidthYThisState[k],
>>> +        v->PrefillY[k],
>>> +        v->MaxNumSwY[k],
>>> +        v->PrefetchLinesC[i][j][k],
>>> +        v->SwathWidthCThisState[k],
>>> +        v->PrefillC[k],
>>> +        v->MaxNumSwC[k],
>>> +        v->swath_width_luma_ub_this_state[k],
>>> +        v->swath_width_chroma_ub_this_state[k],
>>> +        v->SwathHeightYThisState[k],
>>> +        v->SwathHeightCThisState[k],
>>> +        v->TWait,
>>> +        &v->DSTXAfterScaler[k],
>>> +        &v->DSTYAfterScaler[k],
>>> +        &v->LineTimesForPrefetch[k],
>>> +        &v->PrefetchBW[k],
>>> +        &v->LinesForMetaPTE[k],
>>> +        &v->LinesForMetaAndDPTERow[k],
>>> +        &v->VRatioPreY[i][j][k],
>>> +        &v->VRatioPreC[i][j][k],
>>> +        &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
>>> +        &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
>>> +        &v->NoTimeForDynamicMetadata[i][j][k],
>>> +        &v->Tno_bw[k],
>>> +        &v->prefetch_vmrow_bw[k],
>>> +        &v->dummy7[k],
>>> +        &v->dummy8[k],
>>> +        &v->dummy13[k],
>>> +        &v->VUpdateOffsetPix[k],
>>> +        &v->VUpdateWidthPix[k],
>>> +        &v->VReadyOffsetPix[k]);
>>> +}
>>> +
>>>    void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
>>>    {
>>>        struct vba_vars_st *v = &mode_lib->vba;
>>> @@ -5276,92 +5372,9 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
>>>                            v->SREnterPlusExitTime);
>>>                      for (k = 0; k < v->NumberOfActivePlanes; k++) {
>>> -                    Pipe myPipe;
>>> -
>>> -                    myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
>>> -                    myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
>>> -                    myPipe.PixelClock = v->PixelClock[k];
>>> -                    myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
>>> -                    myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
>>> -                    myPipe.ScalerEnabled = v->ScalerEnabled[k];
>>> -                    myPipe.SourceScan = v->SourceScan[k];
>>> -                    myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
>>> -                    myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
>>> -                    myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
>>> -                    myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
>>> -                    myPipe.InterlaceEnable = v->Interlace[k];
>>> -                    myPipe.NumberOfCursors = v->NumberOfCursors[k];
>>> -                    myPipe.VBlank = v->VTotal[k] - v->VActive[k];
>>> -                    myPipe.HTotal = v->HTotal[k];
>>> -                    myPipe.DCCEnable = v->DCCEnable[k];
>>> -                    myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
>>> -                            || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
>>> -                    myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
>>> -                    myPipe.BytePerPixelY = v->BytePerPixelY[k];
>>> -                    myPipe.BytePerPixelC = v->BytePerPixelC[k];
>>> -                    myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
>>> -                    v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
>>> -                            mode_lib,
>>> -                            HostVMInefficiencyFactor,
>>> -                            &myPipe,
>>> -                            v->DSCDelayPerState[i][k],
>>> -                            v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
>>> -                            v->DPPCLKDelaySCL,
>>> -                            v->DPPCLKDelaySCLLBOnly,
>>> -                            v->DPPCLKDelayCNVCCursor,
>>> -                            v->DISPCLKDelaySubtotal,
>>> -                            v->SwathWidthYThisState[k] / v->HRatio[k],
>>> -                            v->OutputFormat[k],
>>> -                            v->MaxInterDCNTileRepeaters,
>>> -                            dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
>>> -                            v->MaximumVStartup[i][j][k],
>>> -                            v->GPUVMMaxPageTableLevels,
>>> -                            v->GPUVMEnable,
>>> -                            v->HostVMEnable,
>>> -                            v->HostVMMaxNonCachedPageTableLevels,
>>> -                            v->HostVMMinPageSize,
>>> -                            v->DynamicMetadataEnable[k],
>>> -                            v->DynamicMetadataVMEnabled,
>>> -                            v->DynamicMetadataLinesBeforeActiveRequired[k],
>>> -                            v->DynamicMetadataTransmittedBytes[k],
>>> -                            v->UrgLatency[i],
>>> -                            v->ExtraLatency,
>>> -                            v->TimeCalc,
>>> -                            v->PDEAndMetaPTEBytesPerFrame[i][j][k],
>>> -                            v->MetaRowBytes[i][j][k],
>>> -                            v->DPTEBytesPerRow[i][j][k],
>>> -                            v->PrefetchLinesY[i][j][k],
>>> -                            v->SwathWidthYThisState[k],
>>> -                            v->PrefillY[k],
>>> -                            v->MaxNumSwY[k],
>>> -                            v->PrefetchLinesC[i][j][k],
>>> -                            v->SwathWidthCThisState[k],
>>> -                            v->PrefillC[k],
>>> -                            v->MaxNumSwC[k],
>>> -                            v->swath_width_luma_ub_this_state[k],
>>> -                            v->swath_width_chroma_ub_this_state[k],
>>> -                            v->SwathHeightYThisState[k],
>>> -                            v->SwathHeightCThisState[k],
>>> -                            v->TWait,
>>> -                            &v->DSTXAfterScaler[k],
>>> -                            &v->DSTYAfterScaler[k],
>>> -                            &v->LineTimesForPrefetch[k],
>>> -                            &v->PrefetchBW[k],
>>> -                            &v->LinesForMetaPTE[k],
>>> -                            &v->LinesForMetaAndDPTERow[k],
>>> -                            &v->VRatioPreY[i][j][k],
>>> -                            &v->VRatioPreC[i][j][k],
>>> -                            &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
>>> -                            &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
>>> -                            &v->NoTimeForDynamicMetadata[i][j][k],
>>> -                            &v->Tno_bw[k],
>>> -                            &v->prefetch_vmrow_bw[k],
>>> -                            &v->dummy7[k],
>>> -                            &v->dummy8[k],
>>> -                            &v->dummy13[k],
>>> -                            &v->VUpdateOffsetPix[k],
>>> -                            &v->VUpdateWidthPix[k],
>>> -                            &v->VReadyOffsetPix[k]);
>>> +                    CalculatePrefetchSchedulePerPlane(mode_lib,
>>> +                                      HostVMInefficiencyFactor,
>>> +                                      i, j,    k);
>>>                    }
>>>                      for (k = 0; k < v->NumberOfActivePlanes; k++) {
>>>
>>
>> Hi Michel,
>>
>> Overall, I think this series is good. I also ran it in our internal CI and everything looks fine.
>>
>> Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
>>
>> And applied to amd-staging-drm-next.
> 
> Thanks!
> 
> Are there any plans for actually reducing the combined amount of stack used by ModeSupportAndSystemConfigurationFull + CalculatePrefetchSchedulePerPlane?

Hi Michel,

Tbh, I'm not fully aware of the problem with the stack size used by 
"ModeSupportAndSystemConfigurationFull + 
CalculatePrefetchSchedulePerPlane". Could you help me to understand it 
better? Could you provide some background? Also, could you help me 
better understand the impact of this stack size issue in the DML code? 
Any information will be helpful.

> 
> Also, did you check that UseMinimumDCFCLK now modifying mode_lib->vba.DCFCLKState[i][j] and possibly other values in mode_lib->vba makes sense?

To check this patch, I submitted it to our internal CI, where we ran a 
couple of IGT tests on multiple ASICs, and I conducted a simple smoke 
test using a 5600XT and a Raven system. Everything was fine.

Finally, I checked Dmytro's opinion about this change, and he agreed 
that your patch is fine.

Thanks
Siqueira


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/2] drm/amd/display: Reduce stack size for dml31_ModeSupportAndSystemConfigurationFull
  2021-12-13 14:22     ` Rodrigo Siqueira Jordao
@ 2021-12-13 15:08       ` Michel Dänzer
  0 siblings, 0 replies; 6+ messages in thread
From: Michel Dänzer @ 2021-12-13 15:08 UTC (permalink / raw)
  To: Rodrigo Siqueira Jordao, Harry Wentland, Leo Li,
	Rodrigo Siqueira, Nicholas Kazlauskas
  Cc: amd-gfx, dri-devel

On 2021-12-13 15:22, Rodrigo Siqueira Jordao wrote:
> 
> 
> On 2021-12-13 4:46 a.m., Michel Dänzer wrote:
>> On 2021-12-11 13:20, Rodrigo Siqueira Jordao wrote:
>>>
>>>
>>> On 2021-12-09 11:46 a.m., Michel Dänzer wrote:
>>>> From: Michel Dänzer <mdaenzer@redhat.com>
>>>>
>>>> Move code using the Pipe struct to a new helper function.
>>>>
>>>> Works around[0] this warning (resulting in failure to build a RHEL debug
>>>> kernel with Werror enabled):
>>>>
>>>> ../drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/display_mode_vba_31.c: In function ‘dml31_ModeSupportAndSystemConfigurationFull’:
>>>> ../drivers/gpu/drm/amd/amdgpu/../display/dc/dml/dcn31/display_mode_vba_31.c:5740:1: warning: the frame size of 2144 bytes is larger than 2048 bytes [-Wframe-larger-than=]
>>>>
>>>> The culprit seems to be the Pipe struct, so pull the relevant block out
>>>> into its own sub-function. (This is porting
>>>> a62427ef9b55 "drm/amd/display: Reduce stack size for dml21_ModeSupportAndSystemConfigurationFull"
>>>> from dml21 to dml31)
>>>>
>>>> [0] AFAICT this doesn't actually reduce the total amount of stack which
>>>> can be used, just moves some of it from
>>>> dml31_ModeSupportAndSystemConfigurationFull to the new helper function,
>>>> so the former happens to no longer exceed the limit for a single
>>>> function.
>>>>
>>>> Signed-off-by: Michel Dänzer <mdaenzer@redhat.com>
>>>> ---
>>>>    .../dc/dml/dcn31/display_mode_vba_31.c        | 185 ++++++++++--------
>>>>    1 file changed, 99 insertions(+), 86 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
>>>> index 7e937bdcea00..8965f9af9d0a 100644
>>>> --- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
>>>> +++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
>>>> @@ -3949,6 +3949,102 @@ static double TruncToValidBPP(
>>>>        return BPP_INVALID;
>>>>    }
>>>>    +static noinline void CalculatePrefetchSchedulePerPlane(
>>>> +        struct display_mode_lib *mode_lib,
>>>> +        double HostVMInefficiencyFactor,
>>>> +        int i,
>>>> +        unsigned j,
>>>> +        unsigned k)
>>>> +{
>>>> +    struct vba_vars_st *v = &mode_lib->vba;
>>>> +    Pipe myPipe;
>>>> +
>>>> +    myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
>>>> +    myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
>>>> +    myPipe.PixelClock = v->PixelClock[k];
>>>> +    myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
>>>> +    myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
>>>> +    myPipe.ScalerEnabled = v->ScalerEnabled[k];
>>>> +    myPipe.SourceScan = v->SourceScan[k];
>>>> +    myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
>>>> +    myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
>>>> +    myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
>>>> +    myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
>>>> +    myPipe.InterlaceEnable = v->Interlace[k];
>>>> +    myPipe.NumberOfCursors = v->NumberOfCursors[k];
>>>> +    myPipe.VBlank = v->VTotal[k] - v->VActive[k];
>>>> +    myPipe.HTotal = v->HTotal[k];
>>>> +    myPipe.DCCEnable = v->DCCEnable[k];
>>>> +    myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
>>>> +        || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
>>>> +    myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
>>>> +    myPipe.BytePerPixelY = v->BytePerPixelY[k];
>>>> +    myPipe.BytePerPixelC = v->BytePerPixelC[k];
>>>> +    myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
>>>> +    v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
>>>> +        mode_lib,
>>>> +        HostVMInefficiencyFactor,
>>>> +        &myPipe,
>>>> +        v->DSCDelayPerState[i][k],
>>>> +        v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
>>>> +        v->DPPCLKDelaySCL,
>>>> +        v->DPPCLKDelaySCLLBOnly,
>>>> +        v->DPPCLKDelayCNVCCursor,
>>>> +        v->DISPCLKDelaySubtotal,
>>>> +        v->SwathWidthYThisState[k] / v->HRatio[k],
>>>> +        v->OutputFormat[k],
>>>> +        v->MaxInterDCNTileRepeaters,
>>>> +        dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
>>>> +        v->MaximumVStartup[i][j][k],
>>>> +        v->GPUVMMaxPageTableLevels,
>>>> +        v->GPUVMEnable,
>>>> +        v->HostVMEnable,
>>>> +        v->HostVMMaxNonCachedPageTableLevels,
>>>> +        v->HostVMMinPageSize,
>>>> +        v->DynamicMetadataEnable[k],
>>>> +        v->DynamicMetadataVMEnabled,
>>>> +        v->DynamicMetadataLinesBeforeActiveRequired[k],
>>>> +        v->DynamicMetadataTransmittedBytes[k],
>>>> +        v->UrgLatency[i],
>>>> +        v->ExtraLatency,
>>>> +        v->TimeCalc,
>>>> +        v->PDEAndMetaPTEBytesPerFrame[i][j][k],
>>>> +        v->MetaRowBytes[i][j][k],
>>>> +        v->DPTEBytesPerRow[i][j][k],
>>>> +        v->PrefetchLinesY[i][j][k],
>>>> +        v->SwathWidthYThisState[k],
>>>> +        v->PrefillY[k],
>>>> +        v->MaxNumSwY[k],
>>>> +        v->PrefetchLinesC[i][j][k],
>>>> +        v->SwathWidthCThisState[k],
>>>> +        v->PrefillC[k],
>>>> +        v->MaxNumSwC[k],
>>>> +        v->swath_width_luma_ub_this_state[k],
>>>> +        v->swath_width_chroma_ub_this_state[k],
>>>> +        v->SwathHeightYThisState[k],
>>>> +        v->SwathHeightCThisState[k],
>>>> +        v->TWait,
>>>> +        &v->DSTXAfterScaler[k],
>>>> +        &v->DSTYAfterScaler[k],
>>>> +        &v->LineTimesForPrefetch[k],
>>>> +        &v->PrefetchBW[k],
>>>> +        &v->LinesForMetaPTE[k],
>>>> +        &v->LinesForMetaAndDPTERow[k],
>>>> +        &v->VRatioPreY[i][j][k],
>>>> +        &v->VRatioPreC[i][j][k],
>>>> +        &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
>>>> +        &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
>>>> +        &v->NoTimeForDynamicMetadata[i][j][k],
>>>> +        &v->Tno_bw[k],
>>>> +        &v->prefetch_vmrow_bw[k],
>>>> +        &v->dummy7[k],
>>>> +        &v->dummy8[k],
>>>> +        &v->dummy13[k],
>>>> +        &v->VUpdateOffsetPix[k],
>>>> +        &v->VUpdateWidthPix[k],
>>>> +        &v->VReadyOffsetPix[k]);
>>>> +}
>>>> +
>>>>    void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
>>>>    {
>>>>        struct vba_vars_st *v = &mode_lib->vba;
>>>> @@ -5276,92 +5372,9 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
>>>>                            v->SREnterPlusExitTime);
>>>>                      for (k = 0; k < v->NumberOfActivePlanes; k++) {
>>>> -                    Pipe myPipe;
>>>> -
>>>> -                    myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k];
>>>> -                    myPipe.DISPCLK = v->RequiredDISPCLK[i][j];
>>>> -                    myPipe.PixelClock = v->PixelClock[k];
>>>> -                    myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j];
>>>> -                    myPipe.DPPPerPlane = v->NoOfDPP[i][j][k];
>>>> -                    myPipe.ScalerEnabled = v->ScalerEnabled[k];
>>>> -                    myPipe.SourceScan = v->SourceScan[k];
>>>> -                    myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k];
>>>> -                    myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k];
>>>> -                    myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k];
>>>> -                    myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k];
>>>> -                    myPipe.InterlaceEnable = v->Interlace[k];
>>>> -                    myPipe.NumberOfCursors = v->NumberOfCursors[k];
>>>> -                    myPipe.VBlank = v->VTotal[k] - v->VActive[k];
>>>> -                    myPipe.HTotal = v->HTotal[k];
>>>> -                    myPipe.DCCEnable = v->DCCEnable[k];
>>>> -                    myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1
>>>> -                            || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1;
>>>> -                    myPipe.SourcePixelFormat = v->SourcePixelFormat[k];
>>>> -                    myPipe.BytePerPixelY = v->BytePerPixelY[k];
>>>> -                    myPipe.BytePerPixelC = v->BytePerPixelC[k];
>>>> -                    myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP;
>>>> -                    v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule(
>>>> -                            mode_lib,
>>>> -                            HostVMInefficiencyFactor,
>>>> -                            &myPipe,
>>>> -                            v->DSCDelayPerState[i][k],
>>>> -                            v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater,
>>>> -                            v->DPPCLKDelaySCL,
>>>> -                            v->DPPCLKDelaySCLLBOnly,
>>>> -                            v->DPPCLKDelayCNVCCursor,
>>>> -                            v->DISPCLKDelaySubtotal,
>>>> -                            v->SwathWidthYThisState[k] / v->HRatio[k],
>>>> -                            v->OutputFormat[k],
>>>> -                            v->MaxInterDCNTileRepeaters,
>>>> -                            dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]),
>>>> -                            v->MaximumVStartup[i][j][k],
>>>> -                            v->GPUVMMaxPageTableLevels,
>>>> -                            v->GPUVMEnable,
>>>> -                            v->HostVMEnable,
>>>> -                            v->HostVMMaxNonCachedPageTableLevels,
>>>> -                            v->HostVMMinPageSize,
>>>> -                            v->DynamicMetadataEnable[k],
>>>> -                            v->DynamicMetadataVMEnabled,
>>>> -                            v->DynamicMetadataLinesBeforeActiveRequired[k],
>>>> -                            v->DynamicMetadataTransmittedBytes[k],
>>>> -                            v->UrgLatency[i],
>>>> -                            v->ExtraLatency,
>>>> -                            v->TimeCalc,
>>>> -                            v->PDEAndMetaPTEBytesPerFrame[i][j][k],
>>>> -                            v->MetaRowBytes[i][j][k],
>>>> -                            v->DPTEBytesPerRow[i][j][k],
>>>> -                            v->PrefetchLinesY[i][j][k],
>>>> -                            v->SwathWidthYThisState[k],
>>>> -                            v->PrefillY[k],
>>>> -                            v->MaxNumSwY[k],
>>>> -                            v->PrefetchLinesC[i][j][k],
>>>> -                            v->SwathWidthCThisState[k],
>>>> -                            v->PrefillC[k],
>>>> -                            v->MaxNumSwC[k],
>>>> -                            v->swath_width_luma_ub_this_state[k],
>>>> -                            v->swath_width_chroma_ub_this_state[k],
>>>> -                            v->SwathHeightYThisState[k],
>>>> -                            v->SwathHeightCThisState[k],
>>>> -                            v->TWait,
>>>> -                            &v->DSTXAfterScaler[k],
>>>> -                            &v->DSTYAfterScaler[k],
>>>> -                            &v->LineTimesForPrefetch[k],
>>>> -                            &v->PrefetchBW[k],
>>>> -                            &v->LinesForMetaPTE[k],
>>>> -                            &v->LinesForMetaAndDPTERow[k],
>>>> -                            &v->VRatioPreY[i][j][k],
>>>> -                            &v->VRatioPreC[i][j][k],
>>>> -                            &v->RequiredPrefetchPixelDataBWLuma[i][j][k],
>>>> -                            &v->RequiredPrefetchPixelDataBWChroma[i][j][k],
>>>> -                            &v->NoTimeForDynamicMetadata[i][j][k],
>>>> -                            &v->Tno_bw[k],
>>>> -                            &v->prefetch_vmrow_bw[k],
>>>> -                            &v->dummy7[k],
>>>> -                            &v->dummy8[k],
>>>> -                            &v->dummy13[k],
>>>> -                            &v->VUpdateOffsetPix[k],
>>>> -                            &v->VUpdateWidthPix[k],
>>>> -                            &v->VReadyOffsetPix[k]);
>>>> +                    CalculatePrefetchSchedulePerPlane(mode_lib,
>>>> +                                      HostVMInefficiencyFactor,
>>>> +                                      i, j,    k);
>>>>                    }
>>>>                      for (k = 0; k < v->NumberOfActivePlanes; k++) {
>>>>
>>>
>>> Hi Michel,
>>>
>>> Overall, I think this series is good. I also ran it in our internal CI and everything looks fine.
>>>
>>> Reviewed-by: Rodrigo Siqueira <Rodrigo.Siqueira@amd.com>
>>>
>>> And applied to amd-staging-drm-next.
>>
>> Thanks!
>>
>> Are there any plans for actually reducing the combined amount of stack used by ModeSupportAndSystemConfigurationFull + CalculatePrefetchSchedulePerPlane?
> 
> Hi Michel,
> 
> Tbh, I'm not fully aware of the problem with the stack size used by "ModeSupportAndSystemConfigurationFull + CalculatePrefetchSchedulePerPlane". Could you help me to understand it better?

The warning which inspired this patch (and the corresponding change to dml21_ModeSupportAndSystemConfigurationFull) is about ModeSupportAndSystemConfigurationFull exceeding a threshold of stack usage. The patch moves some of that stack usage to the new CalculatePrefetchSchedulePerPlane helper function. However, since the former calls the latter, together they still use as much stack (possibly even slightly more, due to the function calling convention) as before. So while we've silenced the warning, we haven't actually improved the situation the warning is about.

> Could you provide some background? Also, could you help me better understand the impact of this stack size issue in the DML code? Any information will be helpful.

I don't know the exact reasons for the warning offhand. Presumably the fact that this warning is enabled indicates that stack usage should be minimized though, or at least stack shouldn't be lightly wasted.


>> Also, did you check that UseMinimumDCFCLK now modifying mode_lib->vba.DCFCLKState[i][j] and possibly other values in mode_lib->vba makes sense?
> 
> To check this patch, I submitted it to our internal CI, where we ran a couple of IGT tests on multiple ASICs, and I conducted a simple smoke test using a 5600XT and a Raven system. Everything was fine.
> 
> Finally, I checked Dmytro's opinion about this change, and he agreed that your patch is fine.

Great, thanks.


-- 
Earthling Michel Dänzer            |                  https://redhat.com
Libre software enthusiast          |         Mesa and Xwayland developer

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-12-13 15:08 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-09 16:46 [PATCH 1/2] drm/amd/display: Reduce stack size for dml31_ModeSupportAndSystemConfigurationFull Michel Dänzer
2021-12-09 16:46 ` [PATCH 2/2] drm/amd/display: Reduce stack size for dml31 UseMinimumDCFCLK Michel Dänzer
2021-12-11 12:20 ` [PATCH 1/2] drm/amd/display: Reduce stack size for dml31_ModeSupportAndSystemConfigurationFull Rodrigo Siqueira Jordao
2021-12-13  9:46   ` Michel Dänzer
2021-12-13 14:22     ` Rodrigo Siqueira Jordao
2021-12-13 15:08       ` Michel Dänzer

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.