All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amd/display: Reduce stack size for dml21_ModeSupportAndSystemConfigurationFull
@ 2021-09-14 21:05 Harry Wentland
  2021-09-14 21:56 ` Arnd Bergmann
  2021-09-16 14:34 ` Leo Li
  0 siblings, 2 replies; 3+ messages in thread
From: Harry Wentland @ 2021-09-14 21:05 UTC (permalink / raw)
  To: amd-gfx
  Cc: ndesaulniers, torvalds, linux-kernel, arnd, sunpeng.li,
	alexander.deucher, christian.koenig, Xinhui.Pan, nathan, linux,
	llvm, Harry Wentland

[Why & How]
With -Werror enabled in the kernel, the clang build was failing because
dml21_ModeSupportAndSystemConfigurationFull's stack frame is 1064 bytes
when built with clang, which exceeds the default 1024-byte stack frame
limit.

The culprit seems to be the Pipe struct, so pull the relevant block
out into its own sub-function.

Signed-off-by: Harry Wentland <harry.wentland@amd.com>
Fixes: 3fe617ccafd6 ("Enable '-Werror' by default for all kernel builds")
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: amd-gfx@lists.freedesktop.org
Cc: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Cc: Arnd Bergmann <arnd@kernel.org>
Cc: Leo Li <sunpeng.li@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Xinhui Pan <Xinhui.Pan@amd.com>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: llvm@lists.linux.dev
---
 .../dc/dml/dcn21/display_mode_vba_21.c        | 236 +++++++++---------
 1 file changed, 123 insertions(+), 113 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
index 4136eb8256cb..8a7485e21d53 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn21/display_mode_vba_21.c
@@ -3394,6 +3394,127 @@ static unsigned int TruncToValidBPP(
 	}
 }
 
+
+static noinline void CalculatePrefetchSchedulePerPlane(
+		struct display_mode_lib *mode_lib,
+		int i,
+		unsigned j,
+		unsigned k)
+{
+	struct vba_vars_st *locals = &mode_lib->vba;
+	Pipe myPipe;
+	HostVM myHostVM;
+
+	if (mode_lib->vba.XFCEnabled[k] == true) {
+		mode_lib->vba.XFCRemoteSurfaceFlipDelay =
+				CalculateRemoteSurfaceFlipDelay(
+						mode_lib,
+						mode_lib->vba.VRatio[k],
+						locals->SwathWidthYThisState[k],
+						dml_ceil(locals->BytePerPixelInDETY[k], 1.0),
+						mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
+						mode_lib->vba.XFCTSlvVupdateOffset,
+						mode_lib->vba.XFCTSlvVupdateWidth,
+						mode_lib->vba.XFCTSlvVreadyOffset,
+						mode_lib->vba.XFCXBUFLatencyTolerance,
+						mode_lib->vba.XFCFillBWOverhead,
+						mode_lib->vba.XFCSlvChunkSize,
+						mode_lib->vba.XFCBusTransportTime,
+						mode_lib->vba.TimeCalc,
+						mode_lib->vba.TWait,
+						&mode_lib->vba.SrcActiveDrainRate,
+						&mode_lib->vba.TInitXFill,
+						&mode_lib->vba.TslvChk);
+	} else {
+		mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0.0;
+	}
+
+	myPipe.DPPCLK = locals->RequiredDPPCLK[i][j][k];
+	myPipe.DISPCLK = locals->RequiredDISPCLK[i][j];
+	myPipe.PixelClock = mode_lib->vba.PixelClock[k];
+	myPipe.DCFCLKDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0];
+	myPipe.DPPPerPlane = locals->NoOfDPP[i][j][k];
+	myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k];
+	myPipe.SourceScan = mode_lib->vba.SourceScan[k];
+	myPipe.BlockWidth256BytesY = locals->Read256BlockWidthY[k];
+	myPipe.BlockHeight256BytesY = locals->Read256BlockHeightY[k];
+	myPipe.BlockWidth256BytesC = locals->Read256BlockWidthC[k];
+	myPipe.BlockHeight256BytesC = locals->Read256BlockHeightC[k];
+	myPipe.InterlaceEnable = mode_lib->vba.Interlace[k];
+	myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k];
+	myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k];
+	myPipe.HTotal = mode_lib->vba.HTotal[k];
+
+
+	myHostVM.Enable = mode_lib->vba.HostVMEnable;
+	myHostVM.MaxPageTableLevels = mode_lib->vba.HostVMMaxPageTableLevels;
+	myHostVM.CachedPageTableLevels = mode_lib->vba.HostVMCachedPageTableLevels;
+
+
+	mode_lib->vba.IsErrorResult[i][j][k] = CalculatePrefetchSchedule(
+			mode_lib,
+			mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
+			mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
+			&myPipe,
+			locals->DSCDelayPerState[i][k],
+			mode_lib->vba.DPPCLKDelaySubtotal,
+			mode_lib->vba.DPPCLKDelaySCL,
+			mode_lib->vba.DPPCLKDelaySCLLBOnly,
+			mode_lib->vba.DPPCLKDelayCNVCFormater,
+			mode_lib->vba.DPPCLKDelayCNVCCursor,
+			mode_lib->vba.DISPCLKDelaySubtotal,
+			locals->SwathWidthYThisState[k] / mode_lib->vba.HRatio[k],
+			mode_lib->vba.OutputFormat[k],
+			mode_lib->vba.MaxInterDCNTileRepeaters,
+			dml_min(mode_lib->vba.MaxVStartup, locals->MaximumVStartup[0][0][k]),
+			locals->MaximumVStartup[0][0][k],
+			mode_lib->vba.GPUVMMaxPageTableLevels,
+			mode_lib->vba.GPUVMEnable,
+			&myHostVM,
+			mode_lib->vba.DynamicMetadataEnable[k],
+			mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k],
+			mode_lib->vba.DynamicMetadataTransmittedBytes[k],
+			mode_lib->vba.DCCEnable[k],
+			mode_lib->vba.UrgentLatency,
+			mode_lib->vba.ExtraLatency,
+			mode_lib->vba.TimeCalc,
+			locals->PDEAndMetaPTEBytesPerFrame[0][0][k],
+			locals->MetaRowBytes[0][0][k],
+			locals->DPTEBytesPerRow[0][0][k],
+			locals->PrefetchLinesY[0][0][k],
+			locals->SwathWidthYThisState[k],
+			locals->BytePerPixelInDETY[k],
+			locals->PrefillY[k],
+			locals->MaxNumSwY[k],
+			locals->PrefetchLinesC[0][0][k],
+			locals->BytePerPixelInDETC[k],
+			locals->PrefillC[k],
+			locals->MaxNumSwC[k],
+			locals->SwathHeightYThisState[k],
+			locals->SwathHeightCThisState[k],
+			mode_lib->vba.TWait,
+			mode_lib->vba.XFCEnabled[k],
+			mode_lib->vba.XFCRemoteSurfaceFlipDelay,
+			mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
+			&locals->dst_x_after_scaler,
+			&locals->dst_y_after_scaler,
+			&locals->LineTimesForPrefetch[k],
+			&locals->PrefetchBW[k],
+			&locals->LinesForMetaPTE[k],
+			&locals->LinesForMetaAndDPTERow[k],
+			&locals->VRatioPreY[i][j][k],
+			&locals->VRatioPreC[i][j][k],
+			&locals->RequiredPrefetchPixelDataBWLuma[i][j][k],
+			&locals->RequiredPrefetchPixelDataBWChroma[i][j][k],
+			&locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
+			&locals->Tno_bw[k],
+			&locals->prefetch_vmrow_bw[k],
+			locals->swath_width_luma_ub,
+			locals->swath_width_chroma_ub,
+			&mode_lib->vba.VUpdateOffsetPix[k],
+			&mode_lib->vba.VUpdateWidthPix[k],
+			&mode_lib->vba.VReadyOffsetPix[k]);
+}
 void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib)
 {
 	struct vba_vars_st *locals = &mode_lib->vba;
@@ -4676,120 +4797,9 @@ void dml21_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 						mode_lib->vba.DRAMClockChangeLatency,
 						mode_lib->vba.UrgentLatency,
 						mode_lib->vba.SREnterPlusExitTime);
-				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
-					Pipe myPipe;
-					HostVM myHostVM;
-
-					if (mode_lib->vba.XFCEnabled[k] == true) {
-						mode_lib->vba.XFCRemoteSurfaceFlipDelay =
-								CalculateRemoteSurfaceFlipDelay(
-										mode_lib,
-										mode_lib->vba.VRatio[k],
-										locals->SwathWidthYThisState[k],
-										dml_ceil(locals->BytePerPixelInDETY[k], 1.0),
-										mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k],
-										mode_lib->vba.XFCTSlvVupdateOffset,
-										mode_lib->vba.XFCTSlvVupdateWidth,
-										mode_lib->vba.XFCTSlvVreadyOffset,
-										mode_lib->vba.XFCXBUFLatencyTolerance,
-										mode_lib->vba.XFCFillBWOverhead,
-										mode_lib->vba.XFCSlvChunkSize,
-										mode_lib->vba.XFCBusTransportTime,
-										mode_lib->vba.TimeCalc,
-										mode_lib->vba.TWait,
-										&mode_lib->vba.SrcActiveDrainRate,
-										&mode_lib->vba.TInitXFill,
-										&mode_lib->vba.TslvChk);
-					} else {
-						mode_lib->vba.XFCRemoteSurfaceFlipDelay = 0.0;
-					}
+				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++)
+					CalculatePrefetchSchedulePerPlane(mode_lib, i, j, k);
 
-					myPipe.DPPCLK = locals->RequiredDPPCLK[i][j][k];
-					myPipe.DISPCLK = locals->RequiredDISPCLK[i][j];
-					myPipe.PixelClock = mode_lib->vba.PixelClock[k];
-					myPipe.DCFCLKDeepSleep = mode_lib->vba.ProjectedDCFCLKDeepSleep[0][0];
-					myPipe.DPPPerPlane = locals->NoOfDPP[i][j][k];
-					myPipe.ScalerEnabled = mode_lib->vba.ScalerEnabled[k];
-					myPipe.SourceScan = mode_lib->vba.SourceScan[k];
-					myPipe.BlockWidth256BytesY = locals->Read256BlockWidthY[k];
-					myPipe.BlockHeight256BytesY = locals->Read256BlockHeightY[k];
-					myPipe.BlockWidth256BytesC = locals->Read256BlockWidthC[k];
-					myPipe.BlockHeight256BytesC = locals->Read256BlockHeightC[k];
-					myPipe.InterlaceEnable = mode_lib->vba.Interlace[k];
-					myPipe.NumberOfCursors = mode_lib->vba.NumberOfCursors[k];
-					myPipe.VBlank = mode_lib->vba.VTotal[k] - mode_lib->vba.VActive[k];
-					myPipe.HTotal = mode_lib->vba.HTotal[k];
-
-
-					myHostVM.Enable = mode_lib->vba.HostVMEnable;
-					myHostVM.MaxPageTableLevels = mode_lib->vba.HostVMMaxPageTableLevels;
-					myHostVM.CachedPageTableLevels = mode_lib->vba.HostVMCachedPageTableLevels;
-
-
-					mode_lib->vba.IsErrorResult[i][j][k] = CalculatePrefetchSchedule(
-							mode_lib,
-							mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyPixelMixedWithVMData,
-							mode_lib->vba.PercentOfIdealDRAMFabricAndSDPPortBWReceivedAfterUrgLatencyVMDataOnly,
-							&myPipe,
-							locals->DSCDelayPerState[i][k],
-							mode_lib->vba.DPPCLKDelaySubtotal,
-							mode_lib->vba.DPPCLKDelaySCL,
-							mode_lib->vba.DPPCLKDelaySCLLBOnly,
-							mode_lib->vba.DPPCLKDelayCNVCFormater,
-							mode_lib->vba.DPPCLKDelayCNVCCursor,
-							mode_lib->vba.DISPCLKDelaySubtotal,
-							locals->SwathWidthYThisState[k] / mode_lib->vba.HRatio[k],
-							mode_lib->vba.OutputFormat[k],
-							mode_lib->vba.MaxInterDCNTileRepeaters,
-							dml_min(mode_lib->vba.MaxVStartup, locals->MaximumVStartup[0][0][k]),
-							locals->MaximumVStartup[0][0][k],
-							mode_lib->vba.GPUVMMaxPageTableLevels,
-							mode_lib->vba.GPUVMEnable,
-							&myHostVM,
-							mode_lib->vba.DynamicMetadataEnable[k],
-							mode_lib->vba.DynamicMetadataLinesBeforeActiveRequired[k],
-							mode_lib->vba.DynamicMetadataTransmittedBytes[k],
-							mode_lib->vba.DCCEnable[k],
-							mode_lib->vba.UrgentLatency,
-							mode_lib->vba.ExtraLatency,
-							mode_lib->vba.TimeCalc,
-							locals->PDEAndMetaPTEBytesPerFrame[0][0][k],
-							locals->MetaRowBytes[0][0][k],
-							locals->DPTEBytesPerRow[0][0][k],
-							locals->PrefetchLinesY[0][0][k],
-							locals->SwathWidthYThisState[k],
-							locals->BytePerPixelInDETY[k],
-							locals->PrefillY[k],
-							locals->MaxNumSwY[k],
-							locals->PrefetchLinesC[0][0][k],
-							locals->BytePerPixelInDETC[k],
-							locals->PrefillC[k],
-							locals->MaxNumSwC[k],
-							locals->SwathHeightYThisState[k],
-							locals->SwathHeightCThisState[k],
-							mode_lib->vba.TWait,
-							mode_lib->vba.XFCEnabled[k],
-							mode_lib->vba.XFCRemoteSurfaceFlipDelay,
-							mode_lib->vba.ProgressiveToInterlaceUnitInOPP,
-							&locals->dst_x_after_scaler,
-							&locals->dst_y_after_scaler,
-							&locals->LineTimesForPrefetch[k],
-							&locals->PrefetchBW[k],
-							&locals->LinesForMetaPTE[k],
-							&locals->LinesForMetaAndDPTERow[k],
-							&locals->VRatioPreY[i][j][k],
-							&locals->VRatioPreC[i][j][k],
-							&locals->RequiredPrefetchPixelDataBWLuma[i][j][k],
-							&locals->RequiredPrefetchPixelDataBWChroma[i][j][k],
-							&locals->VStartupRequiredWhenNotEnoughTimeForDynamicMetadata,
-							&locals->Tno_bw[k],
-							&locals->prefetch_vmrow_bw[k],
-							locals->swath_width_luma_ub,
-							locals->swath_width_chroma_ub,
-							&mode_lib->vba.VUpdateOffsetPix[k],
-							&mode_lib->vba.VUpdateWidthPix[k],
-							&mode_lib->vba.VReadyOffsetPix[k]);
-				}
 				mode_lib->vba.MaximumReadBandwidthWithoutPrefetch = 0.0;
 				mode_lib->vba.MaximumReadBandwidthWithPrefetch = 0.0;
 				for (k = 0; k <= mode_lib->vba.NumberOfActivePlanes - 1; k++) {
-- 
2.33.0


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] drm/amd/display: Reduce stack size for dml21_ModeSupportAndSystemConfigurationFull
  2021-09-14 21:05 [PATCH] drm/amd/display: Reduce stack size for dml21_ModeSupportAndSystemConfigurationFull Harry Wentland
@ 2021-09-14 21:56 ` Arnd Bergmann
  2021-09-16 14:34 ` Leo Li
  1 sibling, 0 replies; 3+ messages in thread
From: Arnd Bergmann @ 2021-09-14 21:56 UTC (permalink / raw)
  To: Harry Wentland
  Cc: amd-gfx list, Nick Desaulniers, Linus Torvalds,
	Linux Kernel Mailing List, Leo Li, Alex Deucher,
	Christian König, xinhui pan, Nathan Chancellor,
	Guenter Roeck, llvm

On Tue, Sep 14, 2021 at 11:05 PM Harry Wentland <harry.wentland@amd.com> wrote:
>
> [Why & How]
> With Werror enabled in the kernel we were failing the clang build since
> dml21_ModeSupportAndSystemConfigurationFull's stack frame is 1064 when
> building with clang, and exceeding the default 1024 stack frame limit.
>
> The culprit seems to be the Pipe struct, so pull the relevant block
> out into its own sub-function.

I suspect it's not the Pipe struct but rather the way that you call another
function with a crazy number of arguments here. After your change,
this likely gets inlined and you avoid the problem, so the patch ends
up doing the right thing.

If you do more patches like this, I would suggest mentioning the new
stack usage of the calling function and the new noinline function, to
make sure that the combined number isn't actually worse than the old
number.

You can get these numbers by recompiling the file with the frame
size warning set to a low value, e.g. adding -Wframe-larger-than=100
to the command line.

Acked-by: Arnd Bergmann <arnd@arndb.de>

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] drm/amd/display: Reduce stack size for dml21_ModeSupportAndSystemConfigurationFull
  2021-09-14 21:05 [PATCH] drm/amd/display: Reduce stack size for dml21_ModeSupportAndSystemConfigurationFull Harry Wentland
  2021-09-14 21:56 ` Arnd Bergmann
@ 2021-09-16 14:34 ` Leo Li
  1 sibling, 0 replies; 3+ messages in thread
From: Leo Li @ 2021-09-16 14:34 UTC (permalink / raw)
  To: Harry Wentland, amd-gfx
  Cc: ndesaulniers, torvalds, linux-kernel, arnd, alexander.deucher,
	christian.koenig, Xinhui.Pan, nathan, linux, llvm



On 2021-09-14 17:05, Harry Wentland wrote:
> [Why & How]
> With Werror enabled in the kernel we were failing the clang build since
> dml21_ModeSupportAndSystemConfigurationFull's stack frame is 1064 when
> building with clang, and exceeding the default 1024 stack frame limit.
> 
> The culprit seems to be the Pipe struct, so pull the relevant block
> out into its own sub-function.
> 
> Signed-off-by: Harry Wentland <harry.wentland@amd.com>
> Fixes: 3fe617ccafd6 ("Enable '-Werror' by default for all kernel builds")
> Cc: Nick Desaulniers <ndesaulniers@google.com>
> Cc: Linus Torvalds <torvalds@linux-foundation.org>
> Cc: amd-gfx@lists.freedesktop.org
> Cc: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
> Cc: Arnd Bergmann <arnd@kernel.org>
> Cc: Leo Li <sunpeng.li@amd.com>
> Cc: Alex Deucher <alexander.deucher@amd.com>
> Cc: Christian König <christian.koenig@amd.com>
> Cc: Xinhui Pan <Xinhui.Pan@amd.com>
> Cc: Nathan Chancellor <nathan@kernel.org>
> Cc: Guenter Roeck <linux@roeck-us.net>
> Cc: llvm@lists.linux.dev
> ---
> ---

Reviewed-by: Leo Li <sunpeng.li@amd.com>

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-09-16 14:34 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-14 21:05 [PATCH] drm/amd/display: Reduce stack size for dml21_ModeSupportAndSystemConfigurationFull Harry Wentland
2021-09-14 21:56 ` Arnd Bergmann
2021-09-16 14:34 ` Leo Li

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.