All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jasdeep Dhillon <jdhillon@amd.com>
To: <amd-gfx@lists.freedesktop.org>
Cc: stylon.wang@amd.com, Sunpeng.Li@amd.com, Harry.Wentland@amd.com,
	qingqing.zhuo@amd.com, Jasdeep Dhillon <jdhillon@amd.com>,
	Rodrigo.Siqueira@amd.com, roman.li@amd.com, solomon.chiu@amd.com,
	Aurabindo.Pillai@amd.com, Alvin Lee <Alvin.Lee2@amd.com>,
	wayne.lin@amd.com, Jun Lei <Jun.Lei@amd.com>,
	Bhawanpreet.Lakha@amd.com, agustin.gutierrez@amd.com,
	pavle.kotarac@amd.com
Subject: [PATCH 12/31] drm/amd/display: Update MALL SS NumWays calculation
Date: Wed, 21 Sep 2022 20:18:02 -0400	[thread overview]
Message-ID: <20220922001821.320255-13-jdhillon@amd.com> (raw)
In-Reply-To: <20220922001821.320255-1-jdhillon@amd.com>

From: Alvin Lee <Alvin.Lee2@amd.com>

[Description]
Update MALL SS NumWays calculation according
to programming guide.

Reviewed-by: Jun Lei <Jun.Lei@amd.com>
Acked-by: Jasdeep Dhillon <jdhillon@amd.com>
Signed-off-by: Alvin Lee <Alvin.Lee2@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dc.h           |   1 +
 .../drm/amd/display/dc/dcn32/dcn32_hwseq.c    | 206 ++++++++----------
 2 files changed, 97 insertions(+), 110 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index bbc352b18bf4..30274e8a6d23 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -832,6 +832,7 @@ struct dc_debug_options {
 	bool force_subvp_mclk_switch;
 	bool allow_sw_cursor_fallback;
 	unsigned int force_subvp_num_ways;
+	unsigned int force_mall_ss_num_ways;
 	bool alloc_extra_way_for_cursor;
 	bool force_usr_allow;
 	/* uses value at boot and disables switch */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
index 6baea56f259c..ab47475c18ae 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
@@ -49,6 +49,7 @@
 #include "dcn20/dcn20_optc.h"
 #include "dmub_subvp_state.h"
 #include "dce/dmub_hw_lock_mgr.h"
+#include "dcn32_resource.h"
 #include "dc_link_dp.h"
 #include "dmub/inc/dmub_subvp_state.h"
 
@@ -198,42 +199,6 @@ static bool dcn32_check_no_memory_request_for_cab(struct dc *dc)
 	return false;
 }
 
-/* This function takes in the start address and surface size to be cached in CAB
- * and calculates the total number of cache lines required to store the surface.
- * The number of cache lines used for each surface is calculated independently of
- * one another. For example, if there is a primary surface(1), meta surface(2), and
- * cursor(3), this function should be called 3 times to calculate the number of cache
- * lines used for each of those surfaces.
- */
-static uint32_t dcn32_cache_lines_for_surface(struct dc *dc, uint32_t surface_size, uint64_t start_address)
-{
-	uint32_t lines_used = 1;
-	uint32_t num_cached_bytes = 0;
-	uint32_t remaining_size = 0;
-	uint32_t cache_line_size = dc->caps.cache_line_size;
-	uint32_t remainder = 0;
-
-	/* 1. Calculate surface size minus the number of bytes stored
-	 * in the first cache line (all bytes in first cache line might
-	 * not be fully used).
-	 */
-	div_u64_rem(start_address, cache_line_size, &remainder);
-	num_cached_bytes = cache_line_size - remainder;
-	remaining_size = surface_size - num_cached_bytes;
-
-	/* 2. Calculate number of cache lines that will be fully used with
-	 * the remaining number of bytes to be stored.
-	 */
-	lines_used += (remaining_size / cache_line_size);
-
-	/* 3. Check if we need an extra line due to the remaining size not being
-	 * a multiple of CACHE_LINE_SIZE.
-	 */
-	if (remaining_size % cache_line_size > 0)
-		lines_used++;
-
-	return lines_used;
-}
 
 /* This function loops through every surface that needs to be cached in CAB for SS,
  * and calculates the total number of ways required to store all surfaces (primary,
@@ -241,96 +206,115 @@ static uint32_t dcn32_cache_lines_for_surface(struct dc *dc, uint32_t surface_si
  */
 static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *ctx)
 {
-	uint8_t i, j;
+	uint8_t i; 
+	int j;
 	struct dc_stream_state *stream = NULL;
 	struct dc_plane_state *plane = NULL;
-	uint32_t surface_size = 0;
 	uint32_t cursor_size = 0;
-	uint32_t cache_lines_used = 0;
 	uint32_t total_lines = 0;
 	uint32_t lines_per_way = 0;
-	uint32_t num_ways = 0;
-	uint32_t prev_addr_low = 0;
+	uint8_t num_ways = 0;
+	uint8_t bytes_per_pixel = 0;
+	uint8_t cursor_bpp = 0;
+	uint16_t mblk_width = 0;
+	uint16_t mblk_height = 0;
+	uint16_t mall_alloc_width_blk_aligned = 0;
+	uint16_t mall_alloc_height_blk_aligned = 0;
+	uint16_t num_mblks = 0;
+	uint32_t bytes_in_mall = 0;
+	uint32_t cache_lines_per_plane = 0;
 
-	for (i = 0; i < ctx->stream_count; i++) {
-		stream = ctx->streams[i];
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
 
-		// Don't include PSR surface in the total surface size for CAB allocation
-		if (stream->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED)
+		if (!pipe->stream || !pipe->plane_state ||
+				pipe->stream->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED ||
+				pipe->stream->mall_stream_config.type == SUBVP_PHANTOM)
 			continue;
 
-		if (ctx->stream_status[i].plane_count == 0)
-			continue;
+		bytes_per_pixel = pipe->plane_state->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4;
+		mblk_width = DCN3_2_MBLK_WIDTH;
+		mblk_height = bytes_per_pixel == 4 ? DCN3_2_MBLK_HEIGHT_4BPE : DCN3_2_MBLK_HEIGHT_8BPE;
 
-		// For each stream, loop through each plane to calculate the number of cache
-		// lines required to store the surface in CAB
-		for (j = 0; j < ctx->stream_status[i].plane_count; j++) {
-			plane = ctx->stream_status[i].plane_states[j];
+		/* full_vp_width_blk_aligned = FLOOR(vp_x_start + full_vp_width + blk_width - 1, blk_width) -
+		 * FLOOR(vp_x_start, blk_width)
+		 *
+		 * mall_alloc_width_blk_aligned_l/c = full_vp_width_blk_aligned_l/c
+		 */
+		mall_alloc_width_blk_aligned = ((pipe->plane_res.scl_data.viewport.x +
+				pipe->plane_res.scl_data.viewport.width + mblk_width - 1) / mblk_width * mblk_width) +
+						(pipe->plane_res.scl_data.viewport.x / mblk_width * mblk_width);
+
+		/* full_vp_height_blk_aligned = FLOOR(vp_y_start + full_vp_height + blk_height - 1, blk_height) -
+		 * FLOOR(vp_y_start, blk_height)
+		 *
+		 * mall_alloc_height_blk_aligned_l/c = full_vp_height_blk_aligned_l/c
+		 */
+		mall_alloc_height_blk_aligned = ((pipe->plane_res.scl_data.viewport.y +
+				pipe->plane_res.scl_data.viewport.height + mblk_height - 1) / mblk_height * mblk_height) +
+						(pipe->plane_res.scl_data.viewport.y / mblk_height * mblk_height);
 
-			// Calculate total surface size
-			if (prev_addr_low != plane->address.grph.addr.u.low_part) {
-				/* if plane address are different from prev FB, then userspace allocated separate FBs*/
-				surface_size += plane->plane_size.surface_pitch *
-					plane->plane_size.surface_size.height *
-					(plane->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4);
+		num_mblks = ((mall_alloc_width_blk_aligned + mblk_width - 1) / mblk_width) *
+				((mall_alloc_height_blk_aligned + mblk_height - 1) / mblk_height);
 
-				prev_addr_low = plane->address.grph.addr.u.low_part;
-			} else {
-				/* We have the same fb for all the planes.
-				 * Xorg always creates one giant fb that holds all surfaces,
-				 * so allocating it once is sufficient.
-				 * */
-				continue;
-			}
-			// Convert surface size + starting address to number of cache lines required
-			// (alignment accounted for)
-			cache_lines_used += dcn32_cache_lines_for_surface(dc, surface_size,
-					plane->address.grph.addr.quad_part);
-
-			if (plane->address.grph.meta_addr.quad_part) {
-				// Meta surface
-				cache_lines_used += dcn32_cache_lines_for_surface(dc, surface_size,
-						plane->address.grph.meta_addr.quad_part);
-			}
-		}
+		/* For DCC:
+		 * meta_num_mblk = CEILING(full_mblk_width_ub_l*full_mblk_height_ub_l*Bpe/256/mblk_bytes, 1)
+		 */
+		if (pipe->plane_state->dcc.enable)
+			num_mblks += (mall_alloc_width_blk_aligned * mall_alloc_width_blk_aligned * bytes_per_pixel +
+					(256 * DCN3_2_MALL_MBLK_SIZE_BYTES) - 1) / (256 * DCN3_2_MALL_MBLK_SIZE_BYTES);
 
-		// Include cursor size for CAB allocation
-		for (j = 0; j < dc->res_pool->pipe_count; j++) {
-			struct pipe_ctx *pipe = &ctx->res_ctx.pipe_ctx[j];
-			struct hubp *hubp = pipe->plane_res.hubp;
+		bytes_in_mall = num_mblks * DCN3_2_MALL_MBLK_SIZE_BYTES;
 
-			if (pipe->stream && pipe->plane_state && hubp)
-				/* Find the cursor plane and use the exact size instead of
-				 * using the max for calculation
-				 */
-				if (hubp->curs_attr.width > 0) {
-					// Round cursor width to next multiple of 64
-					cursor_size = (((hubp->curs_attr.width + 63) / 64) * 64) * hubp->curs_attr.height;
-					break;
-				}
-		}
+		/* (cache lines used is total bytes / cache_line size. Add +2 for worst case alignment
+		 * (MALL is 64-byte aligned)
+		 */
+		cache_lines_per_plane = bytes_in_mall / dc->caps.cache_line_size + 2;
+		cache_lines_used += cache_lines_per_plane;
+	}
 
-		switch (stream->cursor_attributes.color_format) {
-		case CURSOR_MODE_MONO:
-			cursor_size /= 2;
-			break;
-		case CURSOR_MODE_COLOR_1BIT_AND:
-		case CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA:
-		case CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA:
-			cursor_size *= 4;
-			break;
+	// Include cursor size for CAB allocation
+	for (j = 0; j < dc->res_pool->pipe_count; j++) {
+		struct pipe_ctx *pipe = &ctx->res_ctx.pipe_ctx[j];
+		struct hubp *hubp = pipe->plane_res.hubp;
 
-		case CURSOR_MODE_COLOR_64BIT_FP_PRE_MULTIPLIED:
-		case CURSOR_MODE_COLOR_64BIT_FP_UN_PRE_MULTIPLIED:
-			cursor_size *= 8;
-			break;
-		}
+		if (pipe->stream && pipe->plane_state && hubp)
+			/* Find the cursor plane and use the exact size instead of
+			using the max for calculation */
 
-		if (stream->cursor_position.enable && !dc->debug.alloc_extra_way_for_cursor &&
-				cursor_size > 16384) {
-			cache_lines_used += dcn32_cache_lines_for_surface(dc, cursor_size,
-					plane->address.grph.cursor_cache_addr.quad_part);
-		}
+		if (hubp->curs_attr.width > 0) {
+				// Round cursor width to next multiple of 64
+				cursor_size = (((hubp->curs_attr.width + 63) / 64) * 64) * hubp->curs_attr.height;
+
+				switch (pipe->stream->cursor_attributes.color_format) {
+				case CURSOR_MODE_MONO:
+					cursor_size /= 2;
+					cursor_bpp = 4;
+					break;
+				case CURSOR_MODE_COLOR_1BIT_AND:
+				case CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA:
+				case CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA:
+					cursor_size *= 4;
+					cursor_bpp = 4;
+					break;
+
+				case CURSOR_MODE_COLOR_64BIT_FP_PRE_MULTIPLIED:
+				case CURSOR_MODE_COLOR_64BIT_FP_UN_PRE_MULTIPLIED:
+					cursor_size *= 8;
+					cursor_bpp = 8;
+					break;
+				}
+
+				if (pipe->stream->cursor_position.enable && !dc->debug.alloc_extra_way_for_cursor &&
+						cursor_size > 16384) {
+					/* cursor_num_mblk = CEILING(num_cursors*cursor_width*cursor_width*cursor_Bpe/mblk_bytes, 1)
+					 */
+					cache_lines_used += (((hubp->curs_attr.width * hubp->curs_attr.height * cursor_bpp +
+										DCN3_2_MALL_MBLK_SIZE_BYTES - 1) / DCN3_2_MALL_MBLK_SIZE_BYTES) *
+										DCN3_2_MALL_MBLK_SIZE_BYTES) / dc->caps.cache_line_size + 2;
+				}
+				break;
+			}
 	}
 
 	// Convert number of cache lines required to number of ways
@@ -360,7 +344,9 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
 			}
 		}
 	}
-
+	if (dc->debug.force_mall_ss_num_ways > 0) {
+		num_ways = dc->debug.force_mall_ss_num_ways;
+	}
 	return num_ways;
 }
 
-- 
2.25.1


  parent reply	other threads:[~2022-09-22  0:20 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-09-22  0:17 [PATCH 00/31] DC Patches Sept 26, 2022 Jasdeep Dhillon
2022-09-22  0:17 ` [PATCH 01/31] drm/amd/display: Refactor LTTPR mode selection Jasdeep Dhillon
2022-09-22  0:17 ` [PATCH 02/31] drm/amd/display: Update DCN32 to use new SR latencies Jasdeep Dhillon
2022-09-22  0:17 ` [PATCH 03/31] drm/amd/display: Fix various dynamic ODM transitions on DCN32 Jasdeep Dhillon
2022-09-22  0:17 ` [PATCH 04/31] drm/amd/display: Remove interface for periodic interrupt 1 Jasdeep Dhillon
2022-09-22  0:17 ` [PATCH 05/31] drm/amd/display: Avoid avoid unnecessary pixel rate divider programming Jasdeep Dhillon
2022-09-22  0:17 ` [PATCH 06/31] drm/amd/display: Wrap OTG disable workaround with FIFO control Jasdeep Dhillon
2022-09-22  0:17 ` [PATCH 07/31] drm/amd/display: Add explicit FIFO disable for DP blank Jasdeep Dhillon
2022-09-22  0:17 ` [PATCH 08/31] drm/amd/display: Do DIO FIFO enable after DP video stream enable Jasdeep Dhillon
2022-09-22  0:17 ` [PATCH 09/31] drm/amd/display: Change EDID fallback condition Jasdeep Dhillon
2022-09-22  0:18 ` [PATCH 10/31] drm/amd/display: skip phantom pipes when checking for pending flip Jasdeep Dhillon
2022-09-22  0:18 ` [PATCH 11/31] drm/amd/display: fix a divide by zero error Jasdeep Dhillon
2022-09-22  0:18 ` Jasdeep Dhillon [this message]
2022-09-22  0:18 ` [PATCH 13/31] drm/amd/display: add missing null check Jasdeep Dhillon
2022-09-22  0:18 ` [PATCH 14/31] drm/amd/display: Fix audio on display after unplugging another Jasdeep Dhillon
2022-09-22  0:18 ` [PATCH 15/31] drm/amd/display: add debug keys for override bios settings Jasdeep Dhillon
2022-09-22  0:18 ` [PATCH 16/31] drm/amd/display: Fix typo in get_pixel_rate_div Jasdeep Dhillon
2022-09-22  0:18 ` [PATCH 17/31] drm/amd/display: Disable MALL when TMZ surface Jasdeep Dhillon
2022-09-22  0:18 ` [PATCH 18/31] drm/amd/display: Fix CAB allocation calculation Jasdeep Dhillon
2022-09-22  0:18 ` [PATCH 19/31] drm/amd/display: fill in clock values when DPM is not enabled Jasdeep Dhillon
2022-09-22  0:18 ` [PATCH 20/31] drm/amd/display: polling vid stream status in hpo dp blank Jasdeep Dhillon
2022-09-22  0:18 ` [PATCH 21/31] Add ABM control to panel_config struct Jasdeep Dhillon
2022-09-22 14:12   ` Pillai, Aurabindo
2022-09-22  0:18 ` [PATCH 22/31] drm/amd/display: Avoid unnecessary pixel rate divider programming Jasdeep Dhillon
2022-09-22  0:18 ` [PATCH 23/31] Add debug option for exiting idle optimizations on cursor updates Jasdeep Dhillon
2022-09-22 14:13   ` Pillai, Aurabindo
2022-09-22  0:18 ` [PATCH 24/31] drm/amd/display: Cursor Info Update refactor Jasdeep Dhillon
2022-09-22  0:18 ` [PATCH 25/31] drm/amd/display: Remove assert for odm transition case Jasdeep Dhillon
2022-09-22  0:18 ` [PATCH 26/31] drm/amd/display: 3.2.205 Jasdeep Dhillon
2022-09-22  0:18 ` [PATCH 27/31] drm/amd/display: Reduce number of arguments of dml314's CalculateWatermarksAndDRAMSpeedChangeSupport() Jasdeep Dhillon
2022-09-22  0:18 ` [PATCH 28/31] drm/amd/display: Reduce number of arguments of dml314's CalculateFlipSchedule() Jasdeep Dhillon
2022-09-22  0:18 ` [PATCH 29/31] drm/amd/display: remove redundant CalculateTWait's Jasdeep Dhillon
2022-09-22  0:18 ` [PATCH 30/31] drm/amd/display: refactor CalculateWriteBackDelay to use vba_vars_st ptr Jasdeep Dhillon
2022-09-22  0:18 ` [PATCH 31/31] drm/amd/display: remove redundant CalculateRemoteSurfaceFlipDelay's Jasdeep Dhillon
2022-09-23 15:44 ` [PATCH 00/31] DC Patches Sept 26, 2022 Wheeler, Daniel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220922001821.320255-13-jdhillon@amd.com \
    --to=jdhillon@amd.com \
    --cc=Alvin.Lee2@amd.com \
    --cc=Aurabindo.Pillai@amd.com \
    --cc=Bhawanpreet.Lakha@amd.com \
    --cc=Harry.Wentland@amd.com \
    --cc=Jun.Lei@amd.com \
    --cc=Rodrigo.Siqueira@amd.com \
    --cc=Sunpeng.Li@amd.com \
    --cc=agustin.gutierrez@amd.com \
    --cc=amd-gfx@lists.freedesktop.org \
    --cc=pavle.kotarac@amd.com \
    --cc=qingqing.zhuo@amd.com \
    --cc=roman.li@amd.com \
    --cc=solomon.chiu@amd.com \
    --cc=stylon.wang@amd.com \
    --cc=wayne.lin@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.