From: Joseph Greathouse <Joseph.Greathouse@amd.com>
To: <amd-gfx@lists.freedesktop.org>
Cc: Tom.StDenis@amd.com, Joseph Greathouse <Joseph.Greathouse@amd.com>
Subject: [PATCH umr 1/3] Improve handling of non-standard page tables in AI+
Date: Thu, 17 Jun 2021 14:25:38 -0500
Message-ID: <20210617192540.4272-1-Joseph.Greathouse@amd.com>

Fixes GPUVM page table decoding when the page tables do not use the
standard 4-level layout with 512 entries per level. This includes:

- Calculating actual size of top-most PDB based on total VM range,
  page table depth, and page table block size.
- Calculating size of PTB based on the page table block size
  and the PDE0's block fragment size.
- Handling PTE offset and masks from PDE0 with P-bit, normal
  PTBs, or PTBs from a translate-further layer.
- When using a PTE with F bit to go one layer deeper, pull new
  block fragment size out of that PTE to handle further-level PTBs
  of non-standard sizes.
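
As an illustrative example (numbers chosen for exposition, not taken
from a specific ASIC): with a 48-bit VM space (total_vm_bits = 48),
a page table depth of 2, and PAGE_TABLE_BLOCK_SIZE = 9, the top-most
PDB index takes 48 - (9 * 1) - (9 + 21) = 9 bits, i.e. 512 entries.
If PDE0's block fragment size is also 9, each PTB then holds
2^(9 + 9 - 9) = 512 PTEs, each mapping a 2 MiB fragment.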

Signed-off-by: Joseph Greathouse <Joseph.Greathouse@amd.com>
---
 src/lib/read_vram.c | 199 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 153 insertions(+), 46 deletions(-)

diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c
index efcd081..049acd4 100644
--- a/src/lib/read_vram.c
+++ b/src/lib/read_vram.c
@@ -297,6 +297,26 @@ invalid_page:
 	return -1;
 }
 
+/** round_up_pot -- Round up value to the next power of two (minimum 64 MiB) */
+static uint64_t round_up_pot(uint64_t x)
+{
+	uint64_t y = (64ULL * 1024 * 1024); // start at 64MiB
+	while (y < x)
+		y <<= 1;
+	return y;
+}
+
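+/** log2_vm_size -- Number of address bits needed to cover the VM range.
+ * Illustrative example: a range where end - start + 4096 is 1 TiB rounds
+ * to 2^40 bytes, so this returns 40.
+ */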
+static uint64_t log2_vm_size(uint64_t page_table_start_addr, uint64_t page_table_end_addr)
+{
+	uint64_t size_of_vm_bytes = page_table_end_addr - page_table_start_addr + 4096;
+	size_of_vm_bytes = round_up_pot(size_of_vm_bytes);
+	// Find the highest set bit to get log2(size); exact, since size is now a power of two
+	uint32_t vm_bits = 0;
+	while (size_of_vm_bytes >>= 1)
+		vm_bits++;
+	return vm_bits;
+}
+
 /**
  * umr_access_vram_ai - Access GPU mapped memory for GFX9+ platforms
  */
@@ -304,17 +324,19 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 			      uint64_t address, uint32_t size,
 			      void *dst, int write_en)
 {
-	uint64_t start_addr, page_table_start_addr, page_table_base_addr,
-		 page_table_block_size, pte_idx, pde_idx, pte_entry, pde_entry,
+	uint64_t start_addr, page_table_start_addr, page_table_end_addr, page_table_base_addr,
+		 page_table_block_size, log2_ptb_entries, pte_idx, pde_idx, pte_entry, pde_entry,
 		 pde_address, vm_fb_offset,
 		 va_mask, offset_mask, system_aperture_low, system_aperture_high,
-		 fb_top, fb_bottom, pte_page_mask, agp_base, agp_bot, agp_top, prev_addr;
+		 fb_top, fb_bottom, ptb_mask, pte_page_mask, agp_base, agp_bot, agp_top, prev_addr;
 	uint32_t chunk_size, tmp, pde0_block_fragment_size;
 	int pde_cnt, current_depth, page_table_depth, zfb, further;
 	struct {
 		uint32_t
 			mmVM_CONTEXTx_PAGE_TABLE_START_ADDR_LO32,
 			mmVM_CONTEXTx_PAGE_TABLE_START_ADDR_HI32,
+			mmVM_CONTEXTx_PAGE_TABLE_END_ADDR_LO32,
+			mmVM_CONTEXTx_PAGE_TABLE_END_ADDR_HI32,
 			mmVM_CONTEXTx_CNTL,
 			mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR_LO32,
 			mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR_HI32,
@@ -461,6 +483,12 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 	sprintf(buf, "mm%sVM_CONTEXT%" PRIu32 "_PAGE_TABLE_START_ADDR_HI32", regprefix, vmid);
 		registers.mmVM_CONTEXTx_PAGE_TABLE_START_ADDR_HI32 = umr_read_reg_by_name_by_ip(asic, hub, buf);
 		page_table_start_addr |= (uint64_t)registers.mmVM_CONTEXTx_PAGE_TABLE_START_ADDR_HI32 << 44;
+	sprintf(buf, "mm%sVM_CONTEXT%" PRIu32 "_PAGE_TABLE_END_ADDR_LO32", regprefix, vmid);
+		registers.mmVM_CONTEXTx_PAGE_TABLE_END_ADDR_LO32 = umr_read_reg_by_name_by_ip(asic, hub, buf);
+		page_table_end_addr = (uint64_t)registers.mmVM_CONTEXTx_PAGE_TABLE_END_ADDR_LO32 << 12;
+	sprintf(buf, "mm%sVM_CONTEXT%" PRIu32 "_PAGE_TABLE_END_ADDR_HI32", regprefix, vmid);
+		registers.mmVM_CONTEXTx_PAGE_TABLE_END_ADDR_HI32 = umr_read_reg_by_name_by_ip(asic, hub, buf);
+		page_table_end_addr |= (uint64_t)registers.mmVM_CONTEXTx_PAGE_TABLE_END_ADDR_HI32 << 44;
 
 	sprintf(buf, "mm%sVM_CONTEXT%" PRIu32 "_CNTL", regprefix, vmid);
 		tmp = registers.mmVM_CONTEXTx_CNTL = umr_read_reg_by_name_by_ip(asic, hub, buf);
@@ -495,6 +523,8 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 		asic->mem_funcs.vm_message(
 				"mm%sVM_CONTEXT%" PRIu32 "_PAGE_TABLE_START_ADDR_LO32=0x%" PRIx32 "\n"
 				"mm%sVM_CONTEXT%" PRIu32 "_PAGE_TABLE_START_ADDR_HI32=0x%" PRIx32 "\n"
+				"mm%sVM_CONTEXT%" PRIu32 "_PAGE_TABLE_END_ADDR_LO32=0x%" PRIx32 "\n"
+				"mm%sVM_CONTEXT%" PRIu32 "_PAGE_TABLE_END_ADDR_HI32=0x%" PRIx32 "\n"
 				"mm%sVM_CONTEXT%" PRIu32 "_PAGE_TABLE_BASE_ADDR_LO32=0x%" PRIx32 "\n"
 				"mm%sVM_CONTEXT%" PRIu32 "_PAGE_TABLE_BASE_ADDR_HI32=0x%" PRIx32 "\n"
 				"mm%sVM_CONTEXT%" PRIu32 "_CNTL=0x%" PRIx32 "\n"
@@ -513,6 +543,8 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 				"mm%sMC_VM_AGP_TOP=0x%" PRIx32 "\n",
 			regprefix, vmid, registers.mmVM_CONTEXTx_PAGE_TABLE_START_ADDR_LO32,
 			regprefix, vmid, registers.mmVM_CONTEXTx_PAGE_TABLE_START_ADDR_HI32,
+			regprefix, vmid, registers.mmVM_CONTEXTx_PAGE_TABLE_END_ADDR_LO32,
+			regprefix, vmid, registers.mmVM_CONTEXTx_PAGE_TABLE_END_ADDR_HI32,
 			regprefix, vmid, registers.mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR_LO32,
 			regprefix, vmid, registers.mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR_HI32,
 			regprefix, vmid, registers.mmVM_CONTEXTx_CNTL,
@@ -535,10 +567,6 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 	// transform page_table_base
 	page_table_base_addr -= vm_fb_offset;
 
-	// convert some defaults to actual values AFTER printing out to user
-	// page_table_block_size of 0 means 9 (512 entries)
-	if (!page_table_block_size)
-		page_table_block_size = 9;
 	pde0_block_fragment_size = 0;
 
 	if (vmid == 0) {
@@ -593,7 +621,9 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 
 		// defaults in case we have to bail out before fully decoding to a PTE
 		pde_cnt = 0;
+		ptb_mask = (1ULL << 9) - 1;
 		pte_page_mask = (1ULL << 12) - 1;
+		log2_ptb_entries = 9;
 		further = 0;
 
 		if (page_table_depth >= 1) {
@@ -608,8 +638,23 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 			// AI+ supports more than 1 level of PDEs so we iterate for all of the depths
 			pde_address = pde_fields.pte_base_addr;
 
-			// TODO: Should "page_table_block_size" just be 9 to account for potential PTB1 selectors?
-			va_mask = ((uint64_t)511 << ((page_table_depth)*9 + (12 + pde0_block_fragment_size + page_table_block_size)));
+			/*
+			 * Size of the first PDB depends on the total coverage of the
+			 * page table and the PAGE_TABLE_BLOCK_SIZE.
+			 * Entire table takes ceil(log2(total_vm_size)) bits
+			 * All PDBs except the first one take 9 bits each
+			 * The PTB covers at least 2 MiB (21 bits)
+			 * And PAGE_TABLE_BLOCK_SIZE is log2(num 2MiB ranges PTB covers)
+			 * As such, the formula for the size of the first PDB is:
+			 *                       PDB1, PDB0, etc.      PTB covers at least 2 MiB
+			 *                                        Block size can make it cover more
+			 *   total_vm_bits - (9 * num_middle_pdbs) - (page_table_block_size + 21)
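+			 *
+			 * Illustrative example: total_vm_bits = 48, page_table_depth = 3,
+			 * and page_table_block_size = 0 gives 48 - (9 * 2) - (0 + 21) = 9,
+			 * i.e. a standard 512-entry top-level PDB.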
+			 */
+			int total_vm_bits = log2_vm_size(page_table_start_addr, page_table_end_addr);
+			int top_pdb_bits = total_vm_bits - (9 * (page_table_depth - 1)) - (page_table_block_size + 21);
+
+			va_mask = (1ULL << top_pdb_bits) - 1;
+			va_mask <<= (total_vm_bits - top_pdb_bits);
 
 			if ((asic->options.no_fold_vm_decode || memcmp(&pde_fields, &pde_array[pde_cnt], sizeof pde_fields)) && asic->options.verbose)
 				asic->mem_funcs.vm_message("BASE=0x%016" PRIx64 ", VA=0x%012" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", C=%" PRIu64 ", P=%" PRIu64 "\n",
@@ -624,14 +669,19 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 
 			current_depth = page_table_depth;
 			while (current_depth) {
-				pde_idx = address >> (9 * (current_depth - 1) + page_table_block_size + 12);
-				// mask only 9 bits
-				if (current_depth != page_table_depth)
-					pde_idx &= (1ULL << 9) - 1;
-
-
-				// TODO: redo va_mask
-				va_mask = ((uint64_t)511 << ((page_table_depth - pde_cnt)*9 + (12 + pde0_block_fragment_size + page_table_block_size)));
+				// Every middle PDB has 512 entries, so shift a further 9 bits
+				// for every layer beyond the first one.
+				int amount_to_shift = (total_vm_bits - top_pdb_bits);
+				amount_to_shift -= ((page_table_depth - current_depth)*9);
+				pde_idx = address >> amount_to_shift;
+
+				// Middle layers need the upper bits masked out after the right-shift.
+				// For the top-most layer, the va_mask is set above the while loop,
+				// so we can skip re-setting it here.
+				if (current_depth != page_table_depth) {
+					pde_idx &= 511;
+					va_mask = (uint64_t)511 << amount_to_shift;
+				}
 
 				// read PDE entry
 				prev_addr = pde_address + pde_idx * 8;
@@ -671,9 +721,18 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 				pde_fields.pte           = (pde_entry >> 54) & 1;
 				if (current_depth == 1) {
 					pde0_block_fragment_size = pde_fields.frag_size;
-					// page_table_block_size is the number of entries in a PTB that spans 2MB
-					page_table_block_size = 21 - (12 + pde0_block_fragment_size);
-					pte_page_mask = (1ULL << (12 + pde0_block_fragment_size)) - 1;
+					/*
+					 * page_table_block_size is log2 of the number of 2 MiB regions covered by a PTB
+					 * If it's 0, a PTB covers 2 MiB
+					 * If it's 9, a PTB covers 1024 MiB
+					 * pde0_block_fragment_size is log2 of the number of 4 KiB pages each PTE covers
+					 * If it's 0, a PTE covers 4 KiB
+					 * If it's 9, a PTE covers 2 MiB
+					 * So the number of PTEs in a PTB is 2^(9 + ptbs - pbfs)
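+					 * Illustrative example: ptbs = 9 and pbfs = 9 gives
+					 * 2^(9 + 9 - 9) = 512 PTEs per PTB, each covering 2 MiB.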
+					 */
+					log2_ptb_entries = (9 + (page_table_block_size - pde0_block_fragment_size));
+					ptb_mask = (1ULL << log2_ptb_entries) - 1;
+					pte_page_mask = (1ULL << (pde0_block_fragment_size + 12)) - 1;
 					if (asic->options.verbose)
 						asic->mem_funcs.vm_message("pde0.pte = %u\npde0.block_fragment_size = %u\npage_table_block_size = %u\n",
 							(unsigned)pde_fields.pte,
@@ -723,9 +782,13 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 				pde_address = pde_fields.pte_base_addr;
 			}
 
-			// read PTE selector (to select from PTB0)
-			// TODO:  support for page_table_block_size > 9
-			pte_idx = (address >> (12 + pde0_block_fragment_size)) & ((1ULL << page_table_block_size) - 1);
+			// If we fall through to here, we are pointing into PTB, so pull out
+			// the index and mask.
+			// At minimum, each PTE covers 4 KiB (12 bits).
+			// PDE0.BFS tells us (as log2) how many of these 4 KiB pages each
+			// PTE covers, so add those bits in.
+			// We also calculated the PTB mask up above, to know how many PTEs are in this PTB.
+			pte_idx = (address >> (12 + pde0_block_fragment_size)) & ptb_mask;
 pte_further:
 			// now read PTE entry for this page
 			prev_addr = pde_fields.pte_base_addr + pte_idx*8;
@@ -778,20 +841,74 @@ pde_is_pte:
 					pte_fields.fragment,
 					pte_fields.further);
 
-			if (pte_fields.further) {
-				if (page_table_block_size == 9) {
-					// this case doesn't make sense unless we support PTBS > 9
-					asic->mem_funcs.vm_message("[ERROR]: PTE.further is set and *CNTL.PAGE_TABLE_BLOCK_SIZE is 9...\n");
-					return -1;
+			// How many bits in the address are used to index into the PTB?
+			// If further is set, that means we jumped back to pde_is_pte,
+			// and the va_mask was properly set down there.
+			if (!further) {
+				// total_vm_bits are all the bits in the VM space
+				// We want to ignore the top-most PDB, which uses top_pdb_bits
+				// We also want to ignore lower PDBs, which use 9 bits each
+				int bits_to_use = total_vm_bits - top_pdb_bits - (9 * (page_table_depth - 1));
+
+				// At a minimum, we want to ignore the bottom 12 bits for a 4 KiB page
+				int lower_bits_to_ignore = 12;
+
+				if (pde_fields.pte) {
+					// We are in here because we're in PDE0 with P bit. So we don't want
+					// to skip the 9 bits from PDB0.
+					bits_to_use += 9;
+
+					// If the P bit is set, we are coming from PDE0, thus this entry
+					// covers the whole page_table_block_size, instead of the PDE0.BFS.
+					// So we want to ignore those bits in the address.
+					lower_bits_to_ignore += page_table_block_size;
 				} else {
-					pte_idx = (address >> 12) & ((1ULL << pde0_block_fragment_size) - 1);
-					pte_page_mask = (1ULL << 12) - 1;
-
-					// grab PTE base address from the PTE that has the F bit set.
-					pde_fields.pte_base_addr = pte_fields.page_base_addr;
-					further = 1;
-					goto pte_further;
+					// If we are at an actual PTE, then based on PDE0.BFS, we want to ignore
+					// some of the lowest bits.
+					// If PDE0.BFS=0, the bottom 12 bits are used to index within the page
+					// If PDE0.BFS=9, the bottom 21 bits are used to index within the page
+					// etc.  These are the bits we want to ignore, and we already put 12 in.
+					lower_bits_to_ignore += pde0_block_fragment_size;
 				}
+
+				va_mask = (1ULL << bits_to_use) - 1;
+				uint64_t mask_to_ignore = (1ULL << lower_bits_to_ignore) - 1;
+				va_mask = va_mask & ~mask_to_ignore;
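+
+				// Illustrative example: total_vm_bits = 48, top_pdb_bits = 9,
+				// page_table_depth = 3, PDE0.BFS = 0, and P bit clear:
+				// bits_to_use = 48 - 9 - 18 = 21 and lower_bits_to_ignore = 12,
+				// so va_mask covers VA bits [12, 21).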
+			}
+
+			uint32_t pte_block_fragment_size = 0;
+			if (pte_fields.further) {
+				// We are going one more level deep, so we need the further-PTE's
+				// block_fragment_size. This tells us (as log2) how many 4K pages
+				// each last-level PTE covers.
+				pte_block_fragment_size = (pte_entry >> 59) & 0x1F;
+
+				// Each entry covers 2^(further-PTE.block_fragment_size)
+				// 4K pages, so we can potentially ignore some low-order bits.
+				int last_level_ptb_bits = 12 + pte_block_fragment_size;
+				pte_idx = address >> last_level_ptb_bits;
+
+				// The total size covered by the last-level PTB is a function of
+				// pde0_block_fragment_size, which tells us (as log2) how many
+				// 4K pages the PTB covers.
+				// So the number of bits needed to index the entries in the last-level PTB is:
+				uint32_t num_entry_bits = pde0_block_fragment_size - pte_block_fragment_size;
+				// Clamp the index to the new last-level PTB's size.
+				pte_idx &= ((1ULL << num_entry_bits) - 1);
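+				// Illustrative example: pde0_block_fragment_size = 9 and
+				// pte_block_fragment_size = 0 gives 2^9 = 512 entries in the
+				// last-level PTB, each mapping a single 4 KiB page.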
+
+				uint64_t upper_mask = (1ULL << (12 + pde0_block_fragment_size)) - 1;
+				pte_page_mask = (1ULL << last_level_ptb_bits) - 1;
+				va_mask &= (upper_mask & ~pte_page_mask);
+
+				// grab PTE base address and other data from the PTE that has the F bit set.
+				pde_fields.frag_size     = (pte_entry >> 59) & 0x1F;
+				pde_fields.pte_base_addr = pte_entry & 0xFFFFFFFFFFC0ULL;
+				pde_fields.valid         = pte_entry & 1;
+				pde_fields.system        = (pte_entry >> 1) & 1;
+				pde_fields.cache         = (pte_entry >> 2) & 1;
+				pde_fields.pte           = 0;
+				further = 1;
+				goto pte_further;
 			}
 
 			if (!pte_fields.system)
@@ -802,11 +919,10 @@ pde_is_pte:
 
 			// compute starting address
 			// this also accounts for PDE-is-PTE masking since current_depth > 0 at this point
-			// if we are processing a PTE leaf node then the page size is 12 bits
 			if (!further)
 				offset_mask = (1ULL << ((current_depth * 9) + (12 + pde0_block_fragment_size))) - 1;
 			else
-				offset_mask = (1ULL << 12) - 1; // offset masks are always 12-bits wide with PTE.further set
+				offset_mask = (1ULL << (12 + pte_block_fragment_size)) - 1;
 
 			start_addr = asic->mem_funcs.gpu_bus_to_cpu_address(asic, pte_fields.page_base_addr) + (address & offset_mask);
 		} else {
@@ -935,15 +1051,6 @@ invalid_page:
 	return -1;
 }
 
-/** round_up_pot -- Round up value to next power of two */
-static uint64_t round_up_pot(uint64_t x)
-{
-	uint64_t y = (64ULL * 1024 * 1024); // start at 64MiB
-	while (y < x)
-		y <<= 1;
-	return y;
-}
-
 /**
  * umr_access_vram - Access GPU mapped memory
  *
-- 
2.20.1
