* [PATCH umr 1/3] Improve handling of non-standard page tables in AI+
@ 2021-06-17 19:25 Joseph Greathouse
  2021-06-17 19:25 ` [PATCH umr 2/3] Generalize decoding of PDEs and PTEs " Joseph Greathouse
  2021-06-17 19:25 ` [PATCH umr 3/3] Enhance printing of page tables " Joseph Greathouse
  0 siblings, 2 replies; 6+ messages in thread
From: Joseph Greathouse @ 2021-06-17 19:25 UTC (permalink / raw)
  To: amd-gfx; +Cc: Tom.StDenis, Joseph Greathouse

Fixes handling of GPUVM page table decoding when not using 4-level
page tables with 512 entries per level. This includes:

- Calculating actual size of top-most PDB based on total VM range,
  page table depth, and page table block size.
- Calculating size of PTB based on the page table block size
  and the PDE0's block fragment size.
- Handling PTE offset and masks from PDE0 with the P-bit set, normal
  PTBs, or PTBs from a translate-further layer.
- When using a PTE with the F bit to go one layer deeper, pulling the
  new block fragment size out of that PTE to handle further-level
  PTBs of non-standard sizes.
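
As a rough illustration of the sizing math (not part of the patch; the
configuration values in this sketch are hypothetical examples, while the
formulas mirror the ones added to read_vram.c):

/* Standalone sketch of the PDB/PTB sizing math; example values only. */
#include <stdio.h>

int main(void)
{
	int total_vm_bits = 48;            /* log2 of the rounded-up VM range */
	int page_table_depth = 3;          /* number of PDB levels */
	int page_table_block_size = 0;     /* log2(# of 2 MiB ranges one PTB covers) */
	int pde0_block_fragment_size = 0;  /* log2(# of 4 KiB pages one PTE covers) */

	/* The top-most PDB gets whatever bits remain after the middle PDBs
	 * (9 bits each) and the PTB (21 + page_table_block_size bits). */
	int top_pdb_bits = total_vm_bits - 9 * (page_table_depth - 1)
			 - (page_table_block_size + 21);

	/* Number of PTEs in one PTB. */
	int log2_ptb_entries = 9 + page_table_block_size - pde0_block_fragment_size;

	printf("top PDB: %d bits -> %d entries\n", top_pdb_bits, 1 << top_pdb_bits);
	printf("PTB: %d entries, each covering %d KiB\n",
	       1 << log2_ptb_entries, 4 << pde0_block_fragment_size);
	return 0;
}

With these example values the sketch reproduces the standard layout
(512-entry top PDB, 512 4 KiB PTEs per PTB); non-zero
PAGE_TABLE_BLOCK_SIZE or PDE0.BFS values shrink or grow the levels
accordingly.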

Signed-off-by: Joseph Greathouse <Joseph.Greathouse@amd.com>
---
 src/lib/read_vram.c | 199 ++++++++++++++++++++++++++++++++++----------
 1 file changed, 153 insertions(+), 46 deletions(-)

diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c
index efcd081..049acd4 100644
--- a/src/lib/read_vram.c
+++ b/src/lib/read_vram.c
@@ -297,6 +297,26 @@ invalid_page:
 	return -1;
 }
 
+/** round_up_pot -- Round up value to next power of two */
+static uint64_t round_up_pot(uint64_t x)
+{
+	uint64_t y = (64ULL * 1024 * 1024); // start at 64MiB
+	while (y < x)
+		y <<= 1;
+	return y;
+}
+
+static uint64_t log2_vm_size(uint64_t page_table_start_addr, uint64_t page_table_end_addr)
+{
+	uint64_t size_of_vm_bytes = page_table_end_addr - page_table_start_addr + 4096;
+	size_of_vm_bytes = round_up_pot(size_of_vm_bytes);
+	// Find the highest bit set to get an estimate for log2(size)
+	uint32_t vm_bits = 0;
+	while (size_of_vm_bytes >>= 1)
+		vm_bits++;
+	return vm_bits;
+}
+
 /**
  * umr_access_vram_ai - Access GPU mapped memory for GFX9+ platforms
  */
@@ -304,17 +324,19 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 			      uint64_t address, uint32_t size,
 			      void *dst, int write_en)
 {
-	uint64_t start_addr, page_table_start_addr, page_table_base_addr,
-		 page_table_block_size, pte_idx, pde_idx, pte_entry, pde_entry,
+	uint64_t start_addr, page_table_start_addr, page_table_end_addr, page_table_base_addr,
+		 page_table_block_size, log2_ptb_entries, pte_idx, pde_idx, pte_entry, pde_entry,
 		 pde_address, vm_fb_offset,
 		 va_mask, offset_mask, system_aperture_low, system_aperture_high,
-		 fb_top, fb_bottom, pte_page_mask, agp_base, agp_bot, agp_top, prev_addr;
+		 fb_top, fb_bottom, ptb_mask, pte_page_mask, agp_base, agp_bot, agp_top, prev_addr;
 	uint32_t chunk_size, tmp, pde0_block_fragment_size;
 	int pde_cnt, current_depth, page_table_depth, zfb, further;
 	struct {
 		uint32_t
 			mmVM_CONTEXTx_PAGE_TABLE_START_ADDR_LO32,
 			mmVM_CONTEXTx_PAGE_TABLE_START_ADDR_HI32,
+			mmVM_CONTEXTx_PAGE_TABLE_END_ADDR_LO32,
+			mmVM_CONTEXTx_PAGE_TABLE_END_ADDR_HI32,
 			mmVM_CONTEXTx_CNTL,
 			mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR_LO32,
 			mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR_HI32,
@@ -461,6 +483,12 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 	sprintf(buf, "mm%sVM_CONTEXT%" PRIu32 "_PAGE_TABLE_START_ADDR_HI32", regprefix, vmid);
 		registers.mmVM_CONTEXTx_PAGE_TABLE_START_ADDR_HI32 = umr_read_reg_by_name_by_ip(asic, hub, buf);
 		page_table_start_addr |= (uint64_t)registers.mmVM_CONTEXTx_PAGE_TABLE_START_ADDR_HI32 << 44;
+	sprintf(buf, "mm%sVM_CONTEXT%" PRIu32 "_PAGE_TABLE_END_ADDR_LO32", regprefix, vmid);
+		registers.mmVM_CONTEXTx_PAGE_TABLE_END_ADDR_LO32 = umr_read_reg_by_name_by_ip(asic, hub, buf);
+		page_table_end_addr = (uint64_t)registers.mmVM_CONTEXTx_PAGE_TABLE_END_ADDR_LO32 << 12;
+	sprintf(buf, "mm%sVM_CONTEXT%" PRIu32 "_PAGE_TABLE_END_ADDR_HI32", regprefix, vmid);
+		registers.mmVM_CONTEXTx_PAGE_TABLE_END_ADDR_HI32 = umr_read_reg_by_name_by_ip(asic, hub, buf);
+		page_table_end_addr |= (uint64_t)registers.mmVM_CONTEXTx_PAGE_TABLE_END_ADDR_HI32 << 44;
 
 	sprintf(buf, "mm%sVM_CONTEXT%" PRIu32 "_CNTL", regprefix, vmid);
 		tmp = registers.mmVM_CONTEXTx_CNTL = umr_read_reg_by_name_by_ip(asic, hub, buf);
@@ -495,6 +523,8 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 		asic->mem_funcs.vm_message(
 				"mm%sVM_CONTEXT%" PRIu32 "_PAGE_TABLE_START_ADDR_LO32=0x%" PRIx32 "\n"
 				"mm%sVM_CONTEXT%" PRIu32 "_PAGE_TABLE_START_ADDR_HI32=0x%" PRIx32 "\n"
+				"mm%sVM_CONTEXT%" PRIu32 "_PAGE_TABLE_END_ADDR_LO32=0x%" PRIx32 "\n"
+				"mm%sVM_CONTEXT%" PRIu32 "_PAGE_TABLE_END_ADDR_HI32=0x%" PRIx32 "\n"
 				"mm%sVM_CONTEXT%" PRIu32 "_PAGE_TABLE_BASE_ADDR_LO32=0x%" PRIx32 "\n"
 				"mm%sVM_CONTEXT%" PRIu32 "_PAGE_TABLE_BASE_ADDR_HI32=0x%" PRIx32 "\n"
 				"mm%sVM_CONTEXT%" PRIu32 "_CNTL=0x%" PRIx32 "\n"
@@ -513,6 +543,8 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 				"mm%sMC_VM_AGP_TOP=0x%" PRIx32 "\n",
 			regprefix, vmid, registers.mmVM_CONTEXTx_PAGE_TABLE_START_ADDR_LO32,
 			regprefix, vmid, registers.mmVM_CONTEXTx_PAGE_TABLE_START_ADDR_HI32,
+			regprefix, vmid, registers.mmVM_CONTEXTx_PAGE_TABLE_END_ADDR_LO32,
+			regprefix, vmid, registers.mmVM_CONTEXTx_PAGE_TABLE_END_ADDR_HI32,
 			regprefix, vmid, registers.mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR_LO32,
 			regprefix, vmid, registers.mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR_HI32,
 			regprefix, vmid, registers.mmVM_CONTEXTx_CNTL,
@@ -535,10 +567,6 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 	// transform page_table_base
 	page_table_base_addr -= vm_fb_offset;
 
-	// convert some defaults to actual values AFTER printing out to user
-	// page_table_block_size of 0 means 9 (512 entries)
-	if (!page_table_block_size)
-		page_table_block_size = 9;
 	pde0_block_fragment_size = 0;
 
 	if (vmid == 0) {
@@ -593,7 +621,9 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 
 		// defaults in case we have to bail out before fully decoding to a PTE
 		pde_cnt = 0;
+		ptb_mask = (1ULL << 9) - 1;
 		pte_page_mask = (1ULL << 12) - 1;
+		log2_ptb_entries = 9;
 		further = 0;
 
 		if (page_table_depth >= 1) {
@@ -608,8 +638,23 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 			// AI+ supports more than 1 level of PDEs so we iterate for all of the depths
 			pde_address = pde_fields.pte_base_addr;
 
-			// TODO: Should "page_table_block_size" just be 9 to account for potential PTB1 selectors?
-			va_mask = ((uint64_t)511 << ((page_table_depth)*9 + (12 + pde0_block_fragment_size + page_table_block_size)));
+			/*
+			 * Size of the first PDB depends on the total coverage of the
+			 * page table and the PAGE_TABLE_BLOCK_SIZE.
+			 * Entire table takes ceil(log2(total_vm_size)) bits
+			 * All PDBs except the first one take 9 bits each
+			 * The PTB covers at least 2 MiB (21 bits)
+			 * And PAGE_TABLE_BLOCK_SIZE is log2(num 2MiB ranges PTB covers)
+			 * As such, the formula for the size of the first PDB is:
+			 *                       PDB1, PDB0, etc.      PTB covers at least 2 MiB
+			 *                                        Block size can make it cover more
+			 *   total_vm_bits - (9 * num_middle_pdbs) - (page_table_block_size + 21)
+			 */
+			int total_vm_bits = log2_vm_size(page_table_start_addr, page_table_end_addr);
+			int top_pdb_bits = total_vm_bits - (9 * (page_table_depth - 1)) - (page_table_block_size + 21);
+
+			va_mask = (1ULL << top_pdb_bits) - 1;
+			va_mask <<= (total_vm_bits - top_pdb_bits);
 
 			if ((asic->options.no_fold_vm_decode || memcmp(&pde_fields, &pde_array[pde_cnt], sizeof pde_fields)) && asic->options.verbose)
 				asic->mem_funcs.vm_message("BASE=0x%016" PRIx64 ", VA=0x%012" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", C=%" PRIu64 ", P=%" PRIu64 "\n",
@@ -624,14 +669,19 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 
 			current_depth = page_table_depth;
 			while (current_depth) {
-				pde_idx = address >> (9 * (current_depth - 1) + page_table_block_size + 12);
-				// mask only 9 bits
-				if (current_depth != page_table_depth)
-					pde_idx &= (1ULL << 9) - 1;
-
-
-				// TODO: redo va_mask
-				va_mask = ((uint64_t)511 << ((page_table_depth - pde_cnt)*9 + (12 + pde0_block_fragment_size + page_table_block_size)));
+				// Every middle PDB has 512 entries, so shift a further 9 bits
+				// for every layer beyond the first one.
+				int amount_to_shift = (total_vm_bits - top_pdb_bits);
+				amount_to_shift -= ((page_table_depth - current_depth)*9);
+				pde_idx = address >> amount_to_shift;
+
+				// Middle layers need the upper bits masked out after the right-shift.
+				// For the top-most layer, the va_mask is set above the while loop,
+				// so we can skip re-setting it here.
+				if (current_depth != page_table_depth) {
+					pde_idx &= 511;
+					va_mask = (uint64_t)511 << amount_to_shift;
+				}
 
 				// read PDE entry
 				prev_addr = pde_address + pde_idx * 8;
@@ -671,9 +721,18 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 				pde_fields.pte           = (pde_entry >> 54) & 1;
 				if (current_depth == 1) {
 					pde0_block_fragment_size = pde_fields.frag_size;
-					// page_table_block_size is the number of entries in a PTB that spans 2MB
-					page_table_block_size = 21 - (12 + pde0_block_fragment_size);
-					pte_page_mask = (1ULL << (12 + pde0_block_fragment_size)) - 1;
+					/*
+					 * page_table_block_size is the number of 2MiB regions covered by a PTB
+					 * If we set it to 0, then a PTB covers 2 MiB
+					 * If it's 9, a PTB covers 1024 MiB
+					 * pde0_block_fragment_size tells us how many 4 KiB regions each PTE covers
+					 * If it's 0 PTEs cover 4 KiB
+					 * If it's 9 PTEs cover 2 MiB
+					 * So the number of PTEs in a PTB is 2^(9+ptbs-pbfs)
+					 */
+					log2_ptb_entries = (9 + (page_table_block_size - pde0_block_fragment_size));
+					ptb_mask = (1ULL << log2_ptb_entries) - 1;
+					pte_page_mask = (1ULL << (pde0_block_fragment_size + 12)) - 1;
 					if (asic->options.verbose)
 						asic->mem_funcs.vm_message("pde0.pte = %u\npde0.block_fragment_size = %u\npage_table_block_size = %u\n",
 							(unsigned)pde_fields.pte,
@@ -723,9 +782,13 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 				pde_address = pde_fields.pte_base_addr;
 			}
 
-			// read PTE selector (to select from PTB0)
-			// TODO:  support for page_table_block_size > 9
-			pte_idx = (address >> (12 + pde0_block_fragment_size)) & ((1ULL << page_table_block_size) - 1);
+			// If we fall through to here, we are pointing into PTB, so pull out
+			// the index and mask.
+			// At minimum, each PTE is 4 KiB (12 bits)
+			// PDE0.BFS tells us how many of these 4 KiB pages each PTE covers
+			// So add those bits in.
+			// We also calculated the PTB mask up above, to know how many PTEs are in this PTB
+			pte_idx = (address >> (12 + pde0_block_fragment_size)) & ptb_mask;
 pte_further:
 			// now read PTE entry for this page
 			prev_addr = pde_fields.pte_base_addr + pte_idx*8;
@@ -778,20 +841,74 @@ pde_is_pte:
 					pte_fields.fragment,
 					pte_fields.further);
 
-			if (pte_fields.further) {
-				if (page_table_block_size == 9) {
-					// this case doesn't make sense unless we support PTBS > 9
-					asic->mem_funcs.vm_message("[ERROR]: PTE.further is set and *CNTL.PAGE_TABLE_BLOCK_SIZE is 9...\n");
-					return -1;
+			// How many bits in the address are used to index into the PTB?
+			// If further is set, that means we jumped back to pde_is_pte,
+			// and the va_mask was properly set down there.
+			if (!further) {
+				// total_vm_bits are all the bits in the VM space
+				// We want to ignore the top-most PDB, which uses top_pdb_bits
+				// We also want to ignore lower PDBs, which use 9 bits each
+				int bits_to_use = total_vm_bits - top_pdb_bits - (9 * (page_table_depth - 1));
+
+				// At a minimum, we want to ignore the bottom 12 bits for a 4 KiB page
+				int lower_bits_to_ignore = 12;
+
+				if (pde_fields.pte) {
+					// We are in here because we're in PDE0 with P bit. So we don't want
+					// to skip the 9 bits from PDB0.
+					bits_to_use += 9;
+
+					// If the P bit is set, we are coming from PDE0, thus this entry
+					// covers the whole page_table_block_size, instead of the PDE0.BFS.
+					// So we want to ignore those bits in the address.
+					lower_bits_to_ignore += page_table_block_size;
 				} else {
-					pte_idx = (address >> 12) & ((1ULL << pde0_block_fragment_size) - 1);
-					pte_page_mask = (1ULL << 12) - 1;
-
-					// grab PTE base address from the PTE that has the F bit set.
-					pde_fields.pte_base_addr = pte_fields.page_base_addr;
-					further = 1;
-					goto pte_further;
+					// If we are at an actual PTE, then based on PDE0.BFS, we want to ignore
+					// some of the lowest bits.
+					// If PDE0.BFS=0, the bottom 12 bits are used to index within the page
+					// If PDE0.BFS=9, the bottom 21 bits are used to index within the page
+					// etc.  These are the bits we want to ignore, and we already put 12 in.
+					lower_bits_to_ignore += pde0_block_fragment_size;
 				}
+
+				va_mask = (1ULL << bits_to_use) - 1;
+				uint64_t mask_to_ignore = (1ULL << lower_bits_to_ignore) - 1;
+				va_mask = va_mask & ~mask_to_ignore;
+			}
+
+			uint32_t pte_block_fragment_size = 0;
+			if (pte_fields.further) {
+				// Going to go one more layer deep, so now we need the Further-PTE's
+				// block_fragment_size. This tells us how many 4K pages each
+				// last-layer-PTE covers.
+				pte_block_fragment_size = (pte_entry >> 59) & 0x1F;
+
+				// Each entry covers the Further-PTE.block_fragment_size number
+				// of 4K pages so we can potentially ignore some low-order bits.
+				int last_level_ptb_bits = 12 + pte_block_fragment_size;
+				pte_idx = address >> last_level_ptb_bits;
+
+				// The total size covered by the last-layer-PTB is a function of
+				// pde0_block_fragment_size, which tells us how many 4 KiB pages the
+				// PTB covers.
+				// So the number of bits needed to index the entries in the final PTB is:
+				uint32_t num_entry_bits =  pde0_block_fragment_size - pte_block_fragment_size;
+				// Clamp the index to the new last-level PTB's size.
+				pte_idx &= ((1 << num_entry_bits) - 1);
+
+				uint32_t upper_mask = (1ULL << (12 + pde0_block_fragment_size)) - 1;
+				pte_page_mask = (1ULL << last_level_ptb_bits) - 1;
+				va_mask &= (upper_mask & ~pte_page_mask);
+
+				// grab PTE base address and other data from the PTE that has the F bit set.
+				pde_fields.frag_size     = (pte_entry >> 59) & 0x1F;
+				pde_fields.pte_base_addr = pte_entry & 0xFFFFFFFFFFC0ULL;
+				pde_fields.valid         = pte_entry & 1;
+				pde_fields.system        = (pte_entry >> 1) & 1;
+				pde_fields.cache         = (pte_entry >> 2) & 1;
+				pde_fields.pte            = 0;
+				further = 1;
+				goto pte_further;
 			}
 
 			if (!pte_fields.system)
@@ -802,11 +919,10 @@ pde_is_pte:
 
 			// compute starting address
 			// this also accounts for PDE-is-PTE masking since current_depth > 0 at this point
-			// if we are processing a PTE leaf node then the page size is 12 bits
 			if (!further)
 				offset_mask = (1ULL << ((current_depth * 9) + (12 + pde0_block_fragment_size))) - 1;
 			else
-				offset_mask = (1ULL << 12) - 1; // offset masks are always 12-bits wide with PTE.further set
+				offset_mask = (1ULL << (12 + pte_block_fragment_size)) - 1;
 
 			start_addr = asic->mem_funcs.gpu_bus_to_cpu_address(asic, pte_fields.page_base_addr) + (address & offset_mask);
 		} else {
@@ -935,15 +1051,6 @@ invalid_page:
 	return -1;
 }
 
-/** round_up_pot -- Round up value to next power of two */
-static uint64_t round_up_pot(uint64_t x)
-{
-	uint64_t y = (64ULL * 1024 * 1024); // start at 64MiB
-	while (y < x)
-		y <<= 1;
-	return y;
-}
-
 /**
  * umr_access_vram - Access GPU mapped memory
  *
-- 
2.20.1


* [PATCH umr 2/3] Generalize decoding of PDEs and PTEs in AI+
  2021-06-17 19:25 [PATCH umr 1/3] Improve handling of non-standard page tables in AI+ Joseph Greathouse
@ 2021-06-17 19:25 ` Joseph Greathouse
  2021-06-17 19:25 ` [PATCH umr 3/3] Enhance printing of page tables " Joseph Greathouse
  1 sibling, 0 replies; 6+ messages in thread
From: Joseph Greathouse @ 2021-06-17 19:25 UTC (permalink / raw)
  To: amd-gfx; +Cc: Tom.StDenis, Joseph Greathouse

Brings decoding of PDEs and PTEs for AI+ chips into their own
functions, so that we don't end up with subtly different decoding
bugs in the variety of places such decodings are done.

Also fixes a minor bug where we were pulling PTE.PRT from bit 61
instead of the proper bit 51.
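
As a minimal standalone sketch of the PRT bit fix (illustrative only;
the raw entry value below is made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical PTE: valid, PBA = 0x12345000, PRT (bit 51) set. */
	uint64_t pte_entry = 1ULL | (0x12345ULL << 12) | (1ULL << 51);

	uint64_t prt_old = (pte_entry >> 61) & 1; /* previously used (wrong) bit */
	uint64_t prt_new = (pte_entry >> 51) & 1; /* bit used after this patch */

	printf("PRT from bit 61: %llu, from bit 51: %llu\n",
	       (unsigned long long)prt_old, (unsigned long long)prt_new);
	return 0;
}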

Signed-off-by: Joseph Greathouse <Joseph.Greathouse@amd.com>
---
 src/lib/read_vram.c | 187 ++++++++++++++++++++++++++------------------
 1 file changed, 109 insertions(+), 78 deletions(-)

diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c
index 049acd4..2998873 100644
--- a/src/lib/read_vram.c
+++ b/src/lib/read_vram.c
@@ -317,6 +317,104 @@ static uint64_t log2_vm_size(uint64_t page_table_start_addr, uint64_t page_table
 	return vm_bits;
 }
 
+typedef struct {
+	uint64_t
+		frag_size,
+		pte_base_addr,
+		valid,
+		system,
+		coherent,
+		pte,
+		further;
+} pde_fields_ai_t;
+
+typedef struct {
+	uint64_t
+		valid,
+		system,
+		coherent,
+		tmz,
+		execute,
+		read,
+		write,
+		fragment,
+		page_base_addr,
+		prt,
+		pde,
+		further,
+		mtype;
+} pte_fields_ai_t;
+
+/*
+ * PDE format on AI:
+ * 63:59 block fragment size
+ * 58:55 reserved
+ *   But if bit 56 is set, this is a PTE with 'further' set,
+ *   which makes it act like a PDE.
+ * 54 pde-is-pte
+ * 53:48 reserved
+ * 47:6 physical base address of PTE
+ * 2 cache coherent/snoop
+ * 1 system
+ * 0 valid
+ */
+static pde_fields_ai_t decode_pde_entry_ai(uint64_t pde_entry)
+{
+	pde_fields_ai_t pde_fields;
+	pde_fields.frag_size     = (pde_entry >> 59) & 0x1F;
+	pde_fields.pte_base_addr = pde_entry & 0xFFFFFFFFFFC0ULL;
+	pde_fields.valid         = pde_entry & 1;
+	pde_fields.system        = (pde_entry >> 1) & 1;
+	pde_fields.coherent      = (pde_entry >> 2) & 1;
+	pde_fields.pte           = (pde_entry >> 54) & 1;
+	pde_fields.further       = (pde_entry >> 56) & 1;
+	return pde_fields;
+}
+
+/*
+ * PTE format on AI and PI:
+ * 58:57 mtype
+ * 56 further
+ * 54 reserved
+ *   But if it is set, then this is actually a PDE with 'P'
+ *   bit set, which makes the PDE act like a PTE.
+ * 51 prt
+ * 47:12 4k physical page base address
+ * 11:7 fragment
+ * 6 write
+ * 5 read
+ * 4 exe
+ * 3 tmz (PI+)
+ * 2 snooped / coherent
+ * 1 system
+ * 0 valid
+ */
+static pte_fields_ai_t decode_pte_entry_ai(uint64_t pte_entry)
+{
+	pte_fields_ai_t pte_fields;
+	pte_fields.valid          = pte_entry & 1;
+	pte_fields.system         = (pte_entry >> 1) & 1;
+	pte_fields.coherent       = (pte_entry >> 2) & 1;
+	pte_fields.tmz            = (pte_entry >> 3) & 1;
+	pte_fields.execute        = (pte_entry >> 4) & 1;
+	pte_fields.read           = (pte_entry >> 5) & 1;
+	pte_fields.write          = (pte_entry >> 6) & 1;
+	pte_fields.fragment       = (pte_entry >> 7) & 0x1F;
+	pte_fields.prt            = (pte_entry >> 51) & 1;
+	pte_fields.pde            = (pte_entry >> 54) & 1;
+	pte_fields.further        = (pte_entry >> 56) & 1;
+	pte_fields.mtype          = (pte_entry >> 57) & 3;
+
+	// PTEs hold physical address in 47:12
+	// PDEs hold physical address in 47:6, so if this is a PTE-as-PDE (further), need a different mask
+	if (pte_fields.further)
+		pte_fields.page_base_addr = pte_entry & 0xFFFFFFFFFFC0ULL;
+	else
+		pte_fields.page_base_addr = pte_entry & 0xFFFFFFFFF000ULL;
+
+	return pte_fields;
+}
+
 /**
  * umr_access_vram_ai - Access GPU mapped memory for GFX9+ platforms
  */
@@ -352,24 +450,9 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 			mmMC_VM_AGP_BOT,
 			mmMC_VM_AGP_TOP;
 	} registers;
-	struct {
-		uint64_t
-			frag_size,
-			pte_base_addr,
-			valid,
-			system,
-			cache,
-			pte;
-	} pde_fields, pde_array[8];
-	struct {
-		uint64_t
-			page_base_addr,
-			fragment,
-			system,
-			valid,
-			prt,
-			further;
-	} pte_fields;
+
+	pde_fields_ai_t pde_fields, pde_array[8];
+	pte_fields_ai_t pte_fields;
 	char buf[64];
 	unsigned char *pdst = dst;
 	char *hub, *vm0prefix, *regprefix;
@@ -379,27 +462,6 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 	memset(&registers, 0, sizeof registers);
 	memset(&pde_array, 0xff, sizeof pde_array);
 
-	/*
-	 * PTE format on AI:
-	 * 47:12 4k physical page base address
-	 * 11:7 fragment
-	 * 6 write
-	 * 5 read
-	 * 4 exe
-	 * 3 reserved
-	 * 2 snooped
-	 * 1 system
-	 * 0 valid
-	 *
-	 * PDE format on AI:
-	 * 63:59 block fragment size
-	 * 58:40 reserved
-	 * 47:6 physical base address of PTE
-	 * 2 cache coherent/snoop
-	 * 1 system
-	 * 0 valid
-	 */
-
 	hubid = vmid & 0xFF00;
 	vmid &= 0xFF;
 
@@ -627,13 +689,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 		further = 0;
 
 		if (page_table_depth >= 1) {
-			// decode PDE values
-			pde_fields.frag_size     = (pde_entry >> 59) & 0x1F;
-			pde_fields.pte_base_addr = pde_entry & 0xFFFFFFFFF000ULL;
-			pde_fields.valid         = pde_entry & 1;
-			pde_fields.system        = (pde_entry >> 1) & 1;
-			pde_fields.cache         = (pde_entry >> 2) & 1;
-			pde_fields.pte           = (pde_entry >> 54) & 1;
+			pde_fields = decode_pde_entry_ai(pde_entry);
 
 			// AI+ supports more than 1 level of PDEs so we iterate for all of the depths
 			pde_address = pde_fields.pte_base_addr;
@@ -663,7 +719,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 						pde_fields.pte_base_addr,
 						pde_fields.valid,
 						pde_fields.system,
-						pde_fields.cache,
+						pde_fields.coherent,
 						pde_fields.pte);
 			memcpy(&pde_array[pde_cnt++], &pde_fields, sizeof pde_fields);
 
@@ -712,13 +768,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 					}
 				}
 
-				// decode PDE values
-				pde_fields.frag_size     = (pde_entry >> 59) & 0x1F;
-				pde_fields.pte_base_addr = pde_entry & 0xFFFFFFFFF000ULL;
-				pde_fields.valid         = pde_entry & 1;
-				pde_fields.system        = (pde_entry >> 1) & 1;
-				pde_fields.cache         = (pde_entry >> 2) & 1;
-				pde_fields.pte           = (pde_entry >> 54) & 1;
+				pde_fields = decode_pde_entry_ai(pde_entry);
 				if (current_depth == 1) {
 					pde0_block_fragment_size = pde_fields.frag_size;
 					/*
@@ -751,7 +801,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 								pde_fields.pte_base_addr,
 								pde_fields.valid,
 								pde_fields.system,
-								pde_fields.cache,
+								pde_fields.coherent,
 								pde_fields.pte,
 								pde_fields.frag_size);
 						memcpy(&pde_array[pde_cnt++], &pde_fields, sizeof pde_fields);
@@ -817,14 +867,8 @@ pte_further:
 					return -1;
 			}
 
-			// decode PTE values
 pde_is_pte:
-			pte_fields.fragment       = (pte_entry >> 7)  & 0x1F;
-			pte_fields.system         = (pte_entry >> 1) & 1;
-			pte_fields.valid          = pte_entry & 1;
-			pte_fields.prt            = (pte_entry >> 61) & 1;
-			pte_fields.further        = (pte_entry >> 56) & 1;
-			pte_fields.page_base_addr = pte_entry & (pte_fields.further ? 0xFFFFFFFFFFC0ULL : 0xFFFFFFFFF000ULL);
+			pte_fields = decode_pte_entry_ai(pte_entry);
 
 			if (asic->options.verbose)
 				asic->mem_funcs.vm_message("%s %s@{0x%" PRIx64 "/%" PRIx64"}==0x%016" PRIx64 ", VA=0x%012" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", P=%" PRIu64 ", FS=%" PRIu64 ", F=%" PRIu64 "\n",
@@ -901,12 +945,7 @@ pde_is_pte:
 				va_mask &= (upper_mask & ~pte_page_mask);
 
 				// grab PTE base address and other data from the PTE that has the F bit set.
-				pde_fields.frag_size     = (pte_entry >> 59) & 0x1F;
-				pde_fields.pte_base_addr = pte_entry & 0xFFFFFFFFFFC0ULL;
-				pde_fields.valid         = pte_entry & 1;
-				pde_fields.system        = (pte_entry >> 1) & 1;
-				pde_fields.cache         = (pte_entry >> 2) & 1;
-				pde_fields.pte            = 0;
+				pde_fields = decode_pde_entry_ai(pte_entry);
 				further = 1;
 				goto pte_further;
 			}
@@ -928,12 +967,9 @@ pde_is_pte:
 		} else {
 			// in AI+ the BASE_ADDR is treated like a PDE entry...
 			// decode PDE values
-			pde_fields.frag_size     = (page_table_base_addr >> 59) & 0x1F;
+			pde_fields = decode_pde_entry_ai(pde_entry);
 			pde0_block_fragment_size = pde_fields.frag_size;
 			pte_page_mask = (1ULL << (12 + pde0_block_fragment_size)) - 1;
-			pde_fields.pte_base_addr = page_table_base_addr & 0xFFFFFFFFF000ULL;
-			pde_fields.system        = (page_table_base_addr >> 1) & 1;
-			pde_fields.valid         = page_table_base_addr & 1;
 
 			if ((asic->options.no_fold_vm_decode || memcmp(&pde_array[0], &pde_fields, sizeof pde_fields)) && asic->options.verbose)
 				asic->mem_funcs.vm_message("PDE=0x%016" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", FS=%" PRIu64 "\n",
@@ -953,12 +989,7 @@ pde_is_pte:
 			if (umr_read_vram(asic, UMR_LINEAR_HUB, pde_fields.pte_base_addr + pte_idx * 8, 8, &pte_entry) < 0)
 				return -1;
 
-			// decode PTE values
-			pte_fields.page_base_addr = pte_entry & 0xFFFFFFFFF000ULL;
-			pte_fields.fragment       = (pte_entry >> 7)  & 0x1F;
-			pte_fields.system         = (pte_entry >> 1) & 1;
-			pte_fields.valid          = pte_entry & 1;
-			pte_fields.prt            = 0;
+			pte_fields = decode_pte_entry_ai(pte_entry);
 
 			if (asic->options.verbose)
 				asic->mem_funcs.vm_message("\\-> PTE=0x%016" PRIx64 ", VA=0x%016" PRIx64 ", PBA==0x%012" PRIx64 ", F=%" PRIu64 ", V=%" PRIu64 ", S=%" PRIu64 "\n",
-- 
2.20.1


* [PATCH umr 3/3] Enhance printing of page tables in AI+
  2021-06-17 19:25 [PATCH umr 1/3] Improve handling of non-standard page tables in AI+ Joseph Greathouse
  2021-06-17 19:25 ` [PATCH umr 2/3] Generalize decoding of PDEs and PTEs " Joseph Greathouse
@ 2021-06-17 19:25 ` Joseph Greathouse
  2021-06-21 16:37   ` [PATCH v2 " Joseph Greathouse
  1 sibling, 1 reply; 6+ messages in thread
From: Joseph Greathouse @ 2021-06-17 19:25 UTC (permalink / raw)
  To: amd-gfx; +Cc: Tom.StDenis, Joseph Greathouse

Pulls print functions for GPUVM page tables on AI+ chips into their
own set of generalized functions, so that we don't have subtly
different printouts for different layers.

Explicitly prints PDEs with the P bit set (which makes them act as
PTEs) and makes PTEs with the F bit set (further, which makes them act
as PDEs) properly indent the next layer of the printout.

Prints remaining fields from the PTE and PDE printouts, such as
read/write/execute bits and MTYPE from PTE.
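
For quick reference, a small sketch of the bits and MTYPE encoding the
new printouts key off of (it only summarizes values already used by the
decode and print helpers in this series; the sample entry is made up):

#include <stdint.h>
#include <stdio.h>

static const char *mtype_name(uint64_t mtype)
{
	switch (mtype) {
	case 0: return "NC";
	case 1: return "RW";
	case 2: return "CC";
	case 3: return "UC";
	default: return "Unknown";
	}
}

int main(void)
{
	/* Hypothetical entry: valid, P bit (54) set, MTYPE (58:57) = 2 (CC). */
	uint64_t entry = 1ULL | (1ULL << 54) | (2ULL << 57);

	printf("P (PDE-as-PTE)=%d, F (further)=%d, MTYPE=%s\n",
	       (int)((entry >> 54) & 1), (int)((entry >> 56) & 1),
	       mtype_name((entry >> 57) & 3));
	return 0;
}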

Signed-off-by: Joseph Greathouse <Joseph.Greathouse@amd.com>
---
 src/lib/read_vram.c | 184 ++++++++++++++++++++++++++++++--------------
 1 file changed, 127 insertions(+), 57 deletions(-)

diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c
index 2998873..cb38b60 100644
--- a/src/lib/read_vram.c
+++ b/src/lib/read_vram.c
@@ -415,6 +415,112 @@ static pte_fields_ai_t decode_pte_entry_ai(uint64_t pte_entry)
 	return pte_fields;
 }
 
+static void print_pde_fields_ai(struct umr_asic *asic,
+				pde_fields_ai_t pde_fields)
+{
+	asic->mem_funcs.vm_message(
+			", PBA==0x%012" PRIx64 ", V=%" PRIu64
+			", S=%" PRIu64 ", C=%" PRIu64
+			", P=%" PRIu64 ", FS=%" PRIu64 "\n",
+			pde_fields.pte_base_addr,
+			pde_fields.valid,
+			pde_fields.system,
+			pde_fields.coherent,
+			pde_fields.pte,
+			pde_fields.frag_size);
+}
+static void print_base_ai(struct umr_asic *asic,
+			  uint64_t pde_entry, uint64_t address,
+			  uint64_t va_mask, pde_fields_ai_t pde_fields,
+			  int is_base_not_pde)
+{
+	if (is_base_not_pde)
+		asic->mem_funcs.vm_message("BASE");
+	else
+		asic->mem_funcs.vm_message("PDE");
+	asic->mem_funcs.vm_message("=0x%016" PRIx64 ", VA=0x%012" PRIx64,
+			pde_entry,
+			address & va_mask);
+	print_pde_fields_ai(asic, pde_fields);
+}
+
+static void print_pde_ai(struct umr_asic *asic,
+		const char * indentation, int pde_cnt,
+		int page_table_depth, uint64_t prev_addr,
+		uint64_t pde_idx, uint64_t pde_entry, uint64_t address,
+		uint64_t va_mask, pde_fields_ai_t pde_fields)
+{
+	asic->mem_funcs.vm_message("%s ", &indentation[18-pde_cnt*3]);
+	if (pde_fields.further)
+		asic->mem_funcs.vm_message("PTE-FURTHER");
+	else
+		asic->mem_funcs.vm_message("PDE%d", page_table_depth - pde_cnt);
+
+	asic->mem_funcs.vm_message("@{0x%" PRIx64 "/%" PRIx64
+			"}=0x%016" PRIx64 ", VA=0x%012" PRIx64,
+			prev_addr,
+			pde_idx,
+			pde_entry,
+			address & va_mask);
+	print_pde_fields_ai(asic, pde_fields);
+}
+
+static void print_pte_ai(struct umr_asic *asic,
+		const char * indentation, int pde_cnt, uint64_t prev_addr,
+		uint64_t pte_idx, uint64_t pte_entry, uint64_t address,
+		uint64_t va_mask, pte_fields_ai_t pte_fields)
+{
+	if (indentation == NULL) {
+		asic->mem_funcs.vm_message("\\-> PTE");
+	} else {
+		asic->mem_funcs.vm_message("%s ",
+				&indentation[18-pde_cnt*3]);
+		if (pte_fields.pde)
+			asic->mem_funcs.vm_message("PDE0-as-PTE");
+		else
+			asic->mem_funcs.vm_message("PTE");
+		asic->mem_funcs.vm_message("@{0x%" PRIx64 "/%" PRIx64"}",
+				prev_addr,
+				pte_idx);
+	}
+	asic->mem_funcs.vm_message("=0x%016" PRIx64 ", VA=0x%012" PRIx64
+			", PBA==0x%012" PRIx64 ", V=%" PRIu64
+			", S=%" PRIu64 ", C=%" PRIu64 ", Z=%" PRIu64
+			", X=%" PRIu64 ", R=%" PRIu64 ", W=%" PRIu64
+			", FS=%" PRIu64 ", T=%" PRIu64 ", MTYPE=",
+			pte_entry,
+			address & va_mask,
+			pte_fields.page_base_addr,
+			pte_fields.valid,
+			pte_fields.system,
+			pte_fields.coherent,
+			pte_fields.tmz,
+			pte_fields.execute,
+			pte_fields.read,
+			pte_fields.write,
+			pte_fields.fragment,
+			pte_fields.prt,
+			pte_fields.mtype);
+	switch (pte_fields.mtype) {
+		case 0:
+			asic->mem_funcs.vm_message("NC\n");
+			break;
+		case 1:
+			asic->mem_funcs.vm_message("RW\n");
+			break;
+		case 2:
+			asic->mem_funcs.vm_message("CC\n");
+			break;
+		case 3:
+			asic->mem_funcs.vm_message("UC\n");
+			break;
+		default:
+			asic->mem_funcs.vm_message("Unknown (%" PRIu64")\n",
+					pte_fields.mtype);
+			break;
+	}
+}
+
 /**
  * umr_access_vram_ai - Access GPU mapped memory for GFX9+ platforms
  */
@@ -457,7 +563,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 	unsigned char *pdst = dst;
 	char *hub, *vm0prefix, *regprefix;
 	unsigned hubid;
-	static const char *indentation = "               \\->";
+	static const char *indentation = "                  \\->";
 
 	memset(&registers, 0, sizeof registers);
 	memset(&pde_array, 0xff, sizeof pde_array);
@@ -713,14 +819,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 			va_mask <<= (total_vm_bits - top_pdb_bits);
 
 			if ((asic->options.no_fold_vm_decode || memcmp(&pde_fields, &pde_array[pde_cnt], sizeof pde_fields)) && asic->options.verbose)
-				asic->mem_funcs.vm_message("BASE=0x%016" PRIx64 ", VA=0x%012" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", C=%" PRIu64 ", P=%" PRIu64 "\n",
-						pde_entry,
-						address & va_mask,
-						pde_fields.pte_base_addr,
-						pde_fields.valid,
-						pde_fields.system,
-						pde_fields.coherent,
-						pde_fields.pte);
+				print_base_ai(asic, pde_entry, address, va_mask, pde_fields, 1);
 			memcpy(&pde_array[pde_cnt++], &pde_fields, sizeof pde_fields);
 
 			current_depth = page_table_depth;
@@ -783,27 +882,11 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 					log2_ptb_entries = (9 + (page_table_block_size - pde0_block_fragment_size));
 					ptb_mask = (1ULL << log2_ptb_entries) - 1;
 					pte_page_mask = (1ULL << (pde0_block_fragment_size + 12)) - 1;
-					if (asic->options.verbose)
-						asic->mem_funcs.vm_message("pde0.pte = %u\npde0.block_fragment_size = %u\npage_table_block_size = %u\n",
-							(unsigned)pde_fields.pte,
-							(unsigned)pde0_block_fragment_size,
-							(unsigned)page_table_block_size);
 				}
 				if (!pde_fields.pte) {
 					if ((asic->options.no_fold_vm_decode || memcmp(&pde_fields, &pde_array[pde_cnt], sizeof pde_fields)) && asic->options.verbose) {
-						asic->mem_funcs.vm_message("%s PDE%d@{0x%" PRIx64 "/%" PRIx64 "}=0x%016" PRIx64 ", VA=0x%012" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", C=%" PRIu64 ", P=%" PRIu64 ", FS=%" PRIu64 "\n",
-								&indentation[15-pde_cnt*3],
-								page_table_depth - pde_cnt,
-								prev_addr,
-								pde_idx,
-								pde_entry,
-								address & va_mask,
-								pde_fields.pte_base_addr,
-								pde_fields.valid,
-								pde_fields.system,
-								pde_fields.coherent,
-								pde_fields.pte,
-								pde_fields.frag_size);
+						print_pde_ai(asic, indentation, pde_cnt, page_table_depth, prev_addr,
+								pde_idx, pde_entry, address, va_mask, pde_fields);
 						memcpy(&pde_array[pde_cnt++], &pde_fields, sizeof pde_fields);
 					}
 				} else {
@@ -870,21 +953,6 @@ pte_further:
 pde_is_pte:
 			pte_fields = decode_pte_entry_ai(pte_entry);
 
-			if (asic->options.verbose)
-				asic->mem_funcs.vm_message("%s %s@{0x%" PRIx64 "/%" PRIx64"}==0x%016" PRIx64 ", VA=0x%012" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", P=%" PRIu64 ", FS=%" PRIu64 ", F=%" PRIu64 "\n",
-					&indentation[15-pde_cnt*3],
-					(pte_fields.further) ? "PTE-FURTHER" : "PTE",
-					prev_addr,
-					pte_idx,
-					pte_entry,
-					address & (((1ULL << page_table_block_size) - 1) << (12 + pde0_block_fragment_size)),
-					pte_fields.page_base_addr,
-					pte_fields.valid,
-					pte_fields.system,
-					pte_fields.prt,
-					pte_fields.fragment,
-					pte_fields.further);
-
 			// How many bits in the address are used to index into the PTB?
 			// If further is set, that means we jumped back to pde_is_pte,
 			// and the va_mask was properly set down there.
@@ -920,6 +988,17 @@ pde_is_pte:
 				va_mask = va_mask & ~mask_to_ignore;
 			}
 
+			if (asic->options.verbose) {
+				if (pte_fields.further) {
+					pde_fields.further = 1;
+					print_pde_ai(asic, indentation, pde_cnt, page_table_depth, prev_addr,
+							pde_idx, pde_entry, address, va_mask, pde_fields);
+				} else {
+					print_pte_ai(asic, indentation, pde_cnt, prev_addr, pte_idx,
+							pte_entry, address, va_mask, pte_fields);
+				}
+			}
+
 			uint32_t pte_block_fragment_size = 0;
 			if (pte_fields.further) {
 				// Going to go one more layer deep, so now we need the Further-PTE's
@@ -946,6 +1025,7 @@ pde_is_pte:
 
 				// grab PTE base address and other data from the PTE that has the F bit set.
 				pde_fields = decode_pde_entry_ai(pte_entry);
+				pde_cnt++;
 				further = 1;
 				goto pte_further;
 			}
@@ -972,12 +1052,7 @@ pde_is_pte:
 			pte_page_mask = (1ULL << (12 + pde0_block_fragment_size)) - 1;
 
 			if ((asic->options.no_fold_vm_decode || memcmp(&pde_array[0], &pde_fields, sizeof pde_fields)) && asic->options.verbose)
-				asic->mem_funcs.vm_message("PDE=0x%016" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", FS=%" PRIu64 "\n",
-						page_table_base_addr,
-						pde_fields.pte_base_addr,
-						pde_fields.valid,
-						pde_fields.system,
-						pde_fields.frag_size);
+				print_base_ai(asic, page_table_base_addr, address, -1, pde_fields, 0);
 			memcpy(&pde_array[0], &pde_fields, sizeof pde_fields);
 
 			if (!pde_fields.valid)
@@ -992,13 +1067,8 @@ pde_is_pte:
 			pte_fields = decode_pte_entry_ai(pte_entry);
 
 			if (asic->options.verbose)
-				asic->mem_funcs.vm_message("\\-> PTE=0x%016" PRIx64 ", VA=0x%016" PRIx64 ", PBA==0x%012" PRIx64 ", F=%" PRIu64 ", V=%" PRIu64 ", S=%" PRIu64 "\n",
-					pte_entry,
-					address & ~((uint64_t)0xFFF),
-					pte_fields.page_base_addr,
-					pte_fields.fragment,
-					pte_fields.valid,
-					pte_fields.system);
+				print_pte_ai(asic, NULL, 0, 0, 0, pte_entry, address,
+						~((uint64_t)0xFFF), pte_fields);
 
 			if (pdst && !pte_fields.valid)
 				goto invalid_page;
@@ -1018,13 +1088,13 @@ next_page:
 		if (asic->options.verbose) {
 			if (pte_fields.system == 1) {
 				asic->mem_funcs.vm_message("%s Computed address we will read from: %s:%" PRIx64 ", (reading: %" PRIu32 " bytes)\n",
-											&indentation[15-pde_cnt*3-3],
+											&indentation[18-pde_cnt*3-3],
 											"sys",
 											start_addr,
 											chunk_size);
 			} else {
 				asic->mem_funcs.vm_message("%s Computed address we will read from: %s:%" PRIx64 " (MCA:%" PRIx64"), (reading: %" PRIu32 " bytes)\n",
-											&indentation[15-pde_cnt*3-3],
+											&indentation[18-pde_cnt*3-3],
 											"vram",
 											start_addr,
 											start_addr + vm_fb_offset,
-- 
2.20.1


* [PATCH v2 umr 3/3] Enhance printing of page tables in AI+
  2021-06-17 19:25 ` [PATCH umr 3/3] Enhance printing of page tables " Joseph Greathouse
@ 2021-06-21 16:37   ` Joseph Greathouse
  2021-06-22 13:25     ` StDenis, Tom
  2021-06-23 14:13     ` StDenis, Tom
  0 siblings, 2 replies; 6+ messages in thread
From: Joseph Greathouse @ 2021-06-21 16:37 UTC (permalink / raw)
  To: amd-gfx; +Cc: Tom.StDenis, Joseph Greathouse

Pulls print functions for GPUVM page tables on AI+ chips into their
own set of generalized functions, so that we don't have subtly
different printouts for different layers.

Explicitly prints PDEs with the P bit set (which makes them act as
PTEs) and makes PTEs with the F bit set (further, which makes them act
as PDEs) properly indent the next layer of the printout.

Prints remaining fields from the PTE and PDE printouts, such as
read/write/execute bits and MTYPE from PTE.

v2: Correctly handle printing translate-further PTEs

Signed-off-by: Joseph Greathouse <Joseph.Greathouse@amd.com>
---
 src/lib/read_vram.c | 184 ++++++++++++++++++++++++++++++--------------
 1 file changed, 127 insertions(+), 57 deletions(-)

diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c
index 2998873..bea1232 100644
--- a/src/lib/read_vram.c
+++ b/src/lib/read_vram.c
@@ -415,6 +415,112 @@ static pte_fields_ai_t decode_pte_entry_ai(uint64_t pte_entry)
 	return pte_fields;
 }
 
+static void print_pde_fields_ai(struct umr_asic *asic,
+				pde_fields_ai_t pde_fields)
+{
+	asic->mem_funcs.vm_message(
+			", PBA==0x%012" PRIx64 ", V=%" PRIu64
+			", S=%" PRIu64 ", C=%" PRIu64
+			", P=%" PRIu64 ", FS=%" PRIu64 "\n",
+			pde_fields.pte_base_addr,
+			pde_fields.valid,
+			pde_fields.system,
+			pde_fields.coherent,
+			pde_fields.pte,
+			pde_fields.frag_size);
+}
+static void print_base_ai(struct umr_asic *asic,
+			  uint64_t pde_entry, uint64_t address,
+			  uint64_t va_mask, pde_fields_ai_t pde_fields,
+			  int is_base_not_pde)
+{
+	if (is_base_not_pde)
+		asic->mem_funcs.vm_message("BASE");
+	else
+		asic->mem_funcs.vm_message("PDE");
+	asic->mem_funcs.vm_message("=0x%016" PRIx64 ", VA=0x%012" PRIx64,
+			pde_entry,
+			address & va_mask);
+	print_pde_fields_ai(asic, pde_fields);
+}
+
+static void print_pde_ai(struct umr_asic *asic,
+		const char * indentation, int pde_cnt,
+		int page_table_depth, uint64_t prev_addr,
+		uint64_t pde_idx, uint64_t pde_entry, uint64_t address,
+		uint64_t va_mask, pde_fields_ai_t pde_fields)
+{
+	asic->mem_funcs.vm_message("%s ", &indentation[18-pde_cnt*3]);
+	if (pde_fields.further)
+		asic->mem_funcs.vm_message("PTE-FURTHER");
+	else
+		asic->mem_funcs.vm_message("PDE%d", page_table_depth - pde_cnt);
+
+	asic->mem_funcs.vm_message("@{0x%" PRIx64 "/%" PRIx64
+			"}=0x%016" PRIx64 ", VA=0x%012" PRIx64,
+			prev_addr,
+			pde_idx,
+			pde_entry,
+			address & va_mask);
+	print_pde_fields_ai(asic, pde_fields);
+}
+
+static void print_pte_ai(struct umr_asic *asic,
+		const char * indentation, int pde_cnt, uint64_t prev_addr,
+		uint64_t pte_idx, uint64_t pte_entry, uint64_t address,
+		uint64_t va_mask, pte_fields_ai_t pte_fields)
+{
+	if (indentation == NULL) {
+		asic->mem_funcs.vm_message("\\-> PTE");
+	} else {
+		asic->mem_funcs.vm_message("%s ",
+				&indentation[18-pde_cnt*3]);
+		if (pte_fields.pde)
+			asic->mem_funcs.vm_message("PDE0-as-PTE");
+		else
+			asic->mem_funcs.vm_message("PTE");
+		asic->mem_funcs.vm_message("@{0x%" PRIx64 "/%" PRIx64"}",
+				prev_addr,
+				pte_idx);
+	}
+	asic->mem_funcs.vm_message("=0x%016" PRIx64 ", VA=0x%012" PRIx64
+			", PBA==0x%012" PRIx64 ", V=%" PRIu64
+			", S=%" PRIu64 ", C=%" PRIu64 ", Z=%" PRIu64
+			", X=%" PRIu64 ", R=%" PRIu64 ", W=%" PRIu64
+			", FS=%" PRIu64 ", T=%" PRIu64 ", MTYPE=",
+			pte_entry,
+			address & va_mask,
+			pte_fields.page_base_addr,
+			pte_fields.valid,
+			pte_fields.system,
+			pte_fields.coherent,
+			pte_fields.tmz,
+			pte_fields.execute,
+			pte_fields.read,
+			pte_fields.write,
+			pte_fields.fragment,
+			pte_fields.prt,
+			pte_fields.mtype);
+	switch (pte_fields.mtype) {
+		case 0:
+			asic->mem_funcs.vm_message("NC\n");
+			break;
+		case 1:
+			asic->mem_funcs.vm_message("RW\n");
+			break;
+		case 2:
+			asic->mem_funcs.vm_message("CC\n");
+			break;
+		case 3:
+			asic->mem_funcs.vm_message("UC\n");
+			break;
+		default:
+			asic->mem_funcs.vm_message("Unknown (%" PRIu64")\n",
+					pte_fields.mtype);
+			break;
+	}
+}
+
 /**
  * umr_access_vram_ai - Access GPU mapped memory for GFX9+ platforms
  */
@@ -457,7 +563,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 	unsigned char *pdst = dst;
 	char *hub, *vm0prefix, *regprefix;
 	unsigned hubid;
-	static const char *indentation = "               \\->";
+	static const char *indentation = "                  \\->";
 
 	memset(&registers, 0, sizeof registers);
 	memset(&pde_array, 0xff, sizeof pde_array);
@@ -713,14 +819,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 			va_mask <<= (total_vm_bits - top_pdb_bits);
 
 			if ((asic->options.no_fold_vm_decode || memcmp(&pde_fields, &pde_array[pde_cnt], sizeof pde_fields)) && asic->options.verbose)
-				asic->mem_funcs.vm_message("BASE=0x%016" PRIx64 ", VA=0x%012" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", C=%" PRIu64 ", P=%" PRIu64 "\n",
-						pde_entry,
-						address & va_mask,
-						pde_fields.pte_base_addr,
-						pde_fields.valid,
-						pde_fields.system,
-						pde_fields.coherent,
-						pde_fields.pte);
+				print_base_ai(asic, pde_entry, address, va_mask, pde_fields, 1);
 			memcpy(&pde_array[pde_cnt++], &pde_fields, sizeof pde_fields);
 
 			current_depth = page_table_depth;
@@ -783,27 +882,11 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
 					log2_ptb_entries = (9 + (page_table_block_size - pde0_block_fragment_size));
 					ptb_mask = (1ULL << log2_ptb_entries) - 1;
 					pte_page_mask = (1ULL << (pde0_block_fragment_size + 12)) - 1;
-					if (asic->options.verbose)
-						asic->mem_funcs.vm_message("pde0.pte = %u\npde0.block_fragment_size = %u\npage_table_block_size = %u\n",
-							(unsigned)pde_fields.pte,
-							(unsigned)pde0_block_fragment_size,
-							(unsigned)page_table_block_size);
 				}
 				if (!pde_fields.pte) {
 					if ((asic->options.no_fold_vm_decode || memcmp(&pde_fields, &pde_array[pde_cnt], sizeof pde_fields)) && asic->options.verbose) {
-						asic->mem_funcs.vm_message("%s PDE%d@{0x%" PRIx64 "/%" PRIx64 "}=0x%016" PRIx64 ", VA=0x%012" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", C=%" PRIu64 ", P=%" PRIu64 ", FS=%" PRIu64 "\n",
-								&indentation[15-pde_cnt*3],
-								page_table_depth - pde_cnt,
-								prev_addr,
-								pde_idx,
-								pde_entry,
-								address & va_mask,
-								pde_fields.pte_base_addr,
-								pde_fields.valid,
-								pde_fields.system,
-								pde_fields.coherent,
-								pde_fields.pte,
-								pde_fields.frag_size);
+						print_pde_ai(asic, indentation, pde_cnt, page_table_depth, prev_addr,
+								pde_idx, pde_entry, address, va_mask, pde_fields);
 						memcpy(&pde_array[pde_cnt++], &pde_fields, sizeof pde_fields);
 					}
 				} else {
@@ -870,21 +953,6 @@ pte_further:
 pde_is_pte:
 			pte_fields = decode_pte_entry_ai(pte_entry);
 
-			if (asic->options.verbose)
-				asic->mem_funcs.vm_message("%s %s@{0x%" PRIx64 "/%" PRIx64"}==0x%016" PRIx64 ", VA=0x%012" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", P=%" PRIu64 ", FS=%" PRIu64 ", F=%" PRIu64 "\n",
-					&indentation[15-pde_cnt*3],
-					(pte_fields.further) ? "PTE-FURTHER" : "PTE",
-					prev_addr,
-					pte_idx,
-					pte_entry,
-					address & (((1ULL << page_table_block_size) - 1) << (12 + pde0_block_fragment_size)),
-					pte_fields.page_base_addr,
-					pte_fields.valid,
-					pte_fields.system,
-					pte_fields.prt,
-					pte_fields.fragment,
-					pte_fields.further);
-
 			// How many bits in the address are used to index into the PTB?
 			// If further is set, that means we jumped back to pde_is_pte,
 			// and the va_mask was properly set down there.
@@ -920,6 +988,17 @@ pde_is_pte:
 				va_mask = va_mask & ~mask_to_ignore;
 			}
 
+			if (asic->options.verbose) {
+				if (pte_fields.further) {
+					pde_fields = decode_pde_entry_ai(pte_entry);
+					print_pde_ai(asic, indentation, pde_cnt, page_table_depth, prev_addr,
+							pte_idx, pte_entry, address, va_mask, pde_fields);
+				} else {
+					print_pte_ai(asic, indentation, pde_cnt, prev_addr, pte_idx,
+							pte_entry, address, va_mask, pte_fields);
+				}
+			}
+
 			uint32_t pte_block_fragment_size = 0;
 			if (pte_fields.further) {
 				// Going to go one more layer deep, so now we need the Further-PTE's
@@ -946,6 +1025,7 @@ pde_is_pte:
 
 				// grab PTE base address and other data from the PTE that has the F bit set.
 				pde_fields = decode_pde_entry_ai(pte_entry);
+				pde_cnt++;
 				further = 1;
 				goto pte_further;
 			}
@@ -972,12 +1052,7 @@ pde_is_pte:
 			pte_page_mask = (1ULL << (12 + pde0_block_fragment_size)) - 1;
 
 			if ((asic->options.no_fold_vm_decode || memcmp(&pde_array[0], &pde_fields, sizeof pde_fields)) && asic->options.verbose)
-				asic->mem_funcs.vm_message("PDE=0x%016" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", FS=%" PRIu64 "\n",
-						page_table_base_addr,
-						pde_fields.pte_base_addr,
-						pde_fields.valid,
-						pde_fields.system,
-						pde_fields.frag_size);
+				print_base_ai(asic, page_table_base_addr, address, -1, pde_fields, 0);
 			memcpy(&pde_array[0], &pde_fields, sizeof pde_fields);
 
 			if (!pde_fields.valid)
@@ -992,13 +1067,8 @@ pde_is_pte:
 			pte_fields = decode_pte_entry_ai(pte_entry);
 
 			if (asic->options.verbose)
-				asic->mem_funcs.vm_message("\\-> PTE=0x%016" PRIx64 ", VA=0x%016" PRIx64 ", PBA==0x%012" PRIx64 ", F=%" PRIu64 ", V=%" PRIu64 ", S=%" PRIu64 "\n",
-					pte_entry,
-					address & ~((uint64_t)0xFFF),
-					pte_fields.page_base_addr,
-					pte_fields.fragment,
-					pte_fields.valid,
-					pte_fields.system);
+				print_pte_ai(asic, NULL, 0, 0, 0, pte_entry, address,
+						~((uint64_t)0xFFF), pte_fields);
 
 			if (pdst && !pte_fields.valid)
 				goto invalid_page;
@@ -1018,13 +1088,13 @@ next_page:
 		if (asic->options.verbose) {
 			if (pte_fields.system == 1) {
 				asic->mem_funcs.vm_message("%s Computed address we will read from: %s:%" PRIx64 ", (reading: %" PRIu32 " bytes)\n",
-											&indentation[15-pde_cnt*3-3],
+											&indentation[18-pde_cnt*3-3],
 											"sys",
 											start_addr,
 											chunk_size);
 			} else {
 				asic->mem_funcs.vm_message("%s Computed address we will read from: %s:%" PRIx64 " (MCA:%" PRIx64"), (reading: %" PRIu32 " bytes)\n",
-											&indentation[15-pde_cnt*3-3],
+											&indentation[18-pde_cnt*3-3],
 											"vram",
 											start_addr,
 											start_addr + vm_fb_offset,
-- 
2.20.1


* Re: [PATCH v2 umr 3/3] Enhance printing of page tables in AI+
  2021-06-21 16:37   ` [PATCH v2 " Joseph Greathouse
@ 2021-06-22 13:25     ` StDenis, Tom
  2021-06-23 14:13     ` StDenis, Tom
  1 sibling, 0 replies; 6+ messages in thread
From: StDenis, Tom @ 2021-06-22 13:25 UTC (permalink / raw)
  To: Greathouse, Joseph, amd-gfx

[AMD Official Use Only]

Hi,

Just a quick update.  Your first vector passes with your v2 patch in place.  I'll add the other 3 and then start reviewing the code.

Thanks,
Tom

                        // and the va_mask was properly set down there.
@@ -920,6 +988,17 @@ pde_is_pte:
                                va_mask = va_mask & ~mask_to_ignore;
                        }

+                       if (asic->options.verbose) {
+                               if (pte_fields.further) {
+                                       pde_fields = decode_pde_entry_ai(pte_entry);
+                                       print_pde_ai(asic, indentation, pde_cnt, page_table_depth, prev_addr,
+                                                       pte_idx, pte_entry, address, va_mask, pde_fields);
+                               } else {
+                                       print_pte_ai(asic, indentation, pde_cnt, prev_addr, pte_idx,
+                                                       pte_entry, address, va_mask, pte_fields);
+                               }
+                       }
+
                        uint32_t pte_block_fragment_size = 0;
                        if (pte_fields.further) {
                                // Going to go one more layer deep, so now we need the Further-PTE's
@@ -946,6 +1025,7 @@ pde_is_pte:

                                // grab PTE base address and other data from the PTE that has the F bit set.
                                pde_fields = decode_pde_entry_ai(pte_entry);
+                               pde_cnt++;
                                further = 1;
                                goto pte_further;
                        }
@@ -972,12 +1052,7 @@ pde_is_pte:
                        pte_page_mask = (1ULL << (12 + pde0_block_fragment_size)) - 1;

                        if ((asic->options.no_fold_vm_decode || memcmp(&pde_array[0], &pde_fields, sizeof pde_fields)) && asic->options.verbose)
-                               asic->mem_funcs.vm_message("PDE=0x%016" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", FS=%" PRIu64 "\n",
-                                               page_table_base_addr,
-                                               pde_fields.pte_base_addr,
-                                               pde_fields.valid,
-                                               pde_fields.system,
-                                               pde_fields.frag_size);
+                               print_base_ai(asic, page_table_base_addr, address, -1, pde_fields, 0);
                        memcpy(&pde_array[0], &pde_fields, sizeof pde_fields);

                        if (!pde_fields.valid)
@@ -992,13 +1067,8 @@ pde_is_pte:
                        pte_fields = decode_pte_entry_ai(pte_entry);

                        if (asic->options.verbose)
-                               asic->mem_funcs.vm_message("\\-> PTE=0x%016" PRIx64 ", VA=0x%016" PRIx64 ", PBA==0x%012" PRIx64 ", F=%" PRIu64 ", V=%" PRIu64 ", S=%" PRIu64 "\n",
-                                       pte_entry,
-                                       address & ~((uint64_t)0xFFF),
-                                       pte_fields.page_base_addr,
-                                       pte_fields.fragment,
-                                       pte_fields.valid,
-                                       pte_fields.system);
+                               print_pte_ai(asic, NULL, 0, 0, 0, pte_entry, address,
+                                               ~((uint64_t)0xFFF), pte_fields);

                        if (pdst && !pte_fields.valid)
                                goto invalid_page;
@@ -1018,13 +1088,13 @@ next_page:
                if (asic->options.verbose) {
                        if (pte_fields.system == 1) {
                                asic->mem_funcs.vm_message("%s Computed address we will read from: %s:%" PRIx64 ", (reading: %" PRIu32 " bytes)\n",
-                                                                                       &indentation[15-pde_cnt*3-3],
+                                                                                       &indentation[18-pde_cnt*3-3],
                                                                                        "sys",
                                                                                        start_addr,
                                                                                        chunk_size);
                        } else {
                                asic->mem_funcs.vm_message("%s Computed address we will read from: %s:%" PRIx64 " (MCA:%" PRIx64"), (reading: %" PRIu32 " bytes)\n",
-                                                                                       &indentation[15-pde_cnt*3-3],
+                                                                                       &indentation[18-pde_cnt*3-3],
                                                                                        "vram",
                                                                                        start_addr,
                                                                                        start_addr + vm_fb_offset,
--
2.20.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v2 umr 3/3] Enhance printing of page tables in AI+
  2021-06-21 16:37   ` [PATCH v2 " Joseph Greathouse
  2021-06-22 13:25     ` StDenis, Tom
@ 2021-06-23 14:13     ` StDenis, Tom
  1 sibling, 0 replies; 6+ messages in thread
From: StDenis, Tom @ 2021-06-23 14:13 UTC (permalink / raw)
  To: Greathouse, Joseph, amd-gfx

[AMD Official Use Only]

Tested and pushed out to main.

Thanks,
Tom

________________________________________
From: Greathouse, Joseph <Joseph.Greathouse@amd.com>
Sent: Monday, June 21, 2021 12:37
To: amd-gfx@lists.freedesktop.org
Cc: StDenis, Tom; Greathouse, Joseph
Subject: [PATCH v2 umr 3/3] Enhance printing of page tables in AI+

Pulls print functions for GPUVM page tables on AI+ chips into their
own set of generalized functions, so that we don't have subtly
different printouts for different layers.

Explicitly prints PDEs with the P bit set (which makes them act as
PTEs), and makes PTEs with the F bit set (further, which makes them
act as PDEs) properly indent the next layer of the printout.

Prints the remaining fields in the PTE and PDE printouts, such as the
read/write/execute bits and the MTYPE from the PTE.

v2: Correctly handle printing translate-further PTEs

Signed-off-by: Joseph Greathouse <Joseph.Greathouse@amd.com>
---
 src/lib/read_vram.c | 184 ++++++++++++++++++++++++++++++--------------
 1 file changed, 127 insertions(+), 57 deletions(-)
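
For context, and not part of the patch itself: below is a minimal,
standalone sketch of the dispatch rule the new print helpers follow,
using simplified placeholder structs rather than umr's real
pde_fields_ai_t / pte_fields_ai_t decoders. A PDE whose P bit is set
is printed as a PTE, and a PTE whose F (further) bit is set is printed
as a PDE, so the next layer of the walk indents one more step.

#include <stdio.h>

/* Simplified stand-ins for the decoded entry bits; the real umr
 * structs carry many more fields (PBA, V, S, C, FS, MTYPE, ...). */
struct entry_bits {
	unsigned pte;     /* "P" bit: this PDE maps pages directly, treat as a PTE */
	unsigned further; /* "F" bit: this PTE points one level deeper, treat as a PDE */
};

static void print_as_pde(int level)
{
	printf("%*s\\-> PDE at level %d\n", level * 3, "", level);
}

static void print_as_pte(int level)
{
	printf("%*s\\-> PTE at level %d\n", level * 3, "", level);
}

/* Route one decoded entry to the PDE or PTE printer, mirroring what
 * the patch's verbose path does with print_pde_ai()/print_pte_ai(). */
static void print_entry(struct entry_bits e, int is_pde, int level)
{
	if (is_pde) {
		if (e.pte)
			print_as_pte(level);  /* PDE0 with P set: prints as a PTE */
		else
			print_as_pde(level);
	} else {
		if (e.further)
			print_as_pde(level);  /* PTE with F set: prints as a PDE  */
		else
			print_as_pte(level);
	}
}

int main(void)
{
	struct entry_bits pde0_as_pte = { .pte = 1, .further = 0 };
	struct entry_bits pte_further = { .pte = 0, .further = 1 };

	print_entry(pde0_as_pte, 1, 1);
	print_entry(pte_further, 0, 2);
	return 0;
}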

diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c
index 2998873..bea1232 100644
--- a/src/lib/read_vram.c
+++ b/src/lib/read_vram.c
@@ -415,6 +415,112 @@ static pte_fields_ai_t decode_pte_entry_ai(uint64_t pte_entry)
        return pte_fields;
 }

+static void print_pde_fields_ai(struct umr_asic *asic,
+                               pde_fields_ai_t pde_fields)
+{
+       asic->mem_funcs.vm_message(
+                       ", PBA==0x%012" PRIx64 ", V=%" PRIu64
+                       ", S=%" PRIu64 ", C=%" PRIu64
+                       ", P=%" PRIu64 ", FS=%" PRIu64 "\n",
+                       pde_fields.pte_base_addr,
+                       pde_fields.valid,
+                       pde_fields.system,
+                       pde_fields.coherent,
+                       pde_fields.pte,
+                       pde_fields.frag_size);
+}
+static void print_base_ai(struct umr_asic *asic,
+                         uint64_t pde_entry, uint64_t address,
+                         uint64_t va_mask, pde_fields_ai_t pde_fields,
+                         int is_base_not_pde)
+{
+       if (is_base_not_pde)
+               asic->mem_funcs.vm_message("BASE");
+       else
+               asic->mem_funcs.vm_message("PDE");
+       asic->mem_funcs.vm_message("=0x%016" PRIx64 ", VA=0x%012" PRIx64,
+                       pde_entry,
+                       address & va_mask);
+       print_pde_fields_ai(asic, pde_fields);
+}
+
+static void print_pde_ai(struct umr_asic *asic,
+               const char * indentation, int pde_cnt,
+               int page_table_depth, uint64_t prev_addr,
+               uint64_t pde_idx, uint64_t pde_entry, uint64_t address,
+               uint64_t va_mask, pde_fields_ai_t pde_fields)
+{
+       asic->mem_funcs.vm_message("%s ", &indentation[18-pde_cnt*3]);
+       if (pde_fields.further)
+               asic->mem_funcs.vm_message("PTE-FURTHER");
+       else
+               asic->mem_funcs.vm_message("PDE%d", page_table_depth - pde_cnt);
+
+       asic->mem_funcs.vm_message("@{0x%" PRIx64 "/%" PRIx64
+                       "}=0x%016" PRIx64 ", VA=0x%012" PRIx64,
+                       prev_addr,
+                       pde_idx,
+                       pde_entry,
+                       address & va_mask);
+       print_pde_fields_ai(asic, pde_fields);
+}
+
+static void print_pte_ai(struct umr_asic *asic,
+               const char * indentation, int pde_cnt, uint64_t prev_addr,
+               uint64_t pte_idx, uint64_t pte_entry, uint64_t address,
+               uint64_t va_mask, pte_fields_ai_t pte_fields)
+{
+       if (indentation == NULL) {
+               asic->mem_funcs.vm_message("\\-> PTE");
+       } else {
+               asic->mem_funcs.vm_message("%s ",
+                               &indentation[18-pde_cnt*3]);
+               if (pte_fields.pde)
+                       asic->mem_funcs.vm_message("PDE0-as-PTE");
+               else
+                       asic->mem_funcs.vm_message("PTE");
+               asic->mem_funcs.vm_message("@{0x%" PRIx64 "/%" PRIx64"}",
+                               prev_addr,
+                               pte_idx);
+       }
+       asic->mem_funcs.vm_message("=0x%016" PRIx64 ", VA=0x%012" PRIx64
+                       ", PBA==0x%012" PRIx64 ", V=%" PRIu64
+                       ", S=%" PRIu64 ", C=%" PRIu64 ", Z=%" PRIu64
+                       ", X=%" PRIu64 ", R=%" PRIu64 ", W=%" PRIu64
+                       ", FS=%" PRIu64 ", T=%" PRIu64 ", MTYPE=",
+                       pte_entry,
+                       address & va_mask,
+                       pte_fields.page_base_addr,
+                       pte_fields.valid,
+                       pte_fields.system,
+                       pte_fields.coherent,
+                       pte_fields.tmz,
+                       pte_fields.execute,
+                       pte_fields.read,
+                       pte_fields.write,
+                       pte_fields.fragment,
+                       pte_fields.prt,
+                       pte_fields.mtype);
+       switch (pte_fields.mtype) {
+               case 0:
+                       asic->mem_funcs.vm_message("NC\n");
+                       break;
+               case 1:
+                       asic->mem_funcs.vm_message("RW\n");
+                       break;
+               case 2:
+                       asic->mem_funcs.vm_message("CC\n");
+                       break;
+               case 3:
+                       asic->mem_funcs.vm_message("UC\n");
+                       break;
+               default:
+                       asic->mem_funcs.vm_message("Unknown (%" PRIu64")\n",
+                                       pte_fields.mtype);
+                       break;
+       }
+}
+
 /**
  * umr_access_vram_ai - Access GPU mapped memory for GFX9+ platforms
  */
@@ -457,7 +563,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
        unsigned char *pdst = dst;
        char *hub, *vm0prefix, *regprefix;
        unsigned hubid;
-       static const char *indentation = "               \\->";
+       static const char *indentation = "                  \\->";

        memset(&registers, 0, sizeof registers);
        memset(&pde_array, 0xff, sizeof pde_array);
@@ -713,14 +819,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
                        va_mask <<= (total_vm_bits - top_pdb_bits);

                        if ((asic->options.no_fold_vm_decode || memcmp(&pde_fields, &pde_array[pde_cnt], sizeof pde_fields)) && asic->options.verbose)
-                               asic->mem_funcs.vm_message("BASE=0x%016" PRIx64 ", VA=0x%012" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", C=%" PRIu64 ", P=%" PRIu64 "\n",
-                                               pde_entry,
-                                               address & va_mask,
-                                               pde_fields.pte_base_addr,
-                                               pde_fields.valid,
-                                               pde_fields.system,
-                                               pde_fields.coherent,
-                                               pde_fields.pte);
+                               print_base_ai(asic, pde_entry, address, va_mask, pde_fields, 1);
                        memcpy(&pde_array[pde_cnt++], &pde_fields, sizeof pde_fields);

                        current_depth = page_table_depth;
@@ -783,27 +882,11 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid,
                                        log2_ptb_entries = (9 + (page_table_block_size - pde0_block_fragment_size));
                                        ptb_mask = (1ULL << log2_ptb_entries) - 1;
                                        pte_page_mask = (1ULL << (pde0_block_fragment_size + 12)) - 1;
-                                       if (asic->options.verbose)
-                                               asic->mem_funcs.vm_message("pde0.pte = %u\npde0.block_fragment_size = %u\npage_table_block_size = %u\n",
-                                                       (unsigned)pde_fields.pte,
-                                                       (unsigned)pde0_block_fragment_size,
-                                                       (unsigned)page_table_block_size);
                                }
                                if (!pde_fields.pte) {
                                        if ((asic->options.no_fold_vm_decode || memcmp(&pde_fields, &pde_array[pde_cnt], sizeof pde_fields)) && asic->options.verbose) {
-                                               asic->mem_funcs.vm_message("%s PDE%d@{0x%" PRIx64 "/%" PRIx64 "}=0x%016" PRIx64 ", VA=0x%012" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", C=%" PRIu64 ", P=%" PRIu64 ", FS=%" PRIu64 "\n",
-                                                               &indentation[15-pde_cnt*3],
-                                                               page_table_depth - pde_cnt,
-                                                               prev_addr,
-                                                               pde_idx,
-                                                               pde_entry,
-                                                               address & va_mask,
-                                                               pde_fields.pte_base_addr,
-                                                               pde_fields.valid,
-                                                               pde_fields.system,
-                                                               pde_fields.coherent,
-                                                               pde_fields.pte,
-                                                               pde_fields.frag_size);
+                                               print_pde_ai(asic, indentation, pde_cnt, page_table_depth, prev_addr,
+                                                               pde_idx, pde_entry, address, va_mask, pde_fields);
                                                memcpy(&pde_array[pde_cnt++], &pde_fields, sizeof pde_fields);
                                        }
                                } else {
@@ -870,21 +953,6 @@ pte_further:
 pde_is_pte:
                        pte_fields = decode_pte_entry_ai(pte_entry);

-                       if (asic->options.verbose)
-                               asic->mem_funcs.vm_message("%s %s@{0x%" PRIx64 "/%" PRIx64"}==0x%016" PRIx64 ", VA=0x%012" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", P=%" PRIu64 ", FS=%" PRIu64 ", F=%" PRIu64 "\n",
-                                       &indentation[15-pde_cnt*3],
-                                       (pte_fields.further) ? "PTE-FURTHER" : "PTE",
-                                       prev_addr,
-                                       pte_idx,
-                                       pte_entry,
-                                       address & (((1ULL << page_table_block_size) - 1) << (12 + pde0_block_fragment_size)),
-                                       pte_fields.page_base_addr,
-                                       pte_fields.valid,
-                                       pte_fields.system,
-                                       pte_fields.prt,
-                                       pte_fields.fragment,
-                                       pte_fields.further);
-
                        // How many bits in the address are used to index into the PTB?
                        // If further is set, that means we jumped back to pde_is_pte,
                        // and the va_mask was properly set down there.
@@ -920,6 +988,17 @@ pde_is_pte:
                                va_mask = va_mask & ~mask_to_ignore;
                        }

+                       if (asic->options.verbose) {
+                               if (pte_fields.further) {
+                                       pde_fields = decode_pde_entry_ai(pte_entry);
+                                       print_pde_ai(asic, indentation, pde_cnt, page_table_depth, prev_addr,
+                                                       pte_idx, pte_entry, address, va_mask, pde_fields);
+                               } else {
+                                       print_pte_ai(asic, indentation, pde_cnt, prev_addr, pte_idx,
+                                                       pte_entry, address, va_mask, pte_fields);
+                               }
+                       }
+
                        uint32_t pte_block_fragment_size = 0;
                        if (pte_fields.further) {
                                // Going to go one more layer deep, so now we need the Further-PTE's
@@ -946,6 +1025,7 @@ pde_is_pte:

                                // grab PTE base address and other data from the PTE that has the F bit set.
                                pde_fields = decode_pde_entry_ai(pte_entry);
+                               pde_cnt++;
                                further = 1;
                                goto pte_further;
                        }
@@ -972,12 +1052,7 @@ pde_is_pte:
                        pte_page_mask = (1ULL << (12 + pde0_block_fragment_size)) - 1;

                        if ((asic->options.no_fold_vm_decode || memcmp(&pde_array[0], &pde_fields, sizeof pde_fields)) && asic->options.verbose)
-                               asic->mem_funcs.vm_message("PDE=0x%016" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", FS=%" PRIu64 "\n",
-                                               page_table_base_addr,
-                                               pde_fields.pte_base_addr,
-                                               pde_fields.valid,
-                                               pde_fields.system,
-                                               pde_fields.frag_size);
+                               print_base_ai(asic, page_table_base_addr, address, -1, pde_fields, 0);
                        memcpy(&pde_array[0], &pde_fields, sizeof pde_fields);

                        if (!pde_fields.valid)
@@ -992,13 +1067,8 @@ pde_is_pte:
                        pte_fields = decode_pte_entry_ai(pte_entry);

                        if (asic->options.verbose)
-                               asic->mem_funcs.vm_message("\\-> PTE=0x%016" PRIx64 ", VA=0x%016" PRIx64 ", PBA==0x%012" PRIx64 ", F=%" PRIu64 ", V=%" PRIu64 ", S=%" PRIu64 "\n",
-                                       pte_entry,
-                                       address & ~((uint64_t)0xFFF),
-                                       pte_fields.page_base_addr,
-                                       pte_fields.fragment,
-                                       pte_fields.valid,
-                                       pte_fields.system);
+                               print_pte_ai(asic, NULL, 0, 0, 0, pte_entry, address,
+                                               ~((uint64_t)0xFFF), pte_fields);

                        if (pdst && !pte_fields.valid)
                                goto invalid_page;
@@ -1018,13 +1088,13 @@ next_page:
                if (asic->options.verbose) {
                        if (pte_fields.system == 1) {
                                asic->mem_funcs.vm_message("%s Computed address we will read from: %s:%" PRIx64 ", (reading: %" PRIu32 " bytes)\n",
-                                                                                       &indentation[15-pde_cnt*3-3],
+                                                                                       &indentation[18-pde_cnt*3-3],
                                                                                        "sys",
                                                                                        start_addr,
                                                                                        chunk_size);
                        } else {
                                asic->mem_funcs.vm_message("%s Computed address we will read from: %s:%" PRIx64 " (MCA:%" PRIx64"), (reading: %" PRIu32 " bytes)\n",
-                                                                                       &indentation[15-pde_cnt*3-3],
+                                                                                       &indentation[18-pde_cnt*3-3],
                                                                                        "vram",
                                                                                        start_addr,
                                                                                        start_addr + vm_fb_offset,
--
2.20.1
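
A side note on the widened indentation string and the 15 -> 18 index
changes above (a standalone sketch, not part of the patch): the string
is 18 spaces followed by "\->", and &indentation[18 - pde_cnt*3] backs
up three characters per page-table level, so each deeper level of the
walk gains three more leading spaces. The three extra characters
presumably cover the additional level introduced when a
translate-further PTE bumps pde_cnt.

#include <stdio.h>

int main(void)
{
	/* 18 spaces followed by "\->", as in the patched read_vram.c. */
	static const char *indentation = "                  \\->";
	int pde_cnt;

	/* Each deeper level of the walk prints three more leading spaces. */
	for (pde_cnt = 0; pde_cnt <= 5; pde_cnt++)
		printf("pde_cnt=%d: \"%s\"\n", pde_cnt, &indentation[18 - pde_cnt * 3]);
	return 0;
}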

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-06-23 14:14 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-17 19:25 [PATCH umr 1/3] Improve handling of non-standard page tables in AI+ Joseph Greathouse
2021-06-17 19:25 ` [PATCH umr 2/3] Generalize decoding of PDEs and PTEs " Joseph Greathouse
2021-06-17 19:25 ` [PATCH umr 3/3] Enhance printing of page tables " Joseph Greathouse
2021-06-21 16:37   ` [PATCH v2 " Joseph Greathouse
2021-06-22 13:25     ` StDenis, Tom
2021-06-23 14:13     ` StDenis, Tom
