* [PATCH umr 1/3] Improve handling of non-standard page tables in AI+ @ 2021-06-17 19:25 Joseph Greathouse 2021-06-17 19:25 ` [PATCH umr 2/3] Generalize decoding of PDEs and PTEs " Joseph Greathouse 2021-06-17 19:25 ` [PATCH umr 3/3] Enhance printing of page tables " Joseph Greathouse 0 siblings, 2 replies; 6+ messages in thread From: Joseph Greathouse @ 2021-06-17 19:25 UTC (permalink / raw) To: amd-gfx; +Cc: Tom.StDenis, Joseph Greathouse Fixes handling of GPUVM page table decoding when not using 4-level page tables with 512 entries per level. This includes: - Calculating actual size of top-most PDB based on total VM range, page table depth, and page table block size. - Calculating size of PTB based on the page table block size and the PDE0's block fragment size. - Handling PTE offset and masks from PDE0 with P-bit, normal PTBs, or PTBs from a translate-further layer. - When using a PTE with F bit to go one layer deeper, pull new block fragment size out of that PTE to handle further-level PTBs of non-standard sizes. 
Signed-off-by: Joseph Greathouse <Joseph.Greathouse@amd.com> --- src/lib/read_vram.c | 199 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 153 insertions(+), 46 deletions(-) diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c index efcd081..049acd4 100644 --- a/src/lib/read_vram.c +++ b/src/lib/read_vram.c @@ -297,6 +297,26 @@ invalid_page: return -1; } +/** round_up_pot -- Round up value to next power of two */ +static uint64_t round_up_pot(uint64_t x) +{ + uint64_t y = (64ULL * 1024 * 1024); // start at 64MiB + while (y < x) + y <<= 1; + return y; +} + +static uint64_t log2_vm_size(uint64_t page_table_start_addr, uint64_t page_table_end_addr) +{ + uint64_t size_of_vm_bytes = page_table_end_addr - page_table_start_addr + 4096; + size_of_vm_bytes = round_up_pot(size_of_vm_bytes); + // Find the highest bit set to get an estimate for log2(size) + uint32_t vm_bits = 0; + while (size_of_vm_bytes >>= 1) + vm_bits++; + return vm_bits; +} + /** * umr_access_vram_ai - Access GPU mapped memory for GFX9+ platforms */ @@ -304,17 +324,19 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, uint64_t address, uint32_t size, void *dst, int write_en) { - uint64_t start_addr, page_table_start_addr, page_table_base_addr, - page_table_block_size, pte_idx, pde_idx, pte_entry, pde_entry, + uint64_t start_addr, page_table_start_addr, page_table_end_addr, page_table_base_addr, + page_table_block_size, log2_ptb_entries, pte_idx, pde_idx, pte_entry, pde_entry, pde_address, vm_fb_offset, va_mask, offset_mask, system_aperture_low, system_aperture_high, - fb_top, fb_bottom, pte_page_mask, agp_base, agp_bot, agp_top, prev_addr; + fb_top, fb_bottom, ptb_mask, pte_page_mask, agp_base, agp_bot, agp_top, prev_addr; uint32_t chunk_size, tmp, pde0_block_fragment_size; int pde_cnt, current_depth, page_table_depth, zfb, further; struct { uint32_t mmVM_CONTEXTx_PAGE_TABLE_START_ADDR_LO32, mmVM_CONTEXTx_PAGE_TABLE_START_ADDR_HI32, + 
mmVM_CONTEXTx_PAGE_TABLE_END_ADDR_LO32, + mmVM_CONTEXTx_PAGE_TABLE_END_ADDR_HI32, mmVM_CONTEXTx_CNTL, mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR_LO32, mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR_HI32, @@ -461,6 +483,12 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, sprintf(buf, "mm%sVM_CONTEXT%" PRIu32 "_PAGE_TABLE_START_ADDR_HI32", regprefix, vmid); registers.mmVM_CONTEXTx_PAGE_TABLE_START_ADDR_HI32 = umr_read_reg_by_name_by_ip(asic, hub, buf); page_table_start_addr |= (uint64_t)registers.mmVM_CONTEXTx_PAGE_TABLE_START_ADDR_HI32 << 44; + sprintf(buf, "mm%sVM_CONTEXT%" PRIu32 "_PAGE_TABLE_END_ADDR_LO32", regprefix, vmid); + registers.mmVM_CONTEXTx_PAGE_TABLE_END_ADDR_LO32 = umr_read_reg_by_name_by_ip(asic, hub, buf); + page_table_end_addr = (uint64_t)registers.mmVM_CONTEXTx_PAGE_TABLE_END_ADDR_LO32 << 12; + sprintf(buf, "mm%sVM_CONTEXT%" PRIu32 "_PAGE_TABLE_END_ADDR_HI32", regprefix, vmid); + registers.mmVM_CONTEXTx_PAGE_TABLE_END_ADDR_HI32 = umr_read_reg_by_name_by_ip(asic, hub, buf); + page_table_end_addr |= (uint64_t)registers.mmVM_CONTEXTx_PAGE_TABLE_END_ADDR_HI32 << 44; sprintf(buf, "mm%sVM_CONTEXT%" PRIu32 "_CNTL", regprefix, vmid); tmp = registers.mmVM_CONTEXTx_CNTL = umr_read_reg_by_name_by_ip(asic, hub, buf); @@ -495,6 +523,8 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, asic->mem_funcs.vm_message( "mm%sVM_CONTEXT%" PRIu32 "_PAGE_TABLE_START_ADDR_LO32=0x%" PRIx32 "\n" "mm%sVM_CONTEXT%" PRIu32 "_PAGE_TABLE_START_ADDR_HI32=0x%" PRIx32 "\n" + "mm%sVM_CONTEXT%" PRIu32 "_PAGE_TABLE_END_ADDR_LO32=0x%" PRIx32 "\n" + "mm%sVM_CONTEXT%" PRIu32 "_PAGE_TABLE_END_ADDR_HI32=0x%" PRIx32 "\n" "mm%sVM_CONTEXT%" PRIu32 "_PAGE_TABLE_BASE_ADDR_LO32=0x%" PRIx32 "\n" "mm%sVM_CONTEXT%" PRIu32 "_PAGE_TABLE_BASE_ADDR_HI32=0x%" PRIx32 "\n" "mm%sVM_CONTEXT%" PRIu32 "_CNTL=0x%" PRIx32 "\n" @@ -513,6 +543,8 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, "mm%sMC_VM_AGP_TOP=0x%" PRIx32 "\n", regprefix, vmid, 
registers.mmVM_CONTEXTx_PAGE_TABLE_START_ADDR_LO32, regprefix, vmid, registers.mmVM_CONTEXTx_PAGE_TABLE_START_ADDR_HI32, + regprefix, vmid, registers.mmVM_CONTEXTx_PAGE_TABLE_END_ADDR_LO32, + regprefix, vmid, registers.mmVM_CONTEXTx_PAGE_TABLE_END_ADDR_HI32, regprefix, vmid, registers.mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR_LO32, regprefix, vmid, registers.mmVM_CONTEXTx_PAGE_TABLE_BASE_ADDR_HI32, regprefix, vmid, registers.mmVM_CONTEXTx_CNTL, @@ -535,10 +567,6 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, // transform page_table_base page_table_base_addr -= vm_fb_offset; - // convert some defaults to actual values AFTER printing out to user - // page_table_block_size of 0 means 9 (512 entries) - if (!page_table_block_size) - page_table_block_size = 9; pde0_block_fragment_size = 0; if (vmid == 0) { @@ -593,7 +621,9 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, // defaults in case we have to bail out before fully decoding to a PTE pde_cnt = 0; + ptb_mask = (1ULL << 9) - 1; pte_page_mask = (1ULL << 12) - 1; + log2_ptb_entries = 9; further = 0; if (page_table_depth >= 1) { @@ -608,8 +638,23 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, // AI+ supports more than 1 level of PDEs so we iterate for all of the depths pde_address = pde_fields.pte_base_addr; - // TODO: Should "page_table_block_size" just be 9 to account for potential PTB1 selectors? - va_mask = ((uint64_t)511 << ((page_table_depth)*9 + (12 + pde0_block_fragment_size + page_table_block_size))); + /* + * Size of the first PDB depends on the total coverage of the + * page table and the PAGE_TABLE_BLOCK_SIZE. + * Entire table takes ceil(log2(total_vm_size)) bits + * All PDBs except the first one take 9 bits each + * The PTB covers at least 2 MiB (21 bits) + * And PAGE_TABLE_BLOCK_SIZE is log2(num 2MiB ranges PTB covers) + * As such, the formula for the size of the first PDB is: + * PDB1, PDB0, etc. 
PTB covers at least 2 MiB + * Block size can make it cover more + * total_vm_bits - (9 * num_middle_pdbs) - (page_table_block_size + 21) + */ + int total_vm_bits = log2_vm_size(page_table_start_addr, page_table_end_addr); + int top_pdb_bits = total_vm_bits - (9 * (page_table_depth - 1)) - (page_table_block_size + 21); + + va_mask = (1ULL << top_pdb_bits) - 1; + va_mask <<= (total_vm_bits - top_pdb_bits); if ((asic->options.no_fold_vm_decode || memcmp(&pde_fields, &pde_array[pde_cnt], sizeof pde_fields)) && asic->options.verbose) asic->mem_funcs.vm_message("BASE=0x%016" PRIx64 ", VA=0x%012" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", C=%" PRIu64 ", P=%" PRIu64 "\n", @@ -624,14 +669,19 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, current_depth = page_table_depth; while (current_depth) { - pde_idx = address >> (9 * (current_depth - 1) + page_table_block_size + 12); - // mask only 9 bits - if (current_depth != page_table_depth) - pde_idx &= (1ULL << 9) - 1; - - - // TODO: redo va_mask - va_mask = ((uint64_t)511 << ((page_table_depth - pde_cnt)*9 + (12 + pde0_block_fragment_size + page_table_block_size))); + // Every middle PDB has 512 entries, so shift a further 9 bits + // for every layer beyond the first one. + int amount_to_shift = (total_vm_bits - top_pdb_bits); + amount_to_shift -= ((page_table_depth - current_depth)*9); + pde_idx = address >> amount_to_shift; + + // Middle layers need the upper bits masked out after the right-shift. + // For the top-most layer, the va_mask is set above the while loop, + // so we can skip re-setting it here. 
+ if (current_depth != page_table_depth) { + pde_idx &= 511; + va_mask = (uint64_t)511 << amount_to_shift; + } // read PDE entry prev_addr = pde_address + pde_idx * 8; @@ -671,9 +721,18 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, pde_fields.pte = (pde_entry >> 54) & 1; if (current_depth == 1) { pde0_block_fragment_size = pde_fields.frag_size; - // page_table_block_size is the number of entries in a PTB that spans 2MB - page_table_block_size = 21 - (12 + pde0_block_fragment_size); - pte_page_mask = (1ULL << (12 + pde0_block_fragment_size)) - 1; + /* + * page_table_block_size is the number of 2MiB regions covered by a PTB + * If we set it to 0, then PTB cover 2 MiB + * If it's 9 PTB cover 1024 MiB + * pde0_block_fragment_size tells us how many 4 KiB regions each PTE covers + * If it's 0 PTEs cover 4 KiB + * If it's 9 PTEs cover 2 MiB + * So the number of PTEs in a PTB is 2^(9+ptbs-pbfs) + */ + log2_ptb_entries = (9 + (page_table_block_size - pde0_block_fragment_size)); + ptb_mask = (1ULL << log2_ptb_entries) - 1; + pte_page_mask = (1ULL << (pde0_block_fragment_size + 12)) - 1; if (asic->options.verbose) asic->mem_funcs.vm_message("pde0.pte = %u\npde0.block_fragment_size = %u\npage_table_block_size = %u\n", (unsigned)pde_fields.pte, @@ -723,9 +782,13 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, pde_address = pde_fields.pte_base_addr; } - // read PTE selector (to select from PTB0) - // TODO: support for page_table_block_size > 9 - pte_idx = (address >> (12 + pde0_block_fragment_size)) & ((1ULL << page_table_block_size) - 1); + // If we fall through to here, we are pointing into PTB, so pull out + // the index and mask. + // At minimum, each PTE is 4 KiB (12 bits) + // PDE0.BFS tells us how many of these 4 KiB page each PTE covers + // So add those bits in. 
+ // We also calculated the PTE mask up above, to know how many PTEs are in this PTB + pte_idx = (address >> (12 + pde0_block_fragment_size)) & ptb_mask; pte_further: // now read PTE entry for this page prev_addr = pde_fields.pte_base_addr + pte_idx*8; @@ -778,20 +841,74 @@ pde_is_pte: pte_fields.fragment, pte_fields.further); - if (pte_fields.further) { - if (page_table_block_size == 9) { - // this case doesn't make sense unless we support PTBS > 9 - asic->mem_funcs.vm_message("[ERROR]: PTE.further is set and *CNTL.PAGE_TABLE_BLOCK_SIZE is 9...\n"); - return -1; + // How many bits in the address are used to index into the PTB? + // If further is set, that means we jumped back to pde_is_pte, + // and the va_mask was properly set down there. + if (!further) { + // total_vm_bits are all the bits in the VM space + // We want to ignore the top-most PDB, which uses top_pdb_bits + // We also want to ignore lower PDBs, which use 9 bits each + int bits_to_use = total_vm_bits - top_pdb_bits - (9 * (page_table_depth - 1)); + + // At a minimum, we want to ignore the bottom 12 bits for a 4 KiB page + int lower_bits_to_ignore = 12; + + if (pde_fields.pte) { + // We are in here because we're in PDE0 with P bit. So we don't want + // to skip the 9 bits from PDB0. + bits_to_use += 9; + + // If the P bit is set, we are coming from PDE0, thus this entry + // covers the whole page_table_block_size, instead of the PDE0.BFS. + // So we want to ignore those bits in the address. + lower_bits_to_ignore += page_table_block_size; } else { - pte_idx = (address >> 12) & ((1ULL << pde0_block_fragment_size) - 1); - pte_page_mask = (1ULL << 12) - 1; - - // grab PTE base address from the PTE that has the F bit set. - pde_fields.pte_base_addr = pte_fields.page_base_addr; - further = 1; - goto pte_further; + // If we are at an actual PTE, then based on PDE0.BFS, we want to ignore + // some of the lowest bits. 
+ // If PDE0.BFS=0, the bottom 12 bits are used to index within the page + // If PDE0.BFS=9, the bottom 21 bits are used to index within the page + // etc. These are the bits we want to ignore, and we already put 12 in. + lower_bits_to_ignore += pde0_block_fragment_size; + } + + va_mask = (1 << bits_to_use) - 1; + int mask_to_ignore = (1 << lower_bits_to_ignore) - 1; + va_mask = va_mask & ~mask_to_ignore; + } + + uint32_t pte_block_fragment_size = 0; + if (pte_fields.further) { + // Going to go one more layer deep, so now we need the Further-PTE's + // block_fragment_size. This tells us how many 4K pages each + // last-layer-PTE covers. + pte_block_fragment_size = (pte_entry >> 59) & 0x1F; + + // Each entry covers the Further-PTE.block_fragment_size number + // of 4K pages so we can potentially ignore some low-order bits. + int last_level_ptb_bits = 12 + pte_block_fragment_size; + pte_idx = address >> last_level_ptb_bits; + + // The total size covered by the last-layer-PTB is a function of + // pde0_block_fragment_size, which tells us how many 4K entries the + // PTB covers. + // So number of bits needed to index the entries in the final PTE is: + uint32_t num_entry_bits = pde0_block_fragment_size - pte_block_fragment_size; + // Clamp the index to the new last-level PTB's size. + pte_idx &= ((1 << num_entry_bits) - 1); + + uint32_t upper_mask = (1ULL << (12 + pde0_block_fragment_size)) - 1; + pte_page_mask = (1ULL << last_level_ptb_bits) - 1; + va_mask &= (upper_mask & ~pte_page_mask); + + // grab PTE base address and other data from the PTE that has the F bit set. 
+ pde_fields.frag_size = (pte_entry >> 59) & 0x1F; + pde_fields.pte_base_addr = pte_entry & 0xFFFFFFFFFFC0ULL; + pde_fields.valid = pte_entry & 1; + pde_fields.system = (pte_entry >> 1) & 1; + pde_fields.cache = (pte_entry >> 2) & 1; + pde_fields.pte = 0; + further = 1; + goto pte_further; } if (!pte_fields.system) @@ -802,11 +919,10 @@ pde_is_pte: // compute starting address // this also accounts for PDE-is-PTE masking since current_depth > 0 at this point - // if we are processing a PTE leaf node then the page size is 12 bits if (!further) offset_mask = (1ULL << ((current_depth * 9) + (12 + pde0_block_fragment_size))) - 1; else - offset_mask = (1ULL << 12) - 1; // offset masks are always 12-bits wide with PTE.further set + offset_mask = (1ULL << (12 + pte_block_fragment_size)) - 1; start_addr = asic->mem_funcs.gpu_bus_to_cpu_address(asic, pte_fields.page_base_addr) + (address & offset_mask); } else { @@ -935,15 +1051,6 @@ invalid_page: return -1; } -/** round_up_pot -- Round up value to next power of two */ -static uint64_t round_up_pot(uint64_t x) -{ - uint64_t y = (64ULL * 1024 * 1024); // start at 64MiB - while (y < x) - y <<= 1; - return y; -} - /** * umr_access_vram - Access GPU mapped memory * -- 2.20.1 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH umr 2/3] Generalize decoding of PDEs and PTEs in AI+ 2021-06-17 19:25 [PATCH umr 1/3] Improve handling of non-standard page tables in AI+ Joseph Greathouse @ 2021-06-17 19:25 ` Joseph Greathouse 2021-06-17 19:25 ` [PATCH umr 3/3] Enhance printing of page tables " Joseph Greathouse 1 sibling, 0 replies; 6+ messages in thread From: Joseph Greathouse @ 2021-06-17 19:25 UTC (permalink / raw) To: amd-gfx; +Cc: Tom.StDenis, Joseph Greathouse Brings decoding of PDEs and PTEs for AI+ chips into their own functions, so that we don't end up with subtly different decoding bugs in the variety of places such decodings are done. Also fixes a minor bug where we were pulling PTE.PRT from bit 61 instead of the proper bit 51. Signed-off-by: Joseph Greathouse <Joseph.Greathouse@amd.com> --- src/lib/read_vram.c | 187 ++++++++++++++++++++++++++------------------ 1 file changed, 109 insertions(+), 78 deletions(-) diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c index 049acd4..2998873 100644 --- a/src/lib/read_vram.c +++ b/src/lib/read_vram.c @@ -317,6 +317,104 @@ static uint64_t log2_vm_size(uint64_t page_table_start_addr, uint64_t page_table return vm_bits; } +typedef struct { + uint64_t + frag_size, + pte_base_addr, + valid, + system, + coherent, + pte, + further; +} pde_fields_ai_t; + +typedef struct { + uint64_t + valid, + system, + coherent, + tmz, + execute, + read, + write, + fragment, + page_base_addr, + prt, + pde, + further, + mtype; +} pte_fields_ai_t; + +/* + * PDE format on AI: + * 63:59 block fragment size + * 58:55 reserved + * But if bit 56 is set, this is a PTE with 'further' set, + * which makes it act like a PDE. 
+ 54 pde-is-pte + 53:48 reserved + 47:6 physical base address of PTE + 2 cache coherent/snoop + 1 system + 0 valid + */ +static pde_fields_ai_t decode_pde_entry_ai(uint64_t pde_entry) +{ + pde_fields_ai_t pde_fields; + pde_fields.frag_size = (pde_entry >> 59) & 0x1F; + pde_fields.pte_base_addr = pde_entry & 0xFFFFFFFFFFC0ULL; + pde_fields.valid = pde_entry & 1; + pde_fields.system = (pde_entry >> 1) & 1; + pde_fields.coherent = (pde_entry >> 2) & 1; + pde_fields.pte = (pde_entry >> 54) & 1; + pde_fields.further = (pde_entry >> 56) & 1; + return pde_fields; +} + +/* + * PTE format on AI and PI: + * 58:57 mtype + * 56 further + * 54 reserved + * But if it is set, then this is actually a PDE with 'P' + * bit set, which makes the PDE act like a PTE. + * 51 prt + * 47:12 4k physical page base address + * 11:7 fragment + * 6 write + * 5 read + * 4 exe + * 3 tmz (PI+) + * 2 snooped / coherent + * 1 system + * 0 valid + */ +static pte_fields_ai_t decode_pte_entry_ai(uint64_t pte_entry) +{ + pte_fields_ai_t pte_fields; + pte_fields.valid = pte_entry & 1; + pte_fields.system = (pte_entry >> 1) & 1; + pte_fields.coherent = (pte_entry >> 2) & 1; + pte_fields.tmz = (pte_entry >> 3) & 1; + pte_fields.execute = (pte_entry >> 4) & 1; + pte_fields.read = (pte_entry >> 5) & 1; + pte_fields.write = (pte_entry >> 6) & 1; + pte_fields.fragment = (pte_entry >> 7) & 0x1F; + pte_fields.prt = (pte_entry >> 51) & 1; + pte_fields.pde = (pte_entry >> 54) & 1; + pte_fields.further = (pte_entry >> 56) & 1; + pte_fields.mtype = (pte_entry >> 57) & 3; + + // PTEs hold physical address in 47:12 + // PDEs hold physical address in 47:6, so if this is a PTE-as-PDE (further), need a different mask + if (pte_fields.further) + pte_fields.page_base_addr = pte_entry & 0xFFFFFFFFFFC0ULL; + else + pte_fields.page_base_addr = pte_entry & 0xFFFFFFFFF000ULL; + + return pte_fields; +} + /** * umr_access_vram_ai - Access GPU mapped memory for GFX9+ platforms */ @@ -352,24 +450,9 @@ static int 
umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, mmMC_VM_AGP_BOT, mmMC_VM_AGP_TOP; } registers; - struct { - uint64_t - frag_size, - pte_base_addr, - valid, - system, - cache, - pte; - } pde_fields, pde_array[8]; - struct { - uint64_t - page_base_addr, - fragment, - system, - valid, - prt, - further; - } pte_fields; + + pde_fields_ai_t pde_fields, pde_array[8]; + pte_fields_ai_t pte_fields; char buf[64]; unsigned char *pdst = dst; char *hub, *vm0prefix, *regprefix; @@ -379,27 +462,6 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, memset(®isters, 0, sizeof registers); memset(&pde_array, 0xff, sizeof pde_array); - /* - * PTE format on AI: - * 47:12 4k physical page base address - * 11:7 fragment - * 6 write - * 5 read - * 4 exe - * 3 reserved - * 2 snooped - * 1 system - * 0 valid - * - * PDE format on AI: - * 63:59 block fragment size - * 58:40 reserved - * 47:6 physical base address of PTE - * 2 cache coherent/snoop - * 1 system - * 0 valid - */ - hubid = vmid & 0xFF00; vmid &= 0xFF; @@ -627,13 +689,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, further = 0; if (page_table_depth >= 1) { - // decode PDE values - pde_fields.frag_size = (pde_entry >> 59) & 0x1F; - pde_fields.pte_base_addr = pde_entry & 0xFFFFFFFFF000ULL; - pde_fields.valid = pde_entry & 1; - pde_fields.system = (pde_entry >> 1) & 1; - pde_fields.cache = (pde_entry >> 2) & 1; - pde_fields.pte = (pde_entry >> 54) & 1; + pde_fields = decode_pde_entry_ai(pde_entry); // AI+ supports more than 1 level of PDEs so we iterate for all of the depths pde_address = pde_fields.pte_base_addr; @@ -663,7 +719,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, pde_fields.pte_base_addr, pde_fields.valid, pde_fields.system, - pde_fields.cache, + pde_fields.coherent, pde_fields.pte); memcpy(&pde_array[pde_cnt++], &pde_fields, sizeof pde_fields); @@ -712,13 +768,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, } } - // 
decode PDE values - pde_fields.frag_size = (pde_entry >> 59) & 0x1F; - pde_fields.pte_base_addr = pde_entry & 0xFFFFFFFFF000ULL; - pde_fields.valid = pde_entry & 1; - pde_fields.system = (pde_entry >> 1) & 1; - pde_fields.cache = (pde_entry >> 2) & 1; - pde_fields.pte = (pde_entry >> 54) & 1; + pde_fields = decode_pde_entry_ai(pde_entry); if (current_depth == 1) { pde0_block_fragment_size = pde_fields.frag_size; /* @@ -751,7 +801,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, pde_fields.pte_base_addr, pde_fields.valid, pde_fields.system, - pde_fields.cache, + pde_fields.coherent, pde_fields.pte, pde_fields.frag_size); memcpy(&pde_array[pde_cnt++], &pde_fields, sizeof pde_fields); @@ -817,14 +867,8 @@ pte_further: return -1; } - // decode PTE values pde_is_pte: - pte_fields.fragment = (pte_entry >> 7) & 0x1F; - pte_fields.system = (pte_entry >> 1) & 1; - pte_fields.valid = pte_entry & 1; - pte_fields.prt = (pte_entry >> 61) & 1; - pte_fields.further = (pte_entry >> 56) & 1; - pte_fields.page_base_addr = pte_entry & (pte_fields.further ? 0xFFFFFFFFFFC0ULL : 0xFFFFFFFFF000ULL); + pte_fields = decode_pte_entry_ai(pte_entry); if (asic->options.verbose) asic->mem_funcs.vm_message("%s %s@{0x%" PRIx64 "/%" PRIx64"}==0x%016" PRIx64 ", VA=0x%012" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", P=%" PRIu64 ", FS=%" PRIu64 ", F=%" PRIu64 "\n", @@ -901,12 +945,7 @@ pde_is_pte: va_mask &= (upper_mask & ~pte_page_mask); // grab PTE base address and other data from the PTE that has the F bit set. - pde_fields.frag_size = (pte_entry >> 59) & 0x1F; - pde_fields.pte_base_addr = pte_entry & 0xFFFFFFFFFFC0ULL; - pde_fields.valid = pte_entry & 1; - pde_fields.system = (pte_entry >> 1) & 1; - pde_fields.cache = (pte_entry >> 2) & 1; - pde_fields.pte = 0; + pde_fields = decode_pde_entry_ai(pte_entry); further = 1; goto pte_further; } @@ -928,12 +967,9 @@ pde_is_pte: } else { // in AI+ the BASE_ADDR is treated like a PDE entry... 
// decode PDE values - pde_fields.frag_size = (page_table_base_addr >> 59) & 0x1F; + pde_fields = decode_pde_entry_ai(pde_entry); pde0_block_fragment_size = pde_fields.frag_size; pte_page_mask = (1ULL << (12 + pde0_block_fragment_size)) - 1; - pde_fields.pte_base_addr = page_table_base_addr & 0xFFFFFFFFF000ULL; - pde_fields.system = (page_table_base_addr >> 1) & 1; - pde_fields.valid = page_table_base_addr & 1; if ((asic->options.no_fold_vm_decode || memcmp(&pde_array[0], &pde_fields, sizeof pde_fields)) && asic->options.verbose) asic->mem_funcs.vm_message("PDE=0x%016" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", FS=%" PRIu64 "\n", @@ -953,12 +989,7 @@ pde_is_pte: if (umr_read_vram(asic, UMR_LINEAR_HUB, pde_fields.pte_base_addr + pte_idx * 8, 8, &pte_entry) < 0) return -1; - // decode PTE values - pte_fields.page_base_addr = pte_entry & 0xFFFFFFFFF000ULL; - pte_fields.fragment = (pte_entry >> 7) & 0x1F; - pte_fields.system = (pte_entry >> 1) & 1; - pte_fields.valid = pte_entry & 1; - pte_fields.prt = 0; + pte_fields = decode_pte_entry_ai(pte_entry); if (asic->options.verbose) asic->mem_funcs.vm_message("\\-> PTE=0x%016" PRIx64 ", VA=0x%016" PRIx64 ", PBA==0x%012" PRIx64 ", F=%" PRIu64 ", V=%" PRIu64 ", S=%" PRIu64 "\n", -- 2.20.1 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH umr 3/3] Enhance printing of page tables in AI+ 2021-06-17 19:25 [PATCH umr 1/3] Improve handling of non-standard page tables in AI+ Joseph Greathouse 2021-06-17 19:25 ` [PATCH umr 2/3] Generalize decoding of PDEs and PTEs " Joseph Greathouse @ 2021-06-17 19:25 ` Joseph Greathouse 2021-06-21 16:37 ` [PATCH v2 " Joseph Greathouse 1 sibling, 1 reply; 6+ messages in thread From: Joseph Greathouse @ 2021-06-17 19:25 UTC (permalink / raw) To: amd-gfx; +Cc: Tom.StDenis, Joseph Greathouse Pulls print functions for GPUVM page tables on AI+ chips into their own set of generalized functions, so that we don't have subtly different printouts for different layers. Explicitly prints PDEs with P bit (which makes it a PTE) and makes the PTE with F bit set (further, which makes it a PDE) properly indent the next layer of the print. Prints remaining fields from the PTE and PDE printouts, such as read/write/execute bits and MTYPE from PTE. Signed-off-by: Joseph Greathouse <Joseph.Greathouse@amd.com> --- src/lib/read_vram.c | 184 ++++++++++++++++++++++++++++++-------------- 1 file changed, 127 insertions(+), 57 deletions(-) diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c index 2998873..cb38b60 100644 --- a/src/lib/read_vram.c +++ b/src/lib/read_vram.c @@ -415,6 +415,112 @@ static pte_fields_ai_t decode_pte_entry_ai(uint64_t pte_entry) return pte_fields; } +static void print_pde_fields_ai(struct umr_asic *asic, + pde_fields_ai_t pde_fields) +{ + asic->mem_funcs.vm_message( + ", PBA==0x%012" PRIx64 ", V=%" PRIu64 + ", S=%" PRIu64 ", C=%" PRIu64 + ", P=%" PRIu64 ", FS=%" PRIu64 "\n", + pde_fields.pte_base_addr, + pde_fields.valid, + pde_fields.system, + pde_fields.coherent, + pde_fields.pte, + pde_fields.frag_size); +} +static void print_base_ai(struct umr_asic *asic, + uint64_t pde_entry, uint64_t address, + uint64_t va_mask, pde_fields_ai_t pde_fields, + int is_base_not_pde) +{ + if (is_base_not_pde) + asic->mem_funcs.vm_message("BASE"); + else + 
asic->mem_funcs.vm_message("PDE"); + asic->mem_funcs.vm_message("=0x%016" PRIx64 ", VA=0x%012" PRIx64, + pde_entry, + address & va_mask); + print_pde_fields_ai(asic, pde_fields); +} + +static void print_pde_ai(struct umr_asic *asic, + const char * indentation, int pde_cnt, + int page_table_depth, uint64_t prev_addr, + uint64_t pde_idx, uint64_t pde_entry, uint64_t address, + uint64_t va_mask, pde_fields_ai_t pde_fields) +{ + asic->mem_funcs.vm_message("%s ", &indentation[18-pde_cnt*3]); + if (pde_fields.further) + asic->mem_funcs.vm_message("PTE-FURTHER"); + else + asic->mem_funcs.vm_message("PDE%d", page_table_depth - pde_cnt); + + asic->mem_funcs.vm_message("@{0x%" PRIx64 "/%" PRIx64 + "}=0x%016" PRIx64 ", VA=0x%012" PRIx64, + prev_addr, + pde_idx, + pde_entry, + address & va_mask); + print_pde_fields_ai(asic, pde_fields); +} + +static void print_pte_ai(struct umr_asic *asic, + const char * indentation, int pde_cnt, uint64_t prev_addr, + uint64_t pte_idx, uint64_t pte_entry, uint64_t address, + uint64_t va_mask, pte_fields_ai_t pte_fields) +{ + if (asic == NULL) { + asic->mem_funcs.vm_message("\\-> PTE"); + } else { + asic->mem_funcs.vm_message("%s ", + &indentation[18-pde_cnt*3]); + if (pte_fields.pde) + asic->mem_funcs.vm_message("PDE0-as-PTE"); + else + asic->mem_funcs.vm_message("PTE"); + asic->mem_funcs.vm_message("@{0x%" PRIx64 "/%" PRIx64"}", + prev_addr, + pte_idx); + } + asic->mem_funcs.vm_message("=0x%016" PRIx64 ", VA=0x%012" PRIx64 + ", PBA==0x%012" PRIx64 ", V=%" PRIu64 + ", S=%" PRIu64 ", C=%" PRIu64 ", Z=%" PRIu64 + ", X=%" PRIu64 ", R=%" PRIu64 ", W=%" PRIu64 + ", FS=%" PRIu64 ", T=%" PRIu64 ", MTYPE=", + pte_entry, + address & va_mask, + pte_fields.page_base_addr, + pte_fields.valid, + pte_fields.system, + pte_fields.coherent, + pte_fields.tmz, + pte_fields.execute, + pte_fields.read, + pte_fields.write, + pte_fields.fragment, + pte_fields.prt, + pte_fields.mtype); + switch (pte_fields.mtype) { + case 0: + asic->mem_funcs.vm_message("NC\n"); + 
break; + case 1: + asic->mem_funcs.vm_message("RW\n"); + break; + case 2: + asic->mem_funcs.vm_message("CC\n"); + break; + case 3: + asic->mem_funcs.vm_message("UC\n"); + break; + default: + asic->mem_funcs.vm_message("Unknown (%" PRIu64")\n", + pte_fields.mtype); + break; + } +} + /** * umr_access_vram_ai - Access GPU mapped memory for GFX9+ platforms */ @@ -457,7 +563,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, unsigned char *pdst = dst; char *hub, *vm0prefix, *regprefix; unsigned hubid; - static const char *indentation = " \\->"; + static const char *indentation = " \\->"; memset(®isters, 0, sizeof registers); memset(&pde_array, 0xff, sizeof pde_array); @@ -713,14 +819,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, va_mask <<= (total_vm_bits - top_pdb_bits); if ((asic->options.no_fold_vm_decode || memcmp(&pde_fields, &pde_array[pde_cnt], sizeof pde_fields)) && asic->options.verbose) - asic->mem_funcs.vm_message("BASE=0x%016" PRIx64 ", VA=0x%012" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", C=%" PRIu64 ", P=%" PRIu64 "\n", - pde_entry, - address & va_mask, - pde_fields.pte_base_addr, - pde_fields.valid, - pde_fields.system, - pde_fields.coherent, - pde_fields.pte); + print_base_ai(asic, pde_entry, address, va_mask, pde_fields, 1); memcpy(&pde_array[pde_cnt++], &pde_fields, sizeof pde_fields); current_depth = page_table_depth; @@ -783,27 +882,11 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, log2_ptb_entries = (9 + (page_table_block_size - pde0_block_fragment_size)); ptb_mask = (1ULL << log2_ptb_entries) - 1; pte_page_mask = (1ULL << (pde0_block_fragment_size + 12)) - 1; - if (asic->options.verbose) - asic->mem_funcs.vm_message("pde0.pte = %u\npde0.block_fragment_size = %u\npage_table_block_size = %u\n", - (unsigned)pde_fields.pte, - (unsigned)pde0_block_fragment_size, - (unsigned)page_table_block_size); } if (!pde_fields.pte) { if ((asic->options.no_fold_vm_decode || 
memcmp(&pde_fields, &pde_array[pde_cnt], sizeof pde_fields)) && asic->options.verbose) { - asic->mem_funcs.vm_message("%s PDE%d@{0x%" PRIx64 "/%" PRIx64 "}=0x%016" PRIx64 ", VA=0x%012" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", C=%" PRIu64 ", P=%" PRIu64 ", FS=%" PRIu64 "\n", - &indentation[15-pde_cnt*3], - page_table_depth - pde_cnt, - prev_addr, - pde_idx, - pde_entry, - address & va_mask, - pde_fields.pte_base_addr, - pde_fields.valid, - pde_fields.system, - pde_fields.coherent, - pde_fields.pte, - pde_fields.frag_size); + print_pde_ai(asic, indentation, pde_cnt, page_table_depth, prev_addr, + pde_idx, pde_entry, address, va_mask, pde_fields); memcpy(&pde_array[pde_cnt++], &pde_fields, sizeof pde_fields); } } else { @@ -870,21 +953,6 @@ pte_further: pde_is_pte: pte_fields = decode_pte_entry_ai(pte_entry); - if (asic->options.verbose) - asic->mem_funcs.vm_message("%s %s@{0x%" PRIx64 "/%" PRIx64"}==0x%016" PRIx64 ", VA=0x%012" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", P=%" PRIu64 ", FS=%" PRIu64 ", F=%" PRIu64 "\n", - &indentation[15-pde_cnt*3], - (pte_fields.further) ? "PTE-FURTHER" : "PTE", - prev_addr, - pte_idx, - pte_entry, - address & (((1ULL << page_table_block_size) - 1) << (12 + pde0_block_fragment_size)), - pte_fields.page_base_addr, - pte_fields.valid, - pte_fields.system, - pte_fields.prt, - pte_fields.fragment, - pte_fields.further); - // How many bits in the address are used to index into the PTB? // If further is set, that means we jumped back to pde_is_pte, // and the va_mask was properly set down there. 
@@ -920,6 +988,17 @@ pde_is_pte: va_mask = va_mask & ~mask_to_ignore; } + if (asic->options.verbose) { + if (pte_fields.further) { + pde_fields.further = 1; + print_pde_ai(asic, indentation, pde_cnt, page_table_depth, prev_addr, + pde_idx, pde_entry, address, va_mask, pde_fields); + } else { + print_pte_ai(asic, indentation, pde_cnt, prev_addr, pte_idx, + pte_entry, address, va_mask, pte_fields); + } + } + uint32_t pte_block_fragment_size = 0; if (pte_fields.further) { // Going to go one more layer deep, so now we need the Further-PTE's @@ -946,6 +1025,7 @@ pde_is_pte: // grab PTE base address and other data from the PTE that has the F bit set. pde_fields = decode_pde_entry_ai(pte_entry); + pde_cnt++; further = 1; goto pte_further; } @@ -972,12 +1052,7 @@ pde_is_pte: pte_page_mask = (1ULL << (12 + pde0_block_fragment_size)) - 1; if ((asic->options.no_fold_vm_decode || memcmp(&pde_array[0], &pde_fields, sizeof pde_fields)) && asic->options.verbose) - asic->mem_funcs.vm_message("PDE=0x%016" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", FS=%" PRIu64 "\n", - page_table_base_addr, - pde_fields.pte_base_addr, - pde_fields.valid, - pde_fields.system, - pde_fields.frag_size); + print_base_ai(asic, page_table_base_addr, address, -1, pde_fields, 0); memcpy(&pde_array[0], &pde_fields, sizeof pde_fields); if (!pde_fields.valid) @@ -992,13 +1067,8 @@ pde_is_pte: pte_fields = decode_pte_entry_ai(pte_entry); if (asic->options.verbose) - asic->mem_funcs.vm_message("\\-> PTE=0x%016" PRIx64 ", VA=0x%016" PRIx64 ", PBA==0x%012" PRIx64 ", F=%" PRIu64 ", V=%" PRIu64 ", S=%" PRIu64 "\n", - pte_entry, - address & ~((uint64_t)0xFFF), - pte_fields.page_base_addr, - pte_fields.fragment, - pte_fields.valid, - pte_fields.system); + print_pte_ai(asic, NULL, 0, 0, 0, pte_entry, address, + ~((uint64_t)0xFFF), pte_fields); if (pdst && !pte_fields.valid) goto invalid_page; @@ -1018,13 +1088,13 @@ next_page: if (asic->options.verbose) { if (pte_fields.system == 1) { 
asic->mem_funcs.vm_message("%s Computed address we will read from: %s:%" PRIx64 ", (reading: %" PRIu32 " bytes)\n", - &indentation[15-pde_cnt*3-3], + &indentation[18-pde_cnt*3-3], "sys", start_addr, chunk_size); } else { asic->mem_funcs.vm_message("%s Computed address we will read from: %s:%" PRIx64 " (MCA:%" PRIx64"), (reading: %" PRIu32 " bytes)\n", - &indentation[15-pde_cnt*3-3], + &indentation[18-pde_cnt*3-3], "vram", start_addr, start_addr + vm_fb_offset, -- 2.20.1 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v2 umr 3/3] Enhance printing of page tables in AI+ 2021-06-17 19:25 ` [PATCH umr 3/3] Enhance printing of page tables " Joseph Greathouse @ 2021-06-21 16:37 ` Joseph Greathouse 2021-06-22 13:25 ` StDenis, Tom 2021-06-23 14:13 ` StDenis, Tom 0 siblings, 2 replies; 6+ messages in thread From: Joseph Greathouse @ 2021-06-21 16:37 UTC (permalink / raw) To: amd-gfx; +Cc: Tom.StDenis, Joseph Greathouse Pulls print functions for GPUVM page tables on AI+ chips into their own set of generalized functions, so that we don't have subtly different printouts for different layers. Explicitly prints PDEs with P bit (which makes it a PTE) and makes the PTE with F bit set (further, which makes it a PDE) properly indent the next layer of the print. Prints remaining fields from the PTE and PDE printouts, such as read/write/execute bits and MTYPE from PTE. v2: Correctly handle printing translate-further PTEs Signed-off-by: Joseph Greathouse <Joseph.Greathouse@amd.com> --- src/lib/read_vram.c | 184 ++++++++++++++++++++++++++++++-------------- 1 file changed, 127 insertions(+), 57 deletions(-) diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c index 2998873..bea1232 100644 --- a/src/lib/read_vram.c +++ b/src/lib/read_vram.c @@ -415,6 +415,112 @@ static pte_fields_ai_t decode_pte_entry_ai(uint64_t pte_entry) return pte_fields; } +static void print_pde_fields_ai(struct umr_asic *asic, + pde_fields_ai_t pde_fields) +{ + asic->mem_funcs.vm_message( + ", PBA==0x%012" PRIx64 ", V=%" PRIu64 + ", S=%" PRIu64 ", C=%" PRIu64 + ", P=%" PRIu64 ", FS=%" PRIu64 "\n", + pde_fields.pte_base_addr, + pde_fields.valid, + pde_fields.system, + pde_fields.coherent, + pde_fields.pte, + pde_fields.frag_size); +} +static void print_base_ai(struct umr_asic *asic, + uint64_t pde_entry, uint64_t address, + uint64_t va_mask, pde_fields_ai_t pde_fields, + int is_base_not_pde) +{ + if (is_base_not_pde) + asic->mem_funcs.vm_message("BASE"); + else + asic->mem_funcs.vm_message("PDE"); + 
asic->mem_funcs.vm_message("=0x%016" PRIx64 ", VA=0x%012" PRIx64, + pde_entry, + address & va_mask); + print_pde_fields_ai(asic, pde_fields); +} + +static void print_pde_ai(struct umr_asic *asic, + const char * indentation, int pde_cnt, + int page_table_depth, uint64_t prev_addr, + uint64_t pde_idx, uint64_t pde_entry, uint64_t address, + uint64_t va_mask, pde_fields_ai_t pde_fields) +{ + asic->mem_funcs.vm_message("%s ", &indentation[18-pde_cnt*3]); + if (pde_fields.further) + asic->mem_funcs.vm_message("PTE-FURTHER"); + else + asic->mem_funcs.vm_message("PDE%d", page_table_depth - pde_cnt); + + asic->mem_funcs.vm_message("@{0x%" PRIx64 "/%" PRIx64 + "}=0x%016" PRIx64 ", VA=0x%012" PRIx64, + prev_addr, + pde_idx, + pde_entry, + address & va_mask); + print_pde_fields_ai(asic, pde_fields); +} + +static void print_pte_ai(struct umr_asic *asic, + const char * indentation, int pde_cnt, uint64_t prev_addr, + uint64_t pte_idx, uint64_t pte_entry, uint64_t address, + uint64_t va_mask, pte_fields_ai_t pte_fields) +{ + if (asic == NULL) { + asic->mem_funcs.vm_message("\\-> PTE"); + } else { + asic->mem_funcs.vm_message("%s ", + &indentation[18-pde_cnt*3]); + if (pte_fields.pde) + asic->mem_funcs.vm_message("PDE0-as-PTE"); + else + asic->mem_funcs.vm_message("PTE"); + asic->mem_funcs.vm_message("@{0x%" PRIx64 "/%" PRIx64"}", + prev_addr, + pte_idx); + } + asic->mem_funcs.vm_message("=0x%016" PRIx64 ", VA=0x%012" PRIx64 + ", PBA==0x%012" PRIx64 ", V=%" PRIu64 + ", S=%" PRIu64 ", C=%" PRIu64 ", Z=%" PRIu64 + ", X=%" PRIu64 ", R=%" PRIu64 ", W=%" PRIu64 + ", FS=%" PRIu64 ", T=%" PRIu64 ", MTYPE=", + pte_entry, + address & va_mask, + pte_fields.page_base_addr, + pte_fields.valid, + pte_fields.system, + pte_fields.coherent, + pte_fields.tmz, + pte_fields.execute, + pte_fields.read, + pte_fields.write, + pte_fields.fragment, + pte_fields.prt, + pte_fields.mtype); + switch (pte_fields.mtype) { + case 0: + asic->mem_funcs.vm_message("NC\n"); + break; + case 1: + 
asic->mem_funcs.vm_message("RW\n"); + break; + case 2: + asic->mem_funcs.vm_message("CC\n"); + break; + case 3: + asic->mem_funcs.vm_message("UC\n"); + break; + default: + asic->mem_funcs.vm_message("Unknown (%" PRIu64")\n", + pte_fields.mtype); + break; + } +} + /** * umr_access_vram_ai - Access GPU mapped memory for GFX9+ platforms */ @@ -457,7 +563,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, unsigned char *pdst = dst; char *hub, *vm0prefix, *regprefix; unsigned hubid; - static const char *indentation = " \\->"; + static const char *indentation = " \\->"; memset(®isters, 0, sizeof registers); memset(&pde_array, 0xff, sizeof pde_array); @@ -713,14 +819,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, va_mask <<= (total_vm_bits - top_pdb_bits); if ((asic->options.no_fold_vm_decode || memcmp(&pde_fields, &pde_array[pde_cnt], sizeof pde_fields)) && asic->options.verbose) - asic->mem_funcs.vm_message("BASE=0x%016" PRIx64 ", VA=0x%012" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", C=%" PRIu64 ", P=%" PRIu64 "\n", - pde_entry, - address & va_mask, - pde_fields.pte_base_addr, - pde_fields.valid, - pde_fields.system, - pde_fields.coherent, - pde_fields.pte); + print_base_ai(asic, pde_entry, address, va_mask, pde_fields, 1); memcpy(&pde_array[pde_cnt++], &pde_fields, sizeof pde_fields); current_depth = page_table_depth; @@ -783,27 +882,11 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, log2_ptb_entries = (9 + (page_table_block_size - pde0_block_fragment_size)); ptb_mask = (1ULL << log2_ptb_entries) - 1; pte_page_mask = (1ULL << (pde0_block_fragment_size + 12)) - 1; - if (asic->options.verbose) - asic->mem_funcs.vm_message("pde0.pte = %u\npde0.block_fragment_size = %u\npage_table_block_size = %u\n", - (unsigned)pde_fields.pte, - (unsigned)pde0_block_fragment_size, - (unsigned)page_table_block_size); } if (!pde_fields.pte) { if ((asic->options.no_fold_vm_decode || memcmp(&pde_fields, 
&pde_array[pde_cnt], sizeof pde_fields)) && asic->options.verbose) { - asic->mem_funcs.vm_message("%s PDE%d@{0x%" PRIx64 "/%" PRIx64 "}=0x%016" PRIx64 ", VA=0x%012" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", C=%" PRIu64 ", P=%" PRIu64 ", FS=%" PRIu64 "\n", - &indentation[15-pde_cnt*3], - page_table_depth - pde_cnt, - prev_addr, - pde_idx, - pde_entry, - address & va_mask, - pde_fields.pte_base_addr, - pde_fields.valid, - pde_fields.system, - pde_fields.coherent, - pde_fields.pte, - pde_fields.frag_size); + print_pde_ai(asic, indentation, pde_cnt, page_table_depth, prev_addr, + pde_idx, pde_entry, address, va_mask, pde_fields); memcpy(&pde_array[pde_cnt++], &pde_fields, sizeof pde_fields); } } else { @@ -870,21 +953,6 @@ pte_further: pde_is_pte: pte_fields = decode_pte_entry_ai(pte_entry); - if (asic->options.verbose) - asic->mem_funcs.vm_message("%s %s@{0x%" PRIx64 "/%" PRIx64"}==0x%016" PRIx64 ", VA=0x%012" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", P=%" PRIu64 ", FS=%" PRIu64 ", F=%" PRIu64 "\n", - &indentation[15-pde_cnt*3], - (pte_fields.further) ? "PTE-FURTHER" : "PTE", - prev_addr, - pte_idx, - pte_entry, - address & (((1ULL << page_table_block_size) - 1) << (12 + pde0_block_fragment_size)), - pte_fields.page_base_addr, - pte_fields.valid, - pte_fields.system, - pte_fields.prt, - pte_fields.fragment, - pte_fields.further); - // How many bits in the address are used to index into the PTB? // If further is set, that means we jumped back to pde_is_pte, // and the va_mask was properly set down there. 
@@ -920,6 +988,17 @@ pde_is_pte: va_mask = va_mask & ~mask_to_ignore; } + if (asic->options.verbose) { + if (pte_fields.further) { + pde_fields = decode_pde_entry_ai(pte_entry); + print_pde_ai(asic, indentation, pde_cnt, page_table_depth, prev_addr, + pte_idx, pte_entry, address, va_mask, pde_fields); + } else { + print_pte_ai(asic, indentation, pde_cnt, prev_addr, pte_idx, + pte_entry, address, va_mask, pte_fields); + } + } + uint32_t pte_block_fragment_size = 0; if (pte_fields.further) { // Going to go one more layer deep, so now we need the Further-PTE's @@ -946,6 +1025,7 @@ pde_is_pte: // grab PTE base address and other data from the PTE that has the F bit set. pde_fields = decode_pde_entry_ai(pte_entry); + pde_cnt++; further = 1; goto pte_further; } @@ -972,12 +1052,7 @@ pde_is_pte: pte_page_mask = (1ULL << (12 + pde0_block_fragment_size)) - 1; if ((asic->options.no_fold_vm_decode || memcmp(&pde_array[0], &pde_fields, sizeof pde_fields)) && asic->options.verbose) - asic->mem_funcs.vm_message("PDE=0x%016" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", FS=%" PRIu64 "\n", - page_table_base_addr, - pde_fields.pte_base_addr, - pde_fields.valid, - pde_fields.system, - pde_fields.frag_size); + print_base_ai(asic, page_table_base_addr, address, -1, pde_fields, 0); memcpy(&pde_array[0], &pde_fields, sizeof pde_fields); if (!pde_fields.valid) @@ -992,13 +1067,8 @@ pde_is_pte: pte_fields = decode_pte_entry_ai(pte_entry); if (asic->options.verbose) - asic->mem_funcs.vm_message("\\-> PTE=0x%016" PRIx64 ", VA=0x%016" PRIx64 ", PBA==0x%012" PRIx64 ", F=%" PRIu64 ", V=%" PRIu64 ", S=%" PRIu64 "\n", - pte_entry, - address & ~((uint64_t)0xFFF), - pte_fields.page_base_addr, - pte_fields.fragment, - pte_fields.valid, - pte_fields.system); + print_pte_ai(asic, NULL, 0, 0, 0, pte_entry, address, + ~((uint64_t)0xFFF), pte_fields); if (pdst && !pte_fields.valid) goto invalid_page; @@ -1018,13 +1088,13 @@ next_page: if (asic->options.verbose) { if (pte_fields.system == 
1) { asic->mem_funcs.vm_message("%s Computed address we will read from: %s:%" PRIx64 ", (reading: %" PRIu32 " bytes)\n", - &indentation[15-pde_cnt*3-3], + &indentation[18-pde_cnt*3-3], "sys", start_addr, chunk_size); } else { asic->mem_funcs.vm_message("%s Computed address we will read from: %s:%" PRIx64 " (MCA:%" PRIx64"), (reading: %" PRIu32 " bytes)\n", - &indentation[15-pde_cnt*3-3], + &indentation[18-pde_cnt*3-3], "vram", start_addr, start_addr + vm_fb_offset, -- 2.20.1 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH v2 umr 3/3] Enhance printing of page tables in AI+ 2021-06-21 16:37 ` [PATCH v2 " Joseph Greathouse @ 2021-06-22 13:25 ` StDenis, Tom 2021-06-23 14:13 ` StDenis, Tom 1 sibling, 0 replies; 6+ messages in thread From: StDenis, Tom @ 2021-06-22 13:25 UTC (permalink / raw) To: Greathouse, Joseph, amd-gfx [AMD Official Use Only] Hi, Just a quick update. Your first vector passes with your v2 patch in place. I'll add the other 3 and then start reviewing the code. Thanks, Tom ________________________________________ From: Greathouse, Joseph <Joseph.Greathouse@amd.com> Sent: Monday, June 21, 2021 12:37 To: amd-gfx@lists.freedesktop.org Cc: StDenis, Tom; Greathouse, Joseph Subject: [PATCH v2 umr 3/3] Enhance printing of page tables in AI+ Pulls print functions for GPUVM page tables on AI+ chips into their own set of generalized functions, so that we don't have subtly different printouts for different layers. Explicitly prints PDEs with P bit (which makes it a PTE) and makes the PTE with F bit set (further, which makes it a PDE) properly indent the next layer of the print. Prints remaining fields from the PTE and PDE printouts, such as read/write/execute bits and MTYPE from PTE. 
v2: Correctly handle printing translate-further PTEs Signed-off-by: Joseph Greathouse <Joseph.Greathouse@amd.com> --- src/lib/read_vram.c | 184 ++++++++++++++++++++++++++++++-------------- 1 file changed, 127 insertions(+), 57 deletions(-) diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c index 2998873..bea1232 100644 --- a/src/lib/read_vram.c +++ b/src/lib/read_vram.c @@ -415,6 +415,112 @@ static pte_fields_ai_t decode_pte_entry_ai(uint64_t pte_entry) return pte_fields; } +static void print_pde_fields_ai(struct umr_asic *asic, + pde_fields_ai_t pde_fields) +{ + asic->mem_funcs.vm_message( + ", PBA==0x%012" PRIx64 ", V=%" PRIu64 + ", S=%" PRIu64 ", C=%" PRIu64 + ", P=%" PRIu64 ", FS=%" PRIu64 "\n", + pde_fields.pte_base_addr, + pde_fields.valid, + pde_fields.system, + pde_fields.coherent, + pde_fields.pte, + pde_fields.frag_size); +} +static void print_base_ai(struct umr_asic *asic, + uint64_t pde_entry, uint64_t address, + uint64_t va_mask, pde_fields_ai_t pde_fields, + int is_base_not_pde) +{ + if (is_base_not_pde) + asic->mem_funcs.vm_message("BASE"); + else + asic->mem_funcs.vm_message("PDE"); + asic->mem_funcs.vm_message("=0x%016" PRIx64 ", VA=0x%012" PRIx64, + pde_entry, + address & va_mask); + print_pde_fields_ai(asic, pde_fields); +} + +static void print_pde_ai(struct umr_asic *asic, + const char * indentation, int pde_cnt, + int page_table_depth, uint64_t prev_addr, + uint64_t pde_idx, uint64_t pde_entry, uint64_t address, + uint64_t va_mask, pde_fields_ai_t pde_fields) +{ + asic->mem_funcs.vm_message("%s ", &indentation[18-pde_cnt*3]); + if (pde_fields.further) + asic->mem_funcs.vm_message("PTE-FURTHER"); + else + asic->mem_funcs.vm_message("PDE%d", page_table_depth - pde_cnt); + + asic->mem_funcs.vm_message("@{0x%" PRIx64 "/%" PRIx64 + "}=0x%016" PRIx64 ", VA=0x%012" PRIx64, + prev_addr, + pde_idx, + pde_entry, + address & va_mask); + print_pde_fields_ai(asic, pde_fields); +} + +static void print_pte_ai(struct umr_asic *asic, + const char * 
indentation, int pde_cnt, uint64_t prev_addr, + uint64_t pte_idx, uint64_t pte_entry, uint64_t address, + uint64_t va_mask, pte_fields_ai_t pte_fields) +{ + if (asic == NULL) { + asic->mem_funcs.vm_message("\\-> PTE"); + } else { + asic->mem_funcs.vm_message("%s ", + &indentation[18-pde_cnt*3]); + if (pte_fields.pde) + asic->mem_funcs.vm_message("PDE0-as-PTE"); + else + asic->mem_funcs.vm_message("PTE"); + asic->mem_funcs.vm_message("@{0x%" PRIx64 "/%" PRIx64"}", + prev_addr, + pte_idx); + } + asic->mem_funcs.vm_message("=0x%016" PRIx64 ", VA=0x%012" PRIx64 + ", PBA==0x%012" PRIx64 ", V=%" PRIu64 + ", S=%" PRIu64 ", C=%" PRIu64 ", Z=%" PRIu64 + ", X=%" PRIu64 ", R=%" PRIu64 ", W=%" PRIu64 + ", FS=%" PRIu64 ", T=%" PRIu64 ", MTYPE=", + pte_entry, + address & va_mask, + pte_fields.page_base_addr, + pte_fields.valid, + pte_fields.system, + pte_fields.coherent, + pte_fields.tmz, + pte_fields.execute, + pte_fields.read, + pte_fields.write, + pte_fields.fragment, + pte_fields.prt, + pte_fields.mtype); + switch (pte_fields.mtype) { + case 0: + asic->mem_funcs.vm_message("NC\n"); + break; + case 1: + asic->mem_funcs.vm_message("RW\n"); + break; + case 2: + asic->mem_funcs.vm_message("CC\n"); + break; + case 3: + asic->mem_funcs.vm_message("UC\n"); + break; + default: + asic->mem_funcs.vm_message("Unknown (%" PRIu64")\n", + pte_fields.mtype); + break; + } +} + /** * umr_access_vram_ai - Access GPU mapped memory for GFX9+ platforms */ @@ -457,7 +563,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, unsigned char *pdst = dst; char *hub, *vm0prefix, *regprefix; unsigned hubid; - static const char *indentation = " \\->"; + static const char *indentation = " \\->"; memset(®isters, 0, sizeof registers); memset(&pde_array, 0xff, sizeof pde_array); @@ -713,14 +819,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, va_mask <<= (total_vm_bits - top_pdb_bits); if ((asic->options.no_fold_vm_decode || memcmp(&pde_fields, &pde_array[pde_cnt], 
sizeof pde_fields)) && asic->options.verbose) - asic->mem_funcs.vm_message("BASE=0x%016" PRIx64 ", VA=0x%012" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", C=%" PRIu64 ", P=%" PRIu64 "\n", - pde_entry, - address & va_mask, - pde_fields.pte_base_addr, - pde_fields.valid, - pde_fields.system, - pde_fields.coherent, - pde_fields.pte); + print_base_ai(asic, pde_entry, address, va_mask, pde_fields, 1); memcpy(&pde_array[pde_cnt++], &pde_fields, sizeof pde_fields); current_depth = page_table_depth; @@ -783,27 +882,11 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, log2_ptb_entries = (9 + (page_table_block_size - pde0_block_fragment_size)); ptb_mask = (1ULL << log2_ptb_entries) - 1; pte_page_mask = (1ULL << (pde0_block_fragment_size + 12)) - 1; - if (asic->options.verbose) - asic->mem_funcs.vm_message("pde0.pte = %u\npde0.block_fragment_size = %u\npage_table_block_size = %u\n", - (unsigned)pde_fields.pte, - (unsigned)pde0_block_fragment_size, - (unsigned)page_table_block_size); } if (!pde_fields.pte) { if ((asic->options.no_fold_vm_decode || memcmp(&pde_fields, &pde_array[pde_cnt], sizeof pde_fields)) && asic->options.verbose) { - asic->mem_funcs.vm_message("%s PDE%d@{0x%" PRIx64 "/%" PRIx64 "}=0x%016" PRIx64 ", VA=0x%012" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", C=%" PRIu64 ", P=%" PRIu64 ", FS=%" PRIu64 "\n", - &indentation[15-pde_cnt*3], - page_table_depth - pde_cnt, - prev_addr, - pde_idx, - pde_entry, - address & va_mask, - pde_fields.pte_base_addr, - pde_fields.valid, - pde_fields.system, - pde_fields.coherent, - pde_fields.pte, - pde_fields.frag_size); + print_pde_ai(asic, indentation, pde_cnt, page_table_depth, prev_addr, + pde_idx, pde_entry, address, va_mask, pde_fields); memcpy(&pde_array[pde_cnt++], &pde_fields, sizeof pde_fields); } } else { @@ -870,21 +953,6 @@ pte_further: pde_is_pte: pte_fields = decode_pte_entry_ai(pte_entry); - if (asic->options.verbose) - asic->mem_funcs.vm_message("%s 
%s@{0x%" PRIx64 "/%" PRIx64"}==0x%016" PRIx64 ", VA=0x%012" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", P=%" PRIu64 ", FS=%" PRIu64 ", F=%" PRIu64 "\n", - &indentation[15-pde_cnt*3], - (pte_fields.further) ? "PTE-FURTHER" : "PTE", - prev_addr, - pte_idx, - pte_entry, - address & (((1ULL << page_table_block_size) - 1) << (12 + pde0_block_fragment_size)), - pte_fields.page_base_addr, - pte_fields.valid, - pte_fields.system, - pte_fields.prt, - pte_fields.fragment, - pte_fields.further); - // How many bits in the address are used to index into the PTB? // If further is set, that means we jumped back to pde_is_pte, // and the va_mask was properly set down there. @@ -920,6 +988,17 @@ pde_is_pte: va_mask = va_mask & ~mask_to_ignore; } + if (asic->options.verbose) { + if (pte_fields.further) { + pde_fields = decode_pde_entry_ai(pte_entry); + print_pde_ai(asic, indentation, pde_cnt, page_table_depth, prev_addr, + pte_idx, pte_entry, address, va_mask, pde_fields); + } else { + print_pte_ai(asic, indentation, pde_cnt, prev_addr, pte_idx, + pte_entry, address, va_mask, pte_fields); + } + } + uint32_t pte_block_fragment_size = 0; if (pte_fields.further) { // Going to go one more layer deep, so now we need the Further-PTE's @@ -946,6 +1025,7 @@ pde_is_pte: // grab PTE base address and other data from the PTE that has the F bit set. 
pde_fields = decode_pde_entry_ai(pte_entry); + pde_cnt++; further = 1; goto pte_further; } @@ -972,12 +1052,7 @@ pde_is_pte: pte_page_mask = (1ULL << (12 + pde0_block_fragment_size)) - 1; if ((asic->options.no_fold_vm_decode || memcmp(&pde_array[0], &pde_fields, sizeof pde_fields)) && asic->options.verbose) - asic->mem_funcs.vm_message("PDE=0x%016" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", FS=%" PRIu64 "\n", - page_table_base_addr, - pde_fields.pte_base_addr, - pde_fields.valid, - pde_fields.system, - pde_fields.frag_size); + print_base_ai(asic, page_table_base_addr, address, -1, pde_fields, 0); memcpy(&pde_array[0], &pde_fields, sizeof pde_fields); if (!pde_fields.valid) @@ -992,13 +1067,8 @@ pde_is_pte: pte_fields = decode_pte_entry_ai(pte_entry); if (asic->options.verbose) - asic->mem_funcs.vm_message("\\-> PTE=0x%016" PRIx64 ", VA=0x%016" PRIx64 ", PBA==0x%012" PRIx64 ", F=%" PRIu64 ", V=%" PRIu64 ", S=%" PRIu64 "\n", - pte_entry, - address & ~((uint64_t)0xFFF), - pte_fields.page_base_addr, - pte_fields.fragment, - pte_fields.valid, - pte_fields.system); + print_pte_ai(asic, NULL, 0, 0, 0, pte_entry, address, + ~((uint64_t)0xFFF), pte_fields); if (pdst && !pte_fields.valid) goto invalid_page; @@ -1018,13 +1088,13 @@ next_page: if (asic->options.verbose) { if (pte_fields.system == 1) { asic->mem_funcs.vm_message("%s Computed address we will read from: %s:%" PRIx64 ", (reading: %" PRIu32 " bytes)\n", - &indentation[15-pde_cnt*3-3], + &indentation[18-pde_cnt*3-3], "sys", start_addr, chunk_size); } else { asic->mem_funcs.vm_message("%s Computed address we will read from: %s:%" PRIx64 " (MCA:%" PRIx64"), (reading: %" PRIu32 " bytes)\n", - &indentation[15-pde_cnt*3-3], + &indentation[18-pde_cnt*3-3], "vram", start_addr, start_addr + vm_fb_offset, -- 2.20.1 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related 
[flat|nested] 6+ messages in thread
* Re: [PATCH v2 umr 3/3] Enhance printing of page tables in AI+ 2021-06-21 16:37 ` [PATCH v2 " Joseph Greathouse 2021-06-22 13:25 ` StDenis, Tom @ 2021-06-23 14:13 ` StDenis, Tom 1 sibling, 0 replies; 6+ messages in thread From: StDenis, Tom @ 2021-06-23 14:13 UTC (permalink / raw) To: Greathouse, Joseph, amd-gfx [AMD Official Use Only] Tested and pushed out to main. Thanks, Tom ________________________________________ From: Greathouse, Joseph <Joseph.Greathouse@amd.com> Sent: Monday, June 21, 2021 12:37 To: amd-gfx@lists.freedesktop.org Cc: StDenis, Tom; Greathouse, Joseph Subject: [PATCH v2 umr 3/3] Enhance printing of page tables in AI+ Pulls print functions for GPUVM page tables on AI+ chips into their own set of generalized functions, so that we don't have subtly different printouts for different layers. Explicitly prints PDEs with P bit (which makes it a PTE) and makes the PTE with F bit set (further, which makes it a PDE) properly indent the next layer of the print. Prints remaining fields from the PTE and PDE printouts, such as read/write/execute bits and MTYPE from PTE. 
v2: Correctly handle printing translate-further PTEs Signed-off-by: Joseph Greathouse <Joseph.Greathouse@amd.com> --- src/lib/read_vram.c | 184 ++++++++++++++++++++++++++++++-------------- 1 file changed, 127 insertions(+), 57 deletions(-) diff --git a/src/lib/read_vram.c b/src/lib/read_vram.c index 2998873..bea1232 100644 --- a/src/lib/read_vram.c +++ b/src/lib/read_vram.c @@ -415,6 +415,112 @@ static pte_fields_ai_t decode_pte_entry_ai(uint64_t pte_entry) return pte_fields; } +static void print_pde_fields_ai(struct umr_asic *asic, + pde_fields_ai_t pde_fields) +{ + asic->mem_funcs.vm_message( + ", PBA==0x%012" PRIx64 ", V=%" PRIu64 + ", S=%" PRIu64 ", C=%" PRIu64 + ", P=%" PRIu64 ", FS=%" PRIu64 "\n", + pde_fields.pte_base_addr, + pde_fields.valid, + pde_fields.system, + pde_fields.coherent, + pde_fields.pte, + pde_fields.frag_size); +} +static void print_base_ai(struct umr_asic *asic, + uint64_t pde_entry, uint64_t address, + uint64_t va_mask, pde_fields_ai_t pde_fields, + int is_base_not_pde) +{ + if (is_base_not_pde) + asic->mem_funcs.vm_message("BASE"); + else + asic->mem_funcs.vm_message("PDE"); + asic->mem_funcs.vm_message("=0x%016" PRIx64 ", VA=0x%012" PRIx64, + pde_entry, + address & va_mask); + print_pde_fields_ai(asic, pde_fields); +} + +static void print_pde_ai(struct umr_asic *asic, + const char * indentation, int pde_cnt, + int page_table_depth, uint64_t prev_addr, + uint64_t pde_idx, uint64_t pde_entry, uint64_t address, + uint64_t va_mask, pde_fields_ai_t pde_fields) +{ + asic->mem_funcs.vm_message("%s ", &indentation[18-pde_cnt*3]); + if (pde_fields.further) + asic->mem_funcs.vm_message("PTE-FURTHER"); + else + asic->mem_funcs.vm_message("PDE%d", page_table_depth - pde_cnt); + + asic->mem_funcs.vm_message("@{0x%" PRIx64 "/%" PRIx64 + "}=0x%016" PRIx64 ", VA=0x%012" PRIx64, + prev_addr, + pde_idx, + pde_entry, + address & va_mask); + print_pde_fields_ai(asic, pde_fields); +} + +static void print_pte_ai(struct umr_asic *asic, + const char * 
indentation, int pde_cnt, uint64_t prev_addr, + uint64_t pte_idx, uint64_t pte_entry, uint64_t address, + uint64_t va_mask, pte_fields_ai_t pte_fields) +{ + if (asic == NULL) { + asic->mem_funcs.vm_message("\\-> PTE"); + } else { + asic->mem_funcs.vm_message("%s ", + &indentation[18-pde_cnt*3]); + if (pte_fields.pde) + asic->mem_funcs.vm_message("PDE0-as-PTE"); + else + asic->mem_funcs.vm_message("PTE"); + asic->mem_funcs.vm_message("@{0x%" PRIx64 "/%" PRIx64"}", + prev_addr, + pte_idx); + } + asic->mem_funcs.vm_message("=0x%016" PRIx64 ", VA=0x%012" PRIx64 + ", PBA==0x%012" PRIx64 ", V=%" PRIu64 + ", S=%" PRIu64 ", C=%" PRIu64 ", Z=%" PRIu64 + ", X=%" PRIu64 ", R=%" PRIu64 ", W=%" PRIu64 + ", FS=%" PRIu64 ", T=%" PRIu64 ", MTYPE=", + pte_entry, + address & va_mask, + pte_fields.page_base_addr, + pte_fields.valid, + pte_fields.system, + pte_fields.coherent, + pte_fields.tmz, + pte_fields.execute, + pte_fields.read, + pte_fields.write, + pte_fields.fragment, + pte_fields.prt, + pte_fields.mtype); + switch (pte_fields.mtype) { + case 0: + asic->mem_funcs.vm_message("NC\n"); + break; + case 1: + asic->mem_funcs.vm_message("RW\n"); + break; + case 2: + asic->mem_funcs.vm_message("CC\n"); + break; + case 3: + asic->mem_funcs.vm_message("UC\n"); + break; + default: + asic->mem_funcs.vm_message("Unknown (%" PRIu64")\n", + pte_fields.mtype); + break; + } +} + /** * umr_access_vram_ai - Access GPU mapped memory for GFX9+ platforms */ @@ -457,7 +563,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, unsigned char *pdst = dst; char *hub, *vm0prefix, *regprefix; unsigned hubid; - static const char *indentation = " \\->"; + static const char *indentation = " \\->"; memset(®isters, 0, sizeof registers); memset(&pde_array, 0xff, sizeof pde_array); @@ -713,14 +819,7 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, va_mask <<= (total_vm_bits - top_pdb_bits); if ((asic->options.no_fold_vm_decode || memcmp(&pde_fields, &pde_array[pde_cnt], 
sizeof pde_fields)) && asic->options.verbose) - asic->mem_funcs.vm_message("BASE=0x%016" PRIx64 ", VA=0x%012" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", C=%" PRIu64 ", P=%" PRIu64 "\n", - pde_entry, - address & va_mask, - pde_fields.pte_base_addr, - pde_fields.valid, - pde_fields.system, - pde_fields.coherent, - pde_fields.pte); + print_base_ai(asic, pde_entry, address, va_mask, pde_fields, 1); memcpy(&pde_array[pde_cnt++], &pde_fields, sizeof pde_fields); current_depth = page_table_depth; @@ -783,27 +882,11 @@ static int umr_access_vram_ai(struct umr_asic *asic, uint32_t vmid, log2_ptb_entries = (9 + (page_table_block_size - pde0_block_fragment_size)); ptb_mask = (1ULL << log2_ptb_entries) - 1; pte_page_mask = (1ULL << (pde0_block_fragment_size + 12)) - 1; - if (asic->options.verbose) - asic->mem_funcs.vm_message("pde0.pte = %u\npde0.block_fragment_size = %u\npage_table_block_size = %u\n", - (unsigned)pde_fields.pte, - (unsigned)pde0_block_fragment_size, - (unsigned)page_table_block_size); } if (!pde_fields.pte) { if ((asic->options.no_fold_vm_decode || memcmp(&pde_fields, &pde_array[pde_cnt], sizeof pde_fields)) && asic->options.verbose) { - asic->mem_funcs.vm_message("%s PDE%d@{0x%" PRIx64 "/%" PRIx64 "}=0x%016" PRIx64 ", VA=0x%012" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", C=%" PRIu64 ", P=%" PRIu64 ", FS=%" PRIu64 "\n", - &indentation[15-pde_cnt*3], - page_table_depth - pde_cnt, - prev_addr, - pde_idx, - pde_entry, - address & va_mask, - pde_fields.pte_base_addr, - pde_fields.valid, - pde_fields.system, - pde_fields.coherent, - pde_fields.pte, - pde_fields.frag_size); + print_pde_ai(asic, indentation, pde_cnt, page_table_depth, prev_addr, + pde_idx, pde_entry, address, va_mask, pde_fields); memcpy(&pde_array[pde_cnt++], &pde_fields, sizeof pde_fields); } } else { @@ -870,21 +953,6 @@ pte_further: pde_is_pte: pte_fields = decode_pte_entry_ai(pte_entry); - if (asic->options.verbose) - asic->mem_funcs.vm_message("%s 
%s@{0x%" PRIx64 "/%" PRIx64"}==0x%016" PRIx64 ", VA=0x%012" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", P=%" PRIu64 ", FS=%" PRIu64 ", F=%" PRIu64 "\n", - &indentation[15-pde_cnt*3], - (pte_fields.further) ? "PTE-FURTHER" : "PTE", - prev_addr, - pte_idx, - pte_entry, - address & (((1ULL << page_table_block_size) - 1) << (12 + pde0_block_fragment_size)), - pte_fields.page_base_addr, - pte_fields.valid, - pte_fields.system, - pte_fields.prt, - pte_fields.fragment, - pte_fields.further); - // How many bits in the address are used to index into the PTB? // If further is set, that means we jumped back to pde_is_pte, // and the va_mask was properly set down there. @@ -920,6 +988,17 @@ pde_is_pte: va_mask = va_mask & ~mask_to_ignore; } + if (asic->options.verbose) { + if (pte_fields.further) { + pde_fields = decode_pde_entry_ai(pte_entry); + print_pde_ai(asic, indentation, pde_cnt, page_table_depth, prev_addr, + pte_idx, pte_entry, address, va_mask, pde_fields); + } else { + print_pte_ai(asic, indentation, pde_cnt, prev_addr, pte_idx, + pte_entry, address, va_mask, pte_fields); + } + } + uint32_t pte_block_fragment_size = 0; if (pte_fields.further) { // Going to go one more layer deep, so now we need the Further-PTE's @@ -946,6 +1025,7 @@ pde_is_pte: // grab PTE base address and other data from the PTE that has the F bit set. 
pde_fields = decode_pde_entry_ai(pte_entry); + pde_cnt++; further = 1; goto pte_further; } @@ -972,12 +1052,7 @@ pde_is_pte: pte_page_mask = (1ULL << (12 + pde0_block_fragment_size)) - 1; if ((asic->options.no_fold_vm_decode || memcmp(&pde_array[0], &pde_fields, sizeof pde_fields)) && asic->options.verbose) - asic->mem_funcs.vm_message("PDE=0x%016" PRIx64 ", PBA==0x%012" PRIx64 ", V=%" PRIu64 ", S=%" PRIu64 ", FS=%" PRIu64 "\n", - page_table_base_addr, - pde_fields.pte_base_addr, - pde_fields.valid, - pde_fields.system, - pde_fields.frag_size); + print_base_ai(asic, page_table_base_addr, address, -1, pde_fields, 0); memcpy(&pde_array[0], &pde_fields, sizeof pde_fields); if (!pde_fields.valid) @@ -992,13 +1067,8 @@ pde_is_pte: pte_fields = decode_pte_entry_ai(pte_entry); if (asic->options.verbose) - asic->mem_funcs.vm_message("\\-> PTE=0x%016" PRIx64 ", VA=0x%016" PRIx64 ", PBA==0x%012" PRIx64 ", F=%" PRIu64 ", V=%" PRIu64 ", S=%" PRIu64 "\n", - pte_entry, - address & ~((uint64_t)0xFFF), - pte_fields.page_base_addr, - pte_fields.fragment, - pte_fields.valid, - pte_fields.system); + print_pte_ai(asic, NULL, 0, 0, 0, pte_entry, address, + ~((uint64_t)0xFFF), pte_fields); if (pdst && !pte_fields.valid) goto invalid_page; @@ -1018,13 +1088,13 @@ next_page: if (asic->options.verbose) { if (pte_fields.system == 1) { asic->mem_funcs.vm_message("%s Computed address we will read from: %s:%" PRIx64 ", (reading: %" PRIu32 " bytes)\n", - &indentation[15-pde_cnt*3-3], + &indentation[18-pde_cnt*3-3], "sys", start_addr, chunk_size); } else { asic->mem_funcs.vm_message("%s Computed address we will read from: %s:%" PRIx64 " (MCA:%" PRIx64"), (reading: %" PRIu32 " bytes)\n", - &indentation[15-pde_cnt*3-3], + &indentation[18-pde_cnt*3-3], "vram", start_addr, start_addr + vm_fb_offset, -- 2.20.1 _______________________________________________ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ^ permalink raw reply related 
[flat|nested] 6+ messages in thread
end of thread, other threads:[~2021-06-23 14:14 UTC | newest] Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2021-06-17 19:25 [PATCH umr 1/3] Improve handling of non-standard page tables in AI+ Joseph Greathouse 2021-06-17 19:25 ` [PATCH umr 2/3] Generalize decoding of PDEs and PTEs " Joseph Greathouse 2021-06-17 19:25 ` [PATCH umr 3/3] Enhance printing of page tables " Joseph Greathouse 2021-06-21 16:37 ` [PATCH v2 " Joseph Greathouse 2021-06-22 13:25 ` StDenis, Tom 2021-06-23 14:13 ` StDenis, Tom
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.