All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3] drm/amdgpu: add new trace event for page table update v3
@ 2020-08-12  4:33 Shashank Sharma
  2020-08-12  6:45 ` Christian König
  0 siblings, 1 reply; 6+ messages in thread
From: Shashank Sharma @ 2020-08-12  4:33 UTC (permalink / raw)
  To: amd-gfx; +Cc: Alex Deucher, Christian König, Shashank Sharma

This patch adds a new trace event to track the PTE update
events. This specific event will provide information like:
- start and end of virtual memory mapping
- HW engine flags for the map
- physical address for mapping

This will be particularly useful for memory profiling tools
(like RMV) which are monitoring the page table update events.

V2: Added physical address lookup logic in trace point
V3: switch to use __dynamic_array
    added nptes int the TPprint arguments list
    added page size in the arg list

Cc: Christian König <christian.koenig@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 38 +++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  9 ++++--
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 63e734a125fb..b9aae7983b4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -321,6 +321,44 @@ DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_cs,
 	    TP_ARGS(mapping)
 );
 
+TRACE_EVENT(amdgpu_vm_update_ptes,
+	    TP_PROTO(struct amdgpu_vm_update_params *p,
+		     uint64_t start, uint64_t end,
+		     unsigned int nptes, uint64_t dst,
+		     uint64_t incr, uint64_t flags),
+	TP_ARGS(p, start, end, nptes, dst, incr, flags),
+	TP_STRUCT__entry(
+			 __field(u64, start)
+			 __field(u64, end)
+			 __field(u64, flags)
+			 __field(u64, incr)
+			 __field(unsigned int, nptes)
+			 __dynamic_array(u64, dst, nptes)
+	),
+
+	TP_fast_assign(
+			unsigned int i;
+
+			__entry->start = start;
+			__entry->end = end;
+			__entry->flags = flags;
+			__entry->incr = incr;
+			__entry->nptes = nptes;
+
+			for (i = 0; i < nptes; ++i) {
+				u64 addr = p->pages_addr ? amdgpu_vm_map_gart(
+					p->pages_addr, dst) : dst;
+
+				((u64 *)__get_dynamic_array(dst))[i] = addr;
+				dst += incr;
+			}
+	),
+	TP_printk("seg:0x%010llx-0x%010llx, flags:0x%llx, nptr=%u, pgsz:%llu,"
+		  " dst:\n%s", __entry->start, __entry->end, __entry->flags,
+		  __entry->nptes, __entry->incr,
+		  __print_array(__get_dynamic_array(dst), __entry->nptes, 8))
+);
+
 TRACE_EVENT(amdgpu_vm_set_ptes,
 	    TP_PROTO(uint64_t pe, uint64_t addr, unsigned count,
 		     uint32_t incr, uint64_t flags, bool direct),
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 71e005cf2952..b5dbb5e8bc61 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1513,17 +1513,22 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
 		do {
 			uint64_t upd_end = min(entry_end, frag_end);
 			unsigned nptes = (upd_end - frag_start) >> shift;
+			uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag);
 
 			/* This can happen when we set higher level PDs to
 			 * silent to stop fault floods.
 			 */
 			nptes = max(nptes, 1u);
+
+			trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
+						    nptes, dst, incr,
+						    upd_flags);
 			amdgpu_vm_update_flags(params, pt, cursor.level,
 					       pe_start, dst, nptes, incr,
-					       flags | AMDGPU_PTE_FRAG(frag));
+					       upd_flags);
 
 			pe_start += nptes * 8;
-			dst += (uint64_t)nptes * AMDGPU_GPU_PAGE_SIZE << shift;
+			dst += nptes * incr;
 
 			frag_start = upd_end;
 			if (frag_start >= frag_end) {
-- 
2.25.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v3] drm/amdgpu: add new trace event for page table update v3
  2020-08-12  4:33 [PATCH v3] drm/amdgpu: add new trace event for page table update v3 Shashank Sharma
@ 2020-08-12  6:45 ` Christian König
  2020-08-12  8:15   ` Shashank Sharma
  0 siblings, 1 reply; 6+ messages in thread
From: Christian König @ 2020-08-12  6:45 UTC (permalink / raw)
  To: Shashank Sharma, amd-gfx; +Cc: Alex Deucher, Christian König

Am 12.08.20 um 06:33 schrieb Shashank Sharma:
> This patch adds a new trace event to track the PTE update
> events. This specific event will provide information like:
> - start and end of virtual memory mapping
> - HW engine flags for the map
> - physical address for mapping
>
> This will be particularly useful for memory profiling tools
> (like RMV) which are monitoring the page table update events.
>
> V2: Added physical address lookup logic in trace point
> V3: switch to use __dynamic_array
>      added nptes int the TPprint arguments list
>      added page size in the arg list
>
> Cc: Christian König <christian.koenig@amd.com>
> Cc: Alex Deucher <alexander.deucher@amd.com>
> Signed-off-by: Christian König <christian.koenig@amd.com>
> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 38 +++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  9 ++++--
>   2 files changed, 45 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> index 63e734a125fb..b9aae7983b4b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> @@ -321,6 +321,44 @@ DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_cs,
>   	    TP_ARGS(mapping)
>   );
>   
> +TRACE_EVENT(amdgpu_vm_update_ptes,
> +	    TP_PROTO(struct amdgpu_vm_update_params *p,
> +		     uint64_t start, uint64_t end,
> +		     unsigned int nptes, uint64_t dst,
> +		     uint64_t incr, uint64_t flags),
> +	TP_ARGS(p, start, end, nptes, dst, incr, flags),
> +	TP_STRUCT__entry(
> +			 __field(u64, start)
> +			 __field(u64, end)
> +			 __field(u64, flags)
> +			 __field(u64, incr)
> +			 __field(unsigned int, nptes)
> +			 __dynamic_array(u64, dst, nptes)
> +	),
> +
> +	TP_fast_assign(
> +			unsigned int i;
> +
> +			__entry->start = start;
> +			__entry->end = end;
> +			__entry->flags = flags;
> +			__entry->incr = incr;
> +			__entry->nptes = nptes;
> +
> +			for (i = 0; i < nptes; ++i) {
> +				u64 addr = p->pages_addr ? amdgpu_vm_map_gart(
> +					p->pages_addr, dst) : dst;
> +
> +				((u64 *)__get_dynamic_array(dst))[i] = addr;
> +				dst += incr;
> +			}
> +	),
> +	TP_printk("seg:0x%010llx-0x%010llx, flags:0x%llx, nptr=%u, pgsz:%llu,"
> +		  " dst:\n%s", __entry->start, __entry->end, __entry->flags,
> +		  __entry->nptes, __entry->incr,

This is not correct. The increment is NOT the page size, but rather the 
page size rounded down to a power of 512+4K.

In other words page size can be 4K, 8K, 16K, 32K, 64K....1M, 2M, 
4M....512M, 1G, 2G....

But the increment can only be 4K, 2M, 1G....

And do we need the nptes here? We just need it to print the correct 
number of destination addresses.

Regards,
Christian.

> +		  __print_array(__get_dynamic_array(dst), __entry->nptes, 8))
> +);
> +
>   TRACE_EVENT(amdgpu_vm_set_ptes,
>   	    TP_PROTO(uint64_t pe, uint64_t addr, unsigned count,
>   		     uint32_t incr, uint64_t flags, bool direct),
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 71e005cf2952..b5dbb5e8bc61 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1513,17 +1513,22 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>   		do {
>   			uint64_t upd_end = min(entry_end, frag_end);
>   			unsigned nptes = (upd_end - frag_start) >> shift;
> +			uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag);
>   
>   			/* This can happen when we set higher level PDs to
>   			 * silent to stop fault floods.
>   			 */
>   			nptes = max(nptes, 1u);
> +
> +			trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
> +						    nptes, dst, incr,
> +						    upd_flags);
>   			amdgpu_vm_update_flags(params, pt, cursor.level,
>   					       pe_start, dst, nptes, incr,
> -					       flags | AMDGPU_PTE_FRAG(frag));
> +					       upd_flags);
>   
>   			pe_start += nptes * 8;
> -			dst += (uint64_t)nptes * AMDGPU_GPU_PAGE_SIZE << shift;
> +			dst += nptes * incr;
>   
>   			frag_start = upd_end;
>   			if (frag_start >= frag_end) {

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3] drm/amdgpu: add new trace event for page table update v3
  2020-08-12  6:45 ` Christian König
@ 2020-08-12  8:15   ` Shashank Sharma
  2020-08-12  8:32     ` Christian König
  0 siblings, 1 reply; 6+ messages in thread
From: Shashank Sharma @ 2020-08-12  8:15 UTC (permalink / raw)
  To: christian.koenig, amd-gfx; +Cc: Alex Deucher

Hello Christian,

On 12/08/20 12:15 pm, Christian König wrote:
> Am 12.08.20 um 06:33 schrieb Shashank Sharma:
>> This patch adds a new trace event to track the PTE update
>> events. This specific event will provide information like:
>> - start and end of virtual memory mapping
>> - HW engine flags for the map
>> - physical address for mapping
>>
>> This will be particularly useful for memory profiling tools
>> (like RMV) which are monitoring the page table update events.
>>
>> V2: Added physical address lookup logic in trace point
>> V3: switch to use __dynamic_array
>>      added nptes int the TPprint arguments list
>>      added page size in the arg list
>>
>> Cc: Christian König <christian.koenig@amd.com>
>> Cc: Alex Deucher <alexander.deucher@amd.com>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
>> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 38 +++++++++++++++++++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  9 ++++--
>>   2 files changed, 45 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>> index 63e734a125fb..b9aae7983b4b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>> @@ -321,6 +321,44 @@ DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_cs,
>>   	    TP_ARGS(mapping)
>>   );
>>   
>> +TRACE_EVENT(amdgpu_vm_update_ptes,
>> +	    TP_PROTO(struct amdgpu_vm_update_params *p,
>> +		     uint64_t start, uint64_t end,
>> +		     unsigned int nptes, uint64_t dst,
>> +		     uint64_t incr, uint64_t flags),
>> +	TP_ARGS(p, start, end, nptes, dst, incr, flags),
>> +	TP_STRUCT__entry(
>> +			 __field(u64, start)
>> +			 __field(u64, end)
>> +			 __field(u64, flags)
>> +			 __field(u64, incr)
>> +			 __field(unsigned int, nptes)
>> +			 __dynamic_array(u64, dst, nptes)
>> +	),
>> +
>> +	TP_fast_assign(
>> +			unsigned int i;
>> +
>> +			__entry->start = start;
>> +			__entry->end = end;
>> +			__entry->flags = flags;
>> +			__entry->incr = incr;
>> +			__entry->nptes = nptes;
>> +
>> +			for (i = 0; i < nptes; ++i) {
>> +				u64 addr = p->pages_addr ? amdgpu_vm_map_gart(
>> +					p->pages_addr, dst) : dst;
>> +
>> +				((u64 *)__get_dynamic_array(dst))[i] = addr;
>> +				dst += incr;
>> +			}
>> +	),
>> +	TP_printk("seg:0x%010llx-0x%010llx, flags:0x%llx, nptr=%u, pgsz:%llu,"
>> +		  " dst:\n%s", __entry->start, __entry->end, __entry->flags,
>> +		  __entry->nptes, __entry->incr,
> This is not correct. The increment is NOT the page size, but rather the 
> page size rounded down to a power of 512+4K.
>
> In other words page size can be 4K, 8K, 16K, 32K, 64K....1M, 2M, 
> 4M....512M, 1G, 2G....
>
> But the increment can only be 4K, 2M, 1G....
Understood. But I think the requirement here is for increment. My understanding is that the tool needs to save the page entries, and for that, it will need start of virtual mem, start of physical mem, mapping size and step to increment the entries. If that's so, we can re-label this entry as "step" instead of "page size". Please let me know if you think it's the right thing to do. 
> And do we need the nptes here? We just need it to print the correct 
> number of destination addresses.

Agree, we don't really need nptes here, I will remove that and send V4.

- Shashank

>
> Regards,
> Christian.
>
>> +		  __print_array(__get_dynamic_array(dst), __entry->nptes, 8))
>> +);
>> +
>>   TRACE_EVENT(amdgpu_vm_set_ptes,
>>   	    TP_PROTO(uint64_t pe, uint64_t addr, unsigned count,
>>   		     uint32_t incr, uint64_t flags, bool direct),
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> index 71e005cf2952..b5dbb5e8bc61 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>> @@ -1513,17 +1513,22 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>   		do {
>>   			uint64_t upd_end = min(entry_end, frag_end);
>>   			unsigned nptes = (upd_end - frag_start) >> shift;
>> +			uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag);
>>   
>>   			/* This can happen when we set higher level PDs to
>>   			 * silent to stop fault floods.
>>   			 */
>>   			nptes = max(nptes, 1u);
>> +
>> +			trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
>> +						    nptes, dst, incr,
>> +						    upd_flags);
>>   			amdgpu_vm_update_flags(params, pt, cursor.level,
>>   					       pe_start, dst, nptes, incr,
>> -					       flags | AMDGPU_PTE_FRAG(frag));
>> +					       upd_flags);
>>   
>>   			pe_start += nptes * 8;
>> -			dst += (uint64_t)nptes * AMDGPU_GPU_PAGE_SIZE << shift;
>> +			dst += nptes * incr;
>>   
>>   			frag_start = upd_end;
>>   			if (frag_start >= frag_end) {
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3] drm/amdgpu: add new trace event for page table update v3
  2020-08-12  8:15   ` Shashank Sharma
@ 2020-08-12  8:32     ` Christian König
  2020-08-12 12:09       ` Shashank Sharma
  0 siblings, 1 reply; 6+ messages in thread
From: Christian König @ 2020-08-12  8:32 UTC (permalink / raw)
  To: Shashank Sharma, amd-gfx; +Cc: Alex Deucher

Am 12.08.20 um 10:15 schrieb Shashank Sharma:
> Hello Christian,
>
> On 12/08/20 12:15 pm, Christian König wrote:
>> Am 12.08.20 um 06:33 schrieb Shashank Sharma:
>>> This patch adds a new trace event to track the PTE update
>>> events. This specific event will provide information like:
>>> - start and end of virtual memory mapping
>>> - HW engine flags for the map
>>> - physical address for mapping
>>>
>>> This will be particularly useful for memory profiling tools
>>> (like RMV) which are monitoring the page table update events.
>>>
>>> V2: Added physical address lookup logic in trace point
>>> V3: switch to use __dynamic_array
>>>       added nptes int the TPprint arguments list
>>>       added page size in the arg list
>>>
>>> Cc: Christian König <christian.koenig@amd.com>
>>> Cc: Alex Deucher <alexander.deucher@amd.com>
>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 38 +++++++++++++++++++++++
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  9 ++++--
>>>    2 files changed, 45 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>>> index 63e734a125fb..b9aae7983b4b 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>>> @@ -321,6 +321,44 @@ DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_cs,
>>>    	    TP_ARGS(mapping)
>>>    );
>>>    
>>> +TRACE_EVENT(amdgpu_vm_update_ptes,
>>> +	    TP_PROTO(struct amdgpu_vm_update_params *p,
>>> +		     uint64_t start, uint64_t end,
>>> +		     unsigned int nptes, uint64_t dst,
>>> +		     uint64_t incr, uint64_t flags),
>>> +	TP_ARGS(p, start, end, nptes, dst, incr, flags),
>>> +	TP_STRUCT__entry(
>>> +			 __field(u64, start)
>>> +			 __field(u64, end)
>>> +			 __field(u64, flags)
>>> +			 __field(u64, incr)
>>> +			 __field(unsigned int, nptes)
>>> +			 __dynamic_array(u64, dst, nptes)
>>> +	),
>>> +
>>> +	TP_fast_assign(
>>> +			unsigned int i;
>>> +
>>> +			__entry->start = start;
>>> +			__entry->end = end;
>>> +			__entry->flags = flags;
>>> +			__entry->incr = incr;
>>> +			__entry->nptes = nptes;
>>> +
>>> +			for (i = 0; i < nptes; ++i) {
>>> +				u64 addr = p->pages_addr ? amdgpu_vm_map_gart(
>>> +					p->pages_addr, dst) : dst;
>>> +
>>> +				((u64 *)__get_dynamic_array(dst))[i] = addr;
>>> +				dst += incr;
>>> +			}
>>> +	),
>>> +	TP_printk("seg:0x%010llx-0x%010llx, flags:0x%llx, nptr=%u, pgsz:%llu,"
>>> +		  " dst:\n%s", __entry->start, __entry->end, __entry->flags,
>>> +		  __entry->nptes, __entry->incr,
>> This is not correct. The increment is NOT the page size, but rather the
>> page size rounded down to a power of 512+4K.
>>
>> In other words page size can be 4K, 8K, 16K, 32K, 64K....1M, 2M,
>> 4M....512M, 1G, 2G....
>>
>> But the increment can only be 4K, 2M, 1G....
> Understood. But I think the requirement here is for increment. My understanding is that the tool needs to save the page entries, and for that, it will need start of virtual mem, start of physical mem, mapping size and step to increment the entries. If that's so, we can re-label this entry as "step" instead of "page size". Please let me know if you think it's the right thing to do.

We could stick with the naming increment if that helps, but this can 
also be derived from the number of destination addresses we have.

On the other hand explicitly mentioning it probably won't hurt us either.

And by the way what does "seg" mean?

Christian.

>> And do we need the nptes here? We just need it to print the correct
>> number of destination addresses.
> Agree, we don't really need nptes here, I will remove that and send V4.
>
> - Shashank
>
>> Regards,
>> Christian.
>>
>>> +		  __print_array(__get_dynamic_array(dst), __entry->nptes, 8))
>>> +);
>>> +
>>>    TRACE_EVENT(amdgpu_vm_set_ptes,
>>>    	    TP_PROTO(uint64_t pe, uint64_t addr, unsigned count,
>>>    		     uint32_t incr, uint64_t flags, bool direct),
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index 71e005cf2952..b5dbb5e8bc61 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -1513,17 +1513,22 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>>    		do {
>>>    			uint64_t upd_end = min(entry_end, frag_end);
>>>    			unsigned nptes = (upd_end - frag_start) >> shift;
>>> +			uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag);
>>>    
>>>    			/* This can happen when we set higher level PDs to
>>>    			 * silent to stop fault floods.
>>>    			 */
>>>    			nptes = max(nptes, 1u);
>>> +
>>> +			trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
>>> +						    nptes, dst, incr,
>>> +						    upd_flags);
>>>    			amdgpu_vm_update_flags(params, pt, cursor.level,
>>>    					       pe_start, dst, nptes, incr,
>>> -					       flags | AMDGPU_PTE_FRAG(frag));
>>> +					       upd_flags);
>>>    
>>>    			pe_start += nptes * 8;
>>> -			dst += (uint64_t)nptes * AMDGPU_GPU_PAGE_SIZE << shift;
>>> +			dst += nptes * incr;
>>>    
>>>    			frag_start = upd_end;
>>>    			if (frag_start >= frag_end) {

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3] drm/amdgpu: add new trace event for page table update v3
  2020-08-12  8:32     ` Christian König
@ 2020-08-12 12:09       ` Shashank Sharma
  2020-08-12 12:46         ` Christian König
  0 siblings, 1 reply; 6+ messages in thread
From: Shashank Sharma @ 2020-08-12 12:09 UTC (permalink / raw)
  To: Christian König, amd-gfx; +Cc: Alex Deucher


On 12/08/20 2:02 pm, Christian König wrote:
> Am 12.08.20 um 10:15 schrieb Shashank Sharma:
>> Hello Christian,
>>
>> On 12/08/20 12:15 pm, Christian König wrote:
>>> Am 12.08.20 um 06:33 schrieb Shashank Sharma:
>>>> This patch adds a new trace event to track the PTE update
>>>> events. This specific event will provide information like:
>>>> - start and end of virtual memory mapping
>>>> - HW engine flags for the map
>>>> - physical address for mapping
>>>>
>>>> This will be particularly useful for memory profiling tools
>>>> (like RMV) which are monitoring the page table update events.
>>>>
>>>> V2: Added physical address lookup logic in trace point
>>>> V3: switch to use __dynamic_array
>>>>       added nptes int the TPprint arguments list
>>>>       added page size in the arg list
>>>>
>>>> Cc: Christian König <christian.koenig@amd.com>
>>>> Cc: Alex Deucher <alexander.deucher@amd.com>
>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
>>>> ---
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 38 +++++++++++++++++++++++
>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  9 ++++--
>>>>    2 files changed, 45 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>>>> index 63e734a125fb..b9aae7983b4b 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>>>> @@ -321,6 +321,44 @@ DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_cs,
>>>>    	    TP_ARGS(mapping)
>>>>    );
>>>>    
>>>> +TRACE_EVENT(amdgpu_vm_update_ptes,
>>>> +	    TP_PROTO(struct amdgpu_vm_update_params *p,
>>>> +		     uint64_t start, uint64_t end,
>>>> +		     unsigned int nptes, uint64_t dst,
>>>> +		     uint64_t incr, uint64_t flags),
>>>> +	TP_ARGS(p, start, end, nptes, dst, incr, flags),
>>>> +	TP_STRUCT__entry(
>>>> +			 __field(u64, start)
>>>> +			 __field(u64, end)
>>>> +			 __field(u64, flags)
>>>> +			 __field(u64, incr)
>>>> +			 __field(unsigned int, nptes)
>>>> +			 __dynamic_array(u64, dst, nptes)
>>>> +	),
>>>> +
>>>> +	TP_fast_assign(
>>>> +			unsigned int i;
>>>> +
>>>> +			__entry->start = start;
>>>> +			__entry->end = end;
>>>> +			__entry->flags = flags;
>>>> +			__entry->incr = incr;
>>>> +			__entry->nptes = nptes;
>>>> +
>>>> +			for (i = 0; i < nptes; ++i) {
>>>> +				u64 addr = p->pages_addr ? amdgpu_vm_map_gart(
>>>> +					p->pages_addr, dst) : dst;
>>>> +
>>>> +				((u64 *)__get_dynamic_array(dst))[i] = addr;
>>>> +				dst += incr;
>>>> +			}
>>>> +	),
>>>> +	TP_printk("seg:0x%010llx-0x%010llx, flags:0x%llx, nptr=%u, pgsz:%llu,"
>>>> +		  " dst:\n%s", __entry->start, __entry->end, __entry->flags,
>>>> +		  __entry->nptes, __entry->incr,
>>> This is not correct. The increment is NOT the page size, but rather the
>>> page size rounded down to a power of 512+4K.
>>>
>>> In other words page size can be 4K, 8K, 16K, 32K, 64K....1M, 2M,
>>> 4M....512M, 1G, 2G....
>>>
>>> But the increment can only be 4K, 2M, 1G....
>> Understood. But I think the requirement here is for increment. My understanding is that the tool needs to save the page entries, and for that, it will need start of virtual mem, start of physical mem, mapping size and step to increment the entries. If that's so, we can re-label this entry as "step" instead of "page size". Please let me know if you think it's the right thing to do.
> We could stick with the naming increment if that helps, but this can 
> also be derived from the number of destination addresses we have.
sure, i will make it increment.
>
> On the other hand explicitly mentioning it probably won't hurt us either.
>
> And by the way what does "seg" mean?

Ah, to get into 80 char limit, I made 'segment' as 'seg' and later just realized I have to still break the print into two lines :) . I will make it back to segment or start/end

- Shashank

>
> Christian.
>
>>> And do we need the nptes here? We just need it to print the correct
>>> number of destination addresses.
>> Agree, we don't really need nptes here, I will remove that and send V4.
>>
>> - Shashank
>>
>>> Regards,
>>> Christian.
>>>
>>>> +		  __print_array(__get_dynamic_array(dst), __entry->nptes, 8))
>>>> +);
>>>> +
>>>>    TRACE_EVENT(amdgpu_vm_set_ptes,
>>>>    	    TP_PROTO(uint64_t pe, uint64_t addr, unsigned count,
>>>>    		     uint32_t incr, uint64_t flags, bool direct),
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> index 71e005cf2952..b5dbb5e8bc61 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>> @@ -1513,17 +1513,22 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>>>    		do {
>>>>    			uint64_t upd_end = min(entry_end, frag_end);
>>>>    			unsigned nptes = (upd_end - frag_start) >> shift;
>>>> +			uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag);
>>>>    
>>>>    			/* This can happen when we set higher level PDs to
>>>>    			 * silent to stop fault floods.
>>>>    			 */
>>>>    			nptes = max(nptes, 1u);
>>>> +
>>>> +			trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
>>>> +						    nptes, dst, incr,
>>>> +						    upd_flags);
>>>>    			amdgpu_vm_update_flags(params, pt, cursor.level,
>>>>    					       pe_start, dst, nptes, incr,
>>>> -					       flags | AMDGPU_PTE_FRAG(frag));
>>>> +					       upd_flags);
>>>>    
>>>>    			pe_start += nptes * 8;
>>>> -			dst += (uint64_t)nptes * AMDGPU_GPU_PAGE_SIZE << shift;
>>>> +			dst += nptes * incr;
>>>>    
>>>>    			frag_start = upd_end;
>>>>    			if (frag_start >= frag_end) {
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3] drm/amdgpu: add new trace event for page table update v3
  2020-08-12 12:09       ` Shashank Sharma
@ 2020-08-12 12:46         ` Christian König
  0 siblings, 0 replies; 6+ messages in thread
From: Christian König @ 2020-08-12 12:46 UTC (permalink / raw)
  To: Shashank Sharma, Christian König, amd-gfx; +Cc: Alex Deucher

Am 12.08.20 um 14:09 schrieb Shashank Sharma:
> On 12/08/20 2:02 pm, Christian König wrote:
>> Am 12.08.20 um 10:15 schrieb Shashank Sharma:
>>> Hello Christian,
>>>
>>> On 12/08/20 12:15 pm, Christian König wrote:
>>>> Am 12.08.20 um 06:33 schrieb Shashank Sharma:
>>>>> This patch adds a new trace event to track the PTE update
>>>>> events. This specific event will provide information like:
>>>>> - start and end of virtual memory mapping
>>>>> - HW engine flags for the map
>>>>> - physical address for mapping
>>>>>
>>>>> This will be particularly useful for memory profiling tools
>>>>> (like RMV) which are monitoring the page table update events.
>>>>>
>>>>> V2: Added physical address lookup logic in trace point
>>>>> V3: switch to use __dynamic_array
>>>>>        added nptes int the TPprint arguments list
>>>>>        added page size in the arg list
>>>>>
>>>>> Cc: Christian König <christian.koenig@amd.com>
>>>>> Cc: Alex Deucher <alexander.deucher@amd.com>
>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>>> Signed-off-by: Shashank Sharma <shashank.sharma@amd.com>
>>>>> ---
>>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 38 +++++++++++++++++++++++
>>>>>     drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  9 ++++--
>>>>>     2 files changed, 45 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>>>>> index 63e734a125fb..b9aae7983b4b 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>>>>> @@ -321,6 +321,44 @@ DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_cs,
>>>>>     	    TP_ARGS(mapping)
>>>>>     );
>>>>>     
>>>>> +TRACE_EVENT(amdgpu_vm_update_ptes,
>>>>> +	    TP_PROTO(struct amdgpu_vm_update_params *p,
>>>>> +		     uint64_t start, uint64_t end,
>>>>> +		     unsigned int nptes, uint64_t dst,
>>>>> +		     uint64_t incr, uint64_t flags),
>>>>> +	TP_ARGS(p, start, end, nptes, dst, incr, flags),
>>>>> +	TP_STRUCT__entry(
>>>>> +			 __field(u64, start)
>>>>> +			 __field(u64, end)
>>>>> +			 __field(u64, flags)
>>>>> +			 __field(u64, incr)
>>>>> +			 __field(unsigned int, nptes)
>>>>> +			 __dynamic_array(u64, dst, nptes)
>>>>> +	),
>>>>> +
>>>>> +	TP_fast_assign(
>>>>> +			unsigned int i;
>>>>> +
>>>>> +			__entry->start = start;
>>>>> +			__entry->end = end;
>>>>> +			__entry->flags = flags;
>>>>> +			__entry->incr = incr;
>>>>> +			__entry->nptes = nptes;
>>>>> +
>>>>> +			for (i = 0; i < nptes; ++i) {
>>>>> +				u64 addr = p->pages_addr ? amdgpu_vm_map_gart(
>>>>> +					p->pages_addr, dst) : dst;
>>>>> +
>>>>> +				((u64 *)__get_dynamic_array(dst))[i] = addr;
>>>>> +				dst += incr;
>>>>> +			}
>>>>> +	),
>>>>> +	TP_printk("seg:0x%010llx-0x%010llx, flags:0x%llx, nptr=%u, pgsz:%llu,"
>>>>> +		  " dst:\n%s", __entry->start, __entry->end, __entry->flags,
>>>>> +		  __entry->nptes, __entry->incr,
>>>> This is not correct. The increment is NOT the page size, but rather the
>>>> page size rounded down to a power of 512+4K.
>>>>
>>>> In other words page size can be 4K, 8K, 16K, 32K, 64K....1M, 2M,
>>>> 4M....512M, 1G, 2G....
>>>>
>>>> But the increment can only be 4K, 2M, 1G....
>>> Understood. But I think the requirement here is for increment. My understanding is that the tool needs to save the page entries, and for that, it will need start of virtual mem, start of physical mem, mapping size and step to increment the entries. If that's so, we can re-label this entry as "step" instead of "page size". Please let me know if you think it's the right thing to do.
>> We could stick with the naming increment if that helps, but this can
>> also be derived from the number of destination addresses we have.
> sure, i will make it increment.
>> On the other hand explicitly mentioning it probably won't hurt us either.
>>
>> And by the way what does "seg" mean?
> Ah, to get into 80 char limit, I made 'segment' as 'seg' and later just realized I have to still break the print into two lines :) . I will make it back to segment or start/end

Ah, maybe rather use VA for virtual address. But start/end is probably 
the best option.

Christian.

>
> - Shashank
>
>> Christian.
>>
>>>> And do we need the nptes here? We just need it to print the correct
>>>> number of destination addresses.
>>> Agree, we don't really need nptes here, I will remove that and send V4.
>>>
>>> - Shashank
>>>
>>>> Regards,
>>>> Christian.
>>>>
>>>>> +		  __print_array(__get_dynamic_array(dst), __entry->nptes, 8))
>>>>> +);
>>>>> +
>>>>>     TRACE_EVENT(amdgpu_vm_set_ptes,
>>>>>     	    TP_PROTO(uint64_t pe, uint64_t addr, unsigned count,
>>>>>     		     uint32_t incr, uint64_t flags, bool direct),
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> index 71e005cf2952..b5dbb5e8bc61 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>> @@ -1513,17 +1513,22 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
>>>>>     		do {
>>>>>     			uint64_t upd_end = min(entry_end, frag_end);
>>>>>     			unsigned nptes = (upd_end - frag_start) >> shift;
>>>>> +			uint64_t upd_flags = flags | AMDGPU_PTE_FRAG(frag);
>>>>>     
>>>>>     			/* This can happen when we set higher level PDs to
>>>>>     			 * silent to stop fault floods.
>>>>>     			 */
>>>>>     			nptes = max(nptes, 1u);
>>>>> +
>>>>> +			trace_amdgpu_vm_update_ptes(params, frag_start, upd_end,
>>>>> +						    nptes, dst, incr,
>>>>> +						    upd_flags);
>>>>>     			amdgpu_vm_update_flags(params, pt, cursor.level,
>>>>>     					       pe_start, dst, nptes, incr,
>>>>> -					       flags | AMDGPU_PTE_FRAG(frag));
>>>>> +					       upd_flags);
>>>>>     
>>>>>     			pe_start += nptes * 8;
>>>>> -			dst += (uint64_t)nptes * AMDGPU_GPU_PAGE_SIZE << shift;
>>>>> +			dst += nptes * incr;
>>>>>     
>>>>>     			frag_start = upd_end;
>>>>>     			if (frag_start >= frag_end) {
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2020-08-12 12:46 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-12  4:33 [PATCH v3] drm/amdgpu: add new trace event for page table update v3 Shashank Sharma
2020-08-12  6:45 ` Christian König
2020-08-12  8:15   ` Shashank Sharma
2020-08-12  8:32     ` Christian König
2020-08-12 12:09       ` Shashank Sharma
2020-08-12 12:46         ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.