All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping
@ 2017-08-01 11:51 Tom St Denis
       [not found] ` <20170801115131.27610-1-tom.stdenis-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 12+ messages in thread
From: Tom St Denis @ 2017-08-01 11:51 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Tom St Denis

This helps map DMA addresses back to physical addresses.

Signed-off-by: Tom St Denis <tom.stdenis@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 28 ++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  8 ++++++++
 2 files changed, 36 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 509f7a63d40c..5b2bb28da504 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -14,6 +14,34 @@
 #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \
 	 job->base.s_fence->finished.ops->get_timeline_name(&job->base.s_fence->finished)
 
+TRACE_EVENT(amdgpu_ttm_tt_populate,
+	    TP_PROTO(uint16_t domain, uint8_t bus, uint8_t slot, uint8_t func, uint64_t dma_address, uint64_t phys_address),
+	    TP_ARGS(domain, bus, slot, func, dma_address, phys_address),
+	    TP_STRUCT__entry(
+				__field(uint16_t, domain)
+				__field(uint8_t, bus)
+				__field(uint8_t, slot)
+				__field(uint8_t, func)
+				__field(uint64_t, dma)
+				__field(uint64_t, phys)
+			    ),
+	    TP_fast_assign(
+			   __entry->domain = domain;
+			   __entry->bus = bus;
+			   __entry->slot = slot;
+			   __entry->func = func;
+			   __entry->dma = dma_address;
+			   __entry->phys = phys_address;
+			   ),
+	    TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx",
+		      (unsigned)__entry->domain,
+		      (unsigned)__entry->bus,
+		      (unsigned)__entry->slot,
+		      (unsigned)__entry->func,
+		      (unsigned long long)__entry->dma,
+		      (unsigned long long)__entry->phys)
+);
+
 TRACE_EVENT(amdgpu_mm_rreg,
 	    TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
 	    TP_ARGS(did, reg, value),
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 8da59d212b3b..1cf274603476 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -43,6 +43,7 @@
 #include <linux/pagemap.h>
 #include <linux/debugfs.h>
 #include "amdgpu.h"
+#include "amdgpu_trace.h"
 #include "bif/bif_4_1_d.h"
 
 #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
@@ -944,6 +945,13 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
 			ttm_pool_unpopulate(ttm);
 			return -EFAULT;
 		}
+		trace_amdgpu_ttm_tt_populate(
+			pci_domain_nr(adev->pdev->bus),
+			adev->pdev->bus->number,
+			PCI_SLOT(adev->pdev->devfn),
+			PCI_FUNC(adev->pdev->devfn),
+			gtt->ttm.dma_address[i],
+			page_to_phys(ttm->pages[i]));
 	}
 	return 0;
 }
-- 
2.12.0

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping
       [not found] ` <20170801115131.27610-1-tom.stdenis-5C7GfCeVMHo@public.gmane.org>
@ 2017-08-01 11:55   ` Christian König
       [not found]     ` <b4945fed-aea3-1b15-b383-26bc65307139-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 12+ messages in thread
From: Christian König @ 2017-08-01 11:55 UTC (permalink / raw)
  To: Tom St Denis, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 01.08.2017 um 13:51 schrieb Tom St Denis:
> This helps map DMA addresses back to physical addresses.
>
> Signed-off-by: Tom St Denis <tom.stdenis@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 28 ++++++++++++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  8 ++++++++
>   2 files changed, 36 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> index 509f7a63d40c..5b2bb28da504 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
> @@ -14,6 +14,34 @@
>   #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \
>   	 job->base.s_fence->finished.ops->get_timeline_name(&job->base.s_fence->finished)
>   
> +TRACE_EVENT(amdgpu_ttm_tt_populate,
> +	    TP_PROTO(uint16_t domain, uint8_t bus, uint8_t slot, uint8_t func, uint64_t dma_address, uint64_t phys_address),
> +	    TP_ARGS(domain, bus, slot, func, dma_address, phys_address),
> +	    TP_STRUCT__entry(
> +				__field(uint16_t, domain)
> +				__field(uint8_t, bus)
> +				__field(uint8_t, slot)
> +				__field(uint8_t, func)
> +				__field(uint64_t, dma)
> +				__field(uint64_t, phys)
> +			    ),

Better just give adev here and extract the values during the fast assign.

> +	    TP_fast_assign(
> +			   __entry->domain = domain;
> +			   __entry->bus = bus;
> +			   __entry->slot = slot;
> +			   __entry->func = func;
> +			   __entry->dma = dma_address;
> +			   __entry->phys = phys_address;
> +			   ),
> +	    TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx",
> +		      (unsigned)__entry->domain,
> +		      (unsigned)__entry->bus,
> +		      (unsigned)__entry->slot,
> +		      (unsigned)__entry->func,
> +		      (unsigned long long)__entry->dma,
> +		      (unsigned long long)__entry->phys)
> +);
> +
>   TRACE_EVENT(amdgpu_mm_rreg,
>   	    TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
>   	    TP_ARGS(did, reg, value),
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 8da59d212b3b..1cf274603476 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -43,6 +43,7 @@
>   #include <linux/pagemap.h>
>   #include <linux/debugfs.h>
>   #include "amdgpu.h"
> +#include "amdgpu_trace.h"
>   #include "bif/bif_4_1_d.h"
>   
>   #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
> @@ -944,6 +945,13 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
>   			ttm_pool_unpopulate(ttm);
>   			return -EFAULT;
>   		}
> +		trace_amdgpu_ttm_tt_populate(
> +			pci_domain_nr(adev->pdev->bus),
> +			adev->pdev->bus->number,
> +			PCI_SLOT(adev->pdev->devfn),
> +			PCI_FUNC(adev->pdev->devfn),
> +			gtt->ttm.dma_address[i],
> +			page_to_phys(ttm->pages[i]));

Please add that tracing for the dma pool path as well.

With that fixed the change looks good to me,
Christian.

>   	}
>   	return 0;
>   }


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping
       [not found]     ` <b4945fed-aea3-1b15-b383-26bc65307139-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-08-01 12:29       ` Tom St Denis
       [not found]         ` <21a75bbf-e9ca-e9e9-5831-e58908911e9b-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 12+ messages in thread
From: Tom St Denis @ 2017-08-01 12:29 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 01/08/17 07:55 AM, Christian König wrote:
> Am 01.08.2017 um 13:51 schrieb Tom St Denis:
>> This helps map DMA addresses back to physical addresses.
>>
>> Signed-off-by: Tom St Denis <tom.stdenis@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 28 
>> ++++++++++++++++++++++++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  8 ++++++++
>>   2 files changed, 36 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>> index 509f7a63d40c..5b2bb28da504 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>> @@ -14,6 +14,34 @@
>>   #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \
>>        
>> job->base.s_fence->finished.ops->get_timeline_name(&job->base.s_fence->finished) 
>>
>> +TRACE_EVENT(amdgpu_ttm_tt_populate,
>> +        TP_PROTO(uint16_t domain, uint8_t bus, uint8_t slot, uint8_t 
>> func, uint64_t dma_address, uint64_t phys_address),
>> +        TP_ARGS(domain, bus, slot, func, dma_address, phys_address),
>> +        TP_STRUCT__entry(
>> +                __field(uint16_t, domain)
>> +                __field(uint8_t, bus)
>> +                __field(uint8_t, slot)
>> +                __field(uint8_t, func)
>> +                __field(uint64_t, dma)
>> +                __field(uint64_t, phys)
>> +                ),
> 
> Better just give adev here and extract the values during the fast assign.

Easy enough, I've done this now.

> 
>> +        TP_fast_assign(
>> +               __entry->domain = domain;
>> +               __entry->bus = bus;
>> +               __entry->slot = slot;
>> +               __entry->func = func;
>> +               __entry->dma = dma_address;
>> +               __entry->phys = phys_address;
>> +               ),
>> +        TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx",
>> +              (unsigned)__entry->domain,
>> +              (unsigned)__entry->bus,
>> +              (unsigned)__entry->slot,
>> +              (unsigned)__entry->func,
>> +              (unsigned long long)__entry->dma,
>> +              (unsigned long long)__entry->phys)
>> +);
>> +
>>   TRACE_EVENT(amdgpu_mm_rreg,
>>           TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
>>           TP_ARGS(did, reg, value),
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index 8da59d212b3b..1cf274603476 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -43,6 +43,7 @@
>>   #include <linux/pagemap.h>
>>   #include <linux/debugfs.h>
>>   #include "amdgpu.h"
>> +#include "amdgpu_trace.h"
>>   #include "bif/bif_4_1_d.h"
>>   #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
>> @@ -944,6 +945,13 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt 
>> *ttm)
>>               ttm_pool_unpopulate(ttm);
>>               return -EFAULT;
>>           }
>> +        trace_amdgpu_ttm_tt_populate(
>> +            pci_domain_nr(adev->pdev->bus),
>> +            adev->pdev->bus->number,
>> +            PCI_SLOT(adev->pdev->devfn),
>> +            PCI_FUNC(adev->pdev->devfn),
>> +            gtt->ttm.dma_address[i],
>> +            page_to_phys(ttm->pages[i]));
> 
> Please add that tracing for the dma pool path as well.
> 
> With that fixed the change looks good to me,
> Christian.

Unsure what you mean here.  The ttm_pool_populate() seems to be 
preparing the page list to back the request.

Tom
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping
       [not found]         ` <21a75bbf-e9ca-e9e9-5831-e58908911e9b-5C7GfCeVMHo@public.gmane.org>
@ 2017-08-01 14:10           ` Christian König
       [not found]             ` <56a6ecd5-d210-cd71-0fc0-2d0ffd36ab00-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 12+ messages in thread
From: Christian König @ 2017-08-01 14:10 UTC (permalink / raw)
  To: Tom St Denis, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 01.08.2017 um 14:29 schrieb Tom St Denis:
> On 01/08/17 07:55 AM, Christian König wrote:
>> Am 01.08.2017 um 13:51 schrieb Tom St Denis:
>>> This helps map DMA addresses back to physical addresses.
>>>
>>> Signed-off-by: Tom St Denis <tom.stdenis@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 28 
>>> ++++++++++++++++++++++++++++
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  8 ++++++++
>>>   2 files changed, 36 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>>> index 509f7a63d40c..5b2bb28da504 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
>>> @@ -14,6 +14,34 @@
>>>   #define AMDGPU_JOB_GET_TIMELINE_NAME(job) \
>>> job->base.s_fence->finished.ops->get_timeline_name(&job->base.s_fence->finished) 
>>>
>>> +TRACE_EVENT(amdgpu_ttm_tt_populate,
>>> +        TP_PROTO(uint16_t domain, uint8_t bus, uint8_t slot, 
>>> uint8_t func, uint64_t dma_address, uint64_t phys_address),
>>> +        TP_ARGS(domain, bus, slot, func, dma_address, phys_address),
>>> +        TP_STRUCT__entry(
>>> +                __field(uint16_t, domain)
>>> +                __field(uint8_t, bus)
>>> +                __field(uint8_t, slot)
>>> +                __field(uint8_t, func)
>>> +                __field(uint64_t, dma)
>>> +                __field(uint64_t, phys)
>>> +                ),
>>
>> Better just give adev here and extract the values during the fast 
>> assign.
>
> Easy enough, I've done this now.
>
>>
>>> +        TP_fast_assign(
>>> +               __entry->domain = domain;
>>> +               __entry->bus = bus;
>>> +               __entry->slot = slot;
>>> +               __entry->func = func;
>>> +               __entry->dma = dma_address;
>>> +               __entry->phys = phys_address;
>>> +               ),
>>> +        TP_printk("%04x:%02x:%02x.%x: 0x%llx => 0x%llx",
>>> +              (unsigned)__entry->domain,
>>> +              (unsigned)__entry->bus,
>>> +              (unsigned)__entry->slot,
>>> +              (unsigned)__entry->func,
>>> +              (unsigned long long)__entry->dma,
>>> +              (unsigned long long)__entry->phys)
>>> +);
>>> +
>>>   TRACE_EVENT(amdgpu_mm_rreg,
>>>           TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
>>>           TP_ARGS(did, reg, value),
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> index 8da59d212b3b..1cf274603476 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> @@ -43,6 +43,7 @@
>>>   #include <linux/pagemap.h>
>>>   #include <linux/debugfs.h>
>>>   #include "amdgpu.h"
>>> +#include "amdgpu_trace.h"
>>>   #include "bif/bif_4_1_d.h"
>>>   #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
>>> @@ -944,6 +945,13 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt 
>>> *ttm)
>>>               ttm_pool_unpopulate(ttm);
>>>               return -EFAULT;
>>>           }
>>> +        trace_amdgpu_ttm_tt_populate(
>>> +            pci_domain_nr(adev->pdev->bus),
>>> +            adev->pdev->bus->number,
>>> +            PCI_SLOT(adev->pdev->devfn),
>>> +            PCI_FUNC(adev->pdev->devfn),
>>> +            gtt->ttm.dma_address[i],
>>> +            page_to_phys(ttm->pages[i]));
>>
>> Please add that tracing for the dma pool path as well.
>>
>> With that fixed the change looks good to me,
>> Christian.
>
> Unsure what you mean here.  The ttm_pool_populate() seems to be 
> preparing the page list to back the request.

You need to cover multiple code path here:
1. The one you currently implemented which uses ttm_dma_populate() and 
pci_map_page().
2. The one using ttm_dma_populate().
3. The one using drm_prime_sg_to_page_addr_arrays() a bit above for 
imported BOs.
4. The in amdgpu_ttm_tt_pin_userptr() which uses dma_map_sg() for userptrs.

Basically all just take gtt->ttm.ttm.pages and fill gtt->ttm.dma_address.

I suggest to add a helper you can call to trace all pages->dma_address 
mappings inside a ttm.

Regards,
Christian.

>
> Tom
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping
       [not found]             ` <56a6ecd5-d210-cd71-0fc0-2d0ffd36ab00-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-08-01 14:26               ` Tom St Denis
       [not found]                 ` <29f09f3b-154d-70d2-f5c6-315022b408ea-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 12+ messages in thread
From: Tom St Denis @ 2017-08-01 14:26 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 01/08/17 10:10 AM, Christian König wrote:

> You need to cover multiple code path here:
> 1. The one you currently implemented which uses ttm_dma_populate() and 
> pci_map_page().
> 2. The one using ttm_dma_populate().

I'll have to look into this one though it's my understanding that code 
path is only followed if there's no (hardware) IOMMU right?  Which while 
it'd have to be covered isn't a high priority right now (our devel 
platforms have hardware IOMMU).

None the less I'll look into it once I figure out #3 and #4 per your 
comments.

> 3. The one using drm_prime_sg_to_page_addr_arrays() a bit above for 
> imported BOs.

This one seems rather straight forward but the only catch is I don't 
have access to "adev" inside that drm function.  Would it be taboo to do 
something like

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 376078334f54..cd97ef2144c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -916,6 +916,12 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
                 drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
                                                  gtt->ttm.dma_address, 
ttm->num_pages);
                 ttm->state = tt_unbound;
+               for (i = 0; i < ttm->num_pages; i++) {
+                       trace_amdgpu_ttm_tt_populate(
+                               adev,
+                               gtt->ttm.dma_address[i],
+                               page_to_phys(ttm->pages[i]));
+               }
                 return 0;
         }

This would normally result in a for loop around a nop which shouldn't be 
a huge performance hit but is one none the less.


> 4. The in amdgpu_ttm_tt_pin_userptr() which uses dma_map_sg() for userptrs.
> 
> Basically all just take gtt->ttm.ttm.pages and fill gtt->ttm.dma_address.

Same comment as #3.  I can tackle this with a for loop around the trace 
if that is ok.

> I suggest to add a helper you can call to trace all pages->dma_address 
> mappings inside a ttm.

One thing at a time :-).  That would probably be a bit better since the 
trace log gets filled with remappings (which is why my proof-of-concept 
umr patch only grabs the latest mapping as it reads the trace).

Is there a handy place we can grab a list of ttm_tt's bound to our 
device?  If so then in theory I could write a debugfs interface instead.

Thanks for your patience as I'm definitely less familiar with the VM 
side of things :-)

Cheers,
Tom
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping
       [not found]                 ` <29f09f3b-154d-70d2-f5c6-315022b408ea-5C7GfCeVMHo@public.gmane.org>
@ 2017-08-01 14:54                   ` Christian König
       [not found]                     ` <67a7f3ec-fd54-e997-75a0-4d6e3d2ff908-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 12+ messages in thread
From: Christian König @ 2017-08-01 14:54 UTC (permalink / raw)
  To: Tom St Denis, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 01.08.2017 um 16:26 schrieb Tom St Denis:
> On 01/08/17 10:10 AM, Christian König wrote:
>
>> You need to cover multiple code path here:
>> 1. The one you currently implemented which uses ttm_dma_populate() 
>> and pci_map_page().
>> 2. The one using ttm_dma_populate().
>
> I'll have to look into this one though it's my understanding that code 
> path is only followed if there's no (hardware) IOMMU right?

Wrong, that one is used when "anything" in the system used the SWIOTLB 
path once. So the detection doesn't always work reliable.

> This one seems rather straight forward but the only catch is I don't 
> have access to "adev" inside that drm function.

When you have a ttm you can always do "amdgpu_ttm_adev(ttm->bdev)" to 
get the adev.

>   Would it be taboo to do something like
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 376078334f54..cd97ef2144c9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -916,6 +916,12 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt 
> *ttm)
>                 drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
> gtt->ttm.dma_address, ttm->num_pages);
>                 ttm->state = tt_unbound;
> +               for (i = 0; i < ttm->num_pages; i++) {
> +                       trace_amdgpu_ttm_tt_populate(
> +                               adev,
> +                               gtt->ttm.dma_address[i],
> +                               page_to_phys(ttm->pages[i]));
> +               }
>                 return 0;
>         }
>
> This would normally result in a for loop around a nop which shouldn't 
> be a huge performance hit but is one none the less.

Try using trace_trace_amdgpu_ttm_tt_populate_enabled(), those functions 
are generated by the trace subsystem automatically when you define a 
trace point.

It just doesn't use those nice tricks to modify the compiled binary on 
the fly.

Christian.

>
>> 4. The in amdgpu_ttm_tt_pin_userptr() which uses dma_map_sg() for 
>> userptrs.
>>
>> Basically all just take gtt->ttm.ttm.pages and fill 
>> gtt->ttm.dma_address.
>
> Same comment as #3.  I can tackle this with a for loop around the 
> trace if that is ok.
>
>> I suggest to add a helper you can call to trace all 
>> pages->dma_address mappings inside a ttm.
>
> One thing at a time :-).  That would probably be a bit better since 
> the trace log gets filled with remappings (which is why my 
> proof-of-concept umr patch only grabs the latest mapping as it reads 
> the trace).
>
> Is there a handy place we can grab a list of ttm_tt's bound to our 
> device?  If so then in theory I could write a debugfs interface instead.
>
> Thanks for your patience as I'm definitely less familiar with the VM 
> side of things :-)
>
> Cheers,
> Tom
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping
       [not found]                     ` <67a7f3ec-fd54-e997-75a0-4d6e3d2ff908-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-08-01 15:00                       ` axie
       [not found]                         ` <ae6e3523-dd8e-a14e-9bb2-297b0114bd5e-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 12+ messages in thread
From: axie @ 2017-08-01 15:00 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Because this is used by a debug tool, can we use a macro to 
conditionally compile this feature?


On 2017-08-01 10:54 AM, Christian König wrote:
> Am 01.08.2017 um 16:26 schrieb Tom St Denis:
>> On 01/08/17 10:10 AM, Christian König wrote:
>>
>>> You need to cover multiple code path here:
>>> 1. The one you currently implemented which uses ttm_dma_populate() 
>>> and pci_map_page().
>>> 2. The one using ttm_dma_populate().
>>
>> I'll have to look into this one though it's my understanding that 
>> code path is only followed if there's no (hardware) IOMMU right?
>
> Wrong, that one is used when "anything" in the system used the SWIOTLB 
> path once. So the detection doesn't always work reliable.
>
>> This one seems rather straight forward but the only catch is I don't 
>> have access to "adev" inside that drm function.
>
> When you have a ttm you can always do "amdgpu_ttm_adev(ttm->bdev)" to 
> get the adev.
>
>>   Would it be taboo to do something like
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index 376078334f54..cd97ef2144c9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -916,6 +916,12 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt 
>> *ttm)
>>                 drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
>> gtt->ttm.dma_address, ttm->num_pages);
>>                 ttm->state = tt_unbound;
>> +               for (i = 0; i < ttm->num_pages; i++) {
>> +                       trace_amdgpu_ttm_tt_populate(
>> +                               adev,
>> +                               gtt->ttm.dma_address[i],
>> +                               page_to_phys(ttm->pages[i]));
>> +               }
>>                 return 0;
>>         }
>>
>> This would normally result in a for loop around a nop which shouldn't 
>> be a huge performance hit but is one none the less.
>
> Try using trace_trace_amdgpu_ttm_tt_populate_enabled(), those 
> functions are generated by the trace subsystem automatically when you 
> define a trace point.
>
> It just doesn't use those nice tricks to modify the compiled binary on 
> the fly.
>
> Christian.
>
>>
>>> 4. The in amdgpu_ttm_tt_pin_userptr() which uses dma_map_sg() for 
>>> userptrs.
>>>
>>> Basically all just take gtt->ttm.ttm.pages and fill 
>>> gtt->ttm.dma_address.
>>
>> Same comment as #3.  I can tackle this with a for loop around the 
>> trace if that is ok.
>>
>>> I suggest to add a helper you can call to trace all 
>>> pages->dma_address mappings inside a ttm.
>>
>> One thing at a time :-).  That would probably be a bit better since 
>> the trace log gets filled with remappings (which is why my 
>> proof-of-concept umr patch only grabs the latest mapping as it reads 
>> the trace).
>>
>> Is there a handy place we can grab a list of ttm_tt's bound to our 
>> device?  If so then in theory I could write a debugfs interface instead.
>>
>> Thanks for your patience as I'm definitely less familiar with the VM 
>> side of things :-)
>>
>> Cheers,
>> Tom
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping
       [not found]                         ` <ae6e3523-dd8e-a14e-9bb2-297b0114bd5e-5C7GfCeVMHo@public.gmane.org>
@ 2017-08-01 15:41                           ` Christian König
       [not found]                             ` <b38d7cac-533d-7029-f73f-445a92d6b952-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  0 siblings, 1 reply; 12+ messages in thread
From: Christian König @ 2017-08-01 15:41 UTC (permalink / raw)
  To: axie, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

We can turn of the trace subsystem during compile time already.

Additional to that the trace points use self modifying code to make sure 
that they don't have any overhead as long as they are turned off. But I 
don't think this works with the "if (trace_*_enabled()" as well.

Anyway, not a performance critical code path so I don't really bother.

Christian.

Am 01.08.2017 um 17:00 schrieb axie:
> Because this is used by a debug tool, can we use a macro to 
> conditionally compile this feature?
>
>
> On 2017-08-01 10:54 AM, Christian König wrote:
>> Am 01.08.2017 um 16:26 schrieb Tom St Denis:
>>> On 01/08/17 10:10 AM, Christian König wrote:
>>>
>>>> You need to cover multiple code path here:
>>>> 1. The one you currently implemented which uses ttm_dma_populate() 
>>>> and pci_map_page().
>>>> 2. The one using ttm_dma_populate().
>>>
>>> I'll have to look into this one though it's my understanding that 
>>> code path is only followed if there's no (hardware) IOMMU right?
>>
>> Wrong, that one is used when "anything" in the system used the 
>> SWIOTLB path once. So the detection doesn't always work reliable.
>>
>>> This one seems rather straight forward but the only catch is I don't 
>>> have access to "adev" inside that drm function.
>>
>> When you have a ttm you can always do "amdgpu_ttm_adev(ttm->bdev)" to 
>> get the adev.
>>
>>>   Would it be taboo to do something like
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> index 376078334f54..cd97ef2144c9 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> @@ -916,6 +916,12 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt 
>>> *ttm)
>>>                 drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
>>> gtt->ttm.dma_address, ttm->num_pages);
>>>                 ttm->state = tt_unbound;
>>> +               for (i = 0; i < ttm->num_pages; i++) {
>>> +                       trace_amdgpu_ttm_tt_populate(
>>> +                               adev,
>>> +                               gtt->ttm.dma_address[i],
>>> + page_to_phys(ttm->pages[i]));
>>> +               }
>>>                 return 0;
>>>         }
>>>
>>> This would normally result in a for loop around a nop which 
>>> shouldn't be a huge performance hit but is one none the less.
>>
>> Try using trace_trace_amdgpu_ttm_tt_populate_enabled(), those 
>> functions are generated by the trace subsystem automatically when you 
>> define a trace point.
>>
>> It just doesn't use those nice tricks to modify the compiled binary 
>> on the fly.
>>
>> Christian.
>>
>>>
>>>> 4. The in amdgpu_ttm_tt_pin_userptr() which uses dma_map_sg() for 
>>>> userptrs.
>>>>
>>>> Basically all just take gtt->ttm.ttm.pages and fill 
>>>> gtt->ttm.dma_address.
>>>
>>> Same comment as #3.  I can tackle this with a for loop around the 
>>> trace if that is ok.
>>>
>>>> I suggest to add a helper you can call to trace all 
>>>> pages->dma_address mappings inside a ttm.
>>>
>>> One thing at a time :-).  That would probably be a bit better since 
>>> the trace log gets filled with remappings (which is why my 
>>> proof-of-concept umr patch only grabs the latest mapping as it reads 
>>> the trace).
>>>
>>> Is there a handy place we can grab a list of ttm_tt's bound to our 
>>> device?  If so then in theory I could write a debugfs interface 
>>> instead.
>>>
>>> Thanks for your patience as I'm definitely less familiar with the VM 
>>> side of things :-)
>>>
>>> Cheers,
>>> Tom
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>>
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping
       [not found]                             ` <b38d7cac-533d-7029-f73f-445a92d6b952-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
@ 2017-08-01 15:56                               ` Xie, AlexBin
       [not found]                                 ` <DM5PR12MB12572F535B64B97DF2C93C6EF2B30-2J9CzHegvk/NHlLGalgXawdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  2017-08-01 17:00                               ` Tom St Denis
  1 sibling, 1 reply; 12+ messages in thread
From: Xie, AlexBin @ 2017-08-01 15:56 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW


[-- Attachment #1.1: Type: text/plain, Size: 5710 bytes --]

I don't know if compiler is smart enough to optimize the following for statement out...


+               for (i = 0; i < ttm->num_pages; i++) {
+                       trace_amdgpu_ttm_tt_populate(
+                               adev,
+                               gtt->ttm.dma_address[i],
+                               page_to_phys(ttm->pages[i]));
+               }


-Alex Bin

________________________________
From: Christian König <deathsimple-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
Sent: Tuesday, August 1, 2017 11:41 AM
To: Xie, AlexBin; amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
Subject: Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping

We can turn of the trace subsystem during compile time already.

Additional to that the trace points use self modifying code to make sure
that they don't have any overhead as long as they are turned off. But I
don't think this works with the "if (trace_*_enabled()" as well.

Anyway, not a performance critical code path so I don't really bother.

Christian.

Am 01.08.2017 um 17:00 schrieb axie:
> Because this is used by a debug tool, can we use a macro to
> conditionally compile this feature?
>
>
> On 2017-08-01 10:54 AM, Christian König wrote:
>> Am 01.08.2017 um 16:26 schrieb Tom St Denis:
>>> On 01/08/17 10:10 AM, Christian König wrote:
>>>
>>>> You need to cover multiple code path here:
>>>> 1. The one you currently implemented which uses ttm_dma_populate()
>>>> and pci_map_page().
>>>> 2. The one using ttm_dma_populate().
>>>
>>> I'll have to look into this one though it's my understanding that
>>> code path is only followed if there's no (hardware) IOMMU right?
>>
>> Wrong, that one is used when "anything" in the system used the
>> SWIOTLB path once. So the detection doesn't always work reliable.
>>
>>> This one seems rather straight forward but the only catch is I don't
>>> have access to "adev" inside that drm function.
>>
>> When you have a ttm you can always do "amdgpu_ttm_adev(ttm->bdev)" to
>> get the adev.
>>
>>>   Would it be taboo to do something like
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> index 376078334f54..cd97ef2144c9 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>> @@ -916,6 +916,12 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt
>>> *ttm)
>>>                 drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
>>> gtt->ttm.dma_address, ttm->num_pages);
>>>                 ttm->state = tt_unbound;
>>> +               for (i = 0; i < ttm->num_pages; i++) {
>>> +                       trace_amdgpu_ttm_tt_populate(
>>> +                               adev,
>>> +                               gtt->ttm.dma_address[i],
>>> + page_to_phys(ttm->pages[i]));
>>> +               }
>>>                 return 0;
>>>         }
>>>
>>> This would normally result in a for loop around a nop which
>>> shouldn't be a huge performance hit but is one none the less.
>>
>> Try using trace_trace_amdgpu_ttm_tt_populate_enabled(), those
>> functions are generated by the trace subsystem automatically when you
>> define a trace point.
>>
>> It just doesn't use those nice tricks to modify the compiled binary
>> on the fly.
>>
>> Christian.
>>
>>>
>>>> 4. The in amdgpu_ttm_tt_pin_userptr() which uses dma_map_sg() for
>>>> userptrs.
>>>>
>>>> Basically all just take gtt->ttm.ttm.pages and fill
>>>> gtt->ttm.dma_address.
>>>
>>> Same comment as #3.  I can tackle this with a for loop around the
>>> trace if that is ok.
>>>
>>>> I suggest to add a helper you can call to trace all
>>>> pages->dma_address mappings inside a ttm.
>>>
>>> One thing at a time :-).  That would probably be a bit better since
>>> the trace log gets filled with remappings (which is why my
>>> proof-of-concept umr patch only grabs the latest mapping as it reads
>>> the trace).
>>>
>>> Is there a handy place we can grab a list of ttm_tt's bound to our
>>> device?  If so then in theory I could write a debugfs interface
>>> instead.
>>>
>>> Thanks for your patience as I'm definitely less familiar with the VM
>>> side of things :-)
>>>
>>> Cheers,
>>> Tom
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
amd-gfx Info Page - freedesktop.org<https://lists.freedesktop.org/mailman/listinfo/amd-gfx>
lists.freedesktop.org
Subscribing to amd-gfx: Subscribe to amd-gfx by filling out the following form. Use of all freedesktop.org lists is subject to our Code of ...


>>
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
amd-gfx Info Page - freedesktop.org<https://lists.freedesktop.org/mailman/listinfo/amd-gfx>
lists.freedesktop.org
Subscribing to amd-gfx: Subscribe to amd-gfx by filling out the following form. Use of all freedesktop.org lists is subject to our Code of ...


>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
amd-gfx Info Page - freedesktop.org<https://lists.freedesktop.org/mailman/listinfo/amd-gfx>
lists.freedesktop.org
Subscribing to amd-gfx: Subscribe to amd-gfx by filling out the following form. Use of all freedesktop.org lists is subject to our Code of ...





[-- Attachment #1.2: Type: text/html, Size: 15339 bytes --]

[-- Attachment #2: Type: text/plain, Size: 154 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping
       [not found]                                 ` <DM5PR12MB12572F535B64B97DF2C93C6EF2B30-2J9CzHegvk/NHlLGalgXawdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2017-08-01 16:26                                   ` Tom St Denis
  0 siblings, 0 replies; 12+ messages in thread
From: Tom St Denis @ 2017-08-01 16:26 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

In v2 of the patch I used the _enabled() function around the blocks so 
the for loop is only reached if the trace is enabled.

Tom

On 01/08/17 11:56 AM, Xie, AlexBin wrote:
> I don't know if compiler is smart enough to optimize the following for 
> statement out...
> 
> 
> +               for (i = 0; i < ttm->num_pages; i++) {
> +                       trace_amdgpu_ttm_tt_populate(
> +                               adev,
> +                               gtt->ttm.dma_address[i],
> +                               page_to_phys(ttm->pages[i]));
> +               }
> 
> 
> -Alex Bin
> 
> 
> ------------------------------------------------------------------------
> *From:* Christian König <deathsimple@vodafone.de>
> *Sent:* Tuesday, August 1, 2017 11:41 AM
> *To:* Xie, AlexBin; amd-gfx@lists.freedesktop.org
> *Subject:* Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping
> We can turn of the trace subsystem during compile time already.
> 
> Additional to that the trace points use self modifying code to make sure
> that they don't have any overhead as long as they are turned off. But I
> don't think this works with the "if (trace_*_enabled()" as well.
> 
> Anyway, not a performance critical code path so I don't really bother.
> 
> Christian.
> 
> Am 01.08.2017 um 17:00 schrieb axie:
>> Because this is used by a debug tool, can we use a macro to 
>> conditionally compile this feature?
>>
>>
>> On 2017-08-01 10:54 AM, Christian König wrote:
>>> Am 01.08.2017 um 16:26 schrieb Tom St Denis:
>>>> On 01/08/17 10:10 AM, Christian König wrote:
>>>>
>>>>> You need to cover multiple code path here:
>>>>> 1. The one you currently implemented which uses ttm_dma_populate() 
>>>>> and pci_map_page().
>>>>> 2. The one using ttm_dma_populate().
>>>>
>>>> I'll have to look into this one though it's my understanding that 
>>>> code path is only followed if there's no (hardware) IOMMU right?
>>>
>>> Wrong, that one is used when "anything" in the system used the 
>>> SWIOTLB path once. So the detection doesn't always work reliable.
>>>
>>>> This one seems rather straight forward but the only catch is I don't 
>>>> have access to "adev" inside that drm function.
>>>
>>> When you have a ttm you can always do "amdgpu_ttm_adev(ttm->bdev)" to 
>>> get the adev.
>>>
>>>>   Would it be taboo to do something like
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>> index 376078334f54..cd97ef2144c9 100644
>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>>>> @@ -916,6 +916,12 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt 
>>>> *ttm)
>>>>                 drm_prime_sg_to_page_addr_arrays(ttm->sg, ttm->pages,
>>>> gtt->ttm.dma_address, ttm->num_pages);
>>>>                 ttm->state = tt_unbound;
>>>> +               for (i = 0; i < ttm->num_pages; i++) {
>>>> +                       trace_amdgpu_ttm_tt_populate(
>>>> +                               adev,
>>>> +                               gtt->ttm.dma_address[i],
>>>> + page_to_phys(ttm->pages[i]));
>>>> +               }
>>>>                 return 0;
>>>>         }
>>>>
>>>> This would normally result in a for loop around a nop which 
>>>> shouldn't be a huge performance hit but is one none the less.
>>>
>>> Try using trace_trace_amdgpu_ttm_tt_populate_enabled(), those 
>>> functions are generated by the trace subsystem automatically when you 
>>> define a trace point.
>>>
>>> It just doesn't use those nice tricks to modify the compiled binary 
>>> on the fly.
>>>
>>> Christian.
>>>
>>>>
>>>>> 4. The in amdgpu_ttm_tt_pin_userptr() which uses dma_map_sg() for 
>>>>> userptrs.
>>>>>
>>>>> Basically all just take gtt->ttm.ttm.pages and fill 
>>>>> gtt->ttm.dma_address.
>>>>
>>>> Same comment as #3.  I can tackle this with a for loop around the 
>>>> trace if that is ok.
>>>>
>>>>> I suggest to add a helper you can call to trace all 
>>>>> pages->dma_address mappings inside a ttm.
>>>>
>>>> One thing at a time :-).  That would probably be a bit better since 
>>>> the trace log gets filled with remappings (which is why my 
>>>> proof-of-concept umr patch only grabs the latest mapping as it reads 
>>>> the trace).
>>>>
>>>> Is there a handy place we can grab a list of ttm_tt's bound to our 
>>>> device?  If so then in theory I could write a debugfs interface 
>>>> instead.
>>>>
>>>> Thanks for your patience as I'm definitely less familiar with the VM 
>>>> side of things :-)
>>>>
>>>> Cheers,
>>>> Tom
>>>> _______________________________________________
>>>> amd-gfx mailing list
>>>> amd-gfx@lists.freedesktop.org
>>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> amd-gfx Info Page - freedesktop.org 
> <https://lists.freedesktop.org/mailman/listinfo/amd-gfx>
> lists.freedesktop.org
> Subscribing to amd-gfx: Subscribe to amd-gfx by filling out the 
> following form. Use of all freedesktop.org lists is subject to our Code 
> of ...
> 
> 
>>>
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx@lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> amd-gfx Info Page - freedesktop.org 
> <https://lists.freedesktop.org/mailman/listinfo/amd-gfx>
> lists.freedesktop.org
> Subscribing to amd-gfx: Subscribe to amd-gfx by filling out the 
> following form. Use of all freedesktop.org lists is subject to our Code 
> of ...
> 
> 
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> amd-gfx Info Page - freedesktop.org 
> <https://lists.freedesktop.org/mailman/listinfo/amd-gfx>
> lists.freedesktop.org
> Subscribing to amd-gfx: Subscribe to amd-gfx by filling out the 
> following form. Use of all freedesktop.org lists is subject to our Code 
> of ...
> 
> 
> 
> 
> 
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> 

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping
       [not found]                             ` <b38d7cac-533d-7029-f73f-445a92d6b952-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
  2017-08-01 15:56                               ` Xie, AlexBin
@ 2017-08-01 17:00                               ` Tom St Denis
       [not found]                                 ` <8ba09a77-9bc6-35a1-8169-246a144430e8-5C7GfCeVMHo@public.gmane.org>
  1 sibling, 1 reply; 12+ messages in thread
From: Tom St Denis @ 2017-08-01 17:00 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 01/08/17 11:41 AM, Christian König wrote:
> We can turn of the trace subsystem during compile time already.
> 
> Additional to that the trace points use self modifying code to make sure 
> that they don't have any overhead as long as they are turned off. But I 
> don't think this works with the "if (trace_*_enabled()" as well.
> 
> Anyway, not a performance critical code path so I don't really bother.

Well aside from that this will only really be useful in upstream kernels 
where we don't control the configuration.

Eventually if we come up with a better solution (tm) then we can revert 
this cleanly.

Tom
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping
       [not found]                                 ` <8ba09a77-9bc6-35a1-8169-246a144430e8-5C7GfCeVMHo@public.gmane.org>
@ 2017-08-01 18:25                                   ` Christian König
  0 siblings, 0 replies; 12+ messages in thread
From: Christian König @ 2017-08-01 18:25 UTC (permalink / raw)
  To: Tom St Denis, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Am 01.08.2017 um 19:00 schrieb Tom St Denis:
> On 01/08/17 11:41 AM, Christian König wrote:
>> We can turn of the trace subsystem during compile time already.
>>
>> Additional to that the trace points use self modifying code to make 
>> sure that they don't have any overhead as long as they are turned 
>> off. But I don't think this works with the "if (trace_*_enabled()" as 
>> well.
>>
>> Anyway, not a performance critical code path so I don't really bother.
>
> Well aside from that this will only really be useful in upstream 
> kernels where we don't control the configuration.
>
> Eventually if we come up with a better solution (tm) then we can 
> revert this cleanly.

Actually even if we come up with some other approach to access the GPU 
VM of a process I would rather like to keep that trace point because it 
is rather useful for some userptr debugging as well.

Christian.

>
> Tom
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2017-08-01 18:25 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-08-01 11:51 [PATCH] drm/amd/amdgpu: Add tracepoint for DMA page mapping Tom St Denis
     [not found] ` <20170801115131.27610-1-tom.stdenis-5C7GfCeVMHo@public.gmane.org>
2017-08-01 11:55   ` Christian König
     [not found]     ` <b4945fed-aea3-1b15-b383-26bc65307139-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-01 12:29       ` Tom St Denis
     [not found]         ` <21a75bbf-e9ca-e9e9-5831-e58908911e9b-5C7GfCeVMHo@public.gmane.org>
2017-08-01 14:10           ` Christian König
     [not found]             ` <56a6ecd5-d210-cd71-0fc0-2d0ffd36ab00-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-01 14:26               ` Tom St Denis
     [not found]                 ` <29f09f3b-154d-70d2-f5c6-315022b408ea-5C7GfCeVMHo@public.gmane.org>
2017-08-01 14:54                   ` Christian König
     [not found]                     ` <67a7f3ec-fd54-e997-75a0-4d6e3d2ff908-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-01 15:00                       ` axie
     [not found]                         ` <ae6e3523-dd8e-a14e-9bb2-297b0114bd5e-5C7GfCeVMHo@public.gmane.org>
2017-08-01 15:41                           ` Christian König
     [not found]                             ` <b38d7cac-533d-7029-f73f-445a92d6b952-ANTagKRnAhcb1SvskN2V4Q@public.gmane.org>
2017-08-01 15:56                               ` Xie, AlexBin
     [not found]                                 ` <DM5PR12MB12572F535B64B97DF2C93C6EF2B30-2J9CzHegvk/NHlLGalgXawdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2017-08-01 16:26                                   ` Tom St Denis
2017-08-01 17:00                               ` Tom St Denis
     [not found]                                 ` <8ba09a77-9bc6-35a1-8169-246a144430e8-5C7GfCeVMHo@public.gmane.org>
2017-08-01 18:25                                   ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.