All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/1] drm/amdkfd: Add eviction debug messages
@ 2020-06-12  3:34 Felix Kuehling
  2020-06-12 22:03 ` philip yang
  0 siblings, 1 reply; 4+ messages in thread
From: Felix Kuehling @ 2020-06-12  3:34 UTC (permalink / raw)
  To: amd-gfx; +Cc: gang.ba

Use WARN to print messages with backtrace when evictions are triggered.
This can help determine the root cause of evictions and help spot driver
bugs triggering evictions unintentionally, or help with performance tuning
by avoiding conditions that cause evictions in a specific workload.

The messages are controlled by a new module parameter that can be changed
at runtime:

  echo Y > /sys/module/amdgpu/parameters/debug_evictions
  echo N > /sys/module/amdgpu/parameters/debug_evictions

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h      | 2 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  | 8 ++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 2 ++
 drivers/gpu/drm/amd/amdkfd/kfd_device.c  | 3 +++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h    | 5 +++++
 5 files changed, 20 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 10ae92e835f6..6c7dd0a707c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -186,8 +186,10 @@ extern int amdgpu_noretry;
 extern int amdgpu_force_asic_type;
 #ifdef CONFIG_HSA_AMD
 extern int sched_policy;
+extern bool debug_evictions;
 #else
 static const int sched_policy = KFD_SCHED_POLICY_HWS;
+static const bool debug_evictions; /* = false */
 #endif
 
 extern int amdgpu_tmz;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index d4d7cca1cc72..fdf350d5e7b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -705,6 +705,14 @@ MODULE_PARM_DESC(hws_gws_support, "Assume MEC2 FW supports GWS barriers (false =
 int queue_preemption_timeout_ms = 9000;
 module_param(queue_preemption_timeout_ms, int, 0644);
 MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue preemption timeout in ms (1 = Minimum, 9000 = default)");
+
+/**
+ * DOC: debug_evictions(bool)
+ * Enable extra debug messages to help determine the cause of evictions
+ */
+bool debug_evictions;
+module_param(debug_evictions, bool, 0644);
+MODULE_PARM_DESC(debug_evictions, "enable eviction debug messages (false = default)");
 #endif
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index b87ca171986a..072f0e1185a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -275,6 +275,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 			continue;
 		}
 
+		WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
+		     "Adding eviction fence to sync obj");
 		r = amdgpu_sync_fence(sync, f, false);
 		if (r)
 			break;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 22348cebaf36..80393e0583bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -942,6 +942,7 @@ int kgd2kfd_quiesce_mm(struct mm_struct *mm)
 	if (!p)
 		return -ESRCH;
 
+	WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
 	r = kfd_process_evict_queues(p);
 
 	kfd_unref_process(p);
@@ -1009,6 +1010,8 @@ int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
 	/* During process initialization eviction_work.dwork is initialized
 	 * to kfd_evict_bo_worker
 	 */
+	WARN(debug_evictions, "Scheduling eviction of pid %d in %ld jiffies",
+	     p->lead_thread->pid, delay_jiffies);
 	schedule_delayed_work(&p->eviction_work, delay_jiffies);
 out:
 	kfd_unref_process(p);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 173d58b2d81f..51ba2020732e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -177,6 +177,11 @@ extern bool hws_gws_support;
  */
 extern int queue_preemption_timeout_ms;
 
+/*
+ * Enable eviction debug messages
+ */
+extern bool debug_evictions;
+
 enum cache_policy {
 	cache_policy_coherent,
 	cache_policy_noncoherent
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/1] drm/amdkfd: Add eviction debug messages
  2020-06-12  3:34 [PATCH 1/1] drm/amdkfd: Add eviction debug messages Felix Kuehling
@ 2020-06-12 22:03 ` philip yang
  2020-06-12 23:43   ` Felix Kuehling
  0 siblings, 1 reply; 4+ messages in thread
From: philip yang @ 2020-06-12 22:03 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx; +Cc: gang.ba

It's a good idea. It would be better to add the same print in the system
memory eviction path, amdgpu_amdkfd_evict_userptr.

Use WARN_ONCE to avoid duplicate messages.

Regards,

Philip


On 2020-06-11 11:34 p.m., Felix Kuehling wrote:
> Use WARN to print messages with backtrace when evictions are triggered.
> This can help determine the root cause of evictions and help spot driver
> bugs triggering evictions unintentionally, or help with performance tuning
> by avoiding conditions that cause evictions in a specific workload.
>
> The messages are controlled by a new module parameter that can be changed
> at runtime:
>
>    echo Y > /sys/module/amdgpu/parameters/debug_evictions
>    echo N > /sys/module/amdgpu/parameters/debug_evictions
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h      | 2 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  | 8 ++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 2 ++
>   drivers/gpu/drm/amd/amdkfd/kfd_device.c  | 3 +++
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h    | 5 +++++
>   5 files changed, 20 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 10ae92e835f6..6c7dd0a707c9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -186,8 +186,10 @@ extern int amdgpu_noretry;
>   extern int amdgpu_force_asic_type;
>   #ifdef CONFIG_HSA_AMD
>   extern int sched_policy;
> +extern bool debug_evictions;
>   #else
>   static const int sched_policy = KFD_SCHED_POLICY_HWS;
> +static const bool debug_evictions; /* = false */
>   #endif
>   
>   extern int amdgpu_tmz;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index d4d7cca1cc72..fdf350d5e7b7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -705,6 +705,14 @@ MODULE_PARM_DESC(hws_gws_support, "Assume MEC2 FW supports GWS barriers (false =
>   int queue_preemption_timeout_ms = 9000;
>   module_param(queue_preemption_timeout_ms, int, 0644);
>   MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue preemption timeout in ms (1 = Minimum, 9000 = default)");
> +
> +/**
> + * DOC: debug_evictions(bool)
> + * Enable extra debug messages to help determine the cause of evictions
> + */
> +bool debug_evictions;
> +module_param(debug_evictions, bool, 0644);
> +MODULE_PARM_DESC(debug_evictions, "enable eviction debug messages (false = default)");
>   #endif
>   
>   /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> index b87ca171986a..072f0e1185a8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> @@ -275,6 +275,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
>   			continue;
>   		}
>   
> +		WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
> +		     "Adding eviction fence to sync obj");
>   		r = amdgpu_sync_fence(sync, f, false);
>   		if (r)
>   			break;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 22348cebaf36..80393e0583bb 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -942,6 +942,7 @@ int kgd2kfd_quiesce_mm(struct mm_struct *mm)
>   	if (!p)
>   		return -ESRCH;
>   
> +	WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
>   	r = kfd_process_evict_queues(p);
>   
>   	kfd_unref_process(p);
> @@ -1009,6 +1010,8 @@ int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
>   	/* During process initialization eviction_work.dwork is initialized
>   	 * to kfd_evict_bo_worker
>   	 */
> +	WARN(debug_evictions, "Scheduling eviction of pid %d in %ld jiffies",
> +	     p->lead_thread->pid, delay_jiffies);
>   	schedule_delayed_work(&p->eviction_work, delay_jiffies);
>   out:
>   	kfd_unref_process(p);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 173d58b2d81f..51ba2020732e 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -177,6 +177,11 @@ extern bool hws_gws_support;
>    */
>   extern int queue_preemption_timeout_ms;
>   
> +/*
> + * Enable eviction debug messages
> + */
> +extern bool debug_evictions;
> +
>   enum cache_policy {
>   	cache_policy_coherent,
>   	cache_policy_noncoherent
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/1] drm/amdkfd: Add eviction debug messages
  2020-06-12 22:03 ` philip yang
@ 2020-06-12 23:43   ` Felix Kuehling
  2020-06-13  2:09     ` philip yang
  0 siblings, 1 reply; 4+ messages in thread
From: Felix Kuehling @ 2020-06-12 23:43 UTC (permalink / raw)
  To: philip yang, amd-gfx; +Cc: gang.ba

Am 2020-06-12 um 6:03 p.m. schrieb philip yang:
> It's good idea, better to add same print in system memory eviction
> path amdgpu_amdkfd_evict_userptr.

That's covered by the message in kgd2kfd_quiesce_mm.


>
> Use WARN_ONCE to avoid duplicate messages.

I want duplicate messages. If many different kinds of evictions are
happening I want to see them all. The module parameter is there so I can
turn it on/off for short bursts while interesting things are happening.
It's off by default.

I was considering WARN_RATELIMIT, but that may skip interesting
evictions I actually want to see.

Regards,
  Felix


>
> Regards,
>
> Philip
>
>
> On 2020-06-11 11:34 p.m., Felix Kuehling wrote:
>> Use WARN to print messages with backtrace when evictions are triggered.
>> This can help determine the root cause of evictions and help spot driver
>> bugs triggering evictions unintentionally, or help with performance
>> tuning
>> by avoiding conditions that cause evictions in a specific workload.
>>
>> The messages are controlled by a new module parameter that can be
>> changed
>> at runtime:
>>
>>    echo Y > /sys/module/amdgpu/parameters/debug_evictions
>>    echo N > /sys/module/amdgpu/parameters/debug_evictions
>>
>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h      | 2 ++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  | 8 ++++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 2 ++
>>   drivers/gpu/drm/amd/amdkfd/kfd_device.c  | 3 +++
>>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h    | 5 +++++
>>   5 files changed, 20 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 10ae92e835f6..6c7dd0a707c9 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -186,8 +186,10 @@ extern int amdgpu_noretry;
>>   extern int amdgpu_force_asic_type;
>>   #ifdef CONFIG_HSA_AMD
>>   extern int sched_policy;
>> +extern bool debug_evictions;
>>   #else
>>   static const int sched_policy = KFD_SCHED_POLICY_HWS;
>> +static const bool debug_evictions; /* = false */
>>   #endif
>>     extern int amdgpu_tmz;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> index d4d7cca1cc72..fdf350d5e7b7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>> @@ -705,6 +705,14 @@ MODULE_PARM_DESC(hws_gws_support, "Assume MEC2
>> FW supports GWS barriers (false =
>>   int queue_preemption_timeout_ms = 9000;
>>   module_param(queue_preemption_timeout_ms, int, 0644);
>>   MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue preemption
>> timeout in ms (1 = Minimum, 9000 = default)");
>> +
>> +/**
>> + * DOC: debug_evictions(bool)
>> + * Enable extra debug messages to help determine the cause of evictions
>> + */
>> +bool debug_evictions;
>> +module_param(debug_evictions, bool, 0644);
>> +MODULE_PARM_DESC(debug_evictions, "enable eviction debug messages
>> (false = default)");
>>   #endif
>>     /**
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
>> index b87ca171986a..072f0e1185a8 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
>> @@ -275,6 +275,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
>> struct amdgpu_sync *sync,
>>               continue;
>>           }
>>   +        WARN(debug_evictions && fence_owner ==
>> AMDGPU_FENCE_OWNER_KFD,
>> +             "Adding eviction fence to sync obj");
>>           r = amdgpu_sync_fence(sync, f, false);
>>           if (r)
>>               break;
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> index 22348cebaf36..80393e0583bb 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> @@ -942,6 +942,7 @@ int kgd2kfd_quiesce_mm(struct mm_struct *mm)
>>       if (!p)
>>           return -ESRCH;
>>   +    WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
>>       r = kfd_process_evict_queues(p);
>>         kfd_unref_process(p);
>> @@ -1009,6 +1010,8 @@ int
>> kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
>>       /* During process initialization eviction_work.dwork is
>> initialized
>>        * to kfd_evict_bo_worker
>>        */
>> +    WARN(debug_evictions, "Scheduling eviction of pid %d in %ld
>> jiffies",
>> +         p->lead_thread->pid, delay_jiffies);
>>       schedule_delayed_work(&p->eviction_work, delay_jiffies);
>>   out:
>>       kfd_unref_process(p);
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> index 173d58b2d81f..51ba2020732e 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> @@ -177,6 +177,11 @@ extern bool hws_gws_support;
>>    */
>>   extern int queue_preemption_timeout_ms;
>>   +/*
>> + * Enable eviction debug messages
>> + */
>> +extern bool debug_evictions;
>> +
>>   enum cache_policy {
>>       cache_policy_coherent,
>>       cache_policy_noncoherent
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/1] drm/amdkfd: Add eviction debug messages
  2020-06-12 23:43   ` Felix Kuehling
@ 2020-06-13  2:09     ` philip yang
  0 siblings, 0 replies; 4+ messages in thread
From: philip yang @ 2020-06-13  2:09 UTC (permalink / raw)
  To: Felix Kuehling, amd-gfx; +Cc: gang.ba

Thanks for the explanation.

Reviewed-by: Philip Yang <Philip.Yang@amd.com>

On 2020-06-12 7:43 p.m., Felix Kuehling wrote:

> Am 2020-06-12 um 6:03 p.m. schrieb philip yang:
>> It's good idea, better to add same print in system memory eviction
>> path amdgpu_amdkfd_evict_userptr.
> That's covered by the message in kgd2kfd_quiesce_mm.
>
>
>> Use WARN_ONCE to avoid duplicate messages.
> I want duplicate messages. If many different kinds of evictions are
> happening I want to see them all. The module parameter is there so I can
> turn it on/off for short bursts while interesting things are happening.
> It's off by default.
>
> I was considering WARN_RATELIMIT, but that may skip interesting
> evictions I actually want to see.
>
> Regards,
>    Felix
>
>
>> Regards,
>>
>> Philip
>>
>>
>> On 2020-06-11 11:34 p.m., Felix Kuehling wrote:
>>> Use WARN to print messages with backtrace when evictions are triggered.
>>> This can help determine the root cause of evictions and help spot driver
>>> bugs triggering evictions unintentionally, or help with performance
>>> tuning
>>> by avoiding conditions that cause evictions in a specific workload.
>>>
>>> The messages are controlled by a new module parameter that can be
>>> changed
>>> at runtime:
>>>
>>>     echo Y > /sys/module/amdgpu/parameters/debug_evictions
>>>     echo N > /sys/module/amdgpu/parameters/debug_evictions
>>>
>>> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu.h      | 2 ++
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  | 8 ++++++++
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 2 ++
>>>    drivers/gpu/drm/amd/amdkfd/kfd_device.c  | 3 +++
>>>    drivers/gpu/drm/amd/amdkfd/kfd_priv.h    | 5 +++++
>>>    5 files changed, 20 insertions(+)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index 10ae92e835f6..6c7dd0a707c9 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -186,8 +186,10 @@ extern int amdgpu_noretry;
>>>    extern int amdgpu_force_asic_type;
>>>    #ifdef CONFIG_HSA_AMD
>>>    extern int sched_policy;
>>> +extern bool debug_evictions;
>>>    #else
>>>    static const int sched_policy = KFD_SCHED_POLICY_HWS;
>>> +static const bool debug_evictions; /* = false */
>>>    #endif
>>>      extern int amdgpu_tmz;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> index d4d7cca1cc72..fdf350d5e7b7 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
>>> @@ -705,6 +705,14 @@ MODULE_PARM_DESC(hws_gws_support, "Assume MEC2
>>> FW supports GWS barriers (false =
>>>    int queue_preemption_timeout_ms = 9000;
>>>    module_param(queue_preemption_timeout_ms, int, 0644);
>>>    MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue preemption
>>> timeout in ms (1 = Minimum, 9000 = default)");
>>> +
>>> +/**
>>> + * DOC: debug_evictions(bool)
>>> + * Enable extra debug messages to help determine the cause of evictions
>>> + */
>>> +bool debug_evictions;
>>> +module_param(debug_evictions, bool, 0644);
>>> +MODULE_PARM_DESC(debug_evictions, "enable eviction debug messages
>>> (false = default)");
>>>    #endif
>>>      /**
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
>>> index b87ca171986a..072f0e1185a8 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
>>> @@ -275,6 +275,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
>>> struct amdgpu_sync *sync,
>>>                continue;
>>>            }
>>>    +        WARN(debug_evictions && fence_owner ==
>>> AMDGPU_FENCE_OWNER_KFD,
>>> +             "Adding eviction fence to sync obj");
>>>            r = amdgpu_sync_fence(sync, f, false);
>>>            if (r)
>>>                break;
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>> index 22348cebaf36..80393e0583bb 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>>> @@ -942,6 +942,7 @@ int kgd2kfd_quiesce_mm(struct mm_struct *mm)
>>>        if (!p)
>>>            return -ESRCH;
>>>    +    WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
>>>        r = kfd_process_evict_queues(p);
>>>          kfd_unref_process(p);
>>> @@ -1009,6 +1010,8 @@ int
>>> kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
>>>        /* During process initialization eviction_work.dwork is
>>> initialized
>>>         * to kfd_evict_bo_worker
>>>         */
>>> +    WARN(debug_evictions, "Scheduling eviction of pid %d in %ld
>>> jiffies",
>>> +         p->lead_thread->pid, delay_jiffies);
>>>        schedule_delayed_work(&p->eviction_work, delay_jiffies);
>>>    out:
>>>        kfd_unref_process(p);
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> index 173d58b2d81f..51ba2020732e 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> @@ -177,6 +177,11 @@ extern bool hws_gws_support;
>>>     */
>>>    extern int queue_preemption_timeout_ms;
>>>    +/*
>>> + * Enable eviction debug messages
>>> + */
>>> +extern bool debug_evictions;
>>> +
>>>    enum cache_policy {
>>>        cache_policy_coherent,
>>>        cache_policy_noncoherent
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-06-13  2:09 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-06-12  3:34 [PATCH 1/1] drm/amdkfd: Add eviction debug messages Felix Kuehling
2020-06-12 22:03 ` philip yang
2020-06-12 23:43   ` Felix Kuehling
2020-06-13  2:09     ` philip yang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.