All of lore.kernel.org
 help / color / mirror / Atom feed
From: Tomas Elf <tomas.elf@intel.com>
To: Daniel Vetter <daniel@ffwll.ch>
Cc: Intel-GFX@Lists.FreeDesktop.Org
Subject: Re: [PATCH 2/8] drm/i915: Migrate to safe iterators in error state capture
Date: Fri, 09 Oct 2015 12:40:51 +0100	[thread overview]
Message-ID: <5617A7C3.1050903@intel.com> (raw)
In-Reply-To: <20151009082712.GG26718@phenom.ffwll.local>

On 09/10/2015 09:27, Daniel Vetter wrote:
> On Thu, Oct 08, 2015 at 07:31:34PM +0100, Tomas Elf wrote:
>> Using safe list iterators alleviates the problem of unsynchronized driver list
>> manipulations while error state capture is in the process of traversing lists.
>>
>> Signed-off-by: Tomas Elf <tomas.elf@intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_gpu_error.c | 38 +++++++++++++++++------------------
>>   1 file changed, 19 insertions(+), 19 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
>> index 2f04e4f..32c1799 100644
>> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
>> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
>> @@ -718,10 +718,10 @@ static void capture_bo(struct drm_i915_error_buffer *err,
>>   static u32 capture_active_bo(struct drm_i915_error_buffer *err,
>>   			     int count, struct list_head *head)
>>   {
>> -	struct i915_vma *vma;
>> +	struct i915_vma *vma, *tmpvma;
>>   	int i = 0;
>>
>> -	list_for_each_entry(vma, head, mm_list) {
>> +	list_for_each_entry_safe(vma, tmpvma, head, mm_list) {
>
> _safe isn't safe against anything, it's only safe against removal of the
> current list item done with a list_del/list_move from this thread. I.e.
> the only thing it does is have a temporary pointer to the next list
> element so if you delete the current one it won't fall over.
>
> It doesn't protect against anything else, and especially not against other
> threads totally stomping the list.

Absolutely! But it's good enough to make the test not fall over within 
an hour of testing and instead allow it to run for 12+ hours during 
continuous long-duration testing, which is what I need for the TDR 
validation.

>
> So we need proper protection for these lists, independent of
> dev->struct_mutex. The least invasive way to do that is probably rcu,
> which only requires us that we release the memory for any object sitting
> on these lists with kfree_rcu instead of normal kfree. Another option
> might be a spinlock, but that would be a lot more invasive, and Chris
> won't like the execbuf overhead this causes ;-)
> -Daniel

Awesome! Let's go with that then.

Thanks,
Tomas

>
>>   		capture_bo(err++, vma);
>>   		if (++i == count)
>>   			break;
>> @@ -734,17 +734,17 @@ static u32 capture_pinned_bo(struct drm_i915_error_buffer *err,
>>   			     int count, struct list_head *head,
>>   			     struct i915_address_space *vm)
>>   {
>> -	struct drm_i915_gem_object *obj;
>> +	struct drm_i915_gem_object *obj, *tmpobj;
>>   	struct drm_i915_error_buffer * const first = err;
>>   	struct drm_i915_error_buffer * const last = err + count;
>>
>> -	list_for_each_entry(obj, head, global_list) {
>> -		struct i915_vma *vma;
>> +	list_for_each_entry_safe(obj, tmpobj, head, global_list) {
>> +		struct i915_vma *vma, *tmpvma;
>>
>>   		if (err == last)
>>   			break;
>>
>> -		list_for_each_entry(vma, &obj->vma_list, vma_link)
>> +		list_for_each_entry_safe(vma, tmpvma, &obj->vma_list, vma_link)
>>   			if (vma->vm == vm && vma->pin_count > 0)
>>   				capture_bo(err++, vma);
>>   	}
>> @@ -958,13 +958,13 @@ static void i915_gem_record_active_context(struct intel_engine_cs *ring,
>>   					   struct drm_i915_error_ring *ering)
>>   {
>>   	struct drm_i915_private *dev_priv = ring->dev->dev_private;
>> -	struct drm_i915_gem_object *obj;
>> +	struct drm_i915_gem_object *obj, *tmpobj;
>>
>>   	/* Currently render ring is the only HW context user */
>>   	if (ring->id != RCS || !error->ccid)
>>   		return;
>>
>> -	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
>> +	list_for_each_entry_safe(obj, tmpobj, &dev_priv->mm.bound_list, global_list) {
>>   		if (!i915_gem_obj_ggtt_bound(obj))
>>   			continue;
>>
>> @@ -979,7 +979,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
>>   				  struct drm_i915_error_state *error)
>>   {
>>   	struct drm_i915_private *dev_priv = dev->dev_private;
>> -	struct drm_i915_gem_request *request;
>> +	struct drm_i915_gem_request *request, *tmpreq;
>>   	int i, count;
>>
>>   	for (i = 0; i < I915_NUM_RINGS; i++) {
>> @@ -1055,7 +1055,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
>>   		i915_gem_record_active_context(ring, error, &error->ring[i]);
>>
>>   		count = 0;
>> -		list_for_each_entry(request, &ring->request_list, list)
>> +		list_for_each_entry_safe(request, tmpreq, &ring->request_list, list)
>>   			count++;
>>
>>   		error->ring[i].num_requests = count;
>> @@ -1068,7 +1068,7 @@ static void i915_gem_record_rings(struct drm_device *dev,
>>   		}
>>
>>   		count = 0;
>> -		list_for_each_entry(request, &ring->request_list, list) {
>> +		list_for_each_entry_safe(request, tmpreq, &ring->request_list, list) {
>>   			struct drm_i915_error_request *erq;
>>
>>   			erq = &error->ring[i].requests[count++];
>> @@ -1088,17 +1088,17 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
>>   				const int ndx)
>>   {
>>   	struct drm_i915_error_buffer *active_bo = NULL, *pinned_bo = NULL;
>> -	struct drm_i915_gem_object *obj;
>> -	struct i915_vma *vma;
>> +	struct drm_i915_gem_object *obj, *tmpobj;
>> +	struct i915_vma *vma, *tmpvma;
>>   	int i;
>>
>>   	i = 0;
>> -	list_for_each_entry(vma, &vm->active_list, mm_list)
>> +	list_for_each_entry_safe(vma, tmpvma, &vm->active_list, mm_list)
>>   		i++;
>>   	error->active_bo_count[ndx] = i;
>>
>> -	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
>> -		list_for_each_entry(vma, &obj->vma_list, vma_link)
>> +	list_for_each_entry_safe(obj, tmpobj, &dev_priv->mm.bound_list, global_list) {
>> +		list_for_each_entry_safe(vma, tmpvma, &obj->vma_list, vma_link)
>>   			if (vma->vm == vm && vma->pin_count > 0)
>>   				i++;
>>   	}
>> @@ -1128,10 +1128,10 @@ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv,
>>   static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv,
>>   				     struct drm_i915_error_state *error)
>>   {
>> -	struct i915_address_space *vm;
>> +	struct i915_address_space *vm, *tmpvm;
>>   	int cnt = 0, i = 0;
>>
>> -	list_for_each_entry(vm, &dev_priv->vm_list, global_link)
>> +	list_for_each_entry_safe(vm, tmpvm, &dev_priv->vm_list, global_link)
>>   		cnt++;
>>
>>   	error->active_bo = kcalloc(cnt, sizeof(*error->active_bo), GFP_ATOMIC);
>> @@ -1155,7 +1155,7 @@ static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv,
>>   		error->pinned_bo = NULL;
>>   		error->pinned_bo_count = NULL;
>>   	} else {
>> -		list_for_each_entry(vm, &dev_priv->vm_list, global_link)
>> +		list_for_each_entry_safe(vm, tmpvm, &dev_priv->vm_list, global_link)
>>   			i915_gem_capture_vm(dev_priv, error, vm, i++);
>>
>>   		error->vm_count = cnt;
>> --
>> 1.9.1
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

  reply	other threads:[~2015-10-09 11:40 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-10-08 18:31 [PATCH 0/8] Stability improvements to error state capture Tomas Elf
2015-10-08 18:31 ` [PATCH 1/8] drm/i915: Early exit from semaphore_waits_for for execlist mode Tomas Elf
2015-10-08 18:31 ` [PATCH 2/8] drm/i915: Migrate to safe iterators in error state capture Tomas Elf
2015-10-09  7:49   ` Chris Wilson
2015-10-09 11:38     ` Tomas Elf
2015-10-09  8:27   ` Daniel Vetter
2015-10-09 11:40     ` Tomas Elf [this message]
2015-10-13 11:37       ` Daniel Vetter
2015-10-13 11:47         ` Chris Wilson
2015-10-08 18:31 ` [PATCH 3/8] drm/i915: Cope with request list state change during " Tomas Elf
2015-10-09  7:48   ` Chris Wilson
2015-10-09 11:25     ` Tomas Elf
2015-10-13 11:39       ` Daniel Vetter
2015-10-14 11:46         ` Tomas Elf
2015-10-14 12:45           ` Daniel Vetter
2015-10-09  8:28   ` Daniel Vetter
2015-10-09 11:45     ` Tomas Elf
2015-10-13 11:40       ` Daniel Vetter
2015-10-08 18:31 ` [PATCH 4/8] drm/i915: NULL checking when capturing buffer objects " Tomas Elf
2015-10-09  7:49   ` Chris Wilson
2015-10-09 11:34     ` Tomas Elf
2015-10-09  8:32   ` Daniel Vetter
2015-10-09  8:47     ` Chris Wilson
2015-10-09 11:52       ` Tomas Elf
2015-10-09 11:45     ` Tomas Elf
2015-10-08 18:31 ` [PATCH 5/8] drm/i915: vma NULL pointer check Tomas Elf
2015-10-09  7:48   ` Chris Wilson
2015-10-09 11:30     ` Tomas Elf
2015-10-09 11:59       ` Chris Wilson
2015-10-13 11:43         ` Daniel Vetter
2015-10-09  8:33   ` Daniel Vetter
2015-10-09 11:46     ` Tomas Elf
2015-10-08 18:31 ` [PATCH 6/8] drm/i915: Use safe list iterators Tomas Elf
2015-10-09  7:41   ` Chris Wilson
2015-10-09 10:27     ` Tomas Elf
2015-10-09 10:38       ` Chris Wilson
2015-10-09 12:00         ` Tomas Elf
2015-10-08 18:31 ` [PATCH 7/8] drm/i915: Grab execlist spinlock to avoid post-reset concurrency issues Tomas Elf
2015-10-09  7:45   ` Chris Wilson
2015-10-09 10:28     ` Tomas Elf
2015-10-09  8:38   ` Daniel Vetter
2015-10-09  8:45     ` Chris Wilson
2015-10-13 11:46       ` Daniel Vetter
2015-10-13 11:45         ` Chris Wilson
2015-10-13 13:46           ` Daniel Vetter
2015-10-13 14:00             ` Chris Wilson
2015-10-19 15:32   ` [PATCH v2 " Tomas Elf
2015-10-22 16:49     ` Dave Gordon
2015-10-22 17:35       ` Daniel Vetter
2015-10-23  8:42     ` Tvrtko Ursulin
2015-10-23  8:59       ` Daniel Vetter
2015-10-23 11:02         ` Tomas Elf
2015-10-23 12:49           ` Dave Gordon
2015-10-23 13:08     ` [PATCH v3 " Tomas Elf
2015-10-23 14:53       ` Daniel, Thomas
2015-10-23 17:02     ` [PATCH] drm/i915: Update to post-reset execlist queue clean-up Tomas Elf
2015-12-01 11:46       ` Tvrtko Ursulin
2015-12-11 14:14         ` Dave Gordon
2015-12-11 16:40           ` Daniel Vetter
2015-12-14 10:21           ` Mika Kuoppala
2015-10-08 18:31 ` [PATCH 8/8] drm/i915: NULL check of unpin_work Tomas Elf
2015-10-09  7:46   ` Chris Wilson
2015-10-09  8:39     ` Daniel Vetter
2015-10-09 11:50       ` Tomas Elf
2015-10-09 10:30     ` Tomas Elf
2015-10-09 10:44       ` Chris Wilson
2015-10-09 12:06         ` Tomas Elf
2015-10-13 11:51           ` Daniel Vetter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5617A7C3.1050903@intel.com \
    --to=tomas.elf@intel.com \
    --cc=Intel-GFX@Lists.FreeDesktop.Org \
    --cc=daniel@ffwll.ch \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.