All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andres Rodriguez <andresx7-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
To: Andrey Grodzovsky
	<Andrey.Grodzovsky-5C7GfCeVMHo@public.gmane.org>,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
Cc: christian.koenig-5C7GfCeVMHo@public.gmane.org
Subject: Re: [PATCH 1/3] drm/amdgpu: Avoid accessing job->entity after the job is scheduled.
Date: Fri, 20 Oct 2017 11:59:01 -0400	[thread overview]
Message-ID: <ea764df5-6c80-e1cd-a4cd-562c86c7d553@gmail.com> (raw)
In-Reply-To: <1508506327-1084-1-git-send-email-Andrey.Grodzovsky-5C7GfCeVMHo@public.gmane.org>



On 2017-10-20 09:32 AM, Andrey Grodzovsky wrote:
> Bug: amdgpu_job_free_cb was accessing s_job->s_entity when the allocated
> amdgpu_ctx (and the entity inside it) were already deallocated from
> amdgpu_cs_parser_fini.
> 
> Fix: Save job's priority on it's creation instead of accessing it from
> s_entity later on.

I'm not sure if this is the correct approach for a fix.

Keeping s_entity as a dangling pointer could result in a similar bugs 
being reintroduced. For example, there would still be a race condition 
between amdgpu_cs_parser_fini() and amdgpu_job_dependency().

Instead, it might be better for the job to grab a reference to the 
context during job_init(), and put that reference on job free.

Regards,
Andres

> 
> Signed-off-by: Andrey Grodzovsky <Andrey.Grodzovsky@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        |  3 +--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_job.c       |  5 ++---
>   drivers/gpu/drm/amd/scheduler/gpu_scheduler.c |  1 +
>   drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 32 ++++++++++++---------------
>   4 files changed, 18 insertions(+), 23 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index f7fceb6..a760b6e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -1192,8 +1192,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>   	job->uf_sequence = seq;
>   
>   	amdgpu_job_free_resources(job);
> -	amdgpu_ring_priority_get(job->ring,
> -				 amd_sched_get_job_priority(&job->base));
> +	amdgpu_ring_priority_get(job->ring, job->base.s_priority);
>   
>   	trace_amdgpu_cs_ioctl(job);
>   	amd_sched_entity_push_job(&job->base);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> index 0cfc68d..a58e3c5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
> @@ -104,7 +104,7 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job)
>   {
>   	struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);
>   
> -	amdgpu_ring_priority_put(job->ring, amd_sched_get_job_priority(s_job));
> +	amdgpu_ring_priority_put(job->ring, s_job->s_priority);
>   	dma_fence_put(job->fence);
>   	amdgpu_sync_free(&job->sync);
>   	amdgpu_sync_free(&job->dep_sync);
> @@ -141,8 +141,7 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
>   	job->fence_ctx = entity->fence_context;
>   	*f = dma_fence_get(&job->base.s_fence->finished);
>   	amdgpu_job_free_resources(job);
> -	amdgpu_ring_priority_get(job->ring,
> -				 amd_sched_get_job_priority(&job->base));
> +	amdgpu_ring_priority_get(job->ring, job->base.s_priority);
>   	amd_sched_entity_push_job(&job->base);
>   
>   	return 0;
> diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> index e4d3b4e..1bbbce2 100644
> --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
> @@ -529,6 +529,7 @@ int amd_sched_job_init(struct amd_sched_job *job,
>   {
>   	job->sched = sched;
>   	job->s_entity = entity;
> +	job->s_priority = entity->rq - sched->sched_rq;
>   	job->s_fence = amd_sched_fence_create(entity, owner);
>   	if (!job->s_fence)
>   		return -ENOMEM;
> diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> index 52c8e54..3f75b45 100644
> --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
> @@ -30,6 +30,19 @@
>   struct amd_gpu_scheduler;
>   struct amd_sched_rq;
>   
> +enum amd_sched_priority {
> +	AMD_SCHED_PRIORITY_MIN,
> +	AMD_SCHED_PRIORITY_LOW = AMD_SCHED_PRIORITY_MIN,
> +	AMD_SCHED_PRIORITY_NORMAL,
> +	AMD_SCHED_PRIORITY_HIGH_SW,
> +	AMD_SCHED_PRIORITY_HIGH_HW,
> +	AMD_SCHED_PRIORITY_KERNEL,
> +	AMD_SCHED_PRIORITY_MAX,
> +	AMD_SCHED_PRIORITY_INVALID = -1,
> +	AMD_SCHED_PRIORITY_UNSET = -2
> +};
> +
> +
>   /**
>    * A scheduler entity is a wrapper around a job queue or a group
>    * of other entities. Entities take turns emitting jobs from their
> @@ -83,6 +96,7 @@ struct amd_sched_job {
>   	struct delayed_work		work_tdr;
>   	uint64_t			id;
>   	atomic_t karma;
> +	enum amd_sched_priority s_priority;
>   };
>   
>   extern const struct dma_fence_ops amd_sched_fence_ops_scheduled;
> @@ -114,18 +128,6 @@ struct amd_sched_backend_ops {
>   	void (*free_job)(struct amd_sched_job *sched_job);
>   };
>   
> -enum amd_sched_priority {
> -	AMD_SCHED_PRIORITY_MIN,
> -	AMD_SCHED_PRIORITY_LOW = AMD_SCHED_PRIORITY_MIN,
> -	AMD_SCHED_PRIORITY_NORMAL,
> -	AMD_SCHED_PRIORITY_HIGH_SW,
> -	AMD_SCHED_PRIORITY_HIGH_HW,
> -	AMD_SCHED_PRIORITY_KERNEL,
> -	AMD_SCHED_PRIORITY_MAX,
> -	AMD_SCHED_PRIORITY_INVALID = -1,
> -	AMD_SCHED_PRIORITY_UNSET = -2
> -};
> -
>   /**
>    * One scheduler is implemented for each hardware ring
>   */
> @@ -176,10 +178,4 @@ bool amd_sched_dependency_optimized(struct dma_fence* fence,
>   				    struct amd_sched_entity *entity);
>   void amd_sched_job_kickout(struct amd_sched_job *s_job);
>   
> -static inline enum amd_sched_priority
> -amd_sched_get_job_priority(struct amd_sched_job *job)
> -{
> -	return (job->s_entity->rq - job->sched->sched_rq);
> -}
> -
>   #endif
> 
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

  parent reply	other threads:[~2017-10-20 15:59 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-10-20 13:32 [PATCH 1/3] drm/amdgpu: Avoid accessing job->entity after the job is scheduled Andrey Grodzovsky
     [not found] ` <1508506327-1084-1-git-send-email-Andrey.Grodzovsky-5C7GfCeVMHo@public.gmane.org>
2017-10-20 13:32   ` [PATCH 2/3] drm/amdgpu: Add SPSC queue to scheduler Andrey Grodzovsky
     [not found]     ` <1508506327-1084-2-git-send-email-Andrey.Grodzovsky-5C7GfCeVMHo@public.gmane.org>
2017-10-23  4:06       ` Liu, Monk
     [not found]         ` <BLUPR12MB04499B9F742E2CF76139F6BC84460-7LeqcoF/hwpTIQvHjXdJlwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2017-10-23 10:50           ` Andrey Grodzovsky
2017-10-23 12:38       ` Christian König
2017-10-20 13:32   ` [PATCH 3/3] drm/amdgpu: Fix deadlock during GPU reset Andrey Grodzovsky
     [not found]     ` <1508506327-1084-3-git-send-email-Andrey.Grodzovsky-5C7GfCeVMHo@public.gmane.org>
2017-10-23  3:03       ` Liu, Monk
     [not found]         ` <BLUPR12MB04491AF9EAD2623B5EA9D7B784460-7LeqcoF/hwpTIQvHjXdJlwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2017-10-23  7:08           ` Christian König
     [not found]             ` <8153c0a9-8509-c2c3-c53e-e25f6f7e83f1-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-10-23  7:24               ` Liu, Monk
     [not found]                 ` <BLUPR12MB04490DF904B056086D164BDD84460-7LeqcoF/hwpTIQvHjXdJlwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2017-10-23  7:30                   ` Christian König
     [not found]                     ` <a9ed61a0-6e0b-6093-5c0b-4459cfbb2282-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-10-23  7:38                       ` Liu, Monk
     [not found]                         ` <BLUPR12MB0449E8F1F121B277B598430684460-7LeqcoF/hwpTIQvHjXdJlwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2017-10-23  8:46                           ` Liu, Monk
2017-10-23 12:42       ` Christian König
2017-10-20 15:59   ` Andres Rodriguez [this message]
     [not found]     ` <ea764df5-6c80-e1cd-a4cd-562c86c7d553-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-10-20 16:19       ` [PATCH 1/3] drm/amdgpu: Avoid accessing job->entity after the job is scheduled Andrey Grodzovsky
     [not found]         ` <ecb8a9d2-ba94-c7a4-f5d7-156b061245d8-5C7GfCeVMHo@public.gmane.org>
2017-10-20 16:26           ` Andres Rodriguez
     [not found]             ` <86853e29-07c1-700b-8aa2-2183fbc64d2f-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-10-20 16:51               ` Andrey Grodzovsky
     [not found]                 ` <184ba414-981a-02eb-1a6f-7bd878b4f3b1-5C7GfCeVMHo@public.gmane.org>
2017-10-20 18:24                   ` Christian König
     [not found]                     ` <79fd05fa-711c-c454-36a1-54b31adda225-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-10-20 19:31                       ` Andres Rodriguez
2017-10-23 12:36   ` Christian König

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ea764df5-6c80-e1cd-a4cd-562c86c7d553@gmail.com \
    --to=andresx7-re5jqeeqqe8avxtiumwx3w@public.gmane.org \
    --cc=Andrey.Grodzovsky-5C7GfCeVMHo@public.gmane.org \
    --cc=amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org \
    --cc=christian.koenig-5C7GfCeVMHo@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.