* [RFC PATCH 1/1] drm/amdgpu: wait for sched to become ready on job submit
@ 2020-02-24 16:49 Nirmoy Das
  0 siblings, 0 replies; 5+ messages in thread
From: Nirmoy Das @ 2020-02-24 16:49 UTC (permalink / raw)
  To: amd-gfx
  Cc: alexander.deucher, nirmoy.das, christian.koenig, Monk.Liu, Dennis.Li

On reset, amdgpu can temporarily set a drm sched's ready status to false. drm job
init will fail if none of the drm scheds for a HW IP are ready. This patch makes the
kernel-internal job submission helper, amdgpu_job_submit(), a bit more fault tolerant.

Signed-off-by: Nirmoy Das <nirmoy.das@amd.com>
---
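For context on the failure mode described above: drm_sched_job_init() only picks a
run queue from schedulers that are marked ready, and returns -ENOENT when none is
available for the entity. Roughly (a paraphrased sketch of the scheduler code of this
era, not a verbatim copy):

    int drm_sched_job_init(struct drm_sched_job *job,
                           struct drm_sched_entity *entity,
                           void *owner)
    {
            /* re-selects entity->rq among the entity's schedulers,
             * skipping any whose ready flag is false */
            drm_sched_entity_select_rq(entity);
            if (!entity->rq)
                    return -ENOENT; /* no ready scheduler for this HW IP */

            /* normal job initialization continues from here */
            return 0;
    }
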
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.c     | 35 +++++++++++++++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h     |  5 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c     |  6 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c     |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c     |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c      |  2 +-
 7 files changed, 43 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index d42be880a236..0745df80112f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -139,7 +139,38 @@ void amdgpu_job_free(struct amdgpu_job *job)
 	kfree(job);
 }
 
-int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
+static int amdgpu_job_try_init(struct amdgpu_device *adev,
+			       struct drm_sched_job *base,
+			       struct drm_sched_entity *entity,
+			       void *owner)
+{
+	int r, i;
+
+	r = drm_sched_job_init(base, entity, owner);
+	if (r == -ENOENT) {
+		/* retry till we come out of reset phase */
+		while (!mutex_trylock(&adev->lock_reset))
+			msleep(10);
+		/* retry for a second for the sched to get ready */
+		for (i = 0; i < 100; i++) {
+			msleep(10);
+			r = drm_sched_job_init(base, entity, owner);
+			if (r != -ENOENT)
+				break;
+		}
+
+		mutex_unlock(&adev->lock_reset);
+		/* If we still failed to initialize the job after all
+		 * these retries, the IP is unrecoverable */
+		if (r == -ENOENT)
+			return -ENODEV;
+	}
+
+	return r;
+}
+
+int amdgpu_job_submit(struct amdgpu_device *adev, struct amdgpu_job *job,
+		      struct drm_sched_entity *entity,
 		      void *owner, struct dma_fence **f)
 {
 	enum drm_sched_priority priority;
@@ -149,7 +180,7 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
 	if (!f)
 		return -EINVAL;
 
-	r = drm_sched_job_init(&job->base, entity, owner);
+	r = amdgpu_job_try_init(adev, &job->base, entity, owner);
 	if (r)
 		return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 2e2110dddb76..fed87e96cacc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -70,8 +70,9 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
 
 void amdgpu_job_free_resources(struct amdgpu_job *job);
 void amdgpu_job_free(struct amdgpu_job *job);
-int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
-		      void *owner, struct dma_fence **f);
+int amdgpu_job_submit(struct amdgpu_device *adev, struct amdgpu_job *job,
+		      struct drm_sched_entity *entity, void *owner,
+		      struct dma_fence **f);
 int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
 			     struct dma_fence **fence);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 660867cf2597..adfde07eb75f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -2066,7 +2066,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
 	if (r)
 		goto error_free;
 
-	r = amdgpu_job_submit(job, &adev->mman.entity,
+	r = amdgpu_job_submit(adev, job, &adev->mman.entity,
 			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
 	if (r)
 		goto error_free;
@@ -2137,7 +2137,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 	if (direct_submit)
 		r = amdgpu_job_submit_direct(job, ring, fence);
 	else
-		r = amdgpu_job_submit(job, &adev->mman.entity,
+		r = amdgpu_job_submit(adev, job, &adev->mman.entity,
 				      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
 	if (r)
 		goto error_free;
@@ -2231,7 +2231,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 
 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
 	WARN_ON(job->ibs[0].length_dw > num_dw);
-	r = amdgpu_job_submit(job, &adev->mman.entity,
+	r = amdgpu_job_submit(adev, job, &adev->mman.entity,
 			      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
 	if (r)
 		goto error_free;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 5fd32ad1c575..8ff97b24914e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -1104,7 +1104,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 		if (r)
 			goto err_free;
 
-		r = amdgpu_job_submit(job, &adev->uvd.entity,
+		r = amdgpu_job_submit(adev, job, &adev->uvd.entity,
 				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
 		if (r)
 			goto err_free;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 59ddba137946..e721d3367783 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -554,7 +554,7 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 	if (direct)
 		r = amdgpu_job_submit_direct(job, ring, &f);
 	else
-		r = amdgpu_job_submit(job, &ring->adev->vce.entity,
+		r = amdgpu_job_submit(ring->adev, job, &ring->adev->vce.entity,
 				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
 	if (r)
 		goto err;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index 4cc7881f438c..b536962c22d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -100,7 +100,7 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
 	WARN_ON(ib->length_dw == 0);
 	amdgpu_ring_pad_ib(ring, ib);
 	WARN_ON(ib->length_dw > p->num_dw_left);
-	r = amdgpu_job_submit(p->job, entity, AMDGPU_FENCE_OWNER_VM, &f);
+	r = amdgpu_job_submit(p->adev, p->job, entity, AMDGPU_FENCE_OWNER_VM, &f);
 	if (r)
 		goto error;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 9775eca6fe43..a4aaa2a1f878 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -377,7 +377,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	job->vm_needs_flush = true;
 	job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
-	r = amdgpu_job_submit(job, &adev->mman.entity,
+	r = amdgpu_job_submit(adev, job, &adev->mman.entity,
 			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
 	if (r)
 		goto error_submit;
-- 
2.25.0


* Re: [RFC PATCH 1/1] drm/amdgpu: wait for sched to become ready on job submit
  2020-02-24 18:15 ` Das, Nirmoy
@ 2020-02-24 18:44   ` Christian König
  0 siblings, 0 replies; 5+ messages in thread
From: Christian König @ 2020-02-24 18:44 UTC (permalink / raw)
  To: Das, Nirmoy; +Cc: Deucher, Alexander, Liu, Monk, amd-gfx, Li, Dennis



I think that the IB tests already set the ready flag to false when 
something goes wrong, but for the ring tests you probably want to
double check and maybe generalize that.
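
For reference, the IB-test error handling being referred to already does roughly the
following (paraphrased from amdgpu_ib_ring_tests()); the helper below is a purely
hypothetical illustration of how the same pattern could be generalized to the ring
tests:

    /* paraphrased error path in amdgpu_ib_ring_tests() */
    r = amdgpu_ring_test_ib(ring, tmo);
    if (r) {
            ring->sched.ready = false;
            DRM_DEV_ERROR(adev->dev, "IB test failed on %s (%d)\n",
                          ring->name, r);
    }

    /* hypothetical helper that ring and IB tests could share */
    static void amdgpu_ring_update_ready(struct amdgpu_ring *ring, int r)
    {
            ring->sched.ready = !r;
            if (r)
                    DRM_DEV_ERROR(ring->adev->dev,
                                  "%s test failed (%d), marking sched not ready\n",
                                  ring->name, r);
    }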

Christian.

On 24.02.20 at 19:15, Das, Nirmoy wrote:
>
> Thanks Christian. I will try to send an updated patch soon.
>
> ------------------------------------------------------------------------
> *From:* Koenig, Christian <Christian.Koenig@amd.com>
> *Sent:* Monday, February 24, 2020, 18:06
> *To:* Nirmoy Das
> *Cc:* amd-gfx@lists.freedesktop.org; Deucher, Alexander; Liu, Monk; 
> Li, Dennis; Das, Nirmoy
> *Subject:* Re: [RFC PATCH 1/1] drm/amdgpu: wait for sched to become 
> ready on job submit
>
> Hi Nirmoy,
>
> On 24.02.2020 17:48, Nirmoy Das <nirmoy.aiemd@gmail.com> wrote:
>
>     On reset, amdgpu can set a drm sched's ready status to false
>     temporarily. drm job init will fail if all of the drm scheds are
>     not ready for a HW IP. This patch tries to make kernel's internal
>     drm job submit handle, amdgpu_job_submit() a bit more fault tolerant.
>
>
> I don't think that this approach makes sense. Since it is a front end 
> property we should rather stop setting the scheduler ready status to 
> false during reset.
>
> Instead we should only set it to false when the ring/IB test fails and 
> we can't bring the ring back to life again.
>
> Christian.

* Re: [RFC PATCH 1/1] drm/amdgpu: wait for sched to become ready on job submit
  2020-02-24 17:06 Koenig, Christian
@ 2020-02-24 18:15 ` Das, Nirmoy
  2020-02-24 18:44   ` Christian König
  0 siblings, 1 reply; 5+ messages in thread
From: Das, Nirmoy @ 2020-02-24 18:15 UTC (permalink / raw)
  To: Koenig, Christian; +Cc: Deucher, Alexander, Liu, Monk, amd-gfx, Li,  Dennis




Thanks Christian. I will try to send an updated patch soon.


* Re: [RFC PATCH 1/1] drm/amdgpu: wait for sched to become ready on job submit
@ 2020-02-24 17:06 Koenig, Christian
  2020-02-24 18:15 ` Das, Nirmoy
  0 siblings, 1 reply; 5+ messages in thread
From: Koenig, Christian @ 2020-02-24 17:06 UTC (permalink / raw)
  To: Nirmoy Das
  Cc: Deucher, Alexander, Das, Nirmoy, Liu, Monk, amd-gfx, Li,  Dennis



Hi Nirmoy,

On 24.02.2020 17:48, Nirmoy Das <nirmoy.aiemd@gmail.com> wrote:
On reset, amdgpu can set a drm sched's ready status to false temporarily. drm job
init will fail if all of the drm scheds are not ready for a HW IP. This patch tries to make
kernel's internal drm job submit handle, amdgpu_job_submit() a bit more fault tolerant.

I don't think that this approach makes sense. Since it is a front end property we should rather stop setting the scheduler ready status to false during reset.

Instead we should only set it to false when the ring/IB test fails and we can't bring the ring back to life again.
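
A minimal sketch of that alternative, assuming the existing amdgpu_ring_test_helper()
(which already sets ring->sched.ready from the test result); this is only an
illustration of the suggestion, not code from this thread:

    /* in the post-reset recovery path, instead of force-clearing
     * sched.ready up front, let the ring test decide */
    for (i = 0; i < adev->num_rings; ++i) {
            struct amdgpu_ring *ring = adev->rings[i];

            if (!ring || !ring->funcs->test_ring)
                    continue;

            /* amdgpu_ring_test_helper() sets ring->sched.ready = !r */
            r = amdgpu_ring_test_helper(ring);
            if (r)
                    dev_err(adev->dev, "ring %s did not come back after reset\n",
                            ring->name);
    }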

Christian.



* [RFC PATCH 1/1] drm/amdgpu: wait for sched to become ready on job submit
@ 2020-02-24 16:51 Nirmoy Das
  0 siblings, 0 replies; 5+ messages in thread
From: Nirmoy Das @ 2020-02-24 16:51 UTC (permalink / raw)
  To: amd-gfx
  Cc: alexander.deucher, nirmoy.das, christian.koenig, Monk.Liu, Dennis.Li

On reset, amdgpu can temporarily set a drm sched's ready status to false. drm job
init will fail if none of the drm scheds for a HW IP are ready. This patch makes the
kernel-internal job submission helper, amdgpu_job_submit(), a bit more fault tolerant.


Thread overview: 5 messages
2020-02-24 16:49 [RFC PATCH 1/1] drm/amdgpu: wait for sched to become ready on job submit Nirmoy Das
2020-02-24 16:51 Nirmoy Das
2020-02-24 17:06 Koenig, Christian
2020-02-24 18:15 ` Das, Nirmoy
2020-02-24 18:44   ` Christian König
