* [PATCH 1/2] drm/amdgpu: resolve deadlock between reset and cs_ioctl v4.
@ 2017-10-06 18:20 Andrey Grodzovsky
       [not found] ` <1507314021-18323-1-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Andrey Grodzovsky @ 2017-10-06 18:20 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Andrey Grodzovsky, Christian.Koenig-5C7GfCeVMHo, Monk Liu

From: Monk Liu <Monk.Liu@amd.com>

We need to unreserve the ttm BO before "cs_add_fence" and "entity_push_job",
otherwise there will be a deadlock between "recover_vram_from_shadow"
and the previous two routines on the ttm BO's resv lock.
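
To make the lock ordering concrete, here is a rough sketch of the two
paths involved (simplified pseudocode of the call ordering, not the
exact call chains):

    /* CS path, old ordering (simplified) */
    ttm_eu_reserve_buffers()         /* takes the BOs' resv locks */
    amdgpu_ctx_add_fence()           /* may wait for an old fence */
    amd_sched_entity_push_job()      /* may block on a full job kfifo */
    ttm_eu_fence_buffer_objects()    /* only here are the resv locks dropped */

    /* GPU reset path (simplified) */
    amdgpu_gpu_reset()
        recover_vram_from_shadow()   /* needs those same resv locks */

If the CS path blocks in one of the two middle calls while a reset has
stalled the scheduler, neither side can make progress: the CS path holds
the resv locks the reset path needs, and the reset path holds up the
scheduler work the CS path is waiting on.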

v2:
Add per ctx mutex.

v3:
Relocate mutex acquisition into amdgpu_cs_parser_init and mutex release
into amdgpu_cs_parser_fini to avoid a nested-locking lockup.
Add rollback code for amdgpu_ctx_add_fence in case of error or signal
interruption.

v4:
Refactor amdgpu_cs_ib_vm_chunk and amdgpu_cs_ib_fill to enable
waiting for the old fence before the reservation lock is acquired.

Change-Id: Ia209beab5036bfc2c38cbf18324fa3efd4bab1cf
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     |   1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 164 ++++++++++++++++++--------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c |   4 +
 3 files changed, 100 insertions(+), 69 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 53d8df3..baa2953 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -724,6 +724,7 @@ struct amdgpu_ctx {
 	struct dma_fence	**fences;
 	struct amdgpu_ctx_ring	rings[AMDGPU_MAX_RINGS];
 	bool preamble_presented;
+	struct mutex		lock;
 };
 
 struct amdgpu_ctx_mgr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9f1202a..0fa1bc7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -89,6 +89,9 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 		goto free_chunk;
 	}
 
+
+	mutex_lock(&p->ctx->lock);
+
 	/* get chunks */
 	chunk_array_user = u64_to_user_ptr(cs->in.chunks);
 	if (copy_from_user(chunk_array, chunk_array_user,
@@ -715,28 +718,21 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 /**
  * cs_parser_fini() - clean parser states
  * @parser:	parser structure holding parsing context.
- * @error:	error number
- *
- * If error is set than unvalidate buffer, otherwise just free memory
- * used by parsing context.
  **/
-static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
-				  bool backoff)
+static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
 {
 	unsigned i;
 
-	if (error && backoff)
-		ttm_eu_backoff_reservation(&parser->ticket,
-					   &parser->validated);
-
 	for (i = 0; i < parser->num_post_dep_syncobjs; i++)
 		drm_syncobj_put(parser->post_dep_syncobjs[i]);
 	kfree(parser->post_dep_syncobjs);
 
 	dma_fence_put(parser->fence);
 
-	if (parser->ctx)
+	if (parser->ctx) {
+		mutex_unlock(&parser->ctx->lock);
 		amdgpu_ctx_put(parser->ctx);
+	}
 	if (parser->bo_list)
 		amdgpu_bo_list_put(parser->bo_list);
 
@@ -843,7 +839,72 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
 	struct amdgpu_ring *ring = p->job->ring;
-	int i, r;
+	int i, j, r;
+
+	for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
+
+		struct amdgpu_cs_chunk *chunk;
+		struct amdgpu_ib *ib;
+		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
+
+		chunk = &p->chunks[i];
+		ib = &p->job->ibs[j];
+		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
+
+		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
+					continue;
+
+		if (p->job->ring->funcs->parse_cs) {
+			struct amdgpu_bo_va_mapping *m;
+			struct amdgpu_bo *aobj = NULL;
+			uint64_t offset;
+			uint8_t *kptr;
+
+			r = amdgpu_cs_find_mapping(p, chunk_ib->va_start,
+						   &aobj, &m);
+			if (r) {
+				DRM_ERROR("IB va_start is invalid\n");
+				return r;
+			}
+
+			if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
+				(m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
+				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
+				return -EINVAL;
+			}
+
+			/* the IB should be reserved at this point */
+			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
+			if (r) {
+				return r;
+			}
+
+			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
+			kptr += chunk_ib->va_start - offset;
+
+			r =  amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib);
+			if (r) {
+				DRM_ERROR("Failed to get ib !\n");
+				return r;
+			}
+
+			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
+			amdgpu_bo_kunmap(aobj);
+		} else {
+			r =  amdgpu_ib_get(adev, vm, 0, ib);
+			if (r) {
+				DRM_ERROR("Failed to get ib !\n");
+				return r;
+			}
+
+		}
+
+		ib->gpu_addr = chunk_ib->va_start;
+		ib->length_dw = chunk_ib->ib_bytes / 4;
+		ib->flags = chunk_ib->flags;
+		j++;
+
+	}
 
 	/* Only for UVD/VCE VM emulation */
 	if (ring->funcs->parse_cs) {
@@ -868,19 +929,15 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 			     struct amdgpu_cs_parser *parser)
 {
-	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
-	struct amdgpu_vm *vm = &fpriv->vm;
 	int i, j;
 	int r, ce_preempt = 0, de_preempt = 0;
 
 	for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
 		struct amdgpu_cs_chunk *chunk;
-		struct amdgpu_ib *ib;
 		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
 		struct amdgpu_ring *ring;
 
 		chunk = &parser->chunks[i];
-		ib = &parser->job->ibs[j];
 		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
 
 		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
@@ -917,54 +974,6 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 
 		parser->job->ring = ring;
 
-		if (ring->funcs->parse_cs) {
-			struct amdgpu_bo_va_mapping *m;
-			struct amdgpu_bo *aobj = NULL;
-			uint64_t offset;
-			uint8_t *kptr;
-
-			r = amdgpu_cs_find_mapping(parser, chunk_ib->va_start,
-						   &aobj, &m);
-			if (r) {
-				DRM_ERROR("IB va_start is invalid\n");
-				return r;
-			}
-
-			if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
-			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
-				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
-				return -EINVAL;
-			}
-
-			/* the IB should be reserved at this point */
-			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
-			if (r) {
-				return r;
-			}
-
-			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
-			kptr += chunk_ib->va_start - offset;
-
-			r =  amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib);
-			if (r) {
-				DRM_ERROR("Failed to get ib !\n");
-				return r;
-			}
-
-			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
-			amdgpu_bo_kunmap(aobj);
-		} else {
-			r =  amdgpu_ib_get(adev, vm, 0, ib);
-			if (r) {
-				DRM_ERROR("Failed to get ib !\n");
-				return r;
-			}
-
-		}
-
-		ib->gpu_addr = chunk_ib->va_start;
-		ib->length_dw = chunk_ib->ib_bytes / 4;
-		ib->flags = chunk_ib->flags;
 		j++;
 	}
 
@@ -1160,14 +1169,26 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 
 	amdgpu_cs_post_dependencies(p);
 
+
+	/* hook sched fence to all BOs' reservation in validated list
+	 * and unreserve them.
+	 *
+	 * we unreserve at here is because otherwise
+	 * there'll be deadlock between ctx_add_fence/sched_entity_push_job
+	 * and gpu_reset routine's recover_bo_from_shadow on PD/PTEs' ttm bo lock
+	 */
+	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
+
+
 	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
+
+
 	job->uf_sequence = cs->out.handle;
 	amdgpu_job_free_resources(job);
 
 	trace_amdgpu_cs_ioctl(job);
 	amd_sched_entity_push_job(&job->base);
 
-	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
 	amdgpu_mn_unlock(p->mn);
 
 	return 0;
@@ -1189,6 +1210,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 
 	parser.adev = adev;
 	parser.filp = filp;
+	fpriv = filp->driver_priv;
 
 	r = amdgpu_cs_parser_init(&parser, data);
 	if (r) {
@@ -1196,6 +1218,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 		goto out;
 	}
 
+	r = amdgpu_cs_ib_fill(adev, &parser);
+	if (r)
+		goto out;
+
 	r = amdgpu_cs_parser_bos(&parser, data);
 	if (r) {
 		if (r == -ENOMEM)
@@ -1206,9 +1232,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	}
 
 	reserved_buffers = true;
-	r = amdgpu_cs_ib_fill(adev, &parser);
-	if (r)
-		goto out;
 
 	r = amdgpu_cs_dependencies(adev, &parser);
 	if (r) {
@@ -1226,7 +1249,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	r = amdgpu_cs_submit(&parser, cs);
 
 out:
-	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
+	if (r && reserved_buffers)
+		ttm_eu_backoff_reservation(&parser.ticket, &parser.validated);
+
+	amdgpu_cs_parser_fini(&parser);
 	return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index a11e443..c073a68 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -39,6 +39,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
 	if (!ctx->fences)
 		return -ENOMEM;
 
+	mutex_init(&ctx->lock);
+
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		ctx->rings[i].sequence = 1;
 		ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
@@ -96,6 +98,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
 				      &ctx->rings[i].entity);
 
 	amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
+
+	mutex_destroy(&ctx->lock);
 }
 
 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
-- 
2.7.4


* [PATCH 2/2] drm/amdgpu: Move old fence waiting before reservation lock is acquired.
       [not found] ` <1507314021-18323-1-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
@ 2017-10-06 18:20   ` Andrey Grodzovsky
       [not found]     ` <1507314021-18323-2-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
  2017-10-09  2:56   ` [PATCH 1/2] drm/amdgpu: resolve deadlock between reset and cs_ioctl v4 Chunming Zhou
  2017-10-09  8:34   ` Christian König
  2 siblings, 1 reply; 18+ messages in thread
From: Andrey Grodzovsky @ 2017-10-06 18:20 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Andrey Grodzovsky, Christian.Koenig-5C7GfCeVMHo

From: Andrey Grodzovsky <Andrey.Grodzovsky@amd.com>

This allows us to avoid a deadlock during GPU reset.
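
As an illustrative note on the slot arithmetic used below (assuming the
default amdgpu_sched_jobs = 32, a power of two):

    idx = sequence & (amdgpu_sched_jobs - 1)
        = 100 & 31 = 4                  /* for sequence == 100 */

so submission 100 waits on the fence left in slot 4 by submission 68
(100 - 32), i.e. the submission amdgpu_sched_jobs CSes ago. After this
patch that wait happens in amdgpu_cs_ib_fill(), before
ttm_eu_reserve_buffers() takes any BO's resv lock, rather than inside
amdgpu_ctx_add_fence() while the locks are held.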

Change-Id: I817e351b02e653f078063c57cec8a0d94062de12
Signed-off-by: Andrey Grodzovsky <Andrey.Grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 15 +++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c |  8 ++------
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 0fa1bc7..79fefc8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -931,6 +931,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 {
 	int i, j;
 	int r, ce_preempt = 0, de_preempt = 0;
+	struct amdgpu_ctx_ring *cring;
+	unsigned idx = 0;
+	struct dma_fence *other = NULL;
 
 	for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
 		struct amdgpu_cs_chunk *chunk;
@@ -983,6 +986,18 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 	    parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
 		return -EINVAL;
 
+	cring = &parser->ctx->rings[parser->job->ring->idx];
+	idx = cring->sequence & (amdgpu_sched_jobs - 1);
+	other = cring->fences[idx];
+	if (other) {
+		signed long r;
+		r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
+		if (r < 0) {
+			DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+			return r;
+		}
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index c073a68..758b643 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -260,12 +260,8 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
 
 	idx = seq & (amdgpu_sched_jobs - 1);
 	other = cring->fences[idx];
-	if (other) {
-		signed long r;
-		r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
-		if (r < 0)
-			DRM_ERROR("Error (%ld) waiting for fence!\n", r);
-	}
+	if (other)
+		BUG_ON(!dma_fence_is_signaled(other));
 
 	dma_fence_get(fence);
 
-- 
2.7.4


* Re: [PATCH 1/2] drm/amdgpu: resolve deadlock between reset and cs_ioctl v4.
       [not found] ` <1507314021-18323-1-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
  2017-10-06 18:20   ` [PATCH 2/2] drm/amdgpu: Move old fence waiting before reservation lock is acquired Andrey Grodzovsky
@ 2017-10-09  2:56   ` Chunming Zhou
  2017-10-09  8:34   ` Christian König
  2 siblings, 0 replies; 18+ messages in thread
From: Chunming Zhou @ 2017-10-09  2:56 UTC (permalink / raw)
  To: Andrey Grodzovsky, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Christian.Koenig-5C7GfCeVMHo, Monk Liu

You added the ctx mutex to keep the job pushing in order, that's good.
Acked-by: Chunming Zhou <david1.zhou@amd.com>


BTW: after you added the ctx mutex for CS, I think the thread lock in
libdrm isn't needed any more; we can remove it now.


Regards,

David Zhou


On 2017-10-07 02:20, Andrey Grodzovsky wrote:
> [patch 1/2 quoted in full, without further inline comments - snipped]


* RE: [PATCH 2/2] drm/amdgpu: Move old fence waiting before reservation lock is acquired.
       [not found]     ` <1507314021-18323-2-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
@ 2017-10-09  6:21       ` Liu, Monk
  2017-10-09  8:36       ` Christian König
  1 sibling, 0 replies; 18+ messages in thread
From: Liu, Monk @ 2017-10-09  6:21 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Grodzovsky, Andrey, Koenig, Christian

Hi Andrey,

Where is your first patch (1/2)? I only saw the 2/2.

BR Monk

-----Original Message-----
[patch 2/2 quoted in full - snipped; see the message above]

* Re: [PATCH 1/2] drm/amdgpu: resolve deadlock between reset and cs_ioctl v4.
       [not found] ` <1507314021-18323-1-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
  2017-10-06 18:20   ` [PATCH 2/2] drm/amdgpu: Move old fence waiting before reservation lock is acquired Andrey Grodzovsky
  2017-10-09  2:56   ` [PATCH 1/2] drm/amdgpu: resolve deadlock between reset and cs_ioctl v4 Chunming Zhou
@ 2017-10-09  8:34   ` Christian König
       [not found]     ` <9fc20357-db3e-3823-3073-c70ead162f4e-5C7GfCeVMHo@public.gmane.org>
  2 siblings, 1 reply; 18+ messages in thread
From: Christian König @ 2017-10-09  8:34 UTC (permalink / raw)
  To: Andrey Grodzovsky, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

On 06.10.2017 at 20:20, Andrey Grodzovsky wrote:
> From: Monk Liu <Monk.Liu@amd.com>
>
> We need to unreserve the ttm BO before "cs_add_fence" and "entity_push_job",
> otherwise there will be a deadlock between "recover_vram_from_shadow"
> and the previous two routines on the ttm BO's resv lock.
>
> v2:
> Add per ctx mutex.
>
> v3:
> Relocate mutex acquisition into amdgpu_cs_parser_init and mutex release
> into amdgpu_cs_parser_fini to avoid a nested-locking lockup.
> Add rollback code for amdgpu_ctx_add_fence in case of error or signal
> interruption.
>
> v4:
> Refactor amdgpu_cs_ib_vm_chunk and amdgpu_cs_ib_fill to enable
> waiting for the old fence before the reservation lock is acquired.
>
> Change-Id: Ia209beab5036bfc2c38cbf18324fa3efd4bab1cf
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |   1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 164 ++++++++++++++++++--------------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c |   4 +
>   3 files changed, 100 insertions(+), 69 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 53d8df3..baa2953 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -724,6 +724,7 @@ struct amdgpu_ctx {
>   	struct dma_fence	**fences;
>   	struct amdgpu_ctx_ring	rings[AMDGPU_MAX_RINGS];
>   	bool preamble_presented;
> +	struct mutex		lock;
>   };
>   
>   struct amdgpu_ctx_mgr {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 9f1202a..0fa1bc7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -89,6 +89,9 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
>   		goto free_chunk;
>   	}
>   
> +
> +	mutex_lock(&p->ctx->lock);
> +
>   	/* get chunks */
>   	chunk_array_user = u64_to_user_ptr(cs->in.chunks);
>   	if (copy_from_user(chunk_array, chunk_array_user,
> @@ -715,28 +718,21 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
>   /**
>    * cs_parser_fini() - clean parser states
>    * @parser:	parser structure holding parsing context.
> - * @error:	error number
> - *
> - * If error is set than unvalidate buffer, otherwise just free memory
> - * used by parsing context.
>    **/
> -static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
> -				  bool backoff)
> +static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)

You can now drop this change and keep the logic as it is since we moved 
the wait before taking the reservation locks.

>   {
>   	unsigned i;
>   
> -	if (error && backoff)
> -		ttm_eu_backoff_reservation(&parser->ticket,
> -					   &parser->validated);
> -
>   	for (i = 0; i < parser->num_post_dep_syncobjs; i++)
>   		drm_syncobj_put(parser->post_dep_syncobjs[i]);
>   	kfree(parser->post_dep_syncobjs);
>   
>   	dma_fence_put(parser->fence);
>   
> -	if (parser->ctx)
> +	if (parser->ctx) {
> +		mutex_unlock(&parser->ctx->lock);
>   		amdgpu_ctx_put(parser->ctx);
> +	}
>   	if (parser->bo_list)
>   		amdgpu_bo_list_put(parser->bo_list);
>   
> @@ -843,7 +839,72 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
>   	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>   	struct amdgpu_vm *vm = &fpriv->vm;
>   	struct amdgpu_ring *ring = p->job->ring;
> -	int i, r;
> +	int i, j, r;
> +
> +	for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
> +
> +		struct amdgpu_cs_chunk *chunk;
> +		struct amdgpu_ib *ib;
> +		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
> +
> +		chunk = &p->chunks[i];
> +		ib = &p->job->ibs[j];
> +		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
> +
> +		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
> +					continue;
> +
> +		if (p->job->ring->funcs->parse_cs) {
> +			struct amdgpu_bo_va_mapping *m;
> +			struct amdgpu_bo *aobj = NULL;
> +			uint64_t offset;
> +			uint8_t *kptr;
> +
> +			r = amdgpu_cs_find_mapping(p, chunk_ib->va_start,
> +						   &aobj, &m);
> +			if (r) {
> +				DRM_ERROR("IB va_start is invalid\n");
> +				return r;
> +			}
> +
> +			if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
> +				(m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
> +				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
> +				return -EINVAL;
> +			}
> +
> +			/* the IB should be reserved at this point */
> +			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
> +			if (r) {
> +				return r;
> +			}
> +
> +			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
> +			kptr += chunk_ib->va_start - offset;
> +
> +			r =  amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib);
> +			if (r) {
> +				DRM_ERROR("Failed to get ib !\n");
> +				return r;
> +			}
> +
> +			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
> +			amdgpu_bo_kunmap(aobj);
> +		} else {
> +			r =  amdgpu_ib_get(adev, vm, 0, ib);
> +			if (r) {
> +				DRM_ERROR("Failed to get ib !\n");
> +				return r;
> +			}
> +
> +		}
> +
> +		ib->gpu_addr = chunk_ib->va_start;
> +		ib->length_dw = chunk_ib->ib_bytes / 4;
> +		ib->flags = chunk_ib->flags;

Please keep the calls to amdgpu_ib_get() inside amdgpu_cs_ib_fill().

> +		j++;
> +
> +	}
>   
>   	/* Only for UVD/VCE VM emulation */
>   	if (ring->funcs->parse_cs) {
> @@ -868,19 +929,15 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
>   static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
>   			     struct amdgpu_cs_parser *parser)
>   {
> -	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
> -	struct amdgpu_vm *vm = &fpriv->vm;
>   	int i, j;
>   	int r, ce_preempt = 0, de_preempt = 0;
>   
>   	for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
>   		struct amdgpu_cs_chunk *chunk;
> -		struct amdgpu_ib *ib;
>   		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
>   		struct amdgpu_ring *ring;
>   
>   		chunk = &parser->chunks[i];
> -		ib = &parser->job->ibs[j];
>   		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
>   
>   		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
> @@ -917,54 +974,6 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
>   
>   		parser->job->ring = ring;
>   
> -		if (ring->funcs->parse_cs) {
> -			struct amdgpu_bo_va_mapping *m;
> -			struct amdgpu_bo *aobj = NULL;
> -			uint64_t offset;
> -			uint8_t *kptr;
> -
> -			r = amdgpu_cs_find_mapping(parser, chunk_ib->va_start,
> -						   &aobj, &m);
> -			if (r) {
> -				DRM_ERROR("IB va_start is invalid\n");
> -				return r;
> -			}
> -
> -			if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
> -			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
> -				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
> -				return -EINVAL;
> -			}
> -
> -			/* the IB should be reserved at this point */
> -			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
> -			if (r) {
> -				return r;
> -			}
> -
> -			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
> -			kptr += chunk_ib->va_start - offset;
> -
> -			r =  amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib);
> -			if (r) {
> -				DRM_ERROR("Failed to get ib !\n");
> -				return r;
> -			}
> -
> -			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
> -			amdgpu_bo_kunmap(aobj);
> -		} else {
> -			r =  amdgpu_ib_get(adev, vm, 0, ib);
> -			if (r) {
> -				DRM_ERROR("Failed to get ib !\n");
> -				return r;
> -			}
> -
> -		}
> -
> -		ib->gpu_addr = chunk_ib->va_start;
> -		ib->length_dw = chunk_ib->ib_bytes / 4;
> -		ib->flags = chunk_ib->flags;
>   		j++;
>   	}
>   
> @@ -1160,14 +1169,26 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>   
>   	amdgpu_cs_post_dependencies(p);
>   
> +
> +	/* hook sched fence to all BOs' reservation in validated list
> +	 * and unreserve them.
> +	 *
> +	 * we unreserve at here is because otherwise
> +	 * there'll be deadlock between ctx_add_fence/sched_entity_push_job
> +	 * and gpu_reset routine's recover_bo_from_shadow on PD/PTEs' ttm bo lock
> +	 */
> +	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
> +
> +
>   	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
> +
> +

Please drop that change. It isn't necessary any more after you moved the 
waiting outside of the BO lock.

Ditto for most of the other changes as well.

Regards,
Christian.

>   	job->uf_sequence = cs->out.handle;
>   	amdgpu_job_free_resources(job);
>   
>   	trace_amdgpu_cs_ioctl(job);
>   	amd_sched_entity_push_job(&job->base);
>   
> -	ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>   	amdgpu_mn_unlock(p->mn);
>   
>   	return 0;
> @@ -1189,6 +1210,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>   
>   	parser.adev = adev;
>   	parser.filp = filp;
> +	fpriv = filp->driver_priv;
>   
>   	r = amdgpu_cs_parser_init(&parser, data);
>   	if (r) {
> @@ -1196,6 +1218,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>   		goto out;
>   	}
>   
> +	r = amdgpu_cs_ib_fill(adev, &parser);
> +	if (r)
> +		goto out;
> +
>   	r = amdgpu_cs_parser_bos(&parser, data);
>   	if (r) {
>   		if (r == -ENOMEM)
> @@ -1206,9 +1232,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>   	}
>   
>   	reserved_buffers = true;
> -	r = amdgpu_cs_ib_fill(adev, &parser);
> -	if (r)
> -		goto out;
>   
>   	r = amdgpu_cs_dependencies(adev, &parser);
>   	if (r) {
> @@ -1226,7 +1249,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>   	r = amdgpu_cs_submit(&parser, cs);
>   
>   out:
> -	amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
> +	if (r && reserved_buffers)
> +		ttm_eu_backoff_reservation(&parser.ticket, &parser.validated);
> +
> +	amdgpu_cs_parser_fini(&parser);
>   	return r;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index a11e443..c073a68 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -39,6 +39,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
>   	if (!ctx->fences)
>   		return -ENOMEM;
>   
> +	mutex_init(&ctx->lock);
> +
>   	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>   		ctx->rings[i].sequence = 1;
>   		ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
> @@ -96,6 +98,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
>   				      &ctx->rings[i].entity);
>   
>   	amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
> +
> +	mutex_destroy(&ctx->lock);
>   }
>   
>   static int amdgpu_ctx_alloc(struct amdgpu_device *adev,



* Re: [PATCH 2/2] drm/amdgpu: Move old fence waiting before reservation lock is acquired.
       [not found]     ` <1507314021-18323-2-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
  2017-10-09  6:21       ` Liu, Monk
@ 2017-10-09  8:36       ` Christian König
       [not found]         ` <715810ad-26cc-506d-624d-8e5024968ea0-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  1 sibling, 1 reply; 18+ messages in thread
From: Christian König @ 2017-10-09  8:36 UTC (permalink / raw)
  To: Andrey Grodzovsky, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Christian.Koenig-5C7GfCeVMHo

On 06.10.2017 at 20:20, Andrey Grodzovsky wrote:
> From: Andrey Grodzovsky <Andrey.Grodzovsky@amd.com>
>
> This allows us to avoid a deadlock during GPU reset.
>
> Change-Id: I817e351b02e653f078063c57cec8a0d94062de12
> Signed-off-by: Andrey Grodzovsky <Andrey.Grodzovsky@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 15 +++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c |  8 ++------
>   2 files changed, 17 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 0fa1bc7..79fefc8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -931,6 +931,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
>   {
>   	int i, j;
>   	int r, ce_preempt = 0, de_preempt = 0;
> +	struct amdgpu_ctx_ring *cring;
> +	unsigned idx = 0;
> +	struct dma_fence *other = NULL;
>   
>   	for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
>   		struct amdgpu_cs_chunk *chunk;
> @@ -983,6 +986,18 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
>   	    parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
>   		return -EINVAL;
>   
> +	cring = &parser->ctx->rings[parser->job->ring->idx];
> +	idx = cring->sequence & (amdgpu_sched_jobs - 1);
> +	other = cring->fences[idx];
> +	if (other) {
> +		signed long r;
> +		r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
> +		if (r < 0) {
> +			DRM_ERROR("Error (%ld) waiting for fence!\n", r);
> +			return r;
> +		}
> +	}
> +

Please move that into a helper function in amdgpu_ctx.c
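
For illustration only, such a helper might look roughly like this (the
name and exact signature here are only a suggestion, not merged code):

    /* Hypothetical helper in amdgpu_ctx.c - illustrative sketch only */
    int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id)
    {
            struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id];
            unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
            struct dma_fence *other = cring->fences[idx];

            if (other) {
                    signed long r;

                    r = dma_fence_wait_timeout(other, false,
                                               MAX_SCHEDULE_TIMEOUT);
                    if (r < 0) {
                            DRM_ERROR("Error (%ld) waiting for fence!\n", r);
                            return r;
                    }
            }

            return 0;
    }

with the block in amdgpu_cs_ib_fill() reduced to something like:

            r = amdgpu_ctx_wait_prev_fence(parser->ctx,
                                           parser->job->ring->idx);
            if (r)
                    return r;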

Apart from that the change looks good to me.

Christian.

>   	return 0;
>   }
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index c073a68..758b643 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -260,12 +260,8 @@ uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
>   
>   	idx = seq & (amdgpu_sched_jobs - 1);
>   	other = cring->fences[idx];
> -	if (other) {
> -		signed long r;
> -		r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
> -		if (r < 0)
> -			DRM_ERROR("Error (%ld) waiting for fence!\n", r);
> -	}
> +	if (other)
> +		BUG_ON(!dma_fence_is_signaled(other));
>   
>   	dma_fence_get(fence);
>   



* Re: [PATCH 1/2] drm/amdgpu: resolve deadlock between reset and cs_ioctl v4.
       [not found]     ` <9fc20357-db3e-3823-3073-c70ead162f4e-5C7GfCeVMHo@public.gmane.org>
@ 2017-10-10  3:31       ` andrey
       [not found]         ` <da1bc7eb-acce-b11b-44ba-694adf19cb59-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: andrey @ 2017-10-10  3:31 UTC (permalink / raw)
  To: Christian König, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu



On 2017-10-09 04:34 AM, Christian König wrote:
> On 06.10.2017 at 20:20, Andrey Grodzovsky wrote:
>> From: Monk Liu <Monk.Liu@amd.com>
>>
>> We need to unreserve the ttm BO before "cs_add_fence" and "entity_push_job",
>> otherwise there will be a deadlock between "recover_vram_from_shadow"
>> and the previous two routines on the ttm BO's resv lock.
>>
>> v2:
>> Add per ctx mutex.
>>
>> v3:
>> Relocate mutex acquisition into amdgpu_cs_parser_init and mutex release
>> into amdgpu_cs_parser_fini to avoid a nested-locking lockup.
>> Add rollback code for amdgpu_ctx_add_fence in case of error or signal
>> interruption.
>>
>> v4:
>> Refactor amdgpu_cs_ib_vm_chunk and amdgpu_cs_ib_fill to enable
>> waiting for the old fence before the reservation lock is acquired.
>>
>> Change-Id: Ia209beab5036bfc2c38cbf18324fa3efd4bab1cf
>> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |   1 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 164 
>> ++++++++++++++++++--------------
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c |   4 +
>>   3 files changed, 100 insertions(+), 69 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 53d8df3..baa2953 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -724,6 +724,7 @@ struct amdgpu_ctx {
>>       struct dma_fence    **fences;
>>       struct amdgpu_ctx_ring    rings[AMDGPU_MAX_RINGS];
>>       bool preamble_presented;
>> +    struct mutex        lock;
>>   };
>>     struct amdgpu_ctx_mgr {
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index 9f1202a..0fa1bc7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -89,6 +89,9 @@ static int amdgpu_cs_parser_init(struct 
>> amdgpu_cs_parser *p, void *data)
>>           goto free_chunk;
>>       }
>>   +
>> +    mutex_lock(&p->ctx->lock);
>> +
>>       /* get chunks */
>>       chunk_array_user = u64_to_user_ptr(cs->in.chunks);
>>       if (copy_from_user(chunk_array, chunk_array_user,
>> @@ -715,28 +718,21 @@ static int amdgpu_cs_sync_rings(struct 
>> amdgpu_cs_parser *p)
>>   /**
>>    * cs_parser_fini() - clean parser states
>>    * @parser:    parser structure holding parsing context.
>> - * @error:    error number
>> - *
>> - * If error is set than unvalidate buffer, otherwise just free memory
>> - * used by parsing context.
>>    **/
>> -static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, 
>> int error,
>> -                  bool backoff)
>> +static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
>
> You can now drop this change and keep the logic as it is since we 
> moved the wait before taking the reservation locks.

By "this change" here and in the comment later bellow I guess you mean 
the original change by Monk about moving ttm_eu_fence_buffer_objects to 
before amd_sched_entity_push_job ? In that case I am not sure how we can 
drop it since the wait we moved in the second patch was the one for the 
old fence while Monk's change was about the wait happening when the 
kfifo is full. Isn't reverting his change will just put us again in the 
original deadlock of blocking on full kfifo while holding the 
reservation lock during gpu reset with VRAM recovery ?

Thanks,
Andrey

>
>>   {
>>       unsigned i;
>>   -    if (error && backoff)
>> -        ttm_eu_backoff_reservation(&parser->ticket,
>> -                       &parser->validated);
>> -
>>       for (i = 0; i < parser->num_post_dep_syncobjs; i++)
>>           drm_syncobj_put(parser->post_dep_syncobjs[i]);
>>       kfree(parser->post_dep_syncobjs);
>>         dma_fence_put(parser->fence);
>>   -    if (parser->ctx)
>> +    if (parser->ctx) {
>> +        mutex_unlock(&parser->ctx->lock);
>>           amdgpu_ctx_put(parser->ctx);
>> +    }
>>       if (parser->bo_list)
>>           amdgpu_bo_list_put(parser->bo_list);
>>   @@ -843,7 +839,72 @@ static int amdgpu_cs_ib_vm_chunk(struct 
>> amdgpu_device *adev,
>>       struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>>       struct amdgpu_vm *vm = &fpriv->vm;
>>       struct amdgpu_ring *ring = p->job->ring;
>> -    int i, r;
>> +    int i, j, r;
>> +
>> +    for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
>> +
>> +        struct amdgpu_cs_chunk *chunk;
>> +        struct amdgpu_ib *ib;
>> +        struct drm_amdgpu_cs_chunk_ib *chunk_ib;
>> +
>> +        chunk = &p->chunks[i];
>> +        ib = &p->job->ibs[j];
>> +        chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
>> +
>> +        if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
>> +                    continue;
>> +
>> +        if (p->job->ring->funcs->parse_cs) {
>> +            struct amdgpu_bo_va_mapping *m;
>> +            struct amdgpu_bo *aobj = NULL;
>> +            uint64_t offset;
>> +            uint8_t *kptr;
>> +
>> +            r = amdgpu_cs_find_mapping(p, chunk_ib->va_start,
>> +                           &aobj, &m);
>> +            if (r) {
>> +                DRM_ERROR("IB va_start is invalid\n");
>> +                return r;
>> +            }
>> +
>> +            if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
>> +                (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
>> +                DRM_ERROR("IB va_start+ib_bytes is invalid\n");
>> +                return -EINVAL;
>> +            }
>> +
>> +            /* the IB should be reserved at this point */
>> +            r = amdgpu_bo_kmap(aobj, (void **)&kptr);
>> +            if (r) {
>> +                return r;
>> +            }
>> +
>> +            offset = m->start * AMDGPU_GPU_PAGE_SIZE;
>> +            kptr += chunk_ib->va_start - offset;
>> +
>> +            r =  amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib);
>> +            if (r) {
>> +                DRM_ERROR("Failed to get ib !\n");
>> +                return r;
>> +            }
>> +
>> +            memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
>> +            amdgpu_bo_kunmap(aobj);
>> +        } else {
>> +            r =  amdgpu_ib_get(adev, vm, 0, ib);
>> +            if (r) {
>> +                DRM_ERROR("Failed to get ib !\n");
>> +                return r;
>> +            }
>> +
>> +        }
>> +
>> +        ib->gpu_addr = chunk_ib->va_start;
>> +        ib->length_dw = chunk_ib->ib_bytes / 4;
>> +        ib->flags = chunk_ib->flags;
>
> Please keep the calls to amdgpu_ib_get() inside amdgpu_cs_ib_fill().
>
>> +        j++;
>> +
>> +    }
>>         /* Only for UVD/VCE VM emulation */
>>       if (ring->funcs->parse_cs) {
>> @@ -868,19 +929,15 @@ static int amdgpu_cs_ib_vm_chunk(struct 
>> amdgpu_device *adev,
>>   static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
>>                    struct amdgpu_cs_parser *parser)
>>   {
>> -    struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
>> -    struct amdgpu_vm *vm = &fpriv->vm;
>>       int i, j;
>>       int r, ce_preempt = 0, de_preempt = 0;
>>         for (i = 0, j = 0; i < parser->nchunks && j < 
>> parser->job->num_ibs; i++) {
>>           struct amdgpu_cs_chunk *chunk;
>> -        struct amdgpu_ib *ib;
>>           struct drm_amdgpu_cs_chunk_ib *chunk_ib;
>>           struct amdgpu_ring *ring;
>>             chunk = &parser->chunks[i];
>> -        ib = &parser->job->ibs[j];
>>           chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
>>             if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
>> @@ -917,54 +974,6 @@ static int amdgpu_cs_ib_fill(struct 
>> amdgpu_device *adev,
>>             parser->job->ring = ring;
>>   -        if (ring->funcs->parse_cs) {
>> -            struct amdgpu_bo_va_mapping *m;
>> -            struct amdgpu_bo *aobj = NULL;
>> -            uint64_t offset;
>> -            uint8_t *kptr;
>> -
>> -            r = amdgpu_cs_find_mapping(parser, chunk_ib->va_start,
>> -                           &aobj, &m);
>> -            if (r) {
>> -                DRM_ERROR("IB va_start is invalid\n");
>> -                return r;
>> -            }
>> -
>> -            if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
>> -                (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
>> -                DRM_ERROR("IB va_start+ib_bytes is invalid\n");
>> -                return -EINVAL;
>> -            }
>> -
>> -            /* the IB should be reserved at this point */
>> -            r = amdgpu_bo_kmap(aobj, (void **)&kptr);
>> -            if (r) {
>> -                return r;
>> -            }
>> -
>> -            offset = m->start * AMDGPU_GPU_PAGE_SIZE;
>> -            kptr += chunk_ib->va_start - offset;
>> -
>> -            r =  amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib);
>> -            if (r) {
>> -                DRM_ERROR("Failed to get ib !\n");
>> -                return r;
>> -            }
>> -
>> -            memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
>> -            amdgpu_bo_kunmap(aobj);
>> -        } else {
>> -            r =  amdgpu_ib_get(adev, vm, 0, ib);
>> -            if (r) {
>> -                DRM_ERROR("Failed to get ib !\n");
>> -                return r;
>> -            }
>> -
>> -        }
>> -
>> -        ib->gpu_addr = chunk_ib->va_start;
>> -        ib->length_dw = chunk_ib->ib_bytes / 4;
>> -        ib->flags = chunk_ib->flags;
>>           j++;
>>       }
>>   @@ -1160,14 +1169,26 @@ static int amdgpu_cs_submit(struct 
>> amdgpu_cs_parser *p,
>>         amdgpu_cs_post_dependencies(p);
>>   +
>> +    /* hook sched fence to all BOs' reservation in validated list
>> +     * and unreserve them.
>> +     *
>> +     * we unreserve at here is because otherwise
>> +     * there'll be deadlock between ctx_add_fence/sched_entity_push_job
>> +     * and gpu_reset routine's recover_bo_from_shadow on PD/PTEs' 
>> ttm bo lock
>> +     */
>> +    ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>> +
>> +
>>       cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
>> +
>> +
>
> Please drop that change. It isn't necessary any more after you moved 
> the waiting outside of the BO lock.
>
> Dito for most of the other changes as well.
>
> Regards,
> Christian.
>
>>       job->uf_sequence = cs->out.handle;
>>       amdgpu_job_free_resources(job);
>>         trace_amdgpu_cs_ioctl(job);
>>       amd_sched_entity_push_job(&job->base);
>>   -    ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>>       amdgpu_mn_unlock(p->mn);
>>         return 0;
>> @@ -1189,6 +1210,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, 
>> void *data, struct drm_file *filp)
>>         parser.adev = adev;
>>       parser.filp = filp;
>> +    fpriv = filp->driver_priv;
>>         r = amdgpu_cs_parser_init(&parser, data);
>>       if (r) {
>> @@ -1196,6 +1218,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, 
>> void *data, struct drm_file *filp)
>>           goto out;
>>       }
>>   +    r = amdgpu_cs_ib_fill(adev, &parser);
>> +    if (r)
>> +        goto out;
>> +
>>       r = amdgpu_cs_parser_bos(&parser, data);
>>       if (r) {
>>           if (r == -ENOMEM)
>> @@ -1206,9 +1232,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, 
>> void *data, struct drm_file *filp)
>>       }
>>         reserved_buffers = true;
>> -    r = amdgpu_cs_ib_fill(adev, &parser);
>> -    if (r)
>> -        goto out;
>>         r = amdgpu_cs_dependencies(adev, &parser);
>>       if (r) {
>> @@ -1226,7 +1249,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, 
>> void *data, struct drm_file *filp)
>>       r = amdgpu_cs_submit(&parser, cs);
>>     out:
>> -    amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
>> +    if (r && reserved_buffers)
>> +        ttm_eu_backoff_reservation(&parser.ticket, &parser.validated);
>> +
>> +    amdgpu_cs_parser_fini(&parser);
>>       return r;
>>   }
>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> index a11e443..c073a68 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> @@ -39,6 +39,8 @@ static int amdgpu_ctx_init(struct amdgpu_device 
>> *adev, struct amdgpu_ctx *ctx)
>>       if (!ctx->fences)
>>           return -ENOMEM;
>>   +    mutex_init(&ctx->lock);
>> +
>>       for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>>           ctx->rings[i].sequence = 1;
>>           ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
>> @@ -96,6 +98,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
>>                         &ctx->rings[i].entity);
>>         amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
>> +
>> +    mutex_destroy(&ctx->lock);
>>   }
>>     static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
>
>


* Re: [PATCH 1/2] drm/amdgpu: resolve deadlock between reset and cs_ioctl v4.
       [not found]         ` <da1bc7eb-acce-b11b-44ba-694adf19cb59-5C7GfCeVMHo@public.gmane.org>
@ 2017-10-10  7:05           ` Christian König
  0 siblings, 0 replies; 18+ messages in thread
From: Christian König @ 2017-10-10  7:05 UTC (permalink / raw)
  To: andrey, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu

On 10.10.2017 at 05:31, andrey wrote:
>
>
> On 2017-10-09 04:34 AM, Christian König wrote:
>> On 06.10.2017 at 20:20, Andrey Grodzovsky wrote:
>>> From: Monk Liu <Monk.Liu@amd.com>
>>>
>>> We need to unreserve the ttm BO before "cs_add_fence" and "entity_push_job",
>>> otherwise there will be a deadlock between "recover_vram_from_shadow"
>>> and the previous two routines on the ttm BO's resv lock.
>>>
>>> v2:
>>> Add per ctx mutex.
>>>
>>> v3:
>>> Relocate mutex acquisition into amdgpu_cs_parser_init and mutex release
>>> into amdgpu_cs_parser_fini to avoid a nested-locking lockup.
>>> Add rollback code for amdgpu_ctx_add_fence in case of error or signal
>>> interruption.
>>>
>>> v4:
>>> Refactor amdgpu_cs_ib_vm_chunk and amdgpu_cs_ib_fill to enable
>>> old fence waiting before reservation lock is aquired.
>>>
>>> Change-Id: Ia209beab5036bfc2c38cbf18324fa3efd4bab1cf
>>> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
>>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |   1 +
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 164 
>>> ++++++++++++++++++--------------
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c |   4 +
>>>   3 files changed, 100 insertions(+), 69 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> index 53d8df3..baa2953 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>>> @@ -724,6 +724,7 @@ struct amdgpu_ctx {
>>>       struct dma_fence    **fences;
>>>       struct amdgpu_ctx_ring    rings[AMDGPU_MAX_RINGS];
>>>       bool preamble_presented;
>>> +    struct mutex        lock;
>>>   };
>>>     struct amdgpu_ctx_mgr {
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> index 9f1202a..0fa1bc7 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>>> @@ -89,6 +89,9 @@ static int amdgpu_cs_parser_init(struct 
>>> amdgpu_cs_parser *p, void *data)
>>>           goto free_chunk;
>>>       }
>>>   +
>>> +    mutex_lock(&p->ctx->lock);
>>> +
>>>       /* get chunks */
>>>       chunk_array_user = u64_to_user_ptr(cs->in.chunks);
>>>       if (copy_from_user(chunk_array, chunk_array_user,
>>> @@ -715,28 +718,21 @@ static int amdgpu_cs_sync_rings(struct 
>>> amdgpu_cs_parser *p)
>>>   /**
>>>    * cs_parser_fini() - clean parser states
>>>    * @parser:    parser structure holding parsing context.
>>> - * @error:    error number
>>> - *
>>> - * If error is set than unvalidate buffer, otherwise just free memory
>>> - * used by parsing context.
>>>    **/
>>> -static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, 
>>> int error,
>>> -                  bool backoff)
>>> +static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
>>
>> You can now drop this change and keep the logic as it is since we 
>> moved the wait before taking the reservation locks.
>
> By "this change" here and in the comment later bellow I guess you mean 
> the original change by Monk about moving ttm_eu_fence_buffer_objects 
> to before amd_sched_entity_push_job ?

Yes, correct.

> In that case I am not sure how we can drop it, since the wait we moved 
> in the second patch was for the old fence, while Monk's change was 
> about the wait that happens when the kfifo is full. Won't reverting 
> his change just put us back in the original deadlock of blocking on a 
> full kfifo while holding the reservation lock during a GPU reset with 
> VRAM recovery?

Yeah, but we need to fix this kfifo issue in another way anyway, 
because we push to the kfifo during BO migration and page table updates 
as well, and that can only happen while the BOs are reserved.

In general, please concentrate on one change at a time. In other words, 
this first patch should just move filling the IBs to before taking the 
BO reservation lock.

To do this you just need to move the calls to amdgpu_cs_find_mapping(), 
amdgpu_bo_kmap() and amdgpu_bo_kunmap() out of amdgpu_cs_ib_fill(), but 
please try to keep the amdgpu_ib_get() at its original place.
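
Roughly like this - only a sketch with error handling elided, the names
taken from the patch:

	/* amdgpu_cs_ib_fill(): runs before the BOs are reserved, so
	 * only allocate the IB here, don't touch BO contents. */
	r = amdgpu_ib_get(adev, vm,
			  ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
			  ib);

	/* amdgpu_cs_ib_vm_chunk(): runs after the BOs are reserved, so
	 * the mapping lookup and the copy from the user IB go here. */
	r = amdgpu_cs_find_mapping(p, chunk_ib->va_start, &aobj, &m);
	r = amdgpu_bo_kmap(aobj, (void **)&kptr);
	kptr += chunk_ib->va_start - m->start * AMDGPU_GPU_PAGE_SIZE;
	memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
	amdgpu_bo_kunmap(aobj);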

Regards,
Christian.

>
> Thanks,
> Andrey
>
>>
>>>   {
>>>       unsigned i;
>>>   -    if (error && backoff)
>>> -        ttm_eu_backoff_reservation(&parser->ticket,
>>> -                       &parser->validated);
>>> -
>>>       for (i = 0; i < parser->num_post_dep_syncobjs; i++)
>>>           drm_syncobj_put(parser->post_dep_syncobjs[i]);
>>>       kfree(parser->post_dep_syncobjs);
>>>         dma_fence_put(parser->fence);
>>>   -    if (parser->ctx)
>>> +    if (parser->ctx) {
>>> +        mutex_unlock(&parser->ctx->lock);
>>>           amdgpu_ctx_put(parser->ctx);
>>> +    }
>>>       if (parser->bo_list)
>>>           amdgpu_bo_list_put(parser->bo_list);
>>>   @@ -843,7 +839,72 @@ static int amdgpu_cs_ib_vm_chunk(struct 
>>> amdgpu_device *adev,
>>>       struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>>>       struct amdgpu_vm *vm = &fpriv->vm;
>>>       struct amdgpu_ring *ring = p->job->ring;
>>> -    int i, r;
>>> +    int i, j, r;
>>> +
>>> +    for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
>>> +
>>> +        struct amdgpu_cs_chunk *chunk;
>>> +        struct amdgpu_ib *ib;
>>> +        struct drm_amdgpu_cs_chunk_ib *chunk_ib;
>>> +
>>> +        chunk = &p->chunks[i];
>>> +        ib = &p->job->ibs[j];
>>> +        chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
>>> +
>>> +        if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
>>> +                    continue;
>>> +
>>> +        if (p->job->ring->funcs->parse_cs) {
>>> +            struct amdgpu_bo_va_mapping *m;
>>> +            struct amdgpu_bo *aobj = NULL;
>>> +            uint64_t offset;
>>> +            uint8_t *kptr;
>>> +
>>> +            r = amdgpu_cs_find_mapping(p, chunk_ib->va_start,
>>> +                           &aobj, &m);
>>> +            if (r) {
>>> +                DRM_ERROR("IB va_start is invalid\n");
>>> +                return r;
>>> +            }
>>> +
>>> +            if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
>>> +                (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
>>> +                DRM_ERROR("IB va_start+ib_bytes is invalid\n");
>>> +                return -EINVAL;
>>> +            }
>>> +
>>> +            /* the IB should be reserved at this point */
>>> +            r = amdgpu_bo_kmap(aobj, (void **)&kptr);
>>> +            if (r) {
>>> +                return r;
>>> +            }
>>> +
>>> +            offset = m->start * AMDGPU_GPU_PAGE_SIZE;
>>> +            kptr += chunk_ib->va_start - offset;
>>> +
>>> +            r =  amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib);
>>> +            if (r) {
>>> +                DRM_ERROR("Failed to get ib !\n");
>>> +                return r;
>>> +            }
>>> +
>>> +            memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
>>> +            amdgpu_bo_kunmap(aobj);
>>> +        } else {
>>> +            r =  amdgpu_ib_get(adev, vm, 0, ib);
>>> +            if (r) {
>>> +                DRM_ERROR("Failed to get ib !\n");
>>> +                return r;
>>> +            }
>>> +
>>> +        }
>>> +
>>> +        ib->gpu_addr = chunk_ib->va_start;
>>> +        ib->length_dw = chunk_ib->ib_bytes / 4;
>>> +        ib->flags = chunk_ib->flags;
>>
>> Please keep the calls to amdgpu_ib_get() inside amdgpu_cs_ib_fill().
>>
>>> +        j++;
>>> +
>>> +    }
>>>         /* Only for UVD/VCE VM emulation */
>>>       if (ring->funcs->parse_cs) {
>>> @@ -868,19 +929,15 @@ static int amdgpu_cs_ib_vm_chunk(struct 
>>> amdgpu_device *adev,
>>>   static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
>>>                    struct amdgpu_cs_parser *parser)
>>>   {
>>> -    struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
>>> -    struct amdgpu_vm *vm = &fpriv->vm;
>>>       int i, j;
>>>       int r, ce_preempt = 0, de_preempt = 0;
>>>         for (i = 0, j = 0; i < parser->nchunks && j < 
>>> parser->job->num_ibs; i++) {
>>>           struct amdgpu_cs_chunk *chunk;
>>> -        struct amdgpu_ib *ib;
>>>           struct drm_amdgpu_cs_chunk_ib *chunk_ib;
>>>           struct amdgpu_ring *ring;
>>>             chunk = &parser->chunks[i];
>>> -        ib = &parser->job->ibs[j];
>>>           chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
>>>             if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
>>> @@ -917,54 +974,6 @@ static int amdgpu_cs_ib_fill(struct 
>>> amdgpu_device *adev,
>>>             parser->job->ring = ring;
>>>   -        if (ring->funcs->parse_cs) {
>>> -            struct amdgpu_bo_va_mapping *m;
>>> -            struct amdgpu_bo *aobj = NULL;
>>> -            uint64_t offset;
>>> -            uint8_t *kptr;
>>> -
>>> -            r = amdgpu_cs_find_mapping(parser, chunk_ib->va_start,
>>> -                           &aobj, &m);
>>> -            if (r) {
>>> -                DRM_ERROR("IB va_start is invalid\n");
>>> -                return r;
>>> -            }
>>> -
>>> -            if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
>>> -                (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
>>> -                DRM_ERROR("IB va_start+ib_bytes is invalid\n");
>>> -                return -EINVAL;
>>> -            }
>>> -
>>> -            /* the IB should be reserved at this point */
>>> -            r = amdgpu_bo_kmap(aobj, (void **)&kptr);
>>> -            if (r) {
>>> -                return r;
>>> -            }
>>> -
>>> -            offset = m->start * AMDGPU_GPU_PAGE_SIZE;
>>> -            kptr += chunk_ib->va_start - offset;
>>> -
>>> -            r =  amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib);
>>> -            if (r) {
>>> -                DRM_ERROR("Failed to get ib !\n");
>>> -                return r;
>>> -            }
>>> -
>>> -            memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
>>> -            amdgpu_bo_kunmap(aobj);
>>> -        } else {
>>> -            r =  amdgpu_ib_get(adev, vm, 0, ib);
>>> -            if (r) {
>>> -                DRM_ERROR("Failed to get ib !\n");
>>> -                return r;
>>> -            }
>>> -
>>> -        }
>>> -
>>> -        ib->gpu_addr = chunk_ib->va_start;
>>> -        ib->length_dw = chunk_ib->ib_bytes / 4;
>>> -        ib->flags = chunk_ib->flags;
>>>           j++;
>>>       }
>>>   @@ -1160,14 +1169,26 @@ static int amdgpu_cs_submit(struct 
>>> amdgpu_cs_parser *p,
>>>         amdgpu_cs_post_dependencies(p);
>>>   +
>>> +    /* hook sched fence to all BOs' reservation in validated list
>>> +     * and unreserve them.
>>> +     *
>>> +     * we unreserve at here is because otherwise
>>> +     * there'll be deadlock between 
>>> ctx_add_fence/sched_entity_push_job
>>> +     * and gpu_reset routine's recover_bo_from_shadow on PD/PTEs' 
>>> ttm bo lock
>>> +     */
>>> +    ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>>> +
>>> +
>>>       cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
>>> +
>>> +
>>
>> Please drop that change. It isn't necessary any more after you moved 
>> the waiting outside of the BO lock.
>>
>> Dito for most of the other changes as well.
>>
>> Regards,
>> Christian.
>>
>>>       job->uf_sequence = cs->out.handle;
>>>       amdgpu_job_free_resources(job);
>>>         trace_amdgpu_cs_ioctl(job);
>>>       amd_sched_entity_push_job(&job->base);
>>>   -    ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, 
>>> p->fence);
>>>       amdgpu_mn_unlock(p->mn);
>>>         return 0;
>>> @@ -1189,6 +1210,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, 
>>> void *data, struct drm_file *filp)
>>>         parser.adev = adev;
>>>       parser.filp = filp;
>>> +    fpriv = filp->driver_priv;
>>>         r = amdgpu_cs_parser_init(&parser, data);
>>>       if (r) {
>>> @@ -1196,6 +1218,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, 
>>> void *data, struct drm_file *filp)
>>>           goto out;
>>>       }
>>>   +    r = amdgpu_cs_ib_fill(adev, &parser);
>>> +    if (r)
>>> +        goto out;
>>> +
>>>       r = amdgpu_cs_parser_bos(&parser, data);
>>>       if (r) {
>>>           if (r == -ENOMEM)
>>> @@ -1206,9 +1232,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, 
>>> void *data, struct drm_file *filp)
>>>       }
>>>         reserved_buffers = true;
>>> -    r = amdgpu_cs_ib_fill(adev, &parser);
>>> -    if (r)
>>> -        goto out;
>>>         r = amdgpu_cs_dependencies(adev, &parser);
>>>       if (r) {
>>> @@ -1226,7 +1249,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, 
>>> void *data, struct drm_file *filp)
>>>       r = amdgpu_cs_submit(&parser, cs);
>>>     out:
>>> -    amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
>>> +    if (r && reserved_buffers)
>>> +        ttm_eu_backoff_reservation(&parser.ticket, &parser.validated);
>>> +
>>> +    amdgpu_cs_parser_fini(&parser);
>>>       return r;
>>>   }
>>>   diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> index a11e443..c073a68 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>>> @@ -39,6 +39,8 @@ static int amdgpu_ctx_init(struct amdgpu_device 
>>> *adev, struct amdgpu_ctx *ctx)
>>>       if (!ctx->fences)
>>>           return -ENOMEM;
>>>   +    mutex_init(&ctx->lock);
>>> +
>>>       for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>>>           ctx->rings[i].sequence = 1;
>>>           ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
>>> @@ -96,6 +98,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
>>>                         &ctx->rings[i].entity);
>>>         amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
>>> +
>>> +    mutex_destroy(&ctx->lock);
>>>   }
>>>     static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
>>
>>
>


* [PATCH v2 1/2] drm/amdgpu: Refactor amdgpu_cs_ib_vm_chunk and amdgpu_cs_ib_fill.
       [not found]         ` <715810ad-26cc-506d-624d-8e5024968ea0-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2017-10-10 20:50           ` Andrey Grodzovsky
       [not found]             ` <1507668617-27299-1-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Andrey Grodzovsky @ 2017-10-10 20:50 UTC (permalink / raw)
  To: christian.koenig-5C7GfCeVMHo, Monk.Liu-5C7GfCeVMHo,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Andrey Grodzovsky

This enables waiting for the old fence before the reservation lock is
acquired, which in turn is part of a bigger solution to the deadlock
that happens when a GPU reset with VRAM recovery occurs during
intensive rendering.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 111 +++++++++++++++++++--------------
 1 file changed, 64 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index fe7dd44..1a54e53 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -739,6 +739,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 
 	if (parser->ctx)
 		amdgpu_ctx_put(parser->ctx);
+
 	if (parser->bo_list)
 		amdgpu_bo_list_put(parser->bo_list);
 
@@ -845,7 +846,56 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
 	struct amdgpu_vm *vm = &fpriv->vm;
 	struct amdgpu_ring *ring = p->job->ring;
-	int i, r;
+	int i, j, r;
+
+	for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
+
+		struct amdgpu_cs_chunk *chunk;
+		struct amdgpu_ib *ib;
+		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
+
+		chunk = &p->chunks[i];
+		ib = &p->job->ibs[j];
+		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
+
+		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
+				       continue;
+
+		if (p->job->ring->funcs->parse_cs) {
+			struct amdgpu_bo_va_mapping *m;
+			struct amdgpu_bo *aobj = NULL;
+			uint64_t offset;
+			uint8_t *kptr;
+
+			r = amdgpu_cs_find_mapping(p, chunk_ib->va_start,
+					&aobj, &m);
+			if (r) {
+				DRM_ERROR("IB va_start is invalid\n");
+				return r;
+			}
+
+			if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
+				(m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
+				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
+				return -EINVAL;
+			}
+
+			/* the IB should be reserved at this point */
+			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
+			if (r) {
+				return r;
+			}
+
+			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
+			kptr += chunk_ib->va_start - offset;
+
+			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
+			amdgpu_bo_kunmap(aobj);
+		}
+
+		j++;
+	}
+
 
 	/* Only for UVD/VCE VM emulation */
 	if (ring->funcs->parse_cs) {
@@ -919,54 +969,20 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 
 		parser->job->ring = ring;
 
-		if (ring->funcs->parse_cs) {
-			struct amdgpu_bo_va_mapping *m;
-			struct amdgpu_bo *aobj = NULL;
-			uint64_t offset;
-			uint8_t *kptr;
-
-			r = amdgpu_cs_find_mapping(parser, chunk_ib->va_start,
-						   &aobj, &m);
-			if (r) {
-				DRM_ERROR("IB va_start is invalid\n");
-				return r;
-			}
-
-			if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
-			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
-				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
-				return -EINVAL;
-			}
-
-			/* the IB should be reserved at this point */
-			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
-			if (r) {
-				return r;
-			}
-
-			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
-			kptr += chunk_ib->va_start - offset;
-
-			r =  amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib);
-			if (r) {
-				DRM_ERROR("Failed to get ib !\n");
-				return r;
-			}
-
-			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
-			amdgpu_bo_kunmap(aobj);
-		} else {
-			r =  amdgpu_ib_get(adev, vm, 0, ib);
-			if (r) {
-				DRM_ERROR("Failed to get ib !\n");
-				return r;
-			}
-
+		r =  amdgpu_ib_get(
+				adev,
+				vm,
+				ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
+				ib);
+		if (r) {
+			DRM_ERROR("Failed to get ib !\n");
+			return r;
 		}
 
 		ib->gpu_addr = chunk_ib->va_start;
 		ib->length_dw = chunk_ib->ib_bytes / 4;
 		ib->flags = chunk_ib->flags;
+
 		j++;
 	}
 
@@ -1212,6 +1228,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 		goto out;
 	}
 
+	r = amdgpu_cs_ib_fill(adev, &parser);
+	if (r)
+		goto out;
+
 	r = amdgpu_cs_parser_bos(&parser, data);
 	if (r) {
 		if (r == -ENOMEM)
@@ -1222,9 +1242,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	}
 
 	reserved_buffers = true;
-	r = amdgpu_cs_ib_fill(adev, &parser);
-	if (r)
-		goto out;
 
 	r = amdgpu_cs_dependencies(adev, &parser);
 	if (r) {
-- 
2.7.4


* [PATCH v2 2/2] drm/amdgpu: Move old fence waiting before reservation lock is acquired.
       [not found]             ` <1507668617-27299-1-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
@ 2017-10-10 20:50               ` Andrey Grodzovsky
       [not found]                 ` <1507668617-27299-2-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
  2017-10-11  7:33               ` [PATCH v2 1/2] drm/amdgpu: Refactor amdgpu_cs_ib_vm_chunk and amdgpu_cs_ib_fill Christian König
  1 sibling, 1 reply; 18+ messages in thread
From: Andrey Grodzovsky @ 2017-10-10 20:50 UTC (permalink / raw)
  To: christian.koenig-5C7GfCeVMHo, Monk.Liu-5C7GfCeVMHo,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Andrey Grodzovsky

Helps avoid deadlocks during GPU reset.
Added a mutex to amdgpu_ctx to preserve the order of fences on a ring.

v2:
Put the waiting logic in a separate function in amdgpu_ctx.c

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  4 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  8 ++++++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 30 ++++++++++++++++++++++++------
 3 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index da48f97..235eca5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -741,6 +741,7 @@ struct amdgpu_ctx {
 	bool 			preamble_presented;
 	enum amd_sched_priority init_priority;
 	enum amd_sched_priority override_priority;
+	struct mutex            lock;
 };
 
 struct amdgpu_ctx_mgr {
@@ -763,9 +764,12 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 		     struct drm_file *filp);
 
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id);
+
 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
 void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
 
+
 /*
  * file private structure
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 1a54e53..c36297c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -90,6 +90,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 		goto free_chunk;
 	}
 
+	mutex_lock(&p->ctx->lock);
+
 	/* get chunks */
 	chunk_array_user = u64_to_user_ptr(cs->in.chunks);
 	if (copy_from_user(chunk_array, chunk_array_user,
@@ -737,8 +739,10 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 
 	dma_fence_put(parser->fence);
 
-	if (parser->ctx)
+	if (parser->ctx) {
+		mutex_unlock(&parser->ctx->lock);
 		amdgpu_ctx_put(parser->ctx);
+	}
 
 	if (parser->bo_list)
 		amdgpu_bo_list_put(parser->bo_list);
@@ -992,7 +996,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 	    parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
 		return -EINVAL;
 
-	return 0;
+	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx);
 }
 
 static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index a78b03f6..4309820 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -67,6 +67,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 	if (!ctx->fences)
 		return -ENOMEM;
 
+	mutex_init(&ctx->lock);
+
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		ctx->rings[i].sequence = 1;
 		ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
@@ -126,6 +128,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
 				      &ctx->rings[i].entity);
 
 	amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
+
+	mutex_destroy(&ctx->lock);
 }
 
 static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
@@ -296,12 +300,8 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
 
 	idx = seq & (amdgpu_sched_jobs - 1);
 	other = cring->fences[idx];
-	if (other) {
-		signed long r;
-		r = dma_fence_wait_timeout(other, true, MAX_SCHEDULE_TIMEOUT);
-		if (r < 0)
-			return r;
-	}
+	if (other)
+		BUG_ON(!dma_fence_is_signaled(other));
 
 	dma_fence_get(fence);
 
@@ -372,6 +372,24 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 	}
 }
 
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id)
+{
+	struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id];
+	unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
+	struct dma_fence *other = cring->fences[idx];
+
+	if (other) {
+		signed long r;
+		r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
+		if (r < 0) {
+			DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+			return r;
+		}
+	}
+
+	return 0;
+}
+
 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
 {
 	mutex_init(&mgr->lock);
-- 
2.7.4


* RE: [PATCH v2 2/2] drm/amdgpu: Move old fence waiting before reservation lock is acquired.
       [not found]                 ` <1507668617-27299-2-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
@ 2017-10-11  3:28                   ` Liu, Monk
       [not found]                     ` <BLUPR12MB0449BE6E515A61B36980F88F844A0-7LeqcoF/hwpTIQvHjXdJlwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
  2017-10-11  7:57                   ` Christian König
  1 sibling, 1 reply; 18+ messages in thread
From: Liu, Monk @ 2017-10-11  3:28 UTC (permalink / raw)
  To: Koenig, Christian, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Grodzovsky, Andrey

Hi Andrey & Christian 

Do we really need the mutex lock here?
Libdrm_amdgpu already has a pthread_mutex to protect against multi-thread races; the kernel side should be safe with that 

BR Monk

-----Original Message-----
From: Andrey Grodzovsky [mailto:andrey.grodzovsky@amd.com] 
Sent: Wednesday, October 11, 2017 4:50 AM
To: Koenig, Christian <Christian.Koenig@amd.com>; Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
Subject: [PATCH v2 2/2] drm/amdgpu: Move old fence waiting before reservation lock is acquired.

Helps avoid deadlocks during GPU reset.
Added a mutex to amdgpu_ctx to preserve the order of fences on a ring.

v2:
Put the waiting logic in a separate function in amdgpu_ctx.c

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  4 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  8 ++++++--  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 30 ++++++++++++++++++++++++------
 3 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index da48f97..235eca5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -741,6 +741,7 @@ struct amdgpu_ctx {
 	bool 			preamble_presented;
 	enum amd_sched_priority init_priority;
 	enum amd_sched_priority override_priority;
+	struct mutex            lock;
 };
 
 struct amdgpu_ctx_mgr {
@@ -763,9 +764,12 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,  int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
 		     struct drm_file *filp);
 
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned 
+ring_id);
+
 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);  void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
 
+
 /*
  * file private structure
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 1a54e53..c36297c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -90,6 +90,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 		goto free_chunk;
 	}
 
+	mutex_lock(&p->ctx->lock);
+
 	/* get chunks */
 	chunk_array_user = u64_to_user_ptr(cs->in.chunks);
 	if (copy_from_user(chunk_array, chunk_array_user, @@ -737,8 +739,10 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
 
 	dma_fence_put(parser->fence);
 
-	if (parser->ctx)
+	if (parser->ctx) {
+		mutex_unlock(&parser->ctx->lock);
 		amdgpu_ctx_put(parser->ctx);
+	}
 
 	if (parser->bo_list)
 		amdgpu_bo_list_put(parser->bo_list);
@@ -992,7 +996,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 	    parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
 		return -EINVAL;
 
-	return 0;
+	return amdgpu_ctx_wait_prev_fence(parser->ctx, 
+parser->job->ring->idx);
 }
 
 static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index a78b03f6..4309820 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -67,6 +67,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
 	if (!ctx->fences)
 		return -ENOMEM;
 
+	mutex_init(&ctx->lock);
+
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		ctx->rings[i].sequence = 1;
 		ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i]; @@ -126,6 +128,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
 				      &ctx->rings[i].entity);
 
 	amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
+
+	mutex_destroy(&ctx->lock);
 }
 
 static int amdgpu_ctx_alloc(struct amdgpu_device *adev, @@ -296,12 +300,8 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
 
 	idx = seq & (amdgpu_sched_jobs - 1);
 	other = cring->fences[idx];
-	if (other) {
-		signed long r;
-		r = dma_fence_wait_timeout(other, true, MAX_SCHEDULE_TIMEOUT);
-		if (r < 0)
-			return r;
-	}
+	if (other)
+		BUG_ON(!dma_fence_is_signaled(other));
 
 	dma_fence_get(fence);
 
@@ -372,6 +372,24 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
 	}
 }
 
+int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned 
+ring_id) {
+	struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id];
+	unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
+	struct dma_fence *other = cring->fences[idx];
+
+	if (other) {
+		signed long r;
+		r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
+		if (r < 0) {
+			DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+			return r;
+		}
+	}
+
+	return 0;
+}
+
 void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)  {
 	mutex_init(&mgr->lock);
--
2.7.4


* Re: [PATCH v2 2/2] drm/amdgpu: Move old fence waiting before reservation lock is acquired.
       [not found]                     ` <BLUPR12MB0449BE6E515A61B36980F88F844A0-7LeqcoF/hwpTIQvHjXdJlwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
@ 2017-10-11  7:25                       ` Christian König
       [not found]                         ` <aaf4f94a-8099-ca6a-9c2f-8355909c7b60-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Christian König @ 2017-10-11  7:25 UTC (permalink / raw)
  To: Liu, Monk, Grodzovsky, Andrey, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Yes, the mutex is mandatory.

As I explained before, it doesn't matter what userspace is doing; the 
kernel IOCTL must always be thread-safe.

Otherwise userspace could force the kernel to run into a BUG_ON() or worse.

In addition, we already use a CS interface upstream which doesn't 
have a pthread_mutex any more.
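
For illustration only - a hypothetical snippet, not real code: nothing
stops a process from bypassing libdrm entirely and racing the ioctl
from two threads, so the userspace mutex guarantees nothing here:

	#include <fcntl.h>
	#include <pthread.h>
	#include <xf86drm.h>
	#include <drm/amdgpu_drm.h>

	struct submit_args {
		int fd;                 /* open render node */
		union drm_amdgpu_cs cs; /* same ctx_id in both threads */
	};

	static void *submit(void *arg)
	{
		struct submit_args *a = arg;

		/* No userspace lock taken: two of these threads race
		 * inside amdgpu_cs_ioctl(), so the kernel itself must
		 * serialize per context. */
		drmIoctl(a->fd, DRM_IOCTL_AMDGPU_CS, &a->cs);
		return NULL;
	}

	int main(void)
	{
		struct submit_args a = { .fd = open("/dev/dri/renderD128", O_RDWR) };
		pthread_t t1, t2;

		/* a.cs would need a valid context and chunks (elided) */
		pthread_create(&t1, NULL, submit, &a);
		pthread_create(&t2, NULL, submit, &a);
		pthread_join(t1, NULL);
		pthread_join(t2, NULL);
		return 0;
	}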

Regards,
Christian.

On 11.10.2017 at 05:28, Liu, Monk wrote:
> Hi Andrey & Christian
>
> Do we really need the mutex lock here?
> Libdrm_amdgpu already has a pthread_mutex to protect against multi-thread races; the kernel side should be safe with that
>
> BR Monk
>
> -----Original Message-----
> From: Andrey Grodzovsky [mailto:andrey.grodzovsky@amd.com]
> Sent: Wednesday, October 11, 2017 4:50 AM
> To: Koenig, Christian <Christian.Koenig@amd.com>; Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
> Cc: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
> Subject: [PATCH v2 2/2] drm/amdgpu: Move old fence waiting before reservation lock is acquired.
>
> Helps avoid deadlocks during GPU reset.
> Added a mutex to amdgpu_ctx to preserve the order of fences on a ring.
>
> v2:
> Put the waiting logic in a separate function in amdgpu_ctx.c
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  4 ++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  8 ++++++--  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 30 ++++++++++++++++++++++++------
>   3 files changed, 34 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index da48f97..235eca5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -741,6 +741,7 @@ struct amdgpu_ctx {
>   	bool 			preamble_presented;
>   	enum amd_sched_priority init_priority;
>   	enum amd_sched_priority override_priority;
> +	struct mutex            lock;
>   };
>   
>   struct amdgpu_ctx_mgr {
> @@ -763,9 +764,12 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,  int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
>   		     struct drm_file *filp);
>   
> +int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned
> +ring_id);
> +
>   void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);  void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
>   
> +
>   /*
>    * file private structure
>    */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 1a54e53..c36297c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -90,6 +90,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
>   		goto free_chunk;
>   	}
>   
> +	mutex_lock(&p->ctx->lock);
> +
>   	/* get chunks */
>   	chunk_array_user = u64_to_user_ptr(cs->in.chunks);
>   	if (copy_from_user(chunk_array, chunk_array_user, @@ -737,8 +739,10 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
>   
>   	dma_fence_put(parser->fence);
>   
> -	if (parser->ctx)
> +	if (parser->ctx) {
> +		mutex_unlock(&parser->ctx->lock);
>   		amdgpu_ctx_put(parser->ctx);
> +	}
>   
>   	if (parser->bo_list)
>   		amdgpu_bo_list_put(parser->bo_list);
> @@ -992,7 +996,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
>   	    parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
>   		return -EINVAL;
>   
> -	return 0;
> +	return amdgpu_ctx_wait_prev_fence(parser->ctx,
> +parser->job->ring->idx);
>   }
>   
>   static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index a78b03f6..4309820 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -67,6 +67,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>   	if (!ctx->fences)
>   		return -ENOMEM;
>   
> +	mutex_init(&ctx->lock);
> +
>   	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>   		ctx->rings[i].sequence = 1;
>   		ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i]; @@ -126,6 +128,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
>   				      &ctx->rings[i].entity);
>   
>   	amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
> +
> +	mutex_destroy(&ctx->lock);
>   }
>   
>   static int amdgpu_ctx_alloc(struct amdgpu_device *adev, @@ -296,12 +300,8 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
>   
>   	idx = seq & (amdgpu_sched_jobs - 1);
>   	other = cring->fences[idx];
> -	if (other) {
> -		signed long r;
> -		r = dma_fence_wait_timeout(other, true, MAX_SCHEDULE_TIMEOUT);
> -		if (r < 0)
> -			return r;
> -	}
> +	if (other)
> +		BUG_ON(!dma_fence_is_signaled(other));
>   
>   	dma_fence_get(fence);
>   
> @@ -372,6 +372,24 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
>   	}
>   }
>   
> +int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned
> +ring_id) {
> +	struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id];
> +	unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
> +	struct dma_fence *other = cring->fences[idx];
> +
> +	if (other) {
> +		signed long r;
> +		r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
> +		if (r < 0) {
> +			DRM_ERROR("Error (%ld) waiting for fence!\n", r);
> +			return r;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>   void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)  {
>   	mutex_init(&mgr->lock);
> --
> 2.7.4
>


* Re: [PATCH v2 1/2] drm/amdgpu: Refactor amdgpu_cs_ib_vm_chunk and amdgpu_cs_ib_fill.
       [not found]             ` <1507668617-27299-1-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
  2017-10-10 20:50               ` [PATCH v2 2/2] drm/amdgpu: Move old fence waiting before reservation lock is acquired Andrey Grodzovsky
@ 2017-10-11  7:33               ` Christian König
  1 sibling, 0 replies; 18+ messages in thread
From: Christian König @ 2017-10-11  7:33 UTC (permalink / raw)
  To: Andrey Grodzovsky, Monk.Liu-5C7GfCeVMHo,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 10.10.2017 at 22:50, Andrey Grodzovsky wrote:
> This enables waiting for the old fence before the reservation lock is
> acquired, which in turn is part of a bigger solution to the deadlock
> that happens when a GPU reset with VRAM recovery occurs during
> intensive rendering.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>

That looks like it should work; just a few style nitpicks below.

With those fixed, the patch is Reviewed-by: Christian König 
<christian.koenig@amd.com>.

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 111 +++++++++++++++++++--------------
>   1 file changed, 64 insertions(+), 47 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index fe7dd44..1a54e53 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -739,6 +739,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
>   
>   	if (parser->ctx)
>   		amdgpu_ctx_put(parser->ctx);
> +

Unrelated whitespace change; please drop it from the patch.

(BTW: Do you know how to efficiently modify patches with "git add -p" 
and "git commit --amend"?).

>   	if (parser->bo_list)
>   		amdgpu_bo_list_put(parser->bo_list);
>   
> @@ -845,7 +846,56 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
>   	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>   	struct amdgpu_vm *vm = &fpriv->vm;
>   	struct amdgpu_ring *ring = p->job->ring;
> -	int i, r;
> +	int i, j, r;
> +
> +	for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
> +
> +		struct amdgpu_cs_chunk *chunk;
> +		struct amdgpu_ib *ib;
> +		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
> +
> +		chunk = &p->chunks[i];
> +		ib = &p->job->ibs[j];
> +		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
> +
> +		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
> +				       continue;

The indentation of the continue looks wrong in the mail client.

> +
> +		if (p->job->ring->funcs->parse_cs) {
> +			struct amdgpu_bo_va_mapping *m;
> +			struct amdgpu_bo *aobj = NULL;
> +			uint64_t offset;
> +			uint8_t *kptr;
> +
> +			r = amdgpu_cs_find_mapping(p, chunk_ib->va_start,
> +					&aobj, &m);
> +			if (r) {
> +				DRM_ERROR("IB va_start is invalid\n");
> +				return r;
> +			}
> +
> +			if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
> +				(m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
> +				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
> +				return -EINVAL;
> +			}
> +
> +			/* the IB should be reserved at this point */
> +			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
> +			if (r) {
> +				return r;
> +			}
> +
> +			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
> +			kptr += chunk_ib->va_start - offset;
> +
> +			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
> +			amdgpu_bo_kunmap(aobj);
> +		}
> +
> +		j++;
> +	}
> +
>   
>   	/* Only for UVD/VCE VM emulation */
>   	if (ring->funcs->parse_cs) {

The loop only does something if (p->job->ring->funcs->parse_cs), so we 
should be able to move it under the following if (ring->funcs->parse_cs).
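
I.e. something like this (sketch only, the copy body elided):

	/* Only for UVD/VCE VM emulation */
	if (ring->funcs->parse_cs) {
		for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
			struct amdgpu_cs_chunk *chunk = &p->chunks[i];

			if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
				continue;

			/* find mapping, kmap, copy, kunmap as above */
			j++;
		}
	}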

> @@ -919,54 +969,20 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
>   
>   		parser->job->ring = ring;
>   
> -		if (ring->funcs->parse_cs) {
> -			struct amdgpu_bo_va_mapping *m;
> -			struct amdgpu_bo *aobj = NULL;
> -			uint64_t offset;
> -			uint8_t *kptr;
> -
> -			r = amdgpu_cs_find_mapping(parser, chunk_ib->va_start,
> -						   &aobj, &m);
> -			if (r) {
> -				DRM_ERROR("IB va_start is invalid\n");
> -				return r;
> -			}
> -
> -			if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
> -			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
> -				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
> -				return -EINVAL;
> -			}
> -
> -			/* the IB should be reserved at this point */
> -			r = amdgpu_bo_kmap(aobj, (void **)&kptr);
> -			if (r) {
> -				return r;
> -			}
> -
> -			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
> -			kptr += chunk_ib->va_start - offset;
> -
> -			r =  amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib);
> -			if (r) {
> -				DRM_ERROR("Failed to get ib !\n");
> -				return r;
> -			}
> -
> -			memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
> -			amdgpu_bo_kunmap(aobj);
> -		} else {
> -			r =  amdgpu_ib_get(adev, vm, 0, ib);
> -			if (r) {
> -				DRM_ERROR("Failed to get ib !\n");
> -				return r;
> -			}
> -
> +		r =  amdgpu_ib_get(
> +				adev,
> +				vm,
> +				ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
> +				ib);

Looks correct to me, but the coding style should be more like this:

r = amdgpu_ib_get(adev, vm,
		  ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
		  ib);

BTW: What editor do you use? I tend to forget the coding style all the 
time as well, so I just use appropriate editor settings.
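
(For vim, for example, something like this in .vimrc covers most of it:

	" kernel style: real tabs, 8-column indent, 80-char lines
	set noexpandtab tabstop=8 shiftwidth=8 textwidth=80

and Emacs users can set c-default-style to "linux".)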

Thanks for the help,
Christian.

> +		if (r) {
> +			DRM_ERROR("Failed to get ib !\n");
> +			return r;
>   		}
>   
>   		ib->gpu_addr = chunk_ib->va_start;
>   		ib->length_dw = chunk_ib->ib_bytes / 4;
>   		ib->flags = chunk_ib->flags;
> +
>   		j++;
>   	}
>   
> @@ -1212,6 +1228,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>   		goto out;
>   	}
>   
> +	r = amdgpu_cs_ib_fill(adev, &parser);
> +	if (r)
> +		goto out;
> +
>   	r = amdgpu_cs_parser_bos(&parser, data);
>   	if (r) {
>   		if (r == -ENOMEM)
> @@ -1222,9 +1242,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
>   	}
>   
>   	reserved_buffers = true;
> -	r = amdgpu_cs_ib_fill(adev, &parser);
> -	if (r)
> -		goto out;
>   
>   	r = amdgpu_cs_dependencies(adev, &parser);
>   	if (r) {



* Re: [PATCH v2 2/2] drm/amdgpu: Move old fence waiting before reservation lock is acquired.
       [not found]                         ` <aaf4f94a-8099-ca6a-9c2f-8355909c7b60-5C7GfCeVMHo@public.gmane.org>
@ 2017-10-11  7:41                           ` Chunming Zhou
       [not found]                             ` <59b614d8-a5ba-e4c4-dcf2-60984f0cd257-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Chunming Zhou @ 2017-10-11  7:41 UTC (permalink / raw)
  To: Christian König, Liu, Monk, Grodzovsky, Andrey,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

After the ctx mutex is added, the pthread_mutex in libdrm can be removed.

David Zhou


On 2017-10-11 15:25, Christian König wrote:
> Yes, the mutex is mandatory.
>
> As I explained before, it doesn't matter what userspace is doing; the 
> kernel IOCTL must always be thread-safe.
>
> Otherwise userspace could force the kernel to run into a BUG_ON() or 
> worse.
>
> In addition, we already use a CS interface upstream which 
> doesn't have a pthread_mutex any more.
>
> Regards,
> Christian.
>
> On 11.10.2017 at 05:28, Liu, Monk wrote:
>> Hi Andrey & Christian
>>
>> Do we really need the mutex lock here?
>> Libdrm_amdgpu already has a pthread_mutex to protect against 
>> multi-thread races; the kernel side should be safe with that
>>
>> BR Monk
>>
>> -----Original Message-----
>> From: Andrey Grodzovsky [mailto:andrey.grodzovsky@amd.com]
>> Sent: Wednesday, October 11, 2017 4:50 AM
>> To: Koenig, Christian <Christian.Koenig@amd.com>; Liu, Monk 
>> <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
>> Cc: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
>> Subject: [PATCH v2 2/2] drm/amdgpu: Move old fence waiting before 
>> reservation lock is acquired.
>>
>> Helps avoiding deadlock during GPU reset.
>> Added mutex to amdgpu_ctx to preserve order of fences on a ring.
>>
>> v2:
>> Put waiting logic in a function in a seperate function in amdgpu_ctx.c
>>
>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  4 ++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  8 ++++++-- 
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 30 
>> ++++++++++++++++++++++++------
>>   3 files changed, 34 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index da48f97..235eca5 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -741,6 +741,7 @@ struct amdgpu_ctx {
>>       bool             preamble_presented;
>>       enum amd_sched_priority init_priority;
>>       enum amd_sched_priority override_priority;
>> +    struct mutex            lock;
>>   };
>>     struct amdgpu_ctx_mgr {
>> @@ -763,9 +764,12 @@ void amdgpu_ctx_priority_override(struct 
>> amdgpu_ctx *ctx,  int amdgpu_ctx_ioctl(struct drm_device *dev, void 
>> *data,
>>                struct drm_file *filp);
>>   +int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned
>> +ring_id);
>> +
>>   void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);  void 
>> amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
>>   +
>>   /*
>>    * file private structure
>>    */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index 1a54e53..c36297c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -90,6 +90,8 @@ static int amdgpu_cs_parser_init(struct 
>> amdgpu_cs_parser *p, void *data)
>>           goto free_chunk;
>>       }
>>   +    mutex_lock(&p->ctx->lock);
>> +
>>       /* get chunks */
>>       chunk_array_user = u64_to_user_ptr(cs->in.chunks);
>>       if (copy_from_user(chunk_array, chunk_array_user, @@ -737,8 
>> +739,10 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser 
>> *parser, int error,
>>         dma_fence_put(parser->fence);
>>   -    if (parser->ctx)
>> +    if (parser->ctx) {
>> +        mutex_unlock(&parser->ctx->lock);
>>           amdgpu_ctx_put(parser->ctx);
>> +    }
>>         if (parser->bo_list)
>>           amdgpu_bo_list_put(parser->bo_list);
>> @@ -992,7 +996,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device 
>> *adev,
>>           parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
>>           return -EINVAL;
>>   -    return 0;
>> +    return amdgpu_ctx_wait_prev_fence(parser->ctx,
>> +parser->job->ring->idx);
>>   }
>>     static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser 
>> *p, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c 
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> index a78b03f6..4309820 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> @@ -67,6 +67,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>>       if (!ctx->fences)
>>           return -ENOMEM;
>>   +    mutex_init(&ctx->lock);
>> +
>>       for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>>           ctx->rings[i].sequence = 1;
>>           ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i]; 
>> @@ -126,6 +128,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
>>                         &ctx->rings[i].entity);
>>         amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
>> +
>> +    mutex_destroy(&ctx->lock);
>>   }
>>     static int amdgpu_ctx_alloc(struct amdgpu_device *adev, @@ 
>> -296,12 +300,8 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, 
>> struct amdgpu_ring *ring,
>>         idx = seq & (amdgpu_sched_jobs - 1);
>>       other = cring->fences[idx];
>> -    if (other) {
>> -        signed long r;
>> -        r = dma_fence_wait_timeout(other, true, MAX_SCHEDULE_TIMEOUT);
>> -        if (r < 0)
>> -            return r;
>> -    }
>> +    if (other)
>> +        BUG_ON(!dma_fence_is_signaled(other));
>>         dma_fence_get(fence);
>>   @@ -372,6 +372,24 @@ void amdgpu_ctx_priority_override(struct 
>> amdgpu_ctx *ctx,
>>       }
>>   }
>>   +int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned
>> +ring_id) {
>> +    struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id];
>> +    unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
>> +    struct dma_fence *other = cring->fences[idx];
>> +
>> +    if (other) {
>> +        signed long r;
>> +        r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
>> +        if (r < 0) {
>> +            DRM_ERROR("Error (%ld) waiting for fence!\n", r);
>> +            return r;
>> +        }
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>>   void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)  {
>>       mutex_init(&mgr->lock);
>> -- 
>> 2.7.4
>>
>

* RE: [PATCH v2 2/2] drm/amdgpu: Move old fence waiting before reservation lock is acquired.
       [not found]                             ` <59b614d8-a5ba-e4c4-dcf2-60984f0cd257-5C7GfCeVMHo@public.gmane.org>
@ 2017-10-11  7:49                               ` Liu, Monk
  0 siblings, 0 replies; 18+ messages in thread
From: Liu, Monk @ 2017-10-11  7:49 UTC (permalink / raw)
  To: Zhou, David(ChunMing),
	Koenig, Christian, Grodzovsky, Andrey,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

No, the pthread_mutex cannot be retired; its protection range is not
just the IOCTL, there are other struct fields that need the
pthread_mutex's protection.
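
For example (illustrative only - the field names here are made up, not
the actual libdrm_amdgpu layout):

	struct example_ctx {
		pthread_mutex_t sequence_mutex; /* guards more than the ioctl */
		uint32_t        ctx_id;
		uint64_t        last_seq;       /* updated after the CS ioctl returns */
	};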

-----Original Message-----
From: Zhou, David(ChunMing) 
Sent: Wednesday, October 11, 2017 3:42 PM
To: Koenig, Christian <Christian.Koenig@amd.com>; Liu, Monk <Monk.Liu@amd.com>; Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH v2 2/2] drm/amdgpu: Move old fence waiting before reservation lock is acquired.

After the ctx mutex is added, the pthread_mutex in libdrm can be removed.

David Zhou


On 2017-10-11 15:25, Christian König wrote:
> Yes, the mutex is mandatory.
>
> As I explained before, it doesn't matter what userspace is doing; the 
> kernel IOCTL must always be thread-safe.
>
> Otherwise userspace could force the kernel to run into a BUG_ON() or 
> worse.
>
> In addition, we already use a CS interface upstream which 
> doesn't have a pthread_mutex any more.
>
> Regards,
> Christian.
>
> On 11.10.2017 at 05:28, Liu, Monk wrote:
>> Hi Andrey & Christian
>>
>> Do we really need the mutex lock here?
>> Libdrm_amdgpu already has a pthread_mutex to protect against 
>> multi-thread races; the kernel side should be safe with that
>>
>> BR Monk
>>
>> -----Original Message-----
>> From: Andrey Grodzovsky [mailto:andrey.grodzovsky@amd.com]
>> Sent: Wednesday, October 11, 2017 4:50 AM
>> To: Koenig, Christian <Christian.Koenig@amd.com>; Liu, Monk 
>> <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
>> Cc: Grodzovsky, Andrey <Andrey.Grodzovsky@amd.com>
>> Subject: [PATCH v2 2/2] drm/amdgpu: Move old fence waiting before 
>> reservation lock is acquired.
>>
>> Helps avoiding deadlock during GPU reset.
>> Added mutex to amdgpu_ctx to preserve order of fences on a ring.
>>
>> v2:
>> Put waiting logic in a function in a seperate function in 
>> amdgpu_ctx.c
>>
>> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  4 ++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  8 ++++++-- 
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 30
>> ++++++++++++++++++++++++------
>>   3 files changed, 34 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index da48f97..235eca5 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -741,6 +741,7 @@ struct amdgpu_ctx {
>>       bool             preamble_presented;
>>       enum amd_sched_priority init_priority;
>>       enum amd_sched_priority override_priority;
>> +    struct mutex            lock;
>>   };
>>     struct amdgpu_ctx_mgr {
>> @@ -763,9 +764,12 @@ void amdgpu_ctx_priority_override(struct
>> amdgpu_ctx *ctx,  int amdgpu_ctx_ioctl(struct drm_device *dev, void 
>> *data,
>>                struct drm_file *filp);
>>   +int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned
>> +ring_id);
>> +
>>   void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);  void 
>> amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
>>   +
>>   /*
>>    * file private structure
>>    */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index 1a54e53..c36297c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -90,6 +90,8 @@ static int amdgpu_cs_parser_init(struct 
>> amdgpu_cs_parser *p, void *data)
>>           goto free_chunk;
>>       }
>>   +    mutex_lock(&p->ctx->lock);
>> +
>>       /* get chunks */
>>       chunk_array_user = u64_to_user_ptr(cs->in.chunks);
>>       if (copy_from_user(chunk_array, chunk_array_user, @@ -737,8
>> +739,10 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser
>> *parser, int error,
>>         dma_fence_put(parser->fence);
>>   -    if (parser->ctx)
>> +    if (parser->ctx) {
>> +        mutex_unlock(&parser->ctx->lock);
>>           amdgpu_ctx_put(parser->ctx);
>> +    }
>>         if (parser->bo_list)
>>           amdgpu_bo_list_put(parser->bo_list);
>> @@ -992,7 +996,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device 
>> *adev,
>>           parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
>>           return -EINVAL;
>>   -    return 0;
>> +    return amdgpu_ctx_wait_prev_fence(parser->ctx,
>> +parser->job->ring->idx);
>>   }
>>     static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser 
>> *p, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> index a78b03f6..4309820 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> @@ -67,6 +67,8 @@ static int amdgpu_ctx_init(struct amdgpu_device 
>> *adev,
>>       if (!ctx->fences)
>>           return -ENOMEM;
>>   +    mutex_init(&ctx->lock);
>> +
>>       for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>>           ctx->rings[i].sequence = 1;
>>           ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i]; 
>> @@ -126,6 +128,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx 
>> *ctx)
>>                         &ctx->rings[i].entity);
>>         amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
>> +
>> +    mutex_destroy(&ctx->lock);
>>   }
>>     static int amdgpu_ctx_alloc(struct amdgpu_device *adev, @@
>> -296,12 +300,8 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, 
>> struct amdgpu_ring *ring,
>>         idx = seq & (amdgpu_sched_jobs - 1);
>>       other = cring->fences[idx];
>> -    if (other) {
>> -        signed long r;
>> -        r = dma_fence_wait_timeout(other, true, 
>> MAX_SCHEDULE_TIMEOUT);
>> -        if (r < 0)
>> -            return r;
>> -    }
>> +    if (other)
>> +        BUG_ON(!dma_fence_is_signaled(other));
>>         dma_fence_get(fence);
>>   @@ -372,6 +372,24 @@ void amdgpu_ctx_priority_override(struct
>> amdgpu_ctx *ctx,
>>       }
>>   }
>>   +int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned
>> +ring_id) {
>> +    struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id];
>> +    unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
>> +    struct dma_fence *other = cring->fences[idx];
>> +
>> +    if (other) {
>> +        signed long r;
>> +        r = dma_fence_wait_timeout(other, false, 
>> +MAX_SCHEDULE_TIMEOUT);
>> +        if (r < 0) {
>> +            DRM_ERROR("Error (%ld) waiting for fence!\n", r);
>> +            return r;
>> +        }
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>>   void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)  {
>>       mutex_init(&mgr->lock);
>> --
>> 2.7.4
>>
>

* Re: [PATCH v2 2/2] drm/amdgpu: Move old fence waiting before reservation lock is acquired.
       [not found]                 ` <1507668617-27299-2-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
  2017-10-11  3:28                   ` Liu, Monk
@ 2017-10-11  7:57                   ` Christian König
       [not found]                     ` <35727ec2-b806-0944-ed55-bda3136b760a-5C7GfCeVMHo@public.gmane.org>
  1 sibling, 1 reply; 18+ messages in thread
From: Christian König @ 2017-10-11  7:57 UTC (permalink / raw)
  To: Andrey Grodzovsky, Monk.Liu-5C7GfCeVMHo,
	amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 10.10.2017 at 22:50, Andrey Grodzovsky wrote:
> Helps avoid a deadlock during GPU reset.
> Added a mutex to amdgpu_ctx to preserve the order of fences on a ring.
>
> v2:
> Put the waiting logic into a separate function in amdgpu_ctx.c.
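
The ordering the mutex protects can be modeled outside the driver. Below
is a rough user-space sketch (a pthread mutex stands in for ctx->lock,
plain integers stand in for fences; the names and simplifications are
illustrative, not amdgpu code): each submitter must atomically check
that the fence occupying its ring slot is old enough before installing
its own.

/* Hedged model, not driver code.  Build with: cc -pthread sketch.c */
#include <pthread.h>
#include <stdio.h>

#define SLOTS 4	/* stands in for amdgpu_sched_jobs (a power of two) */

static pthread_mutex_t ctx_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long sequence = 1;	/* models cring->sequence */
static long fences[SLOTS];		/* models cring->fences[] */

static void *submit(void *arg)
{
	for (int i = 0; i < 100000; i++) {
		pthread_mutex_lock(&ctx_lock);
		unsigned idx = sequence & (SLOTS - 1);
		long old = fences[idx];

		/* Invariant relied on by amdgpu_ctx_add_fence(): the fence
		 * in our slot is from exactly SLOTS submissions ago. */
		if (old && old + SLOTS != (long)sequence)
			printf("ordering violated: slot holds %ld at seq %lu\n",
			       old, sequence);

		fences[idx] = (long)sequence;	/* "push" our fence */
		sequence++;
		pthread_mutex_unlock(&ctx_lock);
	}
	return NULL;
}

int main(void)
{
	pthread_t a, b;
	pthread_create(&a, NULL, submit, NULL);
	pthread_create(&b, NULL, submit, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	puts("slot reuse stayed SLOTS apart under the lock");
	return 0;
}

Taking the lock in amdgpu_cs_parser_init() and dropping it in
amdgpu_cs_parser_fini() makes the whole wait-allocate-push sequence
atomic per context, which is what keeps this invariant intact.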
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  4 ++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  |  8 ++++++--
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 30 ++++++++++++++++++++++++------
>   3 files changed, 34 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index da48f97..235eca5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -741,6 +741,7 @@ struct amdgpu_ctx {
>   	bool 			preamble_presented;
>   	enum amd_sched_priority init_priority;
>   	enum amd_sched_priority override_priority;
> +	struct mutex            lock;
>   };
>   
>   struct amdgpu_ctx_mgr {
> @@ -763,9 +764,12 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
>   int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
>   		     struct drm_file *filp);
>   
> +int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id);
> +
>   void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
>   void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
>   
> +
>   /*
>    * file private structure
>    */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 1a54e53..c36297c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -90,6 +90,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
>   		goto free_chunk;
>   	}
>   
> +	mutex_lock(&p->ctx->lock);
> +

There is a bug in amdgpu_cs_parser_init(): take a look at the put_ctx 
label. It calls amdgpu_ctx_put() without setting p->ctx to NULL afterwards.

This way amdgpu_cs_parser_fini() will call amdgpu_ctx_put() again and 
mess up the reference count.
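
The failure mode is easy to reproduce in a reduced model. Here is a
minimal user-space sketch of the double put (illustrative types and
names only, not the driver's real structs):

/* Hedged model of the refcount bug, not driver code. */
#include <stdio.h>

struct ctx { int refcount; };
struct parser { struct ctx *ctx; };

static struct ctx *ctx_get(struct ctx *c) { c->refcount++; return c; }
static void ctx_put(struct ctx *c) { c->refcount--; }

/* Models amdgpu_cs_parser_init() failing after taking its reference:
 * the put_ctx label drops the reference but leaves p->ctx set. */
static int parser_init(struct parser *p, struct ctx *c)
{
	p->ctx = ctx_get(c);	/* refcount: 1 -> 2 */
	/* ... copy_from_user() fails ... */
	ctx_put(p->ctx);	/* put_ctx label: 2 -> 1 ... */
	return -1;		/* ... but p->ctx is not cleared */
}

/* Models amdgpu_cs_parser_fini(): p->ctx is still set, so it puts again. */
static void parser_fini(struct parser *p)
{
	if (p->ctx)
		ctx_put(p->ctx);	/* 1 -> 0: the manager's ref is gone */
}

int main(void)
{
	struct ctx c = { .refcount = 1 };	/* ref held by the ctx manager */
	struct parser p = { 0 };

	if (parser_init(&p, &c))
		parser_fini(&p);

	printf("refcount = %d, yet the ctx manager never dropped its reference\n",
	       c.refcount);
	return 0;
}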

That is not a bug introduced by this patch, so this one is Reviewed-by: 
Christian König <christian.koenig@amd.com>.

But please provide a follow-up patch that just removes the extra 
amdgpu_ctx_put() from amdgpu_cs_parser_init().

Thanks for the help,
Christian.

>   	/* get chunks */
>   	chunk_array_user = u64_to_user_ptr(cs->in.chunks);
>   	if (copy_from_user(chunk_array, chunk_array_user,
> @@ -737,8 +739,10 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
>   
>   	dma_fence_put(parser->fence);
>   
> -	if (parser->ctx)
> +	if (parser->ctx) {
> +		mutex_unlock(&parser->ctx->lock);
>   		amdgpu_ctx_put(parser->ctx);
> +	}
>   
>   	if (parser->bo_list)
>   		amdgpu_bo_list_put(parser->bo_list);
> @@ -992,7 +996,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
>   	    parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
>   		return -EINVAL;
>   
> -	return 0;
> +	return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx);
>   }
>   
>   static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> index a78b03f6..4309820 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
> @@ -67,6 +67,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
>   	if (!ctx->fences)
>   		return -ENOMEM;
>   
> +	mutex_init(&ctx->lock);
> +
>   	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>   		ctx->rings[i].sequence = 1;
>   		ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
> @@ -126,6 +128,8 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
>   				      &ctx->rings[i].entity);
>   
>   	amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
> +
> +	mutex_destroy(&ctx->lock);
>   }
>   
>   static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
> @@ -296,12 +300,8 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
>   
>   	idx = seq & (amdgpu_sched_jobs - 1);
>   	other = cring->fences[idx];
> -	if (other) {
> -		signed long r;
> -		r = dma_fence_wait_timeout(other, true, MAX_SCHEDULE_TIMEOUT);
> -		if (r < 0)
> -			return r;
> -	}
> +	if (other)
> +		BUG_ON(!dma_fence_is_signaled(other));
>   
>   	dma_fence_get(fence);
>   
> @@ -372,6 +372,24 @@ void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
>   	}
>   }
>   
> +int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id)
> +{
> +	struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id];
> +	unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
> +	struct dma_fence *other = cring->fences[idx];
> +
> +	if (other) {
> +		signed long r;
> +		r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
> +		if (r < 0) {
> +			DRM_ERROR("Error (%ld) waiting for fence!\n", r);
> +			return r;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
>   void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
>   {
>   	mutex_init(&mgr->lock);


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH] drm/amdgpu: Fix extra call to amdgpu_ctx_put.
       [not found]                     ` <35727ec2-b806-0944-ed55-bda3136b760a-5C7GfCeVMHo@public.gmane.org>
@ 2017-10-11 21:13                       ` Andrey Grodzovsky
       [not found]                         ` <1507756382-19390-1-git-send-email-Andrey.Grodzovsky-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Andrey Grodzovsky @ 2017-10-11 21:13 UTC (permalink / raw)
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
  Cc: Andrey Grodzovsky, christian.koenig-5C7GfCeVMHo

In amdgpu_cs_parser_init(), the error handling calls amdgpu_ctx_put()
without setting p->ctx to NULL afterwards; later, amdgpu_cs_parser_fini()
calls amdgpu_ctx_put() again and messes up the reference count.

Signed-off-by: Andrey Grodzovsky <Andrey.Grodzovsky@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 5de092e..8513e44 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -97,7 +97,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 	if (copy_from_user(chunk_array, chunk_array_user,
 			   sizeof(uint64_t)*cs->in.num_chunks)) {
 		ret = -EFAULT;
-		goto put_ctx;
+		goto free_chunk;
 	}
 
 	p->nchunks = cs->in.num_chunks;
@@ -105,7 +105,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 			    GFP_KERNEL);
 	if (!p->chunks) {
 		ret = -ENOMEM;
-		goto put_ctx;
+		goto free_chunk;
 	}
 
 	for (i = 0; i < p->nchunks; i++) {
@@ -185,8 +185,6 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 	kfree(p->chunks);
 	p->chunks = NULL;
 	p->nchunks = 0;
-put_ctx:
-	amdgpu_ctx_put(p->ctx);
 free_chunk:
 	kfree(chunk_array);
 
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH] drm/amdgpu: Fix extra call to amdgpu_ctx_put.
       [not found]                         ` <1507756382-19390-1-git-send-email-Andrey.Grodzovsky-5C7GfCeVMHo@public.gmane.org>
@ 2017-10-12  7:17                           ` Christian König
  0 siblings, 0 replies; 18+ messages in thread
From: Christian König @ 2017-10-12  7:17 UTC (permalink / raw)
  To: Andrey Grodzovsky, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On 11.10.2017 at 23:13, Andrey Grodzovsky wrote:
> In amdgpu_cs_parser_init(), the error handling calls amdgpu_ctx_put()
> without setting p->ctx to NULL afterwards; later, amdgpu_cs_parser_fini()
> calls amdgpu_ctx_put() again and messes up the reference count.
>
> Signed-off-by: Andrey Grodzovsky <Andrey.Grodzovsky@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com>

> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6 ++----
>   1 file changed, 2 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 5de092e..8513e44 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -97,7 +97,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
>   	if (copy_from_user(chunk_array, chunk_array_user,
>   			   sizeof(uint64_t)*cs->in.num_chunks)) {
>   		ret = -EFAULT;
> -		goto put_ctx;
> +		goto free_chunk;
>   	}
>   
>   	p->nchunks = cs->in.num_chunks;
> @@ -105,7 +105,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
>   			    GFP_KERNEL);
>   	if (!p->chunks) {
>   		ret = -ENOMEM;
> -		goto put_ctx;
> +		goto free_chunk;
>   	}
>   
>   	for (i = 0; i < p->nchunks; i++) {
> @@ -185,8 +185,6 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
>   	kfree(p->chunks);
>   	p->chunks = NULL;
>   	p->nchunks = 0;
> -put_ctx:
> -	amdgpu_ctx_put(p->ctx);
>   free_chunk:
>   	kfree(chunk_array);
>   


_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2017-10-12  7:17 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-10-06 18:20 [PATCH 1/2] drm/amdgpu:resolv deadlock between reset and cs_ioctl v4 Andrey Grodzovsky
     [not found] ` <1507314021-18323-1-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
2017-10-06 18:20   ` [PATCH 2/2] drm/amdgpu: Move old fence waiting before reservation lock is acquired Andrey Grodzovsky
     [not found]     ` <1507314021-18323-2-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
2017-10-09  6:21       ` Liu, Monk
2017-10-09  8:36       ` Christian König
     [not found]         ` <715810ad-26cc-506d-624d-8e5024968ea0-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2017-10-10 20:50           ` [PATCH v2 1/2] drm/amdgpu: Refactor amdgpu_cs_ib_vm_chunk and amdgpu_cs_ib_fill Andrey Grodzovsky
     [not found]             ` <1507668617-27299-1-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
2017-10-10 20:50               ` [PATCH v2 2/2] drm/amdgpu: Move old fence waiting before reservation lock is acquired Andrey Grodzovsky
     [not found]                 ` <1507668617-27299-2-git-send-email-andrey.grodzovsky-5C7GfCeVMHo@public.gmane.org>
2017-10-11  3:28                   ` Liu, Monk
     [not found]                     ` <BLUPR12MB0449BE6E515A61B36980F88F844A0-7LeqcoF/hwpTIQvHjXdJlwdYzm3356FpvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>
2017-10-11  7:25                       ` Christian König
     [not found]                         ` <aaf4f94a-8099-ca6a-9c2f-8355909c7b60-5C7GfCeVMHo@public.gmane.org>
2017-10-11  7:41                           ` Chunming Zhou
     [not found]                             ` <59b614d8-a5ba-e4c4-dcf2-60984f0cd257-5C7GfCeVMHo@public.gmane.org>
2017-10-11  7:49                               ` Liu, Monk
2017-10-11  7:57                   ` Christian König
     [not found]                     ` <35727ec2-b806-0944-ed55-bda3136b760a-5C7GfCeVMHo@public.gmane.org>
2017-10-11 21:13                       ` [PATCH] drm/amdgpu: Fix extra call to amdgpu_ctx_put Andrey Grodzovsky
     [not found]                         ` <1507756382-19390-1-git-send-email-Andrey.Grodzovsky-5C7GfCeVMHo@public.gmane.org>
2017-10-12  7:17                           ` Christian König
2017-10-11  7:33               ` [PATCH v2 1/2] drm/amdgpu: Refactor amdgpu_cs_ib_vm_chunk and amdgpu_cs_ib_fill Christian König
2017-10-09  2:56   ` [PATCH 1/2] drm/amdgpu:resolv deadlock between reset and cs_ioctl v4 Chunming Zhou
2017-10-09  8:34   ` Christian König
     [not found]     ` <9fc20357-db3e-3823-3073-c70ead162f4e-5C7GfCeVMHo@public.gmane.org>
2017-10-10  3:31       ` andrey
     [not found]         ` <da1bc7eb-acce-b11b-44ba-694adf19cb59-5C7GfCeVMHo@public.gmane.org>
2017-10-10  7:05           ` Christian König

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.