* [PATCH] drm/amdgpu: add AMDGPU_IB_FLAG_GET_START_SYNCOBJ to expose scheduled fence
@ 2019-01-28 21:52 Marek Olšák
[not found] ` <20190128215239.32498-1-maraeo-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
0 siblings, 1 reply; 6+ messages in thread
From: Marek Olšák @ 2019-01-28 21:52 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
From: Marek Olšák <marek.olsak@amd.com>
Normal syncobjs signal when an IB finishes. Start syncobjs signal when
an IB starts.
Signed-off-by: Marek Olšák <marek.olsak@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 18 ++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
include/uapi/drm/amdgpu_drm.h | 13 ++++++++++++-
4 files changed, 33 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index d67f8b1dfe80..8e2f7e558bc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -453,20 +453,21 @@ struct amdgpu_cs_parser {
struct dma_fence *fence;
uint64_t bytes_moved_threshold;
uint64_t bytes_moved_vis_threshold;
uint64_t bytes_moved;
uint64_t bytes_moved_vis;
struct amdgpu_bo_list_entry *evictable;
/* user fence */
struct amdgpu_bo_list_entry uf_entry;
+ bool get_start_syncobj;
unsigned num_post_dep_syncobjs;
struct drm_syncobj **post_dep_syncobjs;
};
static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
uint32_t ib_idx, int idx)
{
return p->job->ibs[ib_idx].ptr[idx];
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 1c49b8266d69..917f3818c61c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1022,20 +1022,23 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
chunk_ib->ip_instance, chunk_ib->ring,
&entity);
if (r)
return r;
if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
parser->job->preamble_status |=
AMDGPU_PREAMBLE_IB_PRESENT;
+ if (chunk_ib->flags & AMDGPU_IB_FLAG_GET_START_SYNCOBJ)
+ parser->get_start_syncobj = true;
+
if (parser->entity && parser->entity != entity)
return -EINVAL;
parser->entity = entity;
ring = to_amdgpu_ring(entity->rq->sched);
r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
chunk_ib->ib_bytes : 0, ib);
if (r) {
DRM_ERROR("Failed to get ib !\n");
@@ -1227,20 +1230,35 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
amdgpu_mn_lock(p->mn);
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
r = -ERESTARTSYS;
goto error_abort;
}
}
+ if (p->get_start_syncobj) {
+ struct drm_syncobj *syncobj;
+
+ r = drm_syncobj_create(&syncobj, 0,
+ &job->base.s_fence->scheduled);
+ if (r)
+ goto error_abort;
+
+ r = drm_syncobj_get_handle(p->filp, syncobj,
+ &cs->out.start_syncobj);
+ if (r)
+ goto error_abort;
+ drm_syncobj_put(syncobj);
+ }
+
job->owner = p->filp;
p->fence = dma_fence_get(&job->base.s_fence->finished);
amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
amdgpu_cs_post_dependencies(p);
if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
!p->ctx->preamble_presented) {
job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
p->ctx->preamble_presented = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index c806f984bcc5..a230a30722d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -64,23 +64,24 @@
* - 3.18.0 - Export gpu always on cu bitmap
* - 3.19.0 - Add support for UVD MJPEG decode
* - 3.20.0 - Add support for local BOs
* - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl
* - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl
* - 3.23.0 - Add query for VRAM lost counter
* - 3.24.0 - Add high priority compute support for gfx9
* - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
* - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
* - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.
+ * - 3.28.0 - AMDGPU_IB_FLAG_GET_START_SYNCOBJ
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 27
+#define KMS_DRIVER_MINOR 28
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit = 0;
int amdgpu_vis_vram_limit = 0;
int amdgpu_gart_size = -1; /* auto */
int amdgpu_gtt_size = -1; /* auto */
int amdgpu_moverate = -1; /* auto */
int amdgpu_benchmarking = 0;
int amdgpu_testing = 0;
int amdgpu_audio = -1;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 662d379ea624..d0e0c99cea32 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -538,21 +538,23 @@ struct drm_amdgpu_cs_in {
__u32 ctx_id;
/** Handle of resource list associated with CS */
__u32 bo_list_handle;
__u32 num_chunks;
__u32 _pad;
/** this points to __u64 * which point to cs chunks */
__u64 chunks;
};
struct drm_amdgpu_cs_out {
- __u64 handle;
+ __u64 handle; /* sequence number */
+ __u32 start_syncobj; /* signalled when IB execution begins */
+ __u32 _pad;
};
union drm_amdgpu_cs {
struct drm_amdgpu_cs_in in;
struct drm_amdgpu_cs_out out;
};
/* Specify flags to be used for IB */
/* This IB should be submitted to CE */
@@ -566,20 +568,29 @@ union drm_amdgpu_cs {
/* The IB fence should do the L2 writeback but not invalidate any shader
* caches (L2/vL1/sL1/I$). */
#define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3)
/* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER.
* This will reset wave ID counters for the IB.
*/
#define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4)
+/* The CS ioctl will return a syncobj representing when all IBs begin
+ * execution. If set, this applies to all IBs. The returned syncobj can be
+ * used as an IB dependency for other IBs.
+ *
+ * This is used for GPU deadlock prevention when userspace uses mid-IB fences
+ * to wait for mid-IB work on other rings.
+ */
+#define AMDGPU_IB_FLAG_GET_START_SYNCOBJ (1 << 5)
+
struct drm_amdgpu_cs_chunk_ib {
__u32 _pad;
/** AMDGPU_IB_FLAG_* */
__u32 flags;
/** Virtual address to begin IB execution */
__u64 va_start;
/** Size of submission */
__u32 ib_bytes;
/** HW IP to submit to */
__u32 ip_type;
--
2.17.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH] drm/amdgpu: add AMDGPU_IB_FLAG_GET_START_SYNCOBJ to expose scheduled fence
[not found] ` <20190128215239.32498-1-maraeo-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2019-01-29 8:01 ` Christian König
[not found] ` <762485e9-7fe4-3ffb-16a4-f83436871451-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
0 siblings, 1 reply; 6+ messages in thread
From: Christian König @ 2019-01-29 8:01 UTC (permalink / raw)
To: Marek Olšák, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Am 28.01.19 um 22:52 schrieb Marek Olšák:
> From: Marek Olšák <marek.olsak@amd.com>
>
> Normal syncobjs signal when an IB finishes. Start syncobjs signal when
> an IB starts.
That approach has quite a number of problems (for example you can't
allocate memory at this point).
Better add a flag that we should only sync on scheduling for a
dependency/syncobj instead.
Christian.
>
> Signed-off-by: Marek Olšák <marek.olsak@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 18 ++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
> include/uapi/drm/amdgpu_drm.h | 13 ++++++++++++-
> 4 files changed, 33 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index d67f8b1dfe80..8e2f7e558bc9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -453,20 +453,21 @@ struct amdgpu_cs_parser {
> struct dma_fence *fence;
> uint64_t bytes_moved_threshold;
> uint64_t bytes_moved_vis_threshold;
> uint64_t bytes_moved;
> uint64_t bytes_moved_vis;
> struct amdgpu_bo_list_entry *evictable;
>
> /* user fence */
> struct amdgpu_bo_list_entry uf_entry;
>
> + bool get_start_syncobj;
> unsigned num_post_dep_syncobjs;
> struct drm_syncobj **post_dep_syncobjs;
> };
>
> static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
> uint32_t ib_idx, int idx)
> {
> return p->job->ibs[ib_idx].ptr[idx];
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 1c49b8266d69..917f3818c61c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -1022,20 +1022,23 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
> r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
> chunk_ib->ip_instance, chunk_ib->ring,
> &entity);
> if (r)
> return r;
>
> if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
> parser->job->preamble_status |=
> AMDGPU_PREAMBLE_IB_PRESENT;
>
> + if (chunk_ib->flags & AMDGPU_IB_FLAG_GET_START_SYNCOBJ)
> + parser->get_start_syncobj = true;
> +
> if (parser->entity && parser->entity != entity)
> return -EINVAL;
>
> parser->entity = entity;
>
> ring = to_amdgpu_ring(entity->rq->sched);
> r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
> chunk_ib->ib_bytes : 0, ib);
> if (r) {
> DRM_ERROR("Failed to get ib !\n");
> @@ -1227,20 +1230,35 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> amdgpu_mn_lock(p->mn);
> amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
> struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
>
> if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
> r = -ERESTARTSYS;
> goto error_abort;
> }
> }
>
> + if (p->get_start_syncobj) {
> + struct drm_syncobj *syncobj;
> +
> + r = drm_syncobj_create(&syncobj, 0,
> + &job->base.s_fence->scheduled);
> + if (r)
> + goto error_abort;
> +
> + r = drm_syncobj_get_handle(p->filp, syncobj,
> + &cs->out.start_syncobj);
> + if (r)
> + goto error_abort;
> + drm_syncobj_put(syncobj);
> + }
> +
> job->owner = p->filp;
> p->fence = dma_fence_get(&job->base.s_fence->finished);
>
> amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
> amdgpu_cs_post_dependencies(p);
>
> if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
> !p->ctx->preamble_presented) {
> job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
> p->ctx->preamble_presented = true;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index c806f984bcc5..a230a30722d4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -64,23 +64,24 @@
> * - 3.18.0 - Export gpu always on cu bitmap
> * - 3.19.0 - Add support for UVD MJPEG decode
> * - 3.20.0 - Add support for local BOs
> * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl
> * - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl
> * - 3.23.0 - Add query for VRAM lost counter
> * - 3.24.0 - Add high priority compute support for gfx9
> * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
> * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
> * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.
> + * - 3.28.0 - AMDGPU_IB_FLAG_GET_START_SYNCOBJ
> */
> #define KMS_DRIVER_MAJOR 3
> -#define KMS_DRIVER_MINOR 27
> +#define KMS_DRIVER_MINOR 28
> #define KMS_DRIVER_PATCHLEVEL 0
>
> int amdgpu_vram_limit = 0;
> int amdgpu_vis_vram_limit = 0;
> int amdgpu_gart_size = -1; /* auto */
> int amdgpu_gtt_size = -1; /* auto */
> int amdgpu_moverate = -1; /* auto */
> int amdgpu_benchmarking = 0;
> int amdgpu_testing = 0;
> int amdgpu_audio = -1;
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 662d379ea624..d0e0c99cea32 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -538,21 +538,23 @@ struct drm_amdgpu_cs_in {
> __u32 ctx_id;
> /** Handle of resource list associated with CS */
> __u32 bo_list_handle;
> __u32 num_chunks;
> __u32 _pad;
> /** this points to __u64 * which point to cs chunks */
> __u64 chunks;
> };
>
> struct drm_amdgpu_cs_out {
> - __u64 handle;
> + __u64 handle; /* sequence number */
> + __u32 start_syncobj; /* signalled when IB execution begins */
> + __u32 _pad;
> };
>
> union drm_amdgpu_cs {
> struct drm_amdgpu_cs_in in;
> struct drm_amdgpu_cs_out out;
> };
>
> /* Specify flags to be used for IB */
>
> /* This IB should be submitted to CE */
> @@ -566,20 +568,29 @@ union drm_amdgpu_cs {
>
> /* The IB fence should do the L2 writeback but not invalidate any shader
> * caches (L2/vL1/sL1/I$). */
> #define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3)
>
> /* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER.
> * This will reset wave ID counters for the IB.
> */
> #define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4)
>
> +/* The CS ioctl will return a syncobj representing when all IBs begin
> + * execution. If set, this applies to all IBs. The returned syncobj can be
> + * used as an IB dependency for other IBs.
> + *
> + * This is used for GPU deadlock prevention when userspace uses mid-IB fences
> + * to wait for mid-IB work on other rings.
> + */
> +#define AMDGPU_IB_FLAG_GET_START_SYNCOBJ (1 << 5)
> +
> struct drm_amdgpu_cs_chunk_ib {
> __u32 _pad;
> /** AMDGPU_IB_FLAG_* */
> __u32 flags;
> /** Virtual address to begin IB execution */
> __u64 va_start;
> /** Size of submission */
> __u32 ib_bytes;
> /** HW IP to submit to */
> __u32 ip_type;
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] drm/amdgpu: add AMDGPU_IB_FLAG_GET_START_SYNCOBJ to expose scheduled fence
[not found] ` <762485e9-7fe4-3ffb-16a4-f83436871451-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2019-01-29 13:32 ` Marek Olšák
[not found] ` <CAAxE2A6F+Wexxe_fY22P7iWk1DwKbP6DOmxyoUV164YQFTvdaA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2019-01-29 21:54 ` Marek Olšák
1 sibling, 1 reply; 6+ messages in thread
From: Marek Olšák @ 2019-01-29 13:32 UTC (permalink / raw)
To: Christian König; +Cc: amd-gfx mailing list
[-- Attachment #1.1: Type: text/plain, Size: 8791 bytes --]
On Tue, Jan 29, 2019, 3:01 AM Christian König <
ckoenig.leichtzumerken-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org wrote:
> Am 28.01.19 um 22:52 schrieb Marek Olšák:
> > From: Marek Olšák <marek.olsak-5C7GfCeVMHo@public.gmane.org>
> >
> > Normal syncobjs signal when an IB finishes. Start syncobjs signal when
> > an IB starts.
>
> That approach has quite a number of problems (for example you can't
> allocate memory at this point).
>
> Better add a flag that we should only sync on scheduling for a
> dependency/syncobj instead.
>
I don't understand. Can you give me an example of the interface and how the
implementation would look?
Thanks,
Marek
> Christian.
>
> >
> > Signed-off-by: Marek Olšák <marek.olsak-5C7GfCeVMHo@public.gmane.org>
> > ---
> > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
> > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 18 ++++++++++++++++++
> > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
> > include/uapi/drm/amdgpu_drm.h | 13 ++++++++++++-
> > 4 files changed, 33 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > index d67f8b1dfe80..8e2f7e558bc9 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > @@ -453,20 +453,21 @@ struct amdgpu_cs_parser {
> > struct dma_fence *fence;
> > uint64_t bytes_moved_threshold;
> > uint64_t bytes_moved_vis_threshold;
> > uint64_t bytes_moved;
> > uint64_t bytes_moved_vis;
> > struct amdgpu_bo_list_entry *evictable;
> >
> > /* user fence */
> > struct amdgpu_bo_list_entry uf_entry;
> >
> > + bool get_start_syncobj;
> > unsigned num_post_dep_syncobjs;
> > struct drm_syncobj **post_dep_syncobjs;
> > };
> >
> > static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
> > uint32_t ib_idx, int idx)
> > {
> > return p->job->ibs[ib_idx].ptr[idx];
> > }
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > index 1c49b8266d69..917f3818c61c 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > @@ -1022,20 +1022,23 @@ static int amdgpu_cs_ib_fill(struct
> amdgpu_device *adev,
> > r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
> > chunk_ib->ip_instance,
> chunk_ib->ring,
> > &entity);
> > if (r)
> > return r;
> >
> > if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
> > parser->job->preamble_status |=
> > AMDGPU_PREAMBLE_IB_PRESENT;
> >
> > + if (chunk_ib->flags & AMDGPU_IB_FLAG_GET_START_SYNCOBJ)
> > + parser->get_start_syncobj = true;
> > +
> > if (parser->entity && parser->entity != entity)
> > return -EINVAL;
> >
> > parser->entity = entity;
> >
> > ring = to_amdgpu_ring(entity->rq->sched);
> > r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
> > chunk_ib->ib_bytes : 0, ib);
> > if (r) {
> > DRM_ERROR("Failed to get ib !\n");
> > @@ -1227,20 +1230,35 @@ static int amdgpu_cs_submit(struct
> amdgpu_cs_parser *p,
> > amdgpu_mn_lock(p->mn);
> > amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
> > struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> >
> > if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
> > r = -ERESTARTSYS;
> > goto error_abort;
> > }
> > }
> >
> > + if (p->get_start_syncobj) {
> > + struct drm_syncobj *syncobj;
> > +
> > + r = drm_syncobj_create(&syncobj, 0,
> > + &job->base.s_fence->scheduled);
> > + if (r)
> > + goto error_abort;
> > +
> > + r = drm_syncobj_get_handle(p->filp, syncobj,
> > + &cs->out.start_syncobj);
> > + if (r)
> > + goto error_abort;
> > + drm_syncobj_put(syncobj);
> > + }
> > +
> > job->owner = p->filp;
> > p->fence = dma_fence_get(&job->base.s_fence->finished);
> >
> > amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
> > amdgpu_cs_post_dependencies(p);
> >
> > if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
> > !p->ctx->preamble_presented) {
> > job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
> > p->ctx->preamble_presented = true;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > index c806f984bcc5..a230a30722d4 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> > @@ -64,23 +64,24 @@
> > * - 3.18.0 - Export gpu always on cu bitmap
> > * - 3.19.0 - Add support for UVD MJPEG decode
> > * - 3.20.0 - Add support for local BOs
> > * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl
> > * - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl
> > * - 3.23.0 - Add query for VRAM lost counter
> > * - 3.24.0 - Add high priority compute support for gfx9
> > * - 3.25.0 - Add support for sensor query info (stable pstate
> sclk/mclk).
> > * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
> > * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.
> > + * - 3.28.0 - AMDGPU_IB_FLAG_GET_START_SYNCOBJ
> > */
> > #define KMS_DRIVER_MAJOR 3
> > -#define KMS_DRIVER_MINOR 27
> > +#define KMS_DRIVER_MINOR 28
> > #define KMS_DRIVER_PATCHLEVEL 0
> >
> > int amdgpu_vram_limit = 0;
> > int amdgpu_vis_vram_limit = 0;
> > int amdgpu_gart_size = -1; /* auto */
> > int amdgpu_gtt_size = -1; /* auto */
> > int amdgpu_moverate = -1; /* auto */
> > int amdgpu_benchmarking = 0;
> > int amdgpu_testing = 0;
> > int amdgpu_audio = -1;
> > diff --git a/include/uapi/drm/amdgpu_drm.h
> b/include/uapi/drm/amdgpu_drm.h
> > index 662d379ea624..d0e0c99cea32 100644
> > --- a/include/uapi/drm/amdgpu_drm.h
> > +++ b/include/uapi/drm/amdgpu_drm.h
> > @@ -538,21 +538,23 @@ struct drm_amdgpu_cs_in {
> > __u32 ctx_id;
> > /** Handle of resource list associated with CS */
> > __u32 bo_list_handle;
> > __u32 num_chunks;
> > __u32 _pad;
> > /** this points to __u64 * which point to cs chunks */
> > __u64 chunks;
> > };
> >
> > struct drm_amdgpu_cs_out {
> > - __u64 handle;
> > + __u64 handle; /* sequence number */
> > + __u32 start_syncobj; /* signalled when IB execution begins */
> > + __u32 _pad;
> > };
> >
> > union drm_amdgpu_cs {
> > struct drm_amdgpu_cs_in in;
> > struct drm_amdgpu_cs_out out;
> > };
> >
> > /* Specify flags to be used for IB */
> >
> > /* This IB should be submitted to CE */
> > @@ -566,20 +568,29 @@ union drm_amdgpu_cs {
> >
> > /* The IB fence should do the L2 writeback but not invalidate any
> shader
> > * caches (L2/vL1/sL1/I$). */
> > #define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3)
> >
> > /* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before
> PACKET3_INDIRECT_BUFFER.
> > * This will reset wave ID counters for the IB.
> > */
> > #define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4)
> >
> > +/* The CS ioctl will return a syncobj representing when all IBs begin
> > + * execution. If set, this applies to all IBs. The returned syncobj can
> be
> > + * used as an IB dependency for other IBs.
> > + *
> > + * This is used for GPU deadlock prevention when userspace uses mid-IB
> fences
> > + * to wait for mid-IB work on other rings.
> > + */
> > +#define AMDGPU_IB_FLAG_GET_START_SYNCOBJ (1 << 5)
> > +
> > struct drm_amdgpu_cs_chunk_ib {
> > __u32 _pad;
> > /** AMDGPU_IB_FLAG_* */
> > __u32 flags;
> > /** Virtual address to begin IB execution */
> > __u64 va_start;
> > /** Size of submission */
> > __u32 ib_bytes;
> > /** HW IP to submit to */
> > __u32 ip_type;
>
>
[-- Attachment #1.2: Type: text/html, Size: 11683 bytes --]
[-- Attachment #2: Type: text/plain, Size: 154 bytes --]
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] drm/amdgpu: add AMDGPU_IB_FLAG_GET_START_SYNCOBJ to expose scheduled fence
[not found] ` <CAAxE2A6F+Wexxe_fY22P7iWk1DwKbP6DOmxyoUV164YQFTvdaA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2019-01-29 13:46 ` Koenig, Christian
0 siblings, 0 replies; 6+ messages in thread
From: Koenig, Christian @ 2019-01-29 13:46 UTC (permalink / raw)
To: Marek Olšák; +Cc: amd-gfx mailing list
[-- Attachment #1.1: Type: text/plain, Size: 8753 bytes --]
Am 29.01.19 um 14:32 schrieb Marek Olšák:
On Tue, Jan 29, 2019, 3:01 AM Christian König <ckoenig.leichtzumerken@gmail.com> wrote:
Am 28.01.19 um 22:52 schrieb Marek Olšák:
> From: Marek Olšák <marek.olsak@amd.com>
>
> Normal syncobjs signal when an IB finishes. Start syncobjs signal when
> an IB starts.
That approach has quite a number of problems (for example you can't
allocate memory at this point).
Better add a flag that we should only sync on scheduling for a
dependency/syncobj instead.
I don't understand. Can you give me an example of the interface and how the implementation would look?
For example we add a new chunk type AMDGPU_CHUNK_ID_SCHEDULED which is handled the same way as AMDGPU_CHUNK_ID_DEPENDENCIES.
Then in amdgpu_cs_process_fence_dep() we check whether it's an AMDGPU_CHUNK_ID_SCHEDULED chunk and, if so, extract the scheduled fence from the fence we got from amdgpu_ctx_get_fence().
Christian.
Thanks,
Marek
Christian.
>
> Signed-off-by: Marek Olšák <marek.olsak@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 18 ++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
> include/uapi/drm/amdgpu_drm.h | 13 ++++++++++++-
> 4 files changed, 33 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index d67f8b1dfe80..8e2f7e558bc9 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -453,20 +453,21 @@ struct amdgpu_cs_parser {
> struct dma_fence *fence;
> uint64_t bytes_moved_threshold;
> uint64_t bytes_moved_vis_threshold;
> uint64_t bytes_moved;
> uint64_t bytes_moved_vis;
> struct amdgpu_bo_list_entry *evictable;
>
> /* user fence */
> struct amdgpu_bo_list_entry uf_entry;
>
> + bool get_start_syncobj;
> unsigned num_post_dep_syncobjs;
> struct drm_syncobj **post_dep_syncobjs;
> };
>
> static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
> uint32_t ib_idx, int idx)
> {
> return p->job->ibs[ib_idx].ptr[idx];
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 1c49b8266d69..917f3818c61c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -1022,20 +1022,23 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
> r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type,
> chunk_ib->ip_instance, chunk_ib->ring,
> &entity);
> if (r)
> return r;
>
> if (chunk_ib->flags & AMDGPU_IB_FLAG_PREAMBLE)
> parser->job->preamble_status |=
> AMDGPU_PREAMBLE_IB_PRESENT;
>
> + if (chunk_ib->flags & AMDGPU_IB_FLAG_GET_START_SYNCOBJ)
> + parser->get_start_syncobj = true;
> +
> if (parser->entity && parser->entity != entity)
> return -EINVAL;
>
> parser->entity = entity;
>
> ring = to_amdgpu_ring(entity->rq->sched);
> r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ?
> chunk_ib->ib_bytes : 0, ib);
> if (r) {
> DRM_ERROR("Failed to get ib !\n");
> @@ -1227,20 +1230,35 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
> amdgpu_mn_lock(p->mn);
> amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
> struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
>
> if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
> r = -ERESTARTSYS;
> goto error_abort;
> }
> }
>
> + if (p->get_start_syncobj) {
> + struct drm_syncobj *syncobj;
> +
> + r = drm_syncobj_create(&syncobj, 0,
> + &job->base.s_fence->scheduled);
> + if (r)
> + goto error_abort;
> +
> + r = drm_syncobj_get_handle(p->filp, syncobj,
> + &cs->out.start_syncobj);
> + if (r)
> + goto error_abort;
> + drm_syncobj_put(syncobj);
> + }
> +
> job->owner = p->filp;
> p->fence = dma_fence_get(&job->base.s_fence->finished);
>
> amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq);
> amdgpu_cs_post_dependencies(p);
>
> if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) &&
> !p->ctx->preamble_presented) {
> job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
> p->ctx->preamble_presented = true;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index c806f984bcc5..a230a30722d4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -64,23 +64,24 @@
> * - 3.18.0 - Export gpu always on cu bitmap
> * - 3.19.0 - Add support for UVD MJPEG decode
> * - 3.20.0 - Add support for local BOs
> * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl
> * - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl
> * - 3.23.0 - Add query for VRAM lost counter
> * - 3.24.0 - Add high priority compute support for gfx9
> * - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
> * - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
> * - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.
> + * - 3.28.0 - AMDGPU_IB_FLAG_GET_START_SYNCOBJ
> */
> #define KMS_DRIVER_MAJOR 3
> -#define KMS_DRIVER_MINOR 27
> +#define KMS_DRIVER_MINOR 28
> #define KMS_DRIVER_PATCHLEVEL 0
>
> int amdgpu_vram_limit = 0;
> int amdgpu_vis_vram_limit = 0;
> int amdgpu_gart_size = -1; /* auto */
> int amdgpu_gtt_size = -1; /* auto */
> int amdgpu_moverate = -1; /* auto */
> int amdgpu_benchmarking = 0;
> int amdgpu_testing = 0;
> int amdgpu_audio = -1;
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 662d379ea624..d0e0c99cea32 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -538,21 +538,23 @@ struct drm_amdgpu_cs_in {
> __u32 ctx_id;
> /** Handle of resource list associated with CS */
> __u32 bo_list_handle;
> __u32 num_chunks;
> __u32 _pad;
> /** this points to __u64 * which point to cs chunks */
> __u64 chunks;
> };
>
> struct drm_amdgpu_cs_out {
> - __u64 handle;
> + __u64 handle; /* sequence number */
> + __u32 start_syncobj; /* signalled when IB execution begins */
> + __u32 _pad;
> };
>
> union drm_amdgpu_cs {
> struct drm_amdgpu_cs_in in;
> struct drm_amdgpu_cs_out out;
> };
>
> /* Specify flags to be used for IB */
>
> /* This IB should be submitted to CE */
> @@ -566,20 +568,29 @@ union drm_amdgpu_cs {
>
> /* The IB fence should do the L2 writeback but not invalidate any shader
> * caches (L2/vL1/sL1/I$). */
> #define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3)
>
> /* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER.
> * This will reset wave ID counters for the IB.
> */
> #define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4)
>
> +/* The CS ioctl will return a syncobj representing when all IBs begin
> + * execution. If set, this applies to all IBs. The returned syncobj can be
> + * used as an IB dependency for other IBs.
> + *
> + * This is used for GPU deadlock prevention when userspace uses mid-IB fences
> + * to wait for mid-IB work on other rings.
> + */
> +#define AMDGPU_IB_FLAG_GET_START_SYNCOBJ (1 << 5)
> +
> struct drm_amdgpu_cs_chunk_ib {
> __u32 _pad;
> /** AMDGPU_IB_FLAG_* */
> __u32 flags;
> /** Virtual address to begin IB execution */
> __u64 va_start;
> /** Size of submission */
> __u32 ib_bytes;
> /** HW IP to submit to */
> __u32 ip_type;
[-- Attachment #1.2: Type: text/html, Size: 16236 bytes --]
[-- Attachment #2: Type: text/plain, Size: 154 bytes --]
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] drm/amdgpu: add AMDGPU_IB_FLAG_GET_START_SYNCOBJ to expose scheduled fence
[not found] ` <762485e9-7fe4-3ffb-16a4-f83436871451-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-01-29 13:32 ` Marek Olšák
@ 2019-01-29 21:54 ` Marek Olšák
[not found] ` <CAAxE2A4GaY4-osO6GfC-zG9ofxu2wjWZJ74Jds1ntqXZba_vQw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
1 sibling, 1 reply; 6+ messages in thread
From: Marek Olšák @ 2019-01-29 21:54 UTC (permalink / raw)
To: Christian König; +Cc: amd-gfx mailing list
[-- Attachment #1.1: Type: text/plain, Size: 589 bytes --]
On Tue, Jan 29, 2019 at 3:01 AM Christian König <
ckoenig.leichtzumerken-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:
> Am 28.01.19 um 22:52 schrieb Marek Olšák:
> > From: Marek Olšák <marek.olsak-5C7GfCeVMHo@public.gmane.org>
> >
> > Normal syncobjs signal when an IB finishes. Start syncobjs signal when
> > an IB starts.
>
> That approach has quite a number of problems (for example you can't
> allocate memory at this point).
>
Even if I drop this patch, can you describe all the problems with it?
Andrey and I would like to understand this.
Thanks,
Marek
[-- Attachment #1.2: Type: text/html, Size: 1090 bytes --]
[-- Attachment #2: Type: text/plain, Size: 154 bytes --]
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH] drm/amdgpu: add AMDGPU_IB_FLAG_GET_START_SYNCOBJ to expose scheduled fence
[not found] ` <CAAxE2A4GaY4-osO6GfC-zG9ofxu2wjWZJ74Jds1ntqXZba_vQw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2019-01-30 8:19 ` Christian König
0 siblings, 0 replies; 6+ messages in thread
From: Christian König @ 2019-01-30 8:19 UTC (permalink / raw)
To: Marek Olšák, Christian König; +Cc: amd-gfx mailing list
[-- Attachment #1.1: Type: text/plain, Size: 1318 bytes --]
Am 29.01.19 um 22:54 schrieb Marek Olšák:
> On Tue, Jan 29, 2019 at 3:01 AM Christian König
> <ckoenig.leichtzumerken-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org
> <mailto:ckoenig.leichtzumerken-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>> wrote:
>
> Am 28.01.19 um 22:52 schrieb Marek Olšák:
> > From: Marek Olšák <marek.olsak-5C7GfCeVMHo@public.gmane.org <mailto:marek.olsak-5C7GfCeVMHo@public.gmane.org>>
> >
> > Normal syncobjs signal when an IB finishes. Start syncobjs
> signal when
> > an IB starts.
>
> That approach has quite a number of problems (for example you can't
> allocate memory at this point).
>
>
> Even if I drop this patch, can you describe all the problems with it?
> Andrey and I would like to understand this.
> amdgpu_mn_lock(p->mn);
> ...
> r = drm_syncobj_create(&syncobj, 0,...
You can't allocate memory while holding the MN lock, so calling
drm_syncobj_create() here is forbidden.
> r = drm_syncobj_get_handle(p->filp, syncobj,...
This can fail, which would result in a syncobj that is never signaled
or freed.
Regards,
Christian.
>
> Thanks,
> Marek
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[-- Attachment #1.2: Type: text/html, Size: 3316 bytes --]
[-- Attachment #2: Type: text/plain, Size: 154 bytes --]
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2019-01-30 8:19 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-01-28 21:52 [PATCH] drm/amdgpu: add AMDGPU_IB_FLAG_GET_START_SYNCOBJ to expose scheduled fence Marek Olšák
[not found] ` <20190128215239.32498-1-maraeo-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-01-29 8:01 ` Christian König
[not found] ` <762485e9-7fe4-3ffb-16a4-f83436871451-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-01-29 13:32 ` Marek Olšák
[not found] ` <CAAxE2A6F+Wexxe_fY22P7iWk1DwKbP6DOmxyoUV164YQFTvdaA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2019-01-29 13:46 ` Koenig, Christian
2019-01-29 21:54 ` Marek Olšák
[not found] ` <CAAxE2A4GaY4-osO6GfC-zG9ofxu2wjWZJ74Jds1ntqXZba_vQw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2019-01-30 8:19 ` Christian König
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.