* [Intel-gfx] [PATCH 1/3] drm/i915: introduce a mechanism to extend execbuf2
@ 2020-04-06 20:07 Venkata Sandeep Dhanalakota
2020-04-06 20:07 ` [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support Venkata Sandeep Dhanalakota
` (4 more replies)
0 siblings, 5 replies; 14+ messages in thread
From: Venkata Sandeep Dhanalakota @ 2020-04-06 20:07 UTC (permalink / raw)
To: intel-gfx; +Cc: chris.p.wilson
From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
We're planning to use this for a couple of new features where we need
to provide additional parameters to execbuf.
v2: Check for invalid flags in execbuffer2 (Lionel)
v3: Rename I915_EXEC_EXT -> I915_EXEC_USE_EXTENSIONS (Chris)
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 39 ++++++++++++++++++-
include/uapi/drm/i915_drm.h | 26 +++++++++++--
2 files changed, 61 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 9d11bad74e9a..16831f715daa 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -26,6 +26,7 @@
#include "i915_gem_ioctls.h"
#include "i915_sw_fence_work.h"
#include "i915_trace.h"
+#include "i915_user_extensions.h"
struct eb_vma {
struct i915_vma *vma;
@@ -288,6 +289,10 @@ struct i915_execbuffer {
int lut_size;
struct hlist_head *buckets; /** ht for relocation handles */
struct eb_vma_array *array;
+
+ struct {
+ u64 flags; /** Available extensions parameters */
+ } extensions;
};
static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
@@ -1698,7 +1703,8 @@ static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
return -EINVAL;
/* Kernel clipping was a DRI1 misfeature */
- if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) {
+ if (!(exec->flags & (I915_EXEC_FENCE_ARRAY |
+ I915_EXEC_USE_EXTENSIONS))) {
if (exec->num_cliprects || exec->cliprects_ptr)
return -EINVAL;
}
@@ -2431,6 +2437,33 @@ static void eb_request_add(struct i915_execbuffer *eb)
mutex_unlock(&tl->mutex);
}
+static const i915_user_extension_fn execbuf_extensions[] = {
+};
+
+static int
+parse_execbuf2_extensions(struct drm_i915_gem_execbuffer2 *args,
+ struct i915_execbuffer *eb)
+{
+ eb->extensions.flags = 0;
+
+ if (!(args->flags & I915_EXEC_USE_EXTENSIONS))
+ return 0;
+
+ /* The execbuf2 extension mechanism reuses cliprects_ptr. So we cannot
+ * have another flag also using it at the same time.
+ */
+ if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
+ return -EINVAL;
+
+ if (args->num_cliprects != 0)
+ return -EINVAL;
+
+ return i915_user_extensions(u64_to_user_ptr(args->cliprects_ptr),
+ execbuf_extensions,
+ ARRAY_SIZE(execbuf_extensions),
+ eb);
+}
+
static int
i915_gem_do_execbuffer(struct drm_device *dev,
struct drm_file *file,
@@ -2484,6 +2517,10 @@ i915_gem_do_execbuffer(struct drm_device *dev,
if (args->flags & I915_EXEC_IS_PINNED)
eb.batch_flags |= I915_DISPATCH_PINNED;
+ err = parse_execbuf2_extensions(args, &eb);
+ if (err)
+ return err;
+
if (args->flags & I915_EXEC_FENCE_IN) {
in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
if (!in_fence)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 14b67cd6b54b..7ea38aa6502c 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1046,6 +1046,10 @@ struct drm_i915_gem_exec_fence {
__u32 flags;
};
+enum drm_i915_gem_execbuffer_ext {
+ DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
+};
+
struct drm_i915_gem_execbuffer2 {
/**
* List of gem_exec_object2 structs
@@ -1062,8 +1066,15 @@ struct drm_i915_gem_execbuffer2 {
__u32 num_cliprects;
/**
* This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY
- * is not set. If I915_EXEC_FENCE_ARRAY is set, then this is a
- * struct drm_i915_gem_exec_fence *fences.
+ * & I915_EXEC_USE_EXTENSIONS are not set.
+ *
+ * If I915_EXEC_FENCE_ARRAY is set, then this is a pointer to an array
+ * of struct drm_i915_gem_exec_fence and num_cliprects is the length
+ * of the array.
+ *
+ * If I915_EXEC_USE_EXTENSIONS is set, then this is a pointer to a
+ * single struct drm_i915_gem_base_execbuffer_ext and num_cliprects is
+ * 0.
*/
__u64 cliprects_ptr;
#define I915_EXEC_RING_MASK (0x3f)
@@ -1181,7 +1192,16 @@ struct drm_i915_gem_execbuffer2 {
*/
#define I915_EXEC_FENCE_SUBMIT (1 << 20)
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1))
+/*
+ * Setting I915_EXEC_USE_EXTENSIONS implies that
+ * drm_i915_gem_execbuffer2.cliprects_ptr is treated as a pointer to an linked
+ * list of i915_user_extension. Each i915_user_extension node is the base of a
+ * larger structure. The list of supported structures are listed in the
+ * drm_i915_gem_execbuffer_ext enum.
+ */
+#define I915_EXEC_USE_EXTENSIONS (1 << 21)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_USE_EXTENSIONS<<1))
#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
#define i915_execbuffer2_set_context_id(eb2, context) \
--
2.21.0.5.gaeb582a983
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support
2020-04-06 20:07 [Intel-gfx] [PATCH 1/3] drm/i915: introduce a mechanism to extend execbuf2 Venkata Sandeep Dhanalakota
@ 2020-04-06 20:07 ` Venkata Sandeep Dhanalakota
2020-04-08 16:29 ` Lionel Landwerlin
2020-04-08 17:14 ` Lionel Landwerlin
2020-04-06 20:07 ` [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences Venkata Sandeep Dhanalakota
` (3 subsequent siblings)
4 siblings, 2 replies; 14+ messages in thread
From: Venkata Sandeep Dhanalakota @ 2020-04-06 20:07 UTC (permalink / raw)
To: intel-gfx; +Cc: chris.p.wilson
Introduces new parameters to execbuf so that we can specify syncobj
handles as well as timeline points.
v2: Reuse i915_user_extension_fn
v3: Check that the chained extension is only present once (Chris)
v4: Check that dma_fence_chain_find_seqno returns a non NULL fence
(Lionel)
v5: Use BIT_ULL (Chris)
v6: Fix issue with already signaled timeline points,
dma_fence_chain_find_seqno() setting fence to NULL (Chris)
v7: Report ENOENT with invalid syncobj handle (Lionel)
v8: Check for out of order timeline point insertion (Chris)
v9: After explanations on
https://lists.freedesktop.org/archives/dri-devel/2019-August/229287.html
drop the ordering check from v8 (Lionel)
v10: Set first extension enum item to 1 (Jason)
v11: Add wait on previous sync points in timelines (Sandeep)
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Venkata Sandeep Dhanalakota <venkata.s.dhanalakota@intel.com>
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 312 ++++++++++++++----
drivers/gpu/drm/i915/i915_drv.c | 3 +-
drivers/gpu/drm/i915/i915_getparam.c | 1 +
include/uapi/drm/i915_drm.h | 38 +++
4 files changed, 296 insertions(+), 58 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 16831f715daa..4cb4cd035daa 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -230,6 +230,13 @@ enum {
* the batchbuffer in trusted mode, otherwise the ioctl is rejected.
*/
+struct i915_eb_fences {
+ struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
+ struct dma_fence *dma_fence;
+ u64 value;
+ struct dma_fence_chain *chain_fence;
+};
+
struct i915_execbuffer {
struct drm_i915_private *i915; /** i915 backpointer */
struct drm_file *file; /** per-file lookup tables and limits */
@@ -292,6 +299,7 @@ struct i915_execbuffer {
struct {
u64 flags; /** Available extensions parameters */
+ struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
} extensions;
};
@@ -2244,67 +2252,219 @@ eb_pin_engine(struct i915_execbuffer *eb,
}
static void
-__free_fence_array(struct drm_syncobj **fences, unsigned int n)
+__free_fence_array(struct i915_eb_fences *fences, unsigned int n)
{
- while (n--)
- drm_syncobj_put(ptr_mask_bits(fences[n], 2));
+ while (n--) {
+ drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
+ dma_fence_put(fences[n].dma_fence);
+ kfree(fences[n].chain_fence);
+ }
kvfree(fences);
}
-static struct drm_syncobj **
-get_fence_array(struct drm_i915_gem_execbuffer2 *args,
- struct drm_file *file)
+static struct i915_eb_fences *
+get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
+{
+ struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences =
+ &eb->extensions.timeline_fences;
+ struct drm_i915_gem_exec_fence __user *user_fences;
+ struct i915_eb_fences *fences;
+ u64 __user *user_values;
+ u64 num_fences, num_user_fences = timeline_fences->fence_count;
+ unsigned long n;
+ int err = 0;
+
+ /* Check multiplication overflow for access_ok() and kvmalloc_array() */
+ BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
+ if (num_user_fences > min_t(unsigned long,
+ ULONG_MAX / sizeof(*user_fences),
+ SIZE_MAX / sizeof(*fences)))
+ return ERR_PTR(-EINVAL);
+
+ user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
+ if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences)))
+ return ERR_PTR(-EFAULT);
+
+ user_values = u64_to_user_ptr(timeline_fences->values_ptr);
+ if (!access_ok(user_values, num_user_fences * sizeof(*user_values)))
+ return ERR_PTR(-EFAULT);
+
+ fences = kvmalloc_array(num_user_fences, sizeof(*fences),
+ __GFP_NOWARN | GFP_KERNEL);
+ if (!fences)
+ return ERR_PTR(-ENOMEM);
+
+ BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
+ ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
+
+ for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) {
+ struct drm_i915_gem_exec_fence user_fence;
+ struct drm_syncobj *syncobj;
+ struct dma_fence *fence = NULL;
+ u64 point;
+
+ if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) {
+ err = -EFAULT;
+ goto err;
+ }
+
+ if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ if (__get_user(point, user_values++)) {
+ err = -EFAULT;
+ goto err;
+ }
+
+ syncobj = drm_syncobj_find(eb->file, user_fence.handle);
+ if (!syncobj) {
+ DRM_DEBUG("Invalid syncobj handle provided\n");
+ err = -ENOENT;
+ goto err;
+ }
+
+ fence = drm_syncobj_fence_get(syncobj);
+
+ if (!fence && user_fence.flags &&
+ !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
+ DRM_DEBUG("Syncobj handle has no fence\n");
+ drm_syncobj_put(syncobj);
+ err = -EINVAL;
+ goto err;
+ }
+
+ if (fence)
+ err = dma_fence_chain_find_seqno(&fence, point);
+
+ if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
+ DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
+ drm_syncobj_put(syncobj);
+ goto err;
+ }
+
+ /* A point might have been signaled already and
+ * garbage collected from the timeline. In this case
+ * just ignore the point and carry on.
+ */
+ if (!fence && (user_fence.flags & I915_EXEC_FENCE_WAIT)) {
+ drm_syncobj_put(syncobj);
+ continue;
+ }
+
+ /*
+ * For timeline syncobjs we need to preallocate chains for
+ * later signaling.
+ */
+ if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
+ /*
+ * Waiting and signaling the same point (when point !=
+ * 0) would break the timeline.
+ */
+ if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
+ DRM_DEBUG("Trying to wait & signal the same timeline point.\n");
+ err = -EINVAL;
+ drm_syncobj_put(syncobj);
+ goto err;
+ }
+
+ fences[num_fences].chain_fence =
+ kmalloc(sizeof(*fences[num_fences].chain_fence),
+ GFP_KERNEL);
+ if (!fences[num_fences].chain_fence) {
+ drm_syncobj_put(syncobj);
+ err = -ENOMEM;
+ DRM_DEBUG("Unable to alloc chain_fence\n");
+ goto err;
+ }
+ } else {
+ fences[num_fences].chain_fence = NULL;
+ }
+
+ fences[num_fences].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
+ fences[num_fences].dma_fence = fence;
+ fences[num_fences].value = point;
+ num_fences++;
+ }
+
+ *out_n_fences = num_fences;
+
+ return fences;
+
+err:
+ __free_fence_array(fences, num_fences);
+ return ERR_PTR(err);
+}
+
+static struct i915_eb_fences *
+get_legacy_fence_array(struct i915_execbuffer *eb,
+ int *out_n_fences)
{
- const unsigned long nfences = args->num_cliprects;
+ struct drm_i915_gem_execbuffer2 *args = eb->args;
struct drm_i915_gem_exec_fence __user *user;
- struct drm_syncobj **fences;
+ struct i915_eb_fences *fences;
+ const u32 num_fences = args->num_cliprects;
unsigned long n;
int err;
- if (!(args->flags & I915_EXEC_FENCE_ARRAY))
- return NULL;
+ *out_n_fences = num_fences;
/* Check multiplication overflow for access_ok() and kvmalloc_array() */
BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
- if (nfences > min_t(unsigned long,
- ULONG_MAX / sizeof(*user),
- SIZE_MAX / sizeof(*fences)))
+ if (*out_n_fences > min_t(unsigned long,
+ ULONG_MAX / sizeof(*user),
+ SIZE_MAX / sizeof(*fences)))
return ERR_PTR(-EINVAL);
user = u64_to_user_ptr(args->cliprects_ptr);
- if (!access_ok(user, nfences * sizeof(*user)))
+ if (!access_ok(user, *out_n_fences * sizeof(*user)))
return ERR_PTR(-EFAULT);
- fences = kvmalloc_array(nfences, sizeof(*fences),
+ fences = kvmalloc_array(*out_n_fences, sizeof(*fences),
__GFP_NOWARN | GFP_KERNEL);
if (!fences)
return ERR_PTR(-ENOMEM);
- for (n = 0; n < nfences; n++) {
- struct drm_i915_gem_exec_fence fence;
+ for (n = 0; n < *out_n_fences; n++) {
+ struct drm_i915_gem_exec_fence user_fence;
struct drm_syncobj *syncobj;
+ struct dma_fence *fence = NULL;
- if (__copy_from_user(&fence, user++, sizeof(fence))) {
+ if (__copy_from_user(&user_fence, user++, sizeof(user_fence))) {
err = -EFAULT;
goto err;
}
- if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
+ if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
err = -EINVAL;
goto err;
}
- syncobj = drm_syncobj_find(file, fence.handle);
+ syncobj = drm_syncobj_find(eb->file, user_fence.handle);
if (!syncobj) {
DRM_DEBUG("Invalid syncobj handle provided\n");
err = -ENOENT;
goto err;
}
+ if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
+ fence = drm_syncobj_fence_get(syncobj);
+ if (!fence) {
+ DRM_DEBUG("Syncobj handle has no fence\n");
+ drm_syncobj_put(syncobj);
+ err = -EINVAL;
+ goto err;
+ }
+ }
+
BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
- fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
+ fences[n].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
+ fences[n].dma_fence = fence;
+ fences[n].value = 0;
+ fences[n].chain_fence = NULL;
}
return fences;
@@ -2314,37 +2474,45 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args,
return ERR_PTR(err);
}
+static struct i915_eb_fences *
+get_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
+{
+ if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
+ return get_legacy_fence_array(eb, out_n_fences);
+
+ if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
+ return get_timeline_fence_array(eb, out_n_fences);
+
+ *out_n_fences = 0;
+ return NULL;
+}
+
static void
-put_fence_array(struct drm_i915_gem_execbuffer2 *args,
- struct drm_syncobj **fences)
+put_fence_array(struct i915_eb_fences *fences, int nfences)
{
if (fences)
- __free_fence_array(fences, args->num_cliprects);
+ __free_fence_array(fences, nfences);
}
static int
await_fence_array(struct i915_execbuffer *eb,
- struct drm_syncobj **fences)
+ struct i915_eb_fences *fences,
+ int nfences)
{
- const unsigned int nfences = eb->args->num_cliprects;
unsigned int n;
int err;
for (n = 0; n < nfences; n++) {
struct drm_syncobj *syncobj;
- struct dma_fence *fence;
unsigned int flags;
- syncobj = ptr_unpack_bits(fences[n], &flags, 2);
- if (!(flags & I915_EXEC_FENCE_WAIT))
- continue;
+ syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
- fence = drm_syncobj_fence_get(syncobj);
- if (!fence)
- return -EINVAL;
+ if (!fences[n].dma_fence)
+ continue;
- err = i915_request_await_dma_fence(eb->request, fence);
- dma_fence_put(fence);
+ err = i915_request_await_dma_fence(eb->request,
+ fences[n].dma_fence);
if (err < 0)
return err;
}
@@ -2354,9 +2522,9 @@ await_fence_array(struct i915_execbuffer *eb,
static void
signal_fence_array(struct i915_execbuffer *eb,
- struct drm_syncobj **fences)
+ struct i915_eb_fences *fences,
+ int nfences)
{
- const unsigned int nfences = eb->args->num_cliprects;
struct dma_fence * const fence = &eb->request->fence;
unsigned int n;
@@ -2364,14 +2532,44 @@ signal_fence_array(struct i915_execbuffer *eb,
struct drm_syncobj *syncobj;
unsigned int flags;
- syncobj = ptr_unpack_bits(fences[n], &flags, 2);
+ syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
if (!(flags & I915_EXEC_FENCE_SIGNAL))
continue;
- drm_syncobj_replace_fence(syncobj, fence);
+ if (fences[n].chain_fence) {
+ drm_syncobj_add_point(syncobj, fences[n].chain_fence,
+ fence, fences[n].value);
+ /*
+ * The chain's ownership is transferred to the
+ * timeline.
+ */
+ fences[n].chain_fence = NULL;
+ } else {
+ drm_syncobj_replace_fence(syncobj, fence);
+ }
}
}
+static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
+{
+ struct i915_execbuffer *eb = data;
+
+ /* Timeline fences are incompatible with the fence array flag. */
+ if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
+ return -EINVAL;
+
+ if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
+ return -EINVAL;
+
+ if (copy_from_user(&eb->extensions.timeline_fences, ext,
+ sizeof(eb->extensions.timeline_fences)))
+ return -EFAULT;
+
+ eb->extensions.flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES);
+
+ return 0;
+}
+
static void retire_requests(struct intel_timeline *tl, struct i915_request *end)
{
struct i915_request *rq, *rn;
@@ -2438,6 +2636,7 @@ static void eb_request_add(struct i915_execbuffer *eb)
}
static const i915_user_extension_fn execbuf_extensions[] = {
+ [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
};
static int
@@ -2468,16 +2667,17 @@ static int
i915_gem_do_execbuffer(struct drm_device *dev,
struct drm_file *file,
struct drm_i915_gem_execbuffer2 *args,
- struct drm_i915_gem_exec_object2 *exec,
- struct drm_syncobj **fences)
+ struct drm_i915_gem_exec_object2 *exec)
{
struct drm_i915_private *i915 = to_i915(dev);
struct i915_execbuffer eb;
struct dma_fence *in_fence = NULL;
struct dma_fence *exec_fence = NULL;
struct sync_file *out_fence = NULL;
+ struct i915_eb_fences *fences = NULL;
struct i915_vma *batch;
int out_fence_fd = -1;
+ int nfences = 0;
int err;
BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
@@ -2521,10 +2721,16 @@ i915_gem_do_execbuffer(struct drm_device *dev,
if (err)
return err;
+ fences = get_fence_array(&eb, &nfences);
+ if (IS_ERR(fences))
+ return PTR_ERR(fences);
+
if (args->flags & I915_EXEC_FENCE_IN) {
in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
- if (!in_fence)
- return -EINVAL;
+ if (!in_fence) {
+ err = -EINVAL;
+ goto err_fences;
+ }
}
if (args->flags & I915_EXEC_FENCE_SUBMIT) {
@@ -2648,7 +2854,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
}
if (fences) {
- err = await_fence_array(&eb, fences);
+ err = await_fence_array(&eb, fences, nfences);
if (err)
goto err_request;
}
@@ -2680,7 +2886,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
eb_request_add(&eb);
if (fences)
- signal_fence_array(&eb, fences);
+ signal_fence_array(&eb, fences, nfences);
if (out_fence) {
if (err == 0) {
@@ -2715,6 +2921,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
dma_fence_put(exec_fence);
err_in_fence:
dma_fence_put(in_fence);
+err_fences:
+ put_fence_array(fences, nfences);
return err;
}
@@ -2809,7 +3017,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
exec2_list[i].flags = 0;
}
- err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL);
+ err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list);
if (exec2.flags & __EXEC_HAS_RELOC) {
struct drm_i915_gem_exec_object __user *user_exec_list =
u64_to_user_ptr(args->buffers_ptr);
@@ -2841,7 +3049,6 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
struct drm_i915_private *i915 = to_i915(dev);
struct drm_i915_gem_execbuffer2 *args = data;
struct drm_i915_gem_exec_object2 *exec2_list;
- struct drm_syncobj **fences = NULL;
const size_t count = args->buffer_count;
int err;
@@ -2869,15 +3076,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
return -EFAULT;
}
- if (args->flags & I915_EXEC_FENCE_ARRAY) {
- fences = get_fence_array(args, file);
- if (IS_ERR(fences)) {
- kvfree(exec2_list);
- return PTR_ERR(fences);
- }
- }
-
- err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences);
+ err = i915_gem_do_execbuffer(dev, file, args, exec2_list);
/*
* Now that we have begun execution of the batchbuffer, we ignore
@@ -2917,7 +3116,6 @@ end:;
}
args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
- put_fence_array(args, fences);
kvfree(exec2_list);
return err;
}
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index a7a3b4b98572..f7f868c3c510 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1828,7 +1828,8 @@ static struct drm_driver driver = {
*/
.driver_features =
DRIVER_GEM |
- DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ,
+ DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ |
+ DRIVER_SYNCOBJ_TIMELINE,
.release = i915_driver_release,
.open = i915_driver_open,
.lastclose = i915_driver_lastclose,
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index 54fce81d5724..b9d3aab53c03 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -132,6 +132,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
case I915_PARAM_HAS_EXEC_BATCH_FIRST:
case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
+ case I915_PARAM_HAS_EXEC_TIMELINE_FENCES:
/* For the time being all of these are always true;
* if some supported hardware does not have one of these
* features this value needs to be provided from
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 7ea38aa6502c..7b8680e3b49d 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -619,6 +619,12 @@ typedef struct drm_i915_irq_wait {
*/
#define I915_PARAM_PERF_REVISION 54
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
+ * timeline syncobj through drm_i915_gem_execbuf_ext_timeline_fences. See
+ * I915_EXEC_USE_EXTENSIONS.
+ */
+#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
+
/* Must be kept compact -- no holes and well documented */
typedef struct drm_i915_getparam {
@@ -1047,9 +1053,41 @@ struct drm_i915_gem_exec_fence {
};
enum drm_i915_gem_execbuffer_ext {
+ /**
+ * See drm_i915_gem_execbuf_ext_timeline_fences.
+ */
+ DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 1,
+
DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
};
+/**
+ * This structure describes an array of drm_syncobj and associated points for
+ * timeline variants of drm_syncobj. It is invalid to append this structure to
+ * the execbuf if I915_EXEC_FENCE_ARRAY is set.
+ */
+struct drm_i915_gem_execbuffer_ext_timeline_fences {
+ struct i915_user_extension base;
+
+ /**
+ * Number of element in the handles_ptr & value_ptr arrays.
+ */
+ __u64 fence_count;
+
+ /**
+ * Pointer to an array of struct drm_i915_gem_exec_fence of length
+ * fence_count.
+ */
+ __u64 handles_ptr;
+
+ /**
+ * Pointer to an array of u64 values of length fence_count. Values
+ * must be 0 for a binary drm_syncobj. A Value of 0 for a timeline
+ * drm_syncobj is invalid as it turns a drm_syncobj into a binary one.
+ */
+ __u64 values_ptr;
+};
+
struct drm_i915_gem_execbuffer2 {
/**
* List of gem_exec_object2 structs
--
2.21.0.5.gaeb582a983
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences
2020-04-06 20:07 [Intel-gfx] [PATCH 1/3] drm/i915: introduce a mechanism to extend execbuf2 Venkata Sandeep Dhanalakota
2020-04-06 20:07 ` [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support Venkata Sandeep Dhanalakota
@ 2020-04-06 20:07 ` Venkata Sandeep Dhanalakota
2020-04-06 22:13 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915: introduce a mechanism to extend execbuf2 Patchwork
` (2 subsequent siblings)
4 siblings, 0 replies; 14+ messages in thread
From: Venkata Sandeep Dhanalakota @ 2020-04-06 20:07 UTC (permalink / raw)
To: intel-gfx; +Cc: chris.p.wilson
From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
To allow faster engine to engine synchronization, peel the layer of
dma-fence-chain to expose potential i915 fences so that the
i915-request code can emit HW semaphore wait/signal operations in the
ring which is faster than waking up the host to submit unblocked
workloads after interrupt notification.
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 39 +++++++++++++++++--
1 file changed, 35 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 4cb4cd035daa..9b01f7c51b65 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2504,6 +2504,7 @@ await_fence_array(struct i915_execbuffer *eb,
for (n = 0; n < nfences; n++) {
struct drm_syncobj *syncobj;
+ struct dma_fence_chain *chain;
unsigned int flags;
syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
@@ -2511,10 +2512,40 @@ await_fence_array(struct i915_execbuffer *eb,
if (!fences[n].dma_fence)
continue;
- err = i915_request_await_dma_fence(eb->request,
- fences[n].dma_fence);
- if (err < 0)
- return err;
+ /*
+ * If we're dealing with a dma-fence-chain, peel the chain by
+ * adding all of the unsignaled fences
+ * (dma_fence_chain_for_each does that for us) the chain
+ * points to.
+ *
+ * This enables us to identify waits on i915 fences and allows
+ * for faster engine-to-engine synchronization using HW
+ * semaphores.
+ */
+ chain = to_dma_fence_chain(fences[n].dma_fence);
+ if (chain) {
+ struct dma_fence *iter;
+
+ dma_fence_chain_for_each(iter, fences[n].dma_fence) {
+ struct dma_fence_chain *iter_chain =
+ to_dma_fence_chain(iter);
+
+ GEM_BUG_ON(!iter_chain);
+
+ err = i915_request_await_dma_fence(eb->request,
+ iter_chain->fence);
+ if (err < 0) {
+ dma_fence_put(iter);
+ return err;
+ }
+ }
+
+ } else {
+ err = i915_request_await_dma_fence(eb->request,
+ fences[n].dma_fence);
+ if (err < 0)
+ return err;
+ }
}
return 0;
--
2.21.0.5.gaeb582a983
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915: introduce a mechanism to extend execbuf2
2020-04-06 20:07 [Intel-gfx] [PATCH 1/3] drm/i915: introduce a mechanism to extend execbuf2 Venkata Sandeep Dhanalakota
2020-04-06 20:07 ` [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support Venkata Sandeep Dhanalakota
2020-04-06 20:07 ` [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences Venkata Sandeep Dhanalakota
@ 2020-04-06 22:13 ` Patchwork
2020-04-06 22:37 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2020-04-07 8:13 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
4 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2020-04-06 22:13 UTC (permalink / raw)
To: Venkata Sandeep Dhanalakota; +Cc: intel-gfx
== Series Details ==
Series: series starting with [1/3] drm/i915: introduce a mechanism to extend execbuf2
URL : https://patchwork.freedesktop.org/series/75570/
State : warning
== Summary ==
$ dim checkpatch origin/drm-tip
296bd5133612 drm/i915: introduce a mechanism to extend execbuf2
-:141: CHECK:SPACING: spaces preferred around that '<<' (ctx:VxV)
#141: FILE: include/uapi/drm/i915_drm.h:1204:
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_USE_EXTENSIONS<<1))
^
total: 0 errors, 0 warnings, 1 checks, 113 lines checked
26b10e8e5551 drm/i915: add syncobj timeline support
-:26: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#26:
https://lists.freedesktop.org/archives/dri-devel/2019-August/229287.html
-:34: ERROR:BAD_SIGN_OFF: Unrecognized email address: 'Venkata Sandeep Dhanalakota <venkata.s.dhanalakota at intel.com>'
#34:
Signed-off-by: Venkata Sandeep Dhanalakota <venkata.s.dhanalakota at intel.com>
-:622: WARNING:NO_AUTHOR_SIGN_OFF: Missing Signed-off-by: line by nominal patch author 'Venkata Sandeep Dhanalakota <venkata.s.dhanalakota@intel.com>'
total: 1 errors, 2 warnings, 0 checks, 551 lines checked
afdb9ac918ac drm/i915: peel dma-fence-chains wait fences
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/3] drm/i915: introduce a mechanism to extend execbuf2
2020-04-06 20:07 [Intel-gfx] [PATCH 1/3] drm/i915: introduce a mechanism to extend execbuf2 Venkata Sandeep Dhanalakota
` (2 preceding siblings ...)
2020-04-06 22:13 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915: introduce a mechanism to extend execbuf2 Patchwork
@ 2020-04-06 22:37 ` Patchwork
2020-04-07 8:13 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
4 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2020-04-06 22:37 UTC (permalink / raw)
To: Venkata Sandeep Dhanalakota; +Cc: intel-gfx
== Series Details ==
Series: series starting with [1/3] drm/i915: introduce a mechanism to extend execbuf2
URL : https://patchwork.freedesktop.org/series/75570/
State : success
== Summary ==
CI Bug Log - changes from CI_DRM_8264 -> Patchwork_17225
====================================================
Summary
-------
**SUCCESS**
No regressions found.
External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/index.html
Known issues
------------
Here are the changes found in Patchwork_17225 that come from known issues:
### IGT changes ###
#### Issues hit ####
* igt@i915_pm_rpm@basic-rte:
- fi-icl-dsi: [PASS][1] -> [INCOMPLETE][2] ([i915#189])
[1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/fi-icl-dsi/igt@i915_pm_rpm@basic-rte.html
[2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/fi-icl-dsi/igt@i915_pm_rpm@basic-rte.html
#### Possible fixes ####
* igt@gem_exec_suspend@basic-s4-devices:
- fi-tgl-y: [FAIL][3] ([i915#1158]) -> [PASS][4]
[3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/fi-tgl-y/igt@gem_exec_suspend@basic-s4-devices.html
[4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/fi-tgl-y/igt@gem_exec_suspend@basic-s4-devices.html
* igt@i915_selftest@live@hangcheck:
- fi-icl-y: [INCOMPLETE][5] ([i915#1580]) -> [PASS][6]
[5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/fi-icl-y/igt@i915_selftest@live@hangcheck.html
[6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/fi-icl-y/igt@i915_selftest@live@hangcheck.html
[i915#1158]: https://gitlab.freedesktop.org/drm/intel/issues/1158
[i915#1580]: https://gitlab.freedesktop.org/drm/intel/issues/1580
[i915#189]: https://gitlab.freedesktop.org/drm/intel/issues/189
Participating hosts (53 -> 46)
------------------------------
Missing (7): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-byt-clapper fi-bdw-samus
Build changes
-------------
* CI: CI-20190529 -> None
* Linux: CI_DRM_8264 -> Patchwork_17225
CI-20190529: 20190529
CI_DRM_8264: e0104585f880a64d4a9b40803cf4fb51ab499f7c @ git://anongit.freedesktop.org/gfx-ci/linux
IGT_5573: 9c582425d6b4fc1de9fc2ffc8015cc6f0a0d3e98 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
Patchwork_17225: afdb9ac918aca5d4a603ec3d33e2b4932e3dc1ca @ git://anongit.freedesktop.org/gfx-ci/linux
== Linux commits ==
afdb9ac918ac drm/i915: peel dma-fence-chains wait fences
26b10e8e5551 drm/i915: add syncobj timeline support
296bd5133612 drm/i915: introduce a mechanism to extend execbuf2
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Intel-gfx] ✓ Fi.CI.IGT: success for series starting with [1/3] drm/i915: introduce a mechanism to extend execbuf2
2020-04-06 20:07 [Intel-gfx] [PATCH 1/3] drm/i915: introduce a mechanism to extend execbuf2 Venkata Sandeep Dhanalakota
` (3 preceding siblings ...)
2020-04-06 22:37 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
@ 2020-04-07 8:13 ` Patchwork
4 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2020-04-07 8:13 UTC (permalink / raw)
To: Venkata Sandeep Dhanalakota; +Cc: intel-gfx
== Series Details ==
Series: series starting with [1/3] drm/i915: introduce a mechanism to extend execbuf2
URL : https://patchwork.freedesktop.org/series/75570/
State : success
== Summary ==
CI Bug Log - changes from CI_DRM_8264_full -> Patchwork_17225_full
====================================================
Summary
-------
**SUCCESS**
No regressions found.
Known issues
------------
Here are the changes found in Patchwork_17225_full that come from known issues:
### IGT changes ###
#### Issues hit ####
* igt@i915_pm_dc@dc6-psr:
- shard-iclb: [PASS][1] -> [FAIL][2] ([i915#454])
[1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-iclb4/igt@i915_pm_dc@dc6-psr.html
[2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-iclb4/igt@i915_pm_dc@dc6-psr.html
* igt@i915_pm_rpm@modeset-non-lpsp-stress:
- shard-kbl: [PASS][3] -> [DMESG-WARN][4] ([i915#165])
[3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-kbl6/igt@i915_pm_rpm@modeset-non-lpsp-stress.html
[4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-kbl2/igt@i915_pm_rpm@modeset-non-lpsp-stress.html
* igt@kms_cursor_crc@pipe-a-cursor-suspend:
- shard-skl: [PASS][5] -> [INCOMPLETE][6] ([i915#300])
[5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-skl8/igt@kms_cursor_crc@pipe-a-cursor-suspend.html
[6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-skl3/igt@kms_cursor_crc@pipe-a-cursor-suspend.html
* igt@kms_cursor_edge_walk@pipe-c-128x128-bottom-edge:
- shard-kbl: [PASS][7] -> [DMESG-WARN][8] ([i915#78])
[7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-kbl6/igt@kms_cursor_edge_walk@pipe-c-128x128-bottom-edge.html
[8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-kbl2/igt@kms_cursor_edge_walk@pipe-c-128x128-bottom-edge.html
* igt@kms_cursor_legacy@2x-long-flip-vs-cursor-atomic:
- shard-glk: [PASS][9] -> [FAIL][10] ([i915#72])
[9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-glk8/igt@kms_cursor_legacy@2x-long-flip-vs-cursor-atomic.html
[10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-glk8/igt@kms_cursor_legacy@2x-long-flip-vs-cursor-atomic.html
* igt@kms_flip@flip-vs-suspend:
- shard-snb: [PASS][11] -> [DMESG-WARN][12] ([i915#42])
[11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-snb7/igt@kms_flip@flip-vs-suspend.html
[12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-snb5/igt@kms_flip@flip-vs-suspend.html
* igt@kms_flip@plain-flip-fb-recreate-interruptible:
- shard-glk: [PASS][13] -> [FAIL][14] ([i915#1487])
[13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-glk1/igt@kms_flip@plain-flip-fb-recreate-interruptible.html
[14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-glk1/igt@kms_flip@plain-flip-fb-recreate-interruptible.html
* igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes:
- shard-kbl: [PASS][15] -> [DMESG-WARN][16] ([i915#180])
[15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-kbl7/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes.html
[16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-kbl4/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes.html
* igt@kms_psr@psr2_cursor_mmap_cpu:
- shard-iclb: [PASS][17] -> [SKIP][18] ([fdo#109441]) +2 similar issues
[17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-iclb2/igt@kms_psr@psr2_cursor_mmap_cpu.html
[18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-iclb1/igt@kms_psr@psr2_cursor_mmap_cpu.html
* igt@kms_vblank@pipe-b-ts-continuation-suspend:
- shard-skl: [PASS][19] -> [INCOMPLETE][20] ([i915#69])
[19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-skl1/igt@kms_vblank@pipe-b-ts-continuation-suspend.html
[20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-skl9/igt@kms_vblank@pipe-b-ts-continuation-suspend.html
#### Possible fixes ####
* {igt@gem_ctx_isolation@preservation-s3@rcs0}:
- shard-apl: [DMESG-WARN][21] ([i915#180]) -> [PASS][22] +2 similar issues
[21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-apl6/igt@gem_ctx_isolation@preservation-s3@rcs0.html
[22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-apl2/igt@gem_ctx_isolation@preservation-s3@rcs0.html
* igt@gem_exec_balancer@hang:
- shard-tglb: [FAIL][23] ([i915#1277]) -> [PASS][24]
[23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-tglb6/igt@gem_exec_balancer@hang.html
[24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-tglb8/igt@gem_exec_balancer@hang.html
* igt@i915_pm_rpm@basic-pci-d3-state:
- shard-skl: [FAIL][25] ([i915#138]) -> [PASS][26]
[25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-skl5/igt@i915_pm_rpm@basic-pci-d3-state.html
[26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-skl8/igt@i915_pm_rpm@basic-pci-d3-state.html
* igt@i915_selftest@live@blt:
- shard-snb: [DMESG-FAIL][27] ([i915#1409]) -> [PASS][28]
[27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-snb4/igt@i915_selftest@live@blt.html
[28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-snb4/igt@i915_selftest@live@blt.html
* igt@i915_suspend@fence-restore-tiled2untiled:
- shard-skl: [INCOMPLETE][29] ([i915#69]) -> [PASS][30]
[29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-skl3/igt@i915_suspend@fence-restore-tiled2untiled.html
[30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-skl8/igt@i915_suspend@fence-restore-tiled2untiled.html
* igt@kms_fbcon_fbt@fbc-suspend:
- shard-kbl: [DMESG-WARN][31] ([i915#180] / [i915#93] / [i915#95]) -> [PASS][32]
[31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-kbl1/igt@kms_fbcon_fbt@fbc-suspend.html
[32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-kbl4/igt@kms_fbcon_fbt@fbc-suspend.html
* igt@kms_flip@2x-plain-flip-ts-check:
- shard-glk: [FAIL][33] ([i915#34]) -> [PASS][34]
[33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-glk5/igt@kms_flip@2x-plain-flip-ts-check.html
[34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-glk6/igt@kms_flip@2x-plain-flip-ts-check.html
* igt@kms_flip@flip-vs-expired-vblank:
- shard-apl: [FAIL][35] ([i915#79]) -> [PASS][36]
[35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-apl4/igt@kms_flip@flip-vs-expired-vblank.html
[36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-apl6/igt@kms_flip@flip-vs-expired-vblank.html
* igt@kms_flip@flip-vs-suspend-interruptible:
- shard-kbl: [DMESG-WARN][37] ([i915#180]) -> [PASS][38] +2 similar issues
[37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-kbl3/igt@kms_flip@flip-vs-suspend-interruptible.html
[38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-kbl1/igt@kms_flip@flip-vs-suspend-interruptible.html
* igt@kms_hdr@bpc-switch-dpms:
- shard-skl: [FAIL][39] ([i915#1188]) -> [PASS][40]
[39]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-skl4/igt@kms_hdr@bpc-switch-dpms.html
[40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-skl4/igt@kms_hdr@bpc-switch-dpms.html
* igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes:
- shard-snb: [DMESG-WARN][41] ([i915#42]) -> [PASS][42]
[41]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-snb6/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes.html
[42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-snb2/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes.html
* igt@kms_plane_lowres@pipe-a-tiling-none:
- shard-kbl: [DMESG-WARN][43] ([i915#165] / [i915#78]) -> [PASS][44]
[43]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-kbl2/igt@kms_plane_lowres@pipe-a-tiling-none.html
[44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-kbl1/igt@kms_plane_lowres@pipe-a-tiling-none.html
* igt@kms_psr@psr2_cursor_plane_onoff:
- shard-iclb: [SKIP][45] ([fdo#109441]) -> [PASS][46]
[45]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-iclb1/igt@kms_psr@psr2_cursor_plane_onoff.html
[46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-iclb2/igt@kms_psr@psr2_cursor_plane_onoff.html
* igt@kms_setmode@basic:
- shard-apl: [FAIL][47] ([i915#31]) -> [PASS][48]
[47]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-apl7/igt@kms_setmode@basic.html
[48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-apl4/igt@kms_setmode@basic.html
- shard-glk: [FAIL][49] ([i915#31]) -> [PASS][50]
[49]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-glk5/igt@kms_setmode@basic.html
[50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-glk6/igt@kms_setmode@basic.html
#### Warnings ####
* igt@i915_pm_dc@dc6-psr:
- shard-tglb: [FAIL][51] ([i915#454]) -> [SKIP][52] ([i915#468])
[51]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-tglb1/igt@i915_pm_dc@dc6-psr.html
[52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-tglb2/igt@i915_pm_dc@dc6-psr.html
* igt@i915_pm_rpm@system-suspend-devices:
- shard-snb: [SKIP][53] ([fdo#109271]) -> [INCOMPLETE][54] ([i915#82])
[53]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-snb4/igt@i915_pm_rpm@system-suspend-devices.html
[54]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-snb5/igt@i915_pm_rpm@system-suspend-devices.html
{name}: This element is suppressed. This means it is ignored when computing
the status of the difference (SUCCESS, WARNING, or FAILURE).
[fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
[fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
[i915#1188]: https://gitlab.freedesktop.org/drm/intel/issues/1188
[i915#1277]: https://gitlab.freedesktop.org/drm/intel/issues/1277
[i915#138]: https://gitlab.freedesktop.org/drm/intel/issues/138
[i915#1409]: https://gitlab.freedesktop.org/drm/intel/issues/1409
[i915#1487]: https://gitlab.freedesktop.org/drm/intel/issues/1487
[i915#165]: https://gitlab.freedesktop.org/drm/intel/issues/165
[i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180
[i915#300]: https://gitlab.freedesktop.org/drm/intel/issues/300
[i915#31]: https://gitlab.freedesktop.org/drm/intel/issues/31
[i915#34]: https://gitlab.freedesktop.org/drm/intel/issues/34
[i915#42]: https://gitlab.freedesktop.org/drm/intel/issues/42
[i915#454]: https://gitlab.freedesktop.org/drm/intel/issues/454
[i915#468]: https://gitlab.freedesktop.org/drm/intel/issues/468
[i915#69]: https://gitlab.freedesktop.org/drm/intel/issues/69
[i915#72]: https://gitlab.freedesktop.org/drm/intel/issues/72
[i915#78]: https://gitlab.freedesktop.org/drm/intel/issues/78
[i915#79]: https://gitlab.freedesktop.org/drm/intel/issues/79
[i915#82]: https://gitlab.freedesktop.org/drm/intel/issues/82
[i915#93]: https://gitlab.freedesktop.org/drm/intel/issues/93
[i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95
Participating hosts (10 -> 10)
------------------------------
No changes in participating hosts
Build changes
-------------
* CI: CI-20190529 -> None
* Linux: CI_DRM_8264 -> Patchwork_17225
CI-20190529: 20190529
CI_DRM_8264: e0104585f880a64d4a9b40803cf4fb51ab499f7c @ git://anongit.freedesktop.org/gfx-ci/linux
IGT_5573: 9c582425d6b4fc1de9fc2ffc8015cc6f0a0d3e98 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
Patchwork_17225: afdb9ac918aca5d4a603ec3d33e2b4932e3dc1ca @ git://anongit.freedesktop.org/gfx-ci/linux
piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/index.html
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support
2020-04-06 20:07 ` [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support Venkata Sandeep Dhanalakota
@ 2020-04-08 16:29 ` Lionel Landwerlin
2020-04-08 17:00 ` Venkata Sandeep Dhanalakota
2020-04-08 17:14 ` Lionel Landwerlin
1 sibling, 1 reply; 14+ messages in thread
From: Lionel Landwerlin @ 2020-04-08 16:29 UTC (permalink / raw)
To: Venkata Sandeep Dhanalakota, intel-gfx; +Cc: chris.p.wilson
On 06/04/2020 23:07, Venkata Sandeep Dhanalakota wrote:
> Introduces new parameters to execbuf so that we can specify syncobj
> handles as well as timeline points.
>
> v2: Reuse i915_user_extension_fn
>
> v3: Check that the chained extension is only present once (Chris)
>
> v4: Check that dma_fence_chain_find_seqno returns a non NULL fence
> (Lionel)
>
> v5: Use BIT_ULL (Chris)
>
> v6: Fix issue with already signaled timeline points,
> dma_fence_chain_find_seqno() setting fence to NULL (Chris)
>
> v7: Report ENOENT with invalid syncobj handle (Lionel)
>
> v8: Check for out of order timeline point insertion (Chris)
>
> v9: After explanations on
> https://lists.freedesktop.org/archives/dri-devel/2019-August/229287.html
> drop the ordering check from v8 (Lionel)
>
> v10: Set first extension enum item to 1 (Jason)
>
> v11: Add wait on previous sync points in timelines (Sandeep)
Thanks for picking this series up!
Could you point to the changes in v11?
I haven't looked at it in a while and I can't remember what you would have
changed.
Thanks a lot,
-Lionel
>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Signed-off-by: Venkata Sandeep Dhanalakota <venkata.s.dhanalakota@intel.com>
> ---
> .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 312 ++++++++++++++----
> drivers/gpu/drm/i915/i915_drv.c | 3 +-
> drivers/gpu/drm/i915/i915_getparam.c | 1 +
> include/uapi/drm/i915_drm.h | 38 +++
> 4 files changed, 296 insertions(+), 58 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 16831f715daa..4cb4cd035daa 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -230,6 +230,13 @@ enum {
> * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
> */
>
> +struct i915_eb_fences {
> + struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
> + struct dma_fence *dma_fence;
> + u64 value;
> + struct dma_fence_chain *chain_fence;
> +};
> +
> struct i915_execbuffer {
> struct drm_i915_private *i915; /** i915 backpointer */
> struct drm_file *file; /** per-file lookup tables and limits */
> @@ -292,6 +299,7 @@ struct i915_execbuffer {
>
> struct {
> u64 flags; /** Available extensions parameters */
> + struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
> } extensions;
> };
>
> @@ -2244,67 +2252,219 @@ eb_pin_engine(struct i915_execbuffer *eb,
> }
>
> static void
> -__free_fence_array(struct drm_syncobj **fences, unsigned int n)
> +__free_fence_array(struct i915_eb_fences *fences, unsigned int n)
> {
> - while (n--)
> - drm_syncobj_put(ptr_mask_bits(fences[n], 2));
> + while (n--) {
> + drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
> + dma_fence_put(fences[n].dma_fence);
> + kfree(fences[n].chain_fence);
> + }
> kvfree(fences);
> }
>
> -static struct drm_syncobj **
> -get_fence_array(struct drm_i915_gem_execbuffer2 *args,
> - struct drm_file *file)
> +static struct i915_eb_fences *
> +get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
> +{
> + struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences =
> + &eb->extensions.timeline_fences;
> + struct drm_i915_gem_exec_fence __user *user_fences;
> + struct i915_eb_fences *fences;
> + u64 __user *user_values;
> + u64 num_fences, num_user_fences = timeline_fences->fence_count;
> + unsigned long n;
> + int err = 0;
> +
> + /* Check multiplication overflow for access_ok() and kvmalloc_array() */
> + BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
> + if (num_user_fences > min_t(unsigned long,
> + ULONG_MAX / sizeof(*user_fences),
> + SIZE_MAX / sizeof(*fences)))
> + return ERR_PTR(-EINVAL);
> +
> + user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
> + if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences)))
> + return ERR_PTR(-EFAULT);
> +
> + user_values = u64_to_user_ptr(timeline_fences->values_ptr);
> + if (!access_ok(user_values, num_user_fences * sizeof(*user_values)))
> + return ERR_PTR(-EFAULT);
> +
> + fences = kvmalloc_array(num_user_fences, sizeof(*fences),
> + __GFP_NOWARN | GFP_KERNEL);
> + if (!fences)
> + return ERR_PTR(-ENOMEM);
> +
> + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
> + ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
> +
> + for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) {
> + struct drm_i915_gem_exec_fence user_fence;
> + struct drm_syncobj *syncobj;
> + struct dma_fence *fence = NULL;
> + u64 point;
> +
> + if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) {
> + err = -EFAULT;
> + goto err;
> + }
> +
> + if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
> + err = -EINVAL;
> + goto err;
> + }
> +
> + if (__get_user(point, user_values++)) {
> + err = -EFAULT;
> + goto err;
> + }
> +
> + syncobj = drm_syncobj_find(eb->file, user_fence.handle);
> + if (!syncobj) {
> + DRM_DEBUG("Invalid syncobj handle provided\n");
> + err = -ENOENT;
> + goto err;
> + }
> +
> + fence = drm_syncobj_fence_get(syncobj);
> +
> + if (!fence && user_fence.flags &&
> + !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
> + DRM_DEBUG("Syncobj handle has no fence\n");
> + drm_syncobj_put(syncobj);
> + err = -EINVAL;
> + goto err;
> + }
> +
> + if (fence)
> + err = dma_fence_chain_find_seqno(&fence, point);
> +
> + if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
> + DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
> + drm_syncobj_put(syncobj);
> + goto err;
> + }
> +
> + /* A point might have been signaled already and
> + * garbage collected from the timeline. In this case
> + * just ignore the point and carry on.
> + */
> + if (!fence && (user_fence.flags & I915_EXEC_FENCE_WAIT)) {
> + drm_syncobj_put(syncobj);
> + continue;
> + }
> +
> + /*
> + * For timeline syncobjs we need to preallocate chains for
> + * later signaling.
> + */
> + if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
> + /*
> + * Waiting and signaling the same point (when point !=
> + * 0) would break the timeline.
> + */
> + if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
> + DRM_DEBUG("Trying to wait & signal the same timeline point.\n");
> + err = -EINVAL;
> + drm_syncobj_put(syncobj);
> + goto err;
> + }
> +
> + fences[num_fences].chain_fence =
> + kmalloc(sizeof(*fences[num_fences].chain_fence),
> + GFP_KERNEL);
> + if (!fences[num_fences].chain_fence) {
> + drm_syncobj_put(syncobj);
> + err = -ENOMEM;
> + DRM_DEBUG("Unable to alloc chain_fence\n");
> + goto err;
> + }
> + } else {
> + fences[num_fences].chain_fence = NULL;
> + }
> +
> + fences[num_fences].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
> + fences[num_fences].dma_fence = fence;
> + fences[num_fences].value = point;
> + num_fences++;
> + }
> +
> + *out_n_fences = num_fences;
> +
> + return fences;
> +
> +err:
> + __free_fence_array(fences, num_fences);
> + return ERR_PTR(err);
> +}
> +
> +static struct i915_eb_fences *
> +get_legacy_fence_array(struct i915_execbuffer *eb,
> + int *out_n_fences)
> {
> - const unsigned long nfences = args->num_cliprects;
> + struct drm_i915_gem_execbuffer2 *args = eb->args;
> struct drm_i915_gem_exec_fence __user *user;
> - struct drm_syncobj **fences;
> + struct i915_eb_fences *fences;
> + const u32 num_fences = args->num_cliprects;
> unsigned long n;
> int err;
>
> - if (!(args->flags & I915_EXEC_FENCE_ARRAY))
> - return NULL;
> + *out_n_fences = num_fences;
>
> /* Check multiplication overflow for access_ok() and kvmalloc_array() */
> BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
> - if (nfences > min_t(unsigned long,
> - ULONG_MAX / sizeof(*user),
> - SIZE_MAX / sizeof(*fences)))
> + if (*out_n_fences > min_t(unsigned long,
> + ULONG_MAX / sizeof(*user),
> + SIZE_MAX / sizeof(*fences)))
> return ERR_PTR(-EINVAL);
>
> user = u64_to_user_ptr(args->cliprects_ptr);
> - if (!access_ok(user, nfences * sizeof(*user)))
> + if (!access_ok(user, *out_n_fences * sizeof(*user)))
> return ERR_PTR(-EFAULT);
>
> - fences = kvmalloc_array(nfences, sizeof(*fences),
> + fences = kvmalloc_array(*out_n_fences, sizeof(*fences),
> __GFP_NOWARN | GFP_KERNEL);
> if (!fences)
> return ERR_PTR(-ENOMEM);
>
> - for (n = 0; n < nfences; n++) {
> - struct drm_i915_gem_exec_fence fence;
> + for (n = 0; n < *out_n_fences; n++) {
> + struct drm_i915_gem_exec_fence user_fence;
> struct drm_syncobj *syncobj;
> + struct dma_fence *fence = NULL;
>
> - if (__copy_from_user(&fence, user++, sizeof(fence))) {
> + if (__copy_from_user(&user_fence, user++, sizeof(user_fence))) {
> err = -EFAULT;
> goto err;
> }
>
> - if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
> + if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
> err = -EINVAL;
> goto err;
> }
>
> - syncobj = drm_syncobj_find(file, fence.handle);
> + syncobj = drm_syncobj_find(eb->file, user_fence.handle);
> if (!syncobj) {
> DRM_DEBUG("Invalid syncobj handle provided\n");
> err = -ENOENT;
> goto err;
> }
>
> + if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
> + fence = drm_syncobj_fence_get(syncobj);
> + if (!fence) {
> + DRM_DEBUG("Syncobj handle has no fence\n");
> + drm_syncobj_put(syncobj);
> + err = -EINVAL;
> + goto err;
> + }
> + }
> +
> BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
> ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
>
> - fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
> + fences[n].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
> + fences[n].dma_fence = fence;
> + fences[n].value = 0;
> + fences[n].chain_fence = NULL;
> }
>
> return fences;
> @@ -2314,37 +2474,45 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args,
> return ERR_PTR(err);
> }
>
> +static struct i915_eb_fences *
> +get_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
> +{
> + if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
> + return get_legacy_fence_array(eb, out_n_fences);
> +
> + if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
> + return get_timeline_fence_array(eb, out_n_fences);
> +
> + *out_n_fences = 0;
> + return NULL;
> +}
> +
> static void
> -put_fence_array(struct drm_i915_gem_execbuffer2 *args,
> - struct drm_syncobj **fences)
> +put_fence_array(struct i915_eb_fences *fences, int nfences)
> {
> if (fences)
> - __free_fence_array(fences, args->num_cliprects);
> + __free_fence_array(fences, nfences);
> }
>
> static int
> await_fence_array(struct i915_execbuffer *eb,
> - struct drm_syncobj **fences)
> + struct i915_eb_fences *fences,
> + int nfences)
> {
> - const unsigned int nfences = eb->args->num_cliprects;
> unsigned int n;
> int err;
>
> for (n = 0; n < nfences; n++) {
> struct drm_syncobj *syncobj;
> - struct dma_fence *fence;
> unsigned int flags;
>
> - syncobj = ptr_unpack_bits(fences[n], &flags, 2);
> - if (!(flags & I915_EXEC_FENCE_WAIT))
> - continue;
> + syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
>
> - fence = drm_syncobj_fence_get(syncobj);
> - if (!fence)
> - return -EINVAL;
> + if (!fences[n].dma_fence)
> + continue;
>
> - err = i915_request_await_dma_fence(eb->request, fence);
> - dma_fence_put(fence);
> + err = i915_request_await_dma_fence(eb->request,
> + fences[n].dma_fence);
> if (err < 0)
> return err;
> }
> @@ -2354,9 +2522,9 @@ await_fence_array(struct i915_execbuffer *eb,
>
> static void
> signal_fence_array(struct i915_execbuffer *eb,
> - struct drm_syncobj **fences)
> + struct i915_eb_fences *fences,
> + int nfences)
> {
> - const unsigned int nfences = eb->args->num_cliprects;
> struct dma_fence * const fence = &eb->request->fence;
> unsigned int n;
>
> @@ -2364,14 +2532,44 @@ signal_fence_array(struct i915_execbuffer *eb,
> struct drm_syncobj *syncobj;
> unsigned int flags;
>
> - syncobj = ptr_unpack_bits(fences[n], &flags, 2);
> + syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
> if (!(flags & I915_EXEC_FENCE_SIGNAL))
> continue;
>
> - drm_syncobj_replace_fence(syncobj, fence);
> + if (fences[n].chain_fence) {
> + drm_syncobj_add_point(syncobj, fences[n].chain_fence,
> + fence, fences[n].value);
> + /*
> + * The chain's ownership is transferred to the
> + * timeline.
> + */
> + fences[n].chain_fence = NULL;
> + } else {
> + drm_syncobj_replace_fence(syncobj, fence);
> + }
> }
> }
>
> +static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
> +{
> + struct i915_execbuffer *eb = data;
> +
> + /* Timeline fences are incompatible with the fence array flag. */
> + if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
> + return -EINVAL;
> +
> + if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
> + return -EINVAL;
> +
> + if (copy_from_user(&eb->extensions.timeline_fences, ext,
> + sizeof(eb->extensions.timeline_fences)))
> + return -EFAULT;
> +
> + eb->extensions.flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES);
> +
> + return 0;
> +}
> +
> static void retire_requests(struct intel_timeline *tl, struct i915_request *end)
> {
> struct i915_request *rq, *rn;
> @@ -2438,6 +2636,7 @@ static void eb_request_add(struct i915_execbuffer *eb)
> }
>
> static const i915_user_extension_fn execbuf_extensions[] = {
> + [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
> };
>
> static int
> @@ -2468,16 +2667,17 @@ static int
> i915_gem_do_execbuffer(struct drm_device *dev,
> struct drm_file *file,
> struct drm_i915_gem_execbuffer2 *args,
> - struct drm_i915_gem_exec_object2 *exec,
> - struct drm_syncobj **fences)
> + struct drm_i915_gem_exec_object2 *exec)
> {
> struct drm_i915_private *i915 = to_i915(dev);
> struct i915_execbuffer eb;
> struct dma_fence *in_fence = NULL;
> struct dma_fence *exec_fence = NULL;
> struct sync_file *out_fence = NULL;
> + struct i915_eb_fences *fences = NULL;
> struct i915_vma *batch;
> int out_fence_fd = -1;
> + int nfences = 0;
> int err;
>
> BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
> @@ -2521,10 +2721,16 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> if (err)
> return err;
>
> + fences = get_fence_array(&eb, &nfences);
> + if (IS_ERR(fences))
> + return PTR_ERR(fences);
> +
> if (args->flags & I915_EXEC_FENCE_IN) {
> in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
> - if (!in_fence)
> - return -EINVAL;
> + if (!in_fence) {
> + err = -EINVAL;
> + goto err_fences;
> + }
> }
>
> if (args->flags & I915_EXEC_FENCE_SUBMIT) {
> @@ -2648,7 +2854,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> }
>
> if (fences) {
> - err = await_fence_array(&eb, fences);
> + err = await_fence_array(&eb, fences, nfences);
> if (err)
> goto err_request;
> }
> @@ -2680,7 +2886,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> eb_request_add(&eb);
>
> if (fences)
> - signal_fence_array(&eb, fences);
> + signal_fence_array(&eb, fences, nfences);
>
> if (out_fence) {
> if (err == 0) {
> @@ -2715,6 +2921,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> dma_fence_put(exec_fence);
> err_in_fence:
> dma_fence_put(in_fence);
> +err_fences:
> + put_fence_array(fences, nfences);
> return err;
> }
>
> @@ -2809,7 +3017,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
> exec2_list[i].flags = 0;
> }
>
> - err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL);
> + err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list);
> if (exec2.flags & __EXEC_HAS_RELOC) {
> struct drm_i915_gem_exec_object __user *user_exec_list =
> u64_to_user_ptr(args->buffers_ptr);
> @@ -2841,7 +3049,6 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
> struct drm_i915_private *i915 = to_i915(dev);
> struct drm_i915_gem_execbuffer2 *args = data;
> struct drm_i915_gem_exec_object2 *exec2_list;
> - struct drm_syncobj **fences = NULL;
> const size_t count = args->buffer_count;
> int err;
>
> @@ -2869,15 +3076,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
> return -EFAULT;
> }
>
> - if (args->flags & I915_EXEC_FENCE_ARRAY) {
> - fences = get_fence_array(args, file);
> - if (IS_ERR(fences)) {
> - kvfree(exec2_list);
> - return PTR_ERR(fences);
> - }
> - }
> -
> - err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences);
> + err = i915_gem_do_execbuffer(dev, file, args, exec2_list);
>
> /*
> * Now that we have begun execution of the batchbuffer, we ignore
> @@ -2917,7 +3116,6 @@ end:;
> }
>
> args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
> - put_fence_array(args, fences);
> kvfree(exec2_list);
> return err;
> }
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index a7a3b4b98572..f7f868c3c510 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1828,7 +1828,8 @@ static struct drm_driver driver = {
> */
> .driver_features =
> DRIVER_GEM |
> - DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ,
> + DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ |
> + DRIVER_SYNCOBJ_TIMELINE,
> .release = i915_driver_release,
> .open = i915_driver_open,
> .lastclose = i915_driver_lastclose,
> diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
> index 54fce81d5724..b9d3aab53c03 100644
> --- a/drivers/gpu/drm/i915/i915_getparam.c
> +++ b/drivers/gpu/drm/i915/i915_getparam.c
> @@ -132,6 +132,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
> case I915_PARAM_HAS_EXEC_BATCH_FIRST:
> case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
> case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
> + case I915_PARAM_HAS_EXEC_TIMELINE_FENCES:
> /* For the time being all of these are always true;
> * if some supported hardware does not have one of these
> * features this value needs to be provided from
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 7ea38aa6502c..7b8680e3b49d 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -619,6 +619,12 @@ typedef struct drm_i915_irq_wait {
> */
> #define I915_PARAM_PERF_REVISION 54
>
> +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
> + * timeline syncobj through drm_i915_gem_execbuf_ext_timeline_fences. See
> + * I915_EXEC_USE_EXTENSIONS.
> + */
> +#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
> +
> /* Must be kept compact -- no holes and well documented */
>
> typedef struct drm_i915_getparam {
> @@ -1047,9 +1053,41 @@ struct drm_i915_gem_exec_fence {
> };
>
> enum drm_i915_gem_execbuffer_ext {
> + /**
> + * See drm_i915_gem_execbuf_ext_timeline_fences.
> + */
> + DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 1,
> +
> DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
> };
>
> +/**
> + * This structure describes an array of drm_syncobj and associated points for
> + * timeline variants of drm_syncobj. It is invalid to append this structure to
> + * the execbuf if I915_EXEC_FENCE_ARRAY is set.
> + */
> +struct drm_i915_gem_execbuffer_ext_timeline_fences {
> + struct i915_user_extension base;
> +
> + /**
> + * Number of element in the handles_ptr & value_ptr arrays.
> + */
> + __u64 fence_count;
> +
> + /**
> + * Pointer to an array of struct drm_i915_gem_exec_fence of length
> + * fence_count.
> + */
> + __u64 handles_ptr;
> +
> + /**
> + * Pointer to an array of u64 values of length fence_count. Values
> + * must be 0 for a binary drm_syncobj. A Value of 0 for a timeline
> + * drm_syncobj is invalid as it turns a drm_syncobj into a binary one.
> + */
> + __u64 values_ptr;
> +};
> +
> struct drm_i915_gem_execbuffer2 {
> /**
> * List of gem_exec_object2 structs
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support
2020-04-08 16:29 ` Lionel Landwerlin
@ 2020-04-08 17:00 ` Venkata Sandeep Dhanalakota
0 siblings, 0 replies; 14+ messages in thread
From: Venkata Sandeep Dhanalakota @ 2020-04-08 17:00 UTC (permalink / raw)
To: Lionel Landwerlin; +Cc: intel-gfx, chris.p.wilson
On 20/04/08 07:29, Lionel Landwerlin wrote:
> On 06/04/2020 23:07, Venkata Sandeep Dhanalakota wrote:
> > Introduces a new parameters to execbuf so that we can specify syncobj
> > handles as well as timeline points.
> >
> > v2: Reuse i915_user_extension_fn
> >
> > v3: Check that the chained extension is only present once (Chris)
> >
> > v4: Check that dma_fence_chain_find_seqno returns a non NULL fence
> > (Lionel)
> >
> > v5: Use BIT_ULL (Chris)
> >
> > v6: Fix issue with already signaled timeline points,
> > dma_fence_chain_find_seqno() setting fence to NULL (Chris)
> >
> > v7: Report ENOENT with invalid syncobj handle (Lionel)
> >
> > v8: Check for out of order timeline point insertion (Chris)
> >
> > v9: After explanations on
> > https://lists.freedesktop.org/archives/dri-devel/2019-August/229287.html
> > drop the ordering check from v8 (Lionel)
> >
> > v10: Set first extension enum item to 1 (Jason)
> >
> > v11: Add wait on previous sync points in timelines (Sandeep)
>
>
> Thanks for picking this series up!
>
>
> Could you point to the changes in v11?
>
> I haven't look at it in a while and I can't remember what you would have
> changed.
>
Hi,
Mainly the changes are in get_timeline_fence_array(), to enforce the
implicit dependencies in the signal fence-array. We want to have efficient waits
on the last point on timelines so that we signal at a correct point in time along the timeline.
The ordering is controlled so that we always wait on the previous request/sync point in the timeline
and signal after the completion of the current request.
Thank you,
~sandeep
>
> Thanks a lot,
>
>
> -Lionel
>
>
> >
> > Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> > Signed-off-by: Venkata Sandeep Dhanalakota <venkata.s.dhanalakota at intel.com>
> > ---
> > .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 312 ++++++++++++++----
> > drivers/gpu/drm/i915/i915_drv.c | 3 +-
> > drivers/gpu/drm/i915/i915_getparam.c | 1 +
> > include/uapi/drm/i915_drm.h | 38 +++
> > 4 files changed, 296 insertions(+), 58 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> > index 16831f715daa..4cb4cd035daa 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> > @@ -230,6 +230,13 @@ enum {
> > * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
> > */
> > +struct i915_eb_fences {
> > + struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
> > + struct dma_fence *dma_fence;
> > + u64 value;
> > + struct dma_fence_chain *chain_fence;
> > +};
> > +
> > struct i915_execbuffer {
> > struct drm_i915_private *i915; /** i915 backpointer */
> > struct drm_file *file; /** per-file lookup tables and limits */
> > @@ -292,6 +299,7 @@ struct i915_execbuffer {
> > struct {
> > u64 flags; /** Available extensions parameters */
> > + struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
> > } extensions;
> > };
> > @@ -2244,67 +2252,219 @@ eb_pin_engine(struct i915_execbuffer *eb,
> > }
> > static void
> > -__free_fence_array(struct drm_syncobj **fences, unsigned int n)
> > +__free_fence_array(struct i915_eb_fences *fences, unsigned int n)
> > {
> > - while (n--)
> > - drm_syncobj_put(ptr_mask_bits(fences[n], 2));
> > + while (n--) {
> > + drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
> > + dma_fence_put(fences[n].dma_fence);
> > + kfree(fences[n].chain_fence);
> > + }
> > kvfree(fences);
> > }
> > -static struct drm_syncobj **
> > -get_fence_array(struct drm_i915_gem_execbuffer2 *args,
> > - struct drm_file *file)
> > +static struct i915_eb_fences *
> > +get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
> > +{
> > + struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences =
> > + &eb->extensions.timeline_fences;
> > + struct drm_i915_gem_exec_fence __user *user_fences;
> > + struct i915_eb_fences *fences;
> > + u64 __user *user_values;
> > + u64 num_fences, num_user_fences = timeline_fences->fence_count;
> > + unsigned long n;
> > + int err = 0;
> > +
> > + /* Check multiplication overflow for access_ok() and kvmalloc_array() */
> > + BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
> > + if (num_user_fences > min_t(unsigned long,
> > + ULONG_MAX / sizeof(*user_fences),
> > + SIZE_MAX / sizeof(*fences)))
> > + return ERR_PTR(-EINVAL);
> > +
> > + user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
> > + if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences)))
> > + return ERR_PTR(-EFAULT);
> > +
> > + user_values = u64_to_user_ptr(timeline_fences->values_ptr);
> > + if (!access_ok(user_values, num_user_fences * sizeof(*user_values)))
> > + return ERR_PTR(-EFAULT);
> > +
> > + fences = kvmalloc_array(num_user_fences, sizeof(*fences),
> > + __GFP_NOWARN | GFP_KERNEL);
> > + if (!fences)
> > + return ERR_PTR(-ENOMEM);
> > +
> > + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
> > + ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
> > +
> > + for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) {
> > + struct drm_i915_gem_exec_fence user_fence;
> > + struct drm_syncobj *syncobj;
> > + struct dma_fence *fence = NULL;
> > + u64 point;
> > +
> > + if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) {
> > + err = -EFAULT;
> > + goto err;
> > + }
> > +
> > + if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
> > + err = -EINVAL;
> > + goto err;
> > + }
> > +
> > + if (__get_user(point, user_values++)) {
> > + err = -EFAULT;
> > + goto err;
> > + }
> > +
> > + syncobj = drm_syncobj_find(eb->file, user_fence.handle);
> > + if (!syncobj) {
> > + DRM_DEBUG("Invalid syncobj handle provided\n");
> > + err = -ENOENT;
> > + goto err;
> > + }
> > +
> > + fence = drm_syncobj_fence_get(syncobj);
> > +
> > + if (!fence && user_fence.flags &&
> > + !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
> > + DRM_DEBUG("Syncobj handle has no fence\n");
> > + drm_syncobj_put(syncobj);
> > + err = -EINVAL;
> > + goto err;
> > + }
> > +
> > + if (fence)
> > + err = dma_fence_chain_find_seqno(&fence, point);
> > +
> > + if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
> > + DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
> > + drm_syncobj_put(syncobj);
> > + goto err;
> > + }
> > +
> > + /* A point might have been signaled already and
> > + * garbage collected from the timeline. In this case
> > + * just ignore the point and carry on.
> > + */
> > + if (!fence && (user_fence.flags & I915_EXEC_FENCE_WAIT)) {
> > + drm_syncobj_put(syncobj);
> > + continue;
> > + }
> > +
> > + /*
> > + * For timeline syncobjs we need to preallocate chains for
> > + * later signaling.
> > + */
> > + if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
> > + /*
> > + * Waiting and signaling the same point (when point !=
> > + * 0) would break the timeline.
> > + */
> > + if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
> > + DRM_DEBUG("Trying to wait & signal the same timeline point.\n");
> > + err = -EINVAL;
> > + drm_syncobj_put(syncobj);
> > + goto err;
> > + }
> > +
> > + fences[num_fences].chain_fence =
> > + kmalloc(sizeof(*fences[num_fences].chain_fence),
> > + GFP_KERNEL);
> > + if (!fences[num_fences].chain_fence) {
> > + drm_syncobj_put(syncobj);
> > + err = -ENOMEM;
> > + DRM_DEBUG("Unable to alloc chain_fence\n");
> > + goto err;
> > + }
> > + } else {
> > + fences[num_fences].chain_fence = NULL;
> > + }
> > +
> > + fences[num_fences].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
> > + fences[num_fences].dma_fence = fence;
> > + fences[num_fences].value = point;
> > + num_fences++;
> > + }
> > +
> > + *out_n_fences = num_fences;
> > +
> > + return fences;
> > +
> > +err:
> > + __free_fence_array(fences, num_fences);
> > + return ERR_PTR(err);
> > +}
> > +
> > +static struct i915_eb_fences *
> > +get_legacy_fence_array(struct i915_execbuffer *eb,
> > + int *out_n_fences)
> > {
> > - const unsigned long nfences = args->num_cliprects;
> > + struct drm_i915_gem_execbuffer2 *args = eb->args;
> > struct drm_i915_gem_exec_fence __user *user;
> > - struct drm_syncobj **fences;
> > + struct i915_eb_fences *fences;
> > + const u32 num_fences = args->num_cliprects;
> > unsigned long n;
> > int err;
> > - if (!(args->flags & I915_EXEC_FENCE_ARRAY))
> > - return NULL;
> > + *out_n_fences = num_fences;
> > /* Check multiplication overflow for access_ok() and kvmalloc_array() */
> > BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
> > - if (nfences > min_t(unsigned long,
> > - ULONG_MAX / sizeof(*user),
> > - SIZE_MAX / sizeof(*fences)))
> > + if (*out_n_fences > min_t(unsigned long,
> > + ULONG_MAX / sizeof(*user),
> > + SIZE_MAX / sizeof(*fences)))
> > return ERR_PTR(-EINVAL);
> > user = u64_to_user_ptr(args->cliprects_ptr);
> > - if (!access_ok(user, nfences * sizeof(*user)))
> > + if (!access_ok(user, *out_n_fences * sizeof(*user)))
> > return ERR_PTR(-EFAULT);
> > - fences = kvmalloc_array(nfences, sizeof(*fences),
> > + fences = kvmalloc_array(*out_n_fences, sizeof(*fences),
> > __GFP_NOWARN | GFP_KERNEL);
> > if (!fences)
> > return ERR_PTR(-ENOMEM);
> > - for (n = 0; n < nfences; n++) {
> > - struct drm_i915_gem_exec_fence fence;
> > + for (n = 0; n < *out_n_fences; n++) {
> > + struct drm_i915_gem_exec_fence user_fence;
> > struct drm_syncobj *syncobj;
> > + struct dma_fence *fence = NULL;
> > - if (__copy_from_user(&fence, user++, sizeof(fence))) {
> > + if (__copy_from_user(&user_fence, user++, sizeof(user_fence))) {
> > err = -EFAULT;
> > goto err;
> > }
> > - if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
> > + if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
> > err = -EINVAL;
> > goto err;
> > }
> > - syncobj = drm_syncobj_find(file, fence.handle);
> > + syncobj = drm_syncobj_find(eb->file, user_fence.handle);
> > if (!syncobj) {
> > DRM_DEBUG("Invalid syncobj handle provided\n");
> > err = -ENOENT;
> > goto err;
> > }
> > + if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
> > + fence = drm_syncobj_fence_get(syncobj);
> > + if (!fence) {
> > + DRM_DEBUG("Syncobj handle has no fence\n");
> > + drm_syncobj_put(syncobj);
> > + err = -EINVAL;
> > + goto err;
> > + }
> > + }
> > +
> > BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
> > ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
> > - fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
> > + fences[n].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
> > + fences[n].dma_fence = fence;
> > + fences[n].value = 0;
> > + fences[n].chain_fence = NULL;
> > }
> > return fences;
> > @@ -2314,37 +2474,45 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args,
> > return ERR_PTR(err);
> > }
> > +static struct i915_eb_fences *
> > +get_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
> > +{
> > + if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
> > + return get_legacy_fence_array(eb, out_n_fences);
> > +
> > + if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
> > + return get_timeline_fence_array(eb, out_n_fences);
> > +
> > + *out_n_fences = 0;
> > + return NULL;
> > +}
> > +
> > static void
> > -put_fence_array(struct drm_i915_gem_execbuffer2 *args,
> > - struct drm_syncobj **fences)
> > +put_fence_array(struct i915_eb_fences *fences, int nfences)
> > {
> > if (fences)
> > - __free_fence_array(fences, args->num_cliprects);
> > + __free_fence_array(fences, nfences);
> > }
> > static int
> > await_fence_array(struct i915_execbuffer *eb,
> > - struct drm_syncobj **fences)
> > + struct i915_eb_fences *fences,
> > + int nfences)
> > {
> > - const unsigned int nfences = eb->args->num_cliprects;
> > unsigned int n;
> > int err;
> > for (n = 0; n < nfences; n++) {
> > struct drm_syncobj *syncobj;
> > - struct dma_fence *fence;
> > unsigned int flags;
> > - syncobj = ptr_unpack_bits(fences[n], &flags, 2);
> > - if (!(flags & I915_EXEC_FENCE_WAIT))
> > - continue;
> > + syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
> > - fence = drm_syncobj_fence_get(syncobj);
> > - if (!fence)
> > - return -EINVAL;
> > + if (!fences[n].dma_fence)
> > + continue;
> > - err = i915_request_await_dma_fence(eb->request, fence);
> > - dma_fence_put(fence);
> > + err = i915_request_await_dma_fence(eb->request,
> > + fences[n].dma_fence);
> > if (err < 0)
> > return err;
> > }
> > @@ -2354,9 +2522,9 @@ await_fence_array(struct i915_execbuffer *eb,
> > static void
> > signal_fence_array(struct i915_execbuffer *eb,
> > - struct drm_syncobj **fences)
> > + struct i915_eb_fences *fences,
> > + int nfences)
> > {
> > - const unsigned int nfences = eb->args->num_cliprects;
> > struct dma_fence * const fence = &eb->request->fence;
> > unsigned int n;
> > @@ -2364,14 +2532,44 @@ signal_fence_array(struct i915_execbuffer *eb,
> > struct drm_syncobj *syncobj;
> > unsigned int flags;
> > - syncobj = ptr_unpack_bits(fences[n], &flags, 2);
> > + syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
> > if (!(flags & I915_EXEC_FENCE_SIGNAL))
> > continue;
> > - drm_syncobj_replace_fence(syncobj, fence);
> > + if (fences[n].chain_fence) {
> > + drm_syncobj_add_point(syncobj, fences[n].chain_fence,
> > + fence, fences[n].value);
> > + /*
> > + * The chain's ownership is transferred to the
> > + * timeline.
> > + */
> > + fences[n].chain_fence = NULL;
> > + } else {
> > + drm_syncobj_replace_fence(syncobj, fence);
> > + }
> > }
> > }
> > +static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
> > +{
> > + struct i915_execbuffer *eb = data;
> > +
> > + /* Timeline fences are incompatible with the fence array flag. */
> > + if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
> > + return -EINVAL;
> > +
> > + if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
> > + return -EINVAL;
> > +
> > + if (copy_from_user(&eb->extensions.timeline_fences, ext,
> > + sizeof(eb->extensions.timeline_fences)))
> > + return -EFAULT;
> > +
> > + eb->extensions.flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES);
> > +
> > + return 0;
> > +}
> > +
> > static void retire_requests(struct intel_timeline *tl, struct i915_request *end)
> > {
> > struct i915_request *rq, *rn;
> > @@ -2438,6 +2636,7 @@ static void eb_request_add(struct i915_execbuffer *eb)
> > }
> > static const i915_user_extension_fn execbuf_extensions[] = {
> > + [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
> > };
> > static int
> > @@ -2468,16 +2667,17 @@ static int
> > i915_gem_do_execbuffer(struct drm_device *dev,
> > struct drm_file *file,
> > struct drm_i915_gem_execbuffer2 *args,
> > - struct drm_i915_gem_exec_object2 *exec,
> > - struct drm_syncobj **fences)
> > + struct drm_i915_gem_exec_object2 *exec)
> > {
> > struct drm_i915_private *i915 = to_i915(dev);
> > struct i915_execbuffer eb;
> > struct dma_fence *in_fence = NULL;
> > struct dma_fence *exec_fence = NULL;
> > struct sync_file *out_fence = NULL;
> > + struct i915_eb_fences *fences = NULL;
> > struct i915_vma *batch;
> > int out_fence_fd = -1;
> > + int nfences = 0;
> > int err;
> > BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
> > @@ -2521,10 +2721,16 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> > if (err)
> > return err;
> > + fences = get_fence_array(&eb, &nfences);
> > + if (IS_ERR(fences))
> > + return PTR_ERR(fences);
> > +
> > if (args->flags & I915_EXEC_FENCE_IN) {
> > in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
> > - if (!in_fence)
> > - return -EINVAL;
> > + if (!in_fence) {
> > + err = -EINVAL;
> > + goto err_fences;
> > + }
> > }
> > if (args->flags & I915_EXEC_FENCE_SUBMIT) {
> > @@ -2648,7 +2854,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> > }
> > if (fences) {
> > - err = await_fence_array(&eb, fences);
> > + err = await_fence_array(&eb, fences, nfences);
> > if (err)
> > goto err_request;
> > }
> > @@ -2680,7 +2886,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> > eb_request_add(&eb);
> > if (fences)
> > - signal_fence_array(&eb, fences);
> > + signal_fence_array(&eb, fences, nfences);
> > if (out_fence) {
> > if (err == 0) {
> > @@ -2715,6 +2921,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> > dma_fence_put(exec_fence);
> > err_in_fence:
> > dma_fence_put(in_fence);
> > +err_fences:
> > + put_fence_array(fences, nfences);
> > return err;
> > }
> > @@ -2809,7 +3017,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
> > exec2_list[i].flags = 0;
> > }
> > - err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL);
> > + err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list);
> > if (exec2.flags & __EXEC_HAS_RELOC) {
> > struct drm_i915_gem_exec_object __user *user_exec_list =
> > u64_to_user_ptr(args->buffers_ptr);
> > @@ -2841,7 +3049,6 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
> > struct drm_i915_private *i915 = to_i915(dev);
> > struct drm_i915_gem_execbuffer2 *args = data;
> > struct drm_i915_gem_exec_object2 *exec2_list;
> > - struct drm_syncobj **fences = NULL;
> > const size_t count = args->buffer_count;
> > int err;
> > @@ -2869,15 +3076,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
> > return -EFAULT;
> > }
> > - if (args->flags & I915_EXEC_FENCE_ARRAY) {
> > - fences = get_fence_array(args, file);
> > - if (IS_ERR(fences)) {
> > - kvfree(exec2_list);
> > - return PTR_ERR(fences);
> > - }
> > - }
> > -
> > - err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences);
> > + err = i915_gem_do_execbuffer(dev, file, args, exec2_list);
> > /*
> > * Now that we have begun execution of the batchbuffer, we ignore
> > @@ -2917,7 +3116,6 @@ end:;
> > }
> > args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
> > - put_fence_array(args, fences);
> > kvfree(exec2_list);
> > return err;
> > }
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> > index a7a3b4b98572..f7f868c3c510 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -1828,7 +1828,8 @@ static struct drm_driver driver = {
> > */
> > .driver_features =
> > DRIVER_GEM |
> > - DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ,
> > + DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ |
> > + DRIVER_SYNCOBJ_TIMELINE,
> > .release = i915_driver_release,
> > .open = i915_driver_open,
> > .lastclose = i915_driver_lastclose,
> > diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
> > index 54fce81d5724..b9d3aab53c03 100644
> > --- a/drivers/gpu/drm/i915/i915_getparam.c
> > +++ b/drivers/gpu/drm/i915/i915_getparam.c
> > @@ -132,6 +132,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
> > case I915_PARAM_HAS_EXEC_BATCH_FIRST:
> > case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
> > case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
> > + case I915_PARAM_HAS_EXEC_TIMELINE_FENCES:
> > /* For the time being all of these are always true;
> > * if some supported hardware does not have one of these
> > * features this value needs to be provided from
> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > index 7ea38aa6502c..7b8680e3b49d 100644
> > --- a/include/uapi/drm/i915_drm.h
> > +++ b/include/uapi/drm/i915_drm.h
> > @@ -619,6 +619,12 @@ typedef struct drm_i915_irq_wait {
> > */
> > #define I915_PARAM_PERF_REVISION 54
> > +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
> > + * timeline syncobj through drm_i915_gem_execbuf_ext_timeline_fences. See
> > + * I915_EXEC_USE_EXTENSIONS.
> > + */
> > +#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
> > +
> > /* Must be kept compact -- no holes and well documented */
> > typedef struct drm_i915_getparam {
> > @@ -1047,9 +1053,41 @@ struct drm_i915_gem_exec_fence {
> > };
> > enum drm_i915_gem_execbuffer_ext {
> > + /**
> > + * See drm_i915_gem_execbuf_ext_timeline_fences.
> > + */
> > + DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 1,
> > +
> > DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
> > };
> > +/**
> > + * This structure describes an array of drm_syncobj and associated points for
> > + * timeline variants of drm_syncobj. It is invalid to append this structure to
> > + * the execbuf if I915_EXEC_FENCE_ARRAY is set.
> > + */
> > +struct drm_i915_gem_execbuffer_ext_timeline_fences {
> > + struct i915_user_extension base;
> > +
> > + /**
> > + * Number of element in the handles_ptr & value_ptr arrays.
> > + */
> > + __u64 fence_count;
> > +
> > + /**
> > + * Pointer to an array of struct drm_i915_gem_exec_fence of length
> > + * fence_count.
> > + */
> > + __u64 handles_ptr;
> > +
> > + /**
> > + * Pointer to an array of u64 values of length fence_count. Values
> > + * must be 0 for a binary drm_syncobj. A Value of 0 for a timeline
> > + * drm_syncobj is invalid as it turns a drm_syncobj into a binary one.
> > + */
> > + __u64 values_ptr;
> > +};
> > +
> > struct drm_i915_gem_execbuffer2 {
> > /**
> > * List of gem_exec_object2 structs
>
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support
2020-04-06 20:07 ` [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support Venkata Sandeep Dhanalakota
2020-04-08 16:29 ` Lionel Landwerlin
@ 2020-04-08 17:14 ` Lionel Landwerlin
1 sibling, 0 replies; 14+ messages in thread
From: Lionel Landwerlin @ 2020-04-08 17:14 UTC (permalink / raw)
To: Venkata Sandeep Dhanalakota, intel-gfx; +Cc: chris.p.wilson
On 06/04/2020 23:07, Venkata Sandeep Dhanalakota wrote:
> Introduces a new parameters to execbuf so that we can specify syncobj
> handles as well as timeline points.
>
> v2: Reuse i915_user_extension_fn
>
> v3: Check that the chained extension is only present once (Chris)
>
> v4: Check that dma_fence_chain_find_seqno returns a non NULL fence
> (Lionel)
>
> v5: Use BIT_ULL (Chris)
>
> v6: Fix issue with already signaled timeline points,
> dma_fence_chain_find_seqno() setting fence to NULL (Chris)
>
> v7: Report ENOENT with invalid syncobj handle (Lionel)
>
> v8: Check for out of order timeline point insertion (Chris)
>
> v9: After explanations on
> https://lists.freedesktop.org/archives/dri-devel/2019-August/229287.html
> drop the ordering check from v8 (Lionel)
>
> v10: Set first extension enum item to 1 (Jason)
>
> v11: Add wait on previous sync points in timelines (Sandeep)
>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Signed-off-by: Venkata Sandeep Dhanalakota <venkata.s.dhanalakota at intel.com>
> ---
> .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 312 ++++++++++++++----
> drivers/gpu/drm/i915/i915_drv.c | 3 +-
> drivers/gpu/drm/i915/i915_getparam.c | 1 +
> include/uapi/drm/i915_drm.h | 38 +++
> 4 files changed, 296 insertions(+), 58 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 16831f715daa..4cb4cd035daa 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -230,6 +230,13 @@ enum {
> * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
> */
>
> +struct i915_eb_fences {
> + struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
> + struct dma_fence *dma_fence;
> + u64 value;
> + struct dma_fence_chain *chain_fence;
> +};
> +
> struct i915_execbuffer {
> struct drm_i915_private *i915; /** i915 backpointer */
> struct drm_file *file; /** per-file lookup tables and limits */
> @@ -292,6 +299,7 @@ struct i915_execbuffer {
>
> struct {
> u64 flags; /** Available extensions parameters */
> + struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
> } extensions;
> };
>
> @@ -2244,67 +2252,219 @@ eb_pin_engine(struct i915_execbuffer *eb,
> }
>
> static void
> -__free_fence_array(struct drm_syncobj **fences, unsigned int n)
> +__free_fence_array(struct i915_eb_fences *fences, unsigned int n)
> {
> - while (n--)
> - drm_syncobj_put(ptr_mask_bits(fences[n], 2));
> + while (n--) {
> + drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
> + dma_fence_put(fences[n].dma_fence);
> + kfree(fences[n].chain_fence);
> + }
> kvfree(fences);
> }
>
> -static struct drm_syncobj **
> -get_fence_array(struct drm_i915_gem_execbuffer2 *args,
> - struct drm_file *file)
> +static struct i915_eb_fences *
> +get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
> +{
> + struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences =
> + &eb->extensions.timeline_fences;
> + struct drm_i915_gem_exec_fence __user *user_fences;
> + struct i915_eb_fences *fences;
> + u64 __user *user_values;
> + u64 num_fences, num_user_fences = timeline_fences->fence_count;
> + unsigned long n;
> + int err = 0;
> +
> + /* Check multiplication overflow for access_ok() and kvmalloc_array() */
> + BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
> + if (num_user_fences > min_t(unsigned long,
> + ULONG_MAX / sizeof(*user_fences),
> + SIZE_MAX / sizeof(*fences)))
> + return ERR_PTR(-EINVAL);
> +
> + user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
> + if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences)))
> + return ERR_PTR(-EFAULT);
> +
> + user_values = u64_to_user_ptr(timeline_fences->values_ptr);
> + if (!access_ok(user_values, num_user_fences * sizeof(*user_values)))
> + return ERR_PTR(-EFAULT);
> +
> + fences = kvmalloc_array(num_user_fences, sizeof(*fences),
> + __GFP_NOWARN | GFP_KERNEL);
> + if (!fences)
> + return ERR_PTR(-ENOMEM);
> +
> + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
> + ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
> +
> + for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) {
> + struct drm_i915_gem_exec_fence user_fence;
> + struct drm_syncobj *syncobj;
> + struct dma_fence *fence = NULL;
> + u64 point;
> +
> + if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) {
> + err = -EFAULT;
> + goto err;
> + }
> +
> + if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
> + err = -EINVAL;
> + goto err;
> + }
> +
> + if (__get_user(point, user_values++)) {
> + err = -EFAULT;
> + goto err;
> + }
> +
> + syncobj = drm_syncobj_find(eb->file, user_fence.handle);
> + if (!syncobj) {
> + DRM_DEBUG("Invalid syncobj handle provided\n");
> + err = -ENOENT;
> + goto err;
> + }
> +
> + fence = drm_syncobj_fence_get(syncobj);
> +
> + if (!fence && user_fence.flags &&
> + !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
> + DRM_DEBUG("Syncobj handle has no fence\n");
> + drm_syncobj_put(syncobj);
> + err = -EINVAL;
> + goto err;
> + }
> +
> + if (fence)
> + err = dma_fence_chain_find_seqno(&fence, point);
> +
> + if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
> + DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
> + drm_syncobj_put(syncobj);
> + goto err;
> + }
> +
> + /* A point might have been signaled already and
> + * garbage collected from the timeline. In this case
> + * just ignore the point and carry on.
> + */
> + if (!fence && (user_fence.flags & I915_EXEC_FENCE_WAIT)) {
I think we can only skip if we're only waiting. If there is a signal
request we still need to honor it.
So I would replace this function above with :
if (!fence && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
> + drm_syncobj_put(syncobj);
> + continue;
> + }
> +
> + /*
> + * For timeline syncobjs we need to preallocate chains for
> + * later signaling.
> + */
> + if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
> + /*
> + * Waiting and signaling the same point (when point !=
> + * 0) would break the timeline.
> + */
> + if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
> + DRM_DEBUG("Trying to wait & signal the same timeline point.\n");
> + err = -EINVAL;
> + drm_syncobj_put(syncobj);
> + goto err;
> + }
I think we can actually allow this. Wait and signal operations are added
in order, so we could wait on point 3 and then replace it with another point 3.
If we keep this limitation, we need to add :
if (fence)
dma_fence_put(fence);
> +
> + fences[num_fences].chain_fence =
> + kmalloc(sizeof(*fences[num_fences].chain_fence),
> + GFP_KERNEL);
> + if (!fences[num_fences].chain_fence) {
> + drm_syncobj_put(syncobj);
With the change above, we could arrive here with fence != NULL.
We probably need to add :
if (fence)
dma_fence_put(fence);
> + err = -ENOMEM;
> + DRM_DEBUG("Unable to alloc chain_fence\n");
> + goto err;
> + }
> + } else {
> + fences[num_fences].chain_fence = NULL;
> + }
> +
> + fences[num_fences].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
> + fences[num_fences].dma_fence = fence;
> + fences[num_fences].value = point;
> + num_fences++;
> + }
> +
> + *out_n_fences = num_fences;
> +
> + return fences;
> +
> +err:
> + __free_fence_array(fences, num_fences);
> + return ERR_PTR(err);
> +}
> +
> +static struct i915_eb_fences *
> +get_legacy_fence_array(struct i915_execbuffer *eb,
> + int *out_n_fences)
> {
> - const unsigned long nfences = args->num_cliprects;
> + struct drm_i915_gem_execbuffer2 *args = eb->args;
> struct drm_i915_gem_exec_fence __user *user;
> - struct drm_syncobj **fences;
> + struct i915_eb_fences *fences;
> + const u32 num_fences = args->num_cliprects;
> unsigned long n;
> int err;
>
> - if (!(args->flags & I915_EXEC_FENCE_ARRAY))
> - return NULL;
> + *out_n_fences = num_fences;
>
> /* Check multiplication overflow for access_ok() and kvmalloc_array() */
> BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
> - if (nfences > min_t(unsigned long,
> - ULONG_MAX / sizeof(*user),
> - SIZE_MAX / sizeof(*fences)))
> + if (*out_n_fences > min_t(unsigned long,
> + ULONG_MAX / sizeof(*user),
> + SIZE_MAX / sizeof(*fences)))
> return ERR_PTR(-EINVAL);
>
> user = u64_to_user_ptr(args->cliprects_ptr);
> - if (!access_ok(user, nfences * sizeof(*user)))
> + if (!access_ok(user, *out_n_fences * sizeof(*user)))
> return ERR_PTR(-EFAULT);
>
> - fences = kvmalloc_array(nfences, sizeof(*fences),
> + fences = kvmalloc_array(*out_n_fences, sizeof(*fences),
> __GFP_NOWARN | GFP_KERNEL);
> if (!fences)
> return ERR_PTR(-ENOMEM);
>
> - for (n = 0; n < nfences; n++) {
> - struct drm_i915_gem_exec_fence fence;
> + for (n = 0; n < *out_n_fences; n++) {
> + struct drm_i915_gem_exec_fence user_fence;
> struct drm_syncobj *syncobj;
> + struct dma_fence *fence = NULL;
>
> - if (__copy_from_user(&fence, user++, sizeof(fence))) {
> + if (__copy_from_user(&user_fence, user++, sizeof(user_fence))) {
> err = -EFAULT;
> goto err;
> }
>
> - if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
> + if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
> err = -EINVAL;
> goto err;
> }
>
> - syncobj = drm_syncobj_find(file, fence.handle);
> + syncobj = drm_syncobj_find(eb->file, user_fence.handle);
> if (!syncobj) {
> DRM_DEBUG("Invalid syncobj handle provided\n");
> err = -ENOENT;
> goto err;
> }
>
> + if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
> + fence = drm_syncobj_fence_get(syncobj);
> + if (!fence) {
> + DRM_DEBUG("Syncobj handle has no fence\n");
> + drm_syncobj_put(syncobj);
> + err = -EINVAL;
> + goto err;
> + }
> + }
> +
> BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
> ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
>
> - fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
> + fences[n].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
> + fences[n].dma_fence = fence;
> + fences[n].value = 0;
> + fences[n].chain_fence = NULL;
> }
>
> return fences;
> @@ -2314,37 +2474,45 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args,
> return ERR_PTR(err);
> }
>
> +static struct i915_eb_fences *
> +get_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
> +{
> + if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
> + return get_legacy_fence_array(eb, out_n_fences);
> +
> + if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
> + return get_timeline_fence_array(eb, out_n_fences);
> +
> + *out_n_fences = 0;
> + return NULL;
> +}
> +
> static void
> -put_fence_array(struct drm_i915_gem_execbuffer2 *args,
> - struct drm_syncobj **fences)
> +put_fence_array(struct i915_eb_fences *fences, int nfences)
> {
> if (fences)
> - __free_fence_array(fences, args->num_cliprects);
> + __free_fence_array(fences, nfences);
> }
>
> static int
> await_fence_array(struct i915_execbuffer *eb,
> - struct drm_syncobj **fences)
> + struct i915_eb_fences *fences,
> + int nfences)
> {
> - const unsigned int nfences = eb->args->num_cliprects;
> unsigned int n;
> int err;
>
> for (n = 0; n < nfences; n++) {
> struct drm_syncobj *syncobj;
> - struct dma_fence *fence;
> unsigned int flags;
>
> - syncobj = ptr_unpack_bits(fences[n], &flags, 2);
> - if (!(flags & I915_EXEC_FENCE_WAIT))
> - continue;
> + syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
>
> - fence = drm_syncobj_fence_get(syncobj);
> - if (!fence)
> - return -EINVAL;
> + if (!fences[n].dma_fence)
> + continue;
>
> - err = i915_request_await_dma_fence(eb->request, fence);
> - dma_fence_put(fence);
> + err = i915_request_await_dma_fence(eb->request,
> + fences[n].dma_fence);
> if (err < 0)
> return err;
> }
> @@ -2354,9 +2522,9 @@ await_fence_array(struct i915_execbuffer *eb,
>
> static void
> signal_fence_array(struct i915_execbuffer *eb,
> - struct drm_syncobj **fences)
> + struct i915_eb_fences *fences,
> + int nfences)
> {
> - const unsigned int nfences = eb->args->num_cliprects;
> struct dma_fence * const fence = &eb->request->fence;
> unsigned int n;
>
> @@ -2364,14 +2532,44 @@ signal_fence_array(struct i915_execbuffer *eb,
> struct drm_syncobj *syncobj;
> unsigned int flags;
>
> - syncobj = ptr_unpack_bits(fences[n], &flags, 2);
> + syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
> if (!(flags & I915_EXEC_FENCE_SIGNAL))
> continue;
>
> - drm_syncobj_replace_fence(syncobj, fence);
> + if (fences[n].chain_fence) {
> + drm_syncobj_add_point(syncobj, fences[n].chain_fence,
> + fence, fences[n].value);
> + /*
> + * The chain's ownership is transferred to the
> + * timeline.
> + */
> + fences[n].chain_fence = NULL;
> + } else {
> + drm_syncobj_replace_fence(syncobj, fence);
> + }
> }
> }
>
> +static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
> +{
> + struct i915_execbuffer *eb = data;
> +
> + /* Timeline fences are incompatible with the fence array flag. */
> + if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
> + return -EINVAL;
> +
> + if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
> + return -EINVAL;
> +
> + if (copy_from_user(&eb->extensions.timeline_fences, ext,
> + sizeof(eb->extensions.timeline_fences)))
> + return -EFAULT;
> +
> + eb->extensions.flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES);
> +
> + return 0;
> +}
> +
> static void retire_requests(struct intel_timeline *tl, struct i915_request *end)
> {
> struct i915_request *rq, *rn;
> @@ -2438,6 +2636,7 @@ static void eb_request_add(struct i915_execbuffer *eb)
> }
>
> static const i915_user_extension_fn execbuf_extensions[] = {
> + [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
> };
>
> static int
> @@ -2468,16 +2667,17 @@ static int
> i915_gem_do_execbuffer(struct drm_device *dev,
> struct drm_file *file,
> struct drm_i915_gem_execbuffer2 *args,
> - struct drm_i915_gem_exec_object2 *exec,
> - struct drm_syncobj **fences)
> + struct drm_i915_gem_exec_object2 *exec)
> {
> struct drm_i915_private *i915 = to_i915(dev);
> struct i915_execbuffer eb;
> struct dma_fence *in_fence = NULL;
> struct dma_fence *exec_fence = NULL;
> struct sync_file *out_fence = NULL;
> + struct i915_eb_fences *fences = NULL;
> struct i915_vma *batch;
> int out_fence_fd = -1;
> + int nfences = 0;
> int err;
>
> BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
> @@ -2521,10 +2721,16 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> if (err)
> return err;
>
> + fences = get_fence_array(&eb, &nfences);
> + if (IS_ERR(fences))
> + return PTR_ERR(fences);
> +
> if (args->flags & I915_EXEC_FENCE_IN) {
> in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
> - if (!in_fence)
> - return -EINVAL;
> + if (!in_fence) {
> + err = -EINVAL;
> + goto err_fences;
> + }
> }
>
> if (args->flags & I915_EXEC_FENCE_SUBMIT) {
> @@ -2648,7 +2854,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> }
>
> if (fences) {
> - err = await_fence_array(&eb, fences);
> + err = await_fence_array(&eb, fences, nfences);
> if (err)
> goto err_request;
> }
> @@ -2680,7 +2886,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> eb_request_add(&eb);
>
> if (fences)
> - signal_fence_array(&eb, fences);
> + signal_fence_array(&eb, fences, nfences);
>
> if (out_fence) {
> if (err == 0) {
> @@ -2715,6 +2921,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> dma_fence_put(exec_fence);
> err_in_fence:
> dma_fence_put(in_fence);
> +err_fences:
> + put_fence_array(fences, nfences);
> return err;
> }
>
> @@ -2809,7 +3017,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
> exec2_list[i].flags = 0;
> }
>
> - err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL);
> + err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list);
> if (exec2.flags & __EXEC_HAS_RELOC) {
> struct drm_i915_gem_exec_object __user *user_exec_list =
> u64_to_user_ptr(args->buffers_ptr);
> @@ -2841,7 +3049,6 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
> struct drm_i915_private *i915 = to_i915(dev);
> struct drm_i915_gem_execbuffer2 *args = data;
> struct drm_i915_gem_exec_object2 *exec2_list;
> - struct drm_syncobj **fences = NULL;
> const size_t count = args->buffer_count;
> int err;
>
> @@ -2869,15 +3076,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
> return -EFAULT;
> }
>
> - if (args->flags & I915_EXEC_FENCE_ARRAY) {
> - fences = get_fence_array(args, file);
> - if (IS_ERR(fences)) {
> - kvfree(exec2_list);
> - return PTR_ERR(fences);
> - }
> - }
> -
> - err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences);
> + err = i915_gem_do_execbuffer(dev, file, args, exec2_list);
>
> /*
> * Now that we have begun execution of the batchbuffer, we ignore
> @@ -2917,7 +3116,6 @@ end:;
> }
>
> args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
> - put_fence_array(args, fences);
> kvfree(exec2_list);
> return err;
> }
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index a7a3b4b98572..f7f868c3c510 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1828,7 +1828,8 @@ static struct drm_driver driver = {
> */
> .driver_features =
> DRIVER_GEM |
> - DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ,
> + DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ |
> + DRIVER_SYNCOBJ_TIMELINE,
> .release = i915_driver_release,
> .open = i915_driver_open,
> .lastclose = i915_driver_lastclose,
> diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
> index 54fce81d5724..b9d3aab53c03 100644
> --- a/drivers/gpu/drm/i915/i915_getparam.c
> +++ b/drivers/gpu/drm/i915/i915_getparam.c
> @@ -132,6 +132,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
> case I915_PARAM_HAS_EXEC_BATCH_FIRST:
> case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
> case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
> + case I915_PARAM_HAS_EXEC_TIMELINE_FENCES:
> /* For the time being all of these are always true;
> * if some supported hardware does not have one of these
> * features this value needs to be provided from
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 7ea38aa6502c..7b8680e3b49d 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -619,6 +619,12 @@ typedef struct drm_i915_irq_wait {
> */
> #define I915_PARAM_PERF_REVISION 54
>
> +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
> + * timeline syncobjs through drm_i915_gem_execbuffer_ext_timeline_fences. See
> + * I915_EXEC_USE_EXTENSIONS.
> + */
> +#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
> +
> /* Must be kept compact -- no holes and well documented */
>
> typedef struct drm_i915_getparam {
> @@ -1047,9 +1053,41 @@ struct drm_i915_gem_exec_fence {
> };
>
> enum drm_i915_gem_execbuffer_ext {
> + /**
> + * See drm_i915_gem_execbuffer_ext_timeline_fences.
> + */
> + DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 1,
> +
> DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
> };
>
> +/**
> + * This structure describes an array of drm_syncobj and associated points for
> + * timeline variants of drm_syncobj. It is invalid to append this structure to
> + * the execbuf if I915_EXEC_FENCE_ARRAY is set.
> + */
> +struct drm_i915_gem_execbuffer_ext_timeline_fences {
> + struct i915_user_extension base;
> +
> + /**
> + * Number of elements in the handles_ptr & values_ptr arrays.
> + */
> + __u64 fence_count;
> +
> + /**
> + * Pointer to an array of struct drm_i915_gem_exec_fence of length
> + * fence_count.
> + */
> + __u64 handles_ptr;
> +
> + /**
> + * Pointer to an array of u64 values of length fence_count. Values
> + * must be 0 for a binary drm_syncobj. A value of 0 for a timeline
> + * drm_syncobj is invalid as it turns a drm_syncobj into a binary one.
> + */
> + __u64 values_ptr;
> +};
> +
> struct drm_i915_gem_execbuffer2 {
> /**
> * List of gem_exec_object2 structs
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences
2020-08-03 14:08 ` Chris Wilson
@ 2020-08-03 14:11 ` Lionel Landwerlin
0 siblings, 0 replies; 14+ messages in thread
From: Lionel Landwerlin @ 2020-08-03 14:11 UTC (permalink / raw)
To: Chris Wilson, intel-gfx; +Cc: Daniel Vetter
On 03/08/2020 17:08, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2020-08-03 15:01:47)
>> To allow faster engine to engine synchronization, peel the layer of
>> dma-fence-chain to expose potential i915 fences so that the
>> i915-request code can emit HW semaphore wait/signal operations in the
>> ring which is faster than waking up the host to submit unblocked
>> workloads after interrupt notification.
>>
>> v2: Also deal with chains where the last node is not a dma-fence-chain
> This is already done by i915_request_await_dma_fence.
> -Chris
Cool, we can drop this then.
-Lionel
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences
2020-08-03 14:01 ` [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences Lionel Landwerlin
@ 2020-08-03 14:08 ` Chris Wilson
2020-08-03 14:11 ` Lionel Landwerlin
0 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2020-08-03 14:08 UTC (permalink / raw)
To: Lionel Landwerlin, intel-gfx; +Cc: Daniel Vetter
Quoting Lionel Landwerlin (2020-08-03 15:01:47)
> To allow faster engine to engine synchronization, peel the layer of
> dma-fence-chain to expose potential i915 fences so that the
> i915-request code can emit HW semaphore wait/signal operations in the
> ring which is faster than waking up the host to submit unblocked
> workloads after interrupt notification.
>
> v2: Also deal with chains where the last node is not a dma-fence-chain
This is already done by i915_request_await_dma_fence.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 14+ messages in thread
* [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences
2020-08-03 14:01 [Intel-gfx] [PATCH 0/3] drm/i915: timeline semaphore support Lionel Landwerlin
@ 2020-08-03 14:01 ` Lionel Landwerlin
2020-08-03 14:08 ` Chris Wilson
0 siblings, 1 reply; 14+ messages in thread
From: Lionel Landwerlin @ 2020-08-03 14:01 UTC (permalink / raw)
To: intel-gfx; +Cc: Daniel Vetter
To allow faster engine to engine synchronization, peel the layer of
dma-fence-chain to expose potential i915 fences so that the
i915-request code can emit HW semaphore wait/signal operations in the
ring which is faster than waking up the host to submit unblocked
workloads after interrupt notification.
v2: Also deal with chains where the last node is not a dma-fence-chain
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 39 ++++++++++++++++++-
1 file changed, 38 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 1f766431f3a3..dbd7f03c2187 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2390,6 +2390,7 @@ await_fence_array(struct i915_execbuffer *eb)
for (n = 0; n < eb->n_fences; n++) {
struct drm_syncobj *syncobj;
+ struct dma_fence_chain *chain;
struct dma_fence *fence;
unsigned int flags;
@@ -2410,7 +2411,43 @@ await_fence_array(struct i915_execbuffer *eb)
continue;
}
- err = i915_request_await_dma_fence(eb->request, fence);
+ chain = to_dma_fence_chain(fence);
+ if (chain) {
+ struct dma_fence *iter;
+
+ /*
+ * If we're dealing with a dma-fence-chain, peel the
+ * chain by adding all of the unsignaled fences
+ * (dma_fence_chain_for_each does that for us) the
+ * chain points to.
+ *
+ * This enables us to identify waits on i915 fences
+ * and allows for faster engine-to-engine
+ * synchronization using HW semaphores.
+ */
+ dma_fence_chain_for_each(iter, fence) {
+ struct dma_fence_chain *iter_chain =
+ to_dma_fence_chain(iter);
+
+ /*
+ * It is possible that the last item in the
+ * chain is not a dma_fence_chain.
+ */
+ if (iter_chain) {
+ err = i915_request_await_dma_fence(eb->request,
+ iter_chain->fence);
+ } else {
+ err = i915_request_await_dma_fence(eb->request, iter);
+ }
+ if (err < 0) {
+ dma_fence_put(iter);
+ break;
+ }
+ }
+ } else {
+ err = i915_request_await_dma_fence(eb->request, fence);
+ }
+
dma_fence_put(fence);
if (err < 0)
return err;
--
2.28.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences
2020-08-03 9:05 [Intel-gfx] [PATCH 0/3] drm/i915: timeline semaphore support Lionel Landwerlin
@ 2020-08-03 9:05 ` Lionel Landwerlin
0 siblings, 0 replies; 14+ messages in thread
From: Lionel Landwerlin @ 2020-08-03 9:05 UTC (permalink / raw)
To: intel-gfx; +Cc: Daniel Vetter
To allow faster engine to engine synchronization, peel the layer of
dma-fence-chain to expose potential i915 fences so that the
i915-request code can emit HW semaphore wait/signal operations in the
ring which is faster than waking up the host to submit unblocked
workloads after interrupt notification.
v2: Also deal with chains where the last node is not a dma-fence-chain
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 39 ++++++++++++++++++-
1 file changed, 38 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 1f766431f3a3..dbd7f03c2187 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2390,6 +2390,7 @@ await_fence_array(struct i915_execbuffer *eb)
for (n = 0; n < eb->n_fences; n++) {
struct drm_syncobj *syncobj;
+ struct dma_fence_chain *chain;
struct dma_fence *fence;
unsigned int flags;
@@ -2410,7 +2411,43 @@ await_fence_array(struct i915_execbuffer *eb)
continue;
}
- err = i915_request_await_dma_fence(eb->request, fence);
+ chain = to_dma_fence_chain(fence);
+ if (chain) {
+ struct dma_fence *iter;
+
+ /*
+ * If we're dealing with a dma-fence-chain, peel the
+ * chain by adding all of the unsignaled fences
+ * (dma_fence_chain_for_each does that for us) the
+ * chain points to.
+ *
+ * This enables us to identify waits on i915 fences
+ * and allows for faster engine-to-engine
+ * synchronization using HW semaphores.
+ */
+ dma_fence_chain_for_each(iter, fence) {
+ struct dma_fence_chain *iter_chain =
+ to_dma_fence_chain(iter);
+
+ /*
+ * It is possible that the last item in the
+ * chain is not a dma_fence_chain.
+ */
+ if (iter_chain) {
+ err = i915_request_await_dma_fence(eb->request,
+ iter_chain->fence);
+ } else {
+ err = i915_request_await_dma_fence(eb->request, iter);
+ }
+ if (err < 0) {
+ dma_fence_put(iter);
+ break;
+ }
+ }
+ } else {
+ err = i915_request_await_dma_fence(eb->request, fence);
+ }
+
dma_fence_put(fence);
if (err < 0)
return err;
--
2.28.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences
2020-07-31 13:45 [Intel-gfx] [PATCH 0/3] drm/i915: timeline semaphore support Lionel Landwerlin
@ 2020-07-31 13:45 ` Lionel Landwerlin
0 siblings, 0 replies; 14+ messages in thread
From: Lionel Landwerlin @ 2020-07-31 13:45 UTC (permalink / raw)
To: intel-gfx; +Cc: Daniel Vetter
To allow faster engine to engine synchronization, peel the layer of
dma-fence-chain to expose potential i915 fences so that the
i915-request code can emit HW semaphore wait/signal operations in the
ring which is faster than waking up the host to submit unblocked
workloads after interrupt notification.
v2: Also deal with chains where the last node is not a dma-fence-chain
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 39 ++++++++++++++++++-
1 file changed, 38 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 652f3b30a374..01e22b303e34 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2390,6 +2390,7 @@ await_fence_array(struct i915_execbuffer *eb)
for (n = 0; n < eb->n_fences; n++) {
struct drm_syncobj *syncobj;
+ struct dma_fence_chain *chain;
struct dma_fence *fence;
unsigned int flags;
@@ -2410,7 +2411,43 @@ await_fence_array(struct i915_execbuffer *eb)
continue;
}
- err = i915_request_await_dma_fence(eb->request, fence);
+ chain = to_dma_fence_chain(fence);
+ if (chain) {
+ struct dma_fence *iter;
+
+ /*
+ * If we're dealing with a dma-fence-chain, peel the
+ * chain by adding all of the unsignaled fences
+ * (dma_fence_chain_for_each does that for us) the
+ * chain points to.
+ *
+ * This enables us to identify waits on i915 fences
+ * and allows for faster engine-to-engine
+ * synchronization using HW semaphores.
+ */
+ dma_fence_chain_for_each(iter, fence) {
+ struct dma_fence_chain *iter_chain =
+ to_dma_fence_chain(iter);
+
+ /*
+ * It is possible that the last item in the
+ * chain is not a dma_fence_chain.
+ */
+ if (iter_chain) {
+ err = i915_request_await_dma_fence(eb->request,
+ iter_chain->fence);
+ } else {
+ err = i915_request_await_dma_fence(eb->request, iter);
+ }
+ if (err < 0) {
+ dma_fence_put(iter);
+ break;
+ }
+ }
+ } else {
+ err = i915_request_await_dma_fence(eb->request, fence);
+ }
+
dma_fence_put(fence);
if (err < 0)
return err;
--
2.28.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 14+ messages in thread
end of thread, other threads:[~2020-08-03 14:11 UTC | newest]
Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-06 20:07 [Intel-gfx] [PATCH 1/3] drm/i915: introduce a mechanism to extend execbuf2 Venkata Sandeep Dhanalakota
2020-04-06 20:07 ` [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support Venkata Sandeep Dhanalakota
2020-04-08 16:29 ` Lionel Landwerlin
2020-04-08 17:00 ` Venkata Sandeep Dhanalakota
2020-04-08 17:14 ` Lionel Landwerlin
2020-04-06 20:07 ` [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences Venkata Sandeep Dhanalakota
2020-04-06 22:13 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915: introduce a mechanism to extend execbuf2 Patchwork
2020-04-06 22:37 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2020-04-07 8:13 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
2020-07-31 13:45 [Intel-gfx] [PATCH 0/3] drm/i915: timeline semaphore support Lionel Landwerlin
2020-07-31 13:45 ` [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences Lionel Landwerlin
2020-08-03 9:05 [Intel-gfx] [PATCH 0/3] drm/i915: timeline semaphore support Lionel Landwerlin
2020-08-03 9:05 ` [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences Lionel Landwerlin
2020-08-03 14:01 [Intel-gfx] [PATCH 0/3] drm/i915: timeline semaphore support Lionel Landwerlin
2020-08-03 14:01 ` [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences Lionel Landwerlin
2020-08-03 14:08 ` Chris Wilson
2020-08-03 14:11 ` Lionel Landwerlin
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.