* [Intel-gfx] [PATCH 1/3] drm/i915: introduce a mechanism to extend execbuf2
@ 2020-04-06 20:07 Venkata Sandeep Dhanalakota
  2020-04-06 20:07 ` [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support Venkata Sandeep Dhanalakota
                   ` (4 more replies)
  0 siblings, 5 replies; 14+ messages in thread
From: Venkata Sandeep Dhanalakota @ 2020-04-06 20:07 UTC (permalink / raw)
  To: intel-gfx; +Cc: chris.p.wilson

From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>

We're planning to use this for a couple of new features where we need
to provide additional parameters to execbuf.

v2: Check for invalid flags in execbuffer2 (Lionel)

v3: Rename I915_EXEC_EXT -> I915_EXEC_USE_EXTENSIONS (Chris)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 39 ++++++++++++++++++-
 include/uapi/drm/i915_drm.h                   | 26 +++++++++++--
 2 files changed, 61 insertions(+), 4 deletions(-)
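
A rough userspace sketch of how the new flag is meant to be consumed,
assuming a concrete extension is defined (the timeline-fences extension
added later in this series is the first real user); "fd", "objects",
"nobjects" and "ext_chain" are placeholders for state set up elsewhere:

#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

/*
 * Sketch only: submit a batch with an extension chain hanging off
 * cliprects_ptr instead of a clip-rect/fence array.
 */
static int submit_with_extensions(int fd,
				  struct drm_i915_gem_exec_object2 *objects,
				  unsigned int nobjects,
				  struct i915_user_extension *ext_chain)
{
	struct drm_i915_gem_execbuffer2 execbuf;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (uintptr_t)objects;
	execbuf.buffer_count = nobjects;
	execbuf.flags = I915_EXEC_RENDER | I915_EXEC_USE_EXTENSIONS;
	/* cliprects_ptr is reused as the head of the extension chain... */
	execbuf.cliprects_ptr = (uintptr_t)ext_chain;
	/* ...and num_cliprects must then be 0. */
	execbuf.num_cliprects = 0;

	return drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
}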

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 9d11bad74e9a..16831f715daa 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -26,6 +26,7 @@
 #include "i915_gem_ioctls.h"
 #include "i915_sw_fence_work.h"
 #include "i915_trace.h"
+#include "i915_user_extensions.h"
 
 struct eb_vma {
 	struct i915_vma *vma;
@@ -288,6 +289,10 @@ struct i915_execbuffer {
 	int lut_size;
 	struct hlist_head *buckets; /** ht for relocation handles */
 	struct eb_vma_array *array;
+
+	struct {
+		u64 flags; /** Available extensions parameters */
+	} extensions;
 };
 
 static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
@@ -1698,7 +1703,8 @@ static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 		return -EINVAL;
 
 	/* Kernel clipping was a DRI1 misfeature */
-	if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) {
+	if (!(exec->flags & (I915_EXEC_FENCE_ARRAY |
+			     I915_EXEC_USE_EXTENSIONS))) {
 		if (exec->num_cliprects || exec->cliprects_ptr)
 			return -EINVAL;
 	}
@@ -2431,6 +2437,33 @@ static void eb_request_add(struct i915_execbuffer *eb)
 	mutex_unlock(&tl->mutex);
 }
 
+static const i915_user_extension_fn execbuf_extensions[] = {
+};
+
+static int
+parse_execbuf2_extensions(struct drm_i915_gem_execbuffer2 *args,
+			  struct i915_execbuffer *eb)
+{
+	eb->extensions.flags = 0;
+
+	if (!(args->flags & I915_EXEC_USE_EXTENSIONS))
+		return 0;
+
+	/* The execbuf2 extension mechanism reuses cliprects_ptr. So we cannot
+	 * have another flag also using it at the same time.
+	 */
+	if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
+		return -EINVAL;
+
+	if (args->num_cliprects != 0)
+		return -EINVAL;
+
+	return i915_user_extensions(u64_to_user_ptr(args->cliprects_ptr),
+				    execbuf_extensions,
+				    ARRAY_SIZE(execbuf_extensions),
+				    eb);
+}
+
 static int
 i915_gem_do_execbuffer(struct drm_device *dev,
 		       struct drm_file *file,
@@ -2484,6 +2517,10 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (args->flags & I915_EXEC_IS_PINNED)
 		eb.batch_flags |= I915_DISPATCH_PINNED;
 
+	err = parse_execbuf2_extensions(args, &eb);
+	if (err)
+		return err;
+
 	if (args->flags & I915_EXEC_FENCE_IN) {
 		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
 		if (!in_fence)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 14b67cd6b54b..7ea38aa6502c 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1046,6 +1046,10 @@ struct drm_i915_gem_exec_fence {
 	__u32 flags;
 };
 
+enum drm_i915_gem_execbuffer_ext {
+	DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
+};
+
 struct drm_i915_gem_execbuffer2 {
 	/**
 	 * List of gem_exec_object2 structs
@@ -1062,8 +1066,15 @@ struct drm_i915_gem_execbuffer2 {
 	__u32 num_cliprects;
 	/**
 	 * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY
-	 * is not set.  If I915_EXEC_FENCE_ARRAY is set, then this is a
-	 * struct drm_i915_gem_exec_fence *fences.
+	 * & I915_EXEC_USE_EXTENSIONS are not set.
+	 *
+	 * If I915_EXEC_FENCE_ARRAY is set, then this is a pointer to an array
+	 * of struct drm_i915_gem_exec_fence and num_cliprects is the length
+	 * of the array.
+	 *
+	 * If I915_EXEC_USE_EXTENSIONS is set, then this is a pointer to a
+	 * single struct i915_user_extension and num_cliprects is
+	 * 0.
 	 */
 	__u64 cliprects_ptr;
 #define I915_EXEC_RING_MASK              (0x3f)
@@ -1181,7 +1192,16 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_FENCE_SUBMIT		(1 << 20)
 
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1))
+/*
+ * Setting I915_EXEC_USE_EXTENSIONS implies that
+ * drm_i915_gem_execbuffer2.cliprects_ptr is treated as a pointer to a linked
+ * list of i915_user_extension. Each i915_user_extension node is the base of a
+ * larger structure. The supported structures are listed in the
+ * drm_i915_gem_execbuffer_ext enum.
+ */
+#define I915_EXEC_USE_EXTENSIONS	(1 << 21)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_USE_EXTENSIONS<<1))
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
2.21.0.5.gaeb582a983

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


* [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support
  2020-04-06 20:07 [Intel-gfx] [PATCH 1/3] drm/i915: introduce a mechanism to extend execbuf2 Venkata Sandeep Dhanalakota
@ 2020-04-06 20:07 ` Venkata Sandeep Dhanalakota
  2020-04-08 16:29   ` Lionel Landwerlin
  2020-04-08 17:14   ` Lionel Landwerlin
  2020-04-06 20:07 ` [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences Venkata Sandeep Dhanalakota
                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 14+ messages in thread
From: Venkata Sandeep Dhanalakota @ 2020-04-06 20:07 UTC (permalink / raw)
  To: intel-gfx; +Cc: chris.p.wilson

Introduces new parameters to execbuf so that we can specify syncobj
handles as well as timeline points.

v2: Reuse i915_user_extension_fn

v3: Check that the chained extension is only present once (Chris)

v4: Check that dma_fence_chain_find_seqno returns a non NULL fence
(Lionel)

v5: Use BIT_ULL (Chris)

v6: Fix issue with already signaled timeline points,
    dma_fence_chain_find_seqno() setting fence to NULL (Chris)

v7: Report ENOENT with invalid syncobj handle (Lionel)

v8: Check for out of order timeline point insertion (Chris)

v9: After explanations on
    https://lists.freedesktop.org/archives/dri-devel/2019-August/229287.html
    drop the ordering check from v8 (Lionel)

v10: Set first extension enum item to 1 (Jason)

v11: Add wait on previous sync points in timelines (Sandeep)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Venkata Sandeep Dhanalakota <venkata.s.dhanalakota at intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 312 ++++++++++++++----
 drivers/gpu/drm/i915/i915_drv.c               |   3 +-
 drivers/gpu/drm/i915/i915_getparam.c          |   1 +
 include/uapi/drm/i915_drm.h                   |  38 +++
 4 files changed, 296 insertions(+), 58 deletions(-)
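
A rough userspace sketch of the intended use; "fd" and the execbuf
object/batch setup are assumed to come from elsewhere, and the wait/signal
handles and point values below are illustrative:

#include <stdint.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

/*
 * Sketch only: wait on point 3 of one timeline syncobj and ask the
 * kernel to signal point 4 of another when this execbuf completes.
 */
static int submit_with_timeline_fences(int fd,
				       struct drm_i915_gem_execbuffer2 *execbuf,
				       uint32_t wait_handle,
				       uint32_t signal_handle)
{
	struct drm_i915_gem_exec_fence handles[2] = {
		{ .handle = wait_handle,   .flags = I915_EXEC_FENCE_WAIT },
		{ .handle = signal_handle, .flags = I915_EXEC_FENCE_SIGNAL },
	};
	uint64_t points[2] = { 3, 4 };
	struct drm_i915_gem_execbuffer_ext_timeline_fences ext = {
		.base.name = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
		.fence_count = 2,
		.handles_ptr = (uintptr_t)handles,
		.values_ptr = (uintptr_t)points,
	};

	execbuf->flags |= I915_EXEC_USE_EXTENSIONS;
	execbuf->cliprects_ptr = (uintptr_t)&ext;
	execbuf->num_cliprects = 0;

	return drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
}

Userspace can gate this path on the new I915_PARAM_HAS_EXEC_TIMELINE_FENCES
getparam before relying on it.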

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 16831f715daa..4cb4cd035daa 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -230,6 +230,13 @@ enum {
  * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
  */
 
+struct i915_eb_fences {
+	struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
+	struct dma_fence *dma_fence;
+	u64 value;
+	struct dma_fence_chain *chain_fence;
+};
+
 struct i915_execbuffer {
 	struct drm_i915_private *i915; /** i915 backpointer */
 	struct drm_file *file; /** per-file lookup tables and limits */
@@ -292,6 +299,7 @@ struct i915_execbuffer {
 
 	struct {
 		u64 flags; /** Available extensions parameters */
+		struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
 	} extensions;
 };
 
@@ -2244,67 +2252,219 @@ eb_pin_engine(struct i915_execbuffer *eb,
 }
 
 static void
-__free_fence_array(struct drm_syncobj **fences, unsigned int n)
+__free_fence_array(struct i915_eb_fences *fences, unsigned int n)
 {
-	while (n--)
-		drm_syncobj_put(ptr_mask_bits(fences[n], 2));
+	while (n--) {
+		drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
+		dma_fence_put(fences[n].dma_fence);
+		kfree(fences[n].chain_fence);
+	}
 	kvfree(fences);
 }
 
-static struct drm_syncobj **
-get_fence_array(struct drm_i915_gem_execbuffer2 *args,
-		struct drm_file *file)
+static struct i915_eb_fences *
+get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
+{
+	struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences =
+		&eb->extensions.timeline_fences;
+	struct drm_i915_gem_exec_fence __user *user_fences;
+	struct i915_eb_fences *fences;
+	u64 __user *user_values;
+	u64 num_fences, num_user_fences = timeline_fences->fence_count;
+	unsigned long n;
+	int err = 0;
+
+	/* Check multiplication overflow for access_ok() and kvmalloc_array() */
+	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
+	if (num_user_fences > min_t(unsigned long,
+				    ULONG_MAX / sizeof(*user_fences),
+				    SIZE_MAX / sizeof(*fences)))
+		return ERR_PTR(-EINVAL);
+
+	user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
+	if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences)))
+		return ERR_PTR(-EFAULT);
+
+	user_values = u64_to_user_ptr(timeline_fences->values_ptr);
+	if (!access_ok(user_values, num_user_fences * sizeof(*user_values)))
+		return ERR_PTR(-EFAULT);
+
+	fences = kvmalloc_array(num_user_fences, sizeof(*fences),
+				__GFP_NOWARN | GFP_KERNEL);
+	if (!fences)
+		return ERR_PTR(-ENOMEM);
+
+	BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
+		     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
+
+	for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) {
+		struct drm_i915_gem_exec_fence user_fence;
+		struct drm_syncobj *syncobj;
+		struct dma_fence *fence = NULL;
+		u64 point;
+
+		if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) {
+			err = -EFAULT;
+			goto err;
+		}
+
+		if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
+			err = -EINVAL;
+			goto err;
+		}
+
+		if (__get_user(point, user_values++)) {
+			err = -EFAULT;
+			goto err;
+		}
+
+		syncobj = drm_syncobj_find(eb->file, user_fence.handle);
+		if (!syncobj) {
+			DRM_DEBUG("Invalid syncobj handle provided\n");
+			err = -ENOENT;
+			goto err;
+		}
+
+		fence = drm_syncobj_fence_get(syncobj);
+
+		if (!fence && user_fence.flags &&
+		    !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
+			DRM_DEBUG("Syncobj handle has no fence\n");
+			drm_syncobj_put(syncobj);
+			err = -EINVAL;
+			goto err;
+		}
+
+		if (fence)
+			err = dma_fence_chain_find_seqno(&fence, point);
+
+		if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
+			DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
+			drm_syncobj_put(syncobj);
+			goto err;
+		}
+
+		/* A point might have been signaled already and
+		 * garbage collected from the timeline. In this case
+		 * just ignore the point and carry on.
+		 */
+		if (!fence && (user_fence.flags & I915_EXEC_FENCE_WAIT)) {
+			drm_syncobj_put(syncobj);
+			continue;
+		}
+
+		/*
+		 * For timeline syncobjs we need to preallocate chains for
+		 * later signaling.
+		 */
+		if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
+			/*
+			 * Waiting and signaling the same point (when point !=
+			 * 0) would break the timeline.
+			 */
+			if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
+				DRM_DEBUG("Trying to wait & signal the same timeline point.\n");
+				err = -EINVAL;
+				drm_syncobj_put(syncobj);
+				goto err;
+			}
+
+			fences[num_fences].chain_fence =
+				kmalloc(sizeof(*fences[num_fences].chain_fence),
+					GFP_KERNEL);
+			if (!fences[num_fences].chain_fence) {
+				drm_syncobj_put(syncobj);
+				err = -ENOMEM;
+				DRM_DEBUG("Unable to alloc chain_fence\n");
+				goto err;
+			}
+		} else {
+			fences[num_fences].chain_fence = NULL;
+		}
+
+		fences[num_fences].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
+		fences[num_fences].dma_fence = fence;
+		fences[num_fences].value = point;
+		num_fences++;
+	}
+
+	*out_n_fences = num_fences;
+
+	return fences;
+
+err:
+	__free_fence_array(fences, num_fences);
+	return ERR_PTR(err);
+}
+
+static struct i915_eb_fences *
+get_legacy_fence_array(struct i915_execbuffer *eb,
+		       int *out_n_fences)
 {
-	const unsigned long nfences = args->num_cliprects;
+	struct drm_i915_gem_execbuffer2 *args = eb->args;
 	struct drm_i915_gem_exec_fence __user *user;
-	struct drm_syncobj **fences;
+	struct i915_eb_fences *fences;
+	const u32 num_fences = args->num_cliprects;
 	unsigned long n;
 	int err;
 
-	if (!(args->flags & I915_EXEC_FENCE_ARRAY))
-		return NULL;
+	*out_n_fences = num_fences;
 
 	/* Check multiplication overflow for access_ok() and kvmalloc_array() */
 	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
-	if (nfences > min_t(unsigned long,
-			    ULONG_MAX / sizeof(*user),
-			    SIZE_MAX / sizeof(*fences)))
+	if (*out_n_fences > min_t(unsigned long,
+				  ULONG_MAX / sizeof(*user),
+				  SIZE_MAX / sizeof(*fences)))
 		return ERR_PTR(-EINVAL);
 
 	user = u64_to_user_ptr(args->cliprects_ptr);
-	if (!access_ok(user, nfences * sizeof(*user)))
+	if (!access_ok(user, *out_n_fences * sizeof(*user)))
 		return ERR_PTR(-EFAULT);
 
-	fences = kvmalloc_array(nfences, sizeof(*fences),
+	fences = kvmalloc_array(*out_n_fences, sizeof(*fences),
 				__GFP_NOWARN | GFP_KERNEL);
 	if (!fences)
 		return ERR_PTR(-ENOMEM);
 
-	for (n = 0; n < nfences; n++) {
-		struct drm_i915_gem_exec_fence fence;
+	for (n = 0; n < *out_n_fences; n++) {
+		struct drm_i915_gem_exec_fence user_fence;
 		struct drm_syncobj *syncobj;
+		struct dma_fence *fence = NULL;
 
-		if (__copy_from_user(&fence, user++, sizeof(fence))) {
+		if (__copy_from_user(&user_fence, user++, sizeof(user_fence))) {
 			err = -EFAULT;
 			goto err;
 		}
 
-		if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
+		if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
 			err = -EINVAL;
 			goto err;
 		}
 
-		syncobj = drm_syncobj_find(file, fence.handle);
+		syncobj = drm_syncobj_find(eb->file, user_fence.handle);
 		if (!syncobj) {
 			DRM_DEBUG("Invalid syncobj handle provided\n");
 			err = -ENOENT;
 			goto err;
 		}
 
+		if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
+			fence = drm_syncobj_fence_get(syncobj);
+			if (!fence) {
+				DRM_DEBUG("Syncobj handle has no fence\n");
+				drm_syncobj_put(syncobj);
+				err = -EINVAL;
+				goto err;
+			}
+		}
+
 		BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
 			     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
 
-		fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
+		fences[n].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
+		fences[n].dma_fence = fence;
+		fences[n].value = 0;
+		fences[n].chain_fence = NULL;
 	}
 
 	return fences;
@@ -2314,37 +2474,45 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args,
 	return ERR_PTR(err);
 }
 
+static struct i915_eb_fences *
+get_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
+{
+	if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
+		return get_legacy_fence_array(eb, out_n_fences);
+
+	if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
+		return get_timeline_fence_array(eb, out_n_fences);
+
+	*out_n_fences = 0;
+	return NULL;
+}
+
 static void
-put_fence_array(struct drm_i915_gem_execbuffer2 *args,
-		struct drm_syncobj **fences)
+put_fence_array(struct i915_eb_fences *fences, int nfences)
 {
 	if (fences)
-		__free_fence_array(fences, args->num_cliprects);
+		__free_fence_array(fences, nfences);
 }
 
 static int
 await_fence_array(struct i915_execbuffer *eb,
-		  struct drm_syncobj **fences)
+		  struct i915_eb_fences *fences,
+		  int nfences)
 {
-	const unsigned int nfences = eb->args->num_cliprects;
 	unsigned int n;
 	int err;
 
 	for (n = 0; n < nfences; n++) {
 		struct drm_syncobj *syncobj;
-		struct dma_fence *fence;
 		unsigned int flags;
 
-		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
-		if (!(flags & I915_EXEC_FENCE_WAIT))
-			continue;
+		syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
 
-		fence = drm_syncobj_fence_get(syncobj);
-		if (!fence)
-			return -EINVAL;
+		if (!fences[n].dma_fence)
+			continue;
 
-		err = i915_request_await_dma_fence(eb->request, fence);
-		dma_fence_put(fence);
+		err = i915_request_await_dma_fence(eb->request,
+						   fences[n].dma_fence);
 		if (err < 0)
 			return err;
 	}
@@ -2354,9 +2522,9 @@ await_fence_array(struct i915_execbuffer *eb,
 
 static void
 signal_fence_array(struct i915_execbuffer *eb,
-		   struct drm_syncobj **fences)
+		   struct i915_eb_fences *fences,
+		   int nfences)
 {
-	const unsigned int nfences = eb->args->num_cliprects;
 	struct dma_fence * const fence = &eb->request->fence;
 	unsigned int n;
 
@@ -2364,14 +2532,44 @@ signal_fence_array(struct i915_execbuffer *eb,
 		struct drm_syncobj *syncobj;
 		unsigned int flags;
 
-		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
+		syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
 		if (!(flags & I915_EXEC_FENCE_SIGNAL))
 			continue;
 
-		drm_syncobj_replace_fence(syncobj, fence);
+		if (fences[n].chain_fence) {
+			drm_syncobj_add_point(syncobj, fences[n].chain_fence,
+					      fence, fences[n].value);
+			/*
+			 * The chain's ownership is transferred to the
+			 * timeline.
+			 */
+			fences[n].chain_fence = NULL;
+		} else {
+			drm_syncobj_replace_fence(syncobj, fence);
+		}
 	}
 }
 
+static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
+{
+	struct i915_execbuffer *eb = data;
+
+	/* Timeline fences are incompatible with the fence array flag. */
+	if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
+		return -EINVAL;
+
+	if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
+		return -EINVAL;
+
+	if (copy_from_user(&eb->extensions.timeline_fences, ext,
+			   sizeof(eb->extensions.timeline_fences)))
+		return -EFAULT;
+
+	eb->extensions.flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES);
+
+	return 0;
+}
+
 static void retire_requests(struct intel_timeline *tl, struct i915_request *end)
 {
 	struct i915_request *rq, *rn;
@@ -2438,6 +2636,7 @@ static void eb_request_add(struct i915_execbuffer *eb)
 }
 
 static const i915_user_extension_fn execbuf_extensions[] = {
+	[DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
 };
 
 static int
@@ -2468,16 +2667,17 @@ static int
 i915_gem_do_execbuffer(struct drm_device *dev,
 		       struct drm_file *file,
 		       struct drm_i915_gem_execbuffer2 *args,
-		       struct drm_i915_gem_exec_object2 *exec,
-		       struct drm_syncobj **fences)
+		       struct drm_i915_gem_exec_object2 *exec)
 {
 	struct drm_i915_private *i915 = to_i915(dev);
 	struct i915_execbuffer eb;
 	struct dma_fence *in_fence = NULL;
 	struct dma_fence *exec_fence = NULL;
 	struct sync_file *out_fence = NULL;
+	struct i915_eb_fences *fences = NULL;
 	struct i915_vma *batch;
 	int out_fence_fd = -1;
+	int nfences = 0;
 	int err;
 
 	BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
@@ -2521,10 +2721,16 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (err)
 		return err;
 
+	fences = get_fence_array(&eb, &nfences);
+	if (IS_ERR(fences))
+		return PTR_ERR(fences);
+
 	if (args->flags & I915_EXEC_FENCE_IN) {
 		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
-		if (!in_fence)
-			return -EINVAL;
+		if (!in_fence) {
+			err = -EINVAL;
+			goto err_fences;
+		}
 	}
 
 	if (args->flags & I915_EXEC_FENCE_SUBMIT) {
@@ -2648,7 +2854,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	}
 
 	if (fences) {
-		err = await_fence_array(&eb, fences);
+		err = await_fence_array(&eb, fences, nfences);
 		if (err)
 			goto err_request;
 	}
@@ -2680,7 +2886,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	eb_request_add(&eb);
 
 	if (fences)
-		signal_fence_array(&eb, fences);
+		signal_fence_array(&eb, fences, nfences);
 
 	if (out_fence) {
 		if (err == 0) {
@@ -2715,6 +2921,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	dma_fence_put(exec_fence);
 err_in_fence:
 	dma_fence_put(in_fence);
+err_fences:
+	put_fence_array(fences, nfences);
 	return err;
 }
 
@@ -2809,7 +3017,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
 			exec2_list[i].flags = 0;
 	}
 
-	err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL);
+	err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list);
 	if (exec2.flags & __EXEC_HAS_RELOC) {
 		struct drm_i915_gem_exec_object __user *user_exec_list =
 			u64_to_user_ptr(args->buffers_ptr);
@@ -2841,7 +3049,6 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
 	struct drm_i915_private *i915 = to_i915(dev);
 	struct drm_i915_gem_execbuffer2 *args = data;
 	struct drm_i915_gem_exec_object2 *exec2_list;
-	struct drm_syncobj **fences = NULL;
 	const size_t count = args->buffer_count;
 	int err;
 
@@ -2869,15 +3076,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
 		return -EFAULT;
 	}
 
-	if (args->flags & I915_EXEC_FENCE_ARRAY) {
-		fences = get_fence_array(args, file);
-		if (IS_ERR(fences)) {
-			kvfree(exec2_list);
-			return PTR_ERR(fences);
-		}
-	}
-
-	err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences);
+	err = i915_gem_do_execbuffer(dev, file, args, exec2_list);
 
 	/*
 	 * Now that we have begun execution of the batchbuffer, we ignore
@@ -2917,7 +3116,6 @@ end:;
 	}
 
 	args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
-	put_fence_array(args, fences);
 	kvfree(exec2_list);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index a7a3b4b98572..f7f868c3c510 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1828,7 +1828,8 @@ static struct drm_driver driver = {
 	 */
 	.driver_features =
 	    DRIVER_GEM |
-	    DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ,
+	    DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ |
+	    DRIVER_SYNCOBJ_TIMELINE,
 	.release = i915_driver_release,
 	.open = i915_driver_open,
 	.lastclose = i915_driver_lastclose,
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index 54fce81d5724..b9d3aab53c03 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -132,6 +132,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_EXEC_BATCH_FIRST:
 	case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
 	case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
+	case I915_PARAM_HAS_EXEC_TIMELINE_FENCES:
 		/* For the time being all of these are always true;
 		 * if some supported hardware does not have one of these
 		 * features this value needs to be provided from
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 7ea38aa6502c..7b8680e3b49d 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -619,6 +619,12 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_PERF_REVISION	54
 
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
+ * timeline syncobjs through drm_i915_gem_execbuffer_ext_timeline_fences. See
+ * I915_EXEC_USE_EXTENSIONS.
+ */
+#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
+
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1047,9 +1053,41 @@ struct drm_i915_gem_exec_fence {
 };
 
 enum drm_i915_gem_execbuffer_ext {
+	/**
+	 * See drm_i915_gem_execbuffer_ext_timeline_fences.
+	 */
+	DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 1,
+
 	DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
 };
 
+/**
+ * This structure describes an array of drm_syncobj and associated points for
+ * timeline variants of drm_syncobj. It is invalid to append this structure to
+ * the execbuf if I915_EXEC_FENCE_ARRAY is set.
+ */
+struct drm_i915_gem_execbuffer_ext_timeline_fences {
+	struct i915_user_extension base;
+
+	/**
+	 * Number of elements in the handles_ptr & values_ptr arrays.
+	 */
+	__u64 fence_count;
+
+	/**
+	 * Pointer to an array of struct drm_i915_gem_exec_fence of length
+	 * fence_count.
+	 */
+	__u64 handles_ptr;
+
+	/**
+	 * Pointer to an array of u64 values of length fence_count. Values
+	 * must be 0 for a binary drm_syncobj. A value of 0 for a timeline
+	 * drm_syncobj is invalid as it turns a drm_syncobj into a binary one.
+	 */
+	__u64 values_ptr;
+};
+
 struct drm_i915_gem_execbuffer2 {
 	/**
 	 * List of gem_exec_object2 structs
-- 
2.21.0.5.gaeb582a983


* [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences
  2020-04-06 20:07 [Intel-gfx] [PATCH 1/3] drm/i915: introduce a mechanism to extend execbuf2 Venkata Sandeep Dhanalakota
  2020-04-06 20:07 ` [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support Venkata Sandeep Dhanalakota
@ 2020-04-06 20:07 ` Venkata Sandeep Dhanalakota
  2020-04-06 22:13 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915: introduce a mechanism to extend execbuf2 Patchwork
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 14+ messages in thread
From: Venkata Sandeep Dhanalakota @ 2020-04-06 20:07 UTC (permalink / raw)
  To: intel-gfx; +Cc: chris.p.wilson

From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>

To allow faster engine-to-engine synchronization, peel the layer of
dma-fence-chain to expose potential i915 fences so that the
i915-request code can emit HW semaphore wait/signal operations in the
ring, which is faster than waking up the host to submit unblocked
workloads after an interrupt notification.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 39 +++++++++++++++++--
 1 file changed, 35 insertions(+), 4 deletions(-)
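
In condensed form, the await path after this patch behaves roughly like the
sketch below (error paths and the surrounding fence-array loop trimmed; the
helper name is made up for illustration):

/* Sketch of the peeling logic added to await_fence_array(). */
static int await_peeled(struct i915_request *rq, struct dma_fence *f)
{
	struct dma_fence *iter;
	int err;

	/* Not a timeline point: await the fence directly. */
	if (!to_dma_fence_chain(f))
		return i915_request_await_dma_fence(rq, f);

	/*
	 * Timeline point: walk every unsignaled link of the chain and
	 * await each link's inner fence, so i915-native fences are
	 * recognized and can be turned into HW semaphore waits.
	 */
	dma_fence_chain_for_each(iter, f) {
		struct dma_fence_chain *link = to_dma_fence_chain(iter);

		err = i915_request_await_dma_fence(rq, link->fence);
		if (err < 0) {
			dma_fence_put(iter);
			return err;
		}
	}

	return 0;
}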

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 4cb4cd035daa..9b01f7c51b65 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2504,6 +2504,7 @@ await_fence_array(struct i915_execbuffer *eb,
 
 	for (n = 0; n < nfences; n++) {
 		struct drm_syncobj *syncobj;
+		struct dma_fence_chain *chain;
 		unsigned int flags;
 
 		syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
@@ -2511,10 +2512,40 @@ await_fence_array(struct i915_execbuffer *eb,
 		if (!fences[n].dma_fence)
 			continue;
 
-		err = i915_request_await_dma_fence(eb->request,
-						   fences[n].dma_fence);
-		if (err < 0)
-			return err;
+		/*
+		 * If we're dealing with a dma-fence-chain, peel the chain by
+		 * adding all of the unsignaled fences the chain points to
+		 * (dma_fence_chain_for_each walks those unsignaled fences
+		 * for us).
+		 *
+		 * This enables us to identify waits on i915 fences and allows
+		 * for faster engine-to-engine synchronization using HW
+		 * semaphores.
+		 */
+		chain = to_dma_fence_chain(fences[n].dma_fence);
+		if (chain) {
+			struct dma_fence *iter;
+
+			dma_fence_chain_for_each(iter, fences[n].dma_fence) {
+				struct dma_fence_chain *iter_chain =
+					to_dma_fence_chain(iter);
+
+				GEM_BUG_ON(!iter_chain);
+
+				err = i915_request_await_dma_fence(eb->request,
+								   iter_chain->fence);
+				if (err < 0) {
+					dma_fence_put(iter);
+					return err;
+				}
+			}
+
+		} else {
+			err = i915_request_await_dma_fence(eb->request,
+							   fences[n].dma_fence);
+			if (err < 0)
+				return err;
+		}
 	}
 
 	return 0;
-- 
2.21.0.5.gaeb582a983


* [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915: introduce a mechanism to extend execbuf2
  2020-04-06 20:07 [Intel-gfx] [PATCH 1/3] drm/i915: introduce a mechanism to extend execbuf2 Venkata Sandeep Dhanalakota
  2020-04-06 20:07 ` [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support Venkata Sandeep Dhanalakota
  2020-04-06 20:07 ` [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences Venkata Sandeep Dhanalakota
@ 2020-04-06 22:13 ` Patchwork
  2020-04-06 22:37 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
  2020-04-07  8:13 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
  4 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2020-04-06 22:13 UTC (permalink / raw)
  To: Venkata Sandeep Dhanalakota; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915: introduce a mechanism to extend execbuf2
URL   : https://patchwork.freedesktop.org/series/75570/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
296bd5133612 drm/i915: introduce a mechanism to extend execbuf2
-:141: CHECK:SPACING: spaces preferred around that '<<' (ctx:VxV)
#141: FILE: include/uapi/drm/i915_drm.h:1204:
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_USE_EXTENSIONS<<1))
                                                              ^

total: 0 errors, 0 warnings, 1 checks, 113 lines checked
26b10e8e5551 drm/i915: add syncobj timeline support
-:26: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a maximum 75 chars per line)
#26: 
    https://lists.freedesktop.org/archives/dri-devel/2019-August/229287.html

-:34: ERROR:BAD_SIGN_OFF: Unrecognized email address: 'Venkata Sandeep Dhanalakota <venkata.s.dhanalakota at intel.com>'
#34: 
Signed-off-by: Venkata Sandeep Dhanalakota <venkata.s.dhanalakota at intel.com>

-:622: WARNING:NO_AUTHOR_SIGN_OFF: Missing Signed-off-by: line by nominal patch author 'Venkata Sandeep Dhanalakota <venkata.s.dhanalakota@intel.com>'

total: 1 errors, 2 warnings, 0 checks, 551 lines checked
afdb9ac918ac drm/i915: peel dma-fence-chains wait fences


* [Intel-gfx] ✓ Fi.CI.BAT: success for series starting with [1/3] drm/i915: introduce a mechanism to extend execbuf2
  2020-04-06 20:07 [Intel-gfx] [PATCH 1/3] drm/i915: introduce a mechanism to extend execbuf2 Venkata Sandeep Dhanalakota
                   ` (2 preceding siblings ...)
  2020-04-06 22:13 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915: introduce a mechanism to extend execbuf2 Patchwork
@ 2020-04-06 22:37 ` Patchwork
  2020-04-07  8:13 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
  4 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2020-04-06 22:37 UTC (permalink / raw)
  To: Venkata Sandeep Dhanalakota; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915: introduce a mechanism to extend execbuf2
URL   : https://patchwork.freedesktop.org/series/75570/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_8264 -> Patchwork_17225
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/index.html

Known issues
------------

  Here are the changes found in Patchwork_17225 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_pm_rpm@basic-rte:
    - fi-icl-dsi:         [PASS][1] -> [INCOMPLETE][2] ([i915#189])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/fi-icl-dsi/igt@i915_pm_rpm@basic-rte.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/fi-icl-dsi/igt@i915_pm_rpm@basic-rte.html

  
#### Possible fixes ####

  * igt@gem_exec_suspend@basic-s4-devices:
    - fi-tgl-y:           [FAIL][3] ([i915#1158]) -> [PASS][4]
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/fi-tgl-y/igt@gem_exec_suspend@basic-s4-devices.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/fi-tgl-y/igt@gem_exec_suspend@basic-s4-devices.html

  * igt@i915_selftest@live@hangcheck:
    - fi-icl-y:           [INCOMPLETE][5] ([i915#1580]) -> [PASS][6]
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/fi-icl-y/igt@i915_selftest@live@hangcheck.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/fi-icl-y/igt@i915_selftest@live@hangcheck.html

  
  [i915#1158]: https://gitlab.freedesktop.org/drm/intel/issues/1158
  [i915#1580]: https://gitlab.freedesktop.org/drm/intel/issues/1580
  [i915#189]: https://gitlab.freedesktop.org/drm/intel/issues/189


Participating hosts (53 -> 46)
------------------------------

  Missing    (7): fi-ilk-m540 fi-hsw-4200u fi-byt-squawks fi-bsw-cyan fi-ctg-p8600 fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_8264 -> Patchwork_17225

  CI-20190529: 20190529
  CI_DRM_8264: e0104585f880a64d4a9b40803cf4fb51ab499f7c @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5573: 9c582425d6b4fc1de9fc2ffc8015cc6f0a0d3e98 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_17225: afdb9ac918aca5d4a603ec3d33e2b4932e3dc1ca @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

afdb9ac918ac drm/i915: peel dma-fence-chains wait fences
26b10e8e5551 drm/i915: add syncobj timeline support
296bd5133612 drm/i915: introduce a mechanism to extend execbuf2

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/index.html

* [Intel-gfx] ✓ Fi.CI.IGT: success for series starting with [1/3] drm/i915: introduce a mechanism to extend execbuf2
  2020-04-06 20:07 [Intel-gfx] [PATCH 1/3] drm/i915: introduce a mechanism to extend execbuf2 Venkata Sandeep Dhanalakota
                   ` (3 preceding siblings ...)
  2020-04-06 22:37 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
@ 2020-04-07  8:13 ` Patchwork
  4 siblings, 0 replies; 14+ messages in thread
From: Patchwork @ 2020-04-07  8:13 UTC (permalink / raw)
  To: Venkata Sandeep Dhanalakota; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/3] drm/i915: introduce a mechanism to extend execbuf2
URL   : https://patchwork.freedesktop.org/series/75570/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_8264_full -> Patchwork_17225_full
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  

Known issues
------------

  Here are the changes found in Patchwork_17225_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@i915_pm_dc@dc6-psr:
    - shard-iclb:         [PASS][1] -> [FAIL][2] ([i915#454])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-iclb4/igt@i915_pm_dc@dc6-psr.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-iclb4/igt@i915_pm_dc@dc6-psr.html

  * igt@i915_pm_rpm@modeset-non-lpsp-stress:
    - shard-kbl:          [PASS][3] -> [DMESG-WARN][4] ([i915#165])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-kbl6/igt@i915_pm_rpm@modeset-non-lpsp-stress.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-kbl2/igt@i915_pm_rpm@modeset-non-lpsp-stress.html

  * igt@kms_cursor_crc@pipe-a-cursor-suspend:
    - shard-skl:          [PASS][5] -> [INCOMPLETE][6] ([i915#300])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-skl8/igt@kms_cursor_crc@pipe-a-cursor-suspend.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-skl3/igt@kms_cursor_crc@pipe-a-cursor-suspend.html

  * igt@kms_cursor_edge_walk@pipe-c-128x128-bottom-edge:
    - shard-kbl:          [PASS][7] -> [DMESG-WARN][8] ([i915#78])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-kbl6/igt@kms_cursor_edge_walk@pipe-c-128x128-bottom-edge.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-kbl2/igt@kms_cursor_edge_walk@pipe-c-128x128-bottom-edge.html

  * igt@kms_cursor_legacy@2x-long-flip-vs-cursor-atomic:
    - shard-glk:          [PASS][9] -> [FAIL][10] ([i915#72])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-glk8/igt@kms_cursor_legacy@2x-long-flip-vs-cursor-atomic.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-glk8/igt@kms_cursor_legacy@2x-long-flip-vs-cursor-atomic.html

  * igt@kms_flip@flip-vs-suspend:
    - shard-snb:          [PASS][11] -> [DMESG-WARN][12] ([i915#42])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-snb7/igt@kms_flip@flip-vs-suspend.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-snb5/igt@kms_flip@flip-vs-suspend.html

  * igt@kms_flip@plain-flip-fb-recreate-interruptible:
    - shard-glk:          [PASS][13] -> [FAIL][14] ([i915#1487])
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-glk1/igt@kms_flip@plain-flip-fb-recreate-interruptible.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-glk1/igt@kms_flip@plain-flip-fb-recreate-interruptible.html

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes:
    - shard-kbl:          [PASS][15] -> [DMESG-WARN][16] ([i915#180])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-kbl7/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-kbl4/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes.html

  * igt@kms_psr@psr2_cursor_mmap_cpu:
    - shard-iclb:         [PASS][17] -> [SKIP][18] ([fdo#109441]) +2 similar issues
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-iclb2/igt@kms_psr@psr2_cursor_mmap_cpu.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-iclb1/igt@kms_psr@psr2_cursor_mmap_cpu.html

  * igt@kms_vblank@pipe-b-ts-continuation-suspend:
    - shard-skl:          [PASS][19] -> [INCOMPLETE][20] ([i915#69])
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-skl1/igt@kms_vblank@pipe-b-ts-continuation-suspend.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-skl9/igt@kms_vblank@pipe-b-ts-continuation-suspend.html

  
#### Possible fixes ####

  * {igt@gem_ctx_isolation@preservation-s3@rcs0}:
    - shard-apl:          [DMESG-WARN][21] ([i915#180]) -> [PASS][22] +2 similar issues
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-apl6/igt@gem_ctx_isolation@preservation-s3@rcs0.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-apl2/igt@gem_ctx_isolation@preservation-s3@rcs0.html

  * igt@gem_exec_balancer@hang:
    - shard-tglb:         [FAIL][23] ([i915#1277]) -> [PASS][24]
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-tglb6/igt@gem_exec_balancer@hang.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-tglb8/igt@gem_exec_balancer@hang.html

  * igt@i915_pm_rpm@basic-pci-d3-state:
    - shard-skl:          [FAIL][25] ([i915#138]) -> [PASS][26]
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-skl5/igt@i915_pm_rpm@basic-pci-d3-state.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-skl8/igt@i915_pm_rpm@basic-pci-d3-state.html

  * igt@i915_selftest@live@blt:
    - shard-snb:          [DMESG-FAIL][27] ([i915#1409]) -> [PASS][28]
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-snb4/igt@i915_selftest@live@blt.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-snb4/igt@i915_selftest@live@blt.html

  * igt@i915_suspend@fence-restore-tiled2untiled:
    - shard-skl:          [INCOMPLETE][29] ([i915#69]) -> [PASS][30]
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-skl3/igt@i915_suspend@fence-restore-tiled2untiled.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-skl8/igt@i915_suspend@fence-restore-tiled2untiled.html

  * igt@kms_fbcon_fbt@fbc-suspend:
    - shard-kbl:          [DMESG-WARN][31] ([i915#180] / [i915#93] / [i915#95]) -> [PASS][32]
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-kbl1/igt@kms_fbcon_fbt@fbc-suspend.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-kbl4/igt@kms_fbcon_fbt@fbc-suspend.html

  * igt@kms_flip@2x-plain-flip-ts-check:
    - shard-glk:          [FAIL][33] ([i915#34]) -> [PASS][34]
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-glk5/igt@kms_flip@2x-plain-flip-ts-check.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-glk6/igt@kms_flip@2x-plain-flip-ts-check.html

  * igt@kms_flip@flip-vs-expired-vblank:
    - shard-apl:          [FAIL][35] ([i915#79]) -> [PASS][36]
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-apl4/igt@kms_flip@flip-vs-expired-vblank.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-apl6/igt@kms_flip@flip-vs-expired-vblank.html

  * igt@kms_flip@flip-vs-suspend-interruptible:
    - shard-kbl:          [DMESG-WARN][37] ([i915#180]) -> [PASS][38] +2 similar issues
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-kbl3/igt@kms_flip@flip-vs-suspend-interruptible.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-kbl1/igt@kms_flip@flip-vs-suspend-interruptible.html

  * igt@kms_hdr@bpc-switch-dpms:
    - shard-skl:          [FAIL][39] ([i915#1188]) -> [PASS][40]
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-skl4/igt@kms_hdr@bpc-switch-dpms.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-skl4/igt@kms_hdr@bpc-switch-dpms.html

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes:
    - shard-snb:          [DMESG-WARN][41] ([i915#42]) -> [PASS][42]
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-snb6/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-snb2/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-b-planes.html

  * igt@kms_plane_lowres@pipe-a-tiling-none:
    - shard-kbl:          [DMESG-WARN][43] ([i915#165] / [i915#78]) -> [PASS][44]
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-kbl2/igt@kms_plane_lowres@pipe-a-tiling-none.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-kbl1/igt@kms_plane_lowres@pipe-a-tiling-none.html

  * igt@kms_psr@psr2_cursor_plane_onoff:
    - shard-iclb:         [SKIP][45] ([fdo#109441]) -> [PASS][46]
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-iclb1/igt@kms_psr@psr2_cursor_plane_onoff.html
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-iclb2/igt@kms_psr@psr2_cursor_plane_onoff.html

  * igt@kms_setmode@basic:
    - shard-apl:          [FAIL][47] ([i915#31]) -> [PASS][48]
   [47]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-apl7/igt@kms_setmode@basic.html
   [48]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-apl4/igt@kms_setmode@basic.html
    - shard-glk:          [FAIL][49] ([i915#31]) -> [PASS][50]
   [49]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-glk5/igt@kms_setmode@basic.html
   [50]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-glk6/igt@kms_setmode@basic.html

  
#### Warnings ####

  * igt@i915_pm_dc@dc6-psr:
    - shard-tglb:         [FAIL][51] ([i915#454]) -> [SKIP][52] ([i915#468])
   [51]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-tglb1/igt@i915_pm_dc@dc6-psr.html
   [52]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-tglb2/igt@i915_pm_dc@dc6-psr.html

  * igt@i915_pm_rpm@system-suspend-devices:
    - shard-snb:          [SKIP][53] ([fdo#109271]) -> [INCOMPLETE][54] ([i915#82])
   [53]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_8264/shard-snb4/igt@i915_pm_rpm@system-suspend-devices.html
   [54]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/shard-snb5/igt@i915_pm_rpm@system-suspend-devices.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [i915#1188]: https://gitlab.freedesktop.org/drm/intel/issues/1188
  [i915#1277]: https://gitlab.freedesktop.org/drm/intel/issues/1277
  [i915#138]: https://gitlab.freedesktop.org/drm/intel/issues/138
  [i915#1409]: https://gitlab.freedesktop.org/drm/intel/issues/1409
  [i915#1487]: https://gitlab.freedesktop.org/drm/intel/issues/1487
  [i915#165]: https://gitlab.freedesktop.org/drm/intel/issues/165
  [i915#180]: https://gitlab.freedesktop.org/drm/intel/issues/180
  [i915#300]: https://gitlab.freedesktop.org/drm/intel/issues/300
  [i915#31]: https://gitlab.freedesktop.org/drm/intel/issues/31
  [i915#34]: https://gitlab.freedesktop.org/drm/intel/issues/34
  [i915#42]: https://gitlab.freedesktop.org/drm/intel/issues/42
  [i915#454]: https://gitlab.freedesktop.org/drm/intel/issues/454
  [i915#468]: https://gitlab.freedesktop.org/drm/intel/issues/468
  [i915#69]: https://gitlab.freedesktop.org/drm/intel/issues/69
  [i915#72]: https://gitlab.freedesktop.org/drm/intel/issues/72
  [i915#78]: https://gitlab.freedesktop.org/drm/intel/issues/78
  [i915#79]: https://gitlab.freedesktop.org/drm/intel/issues/79
  [i915#82]: https://gitlab.freedesktop.org/drm/intel/issues/82
  [i915#93]: https://gitlab.freedesktop.org/drm/intel/issues/93
  [i915#95]: https://gitlab.freedesktop.org/drm/intel/issues/95


Participating hosts (10 -> 10)
------------------------------

  No changes in participating hosts


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_8264 -> Patchwork_17225

  CI-20190529: 20190529
  CI_DRM_8264: e0104585f880a64d4a9b40803cf4fb51ab499f7c @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5573: 9c582425d6b4fc1de9fc2ffc8015cc6f0a0d3e98 @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_17225: afdb9ac918aca5d4a603ec3d33e2b4932e3dc1ca @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_17225/index.html

* Re: [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support
  2020-04-06 20:07 ` [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support Venkata Sandeep Dhanalakota
@ 2020-04-08 16:29   ` Lionel Landwerlin
  2020-04-08 17:00     ` Venkata Sandeep Dhanalakota
  2020-04-08 17:14   ` Lionel Landwerlin
  1 sibling, 1 reply; 14+ messages in thread
From: Lionel Landwerlin @ 2020-04-08 16:29 UTC (permalink / raw)
  To: Venkata Sandeep Dhanalakota, intel-gfx; +Cc: chris.p.wilson

On 06/04/2020 23:07, Venkata Sandeep Dhanalakota wrote:
> Introduces a new parameters to execbuf so that we can specify syncobj
> handles as well as timeline points.
>
> v2: Reuse i915_user_extension_fn
>
> v3: Check that the chained extension is only present once (Chris)
>
> v4: Check that dma_fence_chain_find_seqno returns a non NULL fence
> (Lionel)
>
> v5: Use BIT_ULL (Chris)
>
> v6: Fix issue with already signaled timeline points,
>      dma_fence_chain_find_seqno() setting fence to NULL (Chris)
>
> v7: Report ENOENT with invalid syncobj handle (Lionel)
>
> v8: Check for out of order timeline point insertion (Chris)
>
> v9: After explanations on
>      https://lists.freedesktop.org/archives/dri-devel/2019-August/229287.html
>      drop the ordering check from v8 (Lionel)
>
> v10: Set first extension enum item to 1 (Jason)
>
> v11: Add wait on previous sync points in timelines (Sandeep)


Thanks for picking this series up!


Could you point to the changes in v11?

I haven't looked at it in a while and I can't remember what you would have
changed.


Thanks a lot,


-Lionel


>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Signed-off-by: Venkata Sandeep Dhanalakota <venkata.s.dhanalakota at intel.com>
> ---
>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 312 ++++++++++++++----
>   drivers/gpu/drm/i915/i915_drv.c               |   3 +-
>   drivers/gpu/drm/i915/i915_getparam.c          |   1 +
>   include/uapi/drm/i915_drm.h                   |  38 +++
>   4 files changed, 296 insertions(+), 58 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 16831f715daa..4cb4cd035daa 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -230,6 +230,13 @@ enum {
>    * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
>    */
>   
> +struct i915_eb_fences {
> +	struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
> +	struct dma_fence *dma_fence;
> +	u64 value;
> +	struct dma_fence_chain *chain_fence;
> +};
> +
>   struct i915_execbuffer {
>   	struct drm_i915_private *i915; /** i915 backpointer */
>   	struct drm_file *file; /** per-file lookup tables and limits */
> @@ -292,6 +299,7 @@ struct i915_execbuffer {
>   
>   	struct {
>   		u64 flags; /** Available extensions parameters */
> +		struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
>   	} extensions;
>   };
>   
> @@ -2244,67 +2252,219 @@ eb_pin_engine(struct i915_execbuffer *eb,
>   }
>   
>   static void
> -__free_fence_array(struct drm_syncobj **fences, unsigned int n)
> +__free_fence_array(struct i915_eb_fences *fences, unsigned int n)
>   {
> -	while (n--)
> -		drm_syncobj_put(ptr_mask_bits(fences[n], 2));
> +	while (n--) {
> +		drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
> +		dma_fence_put(fences[n].dma_fence);
> +		kfree(fences[n].chain_fence);
> +	}
>   	kvfree(fences);
>   }
>   
> -static struct drm_syncobj **
> -get_fence_array(struct drm_i915_gem_execbuffer2 *args,
> -		struct drm_file *file)
> +static struct i915_eb_fences *
> +get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
> +{
> +	struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences =
> +		&eb->extensions.timeline_fences;
> +	struct drm_i915_gem_exec_fence __user *user_fences;
> +	struct i915_eb_fences *fences;
> +	u64 __user *user_values;
> +	u64 num_fences, num_user_fences = timeline_fences->fence_count;
> +	unsigned long n;
> +	int err = 0;
> +
> +	/* Check multiplication overflow for access_ok() and kvmalloc_array() */
> +	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
> +	if (num_user_fences > min_t(unsigned long,
> +				    ULONG_MAX / sizeof(*user_fences),
> +				    SIZE_MAX / sizeof(*fences)))
> +		return ERR_PTR(-EINVAL);
> +
> +	user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
> +	if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences)))
> +		return ERR_PTR(-EFAULT);
> +
> +	user_values = u64_to_user_ptr(timeline_fences->values_ptr);
> +	if (!access_ok(user_values, num_user_fences * sizeof(*user_values)))
> +		return ERR_PTR(-EFAULT);
> +
> +	fences = kvmalloc_array(num_user_fences, sizeof(*fences),
> +				__GFP_NOWARN | GFP_KERNEL);
> +	if (!fences)
> +		return ERR_PTR(-ENOMEM);
> +
> +	BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
> +		     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
> +
> +	for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) {
> +		struct drm_i915_gem_exec_fence user_fence;
> +		struct drm_syncobj *syncobj;
> +		struct dma_fence *fence = NULL;
> +		u64 point;
> +
> +		if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) {
> +			err = -EFAULT;
> +			goto err;
> +		}
> +
> +		if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
> +			err = -EINVAL;
> +			goto err;
> +		}
> +
> +		if (__get_user(point, user_values++)) {
> +			err = -EFAULT;
> +			goto err;
> +		}
> +
> +		syncobj = drm_syncobj_find(eb->file, user_fence.handle);
> +		if (!syncobj) {
> +			DRM_DEBUG("Invalid syncobj handle provided\n");
> +			err = -ENOENT;
> +			goto err;
> +		}
> +
> +		fence = drm_syncobj_fence_get(syncobj);
> +
> +		if (!fence && user_fence.flags &&
> +		    !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
> +			DRM_DEBUG("Syncobj handle has no fence\n");
> +			drm_syncobj_put(syncobj);
> +			err = -EINVAL;
> +			goto err;
> +		}
> +
> +		if (fence)
> +			err = dma_fence_chain_find_seqno(&fence, point);
> +
> +		if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
> +			DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
> +			drm_syncobj_put(syncobj);
> +			goto err;
> +		}
> +
> +		/* A point might have been signaled already and
> +		 * garbage collected from the timeline. In this case
> +		 * just ignore the point and carry on.
> +		 */
> +		if (!fence && (user_fence.flags & I915_EXEC_FENCE_WAIT)) {
> +			drm_syncobj_put(syncobj);
> +			continue;
> +		}
> +
> +		/*
> +		 * For timeline syncobjs we need to preallocate chains for
> +		 * later signaling.
> +		 */
> +		if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
> +			/*
> +			 * Waiting and signaling the same point (when point !=
> +			 * 0) would break the timeline.
> +			 */
> +			if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
> +				DRM_DEBUG("Trying to wait & signal the same timeline point.\n");
> +				err = -EINVAL;
> +				drm_syncobj_put(syncobj);
> +				goto err;
> +			}
> +
> +			fences[num_fences].chain_fence =
> +				kmalloc(sizeof(*fences[num_fences].chain_fence),
> +					GFP_KERNEL);
> +			if (!fences[num_fences].chain_fence) {
> +				drm_syncobj_put(syncobj);
> +				err = -ENOMEM;
> +				DRM_DEBUG("Unable to alloc chain_fence\n");
> +				goto err;
> +			}
> +		} else {
> +			fences[num_fences].chain_fence = NULL;
> +		}
> +
> +		fences[num_fences].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
> +		fences[num_fences].dma_fence = fence;
> +		fences[num_fences].value = point;
> +		num_fences++;
> +	}
> +
> +	*out_n_fences = num_fences;
> +
> +	return fences;
> +
> +err:
> +	__free_fence_array(fences, num_fences);
> +	return ERR_PTR(err);
> +}
> +
> +static struct i915_eb_fences *
> +get_legacy_fence_array(struct i915_execbuffer *eb,
> +		       int *out_n_fences)
>   {
> -	const unsigned long nfences = args->num_cliprects;
> +	struct drm_i915_gem_execbuffer2 *args = eb->args;
>   	struct drm_i915_gem_exec_fence __user *user;
> -	struct drm_syncobj **fences;
> +	struct i915_eb_fences *fences;
> +	const u32 num_fences = args->num_cliprects;
>   	unsigned long n;
>   	int err;
>   
> -	if (!(args->flags & I915_EXEC_FENCE_ARRAY))
> -		return NULL;
> +	*out_n_fences = num_fences;
>   
>   	/* Check multiplication overflow for access_ok() and kvmalloc_array() */
>   	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
> -	if (nfences > min_t(unsigned long,
> -			    ULONG_MAX / sizeof(*user),
> -			    SIZE_MAX / sizeof(*fences)))
> +	if (*out_n_fences > min_t(unsigned long,
> +				  ULONG_MAX / sizeof(*user),
> +				  SIZE_MAX / sizeof(*fences)))
>   		return ERR_PTR(-EINVAL);
>   
>   	user = u64_to_user_ptr(args->cliprects_ptr);
> -	if (!access_ok(user, nfences * sizeof(*user)))
> +	if (!access_ok(user, *out_n_fences * sizeof(*user)))
>   		return ERR_PTR(-EFAULT);
>   
> -	fences = kvmalloc_array(nfences, sizeof(*fences),
> +	fences = kvmalloc_array(*out_n_fences, sizeof(*fences),
>   				__GFP_NOWARN | GFP_KERNEL);
>   	if (!fences)
>   		return ERR_PTR(-ENOMEM);
>   
> -	for (n = 0; n < nfences; n++) {
> -		struct drm_i915_gem_exec_fence fence;
> +	for (n = 0; n < *out_n_fences; n++) {
> +		struct drm_i915_gem_exec_fence user_fence;
>   		struct drm_syncobj *syncobj;
> +		struct dma_fence *fence = NULL;
>   
> -		if (__copy_from_user(&fence, user++, sizeof(fence))) {
> +		if (__copy_from_user(&user_fence, user++, sizeof(user_fence))) {
>   			err = -EFAULT;
>   			goto err;
>   		}
>   
> -		if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
> +		if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
>   			err = -EINVAL;
>   			goto err;
>   		}
>   
> -		syncobj = drm_syncobj_find(file, fence.handle);
> +		syncobj = drm_syncobj_find(eb->file, user_fence.handle);
>   		if (!syncobj) {
>   			DRM_DEBUG("Invalid syncobj handle provided\n");
>   			err = -ENOENT;
>   			goto err;
>   		}
>   
> +		if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
> +			fence = drm_syncobj_fence_get(syncobj);
> +			if (!fence) {
> +				DRM_DEBUG("Syncobj handle has no fence\n");
> +				drm_syncobj_put(syncobj);
> +				err = -EINVAL;
> +				goto err;
> +			}
> +		}
> +
>   		BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
>   			     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
>   
> -		fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
> +		fences[n].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
> +		fences[n].dma_fence = fence;
> +		fences[n].value = 0;
> +		fences[n].chain_fence = NULL;
>   	}
>   
>   	return fences;
> @@ -2314,37 +2474,45 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args,
>   	return ERR_PTR(err);
>   }
>   
> +static struct i915_eb_fences *
> +get_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
> +{
> +	if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
> +		return get_legacy_fence_array(eb, out_n_fences);
> +
> +	if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
> +		return get_timeline_fence_array(eb, out_n_fences);
> +
> +	*out_n_fences = 0;
> +	return NULL;
> +}
> +
>   static void
> -put_fence_array(struct drm_i915_gem_execbuffer2 *args,
> -		struct drm_syncobj **fences)
> +put_fence_array(struct i915_eb_fences *fences, int nfences)
>   {
>   	if (fences)
> -		__free_fence_array(fences, args->num_cliprects);
> +		__free_fence_array(fences, nfences);
>   }
>   
>   static int
>   await_fence_array(struct i915_execbuffer *eb,
> -		  struct drm_syncobj **fences)
> +		  struct i915_eb_fences *fences,
> +		  int nfences)
>   {
> -	const unsigned int nfences = eb->args->num_cliprects;
>   	unsigned int n;
>   	int err;
>   
>   	for (n = 0; n < nfences; n++) {
>   		struct drm_syncobj *syncobj;
> -		struct dma_fence *fence;
>   		unsigned int flags;
>   
> -		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
> -		if (!(flags & I915_EXEC_FENCE_WAIT))
> -			continue;
> +		syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
>   
> -		fence = drm_syncobj_fence_get(syncobj);
> -		if (!fence)
> -			return -EINVAL;
> +		if (!fences[n].dma_fence)
> +			continue;
>   
> -		err = i915_request_await_dma_fence(eb->request, fence);
> -		dma_fence_put(fence);
> +		err = i915_request_await_dma_fence(eb->request,
> +						   fences[n].dma_fence);
>   		if (err < 0)
>   			return err;
>   	}
> @@ -2354,9 +2522,9 @@ await_fence_array(struct i915_execbuffer *eb,
>   
>   static void
>   signal_fence_array(struct i915_execbuffer *eb,
> -		   struct drm_syncobj **fences)
> +		   struct i915_eb_fences *fences,
> +		   int nfences)
>   {
> -	const unsigned int nfences = eb->args->num_cliprects;
>   	struct dma_fence * const fence = &eb->request->fence;
>   	unsigned int n;
>   
> @@ -2364,14 +2532,44 @@ signal_fence_array(struct i915_execbuffer *eb,
>   		struct drm_syncobj *syncobj;
>   		unsigned int flags;
>   
> -		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
> +		syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
>   		if (!(flags & I915_EXEC_FENCE_SIGNAL))
>   			continue;
>   
> -		drm_syncobj_replace_fence(syncobj, fence);
> +		if (fences[n].chain_fence) {
> +			drm_syncobj_add_point(syncobj, fences[n].chain_fence,
> +					      fence, fences[n].value);
> +			/*
> +			 * The chain's ownership is transferred to the
> +			 * timeline.
> +			 */
> +			fences[n].chain_fence = NULL;
> +		} else {
> +			drm_syncobj_replace_fence(syncobj, fence);
> +		}
>   	}
>   }
>   
> +static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
> +{
> +	struct i915_execbuffer *eb = data;
> +
> +	/* Timeline fences are incompatible with the fence array flag. */
> +	if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
> +		return -EINVAL;
> +
> +	if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
> +		return -EINVAL;
> +
> +	if (copy_from_user(&eb->extensions.timeline_fences, ext,
> +			   sizeof(eb->extensions.timeline_fences)))
> +		return -EFAULT;
> +
> +	eb->extensions.flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES);
> +
> +	return 0;
> +}
> +
>   static void retire_requests(struct intel_timeline *tl, struct i915_request *end)
>   {
>   	struct i915_request *rq, *rn;
> @@ -2438,6 +2636,7 @@ static void eb_request_add(struct i915_execbuffer *eb)
>   }
>   
>   static const i915_user_extension_fn execbuf_extensions[] = {
> +	[DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
>   };
>   
>   static int
> @@ -2468,16 +2667,17 @@ static int
>   i915_gem_do_execbuffer(struct drm_device *dev,
>   		       struct drm_file *file,
>   		       struct drm_i915_gem_execbuffer2 *args,
> -		       struct drm_i915_gem_exec_object2 *exec,
> -		       struct drm_syncobj **fences)
> +		       struct drm_i915_gem_exec_object2 *exec)
>   {
>   	struct drm_i915_private *i915 = to_i915(dev);
>   	struct i915_execbuffer eb;
>   	struct dma_fence *in_fence = NULL;
>   	struct dma_fence *exec_fence = NULL;
>   	struct sync_file *out_fence = NULL;
> +	struct i915_eb_fences *fences = NULL;
>   	struct i915_vma *batch;
>   	int out_fence_fd = -1;
> +	int nfences = 0;
>   	int err;
>   
>   	BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
> @@ -2521,10 +2721,16 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   	if (err)
>   		return err;
>   
> +	fences = get_fence_array(&eb, &nfences);
> +	if (IS_ERR(fences))
> +		return PTR_ERR(fences);
> +
>   	if (args->flags & I915_EXEC_FENCE_IN) {
>   		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
> -		if (!in_fence)
> -			return -EINVAL;
> +		if (!in_fence) {
> +			err = -EINVAL;
> +			goto err_fences;
> +		}
>   	}
>   
>   	if (args->flags & I915_EXEC_FENCE_SUBMIT) {
> @@ -2648,7 +2854,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   	}
>   
>   	if (fences) {
> -		err = await_fence_array(&eb, fences);
> +		err = await_fence_array(&eb, fences, nfences);
>   		if (err)
>   			goto err_request;
>   	}
> @@ -2680,7 +2886,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   	eb_request_add(&eb);
>   
>   	if (fences)
> -		signal_fence_array(&eb, fences);
> +		signal_fence_array(&eb, fences, nfences);
>   
>   	if (out_fence) {
>   		if (err == 0) {
> @@ -2715,6 +2921,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   	dma_fence_put(exec_fence);
>   err_in_fence:
>   	dma_fence_put(in_fence);
> +err_fences:
> +	put_fence_array(fences, nfences);
>   	return err;
>   }
>   
> @@ -2809,7 +3017,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
>   			exec2_list[i].flags = 0;
>   	}
>   
> -	err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL);
> +	err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list);
>   	if (exec2.flags & __EXEC_HAS_RELOC) {
>   		struct drm_i915_gem_exec_object __user *user_exec_list =
>   			u64_to_user_ptr(args->buffers_ptr);
> @@ -2841,7 +3049,6 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
>   	struct drm_i915_private *i915 = to_i915(dev);
>   	struct drm_i915_gem_execbuffer2 *args = data;
>   	struct drm_i915_gem_exec_object2 *exec2_list;
> -	struct drm_syncobj **fences = NULL;
>   	const size_t count = args->buffer_count;
>   	int err;
>   
> @@ -2869,15 +3076,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
>   		return -EFAULT;
>   	}
>   
> -	if (args->flags & I915_EXEC_FENCE_ARRAY) {
> -		fences = get_fence_array(args, file);
> -		if (IS_ERR(fences)) {
> -			kvfree(exec2_list);
> -			return PTR_ERR(fences);
> -		}
> -	}
> -
> -	err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences);
> +	err = i915_gem_do_execbuffer(dev, file, args, exec2_list);
>   
>   	/*
>   	 * Now that we have begun execution of the batchbuffer, we ignore
> @@ -2917,7 +3116,6 @@ end:;
>   	}
>   
>   	args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
> -	put_fence_array(args, fences);
>   	kvfree(exec2_list);
>   	return err;
>   }
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index a7a3b4b98572..f7f868c3c510 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1828,7 +1828,8 @@ static struct drm_driver driver = {
>   	 */
>   	.driver_features =
>   	    DRIVER_GEM |
> -	    DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ,
> +	    DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ |
> +	    DRIVER_SYNCOBJ_TIMELINE,
>   	.release = i915_driver_release,
>   	.open = i915_driver_open,
>   	.lastclose = i915_driver_lastclose,
> diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
> index 54fce81d5724..b9d3aab53c03 100644
> --- a/drivers/gpu/drm/i915/i915_getparam.c
> +++ b/drivers/gpu/drm/i915/i915_getparam.c
> @@ -132,6 +132,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
>   	case I915_PARAM_HAS_EXEC_BATCH_FIRST:
>   	case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
>   	case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
> +	case I915_PARAM_HAS_EXEC_TIMELINE_FENCES:
>   		/* For the time being all of these are always true;
>   		 * if some supported hardware does not have one of these
>   		 * features this value needs to be provided from
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 7ea38aa6502c..7b8680e3b49d 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -619,6 +619,12 @@ typedef struct drm_i915_irq_wait {
>    */
>   #define I915_PARAM_PERF_REVISION	54
>   
> +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
> + * timeline syncobjs through drm_i915_gem_execbuffer_ext_timeline_fences. See
> + * I915_EXEC_USE_EXTENSIONS.
> + */
> +#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
> +
>   /* Must be kept compact -- no holes and well documented */
>   
>   typedef struct drm_i915_getparam {
> @@ -1047,9 +1053,41 @@ struct drm_i915_gem_exec_fence {
>   };
>   
>   enum drm_i915_gem_execbuffer_ext {
> +	/**
> +	 * See drm_i915_gem_execbuffer_ext_timeline_fences.
> +	 */
> +	DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 1,
> +
>   	DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
>   };
>   
> +/**
> + * This structure describes an array of drm_syncobj and associated points for
> + * timeline variants of drm_syncobj. It is invalid to append this structure to
> + * the execbuf if I915_EXEC_FENCE_ARRAY is set.
> + */
> +struct drm_i915_gem_execbuffer_ext_timeline_fences {
> +	struct i915_user_extension base;
> +
> +	/**
> +	 * Number of elements in the handles_ptr & values_ptr arrays.
> +	 */
> +	__u64 fence_count;
> +
> +	/**
> +	 * Pointer to an array of struct drm_i915_gem_exec_fence of length
> +	 * fence_count.
> +	 */
> +	__u64 handles_ptr;
> +
> +	/**
> +	 * Pointer to an array of u64 values of length fence_count. Values
> +	 * must be 0 for a binary drm_syncobj. A value of 0 for a timeline
> +	 * drm_syncobj is invalid as it turns a drm_syncobj into a binary one.
> +	 */
> +	__u64 values_ptr;
> +};
> +
>   struct drm_i915_gem_execbuffer2 {
>   	/**
>   	 * List of gem_exec_object2 structs


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support
  2020-04-08 16:29   ` Lionel Landwerlin
@ 2020-04-08 17:00     ` Venkata Sandeep Dhanalakota
  0 siblings, 0 replies; 14+ messages in thread
From: Venkata Sandeep Dhanalakota @ 2020-04-08 17:00 UTC (permalink / raw)
  To: Lionel Landwerlin; +Cc: intel-gfx, chris.p.wilson

On 20/04/08 07:29, Lionel Landwerlin wrote:
> On 06/04/2020 23:07, Venkata Sandeep Dhanalakota wrote:
> > Introduces new parameters to execbuf so that we can specify syncobj
> > handles as well as timeline points.
> > 
> > v2: Reuse i915_user_extension_fn
> > 
> > v3: Check that the chained extension is only present once (Chris)
> > 
> > v4: Check that dma_fence_chain_find_seqno returns a non NULL fence
> > (Lionel)
> > 
> > v5: Use BIT_ULL (Chris)
> > 
> > v6: Fix issue with already signaled timeline points,
> >      dma_fence_chain_find_seqno() setting fence to NULL (Chris)
> > 
> > v7: Report ENOENT with invalid syncobj handle (Lionel)
> > 
> > v8: Check for out of order timeline point insertion (Chris)
> > 
> > v9: After explanations on
> >      https://lists.freedesktop.org/archives/dri-devel/2019-August/229287.html
> >      drop the ordering check from v8 (Lionel)
> > 
> > v10: Set first extension enum item to 1 (Jason)
> > 
> > v11: Add wait on previous sync points in timelines (Sandeep)
> 
> 
> Thanks for picking this series up!
> 
> 
> Could you point to the changes in v11?
> 
> I haven't look at it in a while and I can't remember what you would have
> changed.
> 
Hi,

Mainly the changes are in get_timeline_fence_array(), to enforce the
implicit dependencies for the signal fence-array. We want efficient waits
on the last point of a timeline so that we signal at the correct point in
time along the timeline: the ordering is controlled so that we always wait
on the previous request/sync point in the timeline and signal only after
the current request completes.
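
To make that concrete, here is a condensed, illustrative sketch of the two
pieces of the patch that combine to give this behaviour (variable names as
in get_timeline_fence_array() and await_fence_array(); this is not a
literal hunk):

	/*
	 * get_timeline_fence_array(): the timeline's current fence (i.e.
	 * the previously added points) is kept as the entry's wait fence
	 * even when the entry only asks to signal a new point.
	 */
	fence = drm_syncobj_fence_get(syncobj);
	...
	fences[num_fences].dma_fence = fence;

	/*
	 * await_fence_array(): any entry carrying a dma_fence is awaited
	 * (the old I915_EXEC_FENCE_WAIT-only check is gone), so the new
	 * request waits for the previous points before signaling its own.
	 */
	if (!fences[n].dma_fence)
		continue;
	err = i915_request_await_dma_fence(eb->request, fences[n].dma_fence);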

Thank you,
~sandeep
> 
> Thanks a lot,
> 
> 
> -Lionel
> 
> 
> > 
> > Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> > Signed-off-by: Venkata Sandeep Dhanalakota <venkata.s.dhanalakota@intel.com>
> > ---
> >   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 312 ++++++++++++++----
> >   drivers/gpu/drm/i915/i915_drv.c               |   3 +-
> >   drivers/gpu/drm/i915/i915_getparam.c          |   1 +
> >   include/uapi/drm/i915_drm.h                   |  38 +++
> >   4 files changed, 296 insertions(+), 58 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> > index 16831f715daa..4cb4cd035daa 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> > @@ -230,6 +230,13 @@ enum {
> >    * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
> >    */
> > +struct i915_eb_fences {
> > +	struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
> > +	struct dma_fence *dma_fence;
> > +	u64 value;
> > +	struct dma_fence_chain *chain_fence;
> > +};
> > +
> >   struct i915_execbuffer {
> >   	struct drm_i915_private *i915; /** i915 backpointer */
> >   	struct drm_file *file; /** per-file lookup tables and limits */
> > @@ -292,6 +299,7 @@ struct i915_execbuffer {
> >   	struct {
> >   		u64 flags; /** Available extensions parameters */
> > +		struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
> >   	} extensions;
> >   };
> > @@ -2244,67 +2252,219 @@ eb_pin_engine(struct i915_execbuffer *eb,
> >   }
> >   static void
> > -__free_fence_array(struct drm_syncobj **fences, unsigned int n)
> > +__free_fence_array(struct i915_eb_fences *fences, unsigned int n)
> >   {
> > -	while (n--)
> > -		drm_syncobj_put(ptr_mask_bits(fences[n], 2));
> > +	while (n--) {
> > +		drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
> > +		dma_fence_put(fences[n].dma_fence);
> > +		kfree(fences[n].chain_fence);
> > +	}
> >   	kvfree(fences);
> >   }
> > -static struct drm_syncobj **
> > -get_fence_array(struct drm_i915_gem_execbuffer2 *args,
> > -		struct drm_file *file)
> > +static struct i915_eb_fences *
> > +get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
> > +{
> > +	struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences =
> > +		&eb->extensions.timeline_fences;
> > +	struct drm_i915_gem_exec_fence __user *user_fences;
> > +	struct i915_eb_fences *fences;
> > +	u64 __user *user_values;
> > +	u64 num_fences, num_user_fences = timeline_fences->fence_count;
> > +	unsigned long n;
> > +	int err = 0;
> > +
> > +	/* Check multiplication overflow for access_ok() and kvmalloc_array() */
> > +	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
> > +	if (num_user_fences > min_t(unsigned long,
> > +				    ULONG_MAX / sizeof(*user_fences),
> > +				    SIZE_MAX / sizeof(*fences)))
> > +		return ERR_PTR(-EINVAL);
> > +
> > +	user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
> > +	if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences)))
> > +		return ERR_PTR(-EFAULT);
> > +
> > +	user_values = u64_to_user_ptr(timeline_fences->values_ptr);
> > +	if (!access_ok(user_values, num_user_fences * sizeof(*user_values)))
> > +		return ERR_PTR(-EFAULT);
> > +
> > +	fences = kvmalloc_array(num_user_fences, sizeof(*fences),
> > +				__GFP_NOWARN | GFP_KERNEL);
> > +	if (!fences)
> > +		return ERR_PTR(-ENOMEM);
> > +
> > +	BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
> > +		     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
> > +
> > +	for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) {
> > +		struct drm_i915_gem_exec_fence user_fence;
> > +		struct drm_syncobj *syncobj;
> > +		struct dma_fence *fence = NULL;
> > +		u64 point;
> > +
> > +		if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) {
> > +			err = -EFAULT;
> > +			goto err;
> > +		}
> > +
> > +		if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
> > +			err = -EINVAL;
> > +			goto err;
> > +		}
> > +
> > +		if (__get_user(point, user_values++)) {
> > +			err = -EFAULT;
> > +			goto err;
> > +		}
> > +
> > +		syncobj = drm_syncobj_find(eb->file, user_fence.handle);
> > +		if (!syncobj) {
> > +			DRM_DEBUG("Invalid syncobj handle provided\n");
> > +			err = -ENOENT;
> > +			goto err;
> > +		}
> > +
> > +		fence = drm_syncobj_fence_get(syncobj);
> > +
> > +		if (!fence && user_fence.flags &&
> > +		    !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
> > +			DRM_DEBUG("Syncobj handle has no fence\n");
> > +			drm_syncobj_put(syncobj);
> > +			err = -EINVAL;
> > +			goto err;
> > +		}
> > +
> > +		if (fence)
> > +			err = dma_fence_chain_find_seqno(&fence, point);
> > +
> > +		if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
> > +			DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
> > +			drm_syncobj_put(syncobj);
> > +			goto err;
> > +		}
> > +
> > +		/* A point might have been signaled already and
> > +		 * garbage collected from the timeline. In this case
> > +		 * just ignore the point and carry on.
> > +		 */
> > +		if (!fence && (user_fence.flags & I915_EXEC_FENCE_WAIT)) {
> > +			drm_syncobj_put(syncobj);
> > +			continue;
> > +		}
> > +
> > +		/*
> > +		 * For timeline syncobjs we need to preallocate chains for
> > +		 * later signaling.
> > +		 */
> > +		if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
> > +			/*
> > +			 * Waiting and signaling the same point (when point !=
> > +			 * 0) would break the timeline.
> > +			 */
> > +			if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
> > +				DRM_DEBUG("Trying to wait & signal the same timeline point.\n");
> > +				err = -EINVAL;
> > +				drm_syncobj_put(syncobj);
> > +				goto err;
> > +			}
> > +
> > +			fences[num_fences].chain_fence =
> > +				kmalloc(sizeof(*fences[num_fences].chain_fence),
> > +					GFP_KERNEL);
> > +			if (!fences[num_fences].chain_fence) {
> > +				drm_syncobj_put(syncobj);
> > +				err = -ENOMEM;
> > +				DRM_DEBUG("Unable to alloc chain_fence\n");
> > +				goto err;
> > +			}
> > +		} else {
> > +			fences[num_fences].chain_fence = NULL;
> > +		}
> > +
> > +		fences[num_fences].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
> > +		fences[num_fences].dma_fence = fence;
> > +		fences[num_fences].value = point;
> > +		num_fences++;
> > +	}
> > +
> > +	*out_n_fences = num_fences;
> > +
> > +	return fences;
> > +
> > +err:
> > +	__free_fence_array(fences, num_fences);
> > +	return ERR_PTR(err);
> > +}
> > +
> > +static struct i915_eb_fences *
> > +get_legacy_fence_array(struct i915_execbuffer *eb,
> > +		       int *out_n_fences)
> >   {
> > -	const unsigned long nfences = args->num_cliprects;
> > +	struct drm_i915_gem_execbuffer2 *args = eb->args;
> >   	struct drm_i915_gem_exec_fence __user *user;
> > -	struct drm_syncobj **fences;
> > +	struct i915_eb_fences *fences;
> > +	const u32 num_fences = args->num_cliprects;
> >   	unsigned long n;
> >   	int err;
> > -	if (!(args->flags & I915_EXEC_FENCE_ARRAY))
> > -		return NULL;
> > +	*out_n_fences = num_fences;
> >   	/* Check multiplication overflow for access_ok() and kvmalloc_array() */
> >   	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
> > -	if (nfences > min_t(unsigned long,
> > -			    ULONG_MAX / sizeof(*user),
> > -			    SIZE_MAX / sizeof(*fences)))
> > +	if (*out_n_fences > min_t(unsigned long,
> > +				  ULONG_MAX / sizeof(*user),
> > +				  SIZE_MAX / sizeof(*fences)))
> >   		return ERR_PTR(-EINVAL);
> >   	user = u64_to_user_ptr(args->cliprects_ptr);
> > -	if (!access_ok(user, nfences * sizeof(*user)))
> > +	if (!access_ok(user, *out_n_fences * sizeof(*user)))
> >   		return ERR_PTR(-EFAULT);
> > -	fences = kvmalloc_array(nfences, sizeof(*fences),
> > +	fences = kvmalloc_array(*out_n_fences, sizeof(*fences),
> >   				__GFP_NOWARN | GFP_KERNEL);
> >   	if (!fences)
> >   		return ERR_PTR(-ENOMEM);
> > -	for (n = 0; n < nfences; n++) {
> > -		struct drm_i915_gem_exec_fence fence;
> > +	for (n = 0; n < *out_n_fences; n++) {
> > +		struct drm_i915_gem_exec_fence user_fence;
> >   		struct drm_syncobj *syncobj;
> > +		struct dma_fence *fence = NULL;
> > -		if (__copy_from_user(&fence, user++, sizeof(fence))) {
> > +		if (__copy_from_user(&user_fence, user++, sizeof(user_fence))) {
> >   			err = -EFAULT;
> >   			goto err;
> >   		}
> > -		if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
> > +		if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
> >   			err = -EINVAL;
> >   			goto err;
> >   		}
> > -		syncobj = drm_syncobj_find(file, fence.handle);
> > +		syncobj = drm_syncobj_find(eb->file, user_fence.handle);
> >   		if (!syncobj) {
> >   			DRM_DEBUG("Invalid syncobj handle provided\n");
> >   			err = -ENOENT;
> >   			goto err;
> >   		}
> > +		if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
> > +			fence = drm_syncobj_fence_get(syncobj);
> > +			if (!fence) {
> > +				DRM_DEBUG("Syncobj handle has no fence\n");
> > +				drm_syncobj_put(syncobj);
> > +				err = -EINVAL;
> > +				goto err;
> > +			}
> > +		}
> > +
> >   		BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
> >   			     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
> > -		fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
> > +		fences[n].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
> > +		fences[n].dma_fence = fence;
> > +		fences[n].value = 0;
> > +		fences[n].chain_fence = NULL;
> >   	}
> >   	return fences;
> > @@ -2314,37 +2474,45 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args,
> >   	return ERR_PTR(err);
> >   }
> > +static struct i915_eb_fences *
> > +get_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
> > +{
> > +	if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
> > +		return get_legacy_fence_array(eb, out_n_fences);
> > +
> > +	if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
> > +		return get_timeline_fence_array(eb, out_n_fences);
> > +
> > +	*out_n_fences = 0;
> > +	return NULL;
> > +}
> > +
> >   static void
> > -put_fence_array(struct drm_i915_gem_execbuffer2 *args,
> > -		struct drm_syncobj **fences)
> > +put_fence_array(struct i915_eb_fences *fences, int nfences)
> >   {
> >   	if (fences)
> > -		__free_fence_array(fences, args->num_cliprects);
> > +		__free_fence_array(fences, nfences);
> >   }
> >   static int
> >   await_fence_array(struct i915_execbuffer *eb,
> > -		  struct drm_syncobj **fences)
> > +		  struct i915_eb_fences *fences,
> > +		  int nfences)
> >   {
> > -	const unsigned int nfences = eb->args->num_cliprects;
> >   	unsigned int n;
> >   	int err;
> >   	for (n = 0; n < nfences; n++) {
> >   		struct drm_syncobj *syncobj;
> > -		struct dma_fence *fence;
> >   		unsigned int flags;
> > -		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
> > -		if (!(flags & I915_EXEC_FENCE_WAIT))
> > -			continue;
> > +		syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
> > -		fence = drm_syncobj_fence_get(syncobj);
> > -		if (!fence)
> > -			return -EINVAL;
> > +		if (!fences[n].dma_fence)
> > +			continue;
> > -		err = i915_request_await_dma_fence(eb->request, fence);
> > -		dma_fence_put(fence);
> > +		err = i915_request_await_dma_fence(eb->request,
> > +						   fences[n].dma_fence);
> >   		if (err < 0)
> >   			return err;
> >   	}
> > @@ -2354,9 +2522,9 @@ await_fence_array(struct i915_execbuffer *eb,
> >   static void
> >   signal_fence_array(struct i915_execbuffer *eb,
> > -		   struct drm_syncobj **fences)
> > +		   struct i915_eb_fences *fences,
> > +		   int nfences)
> >   {
> > -	const unsigned int nfences = eb->args->num_cliprects;
> >   	struct dma_fence * const fence = &eb->request->fence;
> >   	unsigned int n;
> > @@ -2364,14 +2532,44 @@ signal_fence_array(struct i915_execbuffer *eb,
> >   		struct drm_syncobj *syncobj;
> >   		unsigned int flags;
> > -		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
> > +		syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
> >   		if (!(flags & I915_EXEC_FENCE_SIGNAL))
> >   			continue;
> > -		drm_syncobj_replace_fence(syncobj, fence);
> > +		if (fences[n].chain_fence) {
> > +			drm_syncobj_add_point(syncobj, fences[n].chain_fence,
> > +					      fence, fences[n].value);
> > +			/*
> > +			 * The chain's ownership is transferred to the
> > +			 * timeline.
> > +			 */
> > +			fences[n].chain_fence = NULL;
> > +		} else {
> > +			drm_syncobj_replace_fence(syncobj, fence);
> > +		}
> >   	}
> >   }
> > +static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
> > +{
> > +	struct i915_execbuffer *eb = data;
> > +
> > +	/* Timeline fences are incompatible with the fence array flag. */
> > +	if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
> > +		return -EINVAL;
> > +
> > +	if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
> > +		return -EINVAL;
> > +
> > +	if (copy_from_user(&eb->extensions.timeline_fences, ext,
> > +			   sizeof(eb->extensions.timeline_fences)))
> > +		return -EFAULT;
> > +
> > +	eb->extensions.flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES);
> > +
> > +	return 0;
> > +}
> > +
> >   static void retire_requests(struct intel_timeline *tl, struct i915_request *end)
> >   {
> >   	struct i915_request *rq, *rn;
> > @@ -2438,6 +2636,7 @@ static void eb_request_add(struct i915_execbuffer *eb)
> >   }
> >   static const i915_user_extension_fn execbuf_extensions[] = {
> > +	[DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
> >   };
> >   static int
> > @@ -2468,16 +2667,17 @@ static int
> >   i915_gem_do_execbuffer(struct drm_device *dev,
> >   		       struct drm_file *file,
> >   		       struct drm_i915_gem_execbuffer2 *args,
> > -		       struct drm_i915_gem_exec_object2 *exec,
> > -		       struct drm_syncobj **fences)
> > +		       struct drm_i915_gem_exec_object2 *exec)
> >   {
> >   	struct drm_i915_private *i915 = to_i915(dev);
> >   	struct i915_execbuffer eb;
> >   	struct dma_fence *in_fence = NULL;
> >   	struct dma_fence *exec_fence = NULL;
> >   	struct sync_file *out_fence = NULL;
> > +	struct i915_eb_fences *fences = NULL;
> >   	struct i915_vma *batch;
> >   	int out_fence_fd = -1;
> > +	int nfences = 0;
> >   	int err;
> >   	BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
> > @@ -2521,10 +2721,16 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> >   	if (err)
> >   		return err;
> > +	fences = get_fence_array(&eb, &nfences);
> > +	if (IS_ERR(fences))
> > +		return PTR_ERR(fences);
> > +
> >   	if (args->flags & I915_EXEC_FENCE_IN) {
> >   		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
> > -		if (!in_fence)
> > -			return -EINVAL;
> > +		if (!in_fence) {
> > +			err = -EINVAL;
> > +			goto err_fences;
> > +		}
> >   	}
> >   	if (args->flags & I915_EXEC_FENCE_SUBMIT) {
> > @@ -2648,7 +2854,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> >   	}
> >   	if (fences) {
> > -		err = await_fence_array(&eb, fences);
> > +		err = await_fence_array(&eb, fences, nfences);
> >   		if (err)
> >   			goto err_request;
> >   	}
> > @@ -2680,7 +2886,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> >   	eb_request_add(&eb);
> >   	if (fences)
> > -		signal_fence_array(&eb, fences);
> > +		signal_fence_array(&eb, fences, nfences);
> >   	if (out_fence) {
> >   		if (err == 0) {
> > @@ -2715,6 +2921,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
> >   	dma_fence_put(exec_fence);
> >   err_in_fence:
> >   	dma_fence_put(in_fence);
> > +err_fences:
> > +	put_fence_array(fences, nfences);
> >   	return err;
> >   }
> > @@ -2809,7 +3017,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
> >   			exec2_list[i].flags = 0;
> >   	}
> > -	err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL);
> > +	err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list);
> >   	if (exec2.flags & __EXEC_HAS_RELOC) {
> >   		struct drm_i915_gem_exec_object __user *user_exec_list =
> >   			u64_to_user_ptr(args->buffers_ptr);
> > @@ -2841,7 +3049,6 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
> >   	struct drm_i915_private *i915 = to_i915(dev);
> >   	struct drm_i915_gem_execbuffer2 *args = data;
> >   	struct drm_i915_gem_exec_object2 *exec2_list;
> > -	struct drm_syncobj **fences = NULL;
> >   	const size_t count = args->buffer_count;
> >   	int err;
> > @@ -2869,15 +3076,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
> >   		return -EFAULT;
> >   	}
> > -	if (args->flags & I915_EXEC_FENCE_ARRAY) {
> > -		fences = get_fence_array(args, file);
> > -		if (IS_ERR(fences)) {
> > -			kvfree(exec2_list);
> > -			return PTR_ERR(fences);
> > -		}
> > -	}
> > -
> > -	err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences);
> > +	err = i915_gem_do_execbuffer(dev, file, args, exec2_list);
> >   	/*
> >   	 * Now that we have begun execution of the batchbuffer, we ignore
> > @@ -2917,7 +3116,6 @@ end:;
> >   	}
> >   	args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
> > -	put_fence_array(args, fences);
> >   	kvfree(exec2_list);
> >   	return err;
> >   }
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> > index a7a3b4b98572..f7f868c3c510 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -1828,7 +1828,8 @@ static struct drm_driver driver = {
> >   	 */
> >   	.driver_features =
> >   	    DRIVER_GEM |
> > -	    DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ,
> > +	    DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ |
> > +	    DRIVER_SYNCOBJ_TIMELINE,
> >   	.release = i915_driver_release,
> >   	.open = i915_driver_open,
> >   	.lastclose = i915_driver_lastclose,
> > diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
> > index 54fce81d5724..b9d3aab53c03 100644
> > --- a/drivers/gpu/drm/i915/i915_getparam.c
> > +++ b/drivers/gpu/drm/i915/i915_getparam.c
> > @@ -132,6 +132,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
> >   	case I915_PARAM_HAS_EXEC_BATCH_FIRST:
> >   	case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
> >   	case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
> > +	case I915_PARAM_HAS_EXEC_TIMELINE_FENCES:
> >   		/* For the time being all of these are always true;
> >   		 * if some supported hardware does not have one of these
> >   		 * features this value needs to be provided from
> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > index 7ea38aa6502c..7b8680e3b49d 100644
> > --- a/include/uapi/drm/i915_drm.h
> > +++ b/include/uapi/drm/i915_drm.h
> > @@ -619,6 +619,12 @@ typedef struct drm_i915_irq_wait {
> >    */
> >   #define I915_PARAM_PERF_REVISION	54
> > +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
> > + * timeline syncobjs through drm_i915_gem_execbuffer_ext_timeline_fences. See
> > + * I915_EXEC_USE_EXTENSIONS.
> > + */
> > +#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
> > +
> >   /* Must be kept compact -- no holes and well documented */
> >   typedef struct drm_i915_getparam {
> > @@ -1047,9 +1053,41 @@ struct drm_i915_gem_exec_fence {
> >   };
> >   enum drm_i915_gem_execbuffer_ext {
> > +	/**
> > +	 * See drm_i915_gem_execbuffer_ext_timeline_fences.
> > +	 */
> > +	DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 1,
> > +
> >   	DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
> >   };
> > +/**
> > + * This structure describes an array of drm_syncobj and associated points for
> > + * timeline variants of drm_syncobj. It is invalid to append this structure to
> > + * the execbuf if I915_EXEC_FENCE_ARRAY is set.
> > + */
> > +struct drm_i915_gem_execbuffer_ext_timeline_fences {
> > +	struct i915_user_extension base;
> > +
> > +	/**
> > +	 * Number of elements in the handles_ptr & values_ptr arrays.
> > +	 */
> > +	__u64 fence_count;
> > +
> > +	/**
> > +	 * Pointer to an array of struct drm_i915_gem_exec_fence of length
> > +	 * fence_count.
> > +	 */
> > +	__u64 handles_ptr;
> > +
> > +	/**
> > +	 * Pointer to an array of u64 values of length fence_count. Values
> > +	 * must be 0 for a binary drm_syncobj. A value of 0 for a timeline
> > +	 * drm_syncobj is invalid as it turns a drm_syncobj into a binary one.
> > +	 */
> > +	__u64 values_ptr;
> > +};
> > +
> >   struct drm_i915_gem_execbuffer2 {
> >   	/**
> >   	 * List of gem_exec_object2 structs
> 
> 
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support
  2020-04-06 20:07 ` [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support Venkata Sandeep Dhanalakota
  2020-04-08 16:29   ` Lionel Landwerlin
@ 2020-04-08 17:14   ` Lionel Landwerlin
  1 sibling, 0 replies; 14+ messages in thread
From: Lionel Landwerlin @ 2020-04-08 17:14 UTC (permalink / raw)
  To: Venkata Sandeep Dhanalakota, intel-gfx; +Cc: chris.p.wilson

On 06/04/2020 23:07, Venkata Sandeep Dhanalakota wrote:
> Introduces new parameters to execbuf so that we can specify syncobj
> handles as well as timeline points.
>
> v2: Reuse i915_user_extension_fn
>
> v3: Check that the chained extension is only present once (Chris)
>
> v4: Check that dma_fence_chain_find_seqno returns a non NULL fence
> (Lionel)
>
> v5: Use BIT_ULL (Chris)
>
> v6: Fix issue with already signaled timeline points,
>      dma_fence_chain_find_seqno() setting fence to NULL (Chris)
>
> v7: Report ENOENT with invalid syncobj handle (Lionel)
>
> v8: Check for out of order timeline point insertion (Chris)
>
> v9: After explanations on
>      https://lists.freedesktop.org/archives/dri-devel/2019-August/229287.html
>      drop the ordering check from v8 (Lionel)
>
> v10: Set first extension enum item to 1 (Jason)
>
> v11: Add wait on previous sync points in timelines (Sandeep)
>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Signed-off-by: Venkata Sandeep Dhanalakota <venkata.s.dhanalakota@intel.com>
> ---
>   .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 312 ++++++++++++++----
>   drivers/gpu/drm/i915/i915_drv.c               |   3 +-
>   drivers/gpu/drm/i915/i915_getparam.c          |   1 +
>   include/uapi/drm/i915_drm.h                   |  38 +++
>   4 files changed, 296 insertions(+), 58 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 16831f715daa..4cb4cd035daa 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -230,6 +230,13 @@ enum {
>    * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
>    */
>   
> +struct i915_eb_fences {
> +	struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
> +	struct dma_fence *dma_fence;
> +	u64 value;
> +	struct dma_fence_chain *chain_fence;
> +};
> +
>   struct i915_execbuffer {
>   	struct drm_i915_private *i915; /** i915 backpointer */
>   	struct drm_file *file; /** per-file lookup tables and limits */
> @@ -292,6 +299,7 @@ struct i915_execbuffer {
>   
>   	struct {
>   		u64 flags; /** Available extensions parameters */
> +		struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
>   	} extensions;
>   };
>   
> @@ -2244,67 +2252,219 @@ eb_pin_engine(struct i915_execbuffer *eb,
>   }
>   
>   static void
> -__free_fence_array(struct drm_syncobj **fences, unsigned int n)
> +__free_fence_array(struct i915_eb_fences *fences, unsigned int n)
>   {
> -	while (n--)
> -		drm_syncobj_put(ptr_mask_bits(fences[n], 2));
> +	while (n--) {
> +		drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
> +		dma_fence_put(fences[n].dma_fence);
> +		kfree(fences[n].chain_fence);
> +	}
>   	kvfree(fences);
>   }
>   
> -static struct drm_syncobj **
> -get_fence_array(struct drm_i915_gem_execbuffer2 *args,
> -		struct drm_file *file)
> +static struct i915_eb_fences *
> +get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
> +{
> +	struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences =
> +		&eb->extensions.timeline_fences;
> +	struct drm_i915_gem_exec_fence __user *user_fences;
> +	struct i915_eb_fences *fences;
> +	u64 __user *user_values;
> +	u64 num_fences, num_user_fences = timeline_fences->fence_count;
> +	unsigned long n;
> +	int err = 0;
> +
> +	/* Check multiplication overflow for access_ok() and kvmalloc_array() */
> +	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
> +	if (num_user_fences > min_t(unsigned long,
> +				    ULONG_MAX / sizeof(*user_fences),
> +				    SIZE_MAX / sizeof(*fences)))
> +		return ERR_PTR(-EINVAL);
> +
> +	user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
> +	if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences)))
> +		return ERR_PTR(-EFAULT);
> +
> +	user_values = u64_to_user_ptr(timeline_fences->values_ptr);
> +	if (!access_ok(user_values, num_user_fences * sizeof(*user_values)))
> +		return ERR_PTR(-EFAULT);
> +
> +	fences = kvmalloc_array(num_user_fences, sizeof(*fences),
> +				__GFP_NOWARN | GFP_KERNEL);
> +	if (!fences)
> +		return ERR_PTR(-ENOMEM);
> +
> +	BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
> +		     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
> +
> +	for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) {
> +		struct drm_i915_gem_exec_fence user_fence;
> +		struct drm_syncobj *syncobj;
> +		struct dma_fence *fence = NULL;
> +		u64 point;
> +
> +		if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) {
> +			err = -EFAULT;
> +			goto err;
> +		}
> +
> +		if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
> +			err = -EINVAL;
> +			goto err;
> +		}
> +
> +		if (__get_user(point, user_values++)) {
> +			err = -EFAULT;
> +			goto err;
> +		}
> +
> +		syncobj = drm_syncobj_find(eb->file, user_fence.handle);
> +		if (!syncobj) {
> +			DRM_DEBUG("Invalid syncobj handle provided\n");
> +			err = -ENOENT;
> +			goto err;
> +		}
> +
> +		fence = drm_syncobj_fence_get(syncobj);
> +
> +		if (!fence && user_fence.flags &&
> +		    !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
> +			DRM_DEBUG("Syncobj handle has no fence\n");
> +			drm_syncobj_put(syncobj);
> +			err = -EINVAL;
> +			goto err;
> +		}
> +
> +		if (fence)
> +			err = dma_fence_chain_find_seqno(&fence, point);
> +
> +		if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
> +			DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
> +			drm_syncobj_put(syncobj);
> +			goto err;
> +		}
> +
> +		/* A point might have been signaled already and
> +		 * garbage collected from the timeline. In this case
> +		 * just ignore the point and carry on.
> +		 */
> +		if (!fence && (user_fence.flags & I915_EXEC_FENCE_WAIT)) {


I think we can only skip if we're only waiting. If there is a signal 
request we still need to honor it.

So I would replace this check above with:

if (!fence && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
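
For illustration, the whole early-exit block would then read as below (the
body is kept exactly as in the patch; only the condition changes):

	/* Nothing left to wait on and nothing to signal: skip this entry. */
	if (!fence && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
		drm_syncobj_put(syncobj);
		continue;
	}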


> +			drm_syncobj_put(syncobj);
> +			continue;
> +		}
> +
> +		/*
> +		 * For timeline syncobjs we need to preallocate chains for
> +		 * later signaling.
> +		 */
> +		if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
> +			/*
> +			 * Waiting and signaling the same point (when point !=
> +			 * 0) would break the timeline.
> +			 */
> +			if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
> +				DRM_DEBUG("Trying to wait & signal the same timeline point.\n");
> +				err = -EINVAL;
> +				drm_syncobj_put(syncobj);
> +				goto err;
> +			}


I think we can actually allow this. Wait and signal operations are added
in order, so we could wait on point 3 and then replace it with another point 3.


If we keep this limitation, we need to add:

if (fence)
	dma_fence_put(fence);


> +
> +			fences[num_fences].chain_fence =
> +				kmalloc(sizeof(*fences[num_fences].chain_fence),
> +					GFP_KERNEL);
> +			if (!fences[num_fences].chain_fence) {
> +				drm_syncobj_put(syncobj);


With the change above, we could arrive here with fence != NULL.

We probably need to add:

if (fence)
	dma_fence_put(fence);


> +				err = -ENOMEM;
> +				DRM_DEBUG("Unable to alloc chain_fence\n");
> +				goto err;
> +			}
> +		} else {
> +			fences[num_fences].chain_fence = NULL;
> +		}
> +
> +		fences[num_fences].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
> +		fences[num_fences].dma_fence = fence;
> +		fences[num_fences].value = point;
> +		num_fences++;
> +	}
> +
> +	*out_n_fences = num_fences;
> +
> +	return fences;
> +
> +err:
> +	__free_fence_array(fences, num_fences);
> +	return ERR_PTR(err);
> +}
> +
> +static struct i915_eb_fences *
> +get_legacy_fence_array(struct i915_execbuffer *eb,
> +		       int *out_n_fences)
>   {
> -	const unsigned long nfences = args->num_cliprects;
> +	struct drm_i915_gem_execbuffer2 *args = eb->args;
>   	struct drm_i915_gem_exec_fence __user *user;
> -	struct drm_syncobj **fences;
> +	struct i915_eb_fences *fences;
> +	const u32 num_fences = args->num_cliprects;
>   	unsigned long n;
>   	int err;
>   
> -	if (!(args->flags & I915_EXEC_FENCE_ARRAY))
> -		return NULL;
> +	*out_n_fences = num_fences;
>   
>   	/* Check multiplication overflow for access_ok() and kvmalloc_array() */
>   	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
> -	if (nfences > min_t(unsigned long,
> -			    ULONG_MAX / sizeof(*user),
> -			    SIZE_MAX / sizeof(*fences)))
> +	if (*out_n_fences > min_t(unsigned long,
> +				  ULONG_MAX / sizeof(*user),
> +				  SIZE_MAX / sizeof(*fences)))
>   		return ERR_PTR(-EINVAL);
>   
>   	user = u64_to_user_ptr(args->cliprects_ptr);
> -	if (!access_ok(user, nfences * sizeof(*user)))
> +	if (!access_ok(user, *out_n_fences * sizeof(*user)))
>   		return ERR_PTR(-EFAULT);
>   
> -	fences = kvmalloc_array(nfences, sizeof(*fences),
> +	fences = kvmalloc_array(*out_n_fences, sizeof(*fences),
>   				__GFP_NOWARN | GFP_KERNEL);
>   	if (!fences)
>   		return ERR_PTR(-ENOMEM);
>   
> -	for (n = 0; n < nfences; n++) {
> -		struct drm_i915_gem_exec_fence fence;
> +	for (n = 0; n < *out_n_fences; n++) {
> +		struct drm_i915_gem_exec_fence user_fence;
>   		struct drm_syncobj *syncobj;
> +		struct dma_fence *fence = NULL;
>   
> -		if (__copy_from_user(&fence, user++, sizeof(fence))) {
> +		if (__copy_from_user(&user_fence, user++, sizeof(user_fence))) {
>   			err = -EFAULT;
>   			goto err;
>   		}
>   
> -		if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
> +		if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
>   			err = -EINVAL;
>   			goto err;
>   		}
>   
> -		syncobj = drm_syncobj_find(file, fence.handle);
> +		syncobj = drm_syncobj_find(eb->file, user_fence.handle);
>   		if (!syncobj) {
>   			DRM_DEBUG("Invalid syncobj handle provided\n");
>   			err = -ENOENT;
>   			goto err;
>   		}
>   
> +		if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
> +			fence = drm_syncobj_fence_get(syncobj);
> +			if (!fence) {
> +				DRM_DEBUG("Syncobj handle has no fence\n");
> +				drm_syncobj_put(syncobj);
> +				err = -EINVAL;
> +				goto err;
> +			}
> +		}
> +
>   		BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
>   			     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
>   
> -		fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
> +		fences[n].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
> +		fences[n].dma_fence = fence;
> +		fences[n].value = 0;
> +		fences[n].chain_fence = NULL;
>   	}
>   
>   	return fences;
> @@ -2314,37 +2474,45 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args,
>   	return ERR_PTR(err);
>   }
>   
> +static struct i915_eb_fences *
> +get_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
> +{
> +	if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
> +		return get_legacy_fence_array(eb, out_n_fences);
> +
> +	if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
> +		return get_timeline_fence_array(eb, out_n_fences);
> +
> +	*out_n_fences = 0;
> +	return NULL;
> +}
> +
>   static void
> -put_fence_array(struct drm_i915_gem_execbuffer2 *args,
> -		struct drm_syncobj **fences)
> +put_fence_array(struct i915_eb_fences *fences, int nfences)
>   {
>   	if (fences)
> -		__free_fence_array(fences, args->num_cliprects);
> +		__free_fence_array(fences, nfences);
>   }
>   
>   static int
>   await_fence_array(struct i915_execbuffer *eb,
> -		  struct drm_syncobj **fences)
> +		  struct i915_eb_fences *fences,
> +		  int nfences)
>   {
> -	const unsigned int nfences = eb->args->num_cliprects;
>   	unsigned int n;
>   	int err;
>   
>   	for (n = 0; n < nfences; n++) {
>   		struct drm_syncobj *syncobj;
> -		struct dma_fence *fence;
>   		unsigned int flags;
>   
> -		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
> -		if (!(flags & I915_EXEC_FENCE_WAIT))
> -			continue;
> +		syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
>   
> -		fence = drm_syncobj_fence_get(syncobj);
> -		if (!fence)
> -			return -EINVAL;
> +		if (!fences[n].dma_fence)
> +			continue;
>   
> -		err = i915_request_await_dma_fence(eb->request, fence);
> -		dma_fence_put(fence);
> +		err = i915_request_await_dma_fence(eb->request,
> +						   fences[n].dma_fence);
>   		if (err < 0)
>   			return err;
>   	}
> @@ -2354,9 +2522,9 @@ await_fence_array(struct i915_execbuffer *eb,
>   
>   static void
>   signal_fence_array(struct i915_execbuffer *eb,
> -		   struct drm_syncobj **fences)
> +		   struct i915_eb_fences *fences,
> +		   int nfences)
>   {
> -	const unsigned int nfences = eb->args->num_cliprects;
>   	struct dma_fence * const fence = &eb->request->fence;
>   	unsigned int n;
>   
> @@ -2364,14 +2532,44 @@ signal_fence_array(struct i915_execbuffer *eb,
>   		struct drm_syncobj *syncobj;
>   		unsigned int flags;
>   
> -		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
> +		syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
>   		if (!(flags & I915_EXEC_FENCE_SIGNAL))
>   			continue;
>   
> -		drm_syncobj_replace_fence(syncobj, fence);
> +		if (fences[n].chain_fence) {
> +			drm_syncobj_add_point(syncobj, fences[n].chain_fence,
> +					      fence, fences[n].value);
> +			/*
> +			 * The chain's ownership is transferred to the
> +			 * timeline.
> +			 */
> +			fences[n].chain_fence = NULL;
> +		} else {
> +			drm_syncobj_replace_fence(syncobj, fence);
> +		}
>   	}
>   }
>   
> +static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
> +{
> +	struct i915_execbuffer *eb = data;
> +
> +	/* Timeline fences are incompatible with the fence array flag. */
> +	if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
> +		return -EINVAL;
> +
> +	if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
> +		return -EINVAL;
> +
> +	if (copy_from_user(&eb->extensions.timeline_fences, ext,
> +			   sizeof(eb->extensions.timeline_fences)))
> +		return -EFAULT;
> +
> +	eb->extensions.flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES);
> +
> +	return 0;
> +}
> +
>   static void retire_requests(struct intel_timeline *tl, struct i915_request *end)
>   {
>   	struct i915_request *rq, *rn;
> @@ -2438,6 +2636,7 @@ static void eb_request_add(struct i915_execbuffer *eb)
>   }
>   
>   static const i915_user_extension_fn execbuf_extensions[] = {
> +	[DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
>   };
>   
>   static int
> @@ -2468,16 +2667,17 @@ static int
>   i915_gem_do_execbuffer(struct drm_device *dev,
>   		       struct drm_file *file,
>   		       struct drm_i915_gem_execbuffer2 *args,
> -		       struct drm_i915_gem_exec_object2 *exec,
> -		       struct drm_syncobj **fences)
> +		       struct drm_i915_gem_exec_object2 *exec)
>   {
>   	struct drm_i915_private *i915 = to_i915(dev);
>   	struct i915_execbuffer eb;
>   	struct dma_fence *in_fence = NULL;
>   	struct dma_fence *exec_fence = NULL;
>   	struct sync_file *out_fence = NULL;
> +	struct i915_eb_fences *fences = NULL;
>   	struct i915_vma *batch;
>   	int out_fence_fd = -1;
> +	int nfences = 0;
>   	int err;
>   
>   	BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
> @@ -2521,10 +2721,16 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   	if (err)
>   		return err;
>   
> +	fences = get_fence_array(&eb, &nfences);
> +	if (IS_ERR(fences))
> +		return PTR_ERR(fences);
> +
>   	if (args->flags & I915_EXEC_FENCE_IN) {
>   		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
> -		if (!in_fence)
> -			return -EINVAL;
> +		if (!in_fence) {
> +			err = -EINVAL;
> +			goto err_fences;
> +		}
>   	}
>   
>   	if (args->flags & I915_EXEC_FENCE_SUBMIT) {
> @@ -2648,7 +2854,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   	}
>   
>   	if (fences) {
> -		err = await_fence_array(&eb, fences);
> +		err = await_fence_array(&eb, fences, nfences);
>   		if (err)
>   			goto err_request;
>   	}
> @@ -2680,7 +2886,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   	eb_request_add(&eb);
>   
>   	if (fences)
> -		signal_fence_array(&eb, fences);
> +		signal_fence_array(&eb, fences, nfences);
>   
>   	if (out_fence) {
>   		if (err == 0) {
> @@ -2715,6 +2921,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
>   	dma_fence_put(exec_fence);
>   err_in_fence:
>   	dma_fence_put(in_fence);
> +err_fences:
> +	put_fence_array(fences, nfences);
>   	return err;
>   }
>   
> @@ -2809,7 +3017,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
>   			exec2_list[i].flags = 0;
>   	}
>   
> -	err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL);
> +	err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list);
>   	if (exec2.flags & __EXEC_HAS_RELOC) {
>   		struct drm_i915_gem_exec_object __user *user_exec_list =
>   			u64_to_user_ptr(args->buffers_ptr);
> @@ -2841,7 +3049,6 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
>   	struct drm_i915_private *i915 = to_i915(dev);
>   	struct drm_i915_gem_execbuffer2 *args = data;
>   	struct drm_i915_gem_exec_object2 *exec2_list;
> -	struct drm_syncobj **fences = NULL;
>   	const size_t count = args->buffer_count;
>   	int err;
>   
> @@ -2869,15 +3076,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
>   		return -EFAULT;
>   	}
>   
> -	if (args->flags & I915_EXEC_FENCE_ARRAY) {
> -		fences = get_fence_array(args, file);
> -		if (IS_ERR(fences)) {
> -			kvfree(exec2_list);
> -			return PTR_ERR(fences);
> -		}
> -	}
> -
> -	err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences);
> +	err = i915_gem_do_execbuffer(dev, file, args, exec2_list);
>   
>   	/*
>   	 * Now that we have begun execution of the batchbuffer, we ignore
> @@ -2917,7 +3116,6 @@ end:;
>   	}
>   
>   	args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
> -	put_fence_array(args, fences);
>   	kvfree(exec2_list);
>   	return err;
>   }
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index a7a3b4b98572..f7f868c3c510 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1828,7 +1828,8 @@ static struct drm_driver driver = {
>   	 */
>   	.driver_features =
>   	    DRIVER_GEM |
> -	    DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ,
> +	    DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ |
> +	    DRIVER_SYNCOBJ_TIMELINE,
>   	.release = i915_driver_release,
>   	.open = i915_driver_open,
>   	.lastclose = i915_driver_lastclose,
> diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
> index 54fce81d5724..b9d3aab53c03 100644
> --- a/drivers/gpu/drm/i915/i915_getparam.c
> +++ b/drivers/gpu/drm/i915/i915_getparam.c
> @@ -132,6 +132,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
>   	case I915_PARAM_HAS_EXEC_BATCH_FIRST:
>   	case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
>   	case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
> +	case I915_PARAM_HAS_EXEC_TIMELINE_FENCES:
>   		/* For the time being all of these are always true;
>   		 * if some supported hardware does not have one of these
>   		 * features this value needs to be provided from
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 7ea38aa6502c..7b8680e3b49d 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -619,6 +619,12 @@ typedef struct drm_i915_irq_wait {
>    */
>   #define I915_PARAM_PERF_REVISION	54
>   
> +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
> + * timeline syncobjs through drm_i915_gem_execbuffer_ext_timeline_fences. See
> + * I915_EXEC_USE_EXTENSIONS.
> + */
> +#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
> +
>   /* Must be kept compact -- no holes and well documented */
>   
>   typedef struct drm_i915_getparam {
> @@ -1047,9 +1053,41 @@ struct drm_i915_gem_exec_fence {
>   };
>   
>   enum drm_i915_gem_execbuffer_ext {
> +	/**
> +	 * See drm_i915_gem_execbuffer_ext_timeline_fences.
> +	 */
> +	DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 1,
> +
>   	DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
>   };
>   
> +/**
> + * This structure describes an array of drm_syncobj and associated points for
> + * timeline variants of drm_syncobj. It is invalid to append this structure to
> + * the execbuf if I915_EXEC_FENCE_ARRAY is set.
> + */
> +struct drm_i915_gem_execbuffer_ext_timeline_fences {
> +	struct i915_user_extension base;
> +
> +	/**
> +	 * Number of elements in the handles_ptr & values_ptr arrays.
> +	 */
> +	__u64 fence_count;
> +
> +	/**
> +	 * Pointer to an array of struct drm_i915_gem_exec_fence of length
> +	 * fence_count.
> +	 */
> +	__u64 handles_ptr;
> +
> +	/**
> +	 * Pointer to an array of u64 values of length fence_count. Values
> +	 * must be 0 for a binary drm_syncobj. A value of 0 for a timeline
> +	 * drm_syncobj is invalid as it turns a drm_syncobj into a binary one.
> +	 */
> +	__u64 values_ptr;
> +};
> +
>   struct drm_i915_gem_execbuffer2 {
>   	/**
>   	 * List of gem_exec_object2 structs


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences
  2020-08-03 14:08   ` Chris Wilson
@ 2020-08-03 14:11     ` Lionel Landwerlin
  0 siblings, 0 replies; 14+ messages in thread
From: Lionel Landwerlin @ 2020-08-03 14:11 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx; +Cc: Daniel Vetter

On 03/08/2020 17:08, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2020-08-03 15:01:47)
>> To allow faster engine to engine synchronization, peel the layer of
>> dma-fence-chain to expose potential i915 fences so that the
>> i915-request code can emit HW semaphore wait/signal operations in the
>> ring which is faster than waking up the host to submit unblocked
>> workloads after interrupt notification.
>>
>> v2: Also deal with chains where the last node is not a dma-fence-chain
> This is already done by i915_request_await_dma_fence.
> -Chris
Cool, we can drop this then.

-Lionel

* Re: [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences
  2020-08-03 14:01 ` [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences Lionel Landwerlin
@ 2020-08-03 14:08   ` Chris Wilson
  2020-08-03 14:11     ` Lionel Landwerlin
  0 siblings, 1 reply; 14+ messages in thread
From: Chris Wilson @ 2020-08-03 14:08 UTC (permalink / raw)
  To: Lionel Landwerlin, intel-gfx; +Cc: Daniel Vetter

Quoting Lionel Landwerlin (2020-08-03 15:01:47)
> To allow faster engine-to-engine synchronization, peel the
> dma-fence-chain layer to expose the potential i915 fences underneath,
> so that the i915-request code can emit HW semaphore wait/signal
> operations in the ring. This is faster than waking up the host to
> submit unblocked workloads after an interrupt notification.
> 
> v2: Also deal with chains where the last node is not a dma-fence-chain

This is already done by i915_request_await_dma_fence.
-Chris

* [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences
  2020-08-03 14:01 [Intel-gfx] [PATCH 0/3] drm/i915: timeline semaphore support Lionel Landwerlin
@ 2020-08-03 14:01 ` Lionel Landwerlin
  2020-08-03 14:08   ` Chris Wilson
  0 siblings, 1 reply; 14+ messages in thread
From: Lionel Landwerlin @ 2020-08-03 14:01 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter

To allow faster engine-to-engine synchronization, peel the
dma-fence-chain layer to expose the potential i915 fences underneath,
so that the i915-request code can emit HW semaphore wait/signal
operations in the ring. This is faster than waking up the host to
submit unblocked workloads after an interrupt notification.

v2: Also deal with chains where the last node is not a dma-fence-chain

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 39 ++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 1f766431f3a3..dbd7f03c2187 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2390,6 +2390,7 @@ await_fence_array(struct i915_execbuffer *eb)
 
 	for (n = 0; n < eb->n_fences; n++) {
 		struct drm_syncobj *syncobj;
+		struct dma_fence_chain *chain;
 		struct dma_fence *fence;
 		unsigned int flags;
 
@@ -2410,7 +2411,43 @@ await_fence_array(struct i915_execbuffer *eb)
 				continue;
 		}
 
-		err = i915_request_await_dma_fence(eb->request, fence);
+		chain = to_dma_fence_chain(fence);
+		if (chain) {
+			struct dma_fence *iter;
+
+			/*
+			 * If we're dealing with a dma-fence-chain, peel the
+			 * chain by adding all of the unsignaled fences
+			 * (dma_fence_chain_for_each does that for us) the
+			 * chain points to.
+			 *
+			 * This enables us to identify waits on i915 fences
+			 * and allows for faster engine-to-engine
+			 * synchronization using HW semaphores.
+			 */
+			dma_fence_chain_for_each(iter, fence) {
+				struct dma_fence_chain *iter_chain =
+					to_dma_fence_chain(iter);
+
+				/*
+				 * It is possible that the last item in the
+				 * chain is not a dma_fence_chain.
+				 */
+				if (iter_chain) {
+					err = i915_request_await_dma_fence(eb->request,
+									   iter_chain->fence);
+				} else {
+					err = i915_request_await_dma_fence(eb->request, iter);
+				}
+				if (err < 0) {
+					dma_fence_put(iter);
+					break;
+				}
+			}
+		} else {
+			err = i915_request_await_dma_fence(eb->request, fence);
+		}
+
 		dma_fence_put(fence);
 		if (err < 0)
 			return err;
-- 
2.28.0
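
A compact, equivalent form of the loop body above, folding the two await
branches into one call since i915_request_await_dma_fence() takes any
struct dma_fence. Sketch only, assuming the same surrounding context
(eb, fence, err and iter declared as in the patch); it is not the merged
code:

	dma_fence_chain_for_each(iter, fence) {
		struct dma_fence_chain *iter_chain = to_dma_fence_chain(iter);

		/* Await the fence a chain node points to, or the bare tail
		 * fence directly if the last link is not a chain node. */
		err = i915_request_await_dma_fence(eb->request,
						   iter_chain ? iter_chain->fence : iter);
		if (err < 0) {
			/* dma_fence_chain_for_each() holds a reference on
			 * iter; drop it when breaking out early. */
			dma_fence_put(iter);
			break;
		}
	}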


* [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences
  2020-08-03  9:05 [Intel-gfx] [PATCH 0/3] drm/i915: timeline semaphore support Lionel Landwerlin
@ 2020-08-03  9:05 ` Lionel Landwerlin
  0 siblings, 0 replies; 14+ messages in thread
From: Lionel Landwerlin @ 2020-08-03  9:05 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter

To allow faster engine-to-engine synchronization, peel the
dma-fence-chain layer to expose the potential i915 fences underneath,
so that the i915-request code can emit HW semaphore wait/signal
operations in the ring. This is faster than waking up the host to
submit unblocked workloads after an interrupt notification.

v2: Also deal with chains where the last node is not a dma-fence-chain

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 39 ++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 1f766431f3a3..dbd7f03c2187 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2390,6 +2390,7 @@ await_fence_array(struct i915_execbuffer *eb)
 
 	for (n = 0; n < eb->n_fences; n++) {
 		struct drm_syncobj *syncobj;
+		struct dma_fence_chain *chain;
 		struct dma_fence *fence;
 		unsigned int flags;
 
@@ -2410,7 +2411,43 @@ await_fence_array(struct i915_execbuffer *eb)
 				continue;
 		}
 
-		err = i915_request_await_dma_fence(eb->request, fence);
+		chain = to_dma_fence_chain(fence);
+		if (chain) {
+			struct dma_fence *iter;
+
+			/*
+			 * If we're dealing with a dma-fence-chain, peel the
+			 * chain by adding all of the unsignaled fences
+			 * (dma_fence_chain_for_each does that for us) the
+			 * chain points to.
+			 *
+			 * This enables us to identify waits on i915 fences
+			 * and allows for faster engine-to-engine
+			 * synchronization using HW semaphores.
+			 */
+			dma_fence_chain_for_each(iter, fence) {
+				struct dma_fence_chain *iter_chain =
+					to_dma_fence_chain(iter);
+
+				/*
+				 * It is possible that the last item in the
+				 * chain is not a dma_fence_chain.
+				 */
+				if (iter_chain) {
+					err = i915_request_await_dma_fence(eb->request,
+									   iter_chain->fence);
+				} else {
+					err = i915_request_await_dma_fence(eb->request, iter);
+				}
+				if (err < 0) {
+					dma_fence_put(iter);
+					break;
+				}
+			}
+		} else {
+			err = i915_request_await_dma_fence(eb->request, fence);
+		}
+
 		dma_fence_put(fence);
 		if (err < 0)
 			return err;
-- 
2.28.0


* [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences
  2020-07-31 13:45 [Intel-gfx] [PATCH 0/3] drm/i915: timeline semaphore support Lionel Landwerlin
@ 2020-07-31 13:45 ` Lionel Landwerlin
  0 siblings, 0 replies; 14+ messages in thread
From: Lionel Landwerlin @ 2020-07-31 13:45 UTC (permalink / raw)
  To: intel-gfx; +Cc: Daniel Vetter

To allow faster engine-to-engine synchronization, peel the
dma-fence-chain layer to expose the potential i915 fences underneath,
so that the i915-request code can emit HW semaphore wait/signal
operations in the ring. This is faster than waking up the host to
submit unblocked workloads after an interrupt notification.

v2: Also deal with chains where the last node is not a dma-fence-chain

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 39 ++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 652f3b30a374..01e22b303e34 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2390,6 +2390,7 @@ await_fence_array(struct i915_execbuffer *eb)
 
 	for (n = 0; n < eb->n_fences; n++) {
 		struct drm_syncobj *syncobj;
+		struct dma_fence_chain *chain;
 		struct dma_fence *fence;
 		unsigned int flags;
 
@@ -2410,7 +2411,43 @@ await_fence_array(struct i915_execbuffer *eb)
 				continue;
 		}
 
-		err = i915_request_await_dma_fence(eb->request, fence);
+		chain = to_dma_fence_chain(fence);
+		if (chain) {
+			struct dma_fence *iter;
+
+			/*
+			 * If we're dealing with a dma-fence-chain, peel the
+			 * chain by adding all of the unsignaled fences
+			 * (dma_fence_chain_for_each does that for us) the
+			 * chain points to.
+			 *
+			 * This enables us to identify waits on i915 fences
+			 * and allows for faster engine-to-engine
+			 * synchronization using HW semaphores.
+			 */
+			dma_fence_chain_for_each(iter, fence) {
+				struct dma_fence_chain *iter_chain =
+					to_dma_fence_chain(iter);
+
+				/*
+				 * It is possible that the last item in the
+				 * chain is not a dma_fence_chain.
+				 */
+				if (iter_chain) {
+					err = i915_request_await_dma_fence(eb->request,
+									   iter_chain->fence);
+				} else {
+					err = i915_request_await_dma_fence(eb->request, iter);
+				}
+				if (err < 0) {
+					dma_fence_put(iter);
+					break;
+				}
+			}
+		} else {
+			err = i915_request_await_dma_fence(eb->request, fence);
+		}
+
 		dma_fence_put(fence);
 		if (err < 0)
 			return err;
-- 
2.28.0


end of thread, other threads:[~2020-08-03 14:11 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-06 20:07 [Intel-gfx] [PATCH 1/3] drm/i915: introduce a mechanism to extend execbuf2 Venkata Sandeep Dhanalakota
2020-04-06 20:07 ` [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support Venkata Sandeep Dhanalakota
2020-04-08 16:29   ` Lionel Landwerlin
2020-04-08 17:00     ` Venkata Sandeep Dhanalakota
2020-04-08 17:14   ` Lionel Landwerlin
2020-04-06 20:07 ` [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences Venkata Sandeep Dhanalakota
2020-04-06 22:13 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [1/3] drm/i915: introduce a mechanism to extend execbuf2 Patchwork
2020-04-06 22:37 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2020-04-07  8:13 ` [Intel-gfx] ✓ Fi.CI.IGT: " Patchwork
2020-07-31 13:45 [Intel-gfx] [PATCH 0/3] drm/i915: timeline semaphore support Lionel Landwerlin
2020-07-31 13:45 ` [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences Lionel Landwerlin
2020-08-03  9:05 [Intel-gfx] [PATCH 0/3] drm/i915: timeline semaphore support Lionel Landwerlin
2020-08-03  9:05 ` [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences Lionel Landwerlin
2020-08-03 14:01 [Intel-gfx] [PATCH 0/3] drm/i915: timeline semaphore support Lionel Landwerlin
2020-08-03 14:01 ` [Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences Lionel Landwerlin
2020-08-03 14:08   ` Chris Wilson
2020-08-03 14:11     ` Lionel Landwerlin

This is an external index of several public inboxes;
see the mirroring instructions on how to clone and mirror
all data and code used by this external index.