All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 0/2] drm/i915: timeline semaphore support
@ 2019-07-31 14:07 Lionel Landwerlin
  2019-07-31 14:07 ` [PATCH v3 1/2] drm/i915: introduce a mechanism to extend execbuf2 Lionel Landwerlin
                   ` (5 more replies)
  0 siblings, 6 replies; 19+ messages in thread
From: Lionel Landwerlin @ 2019-07-31 14:07 UTC (permalink / raw)
  To: intel-gfx

Hi all,

This is the timeline semaphore support extracted from the vulkan
performance query support series.

We're hoping to get this in so we can support timeline semaphore as
soon as possible.

The mesa MR that makes use of this series (although not all the features) is
at: https://gitlab.freedesktop.org/mesa/mesa/merge_requests/1525

IGT tests are available in the vulkan perf query series :
https://patchwork.freedesktop.org/series/64220/

Cheers,

Lionel Landwerlin (2):
  drm/i915: introduce a mechanism to extend execbuf2
  drm/i915: add syncobj timeline support

 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 335 +++++++++++++++---
 drivers/gpu/drm/i915/i915_drv.c               |   4 +-
 include/uapi/drm/i915_drm.h                   |  64 +++-
 3 files changed, 342 insertions(+), 61 deletions(-)

--
2.22.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [PATCH v3 1/2] drm/i915: introduce a mechanism to extend execbuf2
  2019-07-31 14:07 [PATCH v3 0/2] drm/i915: timeline semaphore support Lionel Landwerlin
@ 2019-07-31 14:07 ` Lionel Landwerlin
  2019-07-31 21:41   ` Chris Wilson
  2019-07-31 14:07 ` [PATCH v3 2/2] drm/i915: add syncobj timeline support Lionel Landwerlin
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 19+ messages in thread
From: Lionel Landwerlin @ 2019-07-31 14:07 UTC (permalink / raw)
  To: intel-gfx

We're planning to use this for a couple of new features where we need
to provide additional parameters to execbuf.

v2: Check for invalid flags in execbuffer2 (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v1)
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 38 ++++++++++++++++++-
 include/uapi/drm/i915_drm.h                   | 25 ++++++++++--
 2 files changed, 59 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index cbd7c6e3a1f8..58eb3bd57656 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -23,6 +23,7 @@
 #include "i915_gem_clflush.h"
 #include "i915_gem_context.h"
 #include "i915_trace.h"
+#include "i915_user_extensions.h"
 #include "intel_drv.h"
 
 enum {
@@ -270,6 +271,10 @@ struct i915_execbuffer {
 	 */
 	int lut_size;
 	struct hlist_head *buckets; /** ht for relocation handles */
+
+	struct {
+		u64 flags; /** Available extensions parameters */
+	} extensions;
 };
 
 #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
@@ -1964,7 +1969,7 @@ static bool i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
 		return false;
 
 	/* Kernel clipping was a DRI1 misfeature */
-	if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) {
+	if (!(exec->flags & (I915_EXEC_FENCE_ARRAY | I915_EXEC_EXT))) {
 		if (exec->num_cliprects || exec->cliprects_ptr)
 			return false;
 	}
@@ -2342,6 +2347,33 @@ signal_fence_array(struct i915_execbuffer *eb,
 	}
 }
 
+static const i915_user_extension_fn execbuf_extensions[] = {
+};
+
+static int
+parse_execbuf2_extensions(struct drm_i915_gem_execbuffer2 *args,
+			  struct i915_execbuffer *eb)
+{
+	eb->extensions.flags = 0;
+
+	if (!(args->flags & I915_EXEC_EXT))
+		return 0;
+
+	/* The execbuf2 extension mechanism reuses cliprects_ptr. So we cannot
+	 * have another flag also using it at the same time.
+	 */
+	if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
+		return -EINVAL;
+
+	if (args->num_cliprects != 0)
+		return -EINVAL;
+
+	return i915_user_extensions(u64_to_user_ptr(args->cliprects_ptr),
+				    execbuf_extensions,
+				    ARRAY_SIZE(execbuf_extensions),
+				    eb);
+}
+
 static int
 i915_gem_do_execbuffer(struct drm_device *dev,
 		       struct drm_file *file,
@@ -2388,6 +2420,10 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (args->flags & I915_EXEC_IS_PINNED)
 		eb.batch_flags |= I915_DISPATCH_PINNED;
 
+	err = parse_execbuf2_extensions(args, &eb);
+	if (err)
+		return err;
+
 	if (args->flags & I915_EXEC_FENCE_IN) {
 		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
 		if (!in_fence)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 469dc512cca3..7ea35b048a82 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1007,6 +1007,10 @@ struct drm_i915_gem_exec_fence {
 	__u32 flags;
 };
 
+enum drm_i915_gem_execbuffer_ext {
+	DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
+};
+
 struct drm_i915_gem_execbuffer2 {
 	/**
 	 * List of gem_exec_object2 structs
@@ -1023,8 +1027,14 @@ struct drm_i915_gem_execbuffer2 {
 	__u32 num_cliprects;
 	/**
 	 * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY
-	 * is not set.  If I915_EXEC_FENCE_ARRAY is set, then this is a
-	 * struct drm_i915_gem_exec_fence *fences.
+	 * & I915_EXEC_EXT are not set.
+	 *
+	 * If I915_EXEC_FENCE_ARRAY is set, then this is a pointer to an array
+	 * of struct drm_i915_gem_exec_fence and num_cliprects is the length
+	 * of the array.
+	 *
+	 * If I915_EXEC_EXT is set, then this is a pointer to a single struct
+	 * drm_i915_gem_base_execbuffer_ext and num_cliprects is 0.
 	 */
 	__u64 cliprects_ptr;
 #define I915_EXEC_RING_MASK              (0x3f)
@@ -1142,7 +1152,16 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_FENCE_SUBMIT		(1 << 20)
 
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1))
+/*
+ * Setting I915_EXEC_EXT implies that drm_i915_gem_execbuffer2.cliprects_ptr
+ * is treated as a pointer to an linked list of i915_user_extension. Each
+ * i915_user_extension node is the base of a larger structure. The list of
+ * supported structures are listed in the drm_i915_gem_execbuffer_ext
+ * enum.
+ */
+#define I915_EXEC_EXT		(1 << 21)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_EXT<<1))
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH v3 2/2] drm/i915: add syncobj timeline support
  2019-07-31 14:07 [PATCH v3 0/2] drm/i915: timeline semaphore support Lionel Landwerlin
  2019-07-31 14:07 ` [PATCH v3 1/2] drm/i915: introduce a mechanism to extend execbuf2 Lionel Landwerlin
@ 2019-07-31 14:07 ` Lionel Landwerlin
  2019-07-31 20:03   ` Chris Wilson
  2019-07-31 14:27 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915: timeline semaphore support (rev3) Patchwork
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 19+ messages in thread
From: Lionel Landwerlin @ 2019-07-31 14:07 UTC (permalink / raw)
  To: intel-gfx

Introduces new parameters to execbuf so that we can specify syncobj
handles as well as timeline points.

v2: Reuse i915_user_extension_fn

v3: Check that the chained extension is only present once (Chris)

v4: Check that dma_fence_chain_find_seqno returns a non NULL fence (Lionel)

v5: Use BIT_ULL (Chris)

v6: Fix issue with already signaled timeline points,
    dma_fence_chain_find_seqno() setting fence to NULL (Chris)

v7: Report ENOENT with invalid syncobj handle (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 297 ++++++++++++++----
 drivers/gpu/drm/i915/i915_drv.c               |   4 +-
 include/uapi/drm/i915_drm.h                   |  39 +++
 3 files changed, 283 insertions(+), 57 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 58eb3bd57656..d0e39986f0f4 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -213,6 +213,13 @@ enum {
  * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
  */
 
+struct i915_eb_fences {
+	struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
+	struct dma_fence *dma_fence;
+	u64 value;
+	struct dma_fence_chain *chain_fence;
+};
+
 struct i915_execbuffer {
 	struct drm_i915_private *i915; /** i915 backpointer */
 	struct drm_file *file; /** per-file lookup tables and limits */
@@ -274,6 +281,7 @@ struct i915_execbuffer {
 
 	struct {
 		u64 flags; /** Available extensions parameters */
+		struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
 	} extensions;
 };
 
@@ -2219,67 +2227,207 @@ eb_select_engine(struct i915_execbuffer *eb,
 }
 
 static void
-__free_fence_array(struct drm_syncobj **fences, unsigned int n)
+__free_fence_array(struct i915_eb_fences *fences, unsigned int n)
 {
-	while (n--)
-		drm_syncobj_put(ptr_mask_bits(fences[n], 2));
+	while (n--) {
+		drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
+		dma_fence_put(fences[n].dma_fence);
+		kfree(fences[n].chain_fence);
+	}
 	kvfree(fences);
 }
 
-static struct drm_syncobj **
-get_fence_array(struct drm_i915_gem_execbuffer2 *args,
-		struct drm_file *file)
+static struct i915_eb_fences *
+get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
+{
+	struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences =
+		&eb->extensions.timeline_fences;
+	struct drm_i915_gem_exec_fence __user *user_fences;
+	struct i915_eb_fences *fences;
+	u64 __user *user_values;
+	u64 num_fences, num_user_fences = timeline_fences->fence_count;
+	unsigned long n;
+	int err;
+
+	/* Check multiplication overflow for access_ok() and kvmalloc_array() */
+	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
+	if (num_user_fences > min_t(unsigned long,
+				    ULONG_MAX / sizeof(*user_fences),
+				    SIZE_MAX / sizeof(*fences)))
+		return ERR_PTR(-EINVAL);
+
+	user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
+	if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences)))
+		return ERR_PTR(-EFAULT);
+
+	user_values = u64_to_user_ptr(timeline_fences->values_ptr);
+	if (!access_ok(user_values, num_user_fences * sizeof(*user_values)))
+		return ERR_PTR(-EFAULT);
+
+	fences = kvmalloc_array(num_user_fences, sizeof(*fences),
+				__GFP_NOWARN | GFP_KERNEL);
+	if (!fences)
+		return ERR_PTR(-ENOMEM);
+
+	BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
+		     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
+
+	for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) {
+		struct drm_i915_gem_exec_fence user_fence;
+		struct drm_syncobj *syncobj;
+		struct dma_fence *fence = NULL;
+		u64 point;
+
+		if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) {
+			err = -EFAULT;
+			goto err;
+		}
+
+		if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
+			err = -EINVAL;
+			goto err;
+		}
+
+		if (__get_user(point, user_values++)) {
+			err = -EFAULT;
+			goto err;
+		}
+
+		syncobj = drm_syncobj_find(eb->file, user_fence.handle);
+		if (!syncobj) {
+			DRM_DEBUG("Invalid syncobj handle provided\n");
+			err = -ENOENT;
+			goto err;
+		}
+
+		if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
+			fence = drm_syncobj_fence_get(syncobj);
+			if (!fence) {
+				DRM_DEBUG("Syncobj handle has no fence\n");
+				drm_syncobj_put(syncobj);
+				err = -EINVAL;
+				goto err;
+			}
+
+			err = dma_fence_chain_find_seqno(&fence, point);
+			if (err) {
+				DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
+				drm_syncobj_put(syncobj);
+				goto err;
+			}
+
+			/* A point might have been signaled already and
+			 * garbage collected from the timeline. In this case
+			 * just ignore the point and carry on.
+			 */
+			if (!fence) {
+				drm_syncobj_put(syncobj);
+				continue;
+			}
+		}
+
+		/*
+		 * For timeline syncobjs we need to preallocate chains for
+		 * later signaling.
+		 */
+		if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
+			fences[num_fences].chain_fence =
+				kmalloc(sizeof(*fences[num_fences].chain_fence),
+					GFP_KERNEL);
+			if (!fences[num_fences].chain_fence) {
+				dma_fence_put(fence);
+				drm_syncobj_put(syncobj);
+				err = -ENOMEM;
+				DRM_DEBUG("Unable to alloc chain_fence\n");
+				goto err;
+			}
+		} else {
+			fences[num_fences].chain_fence = NULL;
+		}
+
+		fences[num_fences].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
+		fences[num_fences].dma_fence = fence;
+		fences[num_fences].value = point;
+		num_fences++;
+	}
+
+	*out_n_fences = num_fences;
+
+	return fences;
+
+err:
+	__free_fence_array(fences, num_fences);
+	return ERR_PTR(err);
+}
+
+static struct i915_eb_fences *
+get_legacy_fence_array(struct i915_execbuffer *eb,
+		       int *out_n_fences)
 {
-	const unsigned long nfences = args->num_cliprects;
+	struct drm_i915_gem_execbuffer2 *args = eb->args;
 	struct drm_i915_gem_exec_fence __user *user;
-	struct drm_syncobj **fences;
+	struct i915_eb_fences *fences;
+	const u32 num_fences = args->num_cliprects;
 	unsigned long n;
 	int err;
 
-	if (!(args->flags & I915_EXEC_FENCE_ARRAY))
-		return NULL;
+	*out_n_fences = num_fences;
 
 	/* Check multiplication overflow for access_ok() and kvmalloc_array() */
 	BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
-	if (nfences > min_t(unsigned long,
-			    ULONG_MAX / sizeof(*user),
-			    SIZE_MAX / sizeof(*fences)))
+	if (*out_n_fences > min_t(unsigned long,
+				  ULONG_MAX / sizeof(*user),
+				  SIZE_MAX / sizeof(*fences)))
 		return ERR_PTR(-EINVAL);
 
 	user = u64_to_user_ptr(args->cliprects_ptr);
-	if (!access_ok(user, nfences * sizeof(*user)))
+	if (!access_ok(user, *out_n_fences * sizeof(*user)))
 		return ERR_PTR(-EFAULT);
 
-	fences = kvmalloc_array(nfences, sizeof(*fences),
+	fences = kvmalloc_array(*out_n_fences, sizeof(*fences),
 				__GFP_NOWARN | GFP_KERNEL);
 	if (!fences)
 		return ERR_PTR(-ENOMEM);
 
-	for (n = 0; n < nfences; n++) {
-		struct drm_i915_gem_exec_fence fence;
+	for (n = 0; n < *out_n_fences; n++) {
+		struct drm_i915_gem_exec_fence user_fence;
 		struct drm_syncobj *syncobj;
+		struct dma_fence *fence = NULL;
 
-		if (__copy_from_user(&fence, user++, sizeof(fence))) {
+		if (__copy_from_user(&user_fence, user++, sizeof(user_fence))) {
 			err = -EFAULT;
 			goto err;
 		}
 
-		if (fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
+		if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
 			err = -EINVAL;
 			goto err;
 		}
 
-		syncobj = drm_syncobj_find(file, fence.handle);
+		syncobj = drm_syncobj_find(eb->file, user_fence.handle);
 		if (!syncobj) {
 			DRM_DEBUG("Invalid syncobj handle provided\n");
 			err = -ENOENT;
 			goto err;
 		}
 
+		if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
+			fence = drm_syncobj_fence_get(syncobj);
+			if (!fence) {
+				DRM_DEBUG("Syncobj handle has no fence\n");
+				drm_syncobj_put(syncobj);
+				err = -EINVAL;
+				goto err;
+			}
+		}
+
 		BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
 			     ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
 
-		fences[n] = ptr_pack_bits(syncobj, fence.flags, 2);
+		fences[n].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
+		fences[n].dma_fence = fence;
+		fences[n].value = 0;
+		fences[n].chain_fence = NULL;
 	}
 
 	return fences;
@@ -2289,37 +2437,44 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args,
 	return ERR_PTR(err);
 }
 
+static struct i915_eb_fences *
+get_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
+{
+	if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
+		return get_legacy_fence_array(eb, out_n_fences);
+
+	if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
+		return get_timeline_fence_array(eb, out_n_fences);
+
+	*out_n_fences = 0;
+	return NULL;
+}
+
 static void
-put_fence_array(struct drm_i915_gem_execbuffer2 *args,
-		struct drm_syncobj **fences)
+put_fence_array(struct i915_eb_fences *fences, int nfences)
 {
 	if (fences)
-		__free_fence_array(fences, args->num_cliprects);
+		__free_fence_array(fences, nfences);
 }
 
 static int
 await_fence_array(struct i915_execbuffer *eb,
-		  struct drm_syncobj **fences)
+		  struct i915_eb_fences *fences,
+		  int nfences)
 {
-	const unsigned int nfences = eb->args->num_cliprects;
 	unsigned int n;
 	int err;
 
 	for (n = 0; n < nfences; n++) {
 		struct drm_syncobj *syncobj;
-		struct dma_fence *fence;
 		unsigned int flags;
 
-		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
+		syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
 		if (!(flags & I915_EXEC_FENCE_WAIT))
 			continue;
 
-		fence = drm_syncobj_fence_get(syncobj);
-		if (!fence)
-			return -EINVAL;
-
-		err = i915_request_await_dma_fence(eb->request, fence);
-		dma_fence_put(fence);
+		err = i915_request_await_dma_fence(eb->request,
+						   fences[n].dma_fence);
 		if (err < 0)
 			return err;
 	}
@@ -2329,9 +2484,9 @@ await_fence_array(struct i915_execbuffer *eb,
 
 static void
 signal_fence_array(struct i915_execbuffer *eb,
-		   struct drm_syncobj **fences)
+		   struct i915_eb_fences *fences,
+		   int nfences)
 {
-	const unsigned int nfences = eb->args->num_cliprects;
 	struct dma_fence * const fence = &eb->request->fence;
 	unsigned int n;
 
@@ -2339,15 +2494,46 @@ signal_fence_array(struct i915_execbuffer *eb,
 		struct drm_syncobj *syncobj;
 		unsigned int flags;
 
-		syncobj = ptr_unpack_bits(fences[n], &flags, 2);
+		syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
 		if (!(flags & I915_EXEC_FENCE_SIGNAL))
 			continue;
 
-		drm_syncobj_replace_fence(syncobj, fence);
+		if (fences[n].chain_fence) {
+			drm_syncobj_add_point(syncobj, fences[n].chain_fence,
+					      fence, fences[n].value);
+			/*
+			 * The chain's ownership is transfered to the
+			 * timeline.
+			 */
+			fences[n].chain_fence = NULL;
+		} else {
+			drm_syncobj_replace_fence(syncobj, fence);
+		}
 	}
 }
 
+static int parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
+{
+	struct i915_execbuffer *eb = data;
+
+	/* Timeline fences are incompatible with the fence array flag. */
+	if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
+		return -EINVAL;
+
+	if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES))
+		return -EINVAL;
+
+	if (copy_from_user(&eb->extensions.timeline_fences, ext,
+			   sizeof(eb->extensions.timeline_fences)))
+		return -EFAULT;
+
+	eb->extensions.flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES);
+
+	return 0;
+}
+
 static const i915_user_extension_fn execbuf_extensions[] = {
+        [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
 };
 
 static int
@@ -2378,14 +2564,15 @@ static int
 i915_gem_do_execbuffer(struct drm_device *dev,
 		       struct drm_file *file,
 		       struct drm_i915_gem_execbuffer2 *args,
-		       struct drm_i915_gem_exec_object2 *exec,
-		       struct drm_syncobj **fences)
+		       struct drm_i915_gem_exec_object2 *exec)
 {
 	struct i915_execbuffer eb;
 	struct dma_fence *in_fence = NULL;
 	struct dma_fence *exec_fence = NULL;
 	struct sync_file *out_fence = NULL;
+	struct i915_eb_fences *fences = NULL;
 	int out_fence_fd = -1;
+	int nfences = 0;
 	int err;
 
 	BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
@@ -2424,10 +2611,16 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	if (err)
 		return err;
 
+	fences = get_fence_array(&eb, &nfences);
+	if (IS_ERR(fences))
+		return PTR_ERR(fences);
+
 	if (args->flags & I915_EXEC_FENCE_IN) {
 		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
-		if (!in_fence)
-			return -EINVAL;
+		if (!in_fence) {
+			err = -EINVAL;
+			goto err_fences;
+		}
 	}
 
 	if (args->flags & I915_EXEC_FENCE_SUBMIT) {
@@ -2585,7 +2778,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	}
 
 	if (fences) {
-		err = await_fence_array(&eb, fences);
+		err = await_fence_array(&eb, fences, nfences);
 		if (err)
 			goto err_request;
 	}
@@ -2614,7 +2807,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	i915_request_add(eb.request);
 
 	if (fences)
-		signal_fence_array(&eb, fences);
+		signal_fence_array(&eb, fences, nfences);
 
 	if (out_fence) {
 		if (err == 0) {
@@ -2649,6 +2842,8 @@ i915_gem_do_execbuffer(struct drm_device *dev,
 	dma_fence_put(exec_fence);
 err_in_fence:
 	dma_fence_put(in_fence);
+err_fences:
+	put_fence_array(fences, nfences);
 	return err;
 }
 
@@ -2742,7 +2937,7 @@ i915_gem_execbuffer_ioctl(struct drm_device *dev, void *data,
 			exec2_list[i].flags = 0;
 	}
 
-	err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list, NULL);
+	err = i915_gem_do_execbuffer(dev, file, &exec2, exec2_list);
 	if (exec2.flags & __EXEC_HAS_RELOC) {
 		struct drm_i915_gem_exec_object __user *user_exec_list =
 			u64_to_user_ptr(args->buffers_ptr);
@@ -2773,7 +2968,6 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_gem_execbuffer2 *args = data;
 	struct drm_i915_gem_exec_object2 *exec2_list;
-	struct drm_syncobj **fences = NULL;
 	const size_t count = args->buffer_count;
 	int err;
 
@@ -2801,15 +2995,7 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
 		return -EFAULT;
 	}
 
-	if (args->flags & I915_EXEC_FENCE_ARRAY) {
-		fences = get_fence_array(args, file);
-		if (IS_ERR(fences)) {
-			kvfree(exec2_list);
-			return PTR_ERR(fences);
-		}
-	}
-
-	err = i915_gem_do_execbuffer(dev, file, args, exec2_list, fences);
+	err = i915_gem_do_execbuffer(dev, file, args, exec2_list);
 
 	/*
 	 * Now that we have begun execution of the batchbuffer, we ignore
@@ -2849,7 +3035,6 @@ end:;
 	}
 
 	args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
-	put_fence_array(args, fences);
 	kvfree(exec2_list);
 	return err;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 761726818a22..a6acece99abf 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -463,6 +463,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_EXEC_BATCH_FIRST:
 	case I915_PARAM_HAS_EXEC_FENCE_ARRAY:
 	case I915_PARAM_HAS_EXEC_SUBMIT_FENCE:
+	case I915_PARAM_HAS_EXEC_TIMELINE_FENCES:
 		/* For the time being all of these are always true;
 		 * if some supported hardware does not have one of these
 		 * features this value needs to be provided from
@@ -3208,7 +3209,8 @@ static struct drm_driver driver = {
 	 */
 	.driver_features =
 	    DRIVER_GEM |
-	    DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ,
+	    DRIVER_RENDER | DRIVER_MODESET | DRIVER_ATOMIC | DRIVER_SYNCOBJ |
+	    DRIVER_SYNCOBJ_TIMELINE,
 	.release = i915_driver_release,
 	.open = i915_driver_open,
 	.lastclose = i915_driver_lastclose,
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 7ea35b048a82..92c3323f3167 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -611,6 +611,13 @@ typedef struct drm_i915_irq_wait {
  * See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT.
  */
 #define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53
+
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
+ * timeline syncobj through drm_i915_gem_execbuf_ext_timeline_fences. See
+ * I915_EXEC_EXT.
+ */
+#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 54
+
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1008,9 +1015,41 @@ struct drm_i915_gem_exec_fence {
 };
 
 enum drm_i915_gem_execbuffer_ext {
+	/**
+	 * See drm_i915_gem_execbuf_ext_timeline_fences.
+	 */
+	DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 0,
+
 	DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
 };
 
+/**
+ * This structure describes an array of drm_syncobj and associated points for
+ * timeline variants of drm_syncobj. It is invalid to append this structure to
+ * the execbuf if I915_EXEC_FENCE_ARRAY is set.
+ */
+struct drm_i915_gem_execbuffer_ext_timeline_fences {
+	struct i915_user_extension base;
+
+	/**
+	 * Number of element in the handles_ptr & value_ptr arrays.
+	 */
+	__u64 fence_count;
+
+	/**
+	 * Pointer to an array of struct drm_i915_gem_exec_fence of length
+	 * fence_count.
+	 */
+	__u64 handles_ptr;
+
+	/**
+	 * Pointer to an array of u64 values of length fence_count. Values
+	 * must be 0 for a binary drm_syncobj. A Value of 0 for a timeline
+	 * drm_syncobj is invalid as it turns a drm_syncobj into a binary one.
+	 */
+	__u64 values_ptr;
+};
+
 struct drm_i915_gem_execbuffer2 {
 	/**
 	 * List of gem_exec_object2 structs
-- 
2.22.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for drm/i915: timeline semaphore support (rev3)
  2019-07-31 14:07 [PATCH v3 0/2] drm/i915: timeline semaphore support Lionel Landwerlin
  2019-07-31 14:07 ` [PATCH v3 1/2] drm/i915: introduce a mechanism to extend execbuf2 Lionel Landwerlin
  2019-07-31 14:07 ` [PATCH v3 2/2] drm/i915: add syncobj timeline support Lionel Landwerlin
@ 2019-07-31 14:27 ` Patchwork
  2019-07-31 14:29 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 19+ messages in thread
From: Patchwork @ 2019-07-31 14:27 UTC (permalink / raw)
  To: Lionel Landwerlin; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: timeline semaphore support (rev3)
URL   : https://patchwork.freedesktop.org/series/61032/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
a226f06ee52a drm/i915: introduce a mechanism to extend execbuf2
-:137: CHECK:SPACING: spaces preferred around that '<<' (ctx:VxV)
#137: FILE: include/uapi/drm/i915_drm.h:1164:
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_EXT<<1))
                                                   ^

total: 0 errors, 0 warnings, 1 checks, 111 lines checked
ea893730971d drm/i915: add syncobj timeline support
-:363: WARNING:TYPO_SPELLING: 'transfered' may be misspelled - perhaps 'transferred'?
#363: FILE: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:2505:
+			 * The chain's ownership is transfered to the

-:394: ERROR:CODE_INDENT: code indent should use tabs where possible
#394: FILE: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:2536:
+        [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,$

-:394: WARNING:LEADING_SPACE: please, no spaces at the start of a line
#394: FILE: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c:2536:
+        [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,$

total: 1 errors, 2 warnings, 0 checks, 531 lines checked

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* ✗ Fi.CI.SPARSE: warning for drm/i915: timeline semaphore support (rev3)
  2019-07-31 14:07 [PATCH v3 0/2] drm/i915: timeline semaphore support Lionel Landwerlin
                   ` (2 preceding siblings ...)
  2019-07-31 14:27 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915: timeline semaphore support (rev3) Patchwork
@ 2019-07-31 14:29 ` Patchwork
  2019-08-01 14:31 ` ✓ Fi.CI.BAT: success " Patchwork
  2019-08-02  6:05 ` ✓ Fi.CI.IGT: " Patchwork
  5 siblings, 0 replies; 19+ messages in thread
From: Patchwork @ 2019-07-31 14:29 UTC (permalink / raw)
  To: Lionel Landwerlin; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: timeline semaphore support (rev3)
URL   : https://patchwork.freedesktop.org/series/61032/
State : warning

== Summary ==

$ dim sparse origin/drm-tip
Sparse version: v0.5.2
Commit: drm/i915: introduce a mechanism to extend execbuf2
Okay!

Commit: drm/i915: add syncobj timeline support
+./include/linux/mm.h:685:13: error: not a function <noident>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v3 2/2] drm/i915: add syncobj timeline support
  2019-07-31 14:07 ` [PATCH v3 2/2] drm/i915: add syncobj timeline support Lionel Landwerlin
@ 2019-07-31 20:03   ` Chris Wilson
  2019-08-01  7:43     ` Lionel Landwerlin
  2019-08-01 14:29     ` Lionel Landwerlin
  0 siblings, 2 replies; 19+ messages in thread
From: Chris Wilson @ 2019-07-31 20:03 UTC (permalink / raw)
  To: Lionel Landwerlin, intel-gfx

Quoting Lionel Landwerlin (2019-07-31 15:07:33)
> -static struct drm_syncobj **
> -get_fence_array(struct drm_i915_gem_execbuffer2 *args,
> -               struct drm_file *file)
> +static struct i915_eb_fences *
> +get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
> +{
> +       struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences =
> +               &eb->extensions.timeline_fences;
> +       struct drm_i915_gem_exec_fence __user *user_fences;
> +       struct i915_eb_fences *fences;
> +       u64 __user *user_values;
> +       u64 num_fences, num_user_fences = timeline_fences->fence_count;
> +       unsigned long n;
> +       int err;
> +
> +       /* Check multiplication overflow for access_ok() and kvmalloc_array() */
> +       BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
> +       if (num_user_fences > min_t(unsigned long,
> +                                   ULONG_MAX / sizeof(*user_fences),
> +                                   SIZE_MAX / sizeof(*fences)))
> +               return ERR_PTR(-EINVAL);
> +
> +       user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
> +       if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences)))
> +               return ERR_PTR(-EFAULT);
> +
> +       user_values = u64_to_user_ptr(timeline_fences->values_ptr);
> +       if (!access_ok(user_values, num_user_fences * sizeof(*user_values)))
> +               return ERR_PTR(-EFAULT);
> +
> +       fences = kvmalloc_array(num_user_fences, sizeof(*fences),
> +                               __GFP_NOWARN | GFP_KERNEL);
> +       if (!fences)
> +               return ERR_PTR(-ENOMEM);
> +
> +       BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
> +                    ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
> +
> +       for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) {
> +               struct drm_i915_gem_exec_fence user_fence;
> +               struct drm_syncobj *syncobj;
> +               struct dma_fence *fence = NULL;
> +               u64 point;
> +
> +               if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) {
> +                       err = -EFAULT;
> +                       goto err;
> +               }
> +
> +               if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
> +                       err = -EINVAL;
> +                       goto err;
> +               }
> +
> +               if (__get_user(point, user_values++)) {
> +                       err = -EFAULT;
> +                       goto err;
> +               }
> +
> +               syncobj = drm_syncobj_find(eb->file, user_fence.handle);
> +               if (!syncobj) {
> +                       DRM_DEBUG("Invalid syncobj handle provided\n");
> +                       err = -ENOENT;
> +                       goto err;
> +               }
> +
> +               if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
> +                       fence = drm_syncobj_fence_get(syncobj);
> +                       if (!fence) {
> +                               DRM_DEBUG("Syncobj handle has no fence\n");
> +                               drm_syncobj_put(syncobj);
> +                               err = -EINVAL;
> +                               goto err;
> +                       }
> +
> +                       err = dma_fence_chain_find_seqno(&fence, point);
> +                       if (err) {
> +                               DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
> +                               drm_syncobj_put(syncobj);
> +                               goto err;
> +                       }
> +
> +                       /* A point might have been signaled already and
> +                        * garbage collected from the timeline. In this case
> +                        * just ignore the point and carry on.
> +                        */
> +                       if (!fence) {
> +                               drm_syncobj_put(syncobj);
> +                               continue;
> +                       }
> +               }
> +
> +               /*
> +                * For timeline syncobjs we need to preallocate chains for
> +                * later signaling.
> +                */
> +               if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {

if (dma_fence_chain_find_seqno() == 0)
	return -EINVAL;

as an early sanity check?

> +                       fences[num_fences].chain_fence =
> +                               kmalloc(sizeof(*fences[num_fences].chain_fence),
> +                                       GFP_KERNEL);
> +                       if (!fences[num_fences].chain_fence) {
> +                               dma_fence_put(fence);
> +                               drm_syncobj_put(syncobj);
> +                               err = -ENOMEM;
> +                               DRM_DEBUG("Unable to alloc chain_fence\n");
> +                               goto err;
> +                       }
> +               } else {
> +                       fences[num_fences].chain_fence = NULL;
> +               }
> +
> +               fences[num_fences].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
> +               fences[num_fences].dma_fence = fence;
> +               fences[num_fences].value = point;
> +               num_fences++;
> +       }
> +
> +       *out_n_fences = num_fences;
> +
> +       return fences;
> +
> +err:
> +       __free_fence_array(fences, num_fences);
> +       return ERR_PTR(err);
> +}

> @@ -2329,9 +2484,9 @@ await_fence_array(struct i915_execbuffer *eb,
>  
>  static void
>  signal_fence_array(struct i915_execbuffer *eb,
> -                  struct drm_syncobj **fences)
> +                  struct i915_eb_fences *fences,
> +                  int nfences)
>  {
> -       const unsigned int nfences = eb->args->num_cliprects;
>         struct dma_fence * const fence = &eb->request->fence;
>         unsigned int n;
>  
> @@ -2339,15 +2494,46 @@ signal_fence_array(struct i915_execbuffer *eb,
>                 struct drm_syncobj *syncobj;
>                 unsigned int flags;
>  
> -               syncobj = ptr_unpack_bits(fences[n], &flags, 2);
> +               syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
>                 if (!(flags & I915_EXEC_FENCE_SIGNAL))
>                         continue;
>  
> -               drm_syncobj_replace_fence(syncobj, fence);
> +               if (fences[n].chain_fence) {
> +                       drm_syncobj_add_point(syncobj, fences[n].chain_fence,
> +                                             fence, fences[n].value);
> +                       /*
> +                        * The chain's ownership is transferred to the
> +                        * timeline.
> +                        */
> +                       fences[n].chain_fence = NULL;
> +               } else {
> +                       drm_syncobj_replace_fence(syncobj, fence);
> +               }
>         }
>  }

I think I have convinced myself that with the split between wait before,
signal after combined with the rule that seqno point along the syncobj
are monotonic, you should not be able to generate an AB-BA deadlock
between concurrent clients.

Except that we need to fixup drm_syncobj_add_point() to actually apply
that rule. Throwing an error and leaving the client stuck is less than
ideal, we can't even recover with a GPU reset, and as they are native
fences we don't employ a failsafe timer.

Doesn't WAIT_FOR_SUBMIT throw a spanner in the works and allow for
deadlocks? On the other hand, if they used MI_SEMA rather than scheduler
fences, give or take some coarseness in our timeslicing granularity (I
need to enable fast context switches on user semaphores -- first attempt
was just an interrupt storm!) it will work as well as a round-robin
scheduler can work. (In other words, we only need coarse fences for the
scheduler and userspace can implement its own semaphores -- with the
open discussion on how to share the iova user semaphores between
processes, cf ugpu.)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v3 1/2] drm/i915: introduce a mechanism to extend execbuf2
  2019-07-31 14:07 ` [PATCH v3 1/2] drm/i915: introduce a mechanism to extend execbuf2 Lionel Landwerlin
@ 2019-07-31 21:41   ` Chris Wilson
  0 siblings, 0 replies; 19+ messages in thread
From: Chris Wilson @ 2019-07-31 21:41 UTC (permalink / raw)
  To: Lionel Landwerlin, intel-gfx

Quoting Lionel Landwerlin (2019-07-31 15:07:32)
> +/*
> + * Setting I915_EXEC_EXT implies that drm_i915_gem_execbuffer2.cliprects_ptr
> + * is treated as a pointer to a linked list of i915_user_extension. Each
> + * i915_user_extension node is the base of a larger structure. The list of
> + * supported structures are listed in the drm_i915_gem_execbuffer_ext
> + * enum.
> + */
> +#define I915_EXEC_EXT          (1 << 21)

On reflection, I think I915_EXEC_USE_EXTENSIONS to match
I915_CONTEXT_CREATE_USE_EXTENSIONS, and establish a pattern for the future.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v3 2/2] drm/i915: add syncobj timeline support
  2019-07-31 20:03   ` Chris Wilson
@ 2019-08-01  7:43     ` Lionel Landwerlin
  2019-08-01  8:08       ` Chris Wilson
  2019-08-01 14:29     ` Lionel Landwerlin
  1 sibling, 1 reply; 19+ messages in thread
From: Lionel Landwerlin @ 2019-08-01  7:43 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 31/07/2019 23:03, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2019-07-31 15:07:33)
>> -static struct drm_syncobj **
>> -get_fence_array(struct drm_i915_gem_execbuffer2 *args,
>> -               struct drm_file *file)
>> +static struct i915_eb_fences *
>> +get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
>> +{
>> +       struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences =
>> +               &eb->extensions.timeline_fences;
>> +       struct drm_i915_gem_exec_fence __user *user_fences;
>> +       struct i915_eb_fences *fences;
>> +       u64 __user *user_values;
>> +       u64 num_fences, num_user_fences = timeline_fences->fence_count;
>> +       unsigned long n;
>> +       int err;
>> +
>> +       /* Check multiplication overflow for access_ok() and kvmalloc_array() */
>> +       BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
>> +       if (num_user_fences > min_t(unsigned long,
>> +                                   ULONG_MAX / sizeof(*user_fences),
>> +                                   SIZE_MAX / sizeof(*fences)))
>> +               return ERR_PTR(-EINVAL);
>> +
>> +       user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
>> +       if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences)))
>> +               return ERR_PTR(-EFAULT);
>> +
>> +       user_values = u64_to_user_ptr(timeline_fences->values_ptr);
>> +       if (!access_ok(user_values, num_user_fences * sizeof(*user_values)))
>> +               return ERR_PTR(-EFAULT);
>> +
>> +       fences = kvmalloc_array(num_user_fences, sizeof(*fences),
>> +                               __GFP_NOWARN | GFP_KERNEL);
>> +       if (!fences)
>> +               return ERR_PTR(-ENOMEM);
>> +
>> +       BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
>> +                    ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
>> +
>> +       for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) {
>> +               struct drm_i915_gem_exec_fence user_fence;
>> +               struct drm_syncobj *syncobj;
>> +               struct dma_fence *fence = NULL;
>> +               u64 point;
>> +
>> +               if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) {
>> +                       err = -EFAULT;
>> +                       goto err;
>> +               }
>> +
>> +               if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
>> +                       err = -EINVAL;
>> +                       goto err;
>> +               }
>> +
>> +               if (__get_user(point, user_values++)) {
>> +                       err = -EFAULT;
>> +                       goto err;
>> +               }
>> +
>> +               syncobj = drm_syncobj_find(eb->file, user_fence.handle);
>> +               if (!syncobj) {
>> +                       DRM_DEBUG("Invalid syncobj handle provided\n");
>> +                       err = -ENOENT;
>> +                       goto err;
>> +               }
>> +
>> +               if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
>> +                       fence = drm_syncobj_fence_get(syncobj);
>> +                       if (!fence) {
>> +                               DRM_DEBUG("Syncobj handle has no fence\n");
>> +                               drm_syncobj_put(syncobj);
>> +                               err = -EINVAL;
>> +                               goto err;
>> +                       }
>> +
>> +                       err = dma_fence_chain_find_seqno(&fence, point);
>> +                       if (err) {
>> +                               DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
>> +                               drm_syncobj_put(syncobj);
>> +                               goto err;
>> +                       }
>> +
>> +                       /* A point might have been signaled already and
>> +                        * garbage collected from the timeline. In this case
>> +                        * just ignore the point and carry on.
>> +                        */
>> +                       if (!fence) {
>> +                               drm_syncobj_put(syncobj);
>> +                               continue;
>> +                       }
>> +               }
>> +
>> +               /*
>> +                * For timeline syncobjs we need to preallocate chains for
>> +                * later signaling.
>> +                */
>> +               if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
> if (dma_fence_chain_find_seqno() == 0)
> 	return -EINVAL;
>
> as an early sanity check?


Makes sense, let me see if that breaks anything.


Also waiting and signaling the same point doesn't work anymore for 
timelines.

Will check that too.


>
>> +                       fences[num_fences].chain_fence =
>> +                               kmalloc(sizeof(*fences[num_fences].chain_fence),
>> +                                       GFP_KERNEL);
>> +                       if (!fences[num_fences].chain_fence) {
>> +                               dma_fence_put(fence);
>> +                               drm_syncobj_put(syncobj);
>> +                               err = -ENOMEM;
>> +                               DRM_DEBUG("Unable to alloc chain_fence\n");
>> +                               goto err;
>> +                       }
>> +               } else {
>> +                       fences[num_fences].chain_fence = NULL;
>> +               }
>> +
>> +               fences[num_fences].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
>> +               fences[num_fences].dma_fence = fence;
>> +               fences[num_fences].value = point;
>> +               num_fences++;
>> +       }
>> +
>> +       *out_n_fences = num_fences;
>> +
>> +       return fences;
>> +
>> +err:
>> +       __free_fence_array(fences, num_fences);
>> +       return ERR_PTR(err);
>> +}
>> @@ -2329,9 +2484,9 @@ await_fence_array(struct i915_execbuffer *eb,
>>   
>>   static void
>>   signal_fence_array(struct i915_execbuffer *eb,
>> -                  struct drm_syncobj **fences)
>> +                  struct i915_eb_fences *fences,
>> +                  int nfences)
>>   {
>> -       const unsigned int nfences = eb->args->num_cliprects;
>>          struct dma_fence * const fence = &eb->request->fence;
>>          unsigned int n;
>>   
>> @@ -2339,15 +2494,46 @@ signal_fence_array(struct i915_execbuffer *eb,
>>                  struct drm_syncobj *syncobj;
>>                  unsigned int flags;
>>   
>> -               syncobj = ptr_unpack_bits(fences[n], &flags, 2);
>> +               syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
>>                  if (!(flags & I915_EXEC_FENCE_SIGNAL))
>>                          continue;
>>   
>> -               drm_syncobj_replace_fence(syncobj, fence);
>> +               if (fences[n].chain_fence) {
>> +                       drm_syncobj_add_point(syncobj, fences[n].chain_fence,
>> +                                             fence, fences[n].value);
>> +                       /*
>> +                        * The chain's ownership is transferred to the
>> +                        * timeline.
>> +                        */
>> +                       fences[n].chain_fence = NULL;
>> +               } else {
>> +                       drm_syncobj_replace_fence(syncobj, fence);
>> +               }
>>          }
>>   }
> I think I have convinced myself that with the split between wait before,
> signal after combined with the rule that seqno point along the syncobj
> are monotonic, you should not be able to generate an AB-BA deadlock
> between concurrent clients.
>
> Except that we need to fixup drm_syncobj_add_point() to actually apply
> that rule. Throwing an error and leaving the client stuck is less than
> ideal, we can't even recover with a GPU reset, and as they are native
> fences we don't employ a failsafe timer.


Unfortunately we're not the only user of add_point() and amdgpu doesn't 
want it to fail.

Best I can come up with is take the syncobj lock when signaling in 
get_timeline_fence_array() do the check there, unlock in __free_fence_array.



>
> Doesn't WAIT_FOR_SUBMIT throw a spanner in the works and allow for
> deadlocks?


Hm... I can't see how.

Unless you mean clients could deadlock themselves the same way they 
would using 2 pthread_mutexes and having 2 threads trying to lock both 
mutexes in opposite order.

Is that the kernel's problem?


> On the other hand, if they used MI_SEMA rather than scheduler
> fences, give or take some coarseness in our timeslicing granularity (I
> need to enable fast context switches on user semaphores -- first attempt
> was just an interrupt storm!) it will work as well as a round-robin
> scheduler can work. (In other words, we only need coarse fences for the
> scheduler and userspace can implement its own semaphores -- with the
> open discussion on how to share the iova user semaphores between
> processes, cf ugpu.)
> -Chris
>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v3 2/2] drm/i915: add syncobj timeline support
  2019-08-01  7:43     ` Lionel Landwerlin
@ 2019-08-01  8:08       ` Chris Wilson
  2019-08-01  9:01         ` Lionel Landwerlin
  0 siblings, 1 reply; 19+ messages in thread
From: Chris Wilson @ 2019-08-01  8:08 UTC (permalink / raw)
  To: Lionel Landwerlin, intel-gfx

Quoting Lionel Landwerlin (2019-08-01 08:43:24)
> On 31/07/2019 23:03, Chris Wilson wrote:
> > Quoting Lionel Landwerlin (2019-07-31 15:07:33)
> > I think I have convinced myself that with the split between wait before,
> > signal after combined with the rule that seqno point along the syncobj
> > are monotonic, you should not be able to generate an AB-BA deadlock
> > between concurrent clients.
> >
> > Except that we need to fixup drm_syncobj_add_point() to actually apply
> > that rule. Throwing an error and leaving the client stuck is less than
> > ideal, we can't even recover with a GPU reset, and as they are native
> > fences we don't employ a failsafe timer.
> 
> 
> Unfortunately we're not the only user of add_point() and amdgpu doesn't 
> want it to fail.

It's dangerously exposing the deadlock from incorrect seqno ordering to
userspace. We should be able to hit that DRM_ERROR() in testing, and be
able to detect the out-of-sequence timeline.

> Best I can come up with is take the syncobj lock when signaling in 
> get_timeline_fence_array() do the check there, unlock in __free_fence_array.

I think the intermediate step is a "safe" version of add_point that
doesn't leave the timeline broken. That still leaves a glitch visible to
userspace, but it should not deadlock.

The way I was looking at it was to reserve a placeholder syncpt before
building the request and allow replacing the placeholder afterwards.

If we abort the submission part way, we leave the placeholder in the
timeline to be replaced later, or subsumed by a later syncpt.

> > Doesn't WAIT_FOR_SUBMIT throw a spanner in the works and allow for
> > deadlocks?
> 
> 
> Hm... I can't see how.
> 
> Unless you mean clients could deadlock them themselves the same way it 
> would using 2 pthread_mutex and having 2 threads trying to lock both 
> mutexes in opposite order.

Yes.
 
> Is it that the kernel's problem?

It becomes a problem for us as it ties up kernel resources that we can
not reap currently. Userspace is not meant to be able to break the
kernel on a whim.  Even futexes are robust. ;)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v3 2/2] drm/i915: add syncobj timeline support
  2019-08-01  8:08       ` Chris Wilson
@ 2019-08-01  9:01         ` Lionel Landwerlin
  2019-08-01  9:22           ` Chris Wilson
  0 siblings, 1 reply; 19+ messages in thread
From: Lionel Landwerlin @ 2019-08-01  9:01 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 01/08/2019 11:08, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2019-08-01 08:43:24)
>> On 31/07/2019 23:03, Chris Wilson wrote:
>>> Quoting Lionel Landwerlin (2019-07-31 15:07:33)
>>> I think I have convinced myself that with the split between wait before,
>>> signal after combined with the rule that seqno point along the syncobj
>>> are monotonic, you should not be able to generate an AB-BA deadlock
>>> between concurrent clients.
>>>
>>> Except that we need to fixup drm_syncobj_add_point() to actually apply
>>> that rule. Throwing an error and leaving the client stuck is less than
>>> ideal, we can't even recover with a GPU reset, and as they are native
>>> fences we don't employ a failsafe timer.
>>
>> Unfortunately we're not the only user of add_point() and amdgpu doesn't
>> want it to fail.
> It's dangerously exposing the deadlock from incorrect seqno ordering to
> userspace. We should be able to hit that DRM_ERROR() in testing, and be
> able to detect the out-of-sequence timeline.
>
>> Best I can come up with is take the syncobj lock when signaling in
>> get_timeline_fence_array() do the check there, unlock in __free_fence_array.
> I think the intermediate step is a "safe" version of add_point that
> doesn't leave the timeline broken. That still leaves a glitch visible to
> userspace, but it should not deadlock.


Right, sounds doable. What happens in execbuf after add_point() fails?

We've already handed the request to the scheduler and potentially it 
might be running already.


>
> The way I was looking at it was to reserve a placeholder syncpt before
> building the request and allow replacing the placeholder afterwards.


That sounds fairly tricky to get right :(

The fence stuff is already full of magic.


>
> If we abort the submission part way, we leave the placeholder in the
> timeline to be replaced later, or subsumed by a later syncpt.
>
>>> Doesn't WAIT_FOR_SUBMIT throw a spanner in the works and allow for
>>> deadlocks?
>>
>> Hm... I can't see how.
>>
>> Unless you mean clients could deadlock them themselves the same way it
>> would using 2 pthread_mutex and having 2 threads trying to lock both
>> mutexes in opposite order.
> Yes.
>   
>> Is it that the kernel's problem?
> It becomes a problem for us as it ties up kernel resources that we can
> not reap currently. Userspace is not meant to be able to break the
> kernel on a whim.  Even futexes are robust. ;)
> -Chris
>

Thanks,


-Lionel

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v3 2/2] drm/i915: add syncobj timeline support
  2019-08-01  9:01         ` Lionel Landwerlin
@ 2019-08-01  9:22           ` Chris Wilson
  2019-08-01  9:37             ` Lionel Landwerlin
  0 siblings, 1 reply; 19+ messages in thread
From: Chris Wilson @ 2019-08-01  9:22 UTC (permalink / raw)
  To: Lionel Landwerlin, intel-gfx

Quoting Lionel Landwerlin (2019-08-01 10:01:44)
> On 01/08/2019 11:08, Chris Wilson wrote:
> > Quoting Lionel Landwerlin (2019-08-01 08:43:24)
> >> On 31/07/2019 23:03, Chris Wilson wrote:
> >>> Quoting Lionel Landwerlin (2019-07-31 15:07:33)
> >>> I think I have convinced myself that with the split between wait before,
> >>> signal after combined with the rule that seqno point along the syncobj
> >>> are monotonic, you should not be able to generate an AB-BA deadlock
> >>> between concurrent clients.
> >>>
> >>> Except that we need to fixup drm_syncobj_add_point() to actually apply
> >>> that rule. Throwing an error and leaving the client stuck is less than
> >>> ideal, we can't even recover with a GPU reset, and as they are native
> >>> fences we don't employ a failsafe timer.
> >>
> >> Unfortunately we're not the only user of add_point() and amdgpu doesn't
> >> want it to fail.
> > It's dangerously exposing the deadlock from incorrect seqno ordering to
> > userspace. We should be able to hit that DRM_ERROR() in testing, and be
> > able to detect the out-of-sequence timeline.
> >
> >> Best I can come up with is take the syncobj lock when signaling in
> >> get_timeline_fence_array() do the check there, unlock in __free_fence_array.
> > I think the intermediate step is a "safe" version of add_point that
> > doesn't leave the timeline broken. That still leaves a glitch visible to
> > userspace, but it should not deadlock.
> 
> 
> Right, sounds doable. What happens in execbuf after add_point() fails?
> 
> We've already handed the request to the scheduler and potentially it 
> might be running already.

Right, at present we've already submitted the request, so the batch will
be run and fence will be signaled. The failure to add to it the
timeline means that someone else already submitted a later fence and
some third party is using that syncpt instead of ours for their
dependency. So that third party will be delayed, but so long as they kept
their dependencies in order it should just be a delay.

The problem comes into play if we go ahead and insert our fence into the
dma_fence_chain out of order, breaking all the monotonic guarantees and
flipping the order of the fences. (The equivalent to taking mutexes out
of order.)

> > The way I was looking at it was to reserve a placeholder syncpt before
> > building the request and allow replacing the placeholder afterwards.
> 
> 
> That sounds fairly tricky to get right :(

Invasive, yeah, turns out one needs to start walking the fence chain
more carefully. The actual placeholder fence is pretty run of the mill
as far as dma-fence*.c goes, which is to say...
 
> The fence stuff is already full of magic.

It's full of magic.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v3 2/2] drm/i915: add syncobj timeline support
  2019-08-01  9:22           ` Chris Wilson
@ 2019-08-01  9:37             ` Lionel Landwerlin
  2019-08-01 10:18               ` Chris Wilson
  0 siblings, 1 reply; 19+ messages in thread
From: Lionel Landwerlin @ 2019-08-01  9:37 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 01/08/2019 12:22, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2019-08-01 10:01:44)
>> On 01/08/2019 11:08, Chris Wilson wrote:
>>> Quoting Lionel Landwerlin (2019-08-01 08:43:24)
>>>> On 31/07/2019 23:03, Chris Wilson wrote:
>>>>> Quoting Lionel Landwerlin (2019-07-31 15:07:33)
>>>>> I think I have convinced myself that with the split between wait before,
>>>>> signal after combined with the rule that seqno point along the syncobj
>>>>> are monotonic, you should not be able to generate an AB-BA deadlock
>>>>> between concurrent clients.
>>>>>
>>>>> Except that we need to fixup drm_syncobj_add_point() to actually apply
>>>>> that rule. Throwing an error and leaving the client stuck is less than
>>>>> ideal, we can't even recover with a GPU reset, and as they are native
>>>>> fences we don't employ a failsafe timer.
>>>> Unfortunately we're not the only user of add_point() and amdgpu doesn't
>>>> want it to fail.
>>> It's dangerously exposing the deadlock from incorrect seqno ordering to
>>> userspace. We should be able to hit that DRM_ERROR() in testing, and be
>>> able to detect the out-of-sequence timeline.
>>>
>>>> Best I can come up with is take the syncobj lock when signaling in
>>>> get_timeline_fence_array() do the check there, unlock in __free_fence_array.
>>> I think the intermediate step is a "safe" version of add_point that
>>> doesn't leave the timeline broken. That still leaves a glitch visible to
>>> userspace, but it should not deadlock.
>>
>> Right, sounds doable. What happens in execbuf after add_point() fails?
>>
>> We've already handed the request to the scheduler and potentially it
>> might be running already.
> Right, at present we've already submitted the request, so the batch will
> be run and fence will be signaled. The failure to add to it the
> timeline means that someone else already submitted a later fence and
> some third party is using that syncpt instead of ours for their
> dependency. So that third party will be delayed, but so long as they kept
> their dependencies in order it should just be a delay.


But should we return an error to userspace?


-Lionel


>
> The problem comes into play if we go ahead and insert our fence into the
> dma_fence_chain out of order, breaking all the monotonic guarantees and
> flipping the order of the fences. (The equivalent to taking mutexes out
> of order.)
>
>>> The way I was looking at it was to reserve a placeholder syncpt before
>>> building the request and allow replacing the placeholder afterwards.
>>
>> That sounds fairly tricky to get right :(
> Invasive, yeah, turns out one needs to start walking the fence chain
> more carefully. The actual placeholder fence is pretty run of the mill
> as far as dma-fence*.c goes, which is say...
>   
>> The fence stuff is already full of magic.
> It's full of magic.
> -Chris
>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v3 2/2] drm/i915: add syncobj timeline support
  2019-08-01  9:37             ` Lionel Landwerlin
@ 2019-08-01 10:18               ` Chris Wilson
  0 siblings, 0 replies; 19+ messages in thread
From: Chris Wilson @ 2019-08-01 10:18 UTC (permalink / raw)
  To: Lionel Landwerlin, intel-gfx

Quoting Lionel Landwerlin (2019-08-01 10:37:23)
> On 01/08/2019 12:22, Chris Wilson wrote:
> > Quoting Lionel Landwerlin (2019-08-01 10:01:44)
> >> On 01/08/2019 11:08, Chris Wilson wrote:
> >>> Quoting Lionel Landwerlin (2019-08-01 08:43:24)
> >>>> On 31/07/2019 23:03, Chris Wilson wrote:
> >>>>> Quoting Lionel Landwerlin (2019-07-31 15:07:33)
> >>>>> I think I have convinced myself that with the split between wait before,
> >>>>> signal after combined with the rule that seqno point along the syncobj
> >>>>> are monotonic, you should not be able to generate an AB-BA deadlock
> >>>>> between concurrent clients.
> >>>>>
> >>>>> Except that we need to fixup drm_syncobj_add_point() to actually apply
> >>>>> that rule. Throwing an error and leaving the client stuck is less than
> >>>>> ideal, we can't even recover with a GPU reset, and as they are native
> >>>>> fences we don't employ a failsafe timer.
> >>>> Unfortunately we're not the only user of add_point() and amdgpu doesn't
> >>>> want it to fail.
> >>> It's dangerously exposing the deadlock from incorrect seqno ordering to
> >>> userspace. We should be able to hit that DRM_ERROR() in testing, and be
> >>> able to detect the out-of-sequence timeline.
> >>>
> >>>> Best I can come up with is take the syncobj lock when signaling in
> >>>> get_timeline_fence_array() do the check there, unlock in __free_fence_array.
> >>> I think the intermediate step is a "safe" version of add_point that
> >>> doesn't leave the timeline broken. That still leaves a glitch visible to
> >>> userspace, but it should not deadlock.
> >>
> >> Right, sounds doable. What happens in execbuf after add_point() fails?
> >>
> >> We've already handed the request to the scheduler and potentially it
> >> might be running already.
> > Right, at present we've already submitted the request, so the batch will
> > be run and fence will be signaled. The failure to add it to the
> > timeline means that someone else already submitted a later fence and
> > some third party is using that syncpt instead of ours for their
> > dependency. So that third party will be delayed, but so long as they kept
> > their dependencies in order it should just be a delay.
> 
> 
> But should we return an error to userspace?

No, the submission itself hasn't failed and the result of the batch and
its fence will be true. It's a case where userspace lost a race with
itself -- if there was a non-error warning flag, maybe. If you keep the
language loose that a timeline semaphore wait will be after its syncpt has
been signaled, but not necessarily before the next syncpt is signaled,
unless explicitly ordered by the client.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v3 2/2] drm/i915: add syncobj timeline support
  2019-07-31 20:03   ` Chris Wilson
  2019-08-01  7:43     ` Lionel Landwerlin
@ 2019-08-01 14:29     ` Lionel Landwerlin
  2019-08-01 15:16       ` Chris Wilson
  1 sibling, 1 reply; 19+ messages in thread
From: Lionel Landwerlin @ 2019-08-01 14:29 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 31/07/2019 23:03, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2019-07-31 15:07:33)
>> -static struct drm_syncobj **
>> -get_fence_array(struct drm_i915_gem_execbuffer2 *args,
>> -               struct drm_file *file)
>> +static struct i915_eb_fences *
>> +get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
>> +{
>> +       struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences =
>> +               &eb->extensions.timeline_fences;
>> +       struct drm_i915_gem_exec_fence __user *user_fences;
>> +       struct i915_eb_fences *fences;
>> +       u64 __user *user_values;
>> +       u64 num_fences, num_user_fences = timeline_fences->fence_count;
>> +       unsigned long n;
>> +       int err;
>> +
>> +       /* Check multiplication overflow for access_ok() and kvmalloc_array() */
>> +       BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
>> +       if (num_user_fences > min_t(unsigned long,
>> +                                   ULONG_MAX / sizeof(*user_fences),
>> +                                   SIZE_MAX / sizeof(*fences)))
>> +               return ERR_PTR(-EINVAL);
>> +
>> +       user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
>> +       if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences)))
>> +               return ERR_PTR(-EFAULT);
>> +
>> +       user_values = u64_to_user_ptr(timeline_fences->values_ptr);
>> +       if (!access_ok(user_values, num_user_fences * sizeof(*user_values)))
>> +               return ERR_PTR(-EFAULT);
>> +
>> +       fences = kvmalloc_array(num_user_fences, sizeof(*fences),
>> +                               __GFP_NOWARN | GFP_KERNEL);
>> +       if (!fences)
>> +               return ERR_PTR(-ENOMEM);
>> +
>> +       BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
>> +                    ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
>> +
>> +       for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) {
>> +               struct drm_i915_gem_exec_fence user_fence;
>> +               struct drm_syncobj *syncobj;
>> +               struct dma_fence *fence = NULL;
>> +               u64 point;
>> +
>> +               if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) {
>> +                       err = -EFAULT;
>> +                       goto err;
>> +               }
>> +
>> +               if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
>> +                       err = -EINVAL;
>> +                       goto err;
>> +               }
>> +
>> +               if (__get_user(point, user_values++)) {
>> +                       err = -EFAULT;
>> +                       goto err;
>> +               }
>> +
>> +               syncobj = drm_syncobj_find(eb->file, user_fence.handle);
>> +               if (!syncobj) {
>> +                       DRM_DEBUG("Invalid syncobj handle provided\n");
>> +                       err = -ENOENT;
>> +                       goto err;
>> +               }
>> +
>> +               if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
>> +                       fence = drm_syncobj_fence_get(syncobj);
>> +                       if (!fence) {
>> +                               DRM_DEBUG("Syncobj handle has no fence\n");
>> +                               drm_syncobj_put(syncobj);
>> +                               err = -EINVAL;
>> +                               goto err;
>> +                       }
>> +
>> +                       err = dma_fence_chain_find_seqno(&fence, point);
>> +                       if (err) {
>> +                               DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
>> +                               drm_syncobj_put(syncobj);
>> +                               goto err;
>> +                       }
>> +
>> +                       /* A point might have been signaled already and
>> +                        * garbage collected from the timeline. In this case
>> +                        * just ignore the point and carry on.
>> +                        */
>> +                       if (!fence) {
>> +                               drm_syncobj_put(syncobj);
>> +                               continue;
>> +                       }
>> +               }
>> +
>> +               /*
>> +                * For timeline syncobjs we need to preallocate chains for
>> +                * later signaling.
>> +                */
>> +               if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
> if (dma_fence_chain_find_seqno() == 0)
> 	return -EINVAL;
>
> as an early sanity check?
>
>> +                       fences[num_fences].chain_fence =
>> +                               kmalloc(sizeof(*fences[num_fences].chain_fence),
>> +                                       GFP_KERNEL);
>> +                       if (!fences[num_fences].chain_fence) {
>> +                               dma_fence_put(fence);
>> +                               drm_syncobj_put(syncobj);
>> +                               err = -ENOMEM;
>> +                               DRM_DEBUG("Unable to alloc chain_fence\n");
>> +                               goto err;
>> +                       }
>> +               } else {
>> +                       fences[num_fences].chain_fence = NULL;
>> +               }
>> +
>> +               fences[num_fences].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
>> +               fences[num_fences].dma_fence = fence;
>> +               fences[num_fences].value = point;
>> +               num_fences++;
>> +       }
>> +
>> +       *out_n_fences = num_fences;
>> +
>> +       return fences;
>> +
>> +err:
>> +       __free_fence_array(fences, num_fences);
>> +       return ERR_PTR(err);
>> +}
>> @@ -2329,9 +2484,9 @@ await_fence_array(struct i915_execbuffer *eb,
>>   
>>   static void
>>   signal_fence_array(struct i915_execbuffer *eb,
>> -                  struct drm_syncobj **fences)
>> +                  struct i915_eb_fences *fences,
>> +                  int nfences)
>>   {
>> -       const unsigned int nfences = eb->args->num_cliprects;
>>          struct dma_fence * const fence = &eb->request->fence;
>>          unsigned int n;
>>   
>> @@ -2339,15 +2494,46 @@ signal_fence_array(struct i915_execbuffer *eb,
>>                  struct drm_syncobj *syncobj;
>>                  unsigned int flags;
>>   
>> -               syncobj = ptr_unpack_bits(fences[n], &flags, 2);
>> +               syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
>>                  if (!(flags & I915_EXEC_FENCE_SIGNAL))
>>                          continue;
>>   
>> -               drm_syncobj_replace_fence(syncobj, fence);
>> +               if (fences[n].chain_fence) {
>> +                       drm_syncobj_add_point(syncobj, fences[n].chain_fence,
>> +                                             fence, fences[n].value);
>> +                       /*
>> +                        * The chain's ownership is transferred to the
>> +                        * timeline.
>> +                        */
>> +                       fences[n].chain_fence = NULL;
>> +               } else {
>> +                       drm_syncobj_replace_fence(syncobj, fence);
>> +               }
>>          }
>>   }
> I think I have convinced myself that with the split between wait before,
> signal after combined with the rule that seqno point along the syncobj
> are monotonic, you should not be able to generate an AB-BA deadlock
> between concurrent clients.


Can you come up with an example that would deadlock?

Because as far as I can think, we're always submitting fences that have 
already gone through submission.


The only thing I could think of is an MI_WAIT_SEMAPHORE but that's already 
problematic today.


-Lionel


>
> Except that we need to fixup drm_syncobj_add_point() to actually apply
> that rule. Throwing an error and leaving the client stuck is less than
> ideal, we can't even recover with a GPU reset, and as they are native
> fences we don't employ a failsafe timer.
>
> Doesn't WAIT_FOR_SUBMIT throw a spanner in the works and allow for
> deadlocks? On the other hand, if they used MI_SEMA rather scheduler
> fences, give or take some coarseness in our timeslicing granularity (I
> need to enable fast context switches on user semaphores -- first attempt
> was just an interrupt storm!) it will work as well as a round-robin
> scheduler can work. (In other words, we only need coarse fences for the
> scheduler and userspace can implement its own semaphores -- with the
> open discussion on how to share the iova user semaphores between
> processes, cf ugpu.)
> -Chris
>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* ✓ Fi.CI.BAT: success for drm/i915: timeline semaphore support (rev3)
  2019-07-31 14:07 [PATCH v3 0/2] drm/i915: timeline semaphore support Lionel Landwerlin
                   ` (3 preceding siblings ...)
  2019-07-31 14:29 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2019-08-01 14:31 ` Patchwork
  2019-08-02  6:05 ` ✓ Fi.CI.IGT: " Patchwork
  5 siblings, 0 replies; 19+ messages in thread
From: Patchwork @ 2019-08-01 14:31 UTC (permalink / raw)
  To: Lionel Landwerlin; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: timeline semaphore support (rev3)
URL   : https://patchwork.freedesktop.org/series/61032/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_6602 -> Patchwork_13827
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/

Known issues
------------

  Here are the changes found in Patchwork_13827 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_exec_suspend@basic-s3:
    - fi-blb-e6850:       [PASS][1] -> [INCOMPLETE][2] ([fdo#107718])
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/fi-blb-e6850/igt@gem_exec_suspend@basic-s3.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/fi-blb-e6850/igt@gem_exec_suspend@basic-s3.html

  * igt@gem_workarounds@basic-read:
    - fi-icl-u3:          [PASS][3] -> [DMESG-WARN][4] ([fdo#107724]) +1 similar issue
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/fi-icl-u3/igt@gem_workarounds@basic-read.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/fi-icl-u3/igt@gem_workarounds@basic-read.html

  * igt@i915_pm_rpm@module-reload:
    - fi-skl-6770hq:      [PASS][5] -> [FAIL][6] ([fdo#108511])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/fi-skl-6770hq/igt@i915_pm_rpm@module-reload.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/fi-skl-6770hq/igt@i915_pm_rpm@module-reload.html

  * igt@kms_chamelium@common-hpd-after-suspend:
    - fi-kbl-7567u:       [PASS][7] -> [WARN][8] ([fdo#109380])
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/fi-kbl-7567u/igt@kms_chamelium@common-hpd-after-suspend.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/fi-kbl-7567u/igt@kms_chamelium@common-hpd-after-suspend.html

  * igt@kms_frontbuffer_tracking@basic:
    - fi-icl-dsi:         [PASS][9] -> [FAIL][10] ([fdo#103167])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/fi-icl-dsi/igt@kms_frontbuffer_tracking@basic.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/fi-icl-dsi/igt@kms_frontbuffer_tracking@basic.html

  * igt@kms_pipe_crc_basic@read-crc-pipe-c:
    - fi-kbl-7567u:       [PASS][11] -> [SKIP][12] ([fdo#109271]) +23 similar issues
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/fi-kbl-7567u/igt@kms_pipe_crc_basic@read-crc-pipe-c.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/fi-kbl-7567u/igt@kms_pipe_crc_basic@read-crc-pipe-c.html

  * igt@prime_vgem@basic-fence-mmap:
    - fi-byt-n2820:       [PASS][13] -> [INCOMPLETE][14] ([fdo#102657] / [fdo#111276])
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/fi-byt-n2820/igt@prime_vgem@basic-fence-mmap.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/fi-byt-n2820/igt@prime_vgem@basic-fence-mmap.html
    - fi-pnv-d510:        [PASS][15] -> [INCOMPLETE][16] ([fdo#110740] / [fdo#111276])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/fi-pnv-d510/igt@prime_vgem@basic-fence-mmap.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/fi-pnv-d510/igt@prime_vgem@basic-fence-mmap.html

  * igt@prime_vgem@basic-fence-read:
    - fi-gdg-551:         [PASS][17] -> [INCOMPLETE][18] ([fdo#108316])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/fi-gdg-551/igt@prime_vgem@basic-fence-read.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/fi-gdg-551/igt@prime_vgem@basic-fence-read.html

  * igt@prime_vgem@basic-wait-default:
    - fi-bxt-j4205:       [PASS][19] -> [FAIL][20] ([fdo#111277]) +1 similar issue
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/fi-bxt-j4205/igt@prime_vgem@basic-wait-default.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/fi-bxt-j4205/igt@prime_vgem@basic-wait-default.html
    - fi-bxt-dsi:         [PASS][21] -> [FAIL][22] ([fdo#111277]) +1 similar issue
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/fi-bxt-dsi/igt@prime_vgem@basic-wait-default.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/fi-bxt-dsi/igt@prime_vgem@basic-wait-default.html

  
#### Possible fixes ####

  * igt@gem_mmap@basic:
    - fi-icl-u3:          [DMESG-WARN][23] ([fdo#107724]) -> [PASS][24] +1 similar issue
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/fi-icl-u3/igt@gem_mmap@basic.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/fi-icl-u3/igt@gem_mmap@basic.html

  * igt@i915_selftest@live_execlists:
    - fi-skl-gvtdvm:      [DMESG-FAIL][25] ([fdo#111108]) -> [PASS][26]
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/fi-skl-gvtdvm/igt@i915_selftest@live_execlists.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/fi-skl-gvtdvm/igt@i915_selftest@live_execlists.html

  * igt@kms_chamelium@dp-edid-read:
    - fi-icl-u2:          [FAIL][27] ([fdo#109483] / [fdo#109635 ]) -> [PASS][28]
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/fi-icl-u2/igt@kms_chamelium@dp-edid-read.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/fi-icl-u2/igt@kms_chamelium@dp-edid-read.html

  * igt@kms_chamelium@hdmi-hpd-fast:
    - fi-kbl-7500u:       [FAIL][29] ([fdo#109485]) -> [PASS][30]
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/fi-kbl-7500u/igt@kms_chamelium@hdmi-hpd-fast.html

  * igt@prime_vgem@basic-fence-mmap:
    - fi-bwr-2160:        [INCOMPLETE][31] ([fdo#111276]) -> [PASS][32]
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/fi-bwr-2160/igt@prime_vgem@basic-fence-mmap.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/fi-bwr-2160/igt@prime_vgem@basic-fence-mmap.html

  * igt@prime_vgem@basic-fence-read:
    - fi-pnv-d510:        [INCOMPLETE][33] ([fdo#110740]) -> [PASS][34]
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/fi-pnv-d510/igt@prime_vgem@basic-fence-read.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/fi-pnv-d510/igt@prime_vgem@basic-fence-read.html

  
#### Warnings ####

  * igt@kms_chamelium@vga-edid-read:
    - fi-icl-u2:          [FAIL][35] ([fdo#109483]) -> [SKIP][36] ([fdo#109309])
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/fi-icl-u2/igt@kms_chamelium@vga-edid-read.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/fi-icl-u2/igt@kms_chamelium@vga-edid-read.html

  * igt@prime_vgem@basic-fence-mmap:
    - fi-elk-e7500:       [INCOMPLETE][37] ([fdo#103989]) -> [INCOMPLETE][38] ([fdo#103989] / [fdo#111276])
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/fi-elk-e7500/igt@prime_vgem@basic-fence-mmap.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/fi-elk-e7500/igt@prime_vgem@basic-fence-mmap.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [fdo#102657]: https://bugs.freedesktop.org/show_bug.cgi?id=102657
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103989]: https://bugs.freedesktop.org/show_bug.cgi?id=103989
  [fdo#107718]: https://bugs.freedesktop.org/show_bug.cgi?id=107718
  [fdo#107724]: https://bugs.freedesktop.org/show_bug.cgi?id=107724
  [fdo#108316]: https://bugs.freedesktop.org/show_bug.cgi?id=108316
  [fdo#108511]: https://bugs.freedesktop.org/show_bug.cgi?id=108511
  [fdo#109271]: https://bugs.freedesktop.org/show_bug.cgi?id=109271
  [fdo#109309]: https://bugs.freedesktop.org/show_bug.cgi?id=109309
  [fdo#109380]: https://bugs.freedesktop.org/show_bug.cgi?id=109380
  [fdo#109483]: https://bugs.freedesktop.org/show_bug.cgi?id=109483
  [fdo#109485]: https://bugs.freedesktop.org/show_bug.cgi?id=109485
  [fdo#109635 ]: https://bugs.freedesktop.org/show_bug.cgi?id=109635 
  [fdo#110740]: https://bugs.freedesktop.org/show_bug.cgi?id=110740
  [fdo#111045]: https://bugs.freedesktop.org/show_bug.cgi?id=111045
  [fdo#111096]: https://bugs.freedesktop.org/show_bug.cgi?id=111096
  [fdo#111108]: https://bugs.freedesktop.org/show_bug.cgi?id=111108
  [fdo#111276]: https://bugs.freedesktop.org/show_bug.cgi?id=111276
  [fdo#111277]: https://bugs.freedesktop.org/show_bug.cgi?id=111277


Participating hosts (50 -> 44)
------------------------------

  Missing    (6): fi-kbl-soraka fi-byt-squawks fi-bsw-cyan fi-icl-y fi-byt-clapper fi-bdw-samus 


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_6602 -> Patchwork_13827

  CI-20190529: 20190529
  CI_DRM_6602: dd7a77416867b70c404a75cb413f717d3ca1bfdc @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5119: b646668b947851459e0aec0ecfc54e9ed5503bfc @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_13827: a7eebf167e7c93fc61ffa3b303c77e0dacb98ea9 @ git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

a7eebf167e7c drm/i915: add syncobj timeline support
3a849a04b043 drm/i915: introduce a mechanism to extend execbuf2

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v3 2/2] drm/i915: add syncobj timeline support
  2019-08-01 14:29     ` Lionel Landwerlin
@ 2019-08-01 15:16       ` Chris Wilson
  2019-08-01 15:35         ` Lionel Landwerlin
  2019-08-11  8:51         ` Lionel Landwerlin
  0 siblings, 2 replies; 19+ messages in thread
From: Chris Wilson @ 2019-08-01 15:16 UTC (permalink / raw)
  To: Lionel Landwerlin, intel-gfx

Quoting Lionel Landwerlin (2019-08-01 15:29:34)
> On 31/07/2019 23:03, Chris Wilson wrote:
> > Quoting Lionel Landwerlin (2019-07-31 15:07:33)
> >> -static struct drm_syncobj **
> >> -get_fence_array(struct drm_i915_gem_execbuffer2 *args,
> >> -               struct drm_file *file)
> >> +static struct i915_eb_fences *
> >> +get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
> >> +{
> >> +       struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences =
> >> +               &eb->extensions.timeline_fences;
> >> +       struct drm_i915_gem_exec_fence __user *user_fences;
> >> +       struct i915_eb_fences *fences;
> >> +       u64 __user *user_values;
> >> +       u64 num_fences, num_user_fences = timeline_fences->fence_count;
> >> +       unsigned long n;
> >> +       int err;
> >> +
> >> +       /* Check multiplication overflow for access_ok() and kvmalloc_array() */
> >> +       BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
> >> +       if (num_user_fences > min_t(unsigned long,
> >> +                                   ULONG_MAX / sizeof(*user_fences),
> >> +                                   SIZE_MAX / sizeof(*fences)))
> >> +               return ERR_PTR(-EINVAL);
> >> +
> >> +       user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
> >> +       if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences)))
> >> +               return ERR_PTR(-EFAULT);
> >> +
> >> +       user_values = u64_to_user_ptr(timeline_fences->values_ptr);
> >> +       if (!access_ok(user_values, num_user_fences * sizeof(*user_values)))
> >> +               return ERR_PTR(-EFAULT);
> >> +
> >> +       fences = kvmalloc_array(num_user_fences, sizeof(*fences),
> >> +                               __GFP_NOWARN | GFP_KERNEL);
> >> +       if (!fences)
> >> +               return ERR_PTR(-ENOMEM);
> >> +
> >> +       BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
> >> +                    ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
> >> +
> >> +       for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) {
> >> +               struct drm_i915_gem_exec_fence user_fence;
> >> +               struct drm_syncobj *syncobj;
> >> +               struct dma_fence *fence = NULL;
> >> +               u64 point;
> >> +
> >> +               if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) {
> >> +                       err = -EFAULT;
> >> +                       goto err;
> >> +               }
> >> +
> >> +               if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
> >> +                       err = -EINVAL;
> >> +                       goto err;
> >> +               }
> >> +
> >> +               if (__get_user(point, user_values++)) {
> >> +                       err = -EFAULT;
> >> +                       goto err;
> >> +               }
> >> +
> >> +               syncobj = drm_syncobj_find(eb->file, user_fence.handle);
> >> +               if (!syncobj) {
> >> +                       DRM_DEBUG("Invalid syncobj handle provided\n");
> >> +                       err = -ENOENT;
> >> +                       goto err;
> >> +               }
> >> +
> >> +               if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
> >> +                       fence = drm_syncobj_fence_get(syncobj);
> >> +                       if (!fence) {
> >> +                               DRM_DEBUG("Syncobj handle has no fence\n");
> >> +                               drm_syncobj_put(syncobj);
> >> +                               err = -EINVAL;
> >> +                               goto err;
> >> +                       }
> >> +
> >> +                       err = dma_fence_chain_find_seqno(&fence, point);
> >> +                       if (err) {
> >> +                               DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
> >> +                               drm_syncobj_put(syncobj);
> >> +                               goto err;
> >> +                       }
> >> +
> >> +                       /* A point might have been signaled already and
> >> +                        * garbage collected from the timeline. In this case
> >> +                        * just ignore the point and carry on.
> >> +                        */
> >> +                       if (!fence) {
> >> +                               drm_syncobj_put(syncobj);
> >> +                               continue;
> >> +                       }
> >> +               }
> >> +
> >> +               /*
> >> +                * For timeline syncobjs we need to preallocate chains for
> >> +                * later signaling.
> >> +                */
> >> +               if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
> > if (dma_fence_chain_find_seqno() == 0)
> >       return -EINVAL;
> >
> > as an early sanity check?
> >
> >> +                       fences[num_fences].chain_fence =
> >> +                               kmalloc(sizeof(*fences[num_fences].chain_fence),
> >> +                                       GFP_KERNEL);
> >> +                       if (!fences[num_fences].chain_fence) {
> >> +                               dma_fence_put(fence);
> >> +                               drm_syncobj_put(syncobj);
> >> +                               err = -ENOMEM;
> >> +                               DRM_DEBUG("Unable to alloc chain_fence\n");
> >> +                               goto err;
> >> +                       }
> >> +               } else {
> >> +                       fences[num_fences].chain_fence = NULL;
> >> +               }
> >> +
> >> +               fences[num_fences].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
> >> +               fences[num_fences].dma_fence = fence;
> >> +               fences[num_fences].value = point;
> >> +               num_fences++;
> >> +       }
> >> +
> >> +       *out_n_fences = num_fences;
> >> +
> >> +       return fences;
> >> +
> >> +err:
> >> +       __free_fence_array(fences, num_fences);
> >> +       return ERR_PTR(err);
> >> +}
> >> @@ -2329,9 +2484,9 @@ await_fence_array(struct i915_execbuffer *eb,
> >>   
> >>   static void
> >>   signal_fence_array(struct i915_execbuffer *eb,
> >> -                  struct drm_syncobj **fences)
> >> +                  struct i915_eb_fences *fences,
> >> +                  int nfences)
> >>   {
> >> -       const unsigned int nfences = eb->args->num_cliprects;
> >>          struct dma_fence * const fence = &eb->request->fence;
> >>          unsigned int n;
> >>   
> >> @@ -2339,15 +2494,46 @@ signal_fence_array(struct i915_execbuffer *eb,
> >>                  struct drm_syncobj *syncobj;
> >>                  unsigned int flags;
> >>   
> >> -               syncobj = ptr_unpack_bits(fences[n], &flags, 2);
> >> +               syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
> >>                  if (!(flags & I915_EXEC_FENCE_SIGNAL))
> >>                          continue;
> >>   
> >> -               drm_syncobj_replace_fence(syncobj, fence);
> >> +               if (fences[n].chain_fence) {
> >> +                       drm_syncobj_add_point(syncobj, fences[n].chain_fence,
> >> +                                             fence, fences[n].value);
> >> +                       /*
> >> +                        * The chain's ownership is transferred to the
> >> +                        * timeline.
> >> +                        */
> >> +                       fences[n].chain_fence = NULL;
> >> +               } else {
> >> +                       drm_syncobj_replace_fence(syncobj, fence);
> >> +               }
> >>          }
> >>   }
> > I think I have convinced myself that with the split between wait before,
> > signal after combined with the rule that seqno point along the syncobj
> > are monotonic, you should not be able to generate an AB-BA deadlock
> > between concurrent clients.
> 
> 
> Can you come up with an example that would deadlock?

Timeline holds 2,1; a wait on 2 will fail with -EINVAL to userspace.
(Though possibly perfectly valid behaviour on the part of the user.)

Timeline holds 2, with 1 being submitted. A wait on 1 waits on 2
instead. If 1 gains a dependency on A (e.g. bad userspace or an
implicit fence, it's a concurrent wait/submit so expect the worst, i.e.
userspace has to be racing with itself to get into this mess), you
now have a deadlock. (The assumption being that the syncpt along the
timeline are themselves not strictly ordered, and considering they are
external syncobj, that seems like a reasonable generalisation.)
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v3 2/2] drm/i915: add syncobj timeline support
  2019-08-01 15:16       ` Chris Wilson
@ 2019-08-01 15:35         ` Lionel Landwerlin
  2019-08-11  8:51         ` Lionel Landwerlin
  1 sibling, 0 replies; 19+ messages in thread
From: Lionel Landwerlin @ 2019-08-01 15:35 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 01/08/2019 18:16, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2019-08-01 15:29:34)
>> On 31/07/2019 23:03, Chris Wilson wrote:
>>> Quoting Lionel Landwerlin (2019-07-31 15:07:33)
...
>>> I think I have convinced myself that with the split between wait before,
>>> signal after combined with the rule that seqno point along the syncobj
>>> are monotonic, you should not be able to generate an AB-BA deadlock
>>> between concurrent clients.
>>
>> Can you come up with an example that would deadlock?
> Timeline holds 2,1; a wait on 2 will fail with -EINVAL to userspace.
> (Though possibly perfectly valid behaviour on the part of the user.)
>
> Timeline holds 2, with 1 being submitted. A wait on 1 waits on 2
> instead. If 1 gains a dependency on A (e.g. bad userspace or an


You mean : 1 gains a dependency on 2?


> implicit fence, it's a concurrent wait/submit so expect the worst, i.e.
> userspace has to be racing with itself to get into this mess), you
> now have a deadlock.


Not quite sure I see why...

2 doesn't have any dependency on 1, it just happens to have a number higher.


-Lionel


>   (The assumption being that the syncpt along the
> timeline are themselves not strictly ordered, and considering they are
> external syncobj, that seems like a reasonable generalisation.)
> -Chris
>

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* ✓ Fi.CI.IGT: success for drm/i915: timeline semaphore support (rev3)
  2019-07-31 14:07 [PATCH v3 0/2] drm/i915: timeline semaphore support Lionel Landwerlin
                   ` (4 preceding siblings ...)
  2019-08-01 14:31 ` ✓ Fi.CI.BAT: success " Patchwork
@ 2019-08-02  6:05 ` Patchwork
  5 siblings, 0 replies; 19+ messages in thread
From: Patchwork @ 2019-08-02  6:05 UTC (permalink / raw)
  To: Lionel Landwerlin; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: timeline semaphore support (rev3)
URL   : https://patchwork.freedesktop.org/series/61032/
State : success

== Summary ==

CI Bug Log - changes from CI_DRM_6602_full -> Patchwork_13827_full
====================================================

Summary
-------

  **SUCCESS**

  No regressions found.

  

Known issues
------------

  Here are the changes found in Patchwork_13827_full that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@gem_ctx_isolation@rcs0-s3:
    - shard-apl:          [PASS][1] -> [DMESG-WARN][2] ([fdo#108566]) +2 similar issues
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/shard-apl6/igt@gem_ctx_isolation@rcs0-s3.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/shard-apl8/igt@gem_ctx_isolation@rcs0-s3.html

  * igt@gem_exec_balancer@smoke:
    - shard-iclb:         [PASS][3] -> [SKIP][4] ([fdo#110854])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/shard-iclb2/igt@gem_exec_balancer@smoke.html
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/shard-iclb6/igt@gem_exec_balancer@smoke.html

  * igt@gem_pwrite@big-gtt-random:
    - shard-iclb:         [PASS][5] -> [INCOMPLETE][6] ([fdo#107713])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/shard-iclb2/igt@gem_pwrite@big-gtt-random.html
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/shard-iclb7/igt@gem_pwrite@big-gtt-random.html

  * igt@gem_workarounds@suspend-resume-context:
    - shard-skl:          [PASS][7] -> [INCOMPLETE][8] ([fdo#104108]) +1 similar issue
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/shard-skl7/igt@gem_workarounds@suspend-resume-context.html
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/shard-skl8/igt@gem_workarounds@suspend-resume-context.html

  * igt@kms_flip_tiling@flip-to-yf-tiled:
    - shard-skl:          [PASS][9] -> [FAIL][10] ([fdo#107931] / [fdo#108134])
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/shard-skl3/igt@kms_flip_tiling@flip-to-yf-tiled.html
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/shard-skl8/igt@kms_flip_tiling@flip-to-yf-tiled.html

  * igt@kms_frontbuffer_tracking@fbc-1p-offscren-pri-shrfb-draw-pwrite:
    - shard-iclb:         [PASS][11] -> [FAIL][12] ([fdo#103167]) +2 similar issues
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/shard-iclb8/igt@kms_frontbuffer_tracking@fbc-1p-offscren-pri-shrfb-draw-pwrite.html
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/shard-iclb4/igt@kms_frontbuffer_tracking@fbc-1p-offscren-pri-shrfb-draw-pwrite.html

  * igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c:
    - shard-kbl:          [PASS][13] -> [DMESG-WARN][14] ([fdo#108566]) +1 similar issue
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/shard-kbl2/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/shard-kbl4/igt@kms_pipe_crc_basic@suspend-read-crc-pipe-c.html

  * igt@kms_plane_alpha_blend@pipe-b-constant-alpha-min:
    - shard-skl:          [PASS][15] -> [FAIL][16] ([fdo#108145])
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/shard-skl2/igt@kms_plane_alpha_blend@pipe-b-constant-alpha-min.html
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/shard-skl4/igt@kms_plane_alpha_blend@pipe-b-constant-alpha-min.html

  * igt@kms_plane_alpha_blend@pipe-b-coverage-7efc:
    - shard-skl:          [PASS][17] -> [FAIL][18] ([fdo#108145] / [fdo#110403])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/shard-skl1/igt@kms_plane_alpha_blend@pipe-b-coverage-7efc.html
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/shard-skl1/igt@kms_plane_alpha_blend@pipe-b-coverage-7efc.html

  * igt@kms_psr@no_drrs:
    - shard-iclb:         [PASS][19] -> [FAIL][20] ([fdo#108341])
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/shard-iclb3/igt@kms_psr@no_drrs.html
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/shard-iclb1/igt@kms_psr@no_drrs.html

  * igt@kms_psr@psr2_primary_mmap_cpu:
    - shard-iclb:         [PASS][21] -> [SKIP][22] ([fdo#109441]) +2 similar issues
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/shard-iclb2/igt@kms_psr@psr2_primary_mmap_cpu.html
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/shard-iclb3/igt@kms_psr@psr2_primary_mmap_cpu.html

  * igt@kms_vblank@pipe-c-wait-forked-busy-hang:
    - shard-hsw:          [PASS][23] -> [INCOMPLETE][24] ([fdo#103540])
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/shard-hsw4/igt@kms_vblank@pipe-c-wait-forked-busy-hang.html
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/shard-hsw8/igt@kms_vblank@pipe-c-wait-forked-busy-hang.html

  
#### Possible fixes ####

  * igt@kms_big_fb@y-tiled-32bpp-rotate-0:
    - shard-apl:          [INCOMPLETE][25] ([fdo#103927]) -> [PASS][26]
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/shard-apl7/igt@kms_big_fb@y-tiled-32bpp-rotate-0.html
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/shard-apl2/igt@kms_big_fb@y-tiled-32bpp-rotate-0.html

  * igt@kms_cursor_crc@pipe-a-cursor-suspend:
    - shard-kbl:          [DMESG-WARN][27] ([fdo#108566]) -> [PASS][28] +2 similar issues
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/shard-kbl4/igt@kms_cursor_crc@pipe-a-cursor-suspend.html
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/shard-kbl6/igt@kms_cursor_crc@pipe-a-cursor-suspend.html

  * igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-indfb-draw-pwrite:
    - shard-iclb:         [FAIL][29] ([fdo#103167]) -> [PASS][30] +4 similar issues
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/shard-iclb7/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-indfb-draw-pwrite.html
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/shard-iclb3/igt@kms_frontbuffer_tracking@fbc-1p-primscrn-pri-indfb-draw-pwrite.html

  * igt@kms_plane@plane-panning-bottom-right-suspend-pipe-c-planes:
    - shard-skl:          [INCOMPLETE][31] ([fdo#104108]) -> [PASS][32] +1 similar issue
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/shard-skl8/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-c-planes.html
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/shard-skl2/igt@kms_plane@plane-panning-bottom-right-suspend-pipe-c-planes.html

  * igt@kms_plane_lowres@pipe-a-tiling-x:
    - shard-iclb:         [INCOMPLETE][33] ([fdo#107713]) -> [PASS][34]
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/shard-iclb7/igt@kms_plane_lowres@pipe-a-tiling-x.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/shard-iclb2/igt@kms_plane_lowres@pipe-a-tiling-x.html

  * igt@kms_psr@psr2_cursor_plane_onoff:
    - shard-iclb:         [SKIP][35] ([fdo#109441]) -> [PASS][36] +2 similar issues
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/shard-iclb6/igt@kms_psr@psr2_cursor_plane_onoff.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/shard-iclb2/igt@kms_psr@psr2_cursor_plane_onoff.html

  * igt@kms_vblank@pipe-a-ts-continuation-suspend:
    - shard-apl:          [DMESG-WARN][37] ([fdo#108566]) -> [PASS][38] +3 similar issues
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/shard-apl7/igt@kms_vblank@pipe-a-ts-continuation-suspend.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/shard-apl3/igt@kms_vblank@pipe-a-ts-continuation-suspend.html

  * igt@perf@blocking:
    - shard-skl:          [FAIL][39] ([fdo#110728]) -> [PASS][40] +1 similar issue
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/shard-skl3/igt@perf@blocking.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/shard-skl5/igt@perf@blocking.html

  * igt@prime_vgem@basic-sync-default:
    - shard-apl:          [FAIL][41] ([fdo#111277]) -> [PASS][42]
   [41]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/shard-apl7/igt@prime_vgem@basic-sync-default.html
   [42]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/shard-apl5/igt@prime_vgem@basic-sync-default.html

  
#### Warnings ####

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-mmap-gtt:
    - shard-skl:          [FAIL][43] ([fdo#103167]) -> [FAIL][44] ([fdo#108040])
   [43]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/shard-skl3/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-mmap-gtt.html
   [44]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/shard-skl8/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-mmap-gtt.html

  * igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-mmap-wc:
    - shard-skl:          [FAIL][45] ([fdo#108040]) -> [FAIL][46] ([fdo#103167])
   [45]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_6602/shard-skl3/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-mmap-wc.html
   [46]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/shard-skl8/igt@kms_frontbuffer_tracking@fbcpsr-1p-primscrn-pri-shrfb-draw-mmap-wc.html

  
  [fdo#103167]: https://bugs.freedesktop.org/show_bug.cgi?id=103167
  [fdo#103540]: https://bugs.freedesktop.org/show_bug.cgi?id=103540
  [fdo#103927]: https://bugs.freedesktop.org/show_bug.cgi?id=103927
  [fdo#104108]: https://bugs.freedesktop.org/show_bug.cgi?id=104108
  [fdo#107713]: https://bugs.freedesktop.org/show_bug.cgi?id=107713
  [fdo#107931]: https://bugs.freedesktop.org/show_bug.cgi?id=107931
  [fdo#108040]: https://bugs.freedesktop.org/show_bug.cgi?id=108040
  [fdo#108134]: https://bugs.freedesktop.org/show_bug.cgi?id=108134
  [fdo#108145]: https://bugs.freedesktop.org/show_bug.cgi?id=108145
  [fdo#108341]: https://bugs.freedesktop.org/show_bug.cgi?id=108341
  [fdo#108566]: https://bugs.freedesktop.org/show_bug.cgi?id=108566
  [fdo#109441]: https://bugs.freedesktop.org/show_bug.cgi?id=109441
  [fdo#110403]: https://bugs.freedesktop.org/show_bug.cgi?id=110403
  [fdo#110728]: https://bugs.freedesktop.org/show_bug.cgi?id=110728
  [fdo#110854]: https://bugs.freedesktop.org/show_bug.cgi?id=110854
  [fdo#111277]: https://bugs.freedesktop.org/show_bug.cgi?id=111277


Participating hosts (10 -> 10)
------------------------------

  No changes in participating hosts


Build changes
-------------

  * CI: CI-20190529 -> None
  * Linux: CI_DRM_6602 -> Patchwork_13827

  CI-20190529: 20190529
  CI_DRM_6602: dd7a77416867b70c404a75cb413f717d3ca1bfdc @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_5119: b646668b947851459e0aec0ecfc54e9ed5503bfc @ git://anongit.freedesktop.org/xorg/app/intel-gpu-tools
  Patchwork_13827: a7eebf167e7c93fc61ffa3b303c77e0dacb98ea9 @ git://anongit.freedesktop.org/gfx-ci/linux
  piglit_4509: fdc5a4ca11124ab8413c7988896eec4c97336694 @ git://anongit.freedesktop.org/piglit

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_13827/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH v3 2/2] drm/i915: add syncobj timeline support
  2019-08-01 15:16       ` Chris Wilson
  2019-08-01 15:35         ` Lionel Landwerlin
@ 2019-08-11  8:51         ` Lionel Landwerlin
  1 sibling, 0 replies; 19+ messages in thread
From: Lionel Landwerlin @ 2019-08-11  8:51 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On 01/08/2019 17:16, Chris Wilson wrote:
> Quoting Lionel Landwerlin (2019-08-01 15:29:34)
>> On 31/07/2019 23:03, Chris Wilson wrote:
>>> Quoting Lionel Landwerlin (2019-07-31 15:07:33)
>>>> -static struct drm_syncobj **
>>>> -get_fence_array(struct drm_i915_gem_execbuffer2 *args,
>>>> -               struct drm_file *file)
>>>> +static struct i915_eb_fences *
>>>> +get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences)
>>>> +{
>>>> +       struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences =
>>>> +               &eb->extensions.timeline_fences;
>>>> +       struct drm_i915_gem_exec_fence __user *user_fences;
>>>> +       struct i915_eb_fences *fences;
>>>> +       u64 __user *user_values;
>>>> +       u64 num_fences, num_user_fences = timeline_fences->fence_count;
>>>> +       unsigned long n;
>>>> +       int err;
>>>> +
>>>> +       /* Check multiplication overflow for access_ok() and kvmalloc_array() */
>>>> +       BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
>>>> +       if (num_user_fences > min_t(unsigned long,
>>>> +                                   ULONG_MAX / sizeof(*user_fences),
>>>> +                                   SIZE_MAX / sizeof(*fences)))
>>>> +               return ERR_PTR(-EINVAL);
>>>> +
>>>> +       user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
>>>> +       if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences)))
>>>> +               return ERR_PTR(-EFAULT);
>>>> +
>>>> +       user_values = u64_to_user_ptr(timeline_fences->values_ptr);
>>>> +       if (!access_ok(user_values, num_user_fences * sizeof(*user_values)))
>>>> +               return ERR_PTR(-EFAULT);
>>>> +
>>>> +       fences = kvmalloc_array(num_user_fences, sizeof(*fences),
>>>> +                               __GFP_NOWARN | GFP_KERNEL);
>>>> +       if (!fences)
>>>> +               return ERR_PTR(-ENOMEM);
>>>> +
>>>> +       BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
>>>> +                    ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
>>>> +
>>>> +       for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) {
>>>> +               struct drm_i915_gem_exec_fence user_fence;
>>>> +               struct drm_syncobj *syncobj;
>>>> +               struct dma_fence *fence = NULL;
>>>> +               u64 point;
>>>> +
>>>> +               if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) {
>>>> +                       err = -EFAULT;
>>>> +                       goto err;
>>>> +               }
>>>> +
>>>> +               if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) {
>>>> +                       err = -EINVAL;
>>>> +                       goto err;
>>>> +               }
>>>> +
>>>> +               if (__get_user(point, user_values++)) {
>>>> +                       err = -EFAULT;
>>>> +                       goto err;
>>>> +               }
>>>> +
>>>> +               syncobj = drm_syncobj_find(eb->file, user_fence.handle);
>>>> +               if (!syncobj) {
>>>> +                       DRM_DEBUG("Invalid syncobj handle provided\n");
>>>> +                       err = -ENOENT;
>>>> +                       goto err;
>>>> +               }
>>>> +
>>>> +               if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
>>>> +                       fence = drm_syncobj_fence_get(syncobj);
>>>> +                       if (!fence) {
>>>> +                               DRM_DEBUG("Syncobj handle has no fence\n");
>>>> +                               drm_syncobj_put(syncobj);
>>>> +                               err = -EINVAL;
>>>> +                               goto err;
>>>> +                       }
>>>> +
>>>> +                       err = dma_fence_chain_find_seqno(&fence, point);
>>>> +                       if (err) {
>>>> +                               DRM_DEBUG("Syncobj handle missing requested point %llu\n", point);
>>>> +                               drm_syncobj_put(syncobj);
>>>> +                               goto err;
>>>> +                       }
>>>> +
>>>> +                       /* A point might have been signaled already and
>>>> +                        * garbage collected from the timeline. In this case
>>>> +                        * just ignore the point and carry on.
>>>> +                        */
>>>> +                       if (!fence) {
>>>> +                               drm_syncobj_put(syncobj);
>>>> +                               continue;
>>>> +                       }
>>>> +               }
>>>> +
>>>> +               /*
>>>> +                * For timeline syncobjs we need to preallocate chains for
>>>> +                * later signaling.
>>>> +                */
>>>> +               if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
>>> if (dma_fence_chain_find_seqno() == 0)
>>>        return -EINVAL;
>>>
>>> as an early sanity check?
>>>
>>>> +                       fences[num_fences].chain_fence =
>>>> +                               kmalloc(sizeof(*fences[num_fences].chain_fence),
>>>> +                                       GFP_KERNEL);
>>>> +                       if (!fences[num_fences].chain_fence) {
>>>> +                               dma_fence_put(fence);
>>>> +                               drm_syncobj_put(syncobj);
>>>> +                               err = -ENOMEM;
>>>> +                               DRM_DEBUG("Unable to alloc chain_fence\n");
>>>> +                               goto err;
>>>> +                       }
>>>> +               } else {
>>>> +                       fences[num_fences].chain_fence = NULL;
>>>> +               }
>>>> +
>>>> +               fences[num_fences].syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
>>>> +               fences[num_fences].dma_fence = fence;
>>>> +               fences[num_fences].value = point;
>>>> +               num_fences++;
>>>> +       }
>>>> +
>>>> +       *out_n_fences = num_fences;
>>>> +
>>>> +       return fences;
>>>> +
>>>> +err:
>>>> +       __free_fence_array(fences, num_fences);
>>>> +       return ERR_PTR(err);
>>>> +}
>>>> @@ -2329,9 +2484,9 @@ await_fence_array(struct i915_execbuffer *eb,
>>>>    
>>>>    static void
>>>>    signal_fence_array(struct i915_execbuffer *eb,
>>>> -                  struct drm_syncobj **fences)
>>>> +                  struct i915_eb_fences *fences,
>>>> +                  int nfences)
>>>>    {
>>>> -       const unsigned int nfences = eb->args->num_cliprects;
>>>>           struct dma_fence * const fence = &eb->request->fence;
>>>>           unsigned int n;
>>>>    
>>>> @@ -2339,15 +2494,46 @@ signal_fence_array(struct i915_execbuffer *eb,
>>>>                   struct drm_syncobj *syncobj;
>>>>                   unsigned int flags;
>>>>    
>>>> -               syncobj = ptr_unpack_bits(fences[n], &flags, 2);
>>>> +               syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2);
>>>>                   if (!(flags & I915_EXEC_FENCE_SIGNAL))
>>>>                           continue;
>>>>    
>>>> -               drm_syncobj_replace_fence(syncobj, fence);
>>>> +               if (fences[n].chain_fence) {
>>>> +                       drm_syncobj_add_point(syncobj, fences[n].chain_fence,
>>>> +                                             fence, fences[n].value);
>>>> +                       /*
>>>> +                        * The chain's ownership is transferred to the
>>>> +                        * timeline.
>>>> +                        */
>>>> +                       fences[n].chain_fence = NULL;
>>>> +               } else {
>>>> +                       drm_syncobj_replace_fence(syncobj, fence);
>>>> +               }
>>>>           }
>>>>    }
>>> I think I have convinced myself that with the split between wait before,
>>> signal after combined with the rule that seqno point along the syncobj
>>> are monotonic, you should not be able to generate an AB-BA deadlock
>>> between concurrent clients.
>>
>> Can you come up with an example that would deadlock?
> Timeline holds 2,1; a wait on 2 will fail with -EINVAL to userspace.
> (Though possibly perfectly valid behaviour on the part of the user.)
>
> Timeline holds 2, with 1 being submitted. A wait on 1 waits on 2
> instead. If 1 gains a dependency on A (e.g. bad userspace or an
> implicit fence, it's a concurrent wait/submit so expect the worst, i.e.
> userspace has to be racing with itself to get into this mess), you
> now have a deadlock. (The assumption being that the syncpt along the
> timeline are themselves not strictly ordered, and considering they are
> external syncobj, that seems like a reasonable generalisation.)
> -Chris
>
Hey Chris,


Does this discussion around how the dma-fence-chain works prevent those 
AB-BA deadlocks? : 
https://lists.freedesktop.org/archives/dri-devel/2019-August/229289.html

Essentially any point added to the timeline will not be signaled until 
all previous points are.

This is ensured by the dma-fence-chain node, which waits for all previous 
dma-fence-chain nodes to be signaled as well as its own wrapped fence (in our 
case an i915 one).


Thanks,


-Lionel


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2019-08-11  8:51 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-07-31 14:07 [PATCH v3 0/2] drm/i915: timeline semaphore support Lionel Landwerlin
2019-07-31 14:07 ` [PATCH v3 1/2] drm/i915: introduce a mechanism to extend execbuf2 Lionel Landwerlin
2019-07-31 21:41   ` Chris Wilson
2019-07-31 14:07 ` [PATCH v3 2/2] drm/i915: add syncobj timeline support Lionel Landwerlin
2019-07-31 20:03   ` Chris Wilson
2019-08-01  7:43     ` Lionel Landwerlin
2019-08-01  8:08       ` Chris Wilson
2019-08-01  9:01         ` Lionel Landwerlin
2019-08-01  9:22           ` Chris Wilson
2019-08-01  9:37             ` Lionel Landwerlin
2019-08-01 10:18               ` Chris Wilson
2019-08-01 14:29     ` Lionel Landwerlin
2019-08-01 15:16       ` Chris Wilson
2019-08-01 15:35         ` Lionel Landwerlin
2019-08-11  8:51         ` Lionel Landwerlin
2019-07-31 14:27 ` ✗ Fi.CI.CHECKPATCH: warning for drm/i915: timeline semaphore support (rev3) Patchwork
2019-07-31 14:29 ` ✗ Fi.CI.SPARSE: " Patchwork
2019-08-01 14:31 ` ✓ Fi.CI.BAT: success " Patchwork
2019-08-02  6:05 ` ✓ Fi.CI.IGT: " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.