* [RFC] drm/i915: Android native sync support
@ 2015-01-22 11:15 Tvrtko Ursulin
  2015-01-22 11:42 ` Chris Wilson
                   ` (2 more replies)
  0 siblings, 3 replies; 38+ messages in thread
From: Tvrtko Ursulin @ 2015-01-22 11:15 UTC (permalink / raw)
  To: Intel-gfx

From: Jesse Barnes <jbarnes@virtuousgeek.org>

Add Android native sync support with fences exported as file descriptors via
the execbuf ioctl (rsvd2 field is used).

This is a continuation of Jesse Barnes's previous work, squashed to arrive at
the final destination, cleaned up, with some fixes and preliminary light
testing.

GEM requests are extended with fence structures which are associated with
Android sync fences exported to user space via file descriptors. Fences which
are waited upon while exported to userspace are referenced and added to
the irq_queue so they are signalled when requests are completed. There is
no overhead in the case where fences are not requested.
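
For example, userspace could request and wait on a fence roughly like this
(an illustrative sketch only; a sync fence fd can be waited on with poll()):

   struct drm_i915_gem_execbuffer2 eb;
   struct pollfd pfd;

   memset(&eb, 0, sizeof(eb));
   /* ... buffers_ptr, buffer_count and batch set up as usual ... */
   eb.flags |= I915_EXEC_FENCE_OUT;
   drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &eb);

   pfd.fd = (int)eb.rsvd2;  /* out fence fd is returned in rsvd2 */
   pfd.events = POLLIN;
   poll(&pfd, 1, -1);       /* blocks until the request completes */
   close(pfd.fd);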

Based on patches by Jesse Barnes:
   drm/i915: Android sync points for i915 v3
   drm/i915: add fences to the request struct
   drm/i915: sync fence fixes/updates

To do:
   * Extend driver data with context id / ring id.
   * More testing.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: John Harrison <John.C.Harrison@Intel.com>
---
 drivers/gpu/drm/i915/Kconfig               |  14 ++
 drivers/gpu/drm/i915/Makefile              |   1 +
 drivers/gpu/drm/i915/i915_drv.h            |  25 +++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  19 +++
 drivers/gpu/drm/i915/i915_sync.c           | 248 +++++++++++++++++++++++++++++
 include/uapi/drm/i915_drm.h                |   8 +-
 6 files changed, 312 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_sync.c

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 4e39ab3..6b23d17 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -43,6 +43,20 @@ config DRM_I915_KMS
 
 	  If in doubt, say "Y".
 
+config DRM_I915_SYNC
+	bool "Enable explicit sync support"
+	depends on DRM_I915
+	default y
+	select STAGING
+	select ANDROID
+	select SYNC
+	help
+	  Choose this option to enable Android native sync support and the
+	  corresponding i915 driver code to expose it.  Slightly increases
+	  driver size and pulls in sync support from staging.
+
+	  If in doubt, say "Y".
+
 config DRM_I915_FBDEV
 	bool "Enable legacy fbdev support for the modesetting intel driver"
 	depends on DRM_I915
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 16e3dc3..bcff481 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -58,6 +58,7 @@ i915-y += intel_audio.o \
 	  intel_sprite.o
 i915-$(CONFIG_ACPI)		+= intel_acpi.o intel_opregion.o
 i915-$(CONFIG_DRM_I915_FBDEV)	+= intel_fbdev.o
+i915-$(CONFIG_DRM_I915_SYNC)	+= i915_sync.o
 
 # modesetting output/encoder code
 i915-y += dvo_ch7017.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 66f0c60..f57b3a1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -49,6 +49,7 @@
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
 #include <linux/pm_qos.h>
+#include <linux/fence.h>
 
 /* General customization:
  */
@@ -2090,6 +2091,13 @@ struct drm_i915_gem_request {
 	struct list_head client_list;
 
 	uint32_t uniq;
+
+#ifdef CONFIG_DRM_I915_SYNC
+	/* core fence obj for this request, may be exported */
+	struct fence fence;
+
+	wait_queue_t wait;
+#endif
 };
 
 void i915_gem_request_free(struct kref *req_ref);
@@ -2581,6 +2589,23 @@ void i915_init_vm(struct drm_i915_private *dev_priv,
 void i915_gem_free_object(struct drm_gem_object *obj);
 void i915_gem_vma_destroy(struct i915_vma *vma);
 
+/* i915_sync.c */
+struct sync_fence;
+
+#ifdef CONFIG_DRM_I915_SYNC
+int i915_create_sync_fence_ring(struct intel_engine_cs *ring,
+				struct intel_context *ctx,
+				struct sync_fence **sync_fence, int *fence_fd);
+#else
+static inline
+int i915_create_sync_fence_ring(struct intel_engine_cs *ring,
+				struct intel_context *ctx,
+				struct sync_fence **sync_fence, int *fence_fd)
+{
+	return -ENODEV;
+}
+#endif
+
 #define PIN_MAPPABLE 0x1
 #define PIN_NONBLOCK 0x2
 #define PIN_GLOBAL 0x4
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index e3ef177..191ecaa 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -32,6 +32,7 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 #include <linux/dma_remapping.h>
+#include "../../../staging/android/sync.h"
 
 #define  __EXEC_OBJECT_HAS_PIN (1<<31)
 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
@@ -1356,6 +1357,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	u32 flags;
 	int ret;
 	bool need_relocs;
+	struct sync_fence *sync_fence = NULL;
+	int fence_fd = -1;
 
 	if (!i915_gem_check_execbuffer(args))
 		return -EINVAL;
@@ -1504,9 +1507,25 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	} else
 		exec_start += i915_gem_obj_offset(batch_obj, vm);
 
+	if (args->flags & I915_EXEC_FENCE_OUT) {
+		ret = i915_create_sync_fence_ring(ring, ctx,
+						  &sync_fence, &fence_fd);
+		if (ret)
+			goto sync_err;
+	}
+
 	ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args,
 				      &eb->vmas, batch_obj, exec_start, flags);
 
+	if (!ret && sync_fence) {
+		sync_fence_install(sync_fence, fence_fd);
+		args->rsvd2 = fence_fd;
+	} else if (sync_fence) {
+		fput(sync_fence->file);
+		put_unused_fd(fence_fd);
+	}
+
+sync_err:
 	/*
 	 * FIXME: We crucially rely upon the active tracking for the (ppgtt)
 	 * batch vma for correctness. For less ugly and less fragility this
diff --git a/drivers/gpu/drm/i915/i915_sync.c b/drivers/gpu/drm/i915/i915_sync.c
new file mode 100644
index 0000000..009338b
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_sync.c
@@ -0,0 +1,248 @@
+/*
+ * Copyright © 2013-2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jesse Barnes <jbarnes@virtuousgeek.org>
+ *    Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+ *
+ */
+
+#include <drm/drmP.h>
+#include <drm/drm_vma_manager.h>
+#include <drm/i915_drm.h>
+#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+#include <linux/oom.h>
+#include <linux/shmem_fs.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/pci.h>
+#include <linux/dma-buf.h>
+#include "../../../staging/android/sync.h"
+
+static DEFINE_SPINLOCK(fence_lock); /* NB: Lock is required for the fence API
+				       but is currently unused here. */
+
+/*
+ * i915 Android native sync fences.
+ *
+ * We implement sync points in terms of i915 seqnos. They're exposed through
+ * the DRM_IOCTL_I915_GEM_EXECBUFFER2 ioctl, and can be mixed and matched
+ * with other Android timelines and aggregated into sync_fences, etc.
+ *
+ * TODO:
+ *   * Display engine fences.
+ *   * Extend driver data with context id / ring id.
+ */
+
+#define to_i915_request(x) container_of(x, struct drm_i915_gem_request, fence)
+
+static const char *i915_fence_get_driver_name(struct fence *fence)
+{
+	return "i915";
+}
+
+static const char *i915_fence_ring_get_timeline_name(struct fence *fence)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+
+	return req->ring->name;
+}
+
+static int
+i915_fence_ring_check(wait_queue_t *wait, unsigned mode, int flags, void *key)
+{
+	struct drm_i915_gem_request *req = wait->private;
+	struct intel_engine_cs *ring = req->ring;
+
+	if (!i915_gem_request_completed(req, false))
+		return 0;
+
+	fence_signal_locked(&req->fence);
+
+	__remove_wait_queue(&ring->irq_queue, wait);
+	ring->irq_put(ring);
+
+	return 0;
+}
+
+static bool i915_fence_ring_enable_signaling(struct fence *fence)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+	struct intel_engine_cs *ring = req->ring;
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	wait_queue_t *wait = &req->wait;
+
+	/* queue fence wait queue on irq queue and get fence */
+	if (i915_gem_request_completed(req, false) ||
+	    i915_terminally_wedged(&dev_priv->gpu_error))
+		return false;
+
+	if (!ring->irq_get(ring))
+		return false;
+
+	if (i915_gem_request_completed(req, false)) {
+		ring->irq_put(ring);
+		return true;
+	}
+
+	wait->flags = 0;
+	wait->private = req;
+	wait->func = i915_fence_ring_check;
+
+	__add_wait_queue(&ring->irq_queue, wait);
+
+	return true;
+}
+
+static bool i915_fence_ring_signaled(struct fence *fence)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+
+	return i915_gem_request_completed(req, false);
+}
+
+static signed long
+i915_fence_ring_wait(struct fence *fence, bool intr, signed long timeout_js)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+	struct drm_i915_private *dev_priv = req->ring->dev->dev_private;
+	int ret;
+	s64 timeout;
+
+	timeout = jiffies_to_nsecs(timeout_js);
+
+	ret = __i915_wait_request(req,
+				  atomic_read(&dev_priv->gpu_error.reset_counter),
+				  intr, &timeout, NULL);
+	if (ret == -ETIME)
+		return nsecs_to_jiffies(timeout);
+
+	return ret;
+}
+
+static int
+i915_fence_ring_fill_driver_data(struct fence *fence, void *data, int size)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+
+	if (size < sizeof(req->seqno))
+		return -ENOMEM;
+
+	memcpy(data, &req->seqno,
+	       sizeof(req->seqno));
+
+	return sizeof(req->seqno);
+}
+
+static void i915_fence_ring_value_str(struct fence *fence, char *str, int size)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+
+	snprintf(str, size, "%u", req->seqno);
+}
+
+static void
+i915_fence_ring_timeline_value_str(struct fence *fence, char *str, int size)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+	struct intel_engine_cs *ring = req->ring;
+
+	snprintf(str, size, "%u", ring->get_seqno(ring, false));
+}
+
+static void i915_fence_release(struct fence *fence)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+	struct drm_device *dev = req->ring->dev;
+
+	mutex_lock(&dev->struct_mutex);
+	i915_gem_request_unreference(req);
+	mutex_unlock(&dev->struct_mutex);
+}
+
+static struct fence_ops i915_fence_ring_ops = {
+	.get_driver_name =	i915_fence_get_driver_name,
+	.get_timeline_name =	i915_fence_ring_get_timeline_name,
+	.enable_signaling =	i915_fence_ring_enable_signaling,
+	.signaled =		i915_fence_ring_signaled,
+	.wait =			i915_fence_ring_wait,
+	.fill_driver_data =	i915_fence_ring_fill_driver_data,
+	.fence_value_str =	i915_fence_ring_value_str,
+	.timeline_value_str =	i915_fence_ring_timeline_value_str,
+	.release =		i915_fence_release
+};
+
+int i915_create_sync_fence_ring(struct intel_engine_cs *ring,
+				struct intel_context *ctx,
+				struct sync_fence **sync_fence, int *fence_fd)
+{
+	struct drm_i915_gem_request *req;
+	struct fence *fence = NULL;
+	struct sync_fence *sfence;
+	char ring_name[6] = "ring0";
+	int fd;
+	int ret;
+
+	if (!intel_ring_initialized(ring)) {
+		DRM_DEBUG("Ring not initialized!\n");
+		return -ENODEV;
+	}
+
+	fd = get_unused_fd_flags(O_CLOEXEC);
+	if (fd < 0) {
+		DRM_DEBUG("No available file descriptor!\n");
+		return fd;
+	}
+
+	ret = ring->add_request(ring);
+	if (ret) {
+		DRM_ERROR("add_request failed\n");
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	req = ring->outstanding_lazy_request;
+
+	i915_gem_request_reference(req);
+	fence = &req->fence;
+	fence_init(fence, &i915_fence_ring_ops, &fence_lock, ctx->user_handle,
+		   req->seqno);
+
+	ring_name[4] += ring->id;
+	sfence = sync_fence_create_dma(ring_name, fence);
+	if (!sfence) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	*sync_fence = sfence;
+	*fence_fd = fd;
+
+	return 0;
+err:
+	if (fence)
+		i915_gem_request_unreference(req);
+	put_unused_fd(fd);
+	return ret;
+}
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 2e559f6e..c197770 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -246,7 +246,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_HWS_ADDR		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_HWS_ADDR, struct drm_i915_gem_init)
 #define DRM_IOCTL_I915_GEM_INIT		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
 #define DRM_IOCTL_I915_GEM_EXECBUFFER	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
-#define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
+#define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
 #define DRM_IOCTL_I915_GEM_PIN		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
 #define DRM_IOCTL_I915_GEM_UNPIN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
 #define DRM_IOCTL_I915_GEM_BUSY		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
@@ -718,7 +718,7 @@ struct drm_i915_gem_execbuffer2 {
 #define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */
 	__u64 flags;
 	__u64 rsvd1; /* now used for context info */
-	__u64 rsvd2;
+	__u64 rsvd2; /* now used for fence fd */
 };
 
 /** Resets the SO write offset registers for transform feedback on gen7. */
@@ -750,7 +750,9 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_HANDLE_LUT		(1<<12)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1)
+#define I915_EXEC_FENCE_OUT		(1<<13)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_FENCE_OUT<<1)
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
2.2.0


* Re: [RFC] drm/i915: Android native sync support
  2015-01-22 11:15 [RFC] drm/i915: Android native sync support Tvrtko Ursulin
@ 2015-01-22 11:42 ` Chris Wilson
  2015-01-22 13:41   ` Tvrtko Ursulin
  2015-01-23 11:13 ` [RFC v2] " Tvrtko Ursulin
  2015-01-27 11:29 ` [RFC v3] " Tvrtko Ursulin
  2 siblings, 1 reply; 38+ messages in thread
From: Chris Wilson @ 2015-01-22 11:42 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Thu, Jan 22, 2015 at 11:15:40AM +0000, Tvrtko Ursulin wrote:
> From: Jesse Barnes <jbarnes@virtuousgeek.org>
> 
> Add Android native sync support with fences exported as file descriptors via
> the execbuf ioctl (rsvd2 field is used).
> 
> This is a continuation of Jesse Barnes's previous work, squashed to arrive at
> the final destination, cleaned up, with some fixes and preliminary light
> testing.
> 
> GEM requests are extended with fence structures which are associated with
> Android sync fences exported to user space via file descriptors. Fences which
> are waited upon while exported to userspace are referenced and added to
> the irq_queue so they are signalled when requests are completed. There is
> no overhead in the case where fences are not requested.
> 
> Based on patches by Jesse Barnes:
>    drm/i915: Android sync points for i915 v3
>    drm/i915: add fences to the request struct
>    drm/i915: sync fence fixes/updates
> 
> To do:
>    * Extend driver data with context id / ring id.
>    * More testing.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
> Cc: John Harrison <John.C.Harrison@Intel.com>
> ---
>  drivers/gpu/drm/i915/Kconfig               |  14 ++
>  drivers/gpu/drm/i915/Makefile              |   1 +
>  drivers/gpu/drm/i915/i915_drv.h            |  25 +++
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |  19 +++
>  drivers/gpu/drm/i915/i915_sync.c           | 248 +++++++++++++++++++++++++++++
>  include/uapi/drm/i915_drm.h                |   8 +-
>  6 files changed, 312 insertions(+), 3 deletions(-)
>  create mode 100644 drivers/gpu/drm/i915/i915_sync.c
> 
> diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
> index 4e39ab3..6b23d17 100644
> --- a/drivers/gpu/drm/i915/Kconfig
> +++ b/drivers/gpu/drm/i915/Kconfig
> @@ -43,6 +43,20 @@ config DRM_I915_KMS
>  
>  	  If in doubt, say "Y".
>  
> +config DRM_I915_SYNC
> +	bool "Enable explicit sync support"
> +	depends on DRM_I915
> +	default y
> +	select STAGING
> +	select ANDROID
> +	select SYNC
> +	help
> +	  Choose this option to enable Android native sync support and the
> +	  corresponding i915 driver code to expose it.  Slightly increases
> +	  driver size and pulls in sync support from staging.
> +
> +	  If in doubt, say "Y".

So we should move i915 into staging? I think you mean
"default y if STAGING"

> +	if (args->flags & I915_EXEC_FENCE_OUT) {
> +		ret = i915_create_sync_fence_ring(ring, ctx,
> +						  &sync_fence, &fence_fd);
> +		if (ret)
> +			goto sync_err;
> +	}
> +
>  	ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args,
>  				      &eb->vmas, batch_obj, exec_start, flags);

You emit the fence prior to the execution of the batch? Interesting. Not
exactly where I would expect the fence. Both before/after are
justifiable.
 
> +	if (!ret && sync_fence) {
> +		sync_fence_install(sync_fence, fence_fd);
> +		args->rsvd2 = fence_fd;
> +	} else if (sync_fence) {
> +		fput(sync_fence->file);
> +		put_unused_fd(fence_fd);
> +	}

I think this can be tidied up and made consistent with the existing
style of error handling thusly:

if (ret)
	goto fence_err;

if (sync_fence) {
	/* transfer ownership to userspace */
	sync_fence_install(sync_fence, fence_fd);
	args->rsvd2 = fence_fd; 
	sync_fence = NULL; 
}

fence_err:
if (sync_fence) {
	fput(sync_fence->file);
	put_unused_fd(fence_fd);
}

> +sync_err:


> +static signed long
> +i915_fence_ring_wait(struct fence *fence, bool intr, signed long timeout_js)
> +{
> +	struct drm_i915_gem_request *req = to_i915_request(fence);
> +	struct drm_i915_private *dev_priv = req->ring->dev->dev_private;
> +	int ret;
> +	s64 timeout;
> +
> +	timeout = jiffies_to_nsecs(timeout_js);
> +
> +	ret = __i915_wait_request(req,
> +				  atomic_read(&dev_priv->gpu_error.reset_counter),
> +				  intr, &timeout, NULL);
> +	if (ret == -ETIME)
> +		return nsecs_to_jiffies(timeout);

This should be equivalent to return 0; intended?
> +
> +	return ret;
> +}

> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 2e559f6e..c197770 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -246,7 +246,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_IOCTL_I915_HWS_ADDR		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_HWS_ADDR, struct drm_i915_gem_init)
>  #define DRM_IOCTL_I915_GEM_INIT		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
>  #define DRM_IOCTL_I915_GEM_EXECBUFFER	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
> -#define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
> +#define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
>  #define DRM_IOCTL_I915_GEM_PIN		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
>  #define DRM_IOCTL_I915_GEM_UNPIN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
>  #define DRM_IOCTL_I915_GEM_BUSY		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
> @@ -718,7 +718,7 @@ struct drm_i915_gem_execbuffer2 {
>  #define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */
>  	__u64 flags;
>  	__u64 rsvd1; /* now used for context info */
> -	__u64 rsvd2;
> +	__u64 rsvd2; /* now used for fence fd */
If we are going to use this slot for fence fd, may as well make it
supply both before/after.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [RFC] drm/i915: Android native sync support
  2015-01-22 11:42 ` Chris Wilson
@ 2015-01-22 13:41   ` Tvrtko Ursulin
  2015-01-22 13:49     ` Chris Wilson
  2015-01-22 14:04     ` Damien Lespiau
  0 siblings, 2 replies; 38+ messages in thread
From: Tvrtko Ursulin @ 2015-01-22 13:41 UTC (permalink / raw)
  To: Chris Wilson, Intel-gfx


On 01/22/2015 11:42 AM, Chris Wilson wrote:
> On Thu, Jan 22, 2015 at 11:15:40AM +0000, Tvrtko Ursulin wrote:
>> From: Jesse Barnes <jbarnes@virtuousgeek.org>
>>
>> Add Android native sync support with fences exported as file descriptors via
>> the execbuf ioctl (rsvd2 field is used).
>>
>> This is a continuation of Jesse Barnes's previous work, squashed to arrive at
>> the final destination, cleaned up, with some fixes and preliminary light
>> testing.
>>
>> GEM requests are extended with fence structures which are associated with
>> Android sync fences exported to user space via file descriptors. Fences which
>> are waited upon while exported to userspace are referenced and added to
>> the irq_queue so they are signalled when requests are completed. There is
>> no overhead in the case where fences are not requested.
>>
>> Based on patches by Jesse Barnes:
>>     drm/i915: Android sync points for i915 v3
>>     drm/i915: add fences to the request struct
>>     drm/i915: sync fence fixes/updates
>>
>> To do:
>>     * Extend driver data with context id / ring id.
>>     * More testing.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
>> Cc: John Harrison <John.C.Harrison@Intel.com>
>> ---
>>   drivers/gpu/drm/i915/Kconfig               |  14 ++
>>   drivers/gpu/drm/i915/Makefile              |   1 +
>>   drivers/gpu/drm/i915/i915_drv.h            |  25 +++
>>   drivers/gpu/drm/i915/i915_gem_execbuffer.c |  19 +++
>>   drivers/gpu/drm/i915/i915_sync.c           | 248 +++++++++++++++++++++++++++++
>>   include/uapi/drm/i915_drm.h                |   8 +-
>>   6 files changed, 312 insertions(+), 3 deletions(-)
>>   create mode 100644 drivers/gpu/drm/i915/i915_sync.c
>>
>> diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
>> index 4e39ab3..6b23d17 100644
>> --- a/drivers/gpu/drm/i915/Kconfig
>> +++ b/drivers/gpu/drm/i915/Kconfig
>> @@ -43,6 +43,20 @@ config DRM_I915_KMS
>>
>>   	  If in doubt, say "Y".
>>
>> +config DRM_I915_SYNC
>> +	bool "Enable explicit sync support"
>> +	depends on DRM_I915
>> +	default y
>> +	select STAGING
>> +	select ANDROID
>> +	select SYNC
>> +	help
>> +	  Choose this option to enable Android native sync support and the
>> +	  corresponding i915 driver code to expose it.  Slightly increases
>> +	  driver size and pulls in sync support from staging.
>> +
>> +	  If in doubt, say "Y".
>
> So we should move i915 into staging? I think you mean
> "default y if STAGING"

Did not pay attention to this part, will change.

>> +	if (args->flags & I915_EXEC_FENCE_OUT) {
>> +		ret = i915_create_sync_fence_ring(ring, ctx,
>> +						  &sync_fence, &fence_fd);
>> +		if (ret)
>> +			goto sync_err;
>> +	}
>> +
>>   	ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args,
>>   				      &eb->vmas, batch_obj, exec_start, flags);
>
> You emit the fence prior to the execution of the batch? Interesting. Not
> exactly where I would expect the fence. Both before/after are
> justifiable.

What do you consider emitting? To me that is fd_install and that happens
after the request was successfully submitted. I thought it was tidier to set
up required objects before and then install the fence, or discard it,
depending on the outcome. You think differently?

>> +	if (!ret && sync_fence) {
>> +		sync_fence_install(sync_fence, fence_fd);
>> +		args->rsvd2 = fence_fd;
>> +	} else if (sync_fence) {
>> +		fput(sync_fence->file);
>> +		put_unused_fd(fence_fd);
>> +	}
>
> I think this can be tidied up and made consistent with the existing
> style of error handling thusly:
>
> if (ret)
> 	goto fence_err;
>
> if (sync_fence) {
> 	/* transfer ownership to userspace */
> 	sync_fence_install(sync_fence, fence_fd);
> 	args->rsvd2 = fence_fd;
> 	sync_fence = NULL;
> }
>
> fence_err:
> if (sync_fence) {
> 	fput(sync_fence->file);
> 	put_unused_fd(fence_fd);
> }

Yeah makes sense, will change.

>> +sync_err:
>
>
>> +static signed long
>> +i915_fence_ring_wait(struct fence *fence, bool intr, signed long timeout_js)
>> +{
>> +	struct drm_i915_gem_request *req = to_i915_request(fence);
>> +	struct drm_i915_private *dev_priv = req->ring->dev->dev_private;
>> +	int ret;
>> +	s64 timeout;
>> +
>> +	timeout = jiffies_to_nsecs(timeout_js);
>> +
>> +	ret = __i915_wait_request(req,
>> +				  atomic_read(&dev_priv->gpu_error.reset_counter),
>> +				  intr, &timeout, NULL);
>> +	if (ret == -ETIME)
>> +		return nsecs_to_jiffies(timeout);
>
> This should be equivalent to return 0; intended?

I can't see that it was intended, so I'll simplify it unless Jesse knows
better.

>> +
>> +	return ret;
>> +}
>
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index 2e559f6e..c197770 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -246,7 +246,7 @@ typedef struct _drm_i915_sarea {
>>   #define DRM_IOCTL_I915_HWS_ADDR		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_HWS_ADDR, struct drm_i915_gem_init)
>>   #define DRM_IOCTL_I915_GEM_INIT		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
>>   #define DRM_IOCTL_I915_GEM_EXECBUFFER	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
>> -#define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
>> +#define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
>>   #define DRM_IOCTL_I915_GEM_PIN		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
>>   #define DRM_IOCTL_I915_GEM_UNPIN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
>>   #define DRM_IOCTL_I915_GEM_BUSY		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
>> @@ -718,7 +718,7 @@ struct drm_i915_gem_execbuffer2 {
>>   #define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */
>>   	__u64 flags;
>>   	__u64 rsvd1; /* now used for context info */
>> -	__u64 rsvd2;
>> +	__u64 rsvd2; /* now used for fence fd */
> If we are going to use this slot for fence fd, may as well make it
> supply both before/after.

Not sure what you mean by before/after?

In the future it will take in the input fence fd and return the output 
fence if that's what you mean.

Regards,

Tvrtko

* Re: [RFC] drm/i915: Android native sync support
  2015-01-22 13:41   ` Tvrtko Ursulin
@ 2015-01-22 13:49     ` Chris Wilson
  2015-01-22 13:56       ` Tvrtko Ursulin
  2015-01-22 14:04     ` Damien Lespiau
  1 sibling, 1 reply; 38+ messages in thread
From: Chris Wilson @ 2015-01-22 13:49 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Thu, Jan 22, 2015 at 01:41:48PM +0000, Tvrtko Ursulin wrote:
> 
> On 01/22/2015 11:42 AM, Chris Wilson wrote:
> >>+	if (args->flags & I915_EXEC_FENCE_OUT) {
> >>+		ret = i915_create_sync_fence_ring(ring, ctx,
> >>+						  &sync_fence, &fence_fd);
> >>+		if (ret)
> >>+			goto sync_err;
> >>+	}
> >>+
> >>  	ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args,
> >>  				      &eb->vmas, batch_obj, exec_start, flags);
> >
> >You emit the fence prior to the execution of the batch? Interesting. Not
> >exactly where I would expect the fence. Both before/after are
> >justifiable.
> 
> What do you consider emitting? To me that is fd_install and that
> happens after the request was successfully submitted. I thought it was
> tidier to set up required objects before and then install the fence,
> or discard it, depending on the outcome. You think differently?

i915_create_sync_fence_ring() inserts a breadcrumb into the ring that
fires before we execute the execbuf (which then gets its own request +
breadcrumb).

I believe the intention is to hook the fence into the breadcrumb that
fires after the execbuf, i.e. to add it to the execbuf request rather
than create a new request all for itself.
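
Roughly, the ordering as posted is (a sketch, not the literal code):

   i915_create_sync_fence_ring()
      add_request()           <- emits breadcrumb A (before the batch)
      fence_init(seqno of A)  <- fence tied to breadcrumb A
   do_execbuf()               <- batch, plus its own request/breadcrumb B

so the fence signals at A, before the batch runs, whereas the intention is
for it to signal at B, once the batch itself has completed.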
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [RFC] drm/i915: Android native sync support
  2015-01-22 13:49     ` Chris Wilson
@ 2015-01-22 13:56       ` Tvrtko Ursulin
  0 siblings, 0 replies; 38+ messages in thread
From: Tvrtko Ursulin @ 2015-01-22 13:56 UTC (permalink / raw)
  To: Chris Wilson, Intel-gfx


On 01/22/2015 01:49 PM, Chris Wilson wrote:
> On Thu, Jan 22, 2015 at 01:41:48PM +0000, Tvrtko Ursulin wrote:
>>
>> On 01/22/2015 11:42 AM, Chris Wilson wrote:
>>>> +	if (args->flags & I915_EXEC_FENCE_OUT) {
>>>> +		ret = i915_create_sync_fence_ring(ring, ctx,
>>>> +						  &sync_fence, &fence_fd);
>>>> +		if (ret)
>>>> +			goto sync_err;
>>>> +	}
>>>> +
>>>>   	ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args,
>>>>   				      &eb->vmas, batch_obj, exec_start, flags);
>>>
>>> You emit the fence prior to the execution of the batch? Interesting. Not
>>> exactly where I would expect the fence. Both before/after are
>>> justifiable.
>>
>> What do you consider emitting? To me that is fd_install and that
>> happens after the request was successfully submitted. I thought it was
>> tidier to set up required objects before and then install the fence,
>> or discard it, depending on the outcome. You think differently?
>
> i915_create_sync_fence_ring() inserts a breadcrumb into the ring that
> fires before we execute the execbuf (which then gets its own request +
> breadcrumb).
>
> I believe the intention is to hook the fence into the breadcrumb that
> fires after the execbuf, i.e. to add it to the execbuf request rather
> than create a new request all for itself.

You are right, it should be after.

I assumed that ring->add_request() is just to make sure there is a 
request structure for this submission, if some other operation hasn't 
created it already. This was based on my relatively old recollection of 
how this code works. It looks like I need to re-visit this.

Regards,

Tvrtko

* Re: [RFC] drm/i915: Android native sync support
  2015-01-22 13:41   ` Tvrtko Ursulin
  2015-01-22 13:49     ` Chris Wilson
@ 2015-01-22 14:04     ` Damien Lespiau
  2015-01-22 15:28       ` Tvrtko Ursulin
  1 sibling, 1 reply; 38+ messages in thread
From: Damien Lespiau @ 2015-01-22 14:04 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Thu, Jan 22, 2015 at 01:41:48PM +0000, Tvrtko Ursulin wrote:
> >>@@ -718,7 +718,7 @@ struct drm_i915_gem_execbuffer2 {
> >>  #define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */
> >>  	__u64 flags;
> >>  	__u64 rsvd1; /* now used for context info */
> >>-	__u64 rsvd2;
> >>+	__u64 rsvd2; /* now used for fence fd */
> >If we are going to use this slot for fence fd, may as well make it
> >supply both before/after.
> 
> Not sure what you mean by before/after?
> 
> In the future it will take in the input fence fd and return the output fence
> if that's what you mean.

BTW, couldn't we take 32bits here and leave 32bits reserved?

-- 
Damien

* Re: [RFC] drm/i915: Android native sync support
  2015-01-22 14:04     ` Damien Lespiau
@ 2015-01-22 15:28       ` Tvrtko Ursulin
  2015-01-22 15:47         ` Damien Lespiau
  0 siblings, 1 reply; 38+ messages in thread
From: Tvrtko Ursulin @ 2015-01-22 15:28 UTC (permalink / raw)
  To: Damien Lespiau; +Cc: Intel-gfx


On 01/22/2015 02:04 PM, Damien Lespiau wrote:
> On Thu, Jan 22, 2015 at 01:41:48PM +0000, Tvrtko Ursulin wrote:
>>>> @@ -718,7 +718,7 @@ struct drm_i915_gem_execbuffer2 {
>>>>   #define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */
>>>>   	__u64 flags;
>>>>   	__u64 rsvd1; /* now used for context info */
>>>> -	__u64 rsvd2;
>>>> +	__u64 rsvd2; /* now used for fence fd */
>>> If we are going to use this slot for fence fd, may as well make it
>>> supply both before/after.
>>
>> Not sure what you mean by before/after?
>>
>> In the future it will take in the input fence fd and return the output fence
>> if that's what you mean.
>
> BTW, couldn't we take 32bits here and leave 32bits reserved?

I guess so. Any ideas why the same wasn't done with rsvd1 - I see only 
32-bits are used for context id there?
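
If we did split it, I suppose it could look something like this from
userspace (hypothetical packing, just for illustration):

   /* lower 32 bits carry the in fence fd supplied by userspace,
      upper 32 bits return the out fence fd from the kernel */
   eb.rsvd2 = (__u64)(__u32)in_fence_fd;
   drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &eb);
   out_fence_fd = (int)(eb.rsvd2 >> 32);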

Regards,

Tvrtko



* Re: [RFC] drm/i915: Android native sync support
  2015-01-22 15:28       ` Tvrtko Ursulin
@ 2015-01-22 15:47         ` Damien Lespiau
  2015-01-22 15:54           ` Tvrtko Ursulin
  0 siblings, 1 reply; 38+ messages in thread
From: Damien Lespiau @ 2015-01-22 15:47 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Thu, Jan 22, 2015 at 03:28:04PM +0000, Tvrtko Ursulin wrote:
> 
> On 01/22/2015 02:04 PM, Damien Lespiau wrote:
> >On Thu, Jan 22, 2015 at 01:41:48PM +0000, Tvrtko Ursulin wrote:
> >>>>@@ -718,7 +718,7 @@ struct drm_i915_gem_execbuffer2 {
> >>>>  #define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */
> >>>>  	__u64 flags;
> >>>>  	__u64 rsvd1; /* now used for context info */
> >>>>-	__u64 rsvd2;
> >>>>+	__u64 rsvd2; /* now used for fence fd */
> >>>If we are going to use this slot for fence fd, may as well make it
> >>>supply both before/after.
> >>
> >>Not sure what you mean by before/after?
> >>
> >>In the future it will take in the input fence fd and return the output fence
> >>if that's what you mean.
> >
> >BTW, couldn't we take 32bits here and leave 32bits reserved?
> 
> I guess so. Any ideas why the same wasn't done with rsvd1 - I see only
> 32-bits are used for context id there?

Nope, no idea, except maybe if someone was doing an assert(p->rsvd2 == 0);
Splitting the field would break ABI.

-- 
Damien

* Re: [RFC] drm/i915: Android native sync support
  2015-01-22 15:47         ` Damien Lespiau
@ 2015-01-22 15:54           ` Tvrtko Ursulin
  2015-01-22 16:07             ` Damien Lespiau
  0 siblings, 1 reply; 38+ messages in thread
From: Tvrtko Ursulin @ 2015-01-22 15:54 UTC (permalink / raw)
  To: Damien Lespiau; +Cc: Intel-gfx


On 01/22/2015 03:47 PM, Damien Lespiau wrote:
> On Thu, Jan 22, 2015 at 03:28:04PM +0000, Tvrtko Ursulin wrote:
>>
>> On 01/22/2015 02:04 PM, Damien Lespiau wrote:
>>> On Thu, Jan 22, 2015 at 01:41:48PM +0000, Tvrtko Ursulin wrote:
>>>>>> @@ -718,7 +718,7 @@ struct drm_i915_gem_execbuffer2 {
>>>>>>   #define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */
>>>>>>   	__u64 flags;
>>>>>>   	__u64 rsvd1; /* now used for context info */
>>>>>> -	__u64 rsvd2;
>>>>>> +	__u64 rsvd2; /* now used for fence fd */
>>>>> If we are going to use this slot for fence fd, may as well make it
>>>>> supply both before/after.
>>>>
>>>> Not sure what you mean by before/after?
>>>>
>>>> In the future it will take in the input fence fd and return the output fence
>>>> if that's what you mean.
>>>
>>> BTW, couldn't we take 32bits here and leave 32bits reserved?
>>
>> I guess so. Any ideas why the same wasn't done with rsvd1 - I see only
>> 32-bits are used for context id there?
>
> Nope, no idea, except maybe if someone was doing an assert(p->rsvd2 == 0);
> Splitting the field would break ABI.

I don't see it, it wouldn't assert unless the same, presumably old, 
userspace was also setting flags it doesn't know about?

Regards,

Tvrtko


* Re: [RFC] drm/i915: Android native sync support
  2015-01-22 15:54           ` Tvrtko Ursulin
@ 2015-01-22 16:07             ` Damien Lespiau
  0 siblings, 0 replies; 38+ messages in thread
From: Damien Lespiau @ 2015-01-22 16:07 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Thu, Jan 22, 2015 at 03:54:29PM +0000, Tvrtko Ursulin wrote:
> 
> On 01/22/2015 03:47 PM, Damien Lespiau wrote:
> >On Thu, Jan 22, 2015 at 03:28:04PM +0000, Tvrtko Ursulin wrote:
> >>
> >>On 01/22/2015 02:04 PM, Damien Lespiau wrote:
> >>>On Thu, Jan 22, 2015 at 01:41:48PM +0000, Tvrtko Ursulin wrote:
> >>>>>>@@ -718,7 +718,7 @@ struct drm_i915_gem_execbuffer2 {
> >>>>>>  #define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */
> >>>>>>  	__u64 flags;
> >>>>>>  	__u64 rsvd1; /* now used for context info */
> >>>>>>-	__u64 rsvd2;
> >>>>>>+	__u64 rsvd2; /* now used for fence fd */
> >>>>>If we are going to use this slot for fence fd, may as well make it
> >>>>>supply both before/after.
> >>>>
> >>>>Not sure what you mean by before/after?
> >>>>
> >>>>In the future it will take in the input fence fd and return the output fence
> >>>>if that's what you mean.
> >>>
> >>>BTW, couldn't we take 32bits here and leave 32bits reserved?
> >>
> >>I guess so. Any ideas why the same wasn't done with rsvd1 - I see only
> >>32-bits are used for context id there?
> >
> >Nope, no idea, except maybe if someone was doing an assert(p->rsvd2 == 0);
> >Splitting the field would break ABI.
> 
> I don't see it, it wouldn't assert unless the same, presumably old,
> userspace was also setting flags it doesn't know about?

Meh, doesn't really apply here.

-- 
Damien

* [RFC v2] drm/i915: Android native sync support
  2015-01-22 11:15 [RFC] drm/i915: Android native sync support Tvrtko Ursulin
  2015-01-22 11:42 ` Chris Wilson
@ 2015-01-23 11:13 ` Tvrtko Ursulin
  2015-01-23 11:27   ` Chris Wilson
  2015-01-27 11:29 ` [RFC v3] " Tvrtko Ursulin
  2 siblings, 1 reply; 38+ messages in thread
From: Tvrtko Ursulin @ 2015-01-23 11:13 UTC (permalink / raw)
  To: Intel-gfx

From: Jesse Barnes <jbarnes@virtuousgeek.org>

Add Android native sync support with fences exported as file descriptors via
the execbuf ioctl (rsvd2 field is used).

This is a continuation of Jesse Barnes's previous work, squashed to arrive at
the final destination, cleaned up, with some fixes and preliminary light
testing.

GEM requests are extended with fence structures which are associated with
Android sync fences exported to user space via file descriptors. Fences which
are waited upon while exported to userspace are referenced and added to
the irq_queue so they are signalled when requests are completed. There is
no overhead in the case where fences are not requested.

Based on patches by Jesse Barnes:
   drm/i915: Android sync points for i915 v3
   drm/i915: add fences to the request struct
   drm/i915: sync fence fixes/updates

To do:
   * Extend driver data with context id / ring id (TBD).

v2:
   * Code review comments. (Chris Wilson)
   * ring->add_request() was the wrong thing to call - rebased on top of John
     Harrison's (drm/i915: Early alloc request) to ensure the correct request
     is present before creating a fence.
   * Take a request reference from the signalling path as well, to ensure the
     request sticks around while its fence is on the request completion wait
     queue.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: John Harrison <John.C.Harrison@Intel.com>
---
 drivers/gpu/drm/i915/Kconfig               |  14 ++
 drivers/gpu/drm/i915/Makefile              |   1 +
 drivers/gpu/drm/i915/i915_drv.h            |  25 +++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  24 +++
 drivers/gpu/drm/i915/i915_sync.c           | 241 +++++++++++++++++++++++++++++
 include/uapi/drm/i915_drm.h                |   8 +-
 6 files changed, 310 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_sync.c

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 4e39ab3..abafe68 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -43,6 +43,20 @@ config DRM_I915_KMS
 
 	  If in doubt, say "Y".
 
+config DRM_I915_SYNC
+	bool "Enable explicit sync support"
+	depends on DRM_I915
+	default y if STAGING
+	select STAGING
+	select ANDROID
+	select SYNC
+	help
+	  Choose this option to enable Android native sync support and the
+	  corresponding i915 driver code to expose it.  Slightly increases
+	  driver size and pulls in sync support from staging.
+
+	  If in doubt, say "Y".
+
 config DRM_I915_FBDEV
 	bool "Enable legacy fbdev support for the modesetting intel driver"
 	depends on DRM_I915
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 16e3dc3..bcff481 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -58,6 +58,7 @@ i915-y += intel_audio.o \
 	  intel_sprite.o
 i915-$(CONFIG_ACPI)		+= intel_acpi.o intel_opregion.o
 i915-$(CONFIG_DRM_I915_FBDEV)	+= intel_fbdev.o
+i915-$(CONFIG_DRM_I915_SYNC)	+= i915_sync.o
 
 # modesetting output/encoder code
 i915-y += dvo_ch7017.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 023f422..75729ab 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -49,6 +49,7 @@
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
 #include <linux/pm_qos.h>
+#include <linux/fence.h>
 
 /* General customization:
  */
@@ -2092,6 +2093,13 @@ struct drm_i915_gem_request {
 	struct list_head client_list;
 
 	uint32_t uniq;
+
+#ifdef CONFIG_DRM_I915_SYNC
+	/* core fence obj for this request, may be exported */
+	struct fence fence;
+
+	wait_queue_t wait;
+#endif
 };
 
 void i915_gem_request_free(struct kref *req_ref);
@@ -2583,6 +2591,23 @@ void i915_init_vm(struct drm_i915_private *dev_priv,
 void i915_gem_free_object(struct drm_gem_object *obj);
 void i915_gem_vma_destroy(struct i915_vma *vma);
 
+/* i915_sync.c */
+struct sync_fence;
+
+#ifdef CONFIG_DRM_I915_SYNC
+int i915_create_sync_fence_ring(struct intel_engine_cs *ring,
+				struct intel_context *ctx,
+				struct sync_fence **sync_fence, int *fence_fd);
+#else
+static inline
+int i915_create_sync_fence_ring(struct intel_engine_cs *ring,
+				struct intel_context *ctx,
+				struct sync_fence **sync_fence, int *fence_fd)
+{
+	return -ENODEV;
+}
+#endif
+
 #define PIN_MAPPABLE 0x1
 #define PIN_NONBLOCK 0x2
 #define PIN_GLOBAL 0x4
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 2a3d1a9..b04157a 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -32,6 +32,7 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 #include <linux/dma_remapping.h>
+#include "../../../staging/android/sync.h"
 
 #define  __EXEC_OBJECT_HAS_PIN (1<<31)
 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
@@ -1356,6 +1357,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	u32 flags;
 	int ret;
 	bool need_relocs;
+	struct sync_fence *sync_fence = NULL;
+	int fence_fd = -1;
 
 	if (!i915_gem_check_execbuffer(args))
 		return -EINVAL;
@@ -1509,8 +1512,29 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	if (ret)
 		goto req_err;
 
+	if (args->flags & I915_EXEC_FENCE_OUT) {
+		ret = i915_create_sync_fence_ring(ring, ctx,
+						  &sync_fence, &fence_fd);
+		if (ret)
+			goto req_err;
+	}
+
 	ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args,
 				      &eb->vmas, batch_obj, exec_start, flags);
+	if (ret)
+		goto exec_err;
+
+	if (sync_fence) {
+		sync_fence_install(sync_fence, fence_fd);
+		args->rsvd2 = fence_fd;
+		sync_fence = NULL;
+	}
+
+exec_err:
+	if (sync_fence) {
+		fput(sync_fence->file);
+		put_unused_fd(fence_fd);
+	}
 
 req_err:
 	/*
diff --git a/drivers/gpu/drm/i915/i915_sync.c b/drivers/gpu/drm/i915/i915_sync.c
new file mode 100644
index 0000000..27c0c15
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_sync.c
@@ -0,0 +1,241 @@
+/*
+ * Copyright © 2013-2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jesse Barnes <jbarnes@virtuousgeek.org>
+ *    Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+ *
+ */
+
+#include <drm/drmP.h>
+#include <drm/drm_vma_manager.h>
+#include <drm/i915_drm.h>
+#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+#include <linux/oom.h>
+#include <linux/shmem_fs.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/pci.h>
+#include <linux/dma-buf.h>
+#include "../../../staging/android/sync.h"
+
+static DEFINE_SPINLOCK(fence_lock);
+
+/*
+ * i915 Android native sync fences.
+ *
+ * We implement sync points in terms of i915 seqnos. They're exposed through
+ * the DRM_IOCTL_I915_GEM_EXECBUFFER2 ioctl, and can be mixed and matched
+ * with other Android timelines and aggregated into sync_fences, etc.
+ *
+ * TODO:
+ *   * Display engine fences.
+ *   * Extend driver data with context id / ring id.
+ */
+
+#define to_i915_request(x) container_of(x, struct drm_i915_gem_request, fence)
+
+static const char *i915_fence_get_driver_name(struct fence *fence)
+{
+	return "i915";
+}
+
+static const char *i915_fence_ring_get_timeline_name(struct fence *fence)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+
+	return req->ring->name;
+}
+
+static int
+i915_fence_ring_check(wait_queue_t *wait, unsigned mode, int flags, void *key)
+{
+	struct drm_i915_gem_request *req = wait->private;
+	struct intel_engine_cs *ring = req->ring;
+
+	if (!i915_gem_request_completed(req, false))
+		return 0;
+
+	fence_signal(&req->fence);
+
+	__remove_wait_queue(&ring->irq_queue, wait);
+	i915_gem_request_unreference(req);
+	ring->irq_put(ring);
+
+	return 0;
+}
+
+static bool i915_fence_ring_enable_signaling(struct fence *fence)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+	struct intel_engine_cs *ring = req->ring;
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	wait_queue_t *wait = &req->wait;
+
+	/* queue fence wait queue on irq queue and get fence */
+	if (i915_gem_request_completed(req, false) ||
+	    i915_terminally_wedged(&dev_priv->gpu_error))
+		return false;
+
+	if (!ring->irq_get(ring))
+		return false;
+
+	if (i915_gem_request_completed(req, false)) {
+		ring->irq_put(ring);
+		return true;
+	}
+
+	wait->flags = 0;
+	wait->private = req;
+	wait->func = i915_fence_ring_check;
+
+	i915_gem_request_reference(req);
+
+	__add_wait_queue(&ring->irq_queue, wait);
+
+	return true;
+}
+
+static bool i915_fence_ring_signaled(struct fence *fence)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+
+	return i915_gem_request_completed(req, false);
+}
+
+static signed long
+i915_fence_ring_wait(struct fence *fence, bool intr, signed long timeout_js)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+	struct drm_i915_private *dev_priv = req->ring->dev->dev_private;
+	int ret;
+	s64 timeout;
+
+	timeout = jiffies_to_nsecs(timeout_js);
+
+	ret = __i915_wait_request(req,
+				  atomic_read(&dev_priv->gpu_error.reset_counter),
+				  intr, &timeout, NULL);
+	if (ret == -ETIME)
+		return 0;
+
+	return ret;
+}
+
+static int
+i915_fence_ring_fill_driver_data(struct fence *fence, void *data, int size)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+
+	if (size < sizeof(req->seqno))
+		return -ENOMEM;
+
+	memcpy(data, &req->seqno,
+	       sizeof(req->seqno));
+
+	return sizeof(req->seqno);
+}
+
+static void i915_fence_ring_value_str(struct fence *fence, char *str, int size)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+
+	snprintf(str, size, "%u", req->seqno);
+}
+
+static void
+i915_fence_ring_timeline_value_str(struct fence *fence, char *str, int size)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+	struct intel_engine_cs *ring = req->ring;
+
+	snprintf(str, size, "%u", ring->get_seqno(ring, false));
+}
+
+static void i915_fence_release(struct fence *fence)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+	struct drm_device *dev = req->ring->dev;
+
+	mutex_lock(&dev->struct_mutex);
+	i915_gem_request_unreference(req);
+	mutex_unlock(&dev->struct_mutex);
+}
+
+static struct fence_ops i915_fence_ring_ops = {
+	.get_driver_name =	i915_fence_get_driver_name,
+	.get_timeline_name =	i915_fence_ring_get_timeline_name,
+	.enable_signaling =	i915_fence_ring_enable_signaling,
+	.signaled =		i915_fence_ring_signaled,
+	.wait =			i915_fence_ring_wait,
+	.fill_driver_data =	i915_fence_ring_fill_driver_data,
+	.fence_value_str =	i915_fence_ring_value_str,
+	.timeline_value_str =	i915_fence_ring_timeline_value_str,
+	.release =		i915_fence_release
+};
+
+int i915_create_sync_fence_ring(struct intel_engine_cs *ring,
+				struct intel_context *ctx,
+				struct sync_fence **sync_fence, int *fence_fd)
+{
+	struct drm_i915_gem_request *req;
+	struct sync_fence *sfence;
+	char ring_name[6] = "ring0";
+	int fd;
+
+	if (!intel_ring_initialized(ring)) {
+		DRM_DEBUG("Ring not initialized!\n");
+		return -EAGAIN;
+	}
+
+	req = ring->outstanding_lazy_request;
+	if (WARN_ON_ONCE(!req))
+		return -ECANCELED;
+
+	fd = get_unused_fd_flags(O_CLOEXEC);
+	if (fd < 0) {
+		DRM_DEBUG("No available file descriptor!\n");
+		return fd;
+	}
+
+	i915_gem_request_reference(req);
+
+	fence_init(&req->fence, &i915_fence_ring_ops, &fence_lock,
+		   ctx->user_handle, req->seqno);
+
+	ring_name[4] += ring->id;
+	sfence = sync_fence_create_dma(ring_name, &req->fence);
+	if (!sfence)
+		goto err;
+
+	*sync_fence = sfence;
+	*fence_fd = fd;
+
+	return 0;
+
+err:
+	i915_gem_request_unreference(req);
+	put_unused_fd(fd);
+	return -ENOMEM;
+}
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 2e559f6e..c197770 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -246,7 +246,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_HWS_ADDR		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_HWS_ADDR, struct drm_i915_gem_init)
 #define DRM_IOCTL_I915_GEM_INIT		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
 #define DRM_IOCTL_I915_GEM_EXECBUFFER	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
-#define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
+#define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
 #define DRM_IOCTL_I915_GEM_PIN		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
 #define DRM_IOCTL_I915_GEM_UNPIN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
 #define DRM_IOCTL_I915_GEM_BUSY		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
@@ -718,7 +718,7 @@ struct drm_i915_gem_execbuffer2 {
 #define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */
 	__u64 flags;
 	__u64 rsvd1; /* now used for context info */
-	__u64 rsvd2;
+	__u64 rsvd2; /* now used for fence fd */
 };
 
 /** Resets the SO write offset registers for transform feedback on gen7. */
@@ -750,7 +750,9 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_HANDLE_LUT		(1<<12)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1)
+#define I915_EXEC_FENCE_OUT		(1<<13)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_FENCE_OUT<<1)
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
2.2.0


* Re: [RFC v2] drm/i915: Android native sync support
  2015-01-23 11:13 ` [RFC v2] " Tvrtko Ursulin
@ 2015-01-23 11:27   ` Chris Wilson
  2015-01-23 14:02     ` Tvrtko Ursulin
  0 siblings, 1 reply; 38+ messages in thread
From: Chris Wilson @ 2015-01-23 11:27 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Fri, Jan 23, 2015 at 11:13:14AM +0000, Tvrtko Ursulin wrote:
> From: Jesse Barnes <jbarnes@virtuousgeek.org>
> 
> Add Android native sync support with fences exported as file descriptors via
> the execbuf ioctl (rsvd2 field is used).
> 
> This is a continuation of Jesse Barnes's previous work, squashed to arrive at
> the final destination, cleaned up, with some fixes and preliminary light
> testing.
> 
> GEM requests are extended with fence structures which are associated with
> Android sync fences exported to user space via file descriptors. Fences which
> are waited upon while exported to userspace are referenced and added to
> the irq_queue so they are signalled when requests are completed. There is
> no overhead in the case where fences are not requested.
> 
> Based on patches by Jesse Barnes:
>    drm/i915: Android sync points for i915 v3
>    drm/i915: add fences to the request struct
>    drm/i915: sync fence fixes/updates
> 
> To do:
>    * Extend driver data with context id / ring id (TBD).
> 
> v2:
>    * Code review comments. (Chris Wilson)
>    * ring->add_request() was a wrong thing to call - rebase on top of John
>      Harrison's (drm/i915: Early alloc request) to ensure correct request is
>      present before creating a fence.
>    * Take a request reference from signalling path as well to ensure request
>      sticks around while fence is on the request completion wait queue.

Ok, in this arrangement, attaching a fence to the execbuf is rather meh
as it is just a special cased version of attaching a fence to a bo.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [RFC v2] drm/i915: Android native sync support
  2015-01-23 11:27   ` Chris Wilson
@ 2015-01-23 14:02     ` Tvrtko Ursulin
  2015-01-23 15:53       ` Daniel Vetter
  0 siblings, 1 reply; 38+ messages in thread
From: Tvrtko Ursulin @ 2015-01-23 14:02 UTC (permalink / raw)
  To: Chris Wilson, Intel-gfx


On 01/23/2015 11:27 AM, Chris Wilson wrote:
> On Fri, Jan 23, 2015 at 11:13:14AM +0000, Tvrtko Ursulin wrote:
>> From: Jesse Barnes <jbarnes@virtuousgeek.org>
>>
>> Add Android native sync support with fences exported as file descriptors via
>> the execbuf ioctl (rsvd2 field is used).
>>
>> This is a continuation of Jesse Barnes's previous work, squashed to arrive at
>> the final destination, cleaned up, with some fixes and preliminary light
>> testing.
>>
>> GEM requests are extended with fence structures which are associated with
>> Android sync fences exported to user space via file descriptors. Fences which
>> are waited upon, and while exported to userspace, are referenced and added to
>> the irq_queue so they are signalled when requests are completed. There is no
>> overhead in the case where fences are not requested.
>>
>> Based on patches by Jesse Barnes:
>>     drm/i915: Android sync points for i915 v3
>>     drm/i915: add fences to the request struct
>>     drm/i915: sync fence fixes/updates
>>
>> To do:
>>     * Extend driver data with context id / ring id (TBD).
>>
>> v2:
>>     * Code review comments. (Chris Wilson)
>>    * ring->add_request() was the wrong thing to call - rebase on top of John
>>       Harrison's (drm/i915: Early alloc request) to ensure correct request is
>>       present before creating a fence.
>>     * Take a request reference from signalling path as well to ensure request
>>       sticks around while fence is on the request completion wait queue.
>
> Ok, in this arrangement, attaching a fence to the execbuf is rather meh
> as it is just a special cased version of attaching a fence to a bo.

Better meh than "no"! :D

My understanding is this is what people want, with the future input 
fence extension (scheduler).

Anyway.. v2 is broken since it unreferences requests without holding the 
mutex, and worse from irq context, so I need to rework that a bit.

Regards,

Tvrtko

* Re: [RFC v2] drm/i915: Android native sync support
  2015-01-23 14:02     ` Tvrtko Ursulin
@ 2015-01-23 15:53       ` Daniel Vetter
  2015-01-23 16:49         ` Tvrtko Ursulin
  2015-01-23 17:30         ` Chris Wilson
  0 siblings, 2 replies; 38+ messages in thread
From: Daniel Vetter @ 2015-01-23 15:53 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Fri, Jan 23, 2015 at 02:02:44PM +0000, Tvrtko Ursulin wrote:
> 
> On 01/23/2015 11:27 AM, Chris Wilson wrote:
> >On Fri, Jan 23, 2015 at 11:13:14AM +0000, Tvrtko Ursulin wrote:
> >>From: Jesse Barnes <jbarnes@virtuousgeek.org>
> >>
> >>Add Android native sync support with fences exported as file descriptors via
> >>the execbuf ioctl (rsvd2 field is used).
> >>
> >>This is a continuation of Jesse Barnes's previous work, squashed to arrive at
> >>the final destination, cleaned up, with some fixes and preliminary light
> >>testing.
> >>
> >>GEM requests are extended with fence structures which are associated with
> >>Android sync fences exported to user space via file descriptors. Fences which
> >>are waited upon, and while exported to userspace, are referenced and added to
> >>the irq_queue so they are signalled when requests are completed. There is no
> >> overhead in the case where fences are not requested.
> >>
> >>Based on patches by Jesse Barnes:
> >>    drm/i915: Android sync points for i915 v3
> >>    drm/i915: add fences to the request struct
> >>    drm/i915: sync fence fixes/updates
> >>
> >>To do:
> >>    * Extend driver data with context id / ring id (TBD).
> >>
> >>v2:
> >>    * Code review comments. (Chris Wilson)
> >>    * ring->add_request() was the wrong thing to call - rebase on top of John
> >>      Harrison's (drm/i915: Early alloc request) to ensure correct request is
> >>      present before creating a fence.
> >>    * Take a request reference from signalling path as well to ensure request
> >>      sticks around while fence is on the request completion wait queue.
> >
> >Ok, in this arrangement, attaching a fence to the execbuf is rather meh
> >as it is just a special cased version of attaching a fence to a bo.
> 
> Better meh than "no"! :D
> 
> My understanding is this is what people want, with the future input fence
> extension (scheduler).
> 
> Anyway.. v2 is broken since it unreferences requests without holding the
> mutex, and worse from irq context, so I need to rework that a bit.

Yeah that's kind of the big behaviour difference (at least as I see it)
between explicit sync and implicit sync:
- with implicit sync the kernel attaches sync points/requests to buffers
  and userspace just asks about idle/busyness of buffers. Synchronization
  between different users is all handled behind userspace's back in the
  kernel.

- explicit sync attaches sync points to individual bits of work and makes
  them explicit objects userspace can get at and pass around. Userspace
  uses these separate things to inquire about when something is
  done/idle/busy and has its own mapping between explicit sync objects and
  the different pieces of memory affected by each. Synchronization between
  different clients is handled explicitly by passing sync objects around
  each time some rendering is done.
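
For illustration, from userspace's side the explicit model with the
execbuf interface proposed in this thread might look roughly like the
sketch below - a sketch only, assuming the sync fd returned in rsvd2
behaves like any other Android sync fence and can be poll()ed:

#include <errno.h>
#include <poll.h>
#include <unistd.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

/* Sketch only: submit a batch and block on the returned sync fence fd
 * instead of asking whether a buffer is busy. 'eb' is assumed to be a
 * fully populated execbuffer2 from the usual submission path. */
static int submit_and_wait_explicit(int drm_fd,
				    struct drm_i915_gem_execbuffer2 *eb)
{
	struct pollfd pfd;

	eb->flags |= I915_EXEC_FENCE_OUT;	/* fence fd comes back in rsvd2 */
	if (drmIoctl(drm_fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, eb))
		return -errno;

	pfd.fd = (int)eb->rsvd2;
	pfd.events = POLLIN;	/* sync fds become readable on completion */
	poll(&pfd, 1, -1);	/* or hand the fd to another process instead */

	close(pfd.fd);
	return 0;
}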

The bigger driver for explicit sync (besides "nvidia likes it sooooo much
that everyone uses it a lot") seems to be a) shitty gpu drivers without
proper bo managers (*cough*android*cough*) and b) svm, where there are simply
no buffer objects any more to attach sync information to.

So attaching explicit sync objects to buffers doesn't make that much
sense by default. Except when we want to mix explicit and implicit
userspace, then we need to grab sync objects out of dma-bufs and attach
random sync objects to them at the border. Iirc Maarten had dma-buf
patches for exactly this.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

* Re: [RFC v2] drm/i915: Android native sync support
  2015-01-23 15:53       ` Daniel Vetter
@ 2015-01-23 16:49         ` Tvrtko Ursulin
  2015-01-24  9:47           ` Daniel Vetter
  2015-01-23 17:30         ` Chris Wilson
  1 sibling, 1 reply; 38+ messages in thread
From: Tvrtko Ursulin @ 2015-01-23 16:49 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Intel-gfx


On 01/23/2015 03:53 PM, Daniel Vetter wrote:
> On Fri, Jan 23, 2015 at 02:02:44PM +0000, Tvrtko Ursulin wrote:
>>
>> On 01/23/2015 11:27 AM, Chris Wilson wrote:
>>> On Fri, Jan 23, 2015 at 11:13:14AM +0000, Tvrtko Ursulin wrote:
>>>> From: Jesse Barnes <jbarnes@virtuousgeek.org>
>>>>
>>>> Add Android native sync support with fences exported as file descriptors via
>>>> the execbuf ioctl (rsvd2 field is used).
>>>>
>>>> This is a continuation of Jesse Barnes's previous work, squashed to arrive at
>>>> the final destination, cleaned up, with some fixes and preliminary light
>>>> testing.
>>>>
>>>> GEM requests are extended with fence structures which are associated with
>>>> Android sync fences exported to user space via file descriptors. Fences which
>>>> are waited upon, and while exported to userspace, are referenced and added to
>>>> the irq_queue so they are signalled when requests are completed. There is no
>>>> overhead in the case where fences are not requested.
>>>>
>>>> Based on patches by Jesse Barnes:
>>>>     drm/i915: Android sync points for i915 v3
>>>>     drm/i915: add fences to the request struct
>>>>     drm/i915: sync fence fixes/updates
>>>>
>>>> To do:
>>>>     * Extend driver data with context id / ring id (TBD).
>>>>
>>>> v2:
>>>>     * Code review comments. (Chris Wilson)
>>>>     * ring->add_request() was the wrong thing to call - rebase on top of John
>>>>       Harrison's (drm/i915: Early alloc request) to ensure correct request is
>>>>       present before creating a fence.
>>>>     * Take a request reference from signalling path as well to ensure request
>>>>       sticks around while fence is on the request completion wait queue.
>>>
>>> Ok, in this arrangement, attaching a fence to the execbuf is rather meh
>>> as it is just a special cased version of attaching a fence to a bo.
>>
>> Better meh than "no"! :D
>>
>> My understanding is this is what people want, with the future input fence
>> extension (scheduler).
>>
>> Anyway.. v2 is broken since it unreferences requests without holding the
>> mutex, and worse from irq context, so I need to rework that a bit.
>
> Yeah that's kind of the big behaviour difference (at least as I see it)
> between explicit sync and implicit sync:
> - with implicit sync the kernel attaches sync points/requests to buffers
>    and userspace just asks about idle/busyness of buffers. Synchronization
>    between different users is all handled behind userspace's back in the
>    kernel.
>
> - explicit sync attaches sync points to individual bits of work and makes
>    them explicit objects userspace can get at and pass around. Userspace
>    uses these separate things to inquire about when something is
>    done/idle/busy and has its own mapping between explicit sync objects and
>    the different pieces of memory affected by each. Synchronization between
>    different clients is handled explicitly by passing sync objects around
>    each time some rendering is done.
>
> The bigger driver for explicit sync (besides "nvidia likes it sooooo much
> that everyone uses it a lot") seems to be a) shitty gpu drivers without
> proper bo managers (*cough*android*cough*) and b) svm, where there are simply
> no buffer objects any more to attach sync information to.
>
> So attaching explicit sync objects to buffers doesn't make that much
> sense by default. Except when we want to mix explicit and implicit
> userspace, then we need to grab sync objects out of dma-bufs and attach
> random sync objects to them at the border. Iirc Maarten had dma-buf
> patches for exactly this.

Could you please translate this into something understandable by 
newcomers? :)

In the context of Jesse's work I took over - there my impression was 
you were mostly happy, it just needed converting from a dedicated ioctl to 
execbuf?

Then how does future scheduling come into picture in your views above?

Submitting something with an input fence meaning "execute this work when 
this fence has been done" - when mixing and matching fences from 
different sources etc. How do you envisage that working in an implicit world?

Regards,

Tvrtko

* Re: [RFC v2] drm/i915: Android native sync support
  2015-01-23 15:53       ` Daniel Vetter
  2015-01-23 16:49         ` Tvrtko Ursulin
@ 2015-01-23 17:30         ` Chris Wilson
  2015-01-24  9:41           ` Daniel Vetter
  1 sibling, 1 reply; 38+ messages in thread
From: Chris Wilson @ 2015-01-23 17:30 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Intel-gfx

On Fri, Jan 23, 2015 at 04:53:48PM +0100, Daniel Vetter wrote:
> Yeah that's kind of the big behaviour difference (at least as I see it)
> between explicit sync and implicit sync:
> - with implicit sync the kernel attaches sync points/requests to buffers
>   and userspace just asks about idle/busyness of buffers. Synchronization
>   between different users is all handled behind userspace's back in the
>   kernel.
> 
> - explicit sync attaches sync points to individual bits of work and makes
>   them explicit objects userspace can get at and pass around. Userspace
>   uses these separate things to inquire about when something is
>   done/idle/busy and has its own mapping between explicit sync objects and
>   the different pieces of memory affected by each. Synchronization between
>   different clients is handled explicitly by passing sync objects around
>   each time some rendering is done.
> 
> The bigger driver for explicit sync (besides "nvidia likes it sooooo much
> that everyone uses it a lot") seems to be a) shitty gpu drivers without
> proper bo managers (*cough*android*cough*) and b) svm, where there are simply
> no buffer objects any more to attach sync information to.

Actually, mesa would really like much finer granularity than at batch
boundaries. Having a sync object for a batch boundary itself is very meh
and not a substantive improvement on what is possible today, but being
able to convert the implicit sync into an explicit fence object is
interesting and lends a layer of abstraction that could make it more
versatile. Most importantly, it allows me to defer the overhead of fence
creation until I actually want to sleep on a completion. Also Jesse
originally supported inserting fences inside a batch, which looked
interesting if impractical.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [RFC v2] drm/i915: Android native sync support
  2015-01-23 17:30         ` Chris Wilson
@ 2015-01-24  9:41           ` Daniel Vetter
  2015-01-24 16:08             ` Chris Wilson
  0 siblings, 1 reply; 38+ messages in thread
From: Daniel Vetter @ 2015-01-24  9:41 UTC (permalink / raw)
  To: Chris Wilson, Daniel Vetter, Tvrtko Ursulin, intel-gfx

On Fri, Jan 23, 2015 at 6:30 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Fri, Jan 23, 2015 at 04:53:48PM +0100, Daniel Vetter wrote:
>> Yeah that's kind of the big behaviour difference (at least as I see it)
>> between explicit sync and implicit sync:
>> - with implicit sync the kernel attaches sync points/requests to buffers
>>   and userspace just asks about idle/busyness of buffers. Synchronization
>>   between different users is all handled behind userspace's back in the
>>   kernel.
>>
>> - explicit sync attaches sync points to individual bits of work and makes
>>   them explicit objects userspace can get at and pass around. Userspace
>>   uses these separate things to inquire about when something is
>>   done/idle/busy and has its own mapping between explicit sync objects and
>>   the different pieces of memory affected by each. Synchronization between
>>   different clients is handled explicitly by passing sync objects around
>>   each time some rendering is done.
>>
>> The bigger driver for explicit sync (besides "nvidia likes it sooooo much
>> that everyone uses it a lot") seems to be a) shitty gpu drivers without
>> proper bo managers (*cough*android*cough*) and b) svm, where there are simply
>> no buffer objects any more to attach sync information to.
>
> Actually, mesa would really like much finer granularity than at batch
> boundaries. Having a sync object for a batch boundary itself is very meh
> and not a substantive improvement on what is possible today, but being
> able to convert the implicit sync into an explicit fence object is
> interesting and lends a layer of abstraction that could make it more
> versatile. Most importantly, it allows me to defer the overhead of fence
> creation until I actually want to sleep on a completion. Also Jesse
> originally supported inserting fences inside a batch, which looked
> interesting if impractical.

If we want to allow the kernel to stall on fences (in e.g. the scheduler)
only the kernel should be allowed to create fences imo. At least
current fences assume that they _will_ signal eventually, and for i915
fences we have the hangcheck to ensure this is the case. In-batch
fences and lazy fence creation (beyond just delaying the fd allocation
to avoid too many fds flying around) are therefore a no-go.

For that kind of fine-grained sync between gpu and cpu workloads the
solutions thus far (at least what I've seen) are just busy-looping.
Usually those workloads have a few orders of magnitude more sync points
than frames we tend to render, so blocking isn't terribly efficient anyway.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

* Re: [RFC v2] drm/i915: Android native sync support
  2015-01-23 16:49         ` Tvrtko Ursulin
@ 2015-01-24  9:47           ` Daniel Vetter
  2015-01-26 11:08             ` Tvrtko Ursulin
  0 siblings, 1 reply; 38+ messages in thread
From: Daniel Vetter @ 2015-01-24  9:47 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Fri, Jan 23, 2015 at 5:49 PM, Tvrtko Ursulin
<tvrtko.ursulin@linux.intel.com> wrote:
> Could you please translate this into something understandable by newcomers?
> :)

I don't know which parts are confusing without questions so please ask
them ... the questions below about scheduler interactions seem fairly
advanced ;-)

> In the context of Jesse's work I took over - there my impression was you
> were mostly happy, it just needed converting from a dedicated ioctl to execbuf?

Yeah that's all we need to get started.

> Then how does future scheduling come into picture in your views above?

Scheduler just takes a pile of fences from the execbuf code and
doesn't care that much whether they come from implicit or explicit
syncing modes. Which means we'll always combine all the fences (both
implicit and explicit) when figuring out dependencies.

> Submitting something with an input fence meaning "execute this work when
> this fence has been done" - when mixing and matching fences from different
> sources etc. How do you envisage that working in an implicit world?

execbuf always needs to obey the implicit fences since if it doesn't
our shrinker will start to unmap objects which are still in use. Not
good.

So if you want to do an execbuf with _only_ explicit syncing then you
need to lie about the write domains in your relocations: The kernel
allows parallel reads nowadays, so if you never set a write domain
there will be no implicit syncing (except with the shrinker, and as
explained that is mandatory anyway). Only write domains create
implicit sync dependencies between batches, so if there's none you can
control all the dependencies between batches with explicit fences.
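
As a concrete sketch of that (using the existing relocation uapi; the
handle and offset values are placeholders):

/* A relocation that never sets a write domain, so execbuf creates no
 * implicit sync dependency through this bo - reads run in parallel and
 * ordering is left entirely to explicit fences. */
struct drm_i915_gem_relocation_entry reloc = {
	.target_handle = bo_handle,		/* placeholder bo handle */
	.offset = reloc_offset_in_batch,	/* placeholder offset */
	.read_domains = I915_GEM_DOMAIN_SAMPLER,
	.write_domain = 0,	/* no write domain -> no implicit syncing */
};
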
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

* Re: [RFC v2] drm/i915: Android native sync support
  2015-01-24  9:41           ` Daniel Vetter
@ 2015-01-24 16:08             ` Chris Wilson
  2015-01-26  7:52               ` Daniel Vetter
  0 siblings, 1 reply; 38+ messages in thread
From: Chris Wilson @ 2015-01-24 16:08 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Sat, Jan 24, 2015 at 10:41:46AM +0100, Daniel Vetter wrote:
> On Fri, Jan 23, 2015 at 6:30 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > On Fri, Jan 23, 2015 at 04:53:48PM +0100, Daniel Vetter wrote:
> >> Yeah that's kind of the big behaviour difference (at least as I see it)
> >> between explicit sync and implicit sync:
> >> - with implicit sync the kernel attaches sync points/requests to buffers
> >>   and userspace just asks about idle/busyness of buffers. Synchronization
> >>   between different users is all handled behind userspace's back in the
> >>   kernel.
> >>
> >> - explicit sync attaches sync points to individual bits of work and makes
> >>   them explicit objects userspace can get at and pass around. Userspace
> >>   uses these separate things to inquire about when something is
> >>   done/idle/busy and has its own mapping between explicit sync objects and
> >>   the different pieces of memory affected by each. Synchronization between
> >>   different clients is handled explicitly by passing sync objects around
> >>   each time some rendering is done.
> >>
> >> The bigger driver for explicit sync (besides "nvidia likes it sooooo much
> >> that everyone uses it a lot") seems to be a) shitty gpu drivers without
> >> proper bo managers (*cough*android*cough*) and b) svm, where there are simply
> >> no buffer objects any more to attach sync information to.
> >
> > Actually, mesa would really like much finer granularity than at batch
> > boundaries. Having a sync object for a batch boundary itself is very meh
> > and not a substantive improvement on what is possible today, but being
> > able to convert the implicit sync into an explicit fence object is
> > interesting and lends a layer of abstraction that could make it more
> > versatile. Most importantly, it allows me to defer the overhead of fence
> > creation until I actually want to sleep on a completion. Also Jesse
> > originally supported inserting fences inside a batch, which looked
> > interesting if impractical.
> 
> If we want to allow the kernel to stall on fences (in e.g. the scheduler)
> only the kernel should be allowed to create fences imo. At least
> current fences assume that they _will_ signal eventually, and for i915
> fences we have the hangcheck to ensure this is the case. In-batch
> fences and lazy fence creation (beyond just delaying the fd allocation
> to avoid too many fds flying around) are therefore a no-go.

Lazy fence creation (i.e. attaching a fence to a buffer) just means
creating the fd for an existing request (which is derived from the
fence). Or if the buffer is read or write idle, then you just create the
fence as already-signaled. And yes, it is just to avoid the death by a
thousand file descriptors, and especially creating one for every batch.

> For that kind of fine-grained sync between gpu and cpu workloads the
> > solutions thus far (at least what I've seen) are just busy-looping.
> > Usually those workloads have a few orders of magnitude more sync points
> > than frames we tend to render, so blocking isn't terribly efficient anyway.

Heck, I think a full-fledged fence fd per batch is still more overhead
than I want.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [RFC v2] drm/i915: Android native sync support
  2015-01-24 16:08             ` Chris Wilson
@ 2015-01-26  7:52               ` Daniel Vetter
  2015-01-26  9:08                 ` Chris Wilson
  0 siblings, 1 reply; 38+ messages in thread
From: Daniel Vetter @ 2015-01-26  7:52 UTC (permalink / raw)
  To: Chris Wilson, Daniel Vetter, Tvrtko Ursulin, intel-gfx

On Sat, Jan 24, 2015 at 04:08:32PM +0000, Chris Wilson wrote:
> On Sat, Jan 24, 2015 at 10:41:46AM +0100, Daniel Vetter wrote:
> > On Fri, Jan 23, 2015 at 6:30 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > > On Fri, Jan 23, 2015 at 04:53:48PM +0100, Daniel Vetter wrote:
> > >> Yeah that's kind of the big behaviour difference (at least as I see it)
> > >> between explicit sync and implicit sync:
> > >> - with implicit sync the kernel attaches sync points/requests to buffers
> > >>   and userspace just asks about idle/busyness of buffers. Synchronization
> > >>   between different users is all handled behind userspace's back in the
> > >>   kernel.
> > >>
> > >> - explicit sync attaches sync points to individual bits of work and makes
> > >>   them explicit objects userspace can get at and pass around. Userspace
> > >>   uses these separate things to inquire about when something is
> > >>   done/idle/busy and has its own mapping between explicit sync objects and
> > >>   the different pieces of memory affected by each. Synchronization between
> > >>   different clients is handled explicitly by passing sync objects around
> > >>   each time some rendering is done.
> > >>
> > >> The bigger driver for explicit sync (besides "nvidia likes it sooooo much
> > >> that everyone uses it a lot") seems to be a) shitty gpu drivers without
> > >> proper bo managers (*cough*android*cough*) and b) svm, where there are simply
> > >> no buffer objects any more to attach sync information to.
> > >
> > > Actually, mesa would really like much finer granularity than at batch
> > > boundaries. Having a sync object for a batch boundary itself is very meh
> > > and not a substantive improvement on what is possible today, but being
> > > able to convert the implicit sync into an explicit fence object is
> > > interesting and lends a layer of abstraction that could make it more
> > > versatile. Most importantly, it allows me to defer the overhead of fence
> > > creation until I actually want to sleep on a completion. Also Jesse
> > > originally supported inserting fences inside a batch, which looked
> > > interesting if impractical.
> > 
> > If we want to allow the kernel to stall on fences (in e.g. the scheduler)
> > only the kernel should be allowed to create fences imo. At least
> > current fences assume that they _will_ signal eventually, and for i915
> > fences we have the hangcheck to ensure this is the case. In-batch
> > fences and lazy fence creation (beyond just delaying the fd allocation
> > to avoid too many fds flying around) are therefore a no-go.
> 
> Lazy fence creation (i.e. attaching a fence to a buffer) just means
> creating the fd for an existing request (which is derived from the
> fence). Or if the buffer is read or write idle, then you just create the
> fence as already-signaled. And yes, it is just to avoid the death by a
> thousand file descriptors, and especially creating one for every batch.

I think the problem will be platforms that want full explicit fence (like
android) but allow delayed creation of the fence fd from a gl sync object
(like the android egl extension allows).

I'm not sure yet how to best expose that really since just creating a
fence from the implicit request attached to the batch might upset the
interface purists with the mix of implicit and explicit fencing ;-) Hence
why I think for now we should just do the eager fd creation at execbuf
until ppl scream (well maybe not merge this patch until ppl scream ...).
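
For reference, the delayed-fd flow with that egl extension
(EGL_ANDROID_native_fence_sync) goes roughly like the sketch below -
from memory, so treat the details as approximate:

#include <EGL/egl.h>
#include <EGL/eglext.h>
#include <GLES2/gl2.h>

/* Sketch: the sync object is created up front, but the fd only
 * materialises when userspace actually asks for one. In real code the
 * entry points come from eglGetProcAddress(). */
static int gl_sync_to_fence_fd(EGLDisplay dpy)
{
	EGLSyncKHR sync = eglCreateSyncKHR(dpy, EGL_SYNC_NATIVE_FENCE_ANDROID,
					   NULL);

	glFlush();	/* the fence covers all GL work queued so far */

	return eglDupNativeFenceFDANDROID(dpy, sync);	/* fd created lazily */
}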

> > For that kind of fine-grained sync between gpu and cpu workloads the
> > solutions thus far (at least what I've seen) are just busy-looping.
> > Usually those workloads have a few orders of magnitude more sync points
> > than frames we tend to render, so blocking isn't terribly efficient anyway.
> 
> Heck, I think a full-fledged fence fd per batch is still more overhead
> than I want.

One idea that crossed my mind is to expose the 2nd interrupt source to
userspace somehow (we have pipe_control/mi_flush_dw and
mi_user_interrupt). Then we could use that, maybe with some
wakeup filtering to allow userspace to block a bit more efficiently.

But my gut feel still says that most likely a bit of busy-looping won't
hurt in such a case with very fine-grained synchronization. There should
be a full blocking kernel request nearby. And often the check is for "busy
or not" only anyway, and that can already be done with seqno writes from
batchbuffers to a per-ctx bo that userspace manages.
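
Roughly like this, as a sketch - 'seqno_map' stands in for a coherent
CPU mapping of that per-ctx bo, and the batch is assumed to store the
seqno there with something like MI_STORE_DWORD_IMM:

#include <stdbool.h>
#include <stdint.h>

/* The "busy or not" check without any fence objects or syscalls: just
 * poll the value the batch wrote into the userspace-managed bo. */
static bool batch_done(const volatile uint32_t *seqno_map, uint32_t seqno)
{
	return *seqno_map >= seqno;	/* wraparound ignored for brevity */
}

static void busy_wait(const volatile uint32_t *seqno_map, uint32_t seqno)
{
	while (!batch_done(seqno_map, seqno))
		;	/* spin; fine when waits are expected to be short */
}
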
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch

* Re: [RFC v2] drm/i915: Android native sync support
  2015-01-26  7:52               ` Daniel Vetter
@ 2015-01-26  9:08                 ` Chris Wilson
  2015-01-28  9:22                   ` Daniel Vetter
  0 siblings, 1 reply; 38+ messages in thread
From: Chris Wilson @ 2015-01-26  9:08 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Mon, Jan 26, 2015 at 08:52:39AM +0100, Daniel Vetter wrote:
> I think the problem will be platforms that want full explicit fence (like
> android) but allow delayed creation of the fence fd from a gl sync object
> (like the android egl extension allows).
> 
> I'm not sure yet how to best expose that really since just creating a
> fence from the implicit request attached to the batch might upset the
> > interface purists with the mix of implicit and explicit fencing ;-) Hence
> why I think for now we should just do the eager fd creation at execbuf
> until ppl scream (well maybe not merge this patch until ppl scream ...).

Everything we do is buffer centric. Even in the future with random bits
of memory, we will still use buffers behind the scenes. From an
interface perspective, it is clearer to me if we say "give me a fence for
this buffer". Exactly the same way as we say "is this buffer busy" or
"wait on this buffer". The change is that we now hand back an fd to slot
into an event loop. That, to me, is a much smaller evolutionary step of
the existing API, and yet more versatile than just attaching one to the
execbuf.
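
Purely as a hypothetical sketch of that shape - no such ioctl exists and
the name and fields below are invented for illustration only:

/* Hypothetical uapi, NOT a real ioctl: "give me a fence for this
 * buffer". Returns a sync fd for the bo's current request, or an
 * already-signaled one if the bo is idle. */
struct drm_i915_gem_bo_fence {
	__u32 handle;	/* in: bo to create the fence from */
	__u32 flags;	/* in: e.g. fence reads vs writes */
	__s32 fd;	/* out: sync fence fd to slot into an event loop */
	__u32 pad;
};
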
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [RFC v2] drm/i915: Android native sync support
  2015-01-24  9:47           ` Daniel Vetter
@ 2015-01-26 11:08             ` Tvrtko Ursulin
  2015-01-28  9:20               ` Daniel Vetter
  0 siblings, 1 reply; 38+ messages in thread
From: Tvrtko Ursulin @ 2015-01-26 11:08 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx


On 01/24/2015 09:47 AM, Daniel Vetter wrote:
> On Fri, Jan 23, 2015 at 5:49 PM, Tvrtko Ursulin
> <tvrtko.ursulin@linux.intel.com> wrote:
>> Could you please translate this into something understandable by newcomers?
>> :)
>
> I don't know which parts are confusing without questions so please ask
> them ... the questions below about scheduler interactions seem fairly
> advanced ;-)

I suppose for starters, when I took over this work I read the last 
thread from December.  There you were saying to Jesse to convert the 
separate ioctl into the execbuf interface for in & out fences.

That's what I did, I thought. So is that not the fashion of the day any 
more, or did I misunderstand something?

People will scream very soon for something along these lines anyway, 
since it is kind of packaged with the scheduler I am told, and both will 
be very much desired soon.

The idea is to allow submitting work without blocking in userspace, and 
rather let the scheduler queue and shuffle depending on fences.

Regards,

Tvrtko

* [RFC v3] drm/i915: Android native sync support
  2015-01-22 11:15 [RFC] drm/i915: Android native sync support Tvrtko Ursulin
  2015-01-22 11:42 ` Chris Wilson
  2015-01-23 11:13 ` [RFC v2] " Tvrtko Ursulin
@ 2015-01-27 11:29 ` Tvrtko Ursulin
  2015-01-27 11:40   ` Chris Wilson
  2015-01-28  9:29   ` Daniel Vetter
  2 siblings, 2 replies; 38+ messages in thread
From: Tvrtko Ursulin @ 2015-01-27 11:29 UTC (permalink / raw)
  To: Intel-gfx

From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Add Android native sync support with fences exported as file descriptors via
the execbuf ioctl (rsvd2 field is used).

This is a continuation of Jesse Barnes's previous work, squashed to arrive at
the final destination, cleaned up, with some fixes and preliminary light
testing.

GEM requests are extended with fence structures which are associated with
Android sync fences exported to user space via file descriptors. Fences which
are waited upon, and while exported to userspace, are referenced and added to
the irq_queue so they are signalled when requests are completed. There is no
overhead in the case where fences are not requested.

Based on patches by Jesse Barnes:
   drm/i915: Android sync points for i915 v3
   drm/i915: add fences to the request struct
   drm/i915: sync fence fixes/updates

To do:
   * Extend driver data with context id / ring id (TBD).

v2:
   * Code review comments. (Chris Wilson)
   * ring->add_request() was the wrong thing to call - rebase on top of John
     Harrison's (drm/i915: Early alloc request) to ensure correct request is
     present before creating a fence.
   * Take a request reference from signalling path as well to ensure request
     sticks around while fence is on the request completion wait queue.

v3:
   * Use worker to unreference on completion so it can lock the mutex from
     interrupt context.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
Cc: John Harrison <John.C.Harrison@Intel.com>
---
 drivers/gpu/drm/i915/Kconfig               |  14 ++
 drivers/gpu/drm/i915/Makefile              |   1 +
 drivers/gpu/drm/i915/i915_drv.h            |  27 +++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |  24 +++
 drivers/gpu/drm/i915/i915_sync.c           | 254 +++++++++++++++++++++++++++++
 include/uapi/drm/i915_drm.h                |   8 +-
 6 files changed, 325 insertions(+), 3 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/i915_sync.c

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 4e39ab3..abafe68 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -43,6 +43,20 @@ config DRM_I915_KMS
 
 	  If in doubt, say "Y".
 
+config DRM_I915_SYNC
+	bool "Enable explicit sync support"
+	depends on DRM_I915
+	default y if STAGING
+	select STAGING
+	select ANDROID
+	select SYNC
+	help
+	  Choose this option to enable Android native sync support and the
+	  corresponding i915 driver code to expose it.  Slightly increases
+	  driver size and pulls in sync support from staging.
+
+	  If in doubt, say "Y".
+
 config DRM_I915_FBDEV
 	bool "Enable legacy fbdev support for the modesetting intel driver"
 	depends on DRM_I915
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 16e3dc3..bcff481 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -58,6 +58,7 @@ i915-y += intel_audio.o \
 	  intel_sprite.o
 i915-$(CONFIG_ACPI)		+= intel_acpi.o intel_opregion.o
 i915-$(CONFIG_DRM_I915_FBDEV)	+= intel_fbdev.o
+i915-$(CONFIG_DRM_I915_SYNC)	+= i915_sync.o
 
 # modesetting output/encoder code
 i915-y += dvo_ch7017.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 023f422..c4d254c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -49,6 +49,7 @@
 #include <linux/intel-iommu.h>
 #include <linux/kref.h>
 #include <linux/pm_qos.h>
+#include <linux/fence.h>
 
 /* General customization:
  */
@@ -2092,6 +2093,15 @@ struct drm_i915_gem_request {
 	struct list_head client_list;
 
 	uint32_t uniq;
+
+#ifdef CONFIG_DRM_I915_SYNC
+	/* core fence obj for this request, may be exported */
+	struct fence fence;
+
+	wait_queue_t wait;
+
+	struct work_struct unref_work;
+#endif
 };
 
 void i915_gem_request_free(struct kref *req_ref);
@@ -2583,6 +2593,23 @@ void i915_init_vm(struct drm_i915_private *dev_priv,
 void i915_gem_free_object(struct drm_gem_object *obj);
 void i915_gem_vma_destroy(struct i915_vma *vma);
 
+/* i915_sync.c */
+struct sync_fence;
+
+#ifdef CONFIG_DRM_I915_SYNC
+int i915_create_sync_fence_ring(struct intel_engine_cs *ring,
+				struct intel_context *ctx,
+				struct sync_fence **sync_fence, int *fence_fd);
+#else
+static inline
+int i915_create_sync_fence_ring(struct intel_engine_cs *ring,
+				struct intel_context *ctx,
+				struct sync_fence **sync_fence, int *fence_fd)
+{
+	return -ENODEV;
+}
+#endif
+
 #define PIN_MAPPABLE 0x1
 #define PIN_NONBLOCK 0x2
 #define PIN_GLOBAL 0x4
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 2a3d1a9..b04157a 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -32,6 +32,7 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 #include <linux/dma_remapping.h>
+#include "../../../staging/android/sync.h"
 
 #define  __EXEC_OBJECT_HAS_PIN (1<<31)
 #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
@@ -1356,6 +1357,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	u32 flags;
 	int ret;
 	bool need_relocs;
+	struct sync_fence *sync_fence = NULL;
+	int fence_fd = -1;
 
 	if (!i915_gem_check_execbuffer(args))
 		return -EINVAL;
@@ -1509,8 +1512,29 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	if (ret)
 		goto req_err;
 
+	if (args->flags & I915_EXEC_FENCE_OUT) {
+		ret = i915_create_sync_fence_ring(ring, ctx,
+						  &sync_fence, &fence_fd);
+		if (ret)
+			goto req_err;
+	}
+
 	ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args,
 				      &eb->vmas, batch_obj, exec_start, flags);
+	if (ret)
+		goto exec_err;
+
+	if (sync_fence) {
+		sync_fence_install(sync_fence, fence_fd);
+		args->rsvd2 = fence_fd;
+		sync_fence = NULL;
+	}
+
+exec_err:
+	if (sync_fence) {
+		fput(sync_fence->file);
+		put_unused_fd(fence_fd);
+	}
 
 req_err:
 	/*
diff --git a/drivers/gpu/drm/i915/i915_sync.c b/drivers/gpu/drm/i915/i915_sync.c
new file mode 100644
index 0000000..77b7bc9
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_sync.c
@@ -0,0 +1,254 @@
+/*
+ * Copyright © 2013-2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jesse Barnes <jbarnes@virtuousgeek.org>
+ *    Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+ *
+ */
+
+#include <drm/drmP.h>
+#include <drm/drm_vma_manager.h>
+#include <drm/i915_drm.h>
+#include "i915_drv.h"
+#include "i915_trace.h"
+#include "intel_drv.h"
+#include <linux/oom.h>
+#include <linux/shmem_fs.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/pci.h>
+#include <linux/dma-buf.h>
+#include "../../../staging/android/sync.h"
+
+static DEFINE_SPINLOCK(fence_lock);
+
+/*
+ * i915 Android native sync fences.
+ *
+ * We implement sync points in terms of i915 seqnos. They're exposed through
+ * the DRM_IOCTL_I915_GEM_EXECBUFFER2 ioctl, and can be mixed and matched
+ * with other Android timelines and aggregated into sync_fences, etc.
+ *
+ * TODO:
+ *   * Display engine fences.
+ *   * Extend driver data with context id / ring id.
+ */
+
+#define to_i915_request(x) container_of(x, struct drm_i915_gem_request, fence)
+
+static const char *i915_fence_get_driver_name(struct fence *fence)
+{
+	return "i915";
+}
+
+static const char *i915_fence_ring_get_timeline_name(struct fence *fence)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+
+	return req->ring->name;
+}
+
+static void i915_gem_request_unreference_worker(struct work_struct *work)
+{
+	struct drm_i915_gem_request *req =
+		container_of(work, struct drm_i915_gem_request, unref_work);
+	struct drm_device *dev = req->ring->dev;
+
+	mutex_lock(&dev->struct_mutex);
+	i915_gem_request_unreference(req);
+	mutex_unlock(&dev->struct_mutex);
+}
+
+static int
+i915_fence_ring_check(wait_queue_t *wait, unsigned mode, int flags, void *key)
+{
+	struct drm_i915_gem_request *req = wait->private;
+	struct intel_engine_cs *ring = req->ring;
+
+	if (!i915_gem_request_completed(req, false))
+		return 0;
+
+	fence_signal_locked(&req->fence);
+
+	__remove_wait_queue(&ring->irq_queue, wait);
+	ring->irq_put(ring);
+
+	INIT_WORK(&req->unref_work, i915_gem_request_unreference_worker);
+	schedule_work(&req->unref_work);
+
+	return 0;
+}
+
+static bool i915_fence_ring_enable_signaling(struct fence *fence)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+	struct intel_engine_cs *ring = req->ring;
+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
+	wait_queue_t *wait = &req->wait;
+
+	/* queue fence wait queue on irq queue and get fence */
+	if (i915_gem_request_completed(req, false) ||
+	    i915_terminally_wedged(&dev_priv->gpu_error))
+		return false;
+
+	if (!ring->irq_get(ring))
+		return false;
+
+	if (i915_gem_request_completed(req, false)) {
+		ring->irq_put(ring);
+		return true;
+	}
+
+	wait->flags = 0;
+	wait->private = req;
+	wait->func = i915_fence_ring_check;
+
+	i915_gem_request_reference(req);
+
+	__add_wait_queue(&ring->irq_queue, wait);
+
+	return true;
+}
+
+static bool i915_fence_ring_signaled(struct fence *fence)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+
+	return i915_gem_request_completed(req, false);
+}
+
+static signed long
+i915_fence_ring_wait(struct fence *fence, bool intr, signed long timeout_js)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+	struct drm_i915_private *dev_priv = req->ring->dev->dev_private;
+	int ret;
+	s64 timeout;
+
+	timeout = jiffies_to_nsecs(timeout_js);
+
+	ret = __i915_wait_request(req,
+				  atomic_read(&dev_priv->gpu_error.reset_counter),
+				  intr, &timeout, NULL);
+	if (ret == -ETIME)
+		return 0;
+
+	return ret;
+}
+
+static int
+i915_fence_ring_fill_driver_data(struct fence *fence, void *data, int size)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+
+	if (size < sizeof(req->seqno))
+		return -ENOMEM;
+
+	memcpy(data, &req->seqno,
+	       sizeof(req->seqno));
+
+	return sizeof(req->seqno);
+}
+
+static void i915_fence_ring_value_str(struct fence *fence, char *str, int size)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+
+	snprintf(str, size, "%u", req->seqno);
+}
+
+static void
+i915_fence_ring_timeline_value_str(struct fence *fence, char *str, int size)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+	struct intel_engine_cs *ring = req->ring;
+
+	snprintf(str, size, "%u", ring->get_seqno(ring, false));
+}
+
+static void i915_fence_release(struct fence *fence)
+{
+	struct drm_i915_gem_request *req = to_i915_request(fence);
+	struct drm_device *dev = req->ring->dev;
+
+	mutex_lock(&dev->struct_mutex);
+	i915_gem_request_unreference(req);
+	mutex_unlock(&dev->struct_mutex);
+}
+
+static struct fence_ops i915_fence_ring_ops = {
+	.get_driver_name =	i915_fence_get_driver_name,
+	.get_timeline_name =	i915_fence_ring_get_timeline_name,
+	.enable_signaling =	i915_fence_ring_enable_signaling,
+	.signaled =		i915_fence_ring_signaled,
+	.wait =			i915_fence_ring_wait,
+	.fill_driver_data =	i915_fence_ring_fill_driver_data,
+	.fence_value_str =	i915_fence_ring_value_str,
+	.timeline_value_str =	i915_fence_ring_timeline_value_str,
+	.release =		i915_fence_release
+};
+
+int i915_create_sync_fence_ring(struct intel_engine_cs *ring,
+				struct intel_context *ctx,
+				struct sync_fence **sync_fence, int *fence_fd)
+{
+	struct drm_i915_gem_request *req;
+	struct sync_fence *sfence;
+	char ring_name[6] = "ring0";
+	int fd;
+
+	if (!intel_ring_initialized(ring)) {
+		DRM_DEBUG("Ring not initialized!\n");
+		return -EAGAIN;
+	}
+
+	req = ring->outstanding_lazy_request;
+	if (WARN_ON_ONCE(!req))
+		return -ECANCELED;
+
+	fd = get_unused_fd_flags(O_CLOEXEC);
+	if (fd < 0) {
+		DRM_DEBUG("No available file descriptor!\n");
+		return fd;
+	}
+
+	i915_gem_request_reference(req);
+
+	fence_init(&req->fence, &i915_fence_ring_ops, &fence_lock,
+		   ctx->user_handle, req->seqno);
+
+	ring_name[4] += ring->id;
+	sfence = sync_fence_create_dma(ring_name, &req->fence);
+	if (!sfence)
+		goto err;
+
+	*sync_fence = sfence;
+	*fence_fd = fd;
+
+	return 0;
+
+err:
+	i915_gem_request_unreference(req);
+	put_unused_fd(fd);
+	return -ENOMEM;
+}
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 2e559f6e..c197770 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -246,7 +246,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_HWS_ADDR		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_HWS_ADDR, struct drm_i915_gem_init)
 #define DRM_IOCTL_I915_GEM_INIT		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
 #define DRM_IOCTL_I915_GEM_EXECBUFFER	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
-#define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
+#define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
 #define DRM_IOCTL_I915_GEM_PIN		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
 #define DRM_IOCTL_I915_GEM_UNPIN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
 #define DRM_IOCTL_I915_GEM_BUSY		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
@@ -718,7 +718,7 @@ struct drm_i915_gem_execbuffer2 {
 #define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */
 	__u64 flags;
 	__u64 rsvd1; /* now used for context info */
-	__u64 rsvd2;
+	__u64 rsvd2; /* now used for fence fd */
 };
 
 /** Resets the SO write offset registers for transform feedback on gen7. */
@@ -750,7 +750,9 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_HANDLE_LUT		(1<<12)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1)
+#define I915_EXEC_FENCE_OUT		(1<<13)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_FENCE_OUT<<1)
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
2.2.2


* Re: [RFC v3] drm/i915: Android native sync support
  2015-01-27 11:29 ` [RFC v3] " Tvrtko Ursulin
@ 2015-01-27 11:40   ` Chris Wilson
  2015-01-27 12:13     ` Tvrtko Ursulin
  2015-01-28  9:29   ` Daniel Vetter
  1 sibling, 1 reply; 38+ messages in thread
From: Chris Wilson @ 2015-01-27 11:40 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Tue, Jan 27, 2015 at 11:29:36AM +0000, Tvrtko Ursulin wrote:
> +static void i915_gem_request_unreference_worker(struct work_struct *work)
> +{
> +	struct drm_i915_gem_request *req =
> +		container_of(work, struct drm_i915_gem_request, unref_work);
> +	struct drm_device *dev = req->ring->dev;
> +
> +	mutex_lock(&dev->struct_mutex);
> +	i915_gem_request_unreference(req);
> +	mutex_unlock(&dev->struct_mutex);
> +}
> +
> +static int
> +i915_fence_ring_check(wait_queue_t *wait, unsigned mode, int flags, void *key)
> +{
> +	struct drm_i915_gem_request *req = wait->private;
> +	struct intel_engine_cs *ring = req->ring;
> +
> +	if (!i915_gem_request_completed(req, false))
> +		return 0;
> +
> +	fence_signal_locked(&req->fence);
> +
> +	__remove_wait_queue(&ring->irq_queue, wait);
> +	ring->irq_put(ring);
> +
> +	INIT_WORK(&req->unref_work, i915_gem_request_unreference_worker);
> +	schedule_work(&req->unref_work);
> +
> +	return 0;
> +}
> +
> +static bool i915_fence_ring_enable_signaling(struct fence *fence)
> +{
> +	struct drm_i915_gem_request *req = to_i915_request(fence);
> +	struct intel_engine_cs *ring = req->ring;
> +	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> +	wait_queue_t *wait = &req->wait;
> +
> +	/* queue fence wait queue on irq queue and get fence */
> +	if (i915_gem_request_completed(req, false) ||
> +	    i915_terminally_wedged(&dev_priv->gpu_error))
> +		return false;
> +
> +	if (!ring->irq_get(ring))
> +		return false;
> +
> +	if (i915_gem_request_completed(req, false)) {
> +		ring->irq_put(ring);
> +		return true;
> +	}
> +
> +	wait->flags = 0;
> +	wait->private = req;
> +	wait->func = i915_fence_ring_check;
> +
> +	i915_gem_request_reference(req);
> +
> +	__add_wait_queue(&ring->irq_queue, wait);
> +
> +	return true;
> +}

Explain how fence_release interacts with enable_signalling. Presumably
either the core fence routines clean up the outstanding signalling, or we
are expected to. Doing so will remove the requirement for the extra
ref/unref (including the worker).
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [RFC v3] drm/i915: Android native sync support
  2015-01-27 11:40   ` Chris Wilson
@ 2015-01-27 12:13     ` Tvrtko Ursulin
  2015-01-27 12:18       ` Chris Wilson
  0 siblings, 1 reply; 38+ messages in thread
From: Tvrtko Ursulin @ 2015-01-27 12:13 UTC (permalink / raw)
  To: Chris Wilson, Intel-gfx

On 01/27/2015 11:40 AM, Chris Wilson wrote:
> On Tue, Jan 27, 2015 at 11:29:36AM +0000, Tvrtko Ursulin wrote:
>> +static void i915_gem_request_unreference_worker(struct work_struct *work)
>> +{
>> +	struct drm_i915_gem_request *req =
>> +		container_of(work, struct drm_i915_gem_request, unref_work);
>> +	struct drm_device *dev = req->ring->dev;
>> +
>> +	mutex_lock(&dev->struct_mutex);
>> +	i915_gem_request_unreference(req);
>> +	mutex_unlock(&dev->struct_mutex);
>> +}
>> +
>> +static int
>> +i915_fence_ring_check(wait_queue_t *wait, unsigned mode, int flags, void *key)
>> +{
>> +	struct drm_i915_gem_request *req = wait->private;
>> +	struct intel_engine_cs *ring = req->ring;
>> +
>> +	if (!i915_gem_request_completed(req, false))
>> +		return 0;
>> +
>> +	fence_signal_locked(&req->fence);
>> +
>> +	__remove_wait_queue(&ring->irq_queue, wait);
>> +	ring->irq_put(ring);
>> +
>> +	INIT_WORK(&req->unref_work, i915_gem_request_unreference_worker);
>> +	schedule_work(&req->unref_work);
>> +
>> +	return 0;
>> +}
>> +
>> +static bool i915_fence_ring_enable_signaling(struct fence *fence)
>> +{
>> +	struct drm_i915_gem_request *req = to_i915_request(fence);
>> +	struct intel_engine_cs *ring = req->ring;
>> +	struct drm_i915_private *dev_priv = ring->dev->dev_private;
>> +	wait_queue_t *wait = &req->wait;
>> +
>> +	/* queue fence wait queue on irq queue and get fence */
>> +	if (i915_gem_request_completed(req, false) ||
>> +	    i915_terminally_wedged(&dev_priv->gpu_error))
>> +		return false;
>> +
>> +	if (!ring->irq_get(ring))
>> +		return false;
>> +
>> +	if (i915_gem_request_completed(req, false)) {
>> +		ring->irq_put(ring);
>> +		return true;
>> +	}
>> +
>> +	wait->flags = 0;
>> +	wait->private = req;
>> +	wait->func = i915_fence_ring_check;
>> +
>> +	i915_gem_request_reference(req);
>> +
>> +	__add_wait_queue(&ring->irq_queue, wait);
>> +
>> +	return true;
>> +}
>
> Explain how fence_release interacts with enable_signalling. Presumably
> either the core fence routines clean up the outstanding signalling, or we
> are expected to. Doing so will remove the requirement for the extra
> ref/unref (including the worker).

I think normally we would be expected to use fence_get/put on the 
signaling side of things but that is not possible here since struct 
fence is embedded in the request.

So as it is, sync_fence_release will tear everything down with our 
callback still on the irq_queue, which is what taking a request reference 
sorts out.

Regards,

Tvrtko

* Re: [RFC v3] drm/i915: Android native sync support
  2015-01-27 12:13     ` Tvrtko Ursulin
@ 2015-01-27 12:18       ` Chris Wilson
  2015-01-27 13:43         ` Tvrtko Ursulin
  0 siblings, 1 reply; 38+ messages in thread
From: Chris Wilson @ 2015-01-27 12:18 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Tue, Jan 27, 2015 at 12:13:14PM +0000, Tvrtko Ursulin wrote:
> On 01/27/2015 11:40 AM, Chris Wilson wrote:
> >On Tue, Jan 27, 2015 at 11:29:36AM +0000, Tvrtko Ursulin wrote:
> >>+static void i915_gem_request_unreference_worker(struct work_struct *work)
> >>+{
> >>+	struct drm_i915_gem_request *req =
> >>+		container_of(work, struct drm_i915_gem_request, unref_work);
> >>+	struct drm_device *dev = req->ring->dev;
> >>+
> >>+	mutex_lock(&dev->struct_mutex);
> >>+	i915_gem_request_unreference(req);
> >>+	mutex_unlock(&dev->struct_mutex);
> >>+}
> >>+
> >>+static int
> >>+i915_fence_ring_check(wait_queue_t *wait, unsigned mode, int flags, void *key)
> >>+{
> >>+	struct drm_i915_gem_request *req = wait->private;
> >>+	struct intel_engine_cs *ring = req->ring;
> >>+
> >>+	if (!i915_gem_request_completed(req, false))
> >>+		return 0;
> >>+
> >>+	fence_signal_locked(&req->fence);
> >>+
> >>+	__remove_wait_queue(&ring->irq_queue, wait);
> >>+	ring->irq_put(ring);
> >>+
> >>+	INIT_WORK(&req->unref_work, i915_gem_request_unreference_worker);
> >>+	schedule_work(&req->unref_work);
> >>+
> >>+	return 0;
> >>+}
> >>+
> >>+static bool i915_fence_ring_enable_signaling(struct fence *fence)
> >>+{
> >>+	struct drm_i915_gem_request *req = to_i915_request(fence);
> >>+	struct intel_engine_cs *ring = req->ring;
> >>+	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> >>+	wait_queue_t *wait = &req->wait;
> >>+
> >>+	/* queue fence wait queue on irq queue and get fence */
> >>+	if (i915_gem_request_completed(req, false) ||
> >>+	    i915_terminally_wedged(&dev_priv->gpu_error))
> >>+		return false;
> >>+
> >>+	if (!ring->irq_get(ring))
> >>+		return false;
> >>+
> >>+	if (i915_gem_request_completed(req, false)) {
> >>+		ring->irq_put(ring);
> >>+		return true;
> >>+	}
> >>+
> >>+	wait->flags = 0;
> >>+	wait->private = req;
> >>+	wait->func = i915_fence_ring_check;
> >>+
> >>+	i915_gem_request_reference(req);
> >>+
> >>+	__add_wait_queue(&ring->irq_queue, wait);
> >>+
> >>+	return true;
> >>+}
> >
> >Explain how fence_release interacts with enable_signalling. Presumably
> >either the core fence routines clean up the outstanding signalling, or we
> >are expected to. Doing so will remove the requirement for the extra
> >ref/unref (including the worker).
> 
> I think normally we would be expected to use fence_get/put on the
> signaling side of things but that is not possible here since struct
> fence is embedded in the request.
> 
> So as it is, sync_fence_release will tear everything down with our
> callback still on the irq_queue, which is what taking a request
> reference sorts out.

So you are saying that we have a callback for when the fence is
discarded by userspace and we ignore that opportunity for disabling
interrupt generation and removing the extra request references?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

* Re: [RFC v3] drm/i915: Android native sync support
  2015-01-27 12:18       ` Chris Wilson
@ 2015-01-27 13:43         ` Tvrtko Ursulin
  2015-01-28  9:25           ` Daniel Vetter
  0 siblings, 1 reply; 38+ messages in thread
From: Tvrtko Ursulin @ 2015-01-27 13:43 UTC (permalink / raw)
  To: Chris Wilson, Intel-gfx


On 01/27/2015 12:18 PM, Chris Wilson wrote:
> On Tue, Jan 27, 2015 at 12:13:14PM +0000, Tvrtko Ursulin wrote:
>> On 01/27/2015 11:40 AM, Chris Wilson wrote:

[snip]

>>> Explain how fence_release interacts with enable_signalling. Presumably
>>> either the core fence routines clean up the outstanding signalling, or we
>>> are expected to. Doing so will remove the requirement for the extra
>>> ref/unref (including the worker).
>>
>> I think normally we would be expected to use fence_get/put on the
>> signaling side of things but that is not possible here since struct
>> fence is embedded in the request.
>>
>> So as it is, sync_fence_release will tear everything down with our
>> callback still on the irq_queue which is what taking a request
>> reference sorts out.
>
> So you are saying that we have a callback for when the fence is
> discarded by userspace, and we ignore that opportunity for disabling
> interrupt generation and removing the extra request references?

I can change it to work like that, sure; I don't see anything obvious 
preventing this rework. I can use the fence lock to avoid the wait queue 
removal race and that should be pretty much it.
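
Roughly something like this on the release side (an illustrative sketch
only, against the v3 naming; it assumes the wait entry is set up with
INIT_LIST_HEAD and detached with list_del_init on the check side, so the
list_empty test is meaningful, and that the check callback takes the same
fence lock):

static void i915_fence_ring_release(struct fence *fence)
{
	struct drm_i915_gem_request *req = to_i915_request(fence);
	struct intel_engine_cs *ring = req->ring;
	unsigned long flags;

	/* fence->lock is the fence_lock we passed to fence_init() */
	spin_lock_irqsave(fence->lock, flags);
	if (!list_empty(&req->wait.task_list)) {
		/* callback still queued: detach and drop the irq reference */
		__remove_wait_queue(&ring->irq_queue, &req->wait);
		ring->irq_put(ring);
	}
	spin_unlock_irqrestore(fence->lock, flags);
}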

On a related note, do you have any ideas for submitting a batch which can
either keep the GPU busy for a configurable time, or until it is signaled by
something else (possibly another batch)? Gen agnostic ideally? :)

Regards,

Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC v2] drm/i915: Android native sync support
  2015-01-26 11:08             ` Tvrtko Ursulin
@ 2015-01-28  9:20               ` Daniel Vetter
  0 siblings, 0 replies; 38+ messages in thread
From: Daniel Vetter @ 2015-01-28  9:20 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: intel-gfx

On Mon, Jan 26, 2015 at 11:08:43AM +0000, Tvrtko Ursulin wrote:
> 
> On 01/24/2015 09:47 AM, Daniel Vetter wrote:
> >On Fri, Jan 23, 2015 at 5:49 PM, Tvrtko Ursulin
> ><tvrtko.ursulin@linux.intel.com> wrote:
> >>Could you please translate this into something understandable by newcomers?
> >>:)
> >
> >I don't know which parts are confusing without questions so please ask
> >them ... the questions below about scheduler interactions seem fairly
> >advanced ;-)
> 
> I suppose for starters, when I took over this work I read the last thread
> from December.  There you were saying to Jesse to convert the separate ioctl
> into an execbuf interface for in & out fences.
> 
> That's what I thought I did. So is that not the fashion of the day any more,
> or did I misunderstand something?
> 
> People will scream for something along these lines very soon anyway, since I
> am told it is kind of packaged with the scheduler, and both will be very much
> desired soon.

That's still the proper fashion imo, and what I thought I'd explained
...
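
For concreteness, consuming the execbuf out-fence would look roughly like
this from userspace (a sketch against this RFC's uapi; drmIoctl is from
libdrm, and the wait uses the staging sync driver's SYNC_IOC_WAIT ioctl):

static int submit_and_wait(int fd, struct drm_i915_gem_execbuffer2 *execbuf)
{
	__s32 timeout_ms = -1;	/* wait forever */
	int fence_fd;

	/* buffers_ptr, buffer_count etc. are filled in as usual */
	execbuf->flags |= I915_EXEC_FENCE_OUT;

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf))
		return -errno;

	/* the ioctl is now _IOWR, so rsvd2 carries the sync fence fd back */
	fence_fd = execbuf->rsvd2;

	ioctl(fence_fd, SYNC_IOC_WAIT, &timeout_ms);
	close(fence_fd);

	return 0;
}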

About the screaming: The real blocker here is destaging android fences.
I've been telling this to everyone for a while already, but can't hurt to
reinforce ;-)

> The idea is to allow submitting work without blocking in userspace, and rather
> let the scheduler queue and shuffle depending on fences.

Well that's also possible without explicit fences, but not if your
userspace assumes explicit fences exist ofc.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC v2] drm/i915: Android native sync support
  2015-01-26  9:08                 ` Chris Wilson
@ 2015-01-28  9:22                   ` Daniel Vetter
  2015-01-28  9:23                     ` Chris Wilson
  0 siblings, 1 reply; 38+ messages in thread
From: Daniel Vetter @ 2015-01-28  9:22 UTC (permalink / raw)
  To: Chris Wilson, Daniel Vetter, Tvrtko Ursulin, intel-gfx

On Mon, Jan 26, 2015 at 09:08:03AM +0000, Chris Wilson wrote:
> On Mon, Jan 26, 2015 at 08:52:39AM +0100, Daniel Vetter wrote:
> > I think the problem will be platforms that want full explicit fence (like
> > android) but allow delayed creation of the fence fd from a gl sync object
> > (like the android egl extension allows).
> > 
> > I'm not sure yet how to best expose that really since just creating a
> > fence from the implicit request attached to the batch might upset the
> > interface purists with the mix in implicit and explicit fencing ;-) Hence
> > why I think for now we should just do the eager fd creation at execbuf
> > until ppl scream (well maybe not merge this patch until ppl scream ...).
> 
> Everything we do is buffer centric. Even in the future with random bits
> of memory, we will still use buffers behind the scenes. From an
> interface perspective, it is clearer to me if we say "give me a fence for
> this buffer". Exactly the same way as we say "is this buffer busy" or
> "wait on this buffer". The change is that we now hand back an fd to slot
> into an event loop. That, to me, is a much smaller evolutionary step of
> the existing API, and yet more versatile than just attaching one to the
> execbuf.

The problem is that big parts of the world do not subscribe to that buffer
centric view of gfx. Imo since those parts will be the primary users of
this interface we should try to fit their ideas as much as feasible. Later
on (if we need it) we can add some glue to tie in the buffer-centric
implicit model with the explicit model.
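
(For illustration, the buffer-centric variant being discussed would be
something like a hypothetical "fence from bo" ioctl; this struct does not
exist anywhere, it is made up purely to contrast the two models:)

/* hypothetical uapi: export a sync fence fd for an existing GEM buffer */
struct drm_i915_gem_fence_from_bo {
	__u32 handle;	/* GEM handle to create a fence for */
	__u32 flags;	/* e.g. fence on read vs write completion */
	__s32 fd;	/* out: sync fence fd */
	__u32 pad;
};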
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC v2] drm/i915: Android native sync support
  2015-01-28  9:22                   ` Daniel Vetter
@ 2015-01-28  9:23                     ` Chris Wilson
  2015-01-28  9:50                       ` Daniel Vetter
  0 siblings, 1 reply; 38+ messages in thread
From: Chris Wilson @ 2015-01-28  9:23 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Wed, Jan 28, 2015 at 10:22:15AM +0100, Daniel Vetter wrote:
> On Mon, Jan 26, 2015 at 09:08:03AM +0000, Chris Wilson wrote:
> > On Mon, Jan 26, 2015 at 08:52:39AM +0100, Daniel Vetter wrote:
> > > I think the problem will be platforms that want full explicit fence (like
> > > android) but allow delayed creation of the fence fd from a gl sync object
> > > (like the android egl extension allows).
> > > 
> > > I'm not sure yet how to best expose that really since just creating a
> > > fence from the implicit request attached to the batch might upset the
> > > interface purists with the mix in implicit and explicit fencing ;-) Hence
> > > why I think for now we should just do the eager fd creation at execbuf
> > > until ppl scream (well maybe not merge this patch until ppl scream ...).
> > 
> > Everything we do is buffer centric. Even in the future with random bits
> > of memory, we will still use buffers behind the scenes. From an
> > interface perspective, it is clearer to me if we say "give me a fence for
> > this buffer". Exactly the same way as we say "is this buffer busy" or
> > "wait on this buffer". The change is that we now hand back an fd to slot
> > into an event loop. That, to me, is a much smaller evolutionary step of
> > the existing API, and yet more versatile than just attaching one to the
> > execbuf.
> 
> The problem is that big parts of the world do not subscribe to that buffer
> centric view of gfx. Imo since those parts will be the primary users of
> this interface we should try to fit their ideas as much as feasible. Later
> on (if we need it) we can add some glue to tie in the buffer-centric
> implicit model with the explicit model.

They won't be using execbuffer either. So what's your point?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC v3] drm/i915: Android native sync support
  2015-01-27 13:43         ` Tvrtko Ursulin
@ 2015-01-28  9:25           ` Daniel Vetter
  0 siblings, 0 replies; 38+ messages in thread
From: Daniel Vetter @ 2015-01-28  9:25 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Tue, Jan 27, 2015 at 01:43:02PM +0000, Tvrtko Ursulin wrote:
> 
> On 01/27/2015 12:18 PM, Chris Wilson wrote:
> >On Tue, Jan 27, 2015 at 12:13:14PM +0000, Tvrtko Ursulin wrote:
> >>On 01/27/2015 11:40 AM, Chris Wilson wrote:
> 
> [snip]
> 
> >>>Explain how fence_release interacts with enable_signalling. Presumably
> >>>either the core fence routines clean up the outstanding signalling, or we
> >>>are expected to. Doing so will remove the requirement for the extra
> >>>ref/unref (including the worker).
> >>
> >>I think normally we would be expected to use fence_get/put on the
> >>signaling side of things but that is not possible here since struct
> >>fence is embedded in the request.
> >>
> >>So as it is, sync_fence_release will tear everything down with our
> >>callback still on the irq_queue which is what taking a request
> >>reference sorts out.
> >
> >So you are saying that we have a callback for when the fence is
> >discarded by userspace, and we ignore that opportunity for disabling
> >interrupt generation and removing the extra request references?
> 
> I can change it to work like that, sure; I don't see anything obvious
> preventing this rework. I can use the fence lock to avoid the wait queue
> removal race and that should be pretty much it.
> 
> On a related note, do you have any ideas for submitting a batch which can
> either keep the GPU busy for a configurable time, or until it is signaled by
> something else (possibly another batch)? Gen agnostic ideally? :)

Self-tuning busy loads using the blitter/rendercopy. We have a bazillion
existing copypasta in igt tests for this; it would be great if someone
extracted it into a real igt helper library and converted everyone over.

Iirc the one in kms_flip or gem_wait is probably the best starting point.
There's no way afaik to block a batch on an mbox or similar from
userspace.
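
Roughly, the self-tuning part of those loads looks like this (an
illustrative sketch; do_one_blit() and wait_for_idle() are stand-ins for
whatever copy and quiescing helpers the test already has, and the usual
igt includes are assumed):

static unsigned int calibrate_busy_load(int fd, unsigned int target_us)
{
	struct timespec start, end;
	unsigned int loops = 1, i;
	int64_t elapsed_us;

	do {
		clock_gettime(CLOCK_MONOTONIC, &start);
		for (i = 0; i < loops; i++)
			do_one_blit(fd);	/* hypothetical helper */
		wait_for_idle(fd);		/* hypothetical helper */
		clock_gettime(CLOCK_MONOTONIC, &end);

		elapsed_us = (end.tv_sec - start.tv_sec) * 1000000ll +
			     (end.tv_nsec - start.tv_nsec) / 1000;
		if (elapsed_us >= target_us)
			break;

		loops <<= 1;	/* double the load until we hit the target */
	} while (loops < (1u << 20));

	return loops;
}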
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC v3] drm/i915: Android native sync support
  2015-01-27 11:29 ` [RFC v3] " Tvrtko Ursulin
  2015-01-27 11:40   ` Chris Wilson
@ 2015-01-28  9:29   ` Daniel Vetter
  2015-01-28 16:52     ` Tvrtko Ursulin
  1 sibling, 1 reply; 38+ messages in thread
From: Daniel Vetter @ 2015-01-28  9:29 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Tue, Jan 27, 2015 at 11:29:36AM +0000, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Add Android native sync support with fences exported as file descriptors via
> the execbuf ioctl (rsvd2 field is used).
> 
> This is a continuation of Jesse Barnes's previous work, squashed to arrive at
> the final destination, cleaned up, with some fixes and preliminary light
> testing.
> 
> GEM requests are extended with fence structures which are associated with
> Android sync fences exported to user space via file descriptors. Fences which
> are waited upon, and while exported to userspace, are referenced and added to
> the irq_queue so they are signalled when requests are completed. There is no
> overhead in the case where fences are not requested.
> 
> Based on patches by Jesse Barnes:
>    drm/i915: Android sync points for i915 v3
>    drm/i915: add fences to the request struct
>    drm/i915: sync fence fixes/updates
> 
> To do:
>    * Extend driver data with context id / ring id (TBD).
> 
> v2:
>    * Code review comments. (Chris Wilson)
>    * ring->add_request() was a wrong thing to call - rebase on top of John
>      Harrison's (drm/i915: Early alloc request) to ensure correct request is
>      present before creating a fence.
>    * Take a request reference from signalling path as well to ensure request
>      sticks around while fence is on the request completion wait queue.
> 
> v3:
>    * Use worker to unreference on completion so it can lock the mutex from
>      interrupt context.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
> Cc: John Harrison <John.C.Harrison@Intel.com>

btw for merging we need to split the conversion to fences out from the
actual userspace interface. And imo we should replace a lot of our
existing usage of i915_gem_request with fences within the driver, too. Not
tacked on the side like in your patch here. All the recent refactoring
have been aiming to match i915 requests to the internal fence interfaces,
so we should be pretty much there.

We also need this to be able to integrate with the scheduler properly - if
that needs to deal both with fences and i915 requests separately it'll be
a bit more messy. If it's all just fences the code should be streamlined a
lot.

Sorry for spotting this only just now; I thought Jesse's old patches
had this already and that you'd carried that approach over.
-Daniel

> ---
>  drivers/gpu/drm/i915/Kconfig               |  14 ++
>  drivers/gpu/drm/i915/Makefile              |   1 +
>  drivers/gpu/drm/i915/i915_drv.h            |  27 +++
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |  24 +++
>  drivers/gpu/drm/i915/i915_sync.c           | 254 +++++++++++++++++++++++++++++
>  include/uapi/drm/i915_drm.h                |   8 +-
>  6 files changed, 325 insertions(+), 3 deletions(-)
>  create mode 100644 drivers/gpu/drm/i915/i915_sync.c
> 
> diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
> index 4e39ab3..abafe68 100644
> --- a/drivers/gpu/drm/i915/Kconfig
> +++ b/drivers/gpu/drm/i915/Kconfig
> @@ -43,6 +43,20 @@ config DRM_I915_KMS
>  
>  	  If in doubt, say "Y".
>  
> +config DRM_I915_SYNC
> +	bool "Enable explicit sync support"
> +	depends on DRM_I915
> +	default y if STAGING
> +	select STAGING
> +	select ANDROID
> +	select SYNC
> +	help
> +	  Choose this option to enable Android native sync support and the
> +	  corresponding i915 driver code to expose it.  Slightly increases
> +	  driver size and pulls in sync support from staging.
> +
> +	  If in doubt, say "Y".
> +
>  config DRM_I915_FBDEV
>  	bool "Enable legacy fbdev support for the modesetting intel driver"
>  	depends on DRM_I915
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 16e3dc3..bcff481 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -58,6 +58,7 @@ i915-y += intel_audio.o \
>  	  intel_sprite.o
>  i915-$(CONFIG_ACPI)		+= intel_acpi.o intel_opregion.o
>  i915-$(CONFIG_DRM_I915_FBDEV)	+= intel_fbdev.o
> +i915-$(CONFIG_DRM_I915_SYNC)	+= i915_sync.o
>  
>  # modesetting output/encoder code
>  i915-y += dvo_ch7017.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 023f422..c4d254c 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -49,6 +49,7 @@
>  #include <linux/intel-iommu.h>
>  #include <linux/kref.h>
>  #include <linux/pm_qos.h>
> +#include <linux/fence.h>
>  
>  /* General customization:
>   */
> @@ -2092,6 +2093,15 @@ struct drm_i915_gem_request {
>  	struct list_head client_list;
>  
>  	uint32_t uniq;
> +
> +#ifdef CONFIG_DRM_I915_SYNC
> +	/* core fence obj for this request, may be exported */
> +	struct fence fence;
> +
> +	wait_queue_t wait;
> +
> +	struct work_struct unref_work;
> +#endif
>  };
>  
>  void i915_gem_request_free(struct kref *req_ref);
> @@ -2583,6 +2593,23 @@ void i915_init_vm(struct drm_i915_private *dev_priv,
>  void i915_gem_free_object(struct drm_gem_object *obj);
>  void i915_gem_vma_destroy(struct i915_vma *vma);
>  
> +/* i915_sync.c */
> +struct sync_fence;
> +
> +#ifdef CONFIG_DRM_I915_SYNC
> +int i915_create_sync_fence_ring(struct intel_engine_cs *ring,
> +				struct intel_context *ctx,
> +				struct sync_fence **sync_fence, int *fence_fd);
> +#else
> +static inline
> +int i915_create_sync_fence_ring(struct intel_engine_cs *ring,
> +				struct intel_context *ctx,
> +				struct sync_fence **sync_fence, int *fence_fd)
> +{
> +	return -ENODEV;
> +}
> +#endif
> +
>  #define PIN_MAPPABLE 0x1
>  #define PIN_NONBLOCK 0x2
>  #define PIN_GLOBAL 0x4
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 2a3d1a9..b04157a 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -32,6 +32,7 @@
>  #include "i915_trace.h"
>  #include "intel_drv.h"
>  #include <linux/dma_remapping.h>
> +#include "../../../staging/android/sync.h"
>  
>  #define  __EXEC_OBJECT_HAS_PIN (1<<31)
>  #define  __EXEC_OBJECT_HAS_FENCE (1<<30)
> @@ -1356,6 +1357,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  	u32 flags;
>  	int ret;
>  	bool need_relocs;
> +	struct sync_fence *sync_fence = NULL;
> +	int fence_fd = -1;
>  
>  	if (!i915_gem_check_execbuffer(args))
>  		return -EINVAL;
> @@ -1509,8 +1512,29 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
>  	if (ret)
>  		goto req_err;
>  
> +	if (args->flags & I915_EXEC_FENCE_OUT) {
> +		ret = i915_create_sync_fence_ring(ring, ctx,
> +						  &sync_fence, &fence_fd);
> +		if (ret)
> +			goto req_err;
> +	}
> +
>  	ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args,
>  				      &eb->vmas, batch_obj, exec_start, flags);
> +	if (ret)
> +		goto exec_err;
> +
> +	if (sync_fence) {
> +		sync_fence_install(sync_fence, fence_fd);
> +		args->rsvd2 = fence_fd;
> +		sync_fence = NULL;
> +	}
> +
> +exec_err:
> +	if (sync_fence) {
> +		fput(sync_fence->file);
> +		put_unused_fd(fence_fd);
> +	}
>  
>  req_err:
>  	/*
> diff --git a/drivers/gpu/drm/i915/i915_sync.c b/drivers/gpu/drm/i915/i915_sync.c
> new file mode 100644
> index 0000000..77b7bc9
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_sync.c
> @@ -0,0 +1,254 @@
> +/*
> + * Copyright © 2013-2014 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *    Jesse Barnes <jbarnes@virtuousgeek.org>
> + *    Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> + *
> + */
> +
> +#include <drm/drmP.h>
> +#include <drm/drm_vma_manager.h>
> +#include <drm/i915_drm.h>
> +#include "i915_drv.h"
> +#include "i915_trace.h"
> +#include "intel_drv.h"
> +#include <linux/oom.h>
> +#include <linux/shmem_fs.h>
> +#include <linux/slab.h>
> +#include <linux/swap.h>
> +#include <linux/pci.h>
> +#include <linux/dma-buf.h>
> +#include "../../../staging/android/sync.h"
> +
> +static DEFINE_SPINLOCK(fence_lock);
> +
> +/*
> + * i915 Android native sync fences.
> + *
> + * We implement sync points in terms of i915 seqnos. They're exposed through
> + * the DRM_IOCTL_I915_GEM_EXECBUFFER2 ioctl, and can be mixed and matched
> + * with other Android timelines and aggregated into sync_fences, etc.
> + *
> + * TODO:
> + *   * Display engine fences.
> + *   * Extend driver data with context id / ring id.
> + */
> +
> +#define to_i915_request(x) container_of(x, struct drm_i915_gem_request, fence)
> +
> +static const char *i915_fence_get_driver_name(struct fence *fence)
> +{
> +	return "i915";
> +}
> +
> +static const char *i915_fence_ring_get_timeline_name(struct fence *fence)
> +{
> +	struct drm_i915_gem_request *req = to_i915_request(fence);
> +
> +	return req->ring->name;
> +}
> +
> +static void i915_gem_request_unreference_worker(struct work_struct *work)
> +{
> +	struct drm_i915_gem_request *req =
> +		container_of(work, struct drm_i915_gem_request, unref_work);
> +	struct drm_device *dev = req->ring->dev;
> +
> +	mutex_lock(&dev->struct_mutex);
> +	i915_gem_request_unreference(req);
> +	mutex_unlock(&dev->struct_mutex);
> +}
> +
> +static int
> +i915_fence_ring_check(wait_queue_t *wait, unsigned mode, int flags, void *key)
> +{
> +	struct drm_i915_gem_request *req = wait->private;
> +	struct intel_engine_cs *ring = req->ring;
> +
> +	if (!i915_gem_request_completed(req, false))
> +		return 0;
> +
> +	fence_signal_locked(&req->fence);
> +
> +	__remove_wait_queue(&ring->irq_queue, wait);
> +	ring->irq_put(ring);
> +
> +	INIT_WORK(&req->unref_work, i915_gem_request_unreference_worker);
> +	schedule_work(&req->unref_work);
> +
> +	return 0;
> +}
> +
> +static bool i915_fence_ring_enable_signaling(struct fence *fence)
> +{
> +	struct drm_i915_gem_request *req = to_i915_request(fence);
> +	struct intel_engine_cs *ring = req->ring;
> +	struct drm_i915_private *dev_priv = ring->dev->dev_private;
> +	wait_queue_t *wait = &req->wait;
> +
> +	/* queue fence wait queue on irq queue and get fence */
> +	if (i915_gem_request_completed(req, false) ||
> +	    i915_terminally_wedged(&dev_priv->gpu_error))
> +		return false;
> +
> +	if (!ring->irq_get(ring))
> +		return false;
> +
> +	if (i915_gem_request_completed(req, false)) {
> +		ring->irq_put(ring);
> +		return true;
> +	}
> +
> +	wait->flags = 0;
> +	wait->private = req;
> +	wait->func = i915_fence_ring_check;
> +
> +	i915_gem_request_reference(req);
> +
> +	__add_wait_queue(&ring->irq_queue, wait);
> +
> +	return true;
> +}
> +
> +static bool i915_fence_ring_signaled(struct fence *fence)
> +{
> +	struct drm_i915_gem_request *req = to_i915_request(fence);
> +
> +	return i915_gem_request_completed(req, false);
> +}
> +
> +static signed long
> +i915_fence_ring_wait(struct fence *fence, bool intr, signed long timeout_js)
> +{
> +	struct drm_i915_gem_request *req = to_i915_request(fence);
> +	struct drm_i915_private *dev_priv = req->ring->dev->dev_private;
> +	int ret;
> +	s64 timeout;
> +
> +	timeout = jiffies_to_nsecs(timeout_js);
> +
> +	ret = __i915_wait_request(req,
> +				  atomic_read(&dev_priv->gpu_error.reset_counter),
> +				  intr, &timeout, NULL);
> +	if (ret == -ETIME)
> +		return 0;
> +
> +	return ret;
> +}
> +
> +static int
> +i915_fence_ring_fill_driver_data(struct fence *fence, void *data, int size)
> +{
> +	struct drm_i915_gem_request *req = to_i915_request(fence);
> +
> +	if (size < sizeof(req->seqno))
> +		return -ENOMEM;
> +
> +	memcpy(data, &req->seqno,
> +	       sizeof(req->seqno));
> +
> +	return sizeof(req->seqno);
> +}
> +
> +static void i915_fence_ring_value_str(struct fence *fence, char *str, int size)
> +{
> +	struct drm_i915_gem_request *req = to_i915_request(fence);
> +
> +	snprintf(str, size, "%u", req->seqno);
> +}
> +
> +static void
> +i915_fence_ring_timeline_value_str(struct fence *fence, char *str, int size)
> +{
> +	struct drm_i915_gem_request *req = to_i915_request(fence);
> +	struct intel_engine_cs *ring = req->ring;
> +
> +	snprintf(str, size, "%u", ring->get_seqno(ring, false));
> +}
> +
> +static void i915_fence_release(struct fence *fence)
> +{
> +	struct drm_i915_gem_request *req = to_i915_request(fence);
> +	struct drm_device *dev = req->ring->dev;
> +
> +	mutex_lock(&dev->struct_mutex);
> +	i915_gem_request_unreference(req);
> +	mutex_unlock(&dev->struct_mutex);
> +}
> +
> +static struct fence_ops i915_fence_ring_ops = {
> +	.get_driver_name =	i915_fence_get_driver_name,
> +	.get_timeline_name =	i915_fence_ring_get_timeline_name,
> +	.enable_signaling =	i915_fence_ring_enable_signaling,
> +	.signaled =		i915_fence_ring_signaled,
> +	.wait =			i915_fence_ring_wait,
> +	.fill_driver_data =	i915_fence_ring_fill_driver_data,
> +	.fence_value_str =	i915_fence_ring_value_str,
> +	.timeline_value_str =	i915_fence_ring_timeline_value_str,
> +	.release =		i915_fence_release
> +};
> +
> +int i915_create_sync_fence_ring(struct intel_engine_cs *ring,
> +				struct intel_context *ctx,
> +				struct sync_fence **sync_fence, int *fence_fd)
> +{
> +	struct drm_i915_gem_request *req;
> +	struct sync_fence *sfence;
> +	char ring_name[6] = "ring0";
> +	int fd;
> +
> +	if (!intel_ring_initialized(ring)) {
> +		DRM_DEBUG("Ring not initialized!\n");
> +		return -EAGAIN;
> +	}
> +
> +	req = ring->outstanding_lazy_request;
> +	if (WARN_ON_ONCE(!req))
> +		return -ECANCELED;
> +
> +	fd = get_unused_fd_flags(O_CLOEXEC);
> +	if (fd < 0) {
> +		DRM_DEBUG("No available file descriptor!\n");
> +		return fd;
> +	}
> +
> +	i915_gem_request_reference(req);
> +
> +	fence_init(&req->fence, &i915_fence_ring_ops, &fence_lock,
> +		   ctx->user_handle, req->seqno);
> +
> +	ring_name[4] += ring->id;
> +	sfence = sync_fence_create_dma(ring_name, &req->fence);
> +	if (!sfence)
> +		goto err;
> +
> +	*sync_fence = sfence;
> +	*fence_fd = fd;
> +
> +	return 0;
> +
> +err:
> +	i915_gem_request_unreference(req);
> +	put_unused_fd(fd);
> +	return -ENOMEM;
> +}
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 2e559f6e..c197770 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -246,7 +246,7 @@ typedef struct _drm_i915_sarea {
>  #define DRM_IOCTL_I915_HWS_ADDR		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_HWS_ADDR, struct drm_i915_gem_init)
>  #define DRM_IOCTL_I915_GEM_INIT		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
>  #define DRM_IOCTL_I915_GEM_EXECBUFFER	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
> -#define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
> +#define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
>  #define DRM_IOCTL_I915_GEM_PIN		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
>  #define DRM_IOCTL_I915_GEM_UNPIN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
>  #define DRM_IOCTL_I915_GEM_BUSY		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
> @@ -718,7 +718,7 @@ struct drm_i915_gem_execbuffer2 {
>  #define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */
>  	__u64 flags;
>  	__u64 rsvd1; /* now used for context info */
> -	__u64 rsvd2;
> +	__u64 rsvd2; /* now used for fence fd */
>  };
>  
>  /** Resets the SO write offset registers for transform feedback on gen7. */
> @@ -750,7 +750,9 @@ struct drm_i915_gem_execbuffer2 {
>   */
>  #define I915_EXEC_HANDLE_LUT		(1<<12)
>  
> -#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1)
> +#define I915_EXEC_FENCE_OUT		(1<<13)
> +
> +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_FENCE_OUT<<1)
>  
>  #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
>  #define i915_execbuffer2_set_context_id(eb2, context) \
> -- 
> 2.2.2
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC v2] drm/i915: Android native sync support
  2015-01-28  9:23                     ` Chris Wilson
@ 2015-01-28  9:50                       ` Daniel Vetter
  2015-01-28 10:07                         ` Chris Wilson
  0 siblings, 1 reply; 38+ messages in thread
From: Daniel Vetter @ 2015-01-28  9:50 UTC (permalink / raw)
  To: Chris Wilson, Daniel Vetter, Tvrtko Ursulin, intel-gfx

On Wed, Jan 28, 2015 at 09:23:46AM +0000, Chris Wilson wrote:
> On Wed, Jan 28, 2015 at 10:22:15AM +0100, Daniel Vetter wrote:
> > On Mon, Jan 26, 2015 at 09:08:03AM +0000, Chris Wilson wrote:
> > > On Mon, Jan 26, 2015 at 08:52:39AM +0100, Daniel Vetter wrote:
> > > > I think the problem will be platforms that want full explicit fence (like
> > > > android) but allow delayed creation of the fence fd from a gl sync object
> > > > (like the android egl extension allows).
> > > > 
> > > > I'm not sure yet how to best expose that really since just creating a
> > > > fence from the implicit request attached to the batch might upset the
> > > > interface purists with the mix in implicit and explicit fencing ;-) Hence
> > > > why I think for now we should just do the eager fd creation at execbuf
> > > > until ppl scream (well maybe not merge this patch until ppl scream ...).
> > > 
> > > Everything we do is buffer centric. Even in the future with random bits
> > > of memory, we will still use buffers behind the scenes. From an
> > > interface perspective, it is clearer to me if we say "give me a fence for
> > > this buffer". Exactly the same way as we say "is this buffer busy" or
> > > "wait on this buffer". The change is that we now hand back an fd to slot
> > > into an event loop. That, to me, is a much smaller evolutionary step of
> > > the existing API, and yet more versatile than just attaching one to the
> > > execbuf.
> > 
> > The problem is that big parts of the world do not subscribe to that buffer
> > centric view of gfx. Imo since those parts will be the primary users of
> > this interface we should try to fit their ideas as much as feasible. Later
> > on (if we need it) we can add some glue to tie in the buffer-centric
> > implicit model with the explicit model.
> 
> They won't be using execbuffer either. So what's your point?

Android very much will use execbuffer. And even the in-flight buffered
SVM stuff will keep on using execbuf (just without any relocs).

And once we indeed can make the case (plus have the hw) for direct
userspace submission I think the kernel shouldn't be in the business of
creating fence objects: The ring will be fully under control of
userspace, and that's the only place we could insert a seqno into. So
again the same trust issues.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC v2] drm/i915: Android native sync support
  2015-01-28  9:50                       ` Daniel Vetter
@ 2015-01-28 10:07                         ` Chris Wilson
  2015-02-25 20:46                           ` Jesse Barnes
  0 siblings, 1 reply; 38+ messages in thread
From: Chris Wilson @ 2015-01-28 10:07 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Wed, Jan 28, 2015 at 10:50:18AM +0100, Daniel Vetter wrote:
> On Wed, Jan 28, 2015 at 09:23:46AM +0000, Chris Wilson wrote:
> > On Wed, Jan 28, 2015 at 10:22:15AM +0100, Daniel Vetter wrote:
> > > On Mon, Jan 26, 2015 at 09:08:03AM +0000, Chris Wilson wrote:
> > > > On Mon, Jan 26, 2015 at 08:52:39AM +0100, Daniel Vetter wrote:
> > > > > I think the problem will be platforms that want full explicit fence (like
> > > > > android) but allow delayed creation of the fence fd from a gl sync object
> > > > > (like the android egl extension allows).
> > > > > 
> > > > > I'm not sure yet how to best expose that really since just creating a
> > > > > fence from the implicit request attached to the batch might upset the
> > > > > interface purists with the mix in implicit and explicit fencing ;-) Hence
> > > > > why I think for now we should just do the eager fd creation at execbuf
> > > > > until ppl scream (well maybe not merge this patch until ppl scream ...).
> > > > 
> > > > Everything we do is buffer centric. Even in the future with random bits
> > > > of memory, we will still use buffers behind the scenes. From an
> > > > interface perspective, it is clearer to me if we say "give me a fence for
> > > > this buffer". Exactly the same way as we say "is this buffer busy" or
> > > > "wait on this buffer". The change is that we now hand back an fd to slot
> > > > into an event loop. That, to me, is a much smaller evolutionary step of
> > > > the existing API, and yet more versatile than just attaching one to the
> > > > execbuf.
> > > 
> > > The problem is that big parts of the world do not subscribe to that buffer
> > > centric view of gfx. Imo since those parts will be the primary users of
> > > this interface we should try to fit their ideas as much as feasible. Later
> > > on (if we need it) we can add some glue to tie in the buffer-centric
> > > implicit model with the explicit model.
> > 
> > They won't be using execbuffer either. So what's your point?
> 
> Android very much will use execbuffer. And even the in-flight buffered
> SVM stuff will keep on using execbuf (just without any relocs).

So still buffer-centric, and it would benefit from the more general and more
explicit fencing rather than just execbuf. If you look at the throttling
in mesa, you can already see a place that would rather create a fence on
a buffer than rely on its very approximate execbuf tracking.
 
> And once we indeed can make the case (plus have the hw) for direct
> userspace submission I think the kernel shouldn't be in the business of
> creating fence objects: The ring will be fully under control of
> userspace, and that's the only place we could insert a seqno into. So
> again the same trust issues.

No buffers, no requests, nothing for the kernel to do. No impact on
designing an API that is useful today.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC v3] drm/i915: Android native sync support
  2015-01-28  9:29   ` Daniel Vetter
@ 2015-01-28 16:52     ` Tvrtko Ursulin
  2015-01-29 16:14       ` Daniel Vetter
  0 siblings, 1 reply; 38+ messages in thread
From: Tvrtko Ursulin @ 2015-01-28 16:52 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: Intel-gfx


On 01/28/2015 09:29 AM, Daniel Vetter wrote:
> On Tue, Jan 27, 2015 at 11:29:36AM +0000, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Add Android native sync support with fences exported as file descriptors via
>> the execbuf ioctl (rsvd2 field is used).
>>
>> This is a continuation of Jesse Barnes's previous work, squashed to arrive at
>> the final destination, cleaned up, with some fixes and preliminary light
>> testing.
>>
>> GEM requests are extended with fence structures which are associated with
>> Android sync fences exported to user space via file descriptors. Fences which
>> are waited upon, and while exported to userspace, are referenced and added to
>> the irq_queue so they are signalled when requests are completed. There is no
>> overhead in the case where fences are not requested.
>>
>> Based on patches by Jesse Barnes:
>>     drm/i915: Android sync points for i915 v3
>>     drm/i915: add fences to the request struct
>>     drm/i915: sync fence fixes/updates
>>
>> To do:
>>     * Extend driver data with context id / ring id (TBD).
>>
>> v2:
>>     * Code review comments. (Chris Wilson)
>>     * ring->add_request() was a wrong thing to call - rebase on top of John
>>       Harrison's (drm/i915: Early alloc request) to ensure correct request is
>>       present before creating a fence.
>>     * Take a request reference from signalling path as well to ensure request
>>       sticks around while fence is on the request completion wait queue.
>>
>> v3:
>>     * Use worker to unreference on completion so it can lock the mutex from
>>       interrupt context.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
>> Cc: John Harrison <John.C.Harrison@Intel.com>
>
> btw for merging we need to split the conversion to fences out from the
> actual userspace interface. And imo we should replace a lot of our
> existing usage of i915_gem_request with fences within the driver, too. Not
> tacked on the side like in your patch here. All the recent refactoring
> have been aiming to match i915 requests to the internal fence interfaces,
> so we should be pretty much there.

Ok, I did not realize this. It did not make sense to me to split it out 
if the only access point to create them is via Android native sync, but 
from what you are saying fences should be initialized and active for all 
requests all the time, with the native sync part established on demand.

In what respects has the refactoring been aligning requests and fences?

> We also need this to be able to integrate with the scheduler properly - if
> that needs to deal both with fences and i915 requests separately it'll be
> a bit more messy. If it's all just fences the code should be streamlined a
> lot.

Requests will remain the main data structure representing a unit of GPU 
work?

If so, it sounds logical that fences are associated (or aggregated) with 
requests. I don't see how the scheduler would work with fences and not requests.

Regards,

Tvrtko

P.S. Thanks for pointing out self-tuning busy loads in another thread - 
it works nicely.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC v3] drm/i915: Android native sync support
  2015-01-28 16:52     ` Tvrtko Ursulin
@ 2015-01-29 16:14       ` Daniel Vetter
  0 siblings, 0 replies; 38+ messages in thread
From: Daniel Vetter @ 2015-01-29 16:14 UTC (permalink / raw)
  To: Tvrtko Ursulin; +Cc: Intel-gfx

On Wed, Jan 28, 2015 at 04:52:53PM +0000, Tvrtko Ursulin wrote:
> 
> On 01/28/2015 09:29 AM, Daniel Vetter wrote:
> >On Tue, Jan 27, 2015 at 11:29:36AM +0000, Tvrtko Ursulin wrote:
> >>From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>
> >>Add Android native sync support with fences exported as file descriptors via
> >>the execbuf ioctl (rsvd2 field is used).
> >>
> >>This is a continuation of Jesse Barnes's previous work, squashed to arrive at
> >>the final destination, cleaned up, with some fixes and preliminary light
> >>testing.
> >>
> >>GEM requests are extended with fence structures which are associated with
> >>Android sync fences exported to user space via file descriptors. Fences which
> >>are waited upon, and while exported to userspace, are referenced and added to
> >>the irq_queue so they are signalled when requests are completed. There is no
> >>overhead in the case where fences are not requested.
> >>
> >>Based on patches by Jesse Barnes:
> >>    drm/i915: Android sync points for i915 v3
> >>    drm/i915: add fences to the request struct
> >>    drm/i915: sync fence fixes/updates
> >>
> >>To do:
> >>    * Extend driver data with context id / ring id (TBD).
> >>
> >>v2:
> >>    * Code review comments. (Chris Wilson)
> >>    * ring->add_request() was a wrong thing to call - rebase on top of John
> >>      Harrison's (drm/i915: Early alloc request) to ensure correct request is
> >>      present before creating a fence.
> >>    * Take a request reference from signalling path as well to ensure request
> >>      sticks around while fence is on the request completion wait queue.
> >>
> >>v3:
> >>    * Use worker to unreference on completion so it can lock the mutex from
> >>      interrupt context.
> >>
> >>Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >>Cc: Jesse Barnes <jbarnes@virtuousgeek.org>
> >>Cc: John Harrison <John.C.Harrison@Intel.com>
> >
> >btw for merging we need to split the conversion to fences out from the
> >actual userspace interface. And imo we should replace a lot of our
> >existing usage of i915_gem_request with fences within the driver, too. Not
> >tacked on the side like in your patch here. All the recent refactoring
> >have been aiming to match i915 requests to the internal fence interfaces,
> >so we should be pretty much there.
> 
> Ok, I did not realize this. It did not make sense to me to split it out if
> the only access point to create them is via Android native sync, but from
> what you are saying fences should be initialized and active for all requests
> all the time, with the native sync part established on demand.
> 
> In what respects has the refactoring been aligning requests and fences?

Requests are also refcounted, like fences. The actual interface for
waiting might still have a slight mismatch.

> >We also need this to be able to integrate with the scheduler properly - if
> >that needs to deal both with fences and i915 requests separately it'll be
> >a bit more messy. If it's all just fences the code should be streamlined a
> >lot.
> 
> Requests will remain the main data structure representing a unit of GPU
> work?

Yes. requests will be just a subclass of fences (through structure
embedding).
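
I.e., roughly this (a sketch, not the final layout):

struct drm_i915_gem_request {
	struct fence fence;	/* refcounting and waiting live here */

	/* i915-specific request state follows */
	struct intel_engine_cs *ring;
	u32 seqno;
	/* ... */
};

static inline struct drm_i915_gem_request *to_request(struct fence *fence)
{
	return container_of(fence, struct drm_i915_gem_request, fence);
}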

> If so, it sounds logical that fences are associated (or aggregated) with
> requests. I don't see how the scheduler would work with fences and not requests.

The important part is that the scheduler can work with fences which are
_not_ i915 requests (e.g. from the camera pipeline or similar things).
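
A sketch of what that buys the scheduler: a dependency only needs the
common fence API, so the producer can be any driver (sched_node and its
fields are made up here purely for illustration):

static void sched_dep_signaled(struct fence *f, struct fence_cb *cb)
{
	struct sched_node *node = container_of(cb, struct sched_node, dep_cb);

	queue_work(node->wq, &node->submit_work);	/* hypothetical */
}

static int sched_add_dependency(struct sched_node *node, struct fence *f)
{
	int ret;

	ret = fence_add_callback(f, &node->dep_cb, sched_dep_signaled);
	if (ret == -ENOENT)	/* fence already signalled, no dependency */
		ret = 0;

	return ret;
}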
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC v2] drm/i915: Android native sync support
  2015-01-28 10:07                         ` Chris Wilson
@ 2015-02-25 20:46                           ` Jesse Barnes
  2015-02-26  9:13                             ` Chris Wilson
  0 siblings, 1 reply; 38+ messages in thread
From: Jesse Barnes @ 2015-02-25 20:46 UTC (permalink / raw)
  To: Chris Wilson, Daniel Vetter, Tvrtko Ursulin, intel-gfx

On 01/28/2015 02:07 AM, Chris Wilson wrote:
> On Wed, Jan 28, 2015 at 10:50:18AM +0100, Daniel Vetter wrote:
>> On Wed, Jan 28, 2015 at 09:23:46AM +0000, Chris Wilson wrote:
>>> On Wed, Jan 28, 2015 at 10:22:15AM +0100, Daniel Vetter wrote:
>>>> On Mon, Jan 26, 2015 at 09:08:03AM +0000, Chris Wilson wrote:
>>>>> On Mon, Jan 26, 2015 at 08:52:39AM +0100, Daniel Vetter wrote:
>>>>>> I think the problem will be platforms that want full explicit fence (like
>>>>>> android) but allow delayed creation of the fence fd from a gl sync object
>>>>>> (like the android egl extension allows).
>>>>>>
>>>>>> I'm not sure yet how to best expose that really since just creating a
>>>>>> fence from the implicit request attached to the batch might upset the
>>>>>> interface purists with the mix in implicit and explicit fencing ;-) Hence
>>>>>> why I think for now we should just do the eager fd creation at execbuf
>>>>>> until ppl scream (well maybe not merge this patch until ppl scream ...).
>>>>>
>>>>> Everything we do is buffer centric. Even in the future with random bits
>>>>> of memory, we will still use buffers behind the scenes. From an
>>>>> interface perspective, it is clearer to me if we say "give me a fence for
>>>>> this buffer". Exactly the same way as we say "is this buffer busy" or
>>>>> "wait on this buffer". The change is that we now hand back an fd to slot
>>>>> into an event loop. That, to me, is a much smaller evolutionary step of
>>>>> the existing API, and yet more versatile than just attaching one to the
>>>>> execbuf.
>>>>
>>>> The problem is that big parts of the world do not subscribe to that buffer
>>>> centric view of gfx. Imo since those parts will be the primary users of
>>>> this interface we should try to fit their ideas as much as feasible. Later
>>>> on (if we need it) we can add some glue to tie in the buffer-centric
>>>> implicit model with the explicit model.
>>>
>>> They won't be using execbuffer either. So what's your point?
>>
>> Android very much will use execbuffer. And even the in-flight buffered
>> SVM stuff will keep on using execbuf (just without any relocs).
> 
> So still buffer-centric, and it would benefit from the more general and more
> explicit fencing rather than just execbuf. If you look at the throttling
> in mesa, you can already see a place that would rather create a fence on
> a buffer than rely on its very approximate execbuf tracking.
>  
>> And once we indeed can make the case (plus have the hw) for direct
>> userspace submission I think the kernel shouldn't be in the business of
>> creating fence objects: The ring will be fully under control of
>> userspace, and that's the only place we could insert a seqno into. So
>> again the same trust issues.
> 
> No buffers, no requests, nothing for the kernel to do. No impact on
> designing an API that is useful today.

If Mesa really wants this, we should investigate intra-batch fences
again, both with and without buffer tracking (because afaik Mesa wants
bufferless SVM too).

But you said you think an fd is too heavyweight even?  What do you mean
by that?  Or were you just preferring re-use of an existing object (i.e.
the buffer) that we already track & pass around?

Jesse

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [RFC v2] drm/i915: Android native sync support
  2015-02-25 20:46                           ` Jesse Barnes
@ 2015-02-26  9:13                             ` Chris Wilson
  0 siblings, 0 replies; 38+ messages in thread
From: Chris Wilson @ 2015-02-26  9:13 UTC (permalink / raw)
  To: Jesse Barnes; +Cc: intel-gfx

On Wed, Feb 25, 2015 at 12:46:31PM -0800, Jesse Barnes wrote:
> On 01/28/2015 02:07 AM, Chris Wilson wrote:
> > On Wed, Jan 28, 2015 at 10:50:18AM +0100, Daniel Vetter wrote:
> >> On Wed, Jan 28, 2015 at 09:23:46AM +0000, Chris Wilson wrote:
> >>> On Wed, Jan 28, 2015 at 10:22:15AM +0100, Daniel Vetter wrote:
> >>>> On Mon, Jan 26, 2015 at 09:08:03AM +0000, Chris Wilson wrote:
> >>>>> On Mon, Jan 26, 2015 at 08:52:39AM +0100, Daniel Vetter wrote:
> >>>>>> I think the problem will be platforms that want full explicit fence (like
> >>>>>> android) but allow delayed creation of the fence fd from a gl sync object
> >>>>>> (like the android egl extension allows).
> >>>>>>
> >>>>>> I'm not sure yet how to best expose that really since just creating a
> >>>>>> fence from the implicit request attached to the batch might upset the
> >>>>>> interface purists with the mix in implicit and explicit fencing ;-) Hence
> >>>>>> why I think for now we should just do the eager fd creation at execbuf
> >>>>>> until ppl scream (well maybe not merge this patch until ppl scream ...).
> >>>>>
> >>>>> Everything we do is buffer centric. Even in the future with random bits
> >>>>> of memory, we will still use buffers behind the scenes. From an
> >>>>> interface perspective, it is clearer to me if we say "give me a fence for
> >>>>> this buffer". Exactly the same way as we say "is this buffer busy" or
> >>>>> "wait on this buffer". The change is that we now hand back an fd to slot
> >>>>> into an event loop. That, to me, is a much smaller evolutionary step of
> >>>>> the existing API, and yet more versatile than just attaching one to the
> >>>>> execbuf.
> >>>>
> >>>> The problem is that big parts of the world do not subscribe to that buffer
> >>>> centric view of gfx. Imo since those parts will be the primary users of
> >>>> this interface we should try to fit their ideas as much as feasible. Later
> >>>> on (if we need it) we can add some glue to tie in the buffer-centric
> >>>> implicit model with the explicit model.
> >>>
> >>> They won't be using execbuffer either. So what's your point?
> >>
> >> Android very much will use execbuffer. And even the in-flight buffered
> >> SVM stuff will keep on using execbuf (just without any relocs).
> > 
> > So still buffer-centric, and it would benefit from the more general and more
> > explicit fencing rather than just execbuf. If you look at the throttling
> > in mesa, you can already see a place that would rather create a fence on
> > a buffer than rely on its very approximate execbuf tracking.
> >  
> >> And once we indeed can make the case (plus have the hw) for direct
> >> userspace submission I think the kernel shouldn't be in the business of
> >> creating fence objects: The ring will be fully under control of
> >> userspace, and that's the only place we could insert a seqno into. So
> >> again the same trust issues.
> > 
> > No buffers, no requests, nothing for the kernel to do. No impact on
> > designing an API that is useful today.
> 
> If Mesa really wants this, we should investigate intra-batch fences
> again, both with and without buffer tracking (because afaik Mesa wants
> bufferless SVM too).
> 
> But you said you think an fd is too heavyweight even?  What do you mean
> by that?  Or were you just preferring re-use of an existing object (i.e.
> the buffer) that we already track & pass around?

Mostly it is the burn from X using select(): we see fd handling very
high in the profiles when all X has to do is flip.

However, we can and do have up to several thousand batches in flight,
and many more pending retirement from userspace. That is a scary
prospect if I wanted to replace the signalling on the buffer with
individual fences, both from the scaling issue and from running into resource
limits (i.e. back to the reason why our bo are currently fd-less).
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 38+ messages in thread

end of thread, other threads:[~2015-02-26  9:13 UTC | newest]

Thread overview: 38+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-01-22 11:15 [RFC] drm/i915: Android native sync support Tvrtko Ursulin
2015-01-22 11:42 ` Chris Wilson
2015-01-22 13:41   ` Tvrtko Ursulin
2015-01-22 13:49     ` Chris Wilson
2015-01-22 13:56       ` Tvrtko Ursulin
2015-01-22 14:04     ` Damien Lespiau
2015-01-22 15:28       ` Tvrtko Ursulin
2015-01-22 15:47         ` Damien Lespiau
2015-01-22 15:54           ` Tvrtko Ursulin
2015-01-22 16:07             ` Damien Lespiau
2015-01-23 11:13 ` [RFC v2] " Tvrtko Ursulin
2015-01-23 11:27   ` Chris Wilson
2015-01-23 14:02     ` Tvrtko Ursulin
2015-01-23 15:53       ` Daniel Vetter
2015-01-23 16:49         ` Tvrtko Ursulin
2015-01-24  9:47           ` Daniel Vetter
2015-01-26 11:08             ` Tvrtko Ursulin
2015-01-28  9:20               ` Daniel Vetter
2015-01-23 17:30         ` Chris Wilson
2015-01-24  9:41           ` Daniel Vetter
2015-01-24 16:08             ` Chris Wilson
2015-01-26  7:52               ` Daniel Vetter
2015-01-26  9:08                 ` Chris Wilson
2015-01-28  9:22                   ` Daniel Vetter
2015-01-28  9:23                     ` Chris Wilson
2015-01-28  9:50                       ` Daniel Vetter
2015-01-28 10:07                         ` Chris Wilson
2015-02-25 20:46                           ` Jesse Barnes
2015-02-26  9:13                             ` Chris Wilson
2015-01-27 11:29 ` [RFC v3] " Tvrtko Ursulin
2015-01-27 11:40   ` Chris Wilson
2015-01-27 12:13     ` Tvrtko Ursulin
2015-01-27 12:18       ` Chris Wilson
2015-01-27 13:43         ` Tvrtko Ursulin
2015-01-28  9:25           ` Daniel Vetter
2015-01-28  9:29   ` Daniel Vetter
2015-01-28 16:52     ` Tvrtko Ursulin
2015-01-29 16:14       ` Daniel Vetter
