All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH 41/41] drm/i915: Support explicit fencing for execbuf
Date: Thu, 20 Oct 2016 16:04:23 +0100	[thread overview]
Message-ID: <20161020150423.4560-42-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <20161020150423.4560-1-chris@chris-wilson.co.uk>

Now that the user can opt-out of implicit fencing, we need to give them
back control over the fencing. We employ sync_file to wrap our
drm_i915_gem_request and provide an fd that userspace can merge with
other sync_file fds and pass back to the kernel to wait upon before
future execution.

Testcase: igt/gem_exec_fence
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/Kconfig               |  1 +
 drivers/gpu/drm/i915/i915_drv.c            |  3 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 53 +++++++++++++++++++++++++++---
 include/uapi/drm/i915_drm.h                | 36 +++++++++++++++++++-
 4 files changed, 86 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 1c1b19ccb92f..c383684b538f 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -18,6 +18,7 @@ config DRM_I915
 	select INPUT if ACPI
 	select ACPI_VIDEO if ACPI
 	select ACPI_BUTTON if ACPI
+	select SYNC_FILE
 	help
 	  Choose this option if you have a system that has "Intel Graphics
 	  Media Accelerator" or "HD Graphics" integrated graphics,
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index abfc5007f170..4df75e63cf22 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -340,6 +340,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_COHERENT_PHYS_GTT:
 	case I915_PARAM_HAS_EXEC_SOFTPIN:
 	case I915_PARAM_HAS_EXEC_ASYNC:
+	case I915_PARAM_HAS_EXEC_FENCE:
 		/* For the time being all of these are always true;
 		 * if some supported hardware does not have one of these
 		 * features this value needs to be provided from
@@ -2529,7 +2530,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH),
-	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 775c946bb9fb..f60b57b7d677 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -28,6 +28,7 @@
 
 #include <linux/dma_remapping.h>
 #include <linux/reservation.h>
+#include <linux/sync_file.h>
 #include <linux/uaccess.h>
 
 #include <drm/drmP.h>
@@ -1596,6 +1597,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	struct i915_execbuffer_params *params = &params_master;
 	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
 	u32 dispatch_flags;
+	struct fence *in_fence = NULL;
+	struct sync_file *out_fence = NULL;
+	int out_fence_fd = -1;
 	int ret;
 	bool need_relocs;
 
@@ -1639,6 +1643,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		dispatch_flags |= I915_DISPATCH_RS;
 	}
 
+	if (args->flags & I915_EXEC_FENCE_IN) {
+		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
+		if (!in_fence) {
+			ret = -EINVAL;
+			goto pre_mutex_err;
+		}
+	}
+
+	if (args->flags & I915_EXEC_FENCE_OUT) {
+		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
+		if (out_fence_fd < 0) {
+			ret = out_fence_fd;
+			out_fence_fd = -1;
+			goto pre_mutex_err;
+		}
+	}
+
 	/* Take a local wakeref for preparing to dispatch the execbuf as
 	 * we expect to access the hardware fairly frequently in the
 	 * process. Upon first dispatch, we acquire another prolonged
@@ -1783,6 +1804,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		goto err_batch_unpin;
 	}
 
+	if (in_fence) {
+		ret = i915_gem_request_await_fence(params->request, in_fence);
+		if (ret < 0)
+			goto err_request;
+	}
+
+	if (out_fence_fd != -1) {
+		out_fence = sync_file_create(fence_get(&params->request->fence));
+		if (!out_fence) {
+			ret = -ENOMEM;
+			goto err_request;
+		}
+	}
+
 	/* Whilst this request exists, batch_obj will be on the
 	 * active_list, and so will hold the active reference. Only when this
 	 * request is retired will the the batch_obj be moved onto the
@@ -1810,6 +1845,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	ret = execbuf_submit(params, args, &eb->vmas);
 err_request:
 	__i915_add_request(params->request, ret == 0);
+	if (out_fence) {
+		if (ret == 0) {
+			fd_install(out_fence_fd, out_fence->file);
+			args->rsvd2 &= GENMASK_ULL(0, 31); /* keep in-fence */
+			args->rsvd2 |= (u64)out_fence_fd << 32;
+			out_fence_fd = -1;
+		} else {
+			fput(out_fence->file);
+		}
+	}
 
 err_batch_unpin:
 	/*
@@ -1831,6 +1876,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	/* intel_gpu_busy should also get a ref, so it will free when the device
 	 * is really idle. */
 	intel_runtime_pm_put(dev_priv);
+	if (out_fence_fd != -1)
+		put_unused_fd(out_fence_fd);
+	fence_put(in_fence);
 	return ret;
 }
 
@@ -1938,11 +1986,6 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
-	if (args->rsvd2 != 0) {
-		DRM_DEBUG("dirty rvsd2 field\n");
-		return -EINVAL;
-	}
-
 	exec2_list = drm_malloc_gfp(args->buffer_count,
 				    sizeof(*exec2_list),
 				    GFP_TEMPORARY);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index a2fa511b46b3..a1d04d8bc80a 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -246,6 +246,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_OVERLAY_PUT_IMAGE	0x27
 #define DRM_I915_OVERLAY_ATTRS	0x28
 #define DRM_I915_GEM_EXECBUFFER2	0x29
+#define DRM_I915_GEM_EXECBUFFER2_WR	DRM_I915_GEM_EXECBUFFER2
 #define DRM_I915_GET_SPRITE_COLORKEY	0x2a
 #define DRM_I915_SET_SPRITE_COLORKEY	0x2b
 #define DRM_I915_GEM_WAIT	0x2c
@@ -279,6 +280,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_INIT		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
 #define DRM_IOCTL_I915_GEM_EXECBUFFER	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
 #define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
+#define DRM_IOCTL_I915_GEM_EXECBUFFER2_WR	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2_WR, struct drm_i915_gem_execbuffer2)
 #define DRM_IOCTL_I915_GEM_PIN		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
 #define DRM_IOCTL_I915_GEM_UNPIN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
 #define DRM_IOCTL_I915_GEM_BUSY		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
@@ -393,6 +395,13 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_HAS_EXEC_ASYNC	 41
 
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports explicit fence support -
+ * both being able to pass in a sync_file fd to wait upon before executing,
+ * and being able to return a new sync_file fd that is signaled when the
+ * current request is complete.
+ */
+#define I915_PARAM_HAS_EXEC_FENCE	 42
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*
@@ -845,7 +854,32 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_RESOURCE_STREAMER     (1<<15)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1)
+/* Setting I915_EXEC_FENCE_IN implies that lower_32_bits(rsvd2) represent
+ * a sync_file fd to wait upon (in a nonblocking manner) prior to executing
+ * the batch.
+ *
+ * Returns -EINVAL if the sync_file fd cannot be found.
+ */
+#define I915_EXEC_FENCE_IN		(1<<16)
+
+/* Setting I915_EXEC_FENCE_OUT causes the ioctl to return a sync_file fd
+ * in the upper_32_bits(rsvd2) upon success. Ownership of the fd is given
+ * to the caller, and it should be close() after use. (The fd is a regular
+ * file descriptor and will be cleaned up on process termination. It holds
+ * a reference to the request, but nothing else.)
+ *
+ * The sync_file fd can be combined with other sync_file and passed either
+ * to execbuf using I915_EXEC_FENCE_IN, to atomic KMS ioctls (so that a flip
+ * will only occur after this request completes), or to other devices.
+ *
+ * Using I915_EXEC_FENCE_OUT requires use of
+ * DRM_IOCTL_I915_GEM_EXECBUFFER2_WR ioctl so that the result is written
+ * back to userspace. Failure to do so will cause the out-fence to always
+ * be reported as zero, and the real fence fd to be leaked.
+ */
+#define I915_EXEC_FENCE_OUT		(1<<17)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_OUT<<1))
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2016-10-20 15:05 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-10-20 15:03 Multiple timelines, multiple times Chris Wilson
2016-10-20 15:03 ` [PATCH 01/41] drm/i915: Move user fault tracking to a separate list Chris Wilson
2016-10-20 15:03 ` [PATCH 02/41] drm/i915: Use RPM as the barrier for controlling user mmap access Chris Wilson
2016-10-20 15:03 ` [PATCH 03/41] drm/i915: Remove superfluous locking around userfault_list Chris Wilson
2016-10-20 15:03 ` [PATCH 04/41] drm/i915: Remove RPM sequence checking Chris Wilson
2016-10-21 10:19   ` Imre Deak
2016-10-20 15:03 ` [PATCH 05/41] drm/i915: Move fence cancellation to runtime suspend Chris Wilson
2016-10-21 12:48   ` Imre Deak
2016-10-21 13:52     ` Chris Wilson
2016-10-21 14:05     ` [PATCH v2] " Chris Wilson
2016-10-24  9:55       ` Imre Deak
2016-10-20 15:03 ` [PATCH 06/41] drm/i915: Support asynchronous waits on struct fence from i915_gem_request Chris Wilson
2016-10-20 15:03 ` [PATCH 07/41] drm/i915: Allow i915_sw_fence_await_sw_fence() to allocate Chris Wilson
2016-10-20 15:03 ` [PATCH 08/41] drm/i915: Remove superfluous wait_for_error() from throttle-ioctl Chris Wilson
2016-10-20 15:41   ` Joonas Lahtinen
2016-10-20 15:03 ` [PATCH 09/41] drm/i915: Rearrange i915_wait_request() accounting with callers Chris Wilson
2016-10-20 15:03 ` [PATCH 10/41] drm/i915: Remove unused i915_gem_active_wait() in favour of _unlocked() Chris Wilson
2016-10-20 15:03 ` [PATCH 11/41] drm/i915: Defer active reference until required Chris Wilson
2016-10-20 15:03 ` [PATCH 12/41] drm/i915: Introduce an internal allocator for disposable private objects Chris Wilson
2016-10-20 16:22   ` Tvrtko Ursulin
2016-10-20 20:36     ` Chris Wilson
2016-10-21  7:21       ` Tvrtko Ursulin
2016-10-21  7:50         ` Chris Wilson
2016-10-21  7:53           ` Tvrtko Ursulin
2016-10-21  7:56   ` [PATCH v4] " Chris Wilson
2016-10-21  8:07     ` Tvrtko Ursulin
2016-10-20 15:03 ` [PATCH 13/41] drm/i915: Reuse the active golden render state batch Chris Wilson
2016-10-20 15:03 ` [PATCH 14/41] drm/i915: Markup GEM API with lockdep asserts Chris Wilson
2016-10-20 15:03 ` [PATCH 15/41] drm/i915: Use a radixtree for random access to the object's backing storage Chris Wilson
2016-10-20 15:55   ` Tvrtko Ursulin
2016-10-20 15:03 ` [PATCH 16/41] drm/i915: Use radixtree to jump start intel_partial_pages() Chris Wilson
2016-10-20 15:03 ` [PATCH 17/41] drm/i915: Refactor object page API Chris Wilson
2016-10-20 15:04 ` [PATCH 18/41] drm/i915: Pass around sg_table to get_pages/put_pages backend Chris Wilson
2016-10-20 15:04 ` [PATCH 19/41] drm/i915: Move object backing storage manipulation to its own locking Chris Wilson
2016-10-20 15:04 ` [PATCH 20/41] drm/i915/dmabuf: Acquire the backing storage outside of struct_mutex Chris Wilson
2016-10-20 15:04 ` [PATCH 21/41] drm/i915: Implement pread without struct-mutex Chris Wilson
2016-10-20 15:04 ` [PATCH 22/41] drm/i915: Implement pwrite " Chris Wilson
2016-10-20 15:04 ` [PATCH 23/41] drm/i915: Acquire the backing storage outside of struct_mutex in set-domain Chris Wilson
2016-10-20 15:04 ` [PATCH 24/41] drm/i915: Move object release to a freelist + worker Chris Wilson
2016-10-20 15:04 ` [PATCH 25/41] drm/i915: Use lockless object free Chris Wilson
2016-10-20 15:04 ` [PATCH 26/41] drm/i915: Move GEM activity tracking into a common struct reservation_object Chris Wilson
2016-10-20 15:04 ` [PATCH 27/41] drm/i915: Restore nonblocking awaits for modesetting Chris Wilson
2016-10-20 15:04 ` [PATCH 28/41] drm/i915: Combine seqno + tracking into a global timeline struct Chris Wilson
2016-10-20 15:04 ` [PATCH 29/41] drm/i915: Queue the idling context switch after all other timelines Chris Wilson
2016-10-20 15:04 ` [PATCH 30/41] drm/i915: Wait first for submission, before waiting for request completion Chris Wilson
2016-10-20 15:04 ` [PATCH 31/41] drm/i915: Introduce a global_seqno for each request Chris Wilson
2016-10-20 15:04 ` [PATCH 32/41] drm/i915: Rename ->emit_request to ->emit_breadcrumb Chris Wilson
2016-10-20 15:04 ` [PATCH 33/41] drm/i915: Record space required for breadcrumb emission Chris Wilson
2016-10-20 15:04 ` [PATCH 34/41] drm/i915: Defer " Chris Wilson
2016-10-20 15:04 ` [PATCH 35/41] drm/i915: Move the global sync optimisation to the timeline Chris Wilson
2016-10-20 15:04 ` [PATCH 36/41] drm/i915: Create a unique name for the context Chris Wilson
2016-10-20 15:04 ` [PATCH 37/41] drm/i915: Reserve space in the global seqno during request allocation Chris Wilson
2016-10-20 15:04 ` [PATCH 38/41] drm/i915: Defer setting of global seqno on request to submission Chris Wilson
2016-10-20 15:04 ` [PATCH 39/41] drm/i915: Enable multiple timelines Chris Wilson
2016-10-20 15:04 ` [PATCH 40/41] drm/i915: Enable userspace to opt-out of implicit fencing Chris Wilson
2016-10-20 15:04 ` Chris Wilson [this message]
  -- strict thread matches above, loose matches on Subject: below --
2016-10-14 12:17 Fencing, fencing, fencing Chris Wilson
2016-10-14 12:18 ` [PATCH 41/41] drm/i915: Support explicit fencing for execbuf Chris Wilson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20161020150423.4560-42-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.