All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: intel-gfx@lists.freedesktop.org
Cc: mika.kuoppala@intel.com
Subject: [PATCH 38/38] drm/i915: Support explicit fencing for execbuf
Date: Tue, 20 Sep 2016 09:30:12 +0100	[thread overview]
Message-ID: <20160920083012.2754-39-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <20160920083012.2754-1-chris@chris-wilson.co.uk>

Now that the user can opt-out of implicit fencing, we need to give them
back control over the fencing. We employ sync_file to wrap our
drm_i915_gem_request and provide an fd that userspace can merge with
other sync_file fds and pass back to the kernel to wait upon before
future execution.

Testcase: igt/gem_exec_fence
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/Kconfig               |  1 +
 drivers/gpu/drm/i915/i915_drv.c            |  3 +-
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 53 +++++++++++++++++++++++++++---
 include/uapi/drm/i915_drm.h                | 36 +++++++++++++++++++-
 4 files changed, 86 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 92ecced1bc8f..ca23fd17c3ce 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -19,6 +19,7 @@ config DRM_I915
 	select INPUT if ACPI
 	select ACPI_VIDEO if ACPI
 	select ACPI_BUTTON if ACPI
+	select SYNC_FILE
 	help
 	  Choose this option if you have a system that has "Intel Graphics
 	  Media Accelerator" or "HD Graphics" integrated graphics,
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 19ee76284371..3acc1fd9a9e1 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -333,6 +333,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_HAS_COHERENT_PHYS_GTT:
 	case I915_PARAM_HAS_EXEC_SOFTPIN:
 	case I915_PARAM_HAS_EXEC_ASYNC:
+	case I915_PARAM_HAS_EXEC_FENCE:
 		/* For the time being all of these are always true;
 		 * if some supported hardware does not have one of these
 		 * features this value needs to be provided from
@@ -2535,7 +2536,7 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(I915_HWS_ADDR, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_INIT, drm_noop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH),
-	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(I915_GEM_EXECBUFFER2_WR, i915_gem_execbuffer2, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(I915_GEM_PIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_UNPIN, i915_gem_reject_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY),
 	DRM_IOCTL_DEF_DRV(I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 7038da9aa68f..a99bd002596c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -28,6 +28,7 @@
 
 #include <linux/dma_remapping.h>
 #include <linux/reservation.h>
+#include <linux/sync_file.h>
 #include <linux/uaccess.h>
 
 #include <drm/drmP.h>
@@ -1585,6 +1586,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	struct i915_execbuffer_params *params = &params_master;
 	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
 	u32 dispatch_flags;
+	struct fence *in_fence = NULL;
+	struct sync_file *out_fence = NULL;
+	int out_fence_fd = -1;
 	int ret;
 	bool need_relocs;
 
@@ -1628,6 +1632,23 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		dispatch_flags |= I915_DISPATCH_RS;
 	}
 
+	if (args->flags & I915_EXEC_FENCE_IN) {
+		in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
+		if (!in_fence) {
+			ret = -EINVAL;
+			goto pre_mutex_err;
+		}
+	}
+
+	if (args->flags & I915_EXEC_FENCE_OUT) {
+		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
+		if (out_fence_fd < 0) {
+			ret = out_fence_fd;
+			out_fence_fd = -1;
+			goto pre_mutex_err;
+		}
+	}
+
 	/* Take a local wakeref for preparing to dispatch the execbuf as
 	 * we expect to access the hardware fairly frequently in the
 	 * process. Upon first dispatch, we acquire another prolonged
@@ -1772,6 +1793,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 		goto err_batch_unpin;
 	}
 
+	if (in_fence) {
+		ret = i915_gem_request_await_fence(params->request, in_fence);
+		if (ret < 0)
+			goto err_request;
+	}
+
+	if (out_fence_fd != -1) {
+		out_fence = sync_file_create(fence_get(&params->request->fence));
+		if (!out_fence) {
+			ret = -ENOMEM;
+			goto err_request;
+		}
+	}
+
 	/* Whilst this request exists, batch_obj will be on the
 	 * active_list, and so will hold the active reference. Only when this
 	 * request is retired will the the batch_obj be moved onto the
@@ -1799,6 +1834,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	ret = execbuf_submit(params, args, &eb->vmas);
 err_request:
 	__i915_add_request(params->request, ret == 0);
+	if (out_fence) {
+		if (ret == 0) {
+			fd_install(out_fence_fd, out_fence->file);
+			args->rsvd2 &= GENMASK_ULL(0, 31); /* keep in-fence */
+			args->rsvd2 |= (u64)out_fence_fd << 32;
+			out_fence_fd = -1;
+		} else {
+			fput(out_fence->file);
+		}
+	}
 
 err_batch_unpin:
 	/*
@@ -1820,6 +1865,9 @@ pre_mutex_err:
 	/* intel_gpu_busy should also get a ref, so it will free when the device
 	 * is really idle. */
 	intel_runtime_pm_put(dev_priv);
+	if (out_fence_fd != -1)
+		put_unused_fd(out_fence_fd);
+	fence_put(in_fence);
 	return ret;
 }
 
@@ -1927,11 +1975,6 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
-	if (args->rsvd2 != 0) {
-		DRM_DEBUG("dirty rvsd2 field\n");
-		return -EINVAL;
-	}
-
 	exec2_list = drm_malloc_gfp(args->buffer_count,
 				    sizeof(*exec2_list),
 				    GFP_TEMPORARY);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index a2fa511b46b3..a1d04d8bc80a 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -246,6 +246,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_I915_OVERLAY_PUT_IMAGE	0x27
 #define DRM_I915_OVERLAY_ATTRS	0x28
 #define DRM_I915_GEM_EXECBUFFER2	0x29
+#define DRM_I915_GEM_EXECBUFFER2_WR	DRM_I915_GEM_EXECBUFFER2
 #define DRM_I915_GET_SPRITE_COLORKEY	0x2a
 #define DRM_I915_SET_SPRITE_COLORKEY	0x2b
 #define DRM_I915_GEM_WAIT	0x2c
@@ -279,6 +280,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_INIT		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
 #define DRM_IOCTL_I915_GEM_EXECBUFFER	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
 #define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
+#define DRM_IOCTL_I915_GEM_EXECBUFFER2_WR	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2_WR, struct drm_i915_gem_execbuffer2)
 #define DRM_IOCTL_I915_GEM_PIN		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
 #define DRM_IOCTL_I915_GEM_UNPIN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
 #define DRM_IOCTL_I915_GEM_BUSY		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
@@ -393,6 +395,13 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_HAS_EXEC_ASYNC	 41
 
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports explicit fencing -
+ * both being able to pass in a sync_file fd to wait upon before executing,
+ * and being able to return a new sync_file fd that is signaled when the
+ * current request is complete.
+ */
+#define I915_PARAM_HAS_EXEC_FENCE	 42
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*
@@ -845,7 +854,32 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_RESOURCE_STREAMER     (1<<15)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1)
+/* Setting I915_EXEC_FENCE_IN implies that lower_32_bits(rsvd2) represents
+ * a sync_file fd to wait upon (in a nonblocking manner) prior to executing
+ * the batch.
+ *
+ * Returns -EINVAL if the sync_file fd cannot be found.
+ */
+#define I915_EXEC_FENCE_IN		(1<<16)
+
+/* Setting I915_EXEC_FENCE_OUT causes the ioctl to return a sync_file fd
+ * in the upper_32_bits(rsvd2) upon success. Ownership of the fd is given
+ * to the caller, who should close() it after use. (The fd is a regular
+ * file descriptor and will be cleaned up on process termination. It holds
+ * a reference to the request, but nothing else.)
+ *
+ * The sync_file fd can be combined with other sync_file fds and passed either
+ * to execbuf using I915_EXEC_FENCE_IN, to atomic KMS ioctls (so that a flip
+ * will only occur after this request completes), or to other devices.
+ *
+ * Using I915_EXEC_FENCE_OUT requires use of
+ * DRM_IOCTL_I915_GEM_EXECBUFFER2_WR ioctl so that the result is written
+ * back to userspace. Failure to do so will cause the out-fence to always
+ * be reported as zero, and the real fence fd to be leaked.
+ */
+#define I915_EXEC_FENCE_OUT		(1<<17)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_OUT<<1))
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
2.9.3

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2016-09-20  8:31 UTC|newest]

Thread overview: 64+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-09-20  8:29 Multiple timelines, take 2 Chris Wilson
2016-09-20  8:29 ` [PATCH 01/38] drm/i915: Allow disabling error capture Chris Wilson
2016-09-21  6:13   ` Joonas Lahtinen
2016-09-20  8:29 ` [PATCH 02/38] drm/i915: Stop the machine whilst capturing the GPU crash dump Chris Wilson
2016-09-26  8:58   ` Joonas Lahtinen
2016-09-20  8:29 ` [PATCH 03/38] drm/i915: Always use the GTT for error capture Chris Wilson
2016-09-21  7:24   ` Joonas Lahtinen
2016-09-20  8:29 ` [PATCH 04/38] drm/i915: Consolidate error object printing Chris Wilson
2016-09-20  8:29 ` [PATCH 05/38] drm/i915: Compress GPU objects in error state Chris Wilson
2016-09-21  7:55   ` Joonas Lahtinen
2016-09-20  8:29 ` [PATCH 06/38] drm/i915: Support asynchronous waits on struct fence from i915_gem_request Chris Wilson
2016-09-21  8:05   ` Joonas Lahtinen
2016-09-20  8:29 ` [PATCH 07/38] drm/i915: Allow i915_sw_fence_await_sw_fence() to allocate Chris Wilson
2016-09-20  8:29 ` [PATCH 08/38] drm/i915: Rearrange i915_wait_request() accounting with callers Chris Wilson
2016-09-21  8:12   ` Joonas Lahtinen
2016-09-20  8:29 ` [PATCH 09/38] drm/i915: Remove unused i915_gem_active_wait() in favour of _unlocked() Chris Wilson
2016-09-20  8:29 ` [PATCH 10/38] drm/i915: Defer active reference until required Chris Wilson
2016-09-21  8:44   ` Joonas Lahtinen
2016-09-20  8:29 ` [PATCH 11/38] drm/i915: Introduce an internal allocator for disposable private objects Chris Wilson
2016-09-21 11:50   ` Joonas Lahtinen
2016-09-27  9:10     ` Chris Wilson
2016-09-20  8:29 ` [PATCH 12/38] drm/i915: Reuse the active golden render state batch Chris Wilson
2016-09-26  7:24   ` Joonas Lahtinen
2016-09-20  8:29 ` [PATCH 13/38] drm/i915: Markup GEM API with lockdep asserts Chris Wilson
2016-09-21 11:56   ` Joonas Lahtinen
2016-09-20  8:29 ` [PATCH 14/38] drm/i915: Use a radixtree for random access to the object's backing storage Chris Wilson
2016-09-20  8:29 ` [PATCH 15/38] drm/i915: Refactor object page API Chris Wilson
2016-09-20  8:29 ` [PATCH 16/38] drm/i915: Pass around sg_table to get_pages/put_pages backend Chris Wilson
2016-09-20 11:24   ` kbuild test robot
2016-09-20  8:29 ` [PATCH 17/38] drm/i915: Move object backing storage manipulation to its own locking Chris Wilson
2016-09-20  8:29 ` [PATCH 18/38] drm/i915/dmabuf: Acquire the backing storage outside of struct_mutex Chris Wilson
2016-09-20  8:29 ` [PATCH 19/38] drm/i915: Implement pread without struct-mutex Chris Wilson
2016-09-20  8:29 ` [PATCH 20/38] drm/i915: Implement pwrite " Chris Wilson
2016-09-20 13:47   ` kbuild test robot
2016-09-20  8:29 ` [PATCH 21/38] drm/i915: Acquire the backing storage outside of struct_mutex in set-domain Chris Wilson
2016-09-20  8:29 ` [PATCH 22/38] drm/i915: Move object release to a freelist + worker Chris Wilson
2016-09-20  8:29 ` [PATCH 23/38] drm/i915: Use lockless object free Chris Wilson
2016-09-20  8:29 ` [PATCH 24/38] drm/i915: Move GEM activity tracking into a common struct reservation_object Chris Wilson
2016-09-26  7:53   ` Joonas Lahtinen
2016-09-20  8:29 ` [PATCH 25/38] drm: Add reference counting to drm_atomic_state Chris Wilson
2016-09-21  7:24   ` Sean Paul
2016-09-20  8:30 ` [PATCH 26/38] drm/i915: Restore nonblocking awaits for modesetting Chris Wilson
2016-09-26  8:11   ` Joonas Lahtinen
2016-09-20  8:30 ` [PATCH 27/38] drm/i915: Combine seqno + tracking into a global timeline struct Chris Wilson
2016-09-20  8:30 ` [PATCH 28/38] drm/i915: Queue the idling context switch after all other timelines Chris Wilson
2016-09-26  8:49   ` Joonas Lahtinen
2016-09-20  8:30 ` [PATCH 29/38] drm/i915: Wait first for submission, before waiting for request completion Chris Wilson
2016-09-20  8:30 ` [PATCH 30/38] drm/i915: Introduce a global_seqno for each request Chris Wilson
2016-09-20  8:30 ` [PATCH 31/38] drm/i915: Record space required for request emission Chris Wilson
2016-09-20  8:30 ` [PATCH 32/38] drm/i915: Defer " Chris Wilson
2016-09-26  8:53   ` Joonas Lahtinen
2016-09-26  9:04     ` Chris Wilson
2016-09-26  9:06       ` Joonas Lahtinen
2016-09-26  9:25         ` Chris Wilson
2016-09-20  8:30 ` [PATCH 33/38] drm/i915: Move the global sync optimisation to the timeline Chris Wilson
2016-09-20  8:30 ` [PATCH 34/38] drm/i915: Create a unique name for the context Chris Wilson
2016-09-20  8:30 ` [PATCH 35/38] drm/i915: Reserve space in the global seqno during request allocation Chris Wilson
2016-09-20 18:49   ` kbuild test robot
2016-09-20 18:49   ` [PATCH] drm/i915: fix semicolon.cocci warnings kbuild test robot
2016-09-20  8:30 ` [PATCH 36/38] drm/i915: Enable multiple timelines Chris Wilson
2016-09-26  8:55   ` Joonas Lahtinen
2016-09-20  8:30 ` [PATCH 37/38] drm/i915: Enable userspace to opt-out of implicit fencing Chris Wilson
2016-09-20  8:30 ` Chris Wilson [this message]
2016-09-20  9:24 ` ✗ Fi.CI.BAT: failure for series starting with [01/38] drm/i915: Allow disabling error capture Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160920083012.2754-39-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=intel-gfx@lists.freedesktop.org \
    --cc=mika.kuoppala@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.