* [Intel-gfx] [PATCH 1/3] drm/i915: Flush idle barriers when waiting
@ 2020-02-21 9:40 Chris Wilson
2020-02-21 9:40 ` [Intel-gfx] [PATCH 2/3] drm/i915: Allow userspace to specify ringsize on construction Chris Wilson
` (2 more replies)
0 siblings, 3 replies; 8+ messages in thread
From: Chris Wilson @ 2020-02-21 9:40 UTC (permalink / raw)
To: intel-gfx
If we do find ourselves with an idle barrier inside our active while
waiting, attempt to flush it by emitting a pulse using the kernel
context.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Steve Carbonari <steven.carbonari@intel.com>
---
drivers/gpu/drm/i915/i915_active.c | 42 ++++++++++++++----
drivers/gpu/drm/i915/selftests/i915_active.c | 46 ++++++++++++++++++++
2 files changed, 79 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 9ccb931a733e..fae7e3820592 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -7,6 +7,7 @@
#include <linux/debugobjects.h>
#include "gt/intel_context.h"
+#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_ring.h"
@@ -460,26 +461,49 @@ static void enable_signaling(struct i915_active_fence *active)
dma_fence_put(fence);
}
-int i915_active_wait(struct i915_active *ref)
+static int flush_barrier(struct active_node *it)
{
- struct active_node *it, *n;
- int err = 0;
+ struct intel_engine_cs *engine;
- might_sleep();
+ if (!is_barrier(&it->base))
+ return 0;
- if (!i915_active_acquire_if_busy(ref))
+ engine = __barrier_to_engine(it);
+ smp_rmb(); /* serialise with add_active_barriers */
+ if (!is_barrier(&it->base))
return 0;
- /* Flush lazy signals */
+ return intel_engine_flush_barriers(engine);
+}
+
+static int flush_lazy_signals(struct i915_active *ref)
+{
+ struct active_node *it, *n;
+ int err = 0;
+
enable_signaling(&ref->excl);
rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
- if (is_barrier(&it->base)) /* unconnected idle barrier */
- continue;
+ err = flush_barrier(it); /* unconnected idle barrier? */
+ if (err)
+ break;
enable_signaling(&it->base);
}
- /* Any fence added after the wait begins will not be auto-signaled */
+ return err;
+}
+
+int i915_active_wait(struct i915_active *ref)
+{
+ int err;
+
+ might_sleep();
+
+ if (!i915_active_acquire_if_busy(ref))
+ return 0;
+
+ /* Any fence added after the wait begins will not be auto-signaled */
+ err = flush_lazy_signals(ref);
i915_active_release(ref);
if (err)
return err;
diff --git a/drivers/gpu/drm/i915/selftests/i915_active.c b/drivers/gpu/drm/i915/selftests/i915_active.c
index ef572a0c2566..067e30b8927f 100644
--- a/drivers/gpu/drm/i915/selftests/i915_active.c
+++ b/drivers/gpu/drm/i915/selftests/i915_active.c
@@ -201,11 +201,57 @@ static int live_active_retire(void *arg)
return err;
}
+static int live_active_barrier(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_engine_cs *engine;
+ struct live_active *active;
+ int err = 0;
+
+ /* Check that we get a callback when requests retire upon waiting */
+
+ active = __live_alloc(i915);
+ if (!active)
+ return -ENOMEM;
+
+ err = i915_active_acquire(&active->base);
+ if (err)
+ goto out;
+
+ for_each_uabi_engine(engine, i915) {
+ err = i915_active_acquire_preallocate_barrier(&active->base,
+ engine);
+ if (err)
+ break;
+
+ i915_active_acquire_barrier(&active->base);
+ }
+
+ i915_active_release(&active->base);
+
+ if (err == 0)
+ err = i915_active_wait(&active->base);
+
+ if (err == 0 && !READ_ONCE(active->retired)) {
+ pr_err("i915_active not retired after flushing barriers!\n");
+ err = -EINVAL;
+ }
+
+out:
+ __live_put(active);
+
+ if (igt_flush_test(i915))
+ err = -EIO;
+
+ return err;
+}
+
int i915_active_live_selftests(struct drm_i915_private *i915)
{
static const struct i915_subtest tests[] = {
SUBTEST(live_active_wait),
SUBTEST(live_active_retire),
+ SUBTEST(live_active_barrier),
};
if (intel_gt_is_wedged(&i915->gt))
--
2.25.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [Intel-gfx] [PATCH 2/3] drm/i915: Allow userspace to specify ringsize on construction
2020-02-21 9:40 [Intel-gfx] [PATCH 1/3] drm/i915: Flush idle barriers when waiting Chris Wilson
@ 2020-02-21 9:40 ` Chris Wilson
2020-02-21 9:40 ` [Intel-gfx] [PATCH 3/3] drm/i915/gem: Honour O_NONBLOCK before throttling execbuf submissions Chris Wilson
2020-02-21 21:18 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for series starting with [1/3] drm/i915: Flush idle barriers when waiting (rev2) Patchwork
2 siblings, 0 replies; 8+ messages in thread
From: Chris Wilson @ 2020-02-21 9:40 UTC (permalink / raw)
To: intel-gfx
No good reason why we must always use a static ringsize, so let
userspace select one during construction.
Link: https://github.com/intel/compute-runtime/pull/261
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Steve Carbonari <steven.carbonari@intel.com>
Reviewed-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
---
drivers/gpu/drm/i915/Makefile | 1 +
drivers/gpu/drm/i915/gem/i915_gem_context.c | 110 ++++++++++++++++--
drivers/gpu/drm/i915/gt/intel_context_param.c | 63 ++++++++++
drivers/gpu/drm/i915/gt/intel_context_param.h | 14 +++
drivers/gpu/drm/i915/gt/intel_lrc.c | 1 +
include/uapi/drm/i915_drm.h | 21 ++++
6 files changed, 202 insertions(+), 8 deletions(-)
create mode 100644 drivers/gpu/drm/i915/gt/intel_context_param.c
create mode 100644 drivers/gpu/drm/i915/gt/intel_context_param.h
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index b314d44ded5e..22a23134e98e 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -82,6 +82,7 @@ gt-y += \
gt/gen8_ppgtt.o \
gt/intel_breadcrumbs.o \
gt/intel_context.o \
+ gt/intel_context_param.o \
gt/intel_context_sseu.o \
gt/intel_engine_cs.o \
gt/intel_engine_heartbeat.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index adcebf22a3d3..b24ee8e104cf 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -71,6 +71,7 @@
#include "gt/gen6_ppgtt.h"
#include "gt/intel_context.h"
+#include "gt/intel_context_param.h"
#include "gt/intel_engine_heartbeat.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_ring.h"
@@ -668,23 +669,30 @@ __create_context(struct drm_i915_private *i915)
return ERR_PTR(err);
}
-static void
+static int
context_apply_all(struct i915_gem_context *ctx,
- void (*fn)(struct intel_context *ce, void *data),
+ int (*fn)(struct intel_context *ce, void *data),
void *data)
{
struct i915_gem_engines_iter it;
struct intel_context *ce;
+ int err = 0;
- for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
- fn(ce, data);
+ for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+ err = fn(ce, data);
+ if (err)
+ break;
+ }
i915_gem_context_unlock_engines(ctx);
+
+ return err;
}
-static void __apply_ppgtt(struct intel_context *ce, void *vm)
+static int __apply_ppgtt(struct intel_context *ce, void *vm)
{
i915_vm_put(ce->vm);
ce->vm = i915_vm_get(vm);
+ return 0;
}
static struct i915_address_space *
@@ -722,9 +730,10 @@ static void __set_timeline(struct intel_timeline **dst,
intel_timeline_put(old);
}
-static void __apply_timeline(struct intel_context *ce, void *timeline)
+static int __apply_timeline(struct intel_context *ce, void *timeline)
{
__set_timeline(&ce->timeline, timeline);
+ return 0;
}
static void __assign_timeline(struct i915_gem_context *ctx,
@@ -1215,6 +1224,63 @@ static int set_ppgtt(struct drm_i915_file_private *file_priv,
return err;
}
+static int __apply_ringsize(struct intel_context *ce, void *sz)
+{
+ return intel_context_set_ring_size(ce, (unsigned long)sz);
+}
+
+static int set_ringsize(struct i915_gem_context *ctx,
+ struct drm_i915_gem_context_param *args)
+{
+ if (!HAS_LOGICAL_RING_CONTEXTS(ctx->i915))
+ return -ENODEV;
+
+ if (args->size)
+ return -EINVAL;
+
+ if (!IS_ALIGNED(args->value, I915_GTT_PAGE_SIZE))
+ return -EINVAL;
+
+ if (args->value < I915_GTT_PAGE_SIZE)
+ return -EINVAL;
+
+ if (args->value > 128 * I915_GTT_PAGE_SIZE)
+ return -EINVAL;
+
+ return context_apply_all(ctx,
+ __apply_ringsize,
+ __intel_context_ring_size(args->value));
+}
+
+static int __get_ringsize(struct intel_context *ce, void *arg)
+{
+ long sz;
+
+ sz = intel_context_get_ring_size(ce);
+ GEM_BUG_ON(sz > INT_MAX);
+
+ return sz; /* stop on first engine */
+}
+
+static int get_ringsize(struct i915_gem_context *ctx,
+ struct drm_i915_gem_context_param *args)
+{
+ int sz;
+
+ if (!HAS_LOGICAL_RING_CONTEXTS(ctx->i915))
+ return -ENODEV;
+
+ if (args->size)
+ return -EINVAL;
+
+ sz = context_apply_all(ctx, __get_ringsize, NULL);
+ if (sz < 0)
+ return sz;
+
+ args->value = sz;
+ return 0;
+}
+
static int
user_to_context_sseu(struct drm_i915_private *i915,
const struct drm_i915_gem_context_param_sseu *user,
@@ -1852,17 +1918,19 @@ set_persistence(struct i915_gem_context *ctx,
return __context_set_persistence(ctx, args->value);
}
-static void __apply_priority(struct intel_context *ce, void *arg)
+static int __apply_priority(struct intel_context *ce, void *arg)
{
struct i915_gem_context *ctx = arg;
if (!intel_engine_has_semaphores(ce->engine))
- return;
+ return 0;
if (ctx->sched.priority >= I915_PRIORITY_NORMAL)
intel_context_set_use_semaphores(ce);
else
intel_context_clear_use_semaphores(ce);
+
+ return 0;
}
static int set_priority(struct i915_gem_context *ctx,
@@ -1955,6 +2023,10 @@ static int ctx_setparam(struct drm_i915_file_private *fpriv,
ret = set_persistence(ctx, args);
break;
+ case I915_CONTEXT_PARAM_RINGSIZE:
+ ret = set_ringsize(ctx, args);
+ break;
+
case I915_CONTEXT_PARAM_BAN_PERIOD:
default:
ret = -EINVAL;
@@ -1983,6 +2055,18 @@ static int create_setparam(struct i915_user_extension __user *ext, void *data)
return ctx_setparam(arg->fpriv, arg->ctx, &local.param);
}
+static int copy_ring_size(struct intel_context *dst,
+ struct intel_context *src)
+{
+ long sz;
+
+ sz = intel_context_get_ring_size(src);
+ if (sz < 0)
+ return sz;
+
+ return intel_context_set_ring_size(dst, sz);
+}
+
static int clone_engines(struct i915_gem_context *dst,
struct i915_gem_context *src)
{
@@ -2026,6 +2110,12 @@ static int clone_engines(struct i915_gem_context *dst,
}
intel_context_set_gem(clone->engines[n], dst);
+
+ /* Copy across the preferred ringsize */
+ if (copy_ring_size(clone->engines[n], e->engines[n])) {
+ __free_engines(clone, n + 1);
+ goto err_unlock;
+ }
}
clone->num_engines = n;
@@ -2388,6 +2478,10 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
args->value = i915_gem_context_is_persistent(ctx);
break;
+ case I915_CONTEXT_PARAM_RINGSIZE:
+ ret = get_ringsize(ctx, args);
+ break;
+
case I915_CONTEXT_PARAM_BAN_PERIOD:
default:
ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/gt/intel_context_param.c b/drivers/gpu/drm/i915/gt/intel_context_param.c
new file mode 100644
index 000000000000..65dcd090245d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_context_param.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_active.h"
+#include "intel_context.h"
+#include "intel_context_param.h"
+#include "intel_ring.h"
+
+int intel_context_set_ring_size(struct intel_context *ce, long sz)
+{
+ int err;
+
+ if (intel_context_lock_pinned(ce))
+ return -EINTR;
+
+ err = i915_active_wait(&ce->active);
+ if (err < 0)
+ goto unlock;
+
+ if (intel_context_is_pinned(ce)) {
+ err = -EBUSY; /* In active use, come back later! */
+ goto unlock;
+ }
+
+ if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
+ struct intel_ring *ring;
+
+ /* Replace the existing ringbuffer */
+ ring = intel_engine_create_ring(ce->engine, sz);
+ if (IS_ERR(ring)) {
+ err = PTR_ERR(ring);
+ goto unlock;
+ }
+
+ intel_ring_put(ce->ring);
+ ce->ring = ring;
+
+ /* Context image will be updated on next pin */
+ } else {
+ ce->ring = __intel_context_ring_size(sz);
+ }
+
+unlock:
+ intel_context_unlock_pinned(ce);
+ return err;
+}
+
+long intel_context_get_ring_size(struct intel_context *ce)
+{
+ long sz = (unsigned long)READ_ONCE(ce->ring);
+
+ if (test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
+ if (intel_context_lock_pinned(ce))
+ return -EINTR;
+
+ sz = ce->ring->size;
+ intel_context_unlock_pinned(ce);
+ }
+
+ return sz;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_context_param.h b/drivers/gpu/drm/i915/gt/intel_context_param.h
new file mode 100644
index 000000000000..f053d8633fe2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gt/intel_context_param.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef INTEL_CONTEXT_PARAM_H
+#define INTEL_CONTEXT_PARAM_H
+
+struct intel_context;
+
+int intel_context_set_ring_size(struct intel_context *ce, long sz);
+long intel_context_get_ring_size(struct intel_context *ce);
+
+#endif /* INTEL_CONTEXT_PARAM_H */
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 47561dc29304..39b0125b7143 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -2966,6 +2966,7 @@ __execlists_update_reg_state(const struct intel_context *ce,
regs[CTX_RING_START] = i915_ggtt_offset(ring->vma);
regs[CTX_RING_HEAD] = head;
regs[CTX_RING_TAIL] = ring->tail;
+ regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID;
/* RPCS */
if (engine->class == RENDER_CLASS) {
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 829c0a48577f..2813e579b480 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1619,6 +1619,27 @@ struct drm_i915_gem_context_param {
* By default, new contexts allow persistence.
*/
#define I915_CONTEXT_PARAM_PERSISTENCE 0xb
+
+/*
+ * I915_CONTEXT_PARAM_RINGSIZE:
+ *
+ * Sets the size of the CS ringbuffer to use for logical ring contexts. This
+ * applies a limit of how many batches can be queued to HW before the caller
+ * is blocked due to lack of space for more commands.
+ *
+ * Only reliably possible to be set prior to first use, i.e. during
+ * construction. At any later point, the current execution must be flushed as
+ * the ring can only be changed while the context is idle. Note, the ringsize
+ * can be specified as a constructor property, see
+ * I915_CONTEXT_CREATE_EXT_SETPARAM, but can also be set later if required.
+ *
+ * Only applies to the current set of engines and is lost when those engines
+ * are replaced by a new mapping (see I915_CONTEXT_PARAM_ENGINES).
+ *
+ * Must be between 4 - 512 KiB, in intervals of page size [4 KiB].
+ * Default is 16 KiB.
+ */
+#define I915_CONTEXT_PARAM_RINGSIZE 0xc
/* Must be kept compact -- no holes and well documented */
__u64 value;
--
2.25.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [Intel-gfx] [PATCH 3/3] drm/i915/gem: Honour O_NONBLOCK before throttling execbuf submissions
2020-02-21 9:40 [Intel-gfx] [PATCH 1/3] drm/i915: Flush idle barriers when waiting Chris Wilson
2020-02-21 9:40 ` [Intel-gfx] [PATCH 2/3] drm/i915: Allow userspace to specify ringsize on construction Chris Wilson
@ 2020-02-21 9:40 ` Chris Wilson
2020-02-21 13:15 ` [Intel-gfx] [PATCH] drm/i915/gem: Only call eb_lookup_vma once during execbuf ioctl Chris Wilson
2020-02-21 21:18 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for series starting with [1/3] drm/i915: Flush idle barriers when waiting (rev2) Patchwork
2 siblings, 1 reply; 8+ messages in thread
From: Chris Wilson @ 2020-02-21 9:40 UTC (permalink / raw)
To: intel-gfx
Check the user's flags on the struct file before deciding whether or not
to stall before submitting a request. This allows us to reasonably
cheaply honour O_NONBLOCK without checking at more critical phases
during request submission.
Suggested-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Steve Carbonari <steven.carbonari@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 21 ++++++++++++-------
1 file changed, 14 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 87fa5f42c39a..8646d76f4b6e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2327,15 +2327,22 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
intel_context_timeline_unlock(tl);
if (rq) {
- if (i915_request_wait(rq,
- I915_WAIT_INTERRUPTIBLE,
- MAX_SCHEDULE_TIMEOUT) < 0) {
- i915_request_put(rq);
- err = -EINTR;
- goto err_exit;
- }
+ bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;
+ long timeout;
+
+ timeout = MAX_SCHEDULE_TIMEOUT;
+ if (nonblock)
+ timeout = 0;
+ timeout = i915_request_wait(rq,
+ I915_WAIT_INTERRUPTIBLE,
+ timeout);
i915_request_put(rq);
+
+ if (timeout < 0) {
+ err = nonblock ? -EWOULDBLOCK : timeout;
+ goto err_exit;
+ }
}
eb->engine = ce->engine;
--
2.25.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [Intel-gfx] [PATCH] drm/i915/gem: Only call eb_lookup_vma once during execbuf ioctl
2020-02-21 9:40 ` [Intel-gfx] [PATCH 3/3] drm/i915/gem: Honour O_NONBLOCK before throttling execbuf submissions Chris Wilson
@ 2020-02-21 13:15 ` Chris Wilson
2020-02-21 13:31 ` Maarten Lankhorst
0 siblings, 1 reply; 8+ messages in thread
From: Chris Wilson @ 2020-02-21 13:15 UTC (permalink / raw)
To: intel-gfx
As we no longer stash anything inside i915_vma under the exclusive
protection of struct_mutex, we do not need to revoke the i915_vma
stashes before dropping struct_mutex to handle pagefaults. Knowing that
we must drop the struct_mutex while keeping the eb->vma around, means
that we are required to hold onto the object reference until we have
marked the vma as active.
Fixes: 155ab8836caa ("drm/i915: Move object close under its own lock")
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 109 +++++++-----------
1 file changed, 43 insertions(+), 66 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 3ccd3a096486..e4e50155145e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -48,17 +48,15 @@ enum {
#define DBG_FORCE_RELOC 0 /* choose one of the above! */
};
-#define __EXEC_OBJECT_HAS_REF BIT(31)
-#define __EXEC_OBJECT_HAS_PIN BIT(30)
-#define __EXEC_OBJECT_HAS_FENCE BIT(29)
-#define __EXEC_OBJECT_NEEDS_MAP BIT(28)
-#define __EXEC_OBJECT_NEEDS_BIAS BIT(27)
-#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 27) /* all of the above */
+#define __EXEC_OBJECT_HAS_PIN BIT(31)
+#define __EXEC_OBJECT_HAS_FENCE BIT(30)
+#define __EXEC_OBJECT_NEEDS_MAP BIT(29)
+#define __EXEC_OBJECT_NEEDS_BIAS BIT(28)
+#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 28) /* all of the above */
#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
#define __EXEC_HAS_RELOC BIT(31)
-#define __EXEC_VALIDATED BIT(30)
-#define __EXEC_INTERNAL_FLAGS (~0u << 30)
+#define __EXEC_INTERNAL_FLAGS (~0u << 31)
#define UPDATE PIN_OFFSET_FIXED
#define BATCH_OFFSET_BIAS (256*1024)
@@ -473,24 +471,17 @@ eb_validate_vma(struct i915_execbuffer *eb,
return 0;
}
-static int
+static void
eb_add_vma(struct i915_execbuffer *eb,
unsigned int i, unsigned batch_idx,
struct i915_vma *vma)
{
struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
struct eb_vma *ev = &eb->vma[i];
- int err;
GEM_BUG_ON(i915_vma_is_closed(vma));
- if (!(eb->args->flags & __EXEC_VALIDATED)) {
- err = eb_validate_vma(eb, entry, vma);
- if (unlikely(err))
- return err;
- }
-
- ev->vma = vma;
+ ev->vma = i915_vma_get(vma);
ev->exec = entry;
ev->flags = entry->flags;
@@ -523,20 +514,14 @@ eb_add_vma(struct i915_execbuffer *eb,
eb->batch = ev;
}
- err = 0;
if (eb_pin_vma(eb, entry, ev)) {
if (entry->offset != vma->node.start) {
entry->offset = vma->node.start | UPDATE;
eb->args->flags |= __EXEC_HAS_RELOC;
}
} else {
- eb_unreserve_vma(ev);
-
list_add_tail(&ev->bind_link, &eb->unbound);
- if (drm_mm_node_allocated(&vma->node))
- err = i915_vma_unbind(vma);
}
- return err;
}
static inline int use_cpu_reloc(const struct reloc_cache *cache,
@@ -585,6 +570,14 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb, struct eb_vma *ev)
pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
}
+ if (drm_mm_node_allocated(&vma->node) &&
+ eb_vma_misplaced(entry, vma, ev->flags)) {
+ eb_unreserve_vma(ev);
+ err = i915_vma_unbind(vma);
+ if (err)
+ return err;
+ }
+
err = i915_vma_pin(vma,
entry->pad_to_size, entry->alignment,
pin_flags);
@@ -643,7 +636,7 @@ static int eb_reserve(struct i915_execbuffer *eb)
if (err)
break;
}
- if (err != -ENOSPC)
+ if (!(err == -ENOSPC || err == -EAGAIN))
return err;
/* Resort *all* the objects into priority order */
@@ -673,6 +666,11 @@ static int eb_reserve(struct i915_execbuffer *eb)
}
list_splice_tail(&last, &eb->unbound);
+ if (err == -EAGAIN) {
+ flush_workqueue(eb->i915->mm.userptr_wq);
+ continue;
+ }
+
switch (pass++) {
case 0:
break;
@@ -726,17 +724,14 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
unsigned int i, batch;
int err;
+ if (unlikely(i915_gem_context_is_closed(eb->gem_context)))
+ return -ENOENT;
+
INIT_LIST_HEAD(&eb->relocs);
INIT_LIST_HEAD(&eb->unbound);
batch = eb_batch_index(eb);
- mutex_lock(&eb->gem_context->mutex);
- if (unlikely(i915_gem_context_is_closed(eb->gem_context))) {
- err = -ENOENT;
- goto err_ctx;
- }
-
for (i = 0; i < eb->buffer_count; i++) {
u32 handle = eb->exec[i].handle;
struct i915_lut_handle *lut;
@@ -781,25 +776,19 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
i915_gem_object_unlock(obj);
add_vma:
- err = eb_add_vma(eb, i, batch, vma);
+ err = eb_validate_vma(eb, &eb->exec[i], vma);
if (unlikely(err))
goto err_vma;
- GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&
- eb_vma_misplaced(&eb->exec[i], vma, eb->vma[i].flags));
+ eb_add_vma(eb, i, batch, vma);
}
- mutex_unlock(&eb->gem_context->mutex);
-
- eb->args->flags |= __EXEC_VALIDATED;
- return eb_reserve(eb);
+ return 0;
err_obj:
i915_gem_object_put(obj);
err_vma:
eb->vma[i].vma = NULL;
-err_ctx:
- mutex_unlock(&eb->gem_context->mutex);
return err;
}
@@ -840,19 +829,10 @@ static void eb_release_vmas(const struct i915_execbuffer *eb)
if (ev->flags & __EXEC_OBJECT_HAS_PIN)
__eb_unreserve_vma(vma, ev->flags);
- if (ev->flags & __EXEC_OBJECT_HAS_REF)
- i915_vma_put(vma);
+ i915_vma_put(vma);
}
}
-static void eb_reset_vmas(const struct i915_execbuffer *eb)
-{
- eb_release_vmas(eb);
- if (eb->lut_size > 0)
- memset(eb->buckets, 0,
- sizeof(struct hlist_head) << eb->lut_size);
-}
-
static void eb_destroy(const struct i915_execbuffer *eb)
{
GEM_BUG_ON(eb->reloc_cache.rq);
@@ -1661,8 +1641,6 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
goto out;
}
- /* We may process another execbuffer during the unlock... */
- eb_reset_vmas(eb);
mutex_unlock(&dev->struct_mutex);
/*
@@ -1701,11 +1679,6 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
goto out;
}
- /* reacquire the objects */
- err = eb_lookup_vmas(eb);
- if (err)
- goto err;
-
GEM_BUG_ON(!eb->batch);
list_for_each_entry(ev, &eb->relocs, reloc_link) {
@@ -1756,8 +1729,17 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
static int eb_relocate(struct i915_execbuffer *eb)
{
- if (eb_lookup_vmas(eb))
- goto slow;
+ int err;
+
+ mutex_lock(&eb->gem_context->mutex);
+ err = eb_lookup_vmas(eb);
+ mutex_unlock(&eb->gem_context->mutex);
+ if (err)
+ return err;
+
+ err = eb_reserve(eb);
+ if (err)
+ return err;
/* The objects are in their final locations, apply the relocations. */
if (eb->args->flags & __EXEC_HAS_RELOC) {
@@ -1765,14 +1747,11 @@ static int eb_relocate(struct i915_execbuffer *eb)
list_for_each_entry(ev, &eb->relocs, reloc_link) {
if (eb_relocate_vma(eb, ev))
- goto slow;
+ return eb_relocate_slow(eb);
}
}
return 0;
-
-slow:
- return eb_relocate_slow(eb);
}
static int eb_move_to_gpu(struct i915_execbuffer *eb)
@@ -1854,8 +1833,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
i915_vma_unlock(vma);
__eb_unreserve_vma(vma, flags);
- if (unlikely(flags & __EXEC_OBJECT_HAS_REF))
- i915_vma_put(vma);
+ i915_vma_put(vma);
ev->vma = NULL;
}
@@ -2115,8 +2093,7 @@ static int eb_parse(struct i915_execbuffer *eb)
goto err_trampoline;
eb->vma[eb->buffer_count].vma = i915_vma_get(shadow);
- eb->vma[eb->buffer_count].flags =
- __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
+ eb->vma[eb->buffer_count].flags = __EXEC_OBJECT_HAS_PIN;
eb->batch = &eb->vma[eb->buffer_count++];
eb->trampoline = trampoline;
--
2.25.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [Intel-gfx] [PATCH] drm/i915/gem: Only call eb_lookup_vma once during execbuf ioctl
2020-02-21 13:15 ` [Intel-gfx] [PATCH] drm/i915/gem: Only call eb_lookup_vma once during execbuf ioctl Chris Wilson
@ 2020-02-21 13:31 ` Maarten Lankhorst
2020-02-21 13:36 ` Chris Wilson
0 siblings, 1 reply; 8+ messages in thread
From: Maarten Lankhorst @ 2020-02-21 13:31 UTC (permalink / raw)
To: Chris Wilson, intel-gfx
Op 21-02-2020 om 14:15 schreef Chris Wilson:
> As we no longer stash anything inside i915_vma under the exclusive
> protection of struct_mutex, we do not need to revoke the i915_vma
> stashes before dropping struct_mutex to handle pagefaults. Knowing that
> we must drop the struct_mutex while keeping the eb->vma around, means
> that we are required to hold onto to the object reference until we have
> marked the vma as active.
>
> Fixes: 155ab8836caa ("drm/i915: Move object close under its own lock")
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> ---
> .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 109 +++++++-----------
> 1 file changed, 43 insertions(+), 66 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> index 3ccd3a096486..e4e50155145e 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> @@ -48,17 +48,15 @@ enum {
> #define DBG_FORCE_RELOC 0 /* choose one of the above! */
> };
>
> -#define __EXEC_OBJECT_HAS_REF BIT(31)
> -#define __EXEC_OBJECT_HAS_PIN BIT(30)
> -#define __EXEC_OBJECT_HAS_FENCE BIT(29)
> -#define __EXEC_OBJECT_NEEDS_MAP BIT(28)
> -#define __EXEC_OBJECT_NEEDS_BIAS BIT(27)
> -#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 27) /* all of the above */
> +#define __EXEC_OBJECT_HAS_PIN BIT(31)
> +#define __EXEC_OBJECT_HAS_FENCE BIT(30)
> +#define __EXEC_OBJECT_NEEDS_MAP BIT(29)
> +#define __EXEC_OBJECT_NEEDS_BIAS BIT(28)
> +#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 28) /* all of the above */
> #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
>
> #define __EXEC_HAS_RELOC BIT(31)
> -#define __EXEC_VALIDATED BIT(30)
> -#define __EXEC_INTERNAL_FLAGS (~0u << 30)
> +#define __EXEC_INTERNAL_FLAGS (~0u << 31)
> #define UPDATE PIN_OFFSET_FIXED
>
> #define BATCH_OFFSET_BIAS (256*1024)
> @@ -473,24 +471,17 @@ eb_validate_vma(struct i915_execbuffer *eb,
> return 0;
> }
>
> -static int
> +static void
> eb_add_vma(struct i915_execbuffer *eb,
> unsigned int i, unsigned batch_idx,
> struct i915_vma *vma)
> {
> struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
> struct eb_vma *ev = &eb->vma[i];
> - int err;
>
> GEM_BUG_ON(i915_vma_is_closed(vma));
>
> - if (!(eb->args->flags & __EXEC_VALIDATED)) {
> - err = eb_validate_vma(eb, entry, vma);
> - if (unlikely(err))
> - return err;
> - }
> -
> - ev->vma = vma;
> + ev->vma = i915_vma_get(vma);
> ev->exec = entry;
> ev->flags = entry->flags;
>
> @@ -523,20 +514,14 @@ eb_add_vma(struct i915_execbuffer *eb,
> eb->batch = ev;
> }
>
> - err = 0;
> if (eb_pin_vma(eb, entry, ev)) {
> if (entry->offset != vma->node.start) {
> entry->offset = vma->node.start | UPDATE;
> eb->args->flags |= __EXEC_HAS_RELOC;
> }
> } else {
> - eb_unreserve_vma(ev);
> -
> list_add_tail(&ev->bind_link, &eb->unbound);
> - if (drm_mm_node_allocated(&vma->node))
> - err = i915_vma_unbind(vma);
> }
> - return err;
> }
>
> static inline int use_cpu_reloc(const struct reloc_cache *cache,
> @@ -585,6 +570,14 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb, struct eb_vma *ev)
> pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
> }
>
> + if (drm_mm_node_allocated(&vma->node) &&
> + eb_vma_misplaced(entry, vma, ev->flags)) {
> + eb_unreserve_vma(ev);
> + err = i915_vma_unbind(vma);
> + if (err)
> + return err;
> + }
> +
> err = i915_vma_pin(vma,
> entry->pad_to_size, entry->alignment,
> pin_flags);
> @@ -643,7 +636,7 @@ static int eb_reserve(struct i915_execbuffer *eb)
> if (err)
> break;
> }
> - if (err != -ENOSPC)
> + if (!(err == -ENOSPC || err == -EAGAIN))
> return err;
>
> /* Resort *all* the objects into priority order */
> @@ -673,6 +666,11 @@ static int eb_reserve(struct i915_execbuffer *eb)
> }
> list_splice_tail(&last, &eb->unbound);
>
> + if (err == -EAGAIN) {
> + flush_workqueue(eb->i915->mm.userptr_wq);
> + continue;
> + }
> +
> switch (pass++) {
> case 0:
> break;
> @@ -726,17 +724,14 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
> unsigned int i, batch;
> int err;
>
> + if (unlikely(i915_gem_context_is_closed(eb->gem_context)))
> + return -ENOENT;
> +
> INIT_LIST_HEAD(&eb->relocs);
> INIT_LIST_HEAD(&eb->unbound);
>
> batch = eb_batch_index(eb);
>
> - mutex_lock(&eb->gem_context->mutex);
> - if (unlikely(i915_gem_context_is_closed(eb->gem_context))) {
> - err = -ENOENT;
> - goto err_ctx;
> - }
> -
> for (i = 0; i < eb->buffer_count; i++) {
> u32 handle = eb->exec[i].handle;
> struct i915_lut_handle *lut;
> @@ -781,25 +776,19 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
> i915_gem_object_unlock(obj);
>
> add_vma:
> - err = eb_add_vma(eb, i, batch, vma);
> + err = eb_validate_vma(eb, &eb->exec[i], vma);
> if (unlikely(err))
> goto err_vma;
>
> - GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&
> - eb_vma_misplaced(&eb->exec[i], vma, eb->vma[i].flags));
> + eb_add_vma(eb, i, batch, vma);
> }
>
> - mutex_unlock(&eb->gem_context->mutex);
> -
> - eb->args->flags |= __EXEC_VALIDATED;
> - return eb_reserve(eb);
> + return 0;
>
> err_obj:
> i915_gem_object_put(obj);
> err_vma:
> eb->vma[i].vma = NULL;
> -err_ctx:
> - mutex_unlock(&eb->gem_context->mutex);
> return err;
> }
>
> @@ -840,19 +829,10 @@ static void eb_release_vmas(const struct i915_execbuffer *eb)
> if (ev->flags & __EXEC_OBJECT_HAS_PIN)
> __eb_unreserve_vma(vma, ev->flags);
>
> - if (ev->flags & __EXEC_OBJECT_HAS_REF)
> - i915_vma_put(vma);
> + i915_vma_put(vma);
> }
> }
>
> -static void eb_reset_vmas(const struct i915_execbuffer *eb)
> -{
> - eb_release_vmas(eb);
> - if (eb->lut_size > 0)
> - memset(eb->buckets, 0,
> - sizeof(struct hlist_head) << eb->lut_size);
> -}
> -
> static void eb_destroy(const struct i915_execbuffer *eb)
> {
> GEM_BUG_ON(eb->reloc_cache.rq);
> @@ -1661,8 +1641,6 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
> goto out;
> }
>
> - /* We may process another execbuffer during the unlock... */
> - eb_reset_vmas(eb);
> mutex_unlock(&dev->struct_mutex);
>
> /*
> @@ -1701,11 +1679,6 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
> goto out;
> }
>
> - /* reacquire the objects */
> - err = eb_lookup_vmas(eb);
> - if (err)
> - goto err;
> -
> GEM_BUG_ON(!eb->batch);
>
> list_for_each_entry(ev, &eb->relocs, reloc_link) {
> @@ -1756,8 +1729,17 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
>
> static int eb_relocate(struct i915_execbuffer *eb)
> {
> - if (eb_lookup_vmas(eb))
> - goto slow;
> + int err;
> +
> + mutex_lock(&eb->gem_context->mutex);
> + err = eb_lookup_vmas(eb);
> + mutex_unlock(&eb->gem_context->mutex);
> + if (err)
> + return err;
> +
> + err = eb_reserve(eb);
> + if (err)
> + return err;
>
> /* The objects are in their final locations, apply the relocations. */
> if (eb->args->flags & __EXEC_HAS_RELOC) {
> @@ -1765,14 +1747,11 @@ static int eb_relocate(struct i915_execbuffer *eb)
>
> list_for_each_entry(ev, &eb->relocs, reloc_link) {
> if (eb_relocate_vma(eb, ev))
> - goto slow;
> + return eb_relocate_slow(eb);
> }
> }
>
> return 0;
> -
> -slow:
> - return eb_relocate_slow(eb);
> }
>
> static int eb_move_to_gpu(struct i915_execbuffer *eb)
> @@ -1854,8 +1833,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
> i915_vma_unlock(vma);
>
> __eb_unreserve_vma(vma, flags);
> - if (unlikely(flags & __EXEC_OBJECT_HAS_REF))
> - i915_vma_put(vma);
> + i915_vma_put(vma);
>
> ev->vma = NULL;
> }
> @@ -2115,8 +2093,7 @@ static int eb_parse(struct i915_execbuffer *eb)
> goto err_trampoline;
>
> eb->vma[eb->buffer_count].vma = i915_vma_get(shadow);
> - eb->vma[eb->buffer_count].flags =
> - __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
> + eb->vma[eb->buffer_count].flags = __EXEC_OBJECT_HAS_PIN;
> eb->batch = &eb->vma[eb->buffer_count++];
>
> eb->trampoline = trampoline;
Did you steal this from me? :P
I have a similar patch in my series..
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [Intel-gfx] [PATCH] drm/i915/gem: Only call eb_lookup_vma once during execbuf ioctl
2020-02-21 13:31 ` Maarten Lankhorst
@ 2020-02-21 13:36 ` Chris Wilson
0 siblings, 0 replies; 8+ messages in thread
From: Chris Wilson @ 2020-02-21 13:36 UTC (permalink / raw)
To: Maarten Lankhorst, intel-gfx
Quoting Maarten Lankhorst (2020-02-21 13:31:44)
> Op 21-02-2020 om 14:15 schreef Chris Wilson:
> > As we no longer stash anything inside i915_vma under the exclusive
> > protection of struct_mutex, we do not need to revoke the i915_vma
> > stashes before dropping struct_mutex to handle pagefaults. Knowing that
> > we must drop the struct_mutex while keeping the eb->vma around, means
> > that we are required to hold onto to the object reference until we have
> > marked the vma as active.
> >
> > Fixes: 155ab8836caa ("drm/i915: Move object close under its own lock")
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
> > ---
> > .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 109 +++++++-----------
> > 1 file changed, 43 insertions(+), 66 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> > index 3ccd3a096486..e4e50155145e 100644
> > --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> > +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
> > @@ -48,17 +48,15 @@ enum {
> > #define DBG_FORCE_RELOC 0 /* choose one of the above! */
> > };
> >
> > -#define __EXEC_OBJECT_HAS_REF BIT(31)
> > -#define __EXEC_OBJECT_HAS_PIN BIT(30)
> > -#define __EXEC_OBJECT_HAS_FENCE BIT(29)
> > -#define __EXEC_OBJECT_NEEDS_MAP BIT(28)
> > -#define __EXEC_OBJECT_NEEDS_BIAS BIT(27)
> > -#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 27) /* all of the above */
> > +#define __EXEC_OBJECT_HAS_PIN BIT(31)
> > +#define __EXEC_OBJECT_HAS_FENCE BIT(30)
> > +#define __EXEC_OBJECT_NEEDS_MAP BIT(29)
> > +#define __EXEC_OBJECT_NEEDS_BIAS BIT(28)
> > +#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 28) /* all of the above */
> > #define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
> >
> > #define __EXEC_HAS_RELOC BIT(31)
> > -#define __EXEC_VALIDATED BIT(30)
> > -#define __EXEC_INTERNAL_FLAGS (~0u << 30)
> > +#define __EXEC_INTERNAL_FLAGS (~0u << 31)
> > #define UPDATE PIN_OFFSET_FIXED
> >
> > #define BATCH_OFFSET_BIAS (256*1024)
> > @@ -473,24 +471,17 @@ eb_validate_vma(struct i915_execbuffer *eb,
> > return 0;
> > }
> >
> > -static int
> > +static void
> > eb_add_vma(struct i915_execbuffer *eb,
> > unsigned int i, unsigned batch_idx,
> > struct i915_vma *vma)
> > {
> > struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
> > struct eb_vma *ev = &eb->vma[i];
> > - int err;
> >
> > GEM_BUG_ON(i915_vma_is_closed(vma));
> >
> > - if (!(eb->args->flags & __EXEC_VALIDATED)) {
> > - err = eb_validate_vma(eb, entry, vma);
> > - if (unlikely(err))
> > - return err;
> > - }
> > -
> > - ev->vma = vma;
> > + ev->vma = i915_vma_get(vma);
> > ev->exec = entry;
> > ev->flags = entry->flags;
> >
> > @@ -523,20 +514,14 @@ eb_add_vma(struct i915_execbuffer *eb,
> > eb->batch = ev;
> > }
> >
> > - err = 0;
> > if (eb_pin_vma(eb, entry, ev)) {
> > if (entry->offset != vma->node.start) {
> > entry->offset = vma->node.start | UPDATE;
> > eb->args->flags |= __EXEC_HAS_RELOC;
> > }
> > } else {
> > - eb_unreserve_vma(ev);
> > -
> > list_add_tail(&ev->bind_link, &eb->unbound);
> > - if (drm_mm_node_allocated(&vma->node))
> > - err = i915_vma_unbind(vma);
> > }
> > - return err;
> > }
> >
> > static inline int use_cpu_reloc(const struct reloc_cache *cache,
> > @@ -585,6 +570,14 @@ static int eb_reserve_vma(const struct i915_execbuffer *eb, struct eb_vma *ev)
> > pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
> > }
> >
> > + if (drm_mm_node_allocated(&vma->node) &&
> > + eb_vma_misplaced(entry, vma, ev->flags)) {
> > + eb_unreserve_vma(ev);
> > + err = i915_vma_unbind(vma);
> > + if (err)
> > + return err;
> > + }
> > +
> > err = i915_vma_pin(vma,
> > entry->pad_to_size, entry->alignment,
> > pin_flags);
> > @@ -643,7 +636,7 @@ static int eb_reserve(struct i915_execbuffer *eb)
> > if (err)
> > break;
> > }
> > - if (err != -ENOSPC)
> > + if (!(err == -ENOSPC || err == -EAGAIN))
> > return err;
> >
> > /* Resort *all* the objects into priority order */
> > @@ -673,6 +666,11 @@ static int eb_reserve(struct i915_execbuffer *eb)
> > }
> > list_splice_tail(&last, &eb->unbound);
> >
> > + if (err == -EAGAIN) {
> > + flush_workqueue(eb->i915->mm.userptr_wq);
> > + continue;
> > + }
> > +
> > switch (pass++) {
> > case 0:
> > break;
> > @@ -726,17 +724,14 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
> > unsigned int i, batch;
> > int err;
> >
> > + if (unlikely(i915_gem_context_is_closed(eb->gem_context)))
> > + return -ENOENT;
> > +
> > INIT_LIST_HEAD(&eb->relocs);
> > INIT_LIST_HEAD(&eb->unbound);
> >
> > batch = eb_batch_index(eb);
> >
> > - mutex_lock(&eb->gem_context->mutex);
> > - if (unlikely(i915_gem_context_is_closed(eb->gem_context))) {
> > - err = -ENOENT;
> > - goto err_ctx;
> > - }
> > -
> > for (i = 0; i < eb->buffer_count; i++) {
> > u32 handle = eb->exec[i].handle;
> > struct i915_lut_handle *lut;
> > @@ -781,25 +776,19 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
> > i915_gem_object_unlock(obj);
> >
> > add_vma:
> > - err = eb_add_vma(eb, i, batch, vma);
> > + err = eb_validate_vma(eb, &eb->exec[i], vma);
> > if (unlikely(err))
> > goto err_vma;
> >
> > - GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&
> > - eb_vma_misplaced(&eb->exec[i], vma, eb->vma[i].flags));
> > + eb_add_vma(eb, i, batch, vma);
> > }
> >
> > - mutex_unlock(&eb->gem_context->mutex);
> > -
> > - eb->args->flags |= __EXEC_VALIDATED;
> > - return eb_reserve(eb);
> > + return 0;
> >
> > err_obj:
> > i915_gem_object_put(obj);
> > err_vma:
> > eb->vma[i].vma = NULL;
> > -err_ctx:
> > - mutex_unlock(&eb->gem_context->mutex);
> > return err;
> > }
> >
> > @@ -840,19 +829,10 @@ static void eb_release_vmas(const struct i915_execbuffer *eb)
> > if (ev->flags & __EXEC_OBJECT_HAS_PIN)
> > __eb_unreserve_vma(vma, ev->flags);
> >
> > - if (ev->flags & __EXEC_OBJECT_HAS_REF)
> > - i915_vma_put(vma);
> > + i915_vma_put(vma);
> > }
> > }
> >
> > -static void eb_reset_vmas(const struct i915_execbuffer *eb)
> > -{
> > - eb_release_vmas(eb);
> > - if (eb->lut_size > 0)
> > - memset(eb->buckets, 0,
> > - sizeof(struct hlist_head) << eb->lut_size);
> > -}
> > -
> > static void eb_destroy(const struct i915_execbuffer *eb)
> > {
> > GEM_BUG_ON(eb->reloc_cache.rq);
> > @@ -1661,8 +1641,6 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
> > goto out;
> > }
> >
> > - /* We may process another execbuffer during the unlock... */
> > - eb_reset_vmas(eb);
> > mutex_unlock(&dev->struct_mutex);
> >
> > /*
> > @@ -1701,11 +1679,6 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
> > goto out;
> > }
> >
> > - /* reacquire the objects */
> > - err = eb_lookup_vmas(eb);
> > - if (err)
> > - goto err;
> > -
> > GEM_BUG_ON(!eb->batch);
> >
> > list_for_each_entry(ev, &eb->relocs, reloc_link) {
> > @@ -1756,8 +1729,17 @@ static noinline int eb_relocate_slow(struct i915_execbuffer *eb)
> >
> > static int eb_relocate(struct i915_execbuffer *eb)
> > {
> > - if (eb_lookup_vmas(eb))
> > - goto slow;
> > + int err;
> > +
> > + mutex_lock(&eb->gem_context->mutex);
> > + err = eb_lookup_vmas(eb);
> > + mutex_unlock(&eb->gem_context->mutex);
> > + if (err)
> > + return err;
> > +
> > + err = eb_reserve(eb);
> > + if (err)
> > + return err;
> >
> > /* The objects are in their final locations, apply the relocations. */
> > if (eb->args->flags & __EXEC_HAS_RELOC) {
> > @@ -1765,14 +1747,11 @@ static int eb_relocate(struct i915_execbuffer *eb)
> >
> > list_for_each_entry(ev, &eb->relocs, reloc_link) {
> > if (eb_relocate_vma(eb, ev))
> > - goto slow;
> > + return eb_relocate_slow(eb);
> > }
> > }
> >
> > return 0;
> > -
> > -slow:
> > - return eb_relocate_slow(eb);
> > }
> >
> > static int eb_move_to_gpu(struct i915_execbuffer *eb)
> > @@ -1854,8 +1833,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
> > i915_vma_unlock(vma);
> >
> > __eb_unreserve_vma(vma, flags);
> > - if (unlikely(flags & __EXEC_OBJECT_HAS_REF))
> > - i915_vma_put(vma);
> > + i915_vma_put(vma);
> >
> > ev->vma = NULL;
> > }
> > @@ -2115,8 +2093,7 @@ static int eb_parse(struct i915_execbuffer *eb)
> > goto err_trampoline;
> >
> > eb->vma[eb->buffer_count].vma = i915_vma_get(shadow);
> > - eb->vma[eb->buffer_count].flags =
> > - __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF;
> > + eb->vma[eb->buffer_count].flags = __EXEC_OBJECT_HAS_PIN;
> > eb->batch = &eb->vma[eb->buffer_count++];
> >
> > eb->trampoline = trampoline;
>
> Did you steal this from me? :P
It was in the series that removes struct_mutex. I think we should just
remove struct_mutex right away so that we have parallel execbuf as a
baseline.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread
* [Intel-gfx] ✗ Fi.CI.BUILD: failure for series starting with [1/3] drm/i915: Flush idle barriers when waiting (rev2)
2020-02-21 9:40 [Intel-gfx] [PATCH 1/3] drm/i915: Flush idle barriers when waiting Chris Wilson
2020-02-21 9:40 ` [Intel-gfx] [PATCH 2/3] drm/i915: Allow userspace to specify ringsize on construction Chris Wilson
2020-02-21 9:40 ` [Intel-gfx] [PATCH 3/3] drm/i915/gem: Honour O_NONBLOCK before throttling execbuf submissions Chris Wilson
@ 2020-02-21 21:18 ` Patchwork
2 siblings, 0 replies; 8+ messages in thread
From: Patchwork @ 2020-02-21 21:18 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
== Series Details ==
Series: series starting with [1/3] drm/i915: Flush idle barriers when waiting (rev2)
URL : https://patchwork.freedesktop.org/series/73751/
State : failure
== Summary ==
Applying: drm/i915: Flush idle barriers when waiting
Applying: drm/i915: Allow userspace to specify ringsize on construction
Applying: drm/i915/gem: Only call eb_lookup_vma once during execbuf ioctl
Using index info to reconstruct a base tree...
M drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
Falling back to patching base and 3-way merge...
Auto-merging drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
CONFLICT (content): Merge conflict in drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
error: Failed to merge in the changes.
hint: Use 'git am --show-current-patch' to see the failed patch
Patch failed at 0003 drm/i915/gem: Only call eb_lookup_vma once during execbuf ioctl
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread
* [Intel-gfx] [PATCH 3/3] drm/i915/gem: Honour O_NONBLOCK before throttling execbuf submissions
@ 2019-11-15 16:05 ` Chris Wilson
0 siblings, 0 replies; 8+ messages in thread
From: Chris Wilson @ 2019-11-15 16:05 UTC (permalink / raw)
To: intel-gfx
Check the user's flags on the struct file before deciding whether or not
to stall before submitting a request. This allows us to reasonably
cheaply honour O_NONBLOCK without checking at more critical phases
during request submission.
Suggested-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 21 ++++++++++++-------
1 file changed, 14 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index f0998f1225af..8da95772c8f2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2249,15 +2249,22 @@ static int __eb_pin_engine(struct i915_execbuffer *eb, struct intel_context *ce)
intel_context_timeline_unlock(tl);
if (rq) {
- if (i915_request_wait(rq,
- I915_WAIT_INTERRUPTIBLE,
- MAX_SCHEDULE_TIMEOUT) < 0) {
- i915_request_put(rq);
- err = -EINTR;
- goto err_exit;
- }
+ bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;
+ long timeout;
+
+ timeout = MAX_SCHEDULE_TIMEOUT;
+ if (nonblock)
+ timeout = 0;
+ timeout = i915_request_wait(rq,
+ I915_WAIT_INTERRUPTIBLE,
+ timeout);
i915_request_put(rq);
+
+ if (timeout < 0) {
+ err = nonblock ? -EWOULDBLOCK : timeout;
+ goto err_exit;
+ }
}
eb->engine = ce->engine;
--
2.24.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 8+ messages in thread
end of thread, other threads:[~2020-02-21 21:18 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-21 9:40 [Intel-gfx] [PATCH 1/3] drm/i915: Flush idle barriers when waiting Chris Wilson
2020-02-21 9:40 ` [Intel-gfx] [PATCH 2/3] drm/i915: Allow userspace to specify ringsize on construction Chris Wilson
2020-02-21 9:40 ` [Intel-gfx] [PATCH 3/3] drm/i915/gem: Honour O_NONBLOCK before throttling execbuf submissions Chris Wilson
2020-02-21 13:15 ` [Intel-gfx] [PATCH] drm/i915/gem: Only call eb_lookup_vma once during execbuf ioctl Chris Wilson
2020-02-21 13:31 ` Maarten Lankhorst
2020-02-21 13:36 ` Chris Wilson
2020-02-21 21:18 ` [Intel-gfx] ✗ Fi.CI.BUILD: failure for series starting with [1/3] drm/i915: Flush idle barriers when waiting (rev2) Patchwork
-- strict thread matches above, loose matches on Subject: below --
2019-11-15 16:05 [PATCH 1/3] drm/i915: Flush idle barriers when waiting Chris Wilson
2019-11-15 16:05 ` [Intel-gfx] [PATCH 3/3] drm/i915/gem: Honour O_NONBLOCK before throttling execbuf submissions Chris Wilson
2019-11-15 16:05 ` Chris Wilson
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.