From: Matthew Auld <matthew.auld@intel.com>
To: intel-gfx@lists.freedesktop.org
Subject: [PATCH v2 10/37] drm/i915/blt: support copying objects
Date: Thu, 27 Jun 2019 21:56:06 +0100
Message-ID: <20190627205633.1143-11-matthew.auld@intel.com>
In-Reply-To: <20190627205633.1143-1-matthew.auld@intel.com>

We can already clear an object with the blt, so try to do the same to
support copying from one object backing store to another. Really this is
just object -> object, which is not that useful yet; what we really want
is to copy between two backing stores of the same object, but that will
require some vma rework first, otherwise we are stuck with "tmp" objects.
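
A rough usage sketch of the new helper (a minimal example assuming a
pinned, blitter-capable intel_context "ce"; the object names and sizes
here are made up and error handling is trimmed):

	src = i915_gem_object_create_internal(i915, SZ_2M);
	dst = i915_gem_object_create_internal(i915, SZ_2M);

	/* src and dst must be the same size; the copy is queued on ce */
	mutex_lock(&i915->drm.struct_mutex);
	err = i915_gem_object_copy_blt(src, dst, ce);
	mutex_unlock(&i915->drm.struct_mutex);

	/*
	 * The copy completes asynchronously; wait on dst (e.g. by moving
	 * it to the CPU domain, as the selftest does) before reading the
	 * result with the CPU.
	 */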

Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Abdiel Janulgue <abdiel.janulgue@linux.intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_object_blt.c    | 137 ++++++++++++++++++
 .../gpu/drm/i915/gem/i915_gem_object_blt.h    |   8 ++
 .../i915/gem/selftests/i915_gem_object_blt.c  | 105 ++++++++++++++
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h  |   3 +-
 4 files changed, 252 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
index cb42e3a312e2..c2b28e06c379 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
@@ -102,6 +102,143 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
 	return err;
 }
 
+int intel_emit_vma_copy_blt(struct i915_request *rq,
+			    struct i915_vma *src,
+			    struct i915_vma *dst)
+{
+	const int gen = INTEL_GEN(rq->i915);
+	u32 *cs;
+
+	GEM_BUG_ON(src->size != dst->size);
+
+	cs = intel_ring_begin(rq, 10);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	if (gen >= 9) {
+		*cs++ = GEN9_XY_FAST_COPY_BLT_CMD | (10-2);
+		*cs++ = BLT_DEPTH_32 | PAGE_SIZE; /* dst pitch */
+		*cs++ = 0; /* dst y1 << 16 | x1 */
+		*cs++ = src->size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4; /* dst y2 << 16 | x2: one page per row */
+		*cs++ = lower_32_bits(dst->node.start);
+		*cs++ = upper_32_bits(dst->node.start);
+		*cs++ = 0; /* src y1 << 16 | x1 */
+		*cs++ = PAGE_SIZE; /* src pitch */
+		*cs++ = lower_32_bits(src->node.start);
+		*cs++ = upper_32_bits(src->node.start);
+	} else if (gen >= 8) {
+		*cs++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10-2);
+		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
+		*cs++ = 0;
+		*cs++ = src->size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
+		*cs++ = lower_32_bits(dst->node.start);
+		*cs++ = upper_32_bits(dst->node.start);
+		*cs++ = 0;
+		*cs++ = PAGE_SIZE;
+		*cs++ = lower_32_bits(src->node.start);
+		*cs++ = upper_32_bits(src->node.start);
+	} else {
+		*cs++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (8-2);
+		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
+		*cs++ = 0;
+		*cs++ = src->size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
+		*cs++ = dst->node.start;
+		*cs++ = 0;
+		*cs++ = PAGE_SIZE;
+		*cs++ = src->node.start;
+		*cs++ = MI_NOOP;
+		*cs++ = MI_NOOP;
+	}
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
+			     struct drm_i915_gem_object *dst,
+			     struct intel_context *ce)
+{
+	struct drm_i915_private *i915 = to_i915(src->base.dev);
+	struct i915_gem_context *ctx = ce->gem_context;
+	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
+	struct drm_gem_object *objs[] = { &src->base, &dst->base };
+	struct ww_acquire_ctx acquire;
+	struct i915_vma *vma_src, *vma_dst;
+	struct i915_request *rq;
+	int err;
+
+	vma_src = i915_vma_instance(src, vm, NULL);
+	if (IS_ERR(vma_src))
+		return PTR_ERR(vma_src);
+
+	err = i915_vma_pin(vma_src, 0, 0, PIN_USER);
+	if (unlikely(err))
+		return err;
+
+	vma_dst = i915_vma_instance(dst, vm, NULL);
+	if (IS_ERR(vma_dst)) {
+		err = PTR_ERR(vma_dst);
+		goto out_unpin_src;
+	}
+
+	err = i915_vma_pin(vma_dst, 0, 0, PIN_USER);
+	if (unlikely(err))
+		goto out_unpin_src;
+
+	rq = i915_request_create(ce);
+	if (IS_ERR(rq)) {
+		err = PTR_ERR(rq);
+		goto out_unpin_dst;
+	}
+
+	err = drm_gem_lock_reservations(objs, ARRAY_SIZE(objs), &acquire);
+	if (unlikely(err))
+		goto out_request;
+
+	if (src->cache_dirty & ~src->cache_coherent)
+		i915_gem_clflush_object(src, 0);
+
+	if (dst->cache_dirty & ~dst->cache_coherent)
+		i915_gem_clflush_object(dst, 0);
+
+	err = i915_request_await_object(rq, src, false);
+	if (unlikely(err))
+		goto out_unlock;
+
+	err = i915_vma_move_to_active(vma_src, rq, 0);
+	if (unlikely(err))
+		goto out_unlock;
+
+	err = i915_request_await_object(rq, dst, true);
+	if (unlikely(err))
+		goto out_unlock;
+
+	err = i915_vma_move_to_active(vma_dst, rq, EXEC_OBJECT_WRITE);
+	if (unlikely(err))
+		goto out_unlock;
+
+	if (ce->engine->emit_init_breadcrumb) {
+		err = ce->engine->emit_init_breadcrumb(rq);
+		if (unlikely(err))
+			goto out_unlock;
+	}
+
+	err = intel_emit_vma_copy_blt(rq, vma_src, vma_dst);
+out_unlock:
+	drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire);
+out_request:
+	if (unlikely(err))
+		i915_request_skip(rq, err);
+
+	i915_request_add(rq);
+out_unpin_dst:
+	i915_vma_unpin(vma_dst);
+out_unpin_src:
+	i915_vma_unpin(vma_src);
+	return err;
+}
+
 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
 #include "selftests/i915_gem_object_blt.c"
 #endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
index 7ec7de6ac0c0..17fac835f391 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.h
@@ -21,4 +21,12 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
 			     struct intel_context *ce,
 			     u32 value);
 
+int intel_emit_vma_copy_blt(struct i915_request *rq,
+			    struct i915_vma *src,
+			    struct i915_vma *dst);
+
+int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
+			     struct drm_i915_gem_object *dst,
+			     struct intel_context *ce);
+
 #endif
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
index e23d8c9e9298..1f28a12f7bb4 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object_blt.c
@@ -94,10 +94,115 @@ static int igt_fill_blt(void *arg)
 	return err;
 }
 
+static int igt_copy_blt(void *arg)
+{
+	struct intel_context *ce = arg;
+	struct drm_i915_private *i915 = ce->gem_context->i915;
+	struct drm_i915_gem_object *src, *dst;
+	struct rnd_state prng;
+	IGT_TIMEOUT(end);
+	u32 *vaddr;
+	int err = 0;
+
+	prandom_seed_state(&prng, i915_selftest.random_seed);
+
+	do {
+		u32 sz = prandom_u32_state(&prng) % SZ_32M;
+		u32 val = prandom_u32_state(&prng);
+		u32 i;
+
+		sz = round_up(sz, PAGE_SIZE);
+
+		pr_debug("%s with sz=%x, val=%x\n", __func__, sz, val);
+
+		src = i915_gem_object_create_internal(i915, sz);
+		if (IS_ERR(src)) {
+			err = PTR_ERR(src);
+			goto err_flush;
+		}
+
+		vaddr = i915_gem_object_pin_map(src, I915_MAP_WB);
+		if (IS_ERR(vaddr)) {
+			err = PTR_ERR(vaddr);
+			goto err_put_src;
+		}
+
+		memset32(vaddr, val, src->base.size / sizeof(u32));
+
+		i915_gem_object_unpin_map(src);
+
+		if (!(src->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
+			src->cache_dirty = true;
+
+		dst = i915_gem_object_create_internal(i915, sz);
+		if (IS_ERR(dst)) {
+			err = PTR_ERR(dst);
+			goto err_put_src;
+		}
+
+		vaddr = i915_gem_object_pin_map(dst, I915_MAP_WB);
+		if (IS_ERR(vaddr)) {
+			err = PTR_ERR(vaddr);
+			goto err_put_dst;
+		}
+
+		memset32(vaddr, val ^ 0xdeadbeaf, dst->base.size / sizeof(u32));
+
+		if (!(dst->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
+			dst->cache_dirty = true;
+
+		mutex_lock(&i915->drm.struct_mutex);
+		err = i915_gem_object_copy_blt(src, dst, ce);
+		mutex_unlock(&i915->drm.struct_mutex);
+		if (err)
+			goto err_unpin;
+
+		i915_gem_object_lock(dst);
+		err = i915_gem_object_set_to_cpu_domain(dst, false);
+		i915_gem_object_unlock(dst);
+		if (err)
+			goto err_unpin;
+
+		for (i = 0; i < dst->base.size / sizeof(u32); ++i) {
+			if (vaddr[i] != val) {
+				pr_err("vaddr[%u]=%x, expected=%x\n", i,
+				       vaddr[i], val);
+				err = -EINVAL;
+				goto err_unpin;
+			}
+		}
+
+		i915_gem_object_unpin_map(dst);
+
+		i915_gem_object_put(src);
+		i915_gem_object_put(dst);
+	} while (!time_after(jiffies, end));
+
+	goto err_flush;
+
+err_unpin:
+	i915_gem_object_unpin_map(dst);
+err_put_dst:
+	i915_gem_object_put(dst);
+err_put_src:
+	i915_gem_object_put(src);
+err_flush:
+	mutex_lock(&i915->drm.struct_mutex);
+	if (igt_flush_test(i915, I915_WAIT_LOCKED))
+		err = -EIO;
+	mutex_unlock(&i915->drm.struct_mutex);
+
+	if (err == -ENOMEM)
+		err = 0;
+
+	return err;
+}
+
 int i915_gem_object_blt_live_selftests(struct drm_i915_private *i915)
 {
 	static const struct i915_subtest tests[] = {
 		SUBTEST(igt_fill_blt),
+		SUBTEST(igt_copy_blt),
 	};
 
 	if (i915_terminally_wedged(i915))
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index eec31e36aca7..e3b23351669c 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -182,7 +182,8 @@
 #define COLOR_BLT_CMD			(2<<29 | 0x40<<22 | (5-2))
 #define XY_COLOR_BLT_CMD		(2 << 29 | 0x50 << 22)
 #define SRC_COPY_BLT_CMD		((2<<29)|(0x43<<22)|4)
-#define XY_SRC_COPY_BLT_CMD		((2<<29)|(0x53<<22)|6)
+#define GEN9_XY_FAST_COPY_BLT_CMD	((2<<29)|(0x42<<22))
+#define XY_SRC_COPY_BLT_CMD		((2<<29)|(0x53<<22))
 #define XY_MONO_SRC_COPY_IMM_BLT	((2<<29)|(0x71<<22)|5)
 #define   BLT_WRITE_A			(2<<20)
 #define   BLT_WRITE_RGB			(1<<20)
-- 
2.20.1
