All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
To: intel-gfx@lists.freedesktop.org, dri-devel@lists.freedesktop.org
Cc: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>,
	"Christian König" <christian.koenig@amd.com>,
	"Daniel Vetter" <daniel.vetter@ffwll.ch>
Subject: [PATCH v3 07/12] drm, drm/i915: Move the memcpy_from_wc functionality to core drm
Date: Fri, 21 May 2021 17:32:48 +0200	[thread overview]
Message-ID: <20210521153253.518037-8-thomas.hellstrom@linux.intel.com> (raw)
In-Reply-To: <20210521153253.518037-1-thomas.hellstrom@linux.intel.com>

The memcpy-from-WC functionality will also be used by the TTM memcpy code.
Move it to core drm, and make the interface do the right thing
even on !X86.

Cc: Christian König <christian.koenig@amd.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Dave Airlie <airlied@gmail.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/Makefile                      |  2 +-
 drivers/gpu/drm/drm_drv.c                     |  2 +
 .../drm/{i915/i915_memcpy.c => drm_memcpy.c}  | 63 ++++++++++++++-----
 drivers/gpu/drm/i915/Makefile                 |  1 -
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c    |  5 +-
 drivers/gpu/drm/i915/gt/selftest_reset.c      |  7 ++-
 drivers/gpu/drm/i915/gt/uc/intel_guc_log.c    | 11 ++--
 drivers/gpu/drm/i915/i915_cmd_parser.c        |  4 +-
 drivers/gpu/drm/i915/i915_drv.c               |  2 -
 drivers/gpu/drm/i915/i915_gpu_error.c         |  8 +--
 drivers/gpu/drm/i915/i915_memcpy.h            | 34 ----------
 .../drm/i915/selftests/intel_memory_region.c  |  7 ++-
 include/drm/drm_memcpy.h                      | 47 ++++++++++++++
 14 files changed, 121 insertions(+), 76 deletions(-)
 rename drivers/gpu/drm/{i915/i915_memcpy.c => drm_memcpy.c} (70%)
 delete mode 100644 drivers/gpu/drm/i915/i915_memcpy.h
 create mode 100644 include/drm/drm_memcpy.h

diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index a91cc7684904..f3ab8586c3d7 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -18,7 +18,7 @@ drm-y       :=	drm_aperture.o drm_auth.o drm_cache.o \
 		drm_dumb_buffers.o drm_mode_config.o drm_vblank.o \
 		drm_syncobj.o drm_lease.o drm_writeback.o drm_client.o \
 		drm_client_modeset.o drm_atomic_uapi.o drm_hdcp.o \
-		drm_managed.o drm_vblank_work.o
+		drm_managed.o drm_vblank_work.o drm_memcpy.o \
 
 drm-$(CONFIG_DRM_LEGACY) += drm_agpsupport.o drm_bufs.o drm_context.o drm_dma.o \
 			    drm_legacy_misc.o drm_lock.o drm_memory.o drm_scatter.o \
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 3d8d68a98b95..351cc2900cf1 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -40,6 +40,7 @@
 #include <drm/drm_drv.h>
 #include <drm/drm_file.h>
 #include <drm/drm_managed.h>
+#include <drm/drm_memcpy.h>
 #include <drm/drm_mode_object.h>
 #include <drm/drm_print.h>
 
@@ -1041,6 +1042,7 @@ static int __init drm_core_init(void)
 
 	drm_connector_ida_init();
 	idr_init(&drm_minors_idr);
+	drm_memcpy_init_early();
 
 	ret = drm_sysfs_init();
 	if (ret < 0) {
diff --git a/drivers/gpu/drm/i915/i915_memcpy.c b/drivers/gpu/drm/drm_memcpy.c
similarity index 70%
rename from drivers/gpu/drm/i915/i915_memcpy.c
rename to drivers/gpu/drm/drm_memcpy.c
index 1b021a4902de..740377749caa 100644
--- a/drivers/gpu/drm/i915/i915_memcpy.c
+++ b/drivers/gpu/drm/drm_memcpy.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
 /*
  * Copyright © 2016 Intel Corporation
  *
@@ -22,16 +23,12 @@
  *
  */
 
+#ifdef CONFIG_X86
+#include <linux/dma-buf-map.h>
 #include <linux/kernel.h>
 #include <asm/fpu/api.h>
 
-#include "i915_memcpy.h"
-
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
-#define CI_BUG_ON(expr) BUG_ON(expr)
-#else
-#define CI_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr)
-#endif
+#include "drm/drm_memcpy.h"
 
 static DEFINE_STATIC_KEY_FALSE(has_movntdqa);
 
@@ -94,23 +91,24 @@ static void __memcpy_ntdqu(void *dst, const void *src, unsigned long len)
 }
 
 /**
- * i915_memcpy_from_wc: perform an accelerated *aligned* read from WC
+ * drm_memcpy_from_wc: perform an accelerated *aligned* read from WC
  * @dst: destination pointer
  * @src: source pointer
  * @len: how many bytes to copy
  *
- * i915_memcpy_from_wc copies @len bytes from @src to @dst using
+ * drm_memcpy_from_wc copies @len bytes from @src to @dst using
  * non-temporal instructions where available. Note that all arguments
  * (@src, @dst) must be aligned to 16 bytes and @len must be a multiple
  * of 16.
  *
  * To test whether accelerated reads from WC are supported, use
- * i915_memcpy_from_wc(NULL, NULL, 0);
+ * drm_memcpy_from_wc(NULL, NULL, 0);
+ * This interface is intended for memremapped memory without the __iomem tag.
  *
  * Returns true if the copy was successful, false if the preconditions
  * are not met.
  */
-bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len)
+bool drm_memcpy_from_wc(void *dst, const void *src, unsigned long len)
 {
 	if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15))
 		return false;
@@ -123,24 +121,53 @@ bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len)
 
 	return false;
 }
+EXPORT_SYMBOL(drm_memcpy_from_wc);
 
 /**
- * i915_unaligned_memcpy_from_wc: perform a mostly accelerated read from WC
+ * drm_memcpy_from_wc_dbm: perform an accelerated *aligned* read from WC with
+ * struct dma_buf_map arguments.
+ * @dst: destination map
+ * @src: source map
+ * @len: how many bytes to copy
+ *
+ * This is identical to drm_memcpy_from_wc, except it's intended for
+ * potentially ioremapped memory rather than memremapped memory.
+ *
+ * Returns true if the copy was successful, false if the preconditions
+ * are not met.
+ */
+bool drm_memcpy_from_wc_dbm(struct dma_buf_map *dst,
+			    const struct dma_buf_map *src,
+			    unsigned long len)
+{
+	/* For X86 we can safely drop __iomem */
+	return drm_memcpy_from_wc(dst->is_iomem ?
+				  (void __force *)dst->vaddr_iomem :
+				  dst->vaddr,
+				  src->is_iomem ?
+				  (void const __force *)src->vaddr_iomem :
+				  src->vaddr,
+				  len);
+}
+EXPORT_SYMBOL(drm_memcpy_from_wc_dbm);
+
+/**
+ * drm_unaligned_memcpy_from_wc: perform a mostly accelerated read from WC
  * @dst: destination pointer
  * @src: source pointer
  * @len: how many bytes to copy
  *
- * Like i915_memcpy_from_wc(), the unaligned variant copies @len bytes from
+ * Like drm_memcpy_from_wc(), the unaligned variant copies @len bytes from
  * @src to @dst using * non-temporal instructions where available, but
  * accepts that its arguments may not be aligned, but are valid for the
  * potential 16-byte read past the end.
+ *
+ * This interface is intended for memremapped memory without the __iomem tag.
  */
-void i915_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len)
+void drm_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len)
 {
 	unsigned long addr;
 
-	CI_BUG_ON(!i915_has_memcpy_from_wc());
-
 	addr = (unsigned long)src;
 	if (!IS_ALIGNED(addr, 16)) {
 		unsigned long x = min(ALIGN(addr, 16) - addr, len);
@@ -155,8 +182,9 @@ void i915_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len
 	if (likely(len))
 		__memcpy_ntdqu(dst, src, DIV_ROUND_UP(len, 16));
 }
+EXPORT_SYMBOL(drm_unaligned_memcpy_from_wc);
 
-void i915_memcpy_init_early(struct drm_i915_private *dev_priv)
+void drm_memcpy_init_early(void)
 {
 	/*
 	 * Some hypervisors (e.g. KVM) don't support VEX-prefix instructions
@@ -166,3 +194,4 @@ void i915_memcpy_init_early(struct drm_i915_private *dev_priv)
 	    !boot_cpu_has(X86_FEATURE_HYPERVISOR))
 		static_branch_enable(&has_movntdqa);
 }
+#endif
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index cb8823570996..998606b7f49f 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -61,7 +61,6 @@ i915-y += i915_drv.o \
 # core library code
 i915-y += \
 	dma_resv_utils.o \
-	i915_memcpy.o \
 	i915_mm.o \
 	i915_sw_fence.o \
 	i915_sw_fence_work.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 297143511f99..77285e421fb8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -10,6 +10,7 @@
 #include <linux/uaccess.h>
 
 #include <drm/drm_syncobj.h>
+#include <drm/drm_memcpy.h>
 
 #include "display/intel_frontbuffer.h"
 
@@ -28,7 +29,6 @@
 #include "i915_sw_fence_work.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
-#include "i915_memcpy.h"
 
 struct eb_vma {
 	struct i915_vma *vma;
@@ -2503,7 +2503,7 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
 		!(batch->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ);
 
 	pw->batch_map = ERR_PTR(-ENODEV);
-	if (needs_clflush && i915_has_memcpy_from_wc())
+	if (needs_clflush && drm_has_memcpy_from_wc())
 		pw->batch_map = i915_gem_object_pin_map(batch, I915_MAP_WC);
 
 	if (IS_ERR(pw->batch_map)) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 5706d471692d..e9247afb0320 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -24,6 +24,8 @@
 
 #include <linux/sched/mm.h>
 
+#include <drm/drm_memcpy.h>
+
 #include "display/intel_frontbuffer.h"
 #include "i915_drv.h"
 #include "i915_gem_clflush.h"
@@ -31,7 +33,6 @@
 #include "i915_gem_mman.h"
 #include "i915_gem_object.h"
 #include "i915_globals.h"
-#include "i915_memcpy.h"
 #include "i915_trace.h"
 
 static struct i915_global_object {
@@ -374,7 +375,7 @@ i915_gem_object_read_from_page_iomap(struct drm_i915_gem_object *obj, u64 offset
 				    PAGE_SIZE);
 
 	src_ptr = src_map + offset_in_page(offset);
-	if (!i915_memcpy_from_wc(dst, (void __force *)src_ptr, size))
+	if (!drm_memcpy_from_wc(dst, (void __force *)src_ptr, size))
 		memcpy_fromio(dst, src_ptr, size);
 
 	io_mapping_unmap(src_map);
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 8784257ec808..92ada67a3835 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -5,9 +5,10 @@
 
 #include <linux/crc32.h>
 
+#include <drm/drm_memcpy.h>
+
 #include "gem/i915_gem_stolen.h"
 
-#include "i915_memcpy.h"
 #include "i915_selftest.h"
 #include "intel_gpu_commands.h"
 #include "selftests/igt_reset.h"
@@ -99,7 +100,7 @@ __igt_reset_stolen(struct intel_gt *gt,
 			memset_io(s, STACK_MAGIC, PAGE_SIZE);
 
 		in = (void __force *)s;
-		if (i915_memcpy_from_wc(tmp, in, PAGE_SIZE))
+		if (drm_memcpy_from_wc(tmp, in, PAGE_SIZE))
 			in = tmp;
 		crc[page] = crc32_le(0, in, PAGE_SIZE);
 
@@ -135,7 +136,7 @@ __igt_reset_stolen(struct intel_gt *gt,
 				      PAGE_SIZE);
 
 		in = (void __force *)s;
-		if (i915_memcpy_from_wc(tmp, in, PAGE_SIZE))
+		if (drm_memcpy_from_wc(tmp, in, PAGE_SIZE))
 			in = tmp;
 		x = crc32_le(0, in, PAGE_SIZE);
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index c36d5eb5bbb9..f045e42be6ca 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -5,9 +5,10 @@
 
 #include <linux/debugfs.h>
 
+#include <drm/drm_memcpy.h>
+
 #include "gt/intel_gt.h"
 #include "i915_drv.h"
-#include "i915_memcpy.h"
 #include "intel_guc_log.h"
 
 static void guc_log_capture_logs(struct intel_guc_log *log);
@@ -295,13 +296,13 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log)
 
 		/* Just copy the newly written data */
 		if (read_offset > write_offset) {
-			i915_memcpy_from_wc(dst_data, src_data, write_offset);
+			drm_memcpy_from_wc(dst_data, src_data, write_offset);
 			bytes_to_copy = buffer_size - read_offset;
 		} else {
 			bytes_to_copy = write_offset - read_offset;
 		}
-		i915_memcpy_from_wc(dst_data + read_offset,
-				    src_data + read_offset, bytes_to_copy);
+		drm_memcpy_from_wc(dst_data + read_offset,
+				   src_data + read_offset, bytes_to_copy);
 
 		src_data += buffer_size;
 		dst_data += buffer_size;
@@ -569,7 +570,7 @@ int intel_guc_log_relay_open(struct intel_guc_log *log)
 	 * it should be present on the chipsets supporting GuC based
 	 * submisssions.
 	 */
-	if (!i915_has_memcpy_from_wc()) {
+	if (!drm_has_memcpy_from_wc()) {
 		ret = -ENXIO;
 		goto out_unlock;
 	}
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 5b4b2bd46e7c..98653f1a2b1d 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -24,12 +24,12 @@
  *    Brad Volkin <bradley.d.volkin@intel.com>
  *
  */
+#include <drm/drm_memcpy.h>
 
 #include "gt/intel_engine.h"
 #include "gt/intel_gpu_commands.h"
 
 #include "i915_drv.h"
-#include "i915_memcpy.h"
 
 /**
  * DOC: batch buffer command parser
@@ -1152,7 +1152,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 
 	if (src) {
 		GEM_BUG_ON(!needs_clflush);
-		i915_unaligned_memcpy_from_wc(dst, src + offset, length);
+		drm_unaligned_memcpy_from_wc(dst, src + offset, length);
 	} else {
 		struct scatterlist *sg;
 		void *ptr;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 122dd297b6af..0df9dd62c717 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -72,7 +72,6 @@
 #include "i915_drv.h"
 #include "i915_ioc32.h"
 #include "i915_irq.h"
-#include "i915_memcpy.h"
 #include "i915_perf.h"
 #include "i915_query.h"
 #include "i915_suspend.h"
@@ -325,7 +324,6 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv)
 	mutex_init(&dev_priv->pps_mutex);
 	mutex_init(&dev_priv->hdcp_comp_mutex);
 
-	i915_memcpy_init_early(dev_priv);
 	intel_runtime_pm_init_early(&dev_priv->runtime_pm);
 
 	ret = i915_workqueues_init(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 99ca242ec13b..ee11920fbea5 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -34,6 +34,7 @@
 #include <linux/utsname.h>
 #include <linux/zlib.h>
 
+#include <drm/drm_memcpy.h>
 #include <drm/drm_print.h>
 
 #include "display/intel_csr.h"
@@ -46,7 +47,6 @@
 
 #include "i915_drv.h"
 #include "i915_gpu_error.h"
-#include "i915_memcpy.h"
 #include "i915_scatterlist.h"
 
 #define ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
@@ -255,7 +255,7 @@ static bool compress_init(struct i915_vma_compress *c)
 	}
 
 	c->tmp = NULL;
-	if (i915_has_memcpy_from_wc())
+	if (drm_has_memcpy_from_wc())
 		c->tmp = pool_alloc(&c->pool, ALLOW_FAIL);
 
 	return true;
@@ -295,7 +295,7 @@ static int compress_page(struct i915_vma_compress *c,
 	struct z_stream_s *zstream = &c->zstream;
 
 	zstream->next_in = src;
-	if (wc && c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE))
+	if (wc && c->tmp && drm_memcpy_from_wc(c->tmp, src, PAGE_SIZE))
 		zstream->next_in = c->tmp;
 	zstream->avail_in = PAGE_SIZE;
 
@@ -395,7 +395,7 @@ static int compress_page(struct i915_vma_compress *c,
 	if (!ptr)
 		return -ENOMEM;
 
-	if (!(wc && i915_memcpy_from_wc(ptr, src, PAGE_SIZE)))
+	if (!(wc && drm_memcpy_from_wc(ptr, src, PAGE_SIZE)))
 		memcpy(ptr, src, PAGE_SIZE);
 	dst->pages[dst->page_count++] = ptr;
 	cond_resched();
diff --git a/drivers/gpu/drm/i915/i915_memcpy.h b/drivers/gpu/drm/i915/i915_memcpy.h
deleted file mode 100644
index 3df063a3293b..000000000000
--- a/drivers/gpu/drm/i915/i915_memcpy.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2019 Intel Corporation
- */
-
-#ifndef __I915_MEMCPY_H__
-#define __I915_MEMCPY_H__
-
-#include <linux/types.h>
-
-struct drm_i915_private;
-
-void i915_memcpy_init_early(struct drm_i915_private *i915);
-
-bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len);
-void i915_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len);
-
-/* The movntdqa instructions used for memcpy-from-wc require 16-byte alignment,
- * as well as SSE4.1 support. i915_memcpy_from_wc() will report if it cannot
- * perform the operation. To check beforehand, pass in the parameters to
- * to i915_can_memcpy_from_wc() - since we only care about the low 4 bits,
- * you only need to pass in the minor offsets, page-aligned pointers are
- * always valid.
- *
- * For just checking for SSE4.1, in the foreknowledge that the future use
- * will be correctly aligned, just use i915_has_memcpy_from_wc().
- */
-#define i915_can_memcpy_from_wc(dst, src, len) \
-	i915_memcpy_from_wc((void *)((unsigned long)(dst) | (unsigned long)(src) | (len)), NULL, 0)
-
-#define i915_has_memcpy_from_wc() \
-	i915_memcpy_from_wc(NULL, NULL, 0)
-
-#endif /* __I915_MEMCPY_H__ */
diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
index c85d516b85cd..6bb399e9be78 100644
--- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
@@ -6,6 +6,8 @@
 #include <linux/prime_numbers.h>
 #include <linux/sort.h>
 
+#include <drm/drm_memcpy.h>
+
 #include "../i915_selftest.h"
 
 #include "mock_drm.h"
@@ -20,7 +22,6 @@
 #include "gem/selftests/mock_context.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gt.h"
-#include "i915_memcpy.h"
 #include "selftests/igt_flush_test.h"
 #include "selftests/i915_random.h"
 
@@ -901,7 +902,7 @@ static inline void igt_memcpy(void *dst, const void *src, size_t size)
 
 static inline void igt_memcpy_from_wc(void *dst, const void *src, size_t size)
 {
-	i915_memcpy_from_wc(dst, src, size);
+	drm_memcpy_from_wc(dst, src, size);
 }
 
 static int _perf_memcpy(struct intel_memory_region *src_mr,
@@ -925,7 +926,7 @@ static int _perf_memcpy(struct intel_memory_region *src_mr,
 		{
 			"memcpy_from_wc",
 			igt_memcpy_from_wc,
-			!i915_has_memcpy_from_wc(),
+			!drm_has_memcpy_from_wc(),
 		},
 	};
 	struct drm_i915_gem_object *src, *dst;
diff --git a/include/drm/drm_memcpy.h b/include/drm/drm_memcpy.h
new file mode 100644
index 000000000000..b1d709753b4d
--- /dev/null
+++ b/include/drm/drm_memcpy.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __DRM_MEMCPY_H__
+#define __DRM_MEMCPY_H__
+
+#include <linux/types.h>
+
+struct dma_buf_map;
+
+#ifdef CONFIG_X86
+bool drm_memcpy_from_wc(void *dst, const void *src, unsigned long len);
+bool drm_memcpy_from_wc_dbm(struct dma_buf_map *dst,
+			    const struct dma_buf_map *src,
+			    unsigned long len);
+void drm_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len);
+
+/* The movntdqa instructions used for memcpy-from-wc require 16-byte alignment,
+ * as well as SSE4.1 support. drm_memcpy_from_wc() will report if it cannot
+ * perform the operation. To check beforehand, pass in the parameters to
+ * drm_can_memcpy_from_wc() - since we only care about the low 4 bits,
+ * you only need to pass in the minor offsets, page-aligned pointers are
+ * always valid.
+ *
+ * For just checking for SSE4.1, in the foreknowledge that the future use
+ * will be correctly aligned, just use drm_has_memcpy_from_wc().
+ */
+#define drm_can_memcpy_from_wc(dst, src, len) \
+	drm_memcpy_from_wc((void *)((unsigned long)(dst) | (unsigned long)(src) | (len)), NULL, 0)
+
+#define drm_has_memcpy_from_wc() \
+	drm_memcpy_from_wc(NULL, NULL, 0)
+
+void drm_memcpy_init_early(void);
+
+#else
+
+#define drm_memcpy_from_wc(_dst, _src, _len) (false)
+#define drm_memcpy_from_wc_dbm(_dst, _src, _len) (false)
+#define drm_can_memcpy_from_wc(_dst, _src, _len) (false)
+#define drm_has_memcpy_from_wc() (false)
+#define drm_unaligned_memcpy_from_wc(_dst, _src, _len) WARN_ON(1)
+#define drm_memcpy_init_early() do {} while (0)
+#endif /* CONFIG_X86 */
+#endif /* __DRM_MEMCPY_H__ */
-- 
2.31.1


WARNING: multiple messages have this Message-ID (diff)
From: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
To: intel-gfx@lists.freedesktop.org, dri-devel@lists.freedesktop.org
Cc: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>,
	"Christian König" <christian.koenig@amd.com>,
	"Daniel Vetter" <daniel.vetter@ffwll.ch>
Subject: [Intel-gfx] [PATCH v3 07/12] drm, drm/i915: Move the memcpy_from_wc functionality to core drm
Date: Fri, 21 May 2021 17:32:48 +0200	[thread overview]
Message-ID: <20210521153253.518037-8-thomas.hellstrom@linux.intel.com> (raw)
In-Reply-To: <20210521153253.518037-1-thomas.hellstrom@linux.intel.com>

The memcpy-from-WC functionality will also be used by the TTM memcpy code.
Move it to core drm, and make the interface do the right thing
even on !X86.

Cc: Christian König <christian.koenig@amd.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Dave Airlie <airlied@gmail.com>
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
---
 drivers/gpu/drm/Makefile                      |  2 +-
 drivers/gpu/drm/drm_drv.c                     |  2 +
 .../drm/{i915/i915_memcpy.c => drm_memcpy.c}  | 63 ++++++++++++++-----
 drivers/gpu/drm/i915/Makefile                 |  1 -
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    |  4 +-
 drivers/gpu/drm/i915/gem/i915_gem_object.c    |  5 +-
 drivers/gpu/drm/i915/gt/selftest_reset.c      |  7 ++-
 drivers/gpu/drm/i915/gt/uc/intel_guc_log.c    | 11 ++--
 drivers/gpu/drm/i915/i915_cmd_parser.c        |  4 +-
 drivers/gpu/drm/i915/i915_drv.c               |  2 -
 drivers/gpu/drm/i915/i915_gpu_error.c         |  8 +--
 drivers/gpu/drm/i915/i915_memcpy.h            | 34 ----------
 .../drm/i915/selftests/intel_memory_region.c  |  7 ++-
 include/drm/drm_memcpy.h                      | 47 ++++++++++++++
 14 files changed, 121 insertions(+), 76 deletions(-)
 rename drivers/gpu/drm/{i915/i915_memcpy.c => drm_memcpy.c} (70%)
 delete mode 100644 drivers/gpu/drm/i915/i915_memcpy.h
 create mode 100644 include/drm/drm_memcpy.h

diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index a91cc7684904..f3ab8586c3d7 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -18,7 +18,7 @@ drm-y       :=	drm_aperture.o drm_auth.o drm_cache.o \
 		drm_dumb_buffers.o drm_mode_config.o drm_vblank.o \
 		drm_syncobj.o drm_lease.o drm_writeback.o drm_client.o \
 		drm_client_modeset.o drm_atomic_uapi.o drm_hdcp.o \
-		drm_managed.o drm_vblank_work.o
+		drm_managed.o drm_vblank_work.o drm_memcpy.o \
 
 drm-$(CONFIG_DRM_LEGACY) += drm_agpsupport.o drm_bufs.o drm_context.o drm_dma.o \
 			    drm_legacy_misc.o drm_lock.o drm_memory.o drm_scatter.o \
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 3d8d68a98b95..351cc2900cf1 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -40,6 +40,7 @@
 #include <drm/drm_drv.h>
 #include <drm/drm_file.h>
 #include <drm/drm_managed.h>
+#include <drm/drm_memcpy.h>
 #include <drm/drm_mode_object.h>
 #include <drm/drm_print.h>
 
@@ -1041,6 +1042,7 @@ static int __init drm_core_init(void)
 
 	drm_connector_ida_init();
 	idr_init(&drm_minors_idr);
+	drm_memcpy_init_early();
 
 	ret = drm_sysfs_init();
 	if (ret < 0) {
diff --git a/drivers/gpu/drm/i915/i915_memcpy.c b/drivers/gpu/drm/drm_memcpy.c
similarity index 70%
rename from drivers/gpu/drm/i915/i915_memcpy.c
rename to drivers/gpu/drm/drm_memcpy.c
index 1b021a4902de..740377749caa 100644
--- a/drivers/gpu/drm/i915/i915_memcpy.c
+++ b/drivers/gpu/drm/drm_memcpy.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
 /*
  * Copyright © 2016 Intel Corporation
  *
@@ -22,16 +23,12 @@
  *
  */
 
+#ifdef CONFIG_X86
+#include <linux/dma-buf-map.h>
 #include <linux/kernel.h>
 #include <asm/fpu/api.h>
 
-#include "i915_memcpy.h"
-
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
-#define CI_BUG_ON(expr) BUG_ON(expr)
-#else
-#define CI_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr)
-#endif
+#include "drm/drm_memcpy.h"
 
 static DEFINE_STATIC_KEY_FALSE(has_movntdqa);
 
@@ -94,23 +91,24 @@ static void __memcpy_ntdqu(void *dst, const void *src, unsigned long len)
 }
 
 /**
- * i915_memcpy_from_wc: perform an accelerated *aligned* read from WC
+ * drm_memcpy_from_wc: perform an accelerated *aligned* read from WC
  * @dst: destination pointer
  * @src: source pointer
  * @len: how many bytes to copy
  *
- * i915_memcpy_from_wc copies @len bytes from @src to @dst using
+ * drm_memcpy_from_wc copies @len bytes from @src to @dst using
  * non-temporal instructions where available. Note that all arguments
  * (@src, @dst) must be aligned to 16 bytes and @len must be a multiple
  * of 16.
  *
  * To test whether accelerated reads from WC are supported, use
- * i915_memcpy_from_wc(NULL, NULL, 0);
+ * drm_memcpy_from_wc(NULL, NULL, 0);
+ * This interface is intended for memremapped memory without the __iomem tag.
  *
  * Returns true if the copy was successful, false if the preconditions
  * are not met.
  */
-bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len)
+bool drm_memcpy_from_wc(void *dst, const void *src, unsigned long len)
 {
 	if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15))
 		return false;
@@ -123,24 +121,53 @@ bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len)
 
 	return false;
 }
+EXPORT_SYMBOL(drm_memcpy_from_wc);
 
 /**
- * i915_unaligned_memcpy_from_wc: perform a mostly accelerated read from WC
+ * drm_memcpy_from_wc_dbm: perform an accelerated *aligned* read from WC with
+ * struct dma_buf_map arguments.
+ * @dst: destination map
+ * @src: source map
+ * @len: how many bytes to copy
+ *
+ * This is identical to drm_memcpy_from_wc, except it's intended for
+ * potentially ioremapped memory rather than memremapped memory.
+ *
+ * Returns true if the copy was successful, false if the preconditions
+ * are not met.
+ */
+bool drm_memcpy_from_wc_dbm(struct dma_buf_map *dst,
+			    const struct dma_buf_map *src,
+			    unsigned long len)
+{
+	/* For X86 we can safely drop __iomem */
+	return drm_memcpy_from_wc(dst->is_iomem ?
+				  (void __force *)dst->vaddr_iomem :
+				  dst->vaddr,
+				  src->is_iomem ?
+				  (void const __force *)src->vaddr_iomem :
+				  src->vaddr,
+				  len);
+}
+EXPORT_SYMBOL(drm_memcpy_from_wc_dbm);
+
+/**
+ * drm_unaligned_memcpy_from_wc: perform a mostly accelerated read from WC
  * @dst: destination pointer
  * @src: source pointer
  * @len: how many bytes to copy
  *
- * Like i915_memcpy_from_wc(), the unaligned variant copies @len bytes from
+ * Like drm_memcpy_from_wc(), the unaligned variant copies @len bytes from
  * @src to @dst using * non-temporal instructions where available, but
  * accepts that its arguments may not be aligned, but are valid for the
  * potential 16-byte read past the end.
+ *
+ * This interface is intended for memremapped memory without the __iomem tag.
  */
-void i915_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len)
+void drm_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len)
 {
 	unsigned long addr;
 
-	CI_BUG_ON(!i915_has_memcpy_from_wc());
-
 	addr = (unsigned long)src;
 	if (!IS_ALIGNED(addr, 16)) {
 		unsigned long x = min(ALIGN(addr, 16) - addr, len);
@@ -155,8 +182,9 @@ void i915_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len
 	if (likely(len))
 		__memcpy_ntdqu(dst, src, DIV_ROUND_UP(len, 16));
 }
+EXPORT_SYMBOL(drm_unaligned_memcpy_from_wc);
 
-void i915_memcpy_init_early(struct drm_i915_private *dev_priv)
+void drm_memcpy_init_early(void)
 {
 	/*
 	 * Some hypervisors (e.g. KVM) don't support VEX-prefix instructions
@@ -166,3 +194,4 @@ void i915_memcpy_init_early(struct drm_i915_private *dev_priv)
 	    !boot_cpu_has(X86_FEATURE_HYPERVISOR))
 		static_branch_enable(&has_movntdqa);
 }
+#endif
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index cb8823570996..998606b7f49f 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -61,7 +61,6 @@ i915-y += i915_drv.o \
 # core library code
 i915-y += \
 	dma_resv_utils.o \
-	i915_memcpy.o \
 	i915_mm.o \
 	i915_sw_fence.o \
 	i915_sw_fence_work.o \
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 297143511f99..77285e421fb8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -10,6 +10,7 @@
 #include <linux/uaccess.h>
 
 #include <drm/drm_syncobj.h>
+#include <drm/drm_memcpy.h>
 
 #include "display/intel_frontbuffer.h"
 
@@ -28,7 +29,6 @@
 #include "i915_sw_fence_work.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
-#include "i915_memcpy.h"
 
 struct eb_vma {
 	struct i915_vma *vma;
@@ -2503,7 +2503,7 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
 		!(batch->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ);
 
 	pw->batch_map = ERR_PTR(-ENODEV);
-	if (needs_clflush && i915_has_memcpy_from_wc())
+	if (needs_clflush && drm_has_memcpy_from_wc())
 		pw->batch_map = i915_gem_object_pin_map(batch, I915_MAP_WC);
 
 	if (IS_ERR(pw->batch_map)) {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 5706d471692d..e9247afb0320 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -24,6 +24,8 @@
 
 #include <linux/sched/mm.h>
 
+#include <drm/drm_memcpy.h>
+
 #include "display/intel_frontbuffer.h"
 #include "i915_drv.h"
 #include "i915_gem_clflush.h"
@@ -31,7 +33,6 @@
 #include "i915_gem_mman.h"
 #include "i915_gem_object.h"
 #include "i915_globals.h"
-#include "i915_memcpy.h"
 #include "i915_trace.h"
 
 static struct i915_global_object {
@@ -374,7 +375,7 @@ i915_gem_object_read_from_page_iomap(struct drm_i915_gem_object *obj, u64 offset
 				    PAGE_SIZE);
 
 	src_ptr = src_map + offset_in_page(offset);
-	if (!i915_memcpy_from_wc(dst, (void __force *)src_ptr, size))
+	if (!drm_memcpy_from_wc(dst, (void __force *)src_ptr, size))
 		memcpy_fromio(dst, src_ptr, size);
 
 	io_mapping_unmap(src_map);
diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c
index 8784257ec808..92ada67a3835 100644
--- a/drivers/gpu/drm/i915/gt/selftest_reset.c
+++ b/drivers/gpu/drm/i915/gt/selftest_reset.c
@@ -5,9 +5,10 @@
 
 #include <linux/crc32.h>
 
+#include <drm/drm_memcpy.h>
+
 #include "gem/i915_gem_stolen.h"
 
-#include "i915_memcpy.h"
 #include "i915_selftest.h"
 #include "intel_gpu_commands.h"
 #include "selftests/igt_reset.h"
@@ -99,7 +100,7 @@ __igt_reset_stolen(struct intel_gt *gt,
 			memset_io(s, STACK_MAGIC, PAGE_SIZE);
 
 		in = (void __force *)s;
-		if (i915_memcpy_from_wc(tmp, in, PAGE_SIZE))
+		if (drm_memcpy_from_wc(tmp, in, PAGE_SIZE))
 			in = tmp;
 		crc[page] = crc32_le(0, in, PAGE_SIZE);
 
@@ -135,7 +136,7 @@ __igt_reset_stolen(struct intel_gt *gt,
 				      PAGE_SIZE);
 
 		in = (void __force *)s;
-		if (i915_memcpy_from_wc(tmp, in, PAGE_SIZE))
+		if (drm_memcpy_from_wc(tmp, in, PAGE_SIZE))
 			in = tmp;
 		x = crc32_le(0, in, PAGE_SIZE);
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
index c36d5eb5bbb9..f045e42be6ca 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
@@ -5,9 +5,10 @@
 
 #include <linux/debugfs.h>
 
+#include <drm/drm_memcpy.h>
+
 #include "gt/intel_gt.h"
 #include "i915_drv.h"
-#include "i915_memcpy.h"
 #include "intel_guc_log.h"
 
 static void guc_log_capture_logs(struct intel_guc_log *log);
@@ -295,13 +296,13 @@ static void guc_read_update_log_buffer(struct intel_guc_log *log)
 
 		/* Just copy the newly written data */
 		if (read_offset > write_offset) {
-			i915_memcpy_from_wc(dst_data, src_data, write_offset);
+			drm_memcpy_from_wc(dst_data, src_data, write_offset);
 			bytes_to_copy = buffer_size - read_offset;
 		} else {
 			bytes_to_copy = write_offset - read_offset;
 		}
-		i915_memcpy_from_wc(dst_data + read_offset,
-				    src_data + read_offset, bytes_to_copy);
+		drm_memcpy_from_wc(dst_data + read_offset,
+				   src_data + read_offset, bytes_to_copy);
 
 		src_data += buffer_size;
 		dst_data += buffer_size;
@@ -569,7 +570,7 @@ int intel_guc_log_relay_open(struct intel_guc_log *log)
 	 * it should be present on the chipsets supporting GuC based
 	 * submisssions.
 	 */
-	if (!i915_has_memcpy_from_wc()) {
+	if (!drm_has_memcpy_from_wc()) {
 		ret = -ENXIO;
 		goto out_unlock;
 	}
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 5b4b2bd46e7c..98653f1a2b1d 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -24,12 +24,12 @@
  *    Brad Volkin <bradley.d.volkin@intel.com>
  *
  */
+#include <drm/drm_memcpy.h>
 
 #include "gt/intel_engine.h"
 #include "gt/intel_gpu_commands.h"
 
 #include "i915_drv.h"
-#include "i915_memcpy.h"
 
 /**
  * DOC: batch buffer command parser
@@ -1152,7 +1152,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
 
 	if (src) {
 		GEM_BUG_ON(!needs_clflush);
-		i915_unaligned_memcpy_from_wc(dst, src + offset, length);
+		drm_unaligned_memcpy_from_wc(dst, src + offset, length);
 	} else {
 		struct scatterlist *sg;
 		void *ptr;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 122dd297b6af..0df9dd62c717 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -72,7 +72,6 @@
 #include "i915_drv.h"
 #include "i915_ioc32.h"
 #include "i915_irq.h"
-#include "i915_memcpy.h"
 #include "i915_perf.h"
 #include "i915_query.h"
 #include "i915_suspend.h"
@@ -325,7 +324,6 @@ static int i915_driver_early_probe(struct drm_i915_private *dev_priv)
 	mutex_init(&dev_priv->pps_mutex);
 	mutex_init(&dev_priv->hdcp_comp_mutex);
 
-	i915_memcpy_init_early(dev_priv);
 	intel_runtime_pm_init_early(&dev_priv->runtime_pm);
 
 	ret = i915_workqueues_init(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 99ca242ec13b..ee11920fbea5 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -34,6 +34,7 @@
 #include <linux/utsname.h>
 #include <linux/zlib.h>
 
+#include <drm/drm_memcpy.h>
 #include <drm/drm_print.h>
 
 #include "display/intel_csr.h"
@@ -46,7 +47,6 @@
 
 #include "i915_drv.h"
 #include "i915_gpu_error.h"
-#include "i915_memcpy.h"
 #include "i915_scatterlist.h"
 
 #define ALLOW_FAIL (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
@@ -255,7 +255,7 @@ static bool compress_init(struct i915_vma_compress *c)
 	}
 
 	c->tmp = NULL;
-	if (i915_has_memcpy_from_wc())
+	if (drm_has_memcpy_from_wc())
 		c->tmp = pool_alloc(&c->pool, ALLOW_FAIL);
 
 	return true;
@@ -295,7 +295,7 @@ static int compress_page(struct i915_vma_compress *c,
 	struct z_stream_s *zstream = &c->zstream;
 
 	zstream->next_in = src;
-	if (wc && c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE))
+	if (wc && c->tmp && drm_memcpy_from_wc(c->tmp, src, PAGE_SIZE))
 		zstream->next_in = c->tmp;
 	zstream->avail_in = PAGE_SIZE;
 
@@ -395,7 +395,7 @@ static int compress_page(struct i915_vma_compress *c,
 	if (!ptr)
 		return -ENOMEM;
 
-	if (!(wc && i915_memcpy_from_wc(ptr, src, PAGE_SIZE)))
+	if (!(wc && drm_memcpy_from_wc(ptr, src, PAGE_SIZE)))
 		memcpy(ptr, src, PAGE_SIZE);
 	dst->pages[dst->page_count++] = ptr;
 	cond_resched();
diff --git a/drivers/gpu/drm/i915/i915_memcpy.h b/drivers/gpu/drm/i915/i915_memcpy.h
deleted file mode 100644
index 3df063a3293b..000000000000
--- a/drivers/gpu/drm/i915/i915_memcpy.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2019 Intel Corporation
- */
-
-#ifndef __I915_MEMCPY_H__
-#define __I915_MEMCPY_H__
-
-#include <linux/types.h>
-
-struct drm_i915_private;
-
-void i915_memcpy_init_early(struct drm_i915_private *i915);
-
-bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len);
-void i915_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len);
-
-/* The movntdqa instructions used for memcpy-from-wc require 16-byte alignment,
- * as well as SSE4.1 support. i915_memcpy_from_wc() will report if it cannot
- * perform the operation. To check beforehand, pass in the parameters to
- * to i915_can_memcpy_from_wc() - since we only care about the low 4 bits,
- * you only need to pass in the minor offsets, page-aligned pointers are
- * always valid.
- *
- * For just checking for SSE4.1, in the foreknowledge that the future use
- * will be correctly aligned, just use i915_has_memcpy_from_wc().
- */
-#define i915_can_memcpy_from_wc(dst, src, len) \
-	i915_memcpy_from_wc((void *)((unsigned long)(dst) | (unsigned long)(src) | (len)), NULL, 0)
-
-#define i915_has_memcpy_from_wc() \
-	i915_memcpy_from_wc(NULL, NULL, 0)
-
-#endif /* __I915_MEMCPY_H__ */
diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
index c85d516b85cd..6bb399e9be78 100644
--- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c
@@ -6,6 +6,8 @@
 #include <linux/prime_numbers.h>
 #include <linux/sort.h>
 
+#include <drm/drm_memcpy.h>
+
 #include "../i915_selftest.h"
 
 #include "mock_drm.h"
@@ -20,7 +22,6 @@
 #include "gem/selftests/mock_context.h"
 #include "gt/intel_engine_user.h"
 #include "gt/intel_gt.h"
-#include "i915_memcpy.h"
 #include "selftests/igt_flush_test.h"
 #include "selftests/i915_random.h"
 
@@ -901,7 +902,7 @@ static inline void igt_memcpy(void *dst, const void *src, size_t size)
 
 static inline void igt_memcpy_from_wc(void *dst, const void *src, size_t size)
 {
-	i915_memcpy_from_wc(dst, src, size);
+	drm_memcpy_from_wc(dst, src, size);
 }
 
 static int _perf_memcpy(struct intel_memory_region *src_mr,
@@ -925,7 +926,7 @@ static int _perf_memcpy(struct intel_memory_region *src_mr,
 		{
 			"memcpy_from_wc",
 			igt_memcpy_from_wc,
-			!i915_has_memcpy_from_wc(),
+			!drm_has_memcpy_from_wc(),
 		},
 	};
 	struct drm_i915_gem_object *src, *dst;
diff --git a/include/drm/drm_memcpy.h b/include/drm/drm_memcpy.h
new file mode 100644
index 000000000000..b1d709753b4d
--- /dev/null
+++ b/include/drm/drm_memcpy.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __DRM_MEMCPY_H__
+#define __DRM_MEMCPY_H__
+
+#include <linux/types.h>
+
+struct dma_buf_map;
+
+#ifdef CONFIG_X86
+bool drm_memcpy_from_wc(void *dst, const void *src, unsigned long len);
+bool drm_memcpy_from_wc_dbm(struct dma_buf_map *dst,
+			    const struct dma_buf_map *src,
+			    unsigned long len);
+void drm_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len);
+
+/* The movntdqa instructions used for memcpy-from-wc require 16-byte alignment,
+ * as well as SSE4.1 support. drm_memcpy_from_wc() will report if it cannot
+ * perform the operation. To check beforehand, pass in the parameters to
+ * drm_can_memcpy_from_wc(). Since we only care about the low 4 bits, you
+ * only need to pass in the minor offsets; page-aligned pointers are
+ * always valid.
+ *
+ * For just checking for SSE4.1, in the foreknowledge that the future use
+ * will be correctly aligned, just use drm_has_memcpy_from_wc().
+ */
+#define drm_can_memcpy_from_wc(dst, src, len) \
+	drm_memcpy_from_wc((void *)((unsigned long)(dst) | (unsigned long)(src) | (len)), NULL, 0)
+
+#define drm_has_memcpy_from_wc() \
+	drm_memcpy_from_wc(NULL, NULL, 0)
+
+void drm_memcpy_init_early(void);
+
+#else
+
+#define drm_memcpy_from_wc(_dst, _src, _len) (false)
+#define drm_memcpy_from_wc_dbm(_dst, _src, _len) (false)
+#define drm_can_memcpy_from_wc(_dst, _src, _len) (false)
+#define drm_has_memcpy_from_wc() (false)
+#define drm_unaligned_memcpy_from_wc(_dst, _src, _len) WARN_ON(1)
+#define drm_memcpy_init_early() do {} while (0)
+#endif /* CONFIG_X86 */
+#endif /* __DRM_MEMCPY_H__ */
-- 
2.31.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

  parent reply	other threads:[~2021-05-21 15:33 UTC|newest]

Thread overview: 60+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-21 15:32 [PATCH v3 00/12] drm/i915: Move LMEM (VRAM) management over to TTM Thomas Hellström
2021-05-21 15:32 ` [Intel-gfx] " Thomas Hellström
2021-05-21 15:32 ` [PATCH v3 01/12] drm/i915: Untangle the vma pages_mutex Thomas Hellström
2021-05-21 15:32   ` [Intel-gfx] " Thomas Hellström
2021-05-21 15:32 ` [PATCH v3 02/12] drm/i915: Don't free shared locks while shared Thomas Hellström
2021-05-21 15:32   ` [Intel-gfx] " Thomas Hellström
2021-05-21 15:32 ` [PATCH v3 03/12] drm/i915: Fix i915_sg_page_sizes to record dma segments rather than physical pages Thomas Hellström
2021-05-21 15:32   ` [Intel-gfx] " Thomas Hellström
2021-05-21 15:32 ` [PATCH v3 04/12] drm/i915/ttm Initialize the ttm device and memory managers Thomas Hellström
2021-05-21 15:32   ` [Intel-gfx] " Thomas Hellström
2021-05-21 15:32 ` [PATCH v3 05/12] drm/i915/ttm: Embed a ttm buffer object in the i915 gem object Thomas Hellström
2021-05-21 15:32   ` [Intel-gfx] " Thomas Hellström
2021-05-21 15:32 ` [PATCH v3 06/12] drm/ttm: Add a generic TTM memcpy move for page-based iomem Thomas Hellström
2021-05-21 15:32   ` [Intel-gfx] " Thomas Hellström
2021-05-25  9:18   ` Matthew Auld
2021-05-25  9:18     ` Matthew Auld
2021-05-25  9:32     ` Thomas Hellström
2021-05-25  9:32       ` Thomas Hellström
2021-05-25  9:58       ` Matthew Auld
2021-05-25  9:58         ` Matthew Auld
2021-05-25 10:07         ` Thomas Hellström
2021-05-25 10:07           ` Thomas Hellström
2021-05-25 15:48           ` Christian König
2021-05-25 15:48             ` Christian König
2021-05-26  7:39             ` Thomas Hellström
2021-05-26  7:39               ` Thomas Hellström
2021-05-26 10:45               ` Christian König
2021-05-26 10:45                 ` Christian König
2021-05-26 10:57                 ` Thomas Hellström
2021-05-26 10:57                   ` Thomas Hellström
2021-05-21 15:32 ` Thomas Hellström [this message]
2021-05-21 15:32   ` [Intel-gfx] [PATCH v3 07/12] drm, drm/i915: Move the memcpy_from_wc functionality to core drm Thomas Hellström
2021-05-24 16:45   ` Matthew Auld
2021-05-24 16:45     ` Matthew Auld
2021-05-24 18:12     ` Thomas Hellström
2021-05-24 18:12       ` Thomas Hellström
2021-05-21 15:32 ` [PATCH v3 08/12] drm/ttm: Use drm_memcpy_from_wc_dbm for TTM bo moves Thomas Hellström
2021-05-21 15:32   ` [Intel-gfx] " Thomas Hellström
2021-05-24 18:16   ` Matthew Auld
2021-05-24 18:16     ` [Intel-gfx] " Matthew Auld
2021-05-24 18:47     ` Thomas Hellström
2021-05-24 18:47       ` [Intel-gfx] " Thomas Hellström
2021-05-26 12:48   ` Christian König
2021-05-26 12:48     ` [Intel-gfx] " Christian König
2021-05-21 15:32 ` [PATCH v3 09/12] drm/ttm: Document and optimize ttm_bo_pipeline_gutting() Thomas Hellström
2021-05-21 15:32   ` [Intel-gfx] " Thomas Hellström
2021-05-25 11:00   ` Matthew Auld
2021-05-25 11:00     ` Matthew Auld
2021-05-25 13:37     ` Thomas Hellström
2021-05-25 13:37       ` Thomas Hellström
2021-05-21 15:32 ` [PATCH v3 10/12] drm/ttm, drm/amdgpu: Allow the driver some control over swapping Thomas Hellström
2021-05-21 15:32   ` [Intel-gfx] " Thomas Hellström
2021-05-21 15:32 ` [PATCH v3 11/12] drm/i915/ttm: Introduce a TTM i915 gem object backend Thomas Hellström
2021-05-21 15:32   ` [Intel-gfx] " Thomas Hellström
2021-05-21 15:32 ` [PATCH v3 12/12] drm/i915/lmem: Verify checks for lmem residency Thomas Hellström
2021-05-21 15:32   ` [Intel-gfx] " Thomas Hellström
2021-05-21 16:06 ` [Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for drm/i915: Move LMEM (VRAM) management over to TTM (rev3) Patchwork
2021-05-21 16:09 ` [Intel-gfx] ✗ Fi.CI.SPARSE: " Patchwork
2021-05-21 16:36 ` [Intel-gfx] ✓ Fi.CI.BAT: success " Patchwork
2021-05-24  0:10 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210521153253.518037-8-thomas.hellstrom@linux.intel.com \
    --to=thomas.hellstrom@linux.intel.com \
    --cc=christian.koenig@amd.com \
    --cc=daniel.vetter@ffwll.ch \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.