All of lore.kernel.org
 help / color / mirror / Atom feed
From: Chris Wilson <chris@chris-wilson.co.uk>
To: igt-dev@lists.freedesktop.org
Subject: [igt-dev] [PATCH igt v3] lib: Provide an accelerated routine for readback from WC
Date: Tue, 27 Feb 2018 22:20:57 +0000	[thread overview]
Message-ID: <20180227222057.11617-1-chris@chris-wilson.co.uk> (raw)
In-Reply-To: <20180227215040.11297-1-chris@chris-wilson.co.uk>

Reading from write-combining (WC) memory is awfully slow: each access is
uncached and therefore performed synchronously, stalling until the memory
load completes. To address this, SSE4.1 introduced a streaming load
instruction (MOVNTDQA) that uses a small internal buffer to accelerate
reading back a cacheline at a time from uncached memory. Use it to
provide a fast memcpy for reading back from WC mappings.

v2: Don't be lazy and handle misalignment.
v3: Switch out of sse41 before emitting the generic memcpy routine

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 lib/igt_fb.c  |  3 +-
 lib/igt_x86.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/igt_x86.h |  2 ++
 3 files changed, 103 insertions(+), 1 deletion(-)

diff --git a/lib/igt_fb.c b/lib/igt_fb.c
index ecd73053..7404ba7c 100644
--- a/lib/igt_fb.c
+++ b/lib/igt_fb.c
@@ -32,6 +32,7 @@
 #include "drmtest.h"
 #include "igt_fb.h"
 #include "igt_kms.h"
+#include "igt_x86.h"
 #include "ioctl_wrappers.h"
 #include "intel_chipset.h"
 
@@ -1340,7 +1341,7 @@ static void convert_nv12_to_rgb24(struct igt_fb *fb, struct fb_convert_blit_uplo
 	 * it's faster to copy the whole BO to a temporary buffer and convert
 	 * from there.
 	 */
-	memcpy(buf, blit->linear.map, blit->linear.size);
+	igt_memcpy_from_wc(buf, blit->linear.map, blit->linear.size);
 	y = &buf[blit->linear.offsets[0]];
 	uv = &buf[blit->linear.offsets[1]];
 
diff --git a/lib/igt_x86.c b/lib/igt_x86.c
index 0ed3c6f1..eba4c898 100644
--- a/lib/igt_x86.c
+++ b/lib/igt_x86.c
@@ -36,7 +36,10 @@
 #endif
 
 #include "igt_x86.h"
+
+#include <stdint.h>
 #include <stdio.h>
+#include <string.h>
 
 /**
  * SECTION:igt_x86
@@ -174,3 +177,99 @@ char *igt_x86_features_to_string(unsigned features, char *line)
 	return ret;
 }
 #endif
+
+#if defined(__x86_64__) && !defined(__clang__)
+#define MOVNT 512
+
+#pragma GCC push_options
+#pragma GCC target("sse4.1")
+#pragma GCC diagnostic ignored "-Wpointer-arith"
+
+#define min(x, y) ({                            \
+	typeof(x) _min1 = (x);                  \
+	typeof(y) _min2 = (y);                  \
+	(void) (&_min1 == &_min2);              \
+	_min1 < _min2 ? _min1 : _min2;		\
+})
+
+#include <smmintrin.h>
+static void memcpy_from_wc_sse41(void *dst, const void *src, unsigned long len)
+{
+	if ((uintptr_t)src & 15) {
+		char buf[16];
+		__m128i *S = (__m128i *)((uintptr_t)src & ~15);
+		unsigned long misalign = (uintptr_t)src & 15;
+		unsigned long copy = min(len, 16 - misalign);
+
+		_mm_storeu_si128((__m128i *)buf,
+				 _mm_stream_load_si128(S));
+
+		memcpy(dst, buf + misalign, copy);
+
+		dst += copy;
+		src += copy;
+		len -= copy;
+	}
+
+	while (len >= 64) {
+		__m128i *S = (__m128i *)src;
+		__m128i *D = (__m128i *)dst;
+		__m128i tmp[4];
+
+		tmp[0] = _mm_stream_load_si128(S + 0);
+		tmp[1] = _mm_stream_load_si128(S + 1);
+		tmp[2] = _mm_stream_load_si128(S + 2);
+		tmp[3] = _mm_stream_load_si128(S + 3);
+
+		_mm_storeu_si128(D + 0, tmp[0]);
+		_mm_storeu_si128(D + 1, tmp[1]);
+		_mm_storeu_si128(D + 2, tmp[2]);
+		_mm_storeu_si128(D + 3, tmp[3]);
+
+		src += 64;
+		dst += 64;
+		len -= 64;
+	}
+
+	while (len >= 16) {
+		_mm_storeu_si128((__m128i *)dst,
+				 _mm_stream_load_si128((__m128i *)src));
+
+		src += 16;
+		dst += 16;
+		len -= 16;
+	}
+
+	if (len) {
+		char buf[16];
+
+		_mm_storeu_si128((__m128i *)buf,
+				 _mm_stream_load_si128((__m128i *)src));
+		memcpy(dst, buf, len);
+	}
+}
+
+#pragma GCC pop_options
+
+static void memcpy_from_wc(void *dst, const void *src, unsigned long len)
+{
+	memcpy(dst, src, len);
+}
+
+static void (*resolve_memcpy_from_wc(void))(void *, const void *, unsigned long)
+{
+	if (igt_x86_features() & SSE4_1)
+		return memcpy_from_wc_sse41;
+
+	return memcpy_from_wc;
+}
+
+void igt_memcpy_from_wc(void *dst, const void *src, unsigned long len)
+	__attribute__((ifunc("resolve_memcpy_from_wc")));
+
+#else
+void igt_memcpy_from_wc(void *dst, const void *src, unsigned long len)
+{
+	memcpy(dst, src, len);
+}
+#endif
diff --git a/lib/igt_x86.h b/lib/igt_x86.h
index 27b7f0fd..d4f8c343 100644
--- a/lib/igt_x86.h
+++ b/lib/igt_x86.h
@@ -55,4 +55,6 @@ static inline char *igt_x86_features_to_string(unsigned features, char *line)
 }
 #endif
 
+void igt_memcpy_from_wc(void *dst, const void *src, unsigned long len);
+
 #endif /* IGT_X86_H */
-- 
2.16.2

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

  parent reply	other threads:[~2018-02-27 22:21 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-02-27 21:50 [igt-dev] [PATCH igt] lib: Provide an accelerated routine for readback from WC Chris Wilson
2018-02-27 21:53 ` Chris Wilson
2018-02-27 22:17 ` [igt-dev] [PATCH igt v2] " Chris Wilson
2018-02-27 22:20 ` Chris Wilson [this message]
2018-02-27 22:42 ` [igt-dev] [PATCH igt v4] " Chris Wilson
2018-02-27 23:29 ` [igt-dev] [PATCH igt] " Eric Anholt
2018-02-27 23:44 ` [igt-dev] ✓ Fi.CI.BAT: success for lib: Provide an accelerated routine for readback from WC (rev4) Patchwork
2018-02-28  1:04 ` [igt-dev] ✗ Fi.CI.IGT: failure " Patchwork
2018-02-28  9:00 ` [igt-dev] [PATCH igt v6] lib: Provide an accelerated routine for readback from WC Chris Wilson
2018-02-28 17:12   ` Ville Syrjälä
2018-03-01  8:43     ` Chris Wilson
2018-02-28  9:31 ` [igt-dev] ✓ Fi.CI.BAT: success for lib: Provide an accelerated routine for readback from WC (rev5) Patchwork
2018-02-28 10:16 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180227222057.11617-1-chris@chris-wilson.co.uk \
    --to=chris@chris-wilson.co.uk \
    --cc=igt-dev@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.