All of lore.kernel.org
 help / color / mirror / Atom feed
* [igt-dev] [PATCH igt] lib: Provide an accelerated routine for readback from WC
@ 2018-02-27 21:50 Chris Wilson
  2018-02-27 21:53 ` Chris Wilson
                   ` (9 more replies)
  0 siblings, 10 replies; 13+ messages in thread
From: Chris Wilson @ 2018-02-27 21:50 UTC (permalink / raw)
  To: igt-dev

Reading from WC (write-combining) memory is awfully slow, as each access
is uncached and so performed synchronously, stalling for the memory load.
For exactly this purpose, SSE 4.1 introduced a streaming load on x86
(MOVNTDQA, exposed as _mm_stream_load_si128()) that uses a small internal
buffer to accelerate reading back a cacheline at a time from uncached
memory.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 lib/igt_fb.c  |  3 ++-
 lib/igt_x86.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 lib/igt_x86.h |  2 ++
 3 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/lib/igt_fb.c b/lib/igt_fb.c
index ecd73053..7404ba7c 100644
--- a/lib/igt_fb.c
+++ b/lib/igt_fb.c
@@ -32,6 +32,7 @@
 #include "drmtest.h"
 #include "igt_fb.h"
 #include "igt_kms.h"
+#include "igt_x86.h"
 #include "ioctl_wrappers.h"
 #include "intel_chipset.h"
 
@@ -1340,7 +1341,7 @@ static void convert_nv12_to_rgb24(struct igt_fb *fb, struct fb_convert_blit_uplo
 	 * it's faster to copy the whole BO to a temporary buffer and convert
 	 * from there.
 	 */
-	memcpy(buf, blit->linear.map, blit->linear.size);
+	igt_memcpy_from_wc(buf, blit->linear.map, blit->linear.size);
 	y = &buf[blit->linear.offsets[0]];
 	uv = &buf[blit->linear.offsets[1]];
 
diff --git a/lib/igt_x86.c b/lib/igt_x86.c
index 0ed3c6f1..b7b57284 100644
--- a/lib/igt_x86.c
+++ b/lib/igt_x86.c
@@ -36,7 +36,10 @@
 #endif
 
 #include "igt_x86.h"
+
+#include <stdint.h>
 #include <stdio.h>
+#include <string.h>
 
 /**
  * SECTION:igt_x86
@@ -174,3 +177,46 @@ char *igt_x86_features_to_string(unsigned features, char *line)
 	return ret;
 }
 #endif
+
+#if defined(__x86_64__) && !defined(__clang__)
+#define MOVNT 512
+
+#pragma GCC push_options
+#pragma GCC target("sse4.1")
+
+#include <smmintrin.h>
+void igt_memcpy_from_wc(void *dst, const void *src, unsigned long len)
+{
+	if (igt_x86_features() & SSE4_1 && ((uintptr_t)src & 15) == 0) {
+		while (len >= 64) {
+			__m128i *S = (__m128i *)src;
+			__m128i *D = (__m128i *)dst;
+			__m128i tmp[4];
+
+			tmp[0] = _mm_stream_load_si128(S + 0);
+			tmp[1] = _mm_stream_load_si128(S + 1);
+			tmp[2] = _mm_stream_load_si128(S + 2);
+			tmp[3] = _mm_stream_load_si128(S + 3);
+
+			_mm_storeu_si128(D + 0, tmp[0]);
+			_mm_storeu_si128(D + 1, tmp[1]);
+			_mm_storeu_si128(D + 2, tmp[2]);
+			_mm_storeu_si128(D + 3, tmp[3]);
+
+			src += 64;
+			dst += 64;
+			len -= 64;
+		}
+	}
+
+	memcpy(dst, src, len);
+}
+
+#pragma GCC pop_options
+
+#else
+void igt_memcpy_from_wc(void *dst, const void *src, unsigned long len)
+{
+	memcpy(dst, src, len);
+}
+#endif
diff --git a/lib/igt_x86.h b/lib/igt_x86.h
index 27b7f0fd..d4f8c343 100644
--- a/lib/igt_x86.h
+++ b/lib/igt_x86.h
@@ -55,4 +55,6 @@ static inline char *igt_x86_features_to_string(unsigned features, char *line)
 }
 #endif
 
+void igt_memcpy_from_wc(void *dst, const void *src, unsigned long len);
+
 #endif /* IGT_X86_H */
-- 
2.16.2

_______________________________________________
igt-dev mailing list
igt-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/igt-dev

^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2018-03-01  8:44 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-02-27 21:50 [igt-dev] [PATCH igt] lib: Provide an accelerated routine for readback from WC Chris Wilson
2018-02-27 21:53 ` Chris Wilson
2018-02-27 22:17 ` [igt-dev] [PATCH igt v2] " Chris Wilson
2018-02-27 22:20 ` [igt-dev] [PATCH igt v3] " Chris Wilson
2018-02-27 22:42 ` [igt-dev] [PATCH igt v4] " Chris Wilson
2018-02-27 23:29 ` [igt-dev] [PATCH igt] " Eric Anholt
2018-02-27 23:44 ` [igt-dev] ✓ Fi.CI.BAT: success for lib: Provide an accelerated routine for readback from WC (rev4) Patchwork
2018-02-28  1:04 ` [igt-dev] ✗ Fi.CI.IGT: failure " Patchwork
2018-02-28  9:00 ` [igt-dev] [PATCH igt v6] lib: Provide an accelerated routine for readback from WC Chris Wilson
2018-02-28 17:12   ` Ville Syrjälä
2018-03-01  8:43     ` Chris Wilson
2018-02-28  9:31 ` [igt-dev] ✓ Fi.CI.BAT: success for lib: Provide an accelerated routine for readback from WC (rev5) Patchwork
2018-02-28 10:16 ` [igt-dev] ✓ Fi.CI.IGT: " Patchwork

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.