All of lore.kernel.org
 help / color / mirror / Atom feed
From: Luiz Augusto von Dentz <luiz.dentz@gmail.com>
To: linux-bluetooth@vger.kernel.org
Subject: [PATCH v2 3/3] sbc/sbc_primitives_sse: Optimize sbc_analyze_8s
Date: Tue, 18 Aug 2020 17:02:12 -0700	[thread overview]
Message-ID: <20200819000212.211485-3-luiz.dentz@gmail.com> (raw)
In-Reply-To: <20200819000212.211485-1-luiz.dentz@gmail.com>

From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>

This makes use of 128-bit XMM registers whenever possible.

=== Before ===

$ time src/sbcenc_mmx -s 8 sin_64m.au > /dev/null
real    0m1.064s
user    0m1.012s
sys     0m0.049s

=== After ===

$ time src/sbcenc -s 8 sin_64m.au > /dev/null
real    0m1.032s
user    0m0.996s
sys     0m0.033s
---
 sbc/sbc_primitives_sse.c | 109 ++++++++++++++++-----------------------
 1 file changed, 44 insertions(+), 65 deletions(-)

diff --git a/sbc/sbc_primitives_sse.c b/sbc/sbc_primitives_sse.c
index 2a903e1..9bff6cf 100644
--- a/sbc/sbc_primitives_sse.c
+++ b/sbc/sbc_primitives_sse.c
@@ -96,80 +96,59 @@ static inline void sbc_analyze_four_sse(const int16_t *in, int32_t *out,
 static inline void sbc_analyze_eight_sse(const int16_t *in, int32_t *out,
 							const FIXED_T *consts)
 {
-	static const SBC_ALIGNED int32_t round_c[2] = {
+	static const SBC_ALIGNED int32_t round_c[4] = {
+		1 << (SBC_PROTO_FIXED8_SCALE - 1),
+		1 << (SBC_PROTO_FIXED8_SCALE - 1),
 		1 << (SBC_PROTO_FIXED8_SCALE - 1),
 		1 << (SBC_PROTO_FIXED8_SCALE - 1),
 	};
 	__asm__ volatile (
-		"movq        (%0), %%mm0\n"
-		"movq       8(%0), %%mm1\n"
-		"movq      16(%0), %%mm2\n"
-		"movq      24(%0), %%mm3\n"
-		"pmaddwd     (%1), %%mm0\n"
-		"pmaddwd    8(%1), %%mm1\n"
-		"pmaddwd   16(%1), %%mm2\n"
-		"pmaddwd   24(%1), %%mm3\n"
-		"paddd       (%2), %%mm0\n"
-		"paddd       (%2), %%mm1\n"
-		"paddd       (%2), %%mm2\n"
-		"paddd       (%2), %%mm3\n"
+		"movdqu      (%0), %%xmm0\n"
+		"movdqu    16(%0), %%xmm1\n"
+		"pmaddwd     (%1), %%xmm0\n"
+		"pmaddwd   16(%1), %%xmm1\n"
+		"paddd       (%2), %%xmm0\n"
+		"paddd       (%2), %%xmm1\n"
 		"\n"
-		"movq      32(%0), %%mm4\n"
-		"movq      40(%0), %%mm5\n"
-		"movq      48(%0), %%mm6\n"
-		"movq      56(%0), %%mm7\n"
-		"pmaddwd   32(%1), %%mm4\n"
-		"pmaddwd   40(%1), %%mm5\n"
-		"pmaddwd   48(%1), %%mm6\n"
-		"pmaddwd   56(%1), %%mm7\n"
-		"paddd      %%mm4, %%mm0\n"
-		"paddd      %%mm5, %%mm1\n"
-		"paddd      %%mm6, %%mm2\n"
-		"paddd      %%mm7, %%mm3\n"
+		"movdqu    32(%0), %%xmm2\n"
+		"movdqu    48(%0), %%xmm3\n"
+		"pmaddwd   32(%1), %%xmm2\n"
+		"pmaddwd   48(%1), %%xmm3\n"
+		"paddd     %%xmm2, %%xmm0\n"
+		"paddd     %%xmm3, %%xmm1\n"
 		"\n"
-		"movq      64(%0), %%mm4\n"
-		"movq      72(%0), %%mm5\n"
-		"movq      80(%0), %%mm6\n"
-		"movq      88(%0), %%mm7\n"
-		"pmaddwd   64(%1), %%mm4\n"
-		"pmaddwd   72(%1), %%mm5\n"
-		"pmaddwd   80(%1), %%mm6\n"
-		"pmaddwd   88(%1), %%mm7\n"
-		"paddd      %%mm4, %%mm0\n"
-		"paddd      %%mm5, %%mm1\n"
-		"paddd      %%mm6, %%mm2\n"
-		"paddd      %%mm7, %%mm3\n"
+		"movdqu    64(%0), %%xmm2\n"
+		"movdqu    80(%0), %%xmm3\n"
+		"pmaddwd   64(%1), %%xmm2\n"
+		"pmaddwd   80(%1), %%xmm3\n"
+		"paddd     %%xmm2, %%xmm0\n"
+		"paddd     %%xmm3, %%xmm1\n"
 		"\n"
-		"movq      96(%0), %%mm4\n"
-		"movq     104(%0), %%mm5\n"
-		"movq     112(%0), %%mm6\n"
-		"movq     120(%0), %%mm7\n"
-		"pmaddwd   96(%1), %%mm4\n"
-		"pmaddwd  104(%1), %%mm5\n"
-		"pmaddwd  112(%1), %%mm6\n"
-		"pmaddwd  120(%1), %%mm7\n"
-		"paddd      %%mm4, %%mm0\n"
-		"paddd      %%mm5, %%mm1\n"
-		"paddd      %%mm6, %%mm2\n"
-		"paddd      %%mm7, %%mm3\n"
+		"movdqu    96(%0), %%xmm2\n"
+		"movdqu   112(%0), %%xmm3\n"
+		"pmaddwd   96(%1), %%xmm2\n"
+		"pmaddwd  112(%1), %%xmm3\n"
+		"paddd     %%xmm2, %%xmm0\n"
+		"paddd     %%xmm3, %%xmm1\n"
 		"\n"
-		"movq     128(%0), %%mm4\n"
-		"movq     136(%0), %%mm5\n"
-		"movq     144(%0), %%mm6\n"
-		"movq     152(%0), %%mm7\n"
-		"pmaddwd  128(%1), %%mm4\n"
-		"pmaddwd  136(%1), %%mm5\n"
-		"pmaddwd  144(%1), %%mm6\n"
-		"pmaddwd  152(%1), %%mm7\n"
-		"paddd      %%mm4, %%mm0\n"
-		"paddd      %%mm5, %%mm1\n"
-		"paddd      %%mm6, %%mm2\n"
-		"paddd      %%mm7, %%mm3\n"
+		"movdqu    128(%0), %%xmm2\n"
+		"movdqu    144(%0), %%xmm3\n"
+		"pmaddwd   128(%1), %%xmm2\n"
+		"pmaddwd   144(%1), %%xmm3\n"
+		"paddd      %%xmm2, %%xmm0\n"
+		"paddd      %%xmm3, %%xmm1\n"
+		"\n"
+		"psrad         %4, %%xmm0\n"
+		"psrad         %4, %%xmm1\n"
 		"\n"
-		"psrad         %4, %%mm0\n"
-		"psrad         %4, %%mm1\n"
-		"psrad         %4, %%mm2\n"
-		"psrad         %4, %%mm3\n"
+		"movdqa     %%xmm0, %%xmm2\n"
+		"movdqa     %%xmm1, %%xmm3\n"
+		"punpckhqdq %%xmm2, %%xmm2\n"
+		"punpckhqdq %%xmm3, %%xmm3\n"
+		"movdq2q    %%xmm0, %%mm0\n"
+		"movdq2q    %%xmm2, %%mm1\n"
+		"movdq2q    %%xmm1, %%mm2\n"
+		"movdq2q    %%xmm3, %%mm3\n"
 		"\n"
 		"packssdw   %%mm0, %%mm0\n"
 		"packssdw   %%mm1, %%mm1\n"
-- 
2.26.2


  parent reply	other threads:[~2020-08-19  0:02 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-19  0:02 [PATCH v2 1/3] sbc: Add initial code for SSE primitives Luiz Augusto von Dentz
2020-08-19  0:02 ` [PATCH v2 2/3] sbc/sbc_primitives_sse: Optimize sbc_analyze_4s Luiz Augusto von Dentz
2020-08-19  0:02 ` Luiz Augusto von Dentz [this message]
2020-09-17 21:50 ` [PATCH v2 1/3] sbc: Add initial code for SSE primitives Luiz Augusto von Dentz

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200819000212.211485-3-luiz.dentz@gmail.com \
    --to=luiz.dentz@gmail.com \
    --cc=linux-bluetooth@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.