From: Luiz Augusto von Dentz <luiz.dentz@gmail.com>
To: linux-bluetooth@vger.kernel.org
Subject: [PATCH v2 3/3] sbc/sbc_primitives_sse: Optimize sbc_analyze_8s
Date: Tue, 18 Aug 2020 17:02:12 -0700 [thread overview]
Message-ID: <20200819000212.211485-3-luiz.dentz@gmail.com> (raw)
In-Reply-To: <20200819000212.211485-1-luiz.dentz@gmail.com>
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
This makes use of 128-bit XMM registers whenever possible.
=== Before ====
$ time src/sbcenc_mmx -s 8 sin_64m.au > /dev/null
real 0m1.064s
user 0m1.012s
sys 0m0.049s
=== After ====
$ time src/sbcenc -s 8 sin_64m.au > /dev/null
real 0m1.032s
user 0m0.996s
sys 0m0.033s
---
sbc/sbc_primitives_sse.c | 109 ++++++++++++++++-----------------------
1 file changed, 44 insertions(+), 65 deletions(-)
diff --git a/sbc/sbc_primitives_sse.c b/sbc/sbc_primitives_sse.c
index 2a903e1..9bff6cf 100644
--- a/sbc/sbc_primitives_sse.c
+++ b/sbc/sbc_primitives_sse.c
@@ -96,80 +96,59 @@ static inline void sbc_analyze_four_sse(const int16_t *in, int32_t *out,
static inline void sbc_analyze_eight_sse(const int16_t *in, int32_t *out,
const FIXED_T *consts)
{
- static const SBC_ALIGNED int32_t round_c[2] = {
+ static const SBC_ALIGNED int32_t round_c[4] = {
+ 1 << (SBC_PROTO_FIXED8_SCALE - 1),
+ 1 << (SBC_PROTO_FIXED8_SCALE - 1),
1 << (SBC_PROTO_FIXED8_SCALE - 1),
1 << (SBC_PROTO_FIXED8_SCALE - 1),
};
__asm__ volatile (
- "movq (%0), %%mm0\n"
- "movq 8(%0), %%mm1\n"
- "movq 16(%0), %%mm2\n"
- "movq 24(%0), %%mm3\n"
- "pmaddwd (%1), %%mm0\n"
- "pmaddwd 8(%1), %%mm1\n"
- "pmaddwd 16(%1), %%mm2\n"
- "pmaddwd 24(%1), %%mm3\n"
- "paddd (%2), %%mm0\n"
- "paddd (%2), %%mm1\n"
- "paddd (%2), %%mm2\n"
- "paddd (%2), %%mm3\n"
+ "movdqu (%0), %%xmm0\n"
+ "movdqu 16(%0), %%xmm1\n"
+ "pmaddwd (%1), %%xmm0\n"
+ "pmaddwd 16(%1), %%xmm1\n"
+ "paddd (%2), %%xmm0\n"
+ "paddd (%2), %%xmm1\n"
"\n"
- "movq 32(%0), %%mm4\n"
- "movq 40(%0), %%mm5\n"
- "movq 48(%0), %%mm6\n"
- "movq 56(%0), %%mm7\n"
- "pmaddwd 32(%1), %%mm4\n"
- "pmaddwd 40(%1), %%mm5\n"
- "pmaddwd 48(%1), %%mm6\n"
- "pmaddwd 56(%1), %%mm7\n"
- "paddd %%mm4, %%mm0\n"
- "paddd %%mm5, %%mm1\n"
- "paddd %%mm6, %%mm2\n"
- "paddd %%mm7, %%mm3\n"
+ "movdqu 32(%0), %%xmm2\n"
+ "movdqu 48(%0), %%xmm3\n"
+ "pmaddwd 32(%1), %%xmm2\n"
+ "pmaddwd 48(%1), %%xmm3\n"
+ "paddd %%xmm2, %%xmm0\n"
+ "paddd %%xmm3, %%xmm1\n"
"\n"
- "movq 64(%0), %%mm4\n"
- "movq 72(%0), %%mm5\n"
- "movq 80(%0), %%mm6\n"
- "movq 88(%0), %%mm7\n"
- "pmaddwd 64(%1), %%mm4\n"
- "pmaddwd 72(%1), %%mm5\n"
- "pmaddwd 80(%1), %%mm6\n"
- "pmaddwd 88(%1), %%mm7\n"
- "paddd %%mm4, %%mm0\n"
- "paddd %%mm5, %%mm1\n"
- "paddd %%mm6, %%mm2\n"
- "paddd %%mm7, %%mm3\n"
+ "movdqu 64(%0), %%xmm2\n"
+ "movdqu 80(%0), %%xmm3\n"
+ "pmaddwd 64(%1), %%xmm2\n"
+ "pmaddwd 80(%1), %%xmm3\n"
+ "paddd %%xmm2, %%xmm0\n"
+ "paddd %%xmm3, %%xmm1\n"
"\n"
- "movq 96(%0), %%mm4\n"
- "movq 104(%0), %%mm5\n"
- "movq 112(%0), %%mm6\n"
- "movq 120(%0), %%mm7\n"
- "pmaddwd 96(%1), %%mm4\n"
- "pmaddwd 104(%1), %%mm5\n"
- "pmaddwd 112(%1), %%mm6\n"
- "pmaddwd 120(%1), %%mm7\n"
- "paddd %%mm4, %%mm0\n"
- "paddd %%mm5, %%mm1\n"
- "paddd %%mm6, %%mm2\n"
- "paddd %%mm7, %%mm3\n"
+ "movdqu 96(%0), %%xmm2\n"
+ "movdqu 112(%0), %%xmm3\n"
+ "pmaddwd 96(%1), %%xmm2\n"
+ "pmaddwd 112(%1), %%xmm3\n"
+ "paddd %%xmm2, %%xmm0\n"
+ "paddd %%xmm3, %%xmm1\n"
"\n"
- "movq 128(%0), %%mm4\n"
- "movq 136(%0), %%mm5\n"
- "movq 144(%0), %%mm6\n"
- "movq 152(%0), %%mm7\n"
- "pmaddwd 128(%1), %%mm4\n"
- "pmaddwd 136(%1), %%mm5\n"
- "pmaddwd 144(%1), %%mm6\n"
- "pmaddwd 152(%1), %%mm7\n"
- "paddd %%mm4, %%mm0\n"
- "paddd %%mm5, %%mm1\n"
- "paddd %%mm6, %%mm2\n"
- "paddd %%mm7, %%mm3\n"
+ "movdqu 128(%0), %%xmm2\n"
+ "movdqu 144(%0), %%xmm3\n"
+ "pmaddwd 128(%1), %%xmm2\n"
+ "pmaddwd 144(%1), %%xmm3\n"
+ "paddd %%xmm2, %%xmm0\n"
+ "paddd %%xmm3, %%xmm1\n"
+ "\n"
+ "psrad %4, %%xmm0\n"
+ "psrad %4, %%xmm1\n"
"\n"
- "psrad %4, %%mm0\n"
- "psrad %4, %%mm1\n"
- "psrad %4, %%mm2\n"
- "psrad %4, %%mm3\n"
+ "movdqa %%xmm0, %%xmm2\n"
+ "movdqa %%xmm1, %%xmm3\n"
+ "punpckhqdq %%xmm2, %%xmm2\n"
+ "punpckhqdq %%xmm3, %%xmm3\n"
+ "movdq2q %%xmm0, %%mm0\n"
+ "movdq2q %%xmm2, %%mm1\n"
+ "movdq2q %%xmm1, %%mm2\n"
+ "movdq2q %%xmm3, %%mm3\n"
"\n"
"packssdw %%mm0, %%mm0\n"
"packssdw %%mm1, %%mm1\n"
--
2.26.2
next prev parent reply other threads:[~2020-08-19 0:02 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-08-19 0:02 [PATCH v2 1/3] sbc: Add initial code for SSE primitives Luiz Augusto von Dentz
2020-08-19 0:02 ` [PATCH v2 2/3] sbc/sbc_primitives_sse: Optimize sbc_analyze_4s Luiz Augusto von Dentz
2020-08-19 0:02 ` Luiz Augusto von Dentz [this message]
2020-09-17 21:50 ` [PATCH v2 1/3] sbc: Add initial code for SSE primitives Luiz Augusto von Dentz
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200819000212.211485-3-luiz.dentz@gmail.com \
--to=luiz.dentz@gmail.com \
--cc=linux-bluetooth@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.