All of lore.kernel.org
 help / color / mirror / Atom feed
From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
To: "ext Brad Midgley" <bmidgley@gmail.com>
Cc: "Jaska Uimonen" <jaska.uimonen@nokia.com>,
	linux-bluetooth@vger.kernel.org
Subject: Re: [RFC/PATCH] sbc: new filtering function for 8 band fixed point encoding
Date: Tue, 23 Dec 2008 01:30:43 +0200	[thread overview]
Message-ID: <200812230130.43968.siarhei.siamashka@nokia.com> (raw)
In-Reply-To: <200812200012.08430.siarhei.siamashka@nokia.com>

[-- Attachment #1: Type: text/plain, Size: 1086 bytes --]

On Saturday 20 December 2008 00:12:08 ext Siarhei Siamashka wrote:
[...]
> We had a talk with Jaska Uimonen here, and now I'm kind of delegated to
> finish the work on this filtering function for SBC encoder (including the
> final addition of ARM assembly optimizations).  He provided me with his
> latest variant of the code, which contains some more optimizations to reduce
> the number of operations, as well as loop unrolling. I will add his changes
> to the patch in the next iteration.
>
> Now the question is how best to integrate a fixed filtering function into the
> git repository. If I just continue adding changes to the patch in order to
> make it faster, it will not be obvious from the commit log alone how we got
> to these code transformations.

The next iteration is done.  It adds support for 4 subbands, reduces the number
of arithmetic operations (but without loop unrolling, for better code
readability), improves precision for both 16-bit and 32-bit fixed point, and
makes the 'neginv' macro more portable and faster. The rest is in the code comments.


Best regards,
Siarhei Siamashka

[-- Attachment #2: sbc_analyze_modified.diff --]
[-- Type: text/x-diff, Size: 26435 bytes --]

diff --git a/sbc/sbc.c b/sbc/sbc.c
index 5411893..3d6a412 100644
--- a/sbc/sbc.c
+++ b/sbc/sbc.c
@@ -40,6 +40,7 @@
 #include <string.h>
 #include <stdlib.h>
 #include <sys/types.h>
+#include <limits.h>
 
 #include "sbc_math.h"
 #include "sbc_tables.h"
@@ -93,7 +94,7 @@ struct sbc_decoder_state {
 struct sbc_encoder_state {
 	int subbands;
 	int position[2];
-	int32_t X[2][160];
+	int16_t X[2][160];
 };
 
 /*
@@ -656,75 +657,56 @@ static void sbc_encoder_init(struct sbc_encoder_state *state,
 	state->position[0] = state->position[1] = 9 * frame->subbands;
 }
 
-static inline void _sbc_analyze_four(const int32_t *in, int32_t *out)
+static inline void _sbc_analyze_four(const int16_t *in, int32_t *out)
 {
-	sbc_fixed_t t[8], s[5];
-
-	t[0] = SCALE4_STAGE1( /* Q8 */
-		MULA(_sbc_proto_4[0], in[8] - in[32], /* Q18 */
-		MUL( _sbc_proto_4[1], in[16] - in[24])));
-
-	t[1] = SCALE4_STAGE1(
-		MULA(_sbc_proto_4[2], in[1],
-		MULA(_sbc_proto_4[3], in[9],
-		MULA(_sbc_proto_4[4], in[17],
-		MULA(_sbc_proto_4[5], in[25],
-		MUL( _sbc_proto_4[6], in[33]))))));
-
-	t[2] = SCALE4_STAGE1(
-		MULA(_sbc_proto_4[7], in[2],
-		MULA(_sbc_proto_4[8], in[10],
-		MULA(_sbc_proto_4[9], in[18],
-		MULA(_sbc_proto_4[10], in[26],
-		MUL( _sbc_proto_4[11], in[34]))))));
-
-	t[3] = SCALE4_STAGE1(
-		MULA(_sbc_proto_4[12], in[3],
-		MULA(_sbc_proto_4[13], in[11],
-		MULA(_sbc_proto_4[14], in[19],
-		MULA(_sbc_proto_4[15], in[27],
-		MUL( _sbc_proto_4[16], in[35]))))));
-
-	t[4] = SCALE4_STAGE1(
-		MULA(_sbc_proto_4[17], in[4] + in[36],
-		MULA(_sbc_proto_4[18], in[12] + in[28],
-		MUL( _sbc_proto_4[19], in[20]))));
-
-	t[5] = SCALE4_STAGE1(
-		MULA(_sbc_proto_4[16], in[5],
-		MULA(_sbc_proto_4[15], in[13],
-		MULA(_sbc_proto_4[14], in[21],
-		MULA(_sbc_proto_4[13], in[29],
-		MUL( _sbc_proto_4[12], in[37]))))));
-
-	/* don't compute t[6]... this term always multiplies
-	 * with cos(pi/2) = 0 */
-
-	t[7] = SCALE4_STAGE1(
-		MULA(_sbc_proto_4[6], in[7],
-		MULA(_sbc_proto_4[5], in[15],
-		MULA(_sbc_proto_4[4], in[23],
-		MULA(_sbc_proto_4[3], in[31],
-		MUL( _sbc_proto_4[2], in[39]))))));
-
-	s[0] = MUL( _anamatrix4[0], t[0] + t[4]);
-	s[1] = MUL( _anamatrix4[2], t[2]);
-	s[2] = MULA(_anamatrix4[1], t[1] + t[3],
-		MUL(_anamatrix4[3], t[5]));
-	s[3] = MULA(_anamatrix4[3], t[1] + t[3],
-		MUL(_anamatrix4[1], -t[5] + t[7]));
-	s[4] = MUL( _anamatrix4[3], t[7]);
-
-	out[0] = SCALE4_STAGE2( s[0] + s[1] + s[2] + s[4]); /* Q0 */
-	out[1] = SCALE4_STAGE2(-s[0] + s[1] + s[3]);
-	out[2] = SCALE4_STAGE2(-s[0] + s[1] - s[3]);
-	out[3] = SCALE4_STAGE2( s[0] + s[1] - s[2] - s[4]);
+	FIXED_A t1[4];
+	FIXED_T t2[4];
+	int i = 0, hop = 0;
+
+	/* rounding coefficient */
+
+	t1[0] = t1[1] = t1[2] = t1[3] =
+		(FIXED_A)1 << (SBC_PROTO_FIXED4_SCALE-1);
+
+	/* low pass polyphase filter */
+	for (hop = 0; hop < 40; hop += 8) {
+		t1[0] +=  (FIXED_A)in[hop] * _sbc_proto_fixed4[hop];
+		t1[1] +=  (FIXED_A)in[hop + 1] * _sbc_proto_fixed4[hop + 1];
+		t1[2] +=  (FIXED_A)in[hop + 2] * _sbc_proto_fixed4[hop + 2];
+		t1[1] +=  (FIXED_A)in[hop + 3] * _sbc_proto_fixed4[hop + 3];
+		t1[0] +=  (FIXED_A)in[hop + 4] * _sbc_proto_fixed4[hop + 4];
+		t1[3] +=  (FIXED_A)in[hop + 5] * _sbc_proto_fixed4[hop + 5];
+		t1[3] +=  (FIXED_A)in[hop + 7] * _sbc_proto_fixed4[hop + 7];
+	}
+
+	/* scaling */
+	t2[0] = t1[0] >> SBC_PROTO_FIXED4_SCALE;
+	t2[1] = t1[1] >> SBC_PROTO_FIXED4_SCALE;
+	t2[2] = t1[2] >> SBC_PROTO_FIXED4_SCALE;
+	t2[3] = t1[3] >> SBC_PROTO_FIXED4_SCALE;
+
+	/* do the cos transform */
+	for (i = 0, hop = 0; i < 4; hop += 8, i++) {
+		/* rounding coefficient */
+		t1[i]  = (FIXED_A)1 << (SBC_COS_TABLE_FIXED4_SCALE-1-SCALE_OUT_BITS);
+		
+		t1[i] += (FIXED_A)t2[0] * cos_table_fixed_4[0 + hop];
+		t1[i] += (FIXED_A)t2[1] * cos_table_fixed_4[1 + hop];
+		t1[i] += (FIXED_A)t2[2] * cos_table_fixed_4[2 + hop];
+		t1[i] += (FIXED_A)t2[3] * cos_table_fixed_4[5 + hop];
+	}
+
+	/* scaling */
+	out[0] = t1[0] >> (SBC_COS_TABLE_FIXED4_SCALE-SCALE_OUT_BITS);
+	out[1] = t1[1] >> (SBC_COS_TABLE_FIXED4_SCALE-SCALE_OUT_BITS);
+	out[2] = t1[2] >> (SBC_COS_TABLE_FIXED4_SCALE-SCALE_OUT_BITS);
+	out[3] = t1[3] >> (SBC_COS_TABLE_FIXED4_SCALE-SCALE_OUT_BITS);
 }
 
 static inline void sbc_analyze_four(struct sbc_encoder_state *state,
 				struct sbc_frame *frame, int ch, int blk)
 {
-	int32_t *x = &state->X[ch][state->position[ch]];
+	int16_t *x = &state->X[ch][state->position[ch]];
 	int16_t *pcm = &frame->pcm_sample[ch][blk * 4];
 
 	/* Input 4 Audio Samples */
@@ -740,133 +722,80 @@ static inline void sbc_analyze_four(struct sbc_encoder_state *state,
 		state->position[ch] = 36;
 }
 
-static inline void _sbc_analyze_eight(const int32_t *in, int32_t *out)
+static inline void _sbc_analyze_eight(const int16_t *in, int32_t *out)
 {
-	sbc_fixed_t t[8], s[8];
-
-	t[0] = SCALE8_STAGE1( /* Q10 */
-		MULA(_sbc_proto_8[0], (in[16] - in[64]), /* Q18 = Q18 * Q0 */
-		MULA(_sbc_proto_8[1], (in[32] - in[48]),
-		MULA(_sbc_proto_8[2], in[4],
-		MULA(_sbc_proto_8[3], in[20],
-		MULA(_sbc_proto_8[4], in[36],
-		MUL( _sbc_proto_8[5], in[52])))))));
-
-	t[1] = SCALE8_STAGE1(
-		MULA(_sbc_proto_8[6], in[2],
-		MULA(_sbc_proto_8[7], in[18],
-		MULA(_sbc_proto_8[8], in[34],
-		MULA(_sbc_proto_8[9], in[50],
-		MUL(_sbc_proto_8[10], in[66]))))));
-
-	t[2] = SCALE8_STAGE1(
-		MULA(_sbc_proto_8[11], in[1],
-		MULA(_sbc_proto_8[12], in[17],
-		MULA(_sbc_proto_8[13], in[33],
-		MULA(_sbc_proto_8[14], in[49],
-		MULA(_sbc_proto_8[15], in[65],
-		MULA(_sbc_proto_8[16], in[3],
-		MULA(_sbc_proto_8[17], in[19],
-		MULA(_sbc_proto_8[18], in[35],
-		MULA(_sbc_proto_8[19], in[51],
-		MUL( _sbc_proto_8[20], in[67])))))))))));
-
-	t[3] = SCALE8_STAGE1(
-		MULA( _sbc_proto_8[21], in[5],
-		MULA( _sbc_proto_8[22], in[21],
-		MULA( _sbc_proto_8[23], in[37],
-		MULA( _sbc_proto_8[24], in[53],
-		MULA( _sbc_proto_8[25], in[69],
-		MULA(-_sbc_proto_8[15], in[15],
-		MULA(-_sbc_proto_8[14], in[31],
-		MULA(-_sbc_proto_8[13], in[47],
-		MULA(-_sbc_proto_8[12], in[63],
-		MUL( -_sbc_proto_8[11], in[79])))))))))));
-
-	t[4] = SCALE8_STAGE1(
-		MULA( _sbc_proto_8[26], in[6],
-		MULA( _sbc_proto_8[27], in[22],
-		MULA( _sbc_proto_8[28], in[38],
-		MULA( _sbc_proto_8[29], in[54],
-		MULA( _sbc_proto_8[30], in[70],
-		MULA(-_sbc_proto_8[10], in[14],
-		MULA(-_sbc_proto_8[9], in[30],
-		MULA(-_sbc_proto_8[8], in[46],
-		MULA(-_sbc_proto_8[7], in[62],
-		MUL( -_sbc_proto_8[6], in[78])))))))))));
-
-	t[5] = SCALE8_STAGE1(
-		MULA( _sbc_proto_8[31], in[7],
-		MULA( _sbc_proto_8[32], in[23],
-		MULA( _sbc_proto_8[33], in[39],
-		MULA( _sbc_proto_8[34], in[55],
-		MULA( _sbc_proto_8[35], in[71],
-		MULA(-_sbc_proto_8[20], in[13],
-		MULA(-_sbc_proto_8[19], in[29],
-		MULA(-_sbc_proto_8[18], in[45],
-		MULA(-_sbc_proto_8[17], in[61],
-		MUL( -_sbc_proto_8[16], in[77])))))))))));
-
-	t[6] = SCALE8_STAGE1(
-		MULA( _sbc_proto_8[36], (in[8] + in[72]),
-		MULA( _sbc_proto_8[37], (in[24] + in[56]),
-		MULA( _sbc_proto_8[38], in[40],
-		MULA(-_sbc_proto_8[39], in[12],
-		MULA(-_sbc_proto_8[5], in[28],
-		MULA(-_sbc_proto_8[4], in[44],
-		MULA(-_sbc_proto_8[3], in[60],
-		MUL( -_sbc_proto_8[2], in[76])))))))));
-
-	t[7] = SCALE8_STAGE1(
-		MULA( _sbc_proto_8[35], in[9],
-		MULA( _sbc_proto_8[34], in[25],
-		MULA( _sbc_proto_8[33], in[41],
-		MULA( _sbc_proto_8[32], in[57],
-		MULA( _sbc_proto_8[31], in[73],
-		MULA(-_sbc_proto_8[25], in[11],
-		MULA(-_sbc_proto_8[24], in[27],
-		MULA(-_sbc_proto_8[23], in[43],
-		MULA(-_sbc_proto_8[22], in[59],
-		MUL( -_sbc_proto_8[21], in[75])))))))))));
-
-	s[0] = MULA(  _anamatrix8[0], t[0],
-		MUL(  _anamatrix8[1], t[6]));
-	s[1] = MUL(   _anamatrix8[7], t[1]);
-	s[2] = MULA(  _anamatrix8[2], t[2],
-		MULA( _anamatrix8[3], t[3],
-		MULA( _anamatrix8[4], t[5],
-		MUL(  _anamatrix8[5], t[7]))));
-	s[3] = MUL(   _anamatrix8[6], t[4]);
-	s[4] = MULA(  _anamatrix8[3], t[2],
-		MULA(-_anamatrix8[5], t[3],
-		MULA(-_anamatrix8[2], t[5],
-		MUL( -_anamatrix8[4], t[7]))));
-	s[5] = MULA(  _anamatrix8[4], t[2],
-		MULA(-_anamatrix8[2], t[3],
-		MULA( _anamatrix8[5], t[5],
-		MUL(  _anamatrix8[3], t[7]))));
-	s[6] = MULA(  _anamatrix8[1], t[0],
-		MUL( -_anamatrix8[0], t[6]));
-	s[7] = MULA(  _anamatrix8[5], t[2],
-		MULA(-_anamatrix8[4], t[3],
-		MULA( _anamatrix8[3], t[5],
-		MUL( -_anamatrix8[2], t[7]))));
-
-	out[0] = SCALE8_STAGE2( s[0] + s[1] + s[2] + s[3]);
-	out[1] = SCALE8_STAGE2( s[1] - s[3] + s[4] + s[6]);
-	out[2] = SCALE8_STAGE2( s[1] - s[3] + s[5] - s[6]);
-	out[3] = SCALE8_STAGE2(-s[0] + s[1] + s[3] + s[7]);
-	out[4] = SCALE8_STAGE2(-s[0] + s[1] + s[3] - s[7]);
-	out[5] = SCALE8_STAGE2( s[1] - s[3] - s[5] - s[6]);
-	out[6] = SCALE8_STAGE2( s[1] - s[3] - s[4] + s[6]);
-	out[7] = SCALE8_STAGE2( s[0] + s[1] - s[2] + s[3]);
+	FIXED_A t1[8];
+	FIXED_T t2[8];
+	int i, hop;
+
+	/* rounding coefficient */
+	t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] =
+		(FIXED_A)1 << (SBC_PROTO_FIXED8_SCALE-1);
+
+	/* low pass polyphase filter */
+	for (hop = 0; hop < 80; hop += 16) {
+		t1[0] += (FIXED_A)in[hop] * _sbc_proto_fixed8[hop];
+		t1[1] += (FIXED_A)in[hop + 1] * _sbc_proto_fixed8[hop + 1];
+		t1[2] += (FIXED_A)in[hop + 2] * _sbc_proto_fixed8[hop + 2];
+		t1[3] += (FIXED_A)in[hop + 3] * _sbc_proto_fixed8[hop + 3];
+
+		t1[4] += (FIXED_A)in[hop + 4] * _sbc_proto_fixed8[hop + 4];
+
+		t1[0] += (FIXED_A)in[hop + 8] * _sbc_proto_fixed8[hop + 8];
+		t1[1] += (FIXED_A)in[hop + 7] * _sbc_proto_fixed8[hop + 7];
+		t1[2] += (FIXED_A)in[hop + 6] * _sbc_proto_fixed8[hop + 6];
+		t1[3] += (FIXED_A)in[hop + 5] * _sbc_proto_fixed8[hop + 5];
+
+		t1[5] += (FIXED_A)in[hop + 9] * _sbc_proto_fixed8[hop + 9];
+		t1[6] += (FIXED_A)in[hop + 10] * _sbc_proto_fixed8[hop + 10];
+		t1[7] += (FIXED_A)in[hop + 11] * _sbc_proto_fixed8[hop + 11];
+
+		t1[5] += (FIXED_A)in[hop + 15] * _sbc_proto_fixed8[hop + 15];
+		t1[6] += (FIXED_A)in[hop + 14] * _sbc_proto_fixed8[hop + 14];
+		t1[7] += (FIXED_A)in[hop + 13] * _sbc_proto_fixed8[hop + 13];
+	}
+
+	/* scaling */
+	t2[0] = t1[0] >> SBC_PROTO_FIXED8_SCALE;
+	t2[1] = t1[1] >> SBC_PROTO_FIXED8_SCALE;
+	t2[2] = t1[2] >> SBC_PROTO_FIXED8_SCALE;
+	t2[3] = t1[3] >> SBC_PROTO_FIXED8_SCALE;
+	t2[4] = t1[4] >> SBC_PROTO_FIXED8_SCALE;
+	t2[5] = t1[5] >> SBC_PROTO_FIXED8_SCALE;
+	t2[6] = t1[6] >> SBC_PROTO_FIXED8_SCALE;
+	t2[7] = t1[7] >> SBC_PROTO_FIXED8_SCALE;
+
+	/* do the cos transform */
+	for (i = 0, hop = 0; i < 8; hop += 16, i++) {
+		/* rounding coefficient */
+		t1[i] = (FIXED_A)1 << (SBC_COS_TABLE_FIXED8_SCALE-1-SCALE_OUT_BITS);
+
+		t1[i] += (FIXED_A)t2[0] * cos_table_fixed_8[0 + hop];
+		t1[i] += (FIXED_A)t2[1] * cos_table_fixed_8[1 + hop];
+		t1[i] += (FIXED_A)t2[2] * cos_table_fixed_8[2 + hop];
+		t1[i] += (FIXED_A)t2[3] * cos_table_fixed_8[3 + hop];
+		t1[i] += (FIXED_A)t2[4] * cos_table_fixed_8[4 + hop];
+		t1[i] += (FIXED_A)t2[5] * cos_table_fixed_8[9 + hop];
+		t1[i] += (FIXED_A)t2[6] * cos_table_fixed_8[10 + hop];
+		t1[i] += (FIXED_A)t2[7] * cos_table_fixed_8[11 + hop];
+	}
+
+	/* scaling */
+	out[0] = t1[0] >> (SBC_COS_TABLE_FIXED8_SCALE-SCALE_OUT_BITS);
+	out[1] = t1[1] >> (SBC_COS_TABLE_FIXED8_SCALE-SCALE_OUT_BITS);
+	out[2] = t1[2] >> (SBC_COS_TABLE_FIXED8_SCALE-SCALE_OUT_BITS);
+	out[3] = t1[3] >> (SBC_COS_TABLE_FIXED8_SCALE-SCALE_OUT_BITS);
+	out[4] = t1[4] >> (SBC_COS_TABLE_FIXED8_SCALE-SCALE_OUT_BITS);
+	out[5] = t1[5] >> (SBC_COS_TABLE_FIXED8_SCALE-SCALE_OUT_BITS);
+	out[6] = t1[6] >> (SBC_COS_TABLE_FIXED8_SCALE-SCALE_OUT_BITS);
+	out[7] = t1[7] >> (SBC_COS_TABLE_FIXED8_SCALE-SCALE_OUT_BITS);
 }
 
 static inline void sbc_analyze_eight(struct sbc_encoder_state *state,
 					struct sbc_frame *frame, int ch,
 					int blk)
 {
-	int32_t *x = &state->X[ch][state->position[ch]];
+	int16_t *x = &state->X[ch][state->position[ch]];
 	int16_t *pcm = &frame->pcm_sample[ch][blk * 8];
 
 	/* Input 8 Audio Samples */
@@ -1006,7 +935,7 @@ static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len)
 			frame->scale_factor[ch][sb] = 0;
 			scalefactor[ch][sb] = 2;
 			for (blk = 0; blk < frame->blocks; blk++) {
-				while (scalefactor[ch][sb] < fabs(frame->sb_sample_f[blk][ch][sb])) {
+				while ((scalefactor[ch][sb] << SCALE_OUT_BITS) <= neginv(frame->sb_sample_f[blk][ch][sb])) {
 					frame->scale_factor[ch][sb]++;
 					scalefactor[ch][sb] *= 2;
 				}
@@ -1040,11 +969,11 @@ static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len)
 						frame->sb_sample_f[blk][1][sb]) >> 1;
 
 				/* calculate scale_factor_j and scalefactor_j for joint case */
-				while (scalefactor_j[0] < fabs(sb_sample_j[blk][0])) {
+				while ((scalefactor_j[0] << SCALE_OUT_BITS) <= neginv(sb_sample_j[blk][0])) {
 					scale_factor_j[0]++;
 					scalefactor_j[0] *= 2;
 				}
-				while (scalefactor_j[1] < fabs(sb_sample_j[blk][1])) {
+				while ((scalefactor_j[1] << SCALE_OUT_BITS) <= neginv(sb_sample_j[blk][1])) {
 					scale_factor_j[1]++;
 					scalefactor_j[1] *= 2;
 				}
@@ -1100,11 +1029,11 @@ static int sbc_pack_frame(uint8_t *data, struct sbc_frame *frame, size_t len)
 		for (ch = 0; ch < frame->channels; ch++) {
 			for (sb = 0; sb < frame->subbands; sb++) {
 				if (levels[ch][sb] > 0) {
-					audio_sample =
-						(uint16_t) (((((int64_t)frame->sb_sample_f[blk][ch][sb]*levels[ch][sb]) >>
-									(frame->scale_factor[ch][sb] + 1)) +
-								levels[ch][sb]) >> 1);
-					PUT_BITS(audio_sample & levels[ch][sb], bits[ch][sb]);
+					int32_t sample = frame->sb_sample_f[blk][ch][sb];
+					int32_t s_shift = (frame->scale_factor[ch][sb] + 1 + SCALE_OUT_BITS);
+					int32_t ls = levels[ch][sb];
+					audio_sample = ((((int64_t)1 << s_shift) + sample) * ls) >> (s_shift + 1);
+					PUT_BITS(audio_sample, bits[ch][sb]);
 				}
 			}
 		}
diff --git a/sbc/sbc_math.h b/sbc/sbc_math.h
index b3d87a6..f3937ce 100644
--- a/sbc/sbc_math.h
+++ b/sbc/sbc_math.h
@@ -23,37 +23,30 @@
  *
  */
 
-#define fabs(x) ((x) < 0 ? -(x) : (x))
 /* C does not provide an explicit arithmetic shift right but this will
    always be correct and every compiler *should* generate optimal code */
 #define ASR(val, bits) ((-2 >> 1 == -1) ? \
 		 ((int32_t)(val)) >> (bits) : ((int32_t) (val)) / (1 << (bits)))
 
-#define SCALE_PROTO4_TBL	15
-#define SCALE_ANA4_TBL		17
-#define SCALE_PROTO8_TBL	16
-#define SCALE_ANA8_TBL		17
+#define neginv(x) ((-2 >> 1 == -1) ? \
+		 ((((int32_t)(x)) >> 31) ^ (int32_t)(x)) : \
+		 ((x >= 0) ? (x) : -(x)-1))
+
+#define SCALE_OUT_BITS 14
+
 #define SCALE_SPROTO4_TBL	12
 #define SCALE_SPROTO8_TBL	14
 #define SCALE_NPROTO4_TBL	11
 #define SCALE_NPROTO8_TBL	11
-#define SCALE4_STAGE1_BITS	15
-#define SCALE4_STAGE2_BITS	16
 #define SCALE4_STAGED1_BITS	15
 #define SCALE4_STAGED2_BITS	16
-#define SCALE8_STAGE1_BITS	15
-#define SCALE8_STAGE2_BITS	15
 #define SCALE8_STAGED1_BITS	15
 #define SCALE8_STAGED2_BITS	16
 
 typedef int32_t sbc_fixed_t;
 
-#define SCALE4_STAGE1(src)  ASR(src, SCALE4_STAGE1_BITS)
-#define SCALE4_STAGE2(src)  ASR(src, SCALE4_STAGE2_BITS)
 #define SCALE4_STAGED1(src) ASR(src, SCALE4_STAGED1_BITS)
 #define SCALE4_STAGED2(src) ASR(src, SCALE4_STAGED2_BITS)
-#define SCALE8_STAGE1(src)  ASR(src, SCALE8_STAGE1_BITS)
-#define SCALE8_STAGE2(src)  ASR(src, SCALE8_STAGE2_BITS)
 #define SCALE8_STAGED1(src) ASR(src, SCALE8_STAGED1_BITS)
 #define SCALE8_STAGED2(src) ASR(src, SCALE8_STAGED2_BITS)
 
diff --git a/sbc/sbc_tables.h b/sbc/sbc_tables.h
index f5daaa7..9c96732 100644
--- a/sbc/sbc_tables.h
+++ b/sbc/sbc_tables.h
@@ -40,40 +40,11 @@ static const int sbc_offset8[4][8] = {
 };
 
 
-#define SP4(val) (((int32_t)(val))/17658) /* Used to be #define SP4(val) ASR(val, SCALE_PROTO4_TBL) but causes wrong gain */
-#define SA4(val) ASR(val, SCALE_ANA4_TBL)
-#define SP8(val) (((int32_t)(val))/57740) /* Used to be #define SP8(val) ASR(val, SCALE_PROTO8_TBL) but causes wrong gain */
-#define SA8(val) ASR(val, SCALE_ANA8_TBL)
 #define SS4(val) ASR(val, SCALE_SPROTO4_TBL)
 #define SS8(val) ASR(val, SCALE_SPROTO8_TBL)
 #define SN4(val) ASR(val, SCALE_NPROTO4_TBL)
 #define SN8(val) ASR(val, SCALE_NPROTO8_TBL)
 
-static const int32_t _sbc_proto_4[20] = {
-	SP4(0x02cb3e8c), SP4(0x22b63dc0), SP4(0x002329cc), SP4(0x053b7548),
-	SP4(0x31eab940), SP4(0xec1f5e60), SP4(0xff3773a8), SP4(0x0061c5a7),
-	SP4(0x07646680), SP4(0x3f239480), SP4(0xf89f23a8), SP4(0x007a4737),
-	SP4(0x00b32807), SP4(0x083ddc80), SP4(0x4825e480), SP4(0x0191e578),
-	SP4(0x00ff11ca), SP4(0x00fb7991), SP4(0x069fdc58), SP4(0x4b584000)
-};
-
-static const int32_t _anamatrix4[4] = {
-	SA4(0x2d413cc0), SA4(0x3b20d780), SA4(0x40000000), SA4(0x187de2a0)
-};
-
-static const int32_t _sbc_proto_8[40] = {
-	SP8(0x02e5cd20), SP8(0x22d0c200), SP8(0x006bfe27), SP8(0x07808930),
-	SP8(0x3f1c8800), SP8(0xf8810d70), SP8(0x002cfdc6), SP8(0x055acf28),
-	SP8(0x31f566c0), SP8(0xebfe57e0), SP8(0xff27c437), SP8(0x001485cc),
-	SP8(0x041c6e58), SP8(0x2a7cfa80), SP8(0xe4c4a240), SP8(0xfe359e4c),
-	SP8(0x0048b1f8), SP8(0x0686ce30), SP8(0x38eec5c0), SP8(0xf2a1b9f0),
-	SP8(0xffe8904a), SP8(0x0095698a), SP8(0x0824a480), SP8(0x443b3c00),
-	SP8(0xfd7badc8), SP8(0x00d3e2d9), SP8(0x00c183d2), SP8(0x084e1950),
-	SP8(0x4810d800), SP8(0x017f43fe), SP8(0x01056dd8), SP8(0x00e9cb9f),
-	SP8(0x07d7d090), SP8(0x4a708980), SP8(0x0488fae8), SP8(0x0113bd20),
-	SP8(0x0107b1a8), SP8(0x069fb3c0), SP8(0x4b3db200), SP8(0x00763f48)
-};
-
 static const int32_t sbc_proto_4_40m0[] = {
 	SS4(0x00000000), SS4(0xffa6982f), SS4(0xfba93848), SS4(0x0456c7b8),
 	SS4(0x005967d1), SS4(0xfffb9ac7), SS4(0xff589157), SS4(0xf9c2a8d8),
@@ -116,11 +87,6 @@ static const int32_t sbc_proto_8_80m1[] = {
 	SS8(0x0d9daee0), SS8(0xeac182c0), SS8(0xfdf1c8d4), SS8(0xfff5bd1a)
 };
 
-static const int32_t _anamatrix8[8] = {
-	SA8(0x3b20d780), SA8(0x187de2a0), SA8(0x3ec52f80), SA8(0x3536cc40),
-	SA8(0x238e7680), SA8(0x0c7c5c20), SA8(0x2d413cc0), SA8(0x40000000)
-};
-
 static const int32_t synmatrix4[8][4] = {
 	{ SN4(0x05a82798), SN4(0xfa57d868), SN4(0xfa57d868), SN4(0x05a82798) },
 	{ SN4(0x030fbc54), SN4(0xf89be510), SN4(0x07641af0), SN4(0xfcf043ac) },
@@ -166,3 +132,169 @@ static const int32_t synmatrix8[16][8] = {
 	{ SN8(0xf9592678), SN8(0x018f8b84), SN8(0x07d8a5f0), SN8(0x0471ced0),
 	  SN8(0xfb8e3130), SN8(0xf8275a10), SN8(0xfe70747c), SN8(0x06a6d988) }
 };
+
+//#define SBC_HIGH_PRECISION
+
+#ifdef SBC_HIGH_PRECISION
+# define FIXED_A int64_t /* data type for fixed point accumulator */
+# define FIXED_T int32_t /* data type for fixed point constants */
+# define SBC_FIXED8_EXTRA_BITS 16
+#else
+# define FIXED_A int32_t /* data type for fixed point accumulator */
+# define FIXED_T int16_t /* data type for fixed point constants */
+# define SBC_FIXED8_EXTRA_BITS 0
+#endif
+
+/* A2DP specification: Section 12.8 Tables
+ *
+ * Original values are premultiplied by 4 for better precision (that is the
+ * maximum possible without overflow).
+ *
+ * Note: in each block of 16 numbers the sign was changed for elements 4, 13,
+ * 14 and 15 in order to compensate for the same change in cos_table_fixed_8.
+ */
+#define SBC_PROTO_FIXED8_SCALE (sizeof(FIXED_T)*CHAR_BIT-1-SBC_FIXED8_EXTRA_BITS+2)
+#define F(x) (FIXED_A)((x*4)*((FIXED_A)1<<(sizeof(FIXED_T)*CHAR_BIT-1))+0.5)
+static const FIXED_T _sbc_proto_fixed8[80] = {
+	 F(0.00000000E+00), F(1.56575398E-04), F(3.43256425E-04), F(5.54620202E-04),
+	-F(8.23919506E-04), F(1.13992507E-03), F(1.47640169E-03), F(1.78371725E-03),
+	 F(2.01182542E-03), F(2.10371989E-03), F(1.99454554E-03), F(1.61656283E-03),
+	 F(9.02154502E-04), F(1.78805361E-04), F(1.64973098E-03), F(3.49717454E-03),
+
+	 F(5.65949473E-03), F(8.02941163E-03), F(1.04584443E-02), F(1.27472335E-02),
+	-F(1.46525263E-02), F(1.59045603E-02), F(1.62208471E-02), F(1.53184106E-02),
+	 F(1.29371806E-02), F(8.85757540E-03), F(2.92408442E-03),-F(4.91578024E-03),
+	-F(1.46404076E-02), F(2.61098752E-02), F(3.90751381E-02), F(5.31873032E-02),
+
+	 F(6.79989431E-02), F(8.29847578E-02), F(9.75753918E-02), F(1.11196689E-01),
+	-F(1.23264548E-01), F(1.33264415E-01), F(1.40753505E-01), F(1.45389847E-01),
+	 F(1.46955068E-01), F(1.45389847E-01), F(1.40753505E-01), F(1.33264415E-01),
+	 F(1.23264548E-01),-F(1.11196689E-01),-F(9.75753918E-02),-F(8.29847578E-02),
+
+	-F(6.79989431E-02),-F(5.31873032E-02),-F(3.90751381E-02),-F(2.61098752E-02),
+	 F(1.46404076E-02),-F(4.91578024E-03), F(2.92408442E-03), F(8.85757540E-03),
+	 F(1.29371806E-02), F(1.53184106E-02), F(1.62208471E-02), F(1.59045603E-02),
+	 F(1.46525263E-02),-F(1.27472335E-02),-F(1.04584443E-02),-F(8.02941163E-03),
+
+	-F(5.65949473E-03),-F(3.49717454E-03),-F(1.64973098E-03),-F(1.78805361E-04),
+	-F(9.02154502E-04), F(1.61656283E-03), F(1.99454554E-03), F(2.10371989E-03),
+	 F(2.01182542E-03), F(1.78371725E-03), F(1.47640169E-03), F(1.13992507E-03),
+	 F(8.23919506E-04),-F(5.54620202E-04),-F(3.43256425E-04),-F(1.56575398E-04),
+};
+#undef F
+
+/*
+ * To produce this cosine matrix in Octave:
+ *
+ * b = zeros(8, 16);
+ * for i = 0:7 for j = 0:15 b(i+1, j+1) = cos( (i + 0.5) * (j - 4) * (pi/8) ) endfor endfor;
+ * printf("%.10f, ", b');
+ *
+ * Note: in each block of 16 numbers the sign was changed for elements 4, 13,
+ * 14 and 15. Changing the sign of element 4 replaces the constant 1.0 (not
+ * representable in Q15 format) with -1.0 (fine with Q15).
+ * Changing the signs of elements 13, 14 and 15 makes the constants more
+ * similar to each other and simplifies the subband filter function code.
+ */
+#define SBC_COS_TABLE_FIXED8_SCALE (sizeof(FIXED_T)*CHAR_BIT-1+SBC_FIXED8_EXTRA_BITS)
+#define F(x) (FIXED_A)((x)*((FIXED_A)1<<(sizeof(FIXED_T)*CHAR_BIT-1))+0.5)
+static const FIXED_T cos_table_fixed_8[128] = {
+	 F(0.7071067812), F(0.8314696123), F(0.9238795325), F(0.9807852804),
+	-F(1.0000000000), F(0.9807852804), F(0.9238795325), F(0.8314696123),
+	 F(0.7071067812), F(0.5555702330), F(0.3826834324), F(0.1950903220),
+	 F(0.0000000000), F(0.1950903220), F(0.3826834324), F(0.5555702330),
+
+	-F(0.7071067812),-F(0.1950903220), F(0.3826834324), F(0.8314696123),
+	-F(1.0000000000), F(0.8314696123), F(0.3826834324),-F(0.1950903220),
+	-F(0.7071067812),-F(0.9807852804),-F(0.9238795325),-F(0.5555702330),
+	-F(0.0000000000),-F(0.5555702330),-F(0.9238795325),-F(0.9807852804),
+
+	-F(0.7071067812),-F(0.9807852804),-F(0.3826834324), F(0.5555702330),
+	-F(1.0000000000), F(0.5555702330),-F(0.3826834324),-F(0.9807852804),
+	-F(0.7071067812), F(0.1950903220), F(0.9238795325), F(0.8314696123),
+	 F(0.0000000000), F(0.8314696123), F(0.9238795325), F(0.1950903220),
+
+	 F(0.7071067812),-F(0.5555702330),-F(0.9238795325), F(0.1950903220),
+	-F(1.0000000000), F(0.1950903220),-F(0.9238795325),-F(0.5555702330),
+	 F(0.7071067812), F(0.8314696123),-F(0.3826834324),-F(0.9807852804),
+	-F(0.0000000000),-F(0.9807852804),-F(0.3826834324), F(0.8314696123),
+
+	 F(0.7071067812), F(0.5555702330),-F(0.9238795325),-F(0.1950903220),
+	-F(1.0000000000),-F(0.1950903220),-F(0.9238795325), F(0.5555702330),
+	 F(0.7071067812),-F(0.8314696123),-F(0.3826834324), F(0.9807852804),
+	 F(0.0000000000), F(0.9807852804),-F(0.3826834324),-F(0.8314696123),
+
+	-F(0.7071067812), F(0.9807852804),-F(0.3826834324),-F(0.5555702330),
+	-F(1.0000000000),-F(0.5555702330),-F(0.3826834324), F(0.9807852804),
+	-F(0.7071067812),-F(0.1950903220), F(0.9238795325),-F(0.8314696123),
+	-F(0.0000000000),-F(0.8314696123), F(0.9238795325),-F(0.1950903220),
+
+	-F(0.7071067812), F(0.1950903220), F(0.3826834324),-F(0.8314696123),
+	-F(1.0000000000),-F(0.8314696123), F(0.3826834324), F(0.1950903220),
+	-F(0.7071067812), F(0.9807852804),-F(0.9238795325), F(0.5555702330),
+	-F(0.0000000000), F(0.5555702330),-F(0.9238795325), F(0.9807852804),
+
+	 F(0.7071067812),-F(0.8314696123), F(0.9238795325),-F(0.9807852804),
+	-F(1.0000000000),-F(0.9807852804), F(0.9238795325),-F(0.8314696123),
+	 F(0.7071067812),-F(0.5555702330), F(0.3826834324),-F(0.1950903220),
+	-F(0.0000000000),-F(0.1950903220), F(0.3826834324),-F(0.5555702330),
+};
+#undef F
+
+/* A2DP specification: Section 12.8 Tables
+ *
+ * Original values are premultiplied by 2 for better precision (that is the
+ * maximum possible without overflow).
+ *
+ * Note: in each block of 8 numbers the sign was changed for elements 2 and 7
+ * in order to compensate for the same change applied to cos_table_fixed_4.
+ */
+#define SBC_PROTO_FIXED4_SCALE (sizeof(FIXED_T)*CHAR_BIT-1-SBC_FIXED8_EXTRA_BITS+1)
+#define F(x) (FIXED_A)((x*2)*((FIXED_A)1<<(sizeof(FIXED_T)*CHAR_BIT-1))+0.5)
+static const FIXED_T _sbc_proto_fixed4[40] = {
+	 F(0.00000000E+00), F(5.36548976E-04),-F(1.49188357E-03), F(2.73370904E-03),
+	 F(3.83720193E-03), F(3.89205149E-03), F(1.86581691E-03), F(3.06012286E-03),
+
+	 F(1.09137620E-02), F(2.04385087E-02),-F(2.88757392E-02), F(3.21939290E-02),
+	 F(2.58767811E-02), F(6.13245186E-03),-F(2.88217274E-02), F(7.76463494E-02),
+
+	 F(1.35593274E-01), F(1.94987841E-01),-F(2.46636662E-01), F(2.81828203E-01),
+	 F(2.94315332E-01), F(2.81828203E-01), F(2.46636662E-01),-F(1.94987841E-01),
+
+	-F(1.35593274E-01),-F(7.76463494E-02), F(2.88217274E-02), F(6.13245186E-03),
+	 F(2.58767811E-02), F(3.21939290E-02), F(2.88757392E-02),-F(2.04385087E-02),
+
+	-F(1.09137620E-02),-F(3.06012286E-03),-F(1.86581691E-03), F(3.89205149E-03),
+	 F(3.83720193E-03), F(2.73370904E-03), F(1.49188357E-03),-F(5.36548976E-04),
+};
+#undef F
+
+/*
+ * To produce this cosine matrix in Octave:
+ *
+ * b = zeros(4, 8);
+ * for i = 0:3 for j = 0:7 b(i+1, j+1) = cos( (i + 0.5) * (j - 2) * (pi/4) ) endfor endfor;
+ * printf("F(%.10f), ", b');
+ *
+ * Note: in each block of 8 numbers the sign was changed for elements 2 and 7.
+ * Changing the sign of element 2 replaces the constant 1.0 (not representable
+ * in Q15 format) with -1.0 (fine with Q15).
+ * Changing the sign of element 7 makes the constants more similar to each
+ * other and simplifies the subband filter function code.
+ */
+#define SBC_COS_TABLE_FIXED4_SCALE (sizeof(FIXED_T)*CHAR_BIT-1+SBC_FIXED8_EXTRA_BITS)
+#define F(x) (FIXED_A)((x)*((FIXED_A)1<<(sizeof(FIXED_T)*CHAR_BIT-1))+0.5)
+static const FIXED_T cos_table_fixed_4[32] = {
+	 F(0.7071067812), F(0.9238795325),-F(1.0000000000), F(0.9238795325),
+	 F(0.7071067812), F(0.3826834324), F(0.0000000000), F(0.3826834324),
+
+	-F(0.7071067812), F(0.3826834324),-F(1.0000000000), F(0.3826834324),
+	-F(0.7071067812),-F(0.9238795325),-F(0.0000000000),-F(0.9238795325),
+
+	-F(0.7071067812),-F(0.3826834324),-F(1.0000000000),-F(0.3826834324),
+	-F(0.7071067812), F(0.9238795325), F(0.0000000000), F(0.9238795325),
+
+	 F(0.7071067812),-F(0.9238795325),-F(1.0000000000),-F(0.9238795325),
+	 F(0.7071067812),-F(0.3826834324),-F(0.0000000000),-F(0.3826834324),
+};
+#undef F

  reply	other threads:[~2008-12-22 23:30 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-11-28 13:35 [RFC/PATCH] sbc: new filtering function for 8 band fixed point encoding Jaska Uimonen
2008-11-28 14:18 ` Marcel Holtmann
2008-11-28 14:24   ` Jelle de Jong
2008-11-28 15:20     ` Jaska Uimonen
2008-11-28 18:13       ` David Sainty
2008-11-28 15:14   ` Jaska Uimonen
2008-12-02 20:15 ` Jim Carter
2008-12-12 17:14   ` Siarhei Siamashka
2008-12-12 19:19     ` Brad Midgley
2008-12-15 12:54       ` Siarhei Siamashka
2008-12-15 15:16         ` Brad Midgley
2008-12-16 22:37           ` Siarhei Siamashka
2008-12-17  8:16             ` Jaska Uimonen
2008-12-19 22:12             ` Siarhei Siamashka
2008-12-22 23:30               ` Siarhei Siamashka [this message]
2008-12-23  1:00                 ` Marcel Holtmann
2008-12-23  8:20                   ` Jaska.Uimonen
2008-12-23 11:14                     ` Siarhei Siamashka
2008-12-23 10:45                   ` Siarhei Siamashka
2008-12-23 11:48                     ` Marcel Holtmann
2008-12-29  9:16                       ` Testing SBC filtering functions Christian Hoene
2008-12-29 10:00                         ` Marcel Holtmann
2008-12-29 10:55                           ` Christian Hoene
2008-12-29 12:03                             ` Marcel Holtmann
2008-12-29 12:31                               ` Christian Hoene
2008-12-29 12:41                                 ` Marcel Holtmann
2008-12-29 13:11                                   ` Christian Hoene
2008-12-29 13:17                                     ` Marcel Holtmann
2009-01-01 14:29                                       ` Testing SBC encoder correctness with sbctester works Christian Hoene
2008-12-29 11:06                         ` Testing SBC filtering functions Siarhei Siamashka
2008-12-29 12:04                           ` Marcel Holtmann
2008-12-29 14:36                             ` Siarhei Siamashka
2008-12-29 15:04                               ` Siarhei Siamashka
2008-12-29 10:46                     ` [RFC/PATCH] sbc: new filtering function for 8 band fixed point encoding Siarhei Siamashka
2008-12-29 11:56                       ` Marcel Holtmann

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200812230130.43968.siarhei.siamashka@nokia.com \
    --to=siarhei.siamashka@nokia.com \
    --cc=bmidgley@gmail.com \
    --cc=jaska.uimonen@nokia.com \
    --cc=linux-bluetooth@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.