/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h"

13// MIPS optimized implementation of the Autocorrelation function in fixed point.
14// NOTE! Different from SPLIB-version in how it scales the signal.
15int WebRtcIsacfix_AutocorrMIPS(int32_t* __restrict r,
16                               const int16_t* __restrict x,
17                               int16_t N,
18                               int16_t order,
19                               int16_t* __restrict scale) {
20  int i = 0;
21  int16_t scaling = 0;
22  int16_t* in = (int16_t*)x;
23  int loop_size = (int)(N >> 3);
24  int count = (int)(N & 7);
25  // Declare temporary variables used as registry values.
26  int32_t r0, r1, r2, r3;
27#if !defined(MIPS_DSP_R2_LE)
28  // For non-DSPR2 optimizations 4 more registers are used.
29  int32_t r4, r5, r6, r7;
30#endif
31
32  // Calculate r[0] and scaling needed.
33  __asm __volatile (
34    ".set          push                                            \n\t"
35    ".set          noreorder                                       \n\t"
36    "mult          $0,             $0                              \n\t"
37    // Loop is unrolled 8 times, set accumulator to zero in branch delay slot.
38    "beqz          %[loop_size],   2f                              \n\t"
39    " mult         $0,             $0                              \n\t"
40   "1:                                                             \n\t"
41    // Load 8 samples per loop iteration.
42#if defined(MIPS_DSP_R2_LE)
43    "ulw           %[r0],          0(%[in])                        \n\t"
44    "ulw           %[r1],          4(%[in])                        \n\t"
45    "ulw           %[r2],          8(%[in])                        \n\t"
46    "ulw           %[r3],          12(%[in])                       \n\t"
47#else
48    "lh            %[r0],          0(%[in])                        \n\t"
49    "lh            %[r1],          2(%[in])                        \n\t"
50    "lh            %[r2],          4(%[in])                        \n\t"
51    "lh            %[r3],          6(%[in])                        \n\t"
52    "lh            %[r4],          8(%[in])                        \n\t"
53    "lh            %[r5],          10(%[in])                       \n\t"
54    "lh            %[r6],          12(%[in])                       \n\t"
55    "lh            %[r7],          14(%[in])                       \n\t"
56#endif
57    "addiu         %[loop_size],   %[loop_size],   -1              \n\t"
58    // Multiply and accumulate.
59#if defined(MIPS_DSP_R2_LE)
60    "dpa.w.ph      $ac0,           %[r0],          %[r0]           \n\t"
61    "dpa.w.ph      $ac0,           %[r1],          %[r1]           \n\t"
62    "dpa.w.ph      $ac0,           %[r2],          %[r2]           \n\t"
63    "dpa.w.ph      $ac0,           %[r3],          %[r3]           \n\t"
64#else
65    "madd          %[r0],          %[r0]                           \n\t"
66    "madd          %[r1],          %[r1]                           \n\t"
67    "madd          %[r2],          %[r2]                           \n\t"
68    "madd          %[r3],          %[r3]                           \n\t"
69    "madd          %[r4],          %[r4]                           \n\t"
70    "madd          %[r5],          %[r5]                           \n\t"
71    "madd          %[r6],          %[r6]                           \n\t"
72    "madd          %[r7],          %[r7]                           \n\t"
73#endif
74    "bnez          %[loop_size],   1b                              \n\t"
75    " addiu        %[in],          %[in],          16              \n\t"
76   "2:                                                             \n\t"
77    "beqz          %[count],       4f                              \n\t"
78#if defined(MIPS_DSP_R1_LE)
79    " extr.w       %[r0],          $ac0,           31              \n\t"
80#else
81    " mfhi         %[r2]                                           \n\t"
82#endif
83    // Process remaining samples (if any).
84   "3:                                                             \n\t"
85    "lh            %[r0],          0(%[in])                        \n\t"
86    "addiu         %[count],       %[count],       -1              \n\t"
87    "madd          %[r0],          %[r0]                           \n\t"
88    "bnez          %[count],       3b                              \n\t"
89    " addiu        %[in],          %[in],          2               \n\t"
90#if defined(MIPS_DSP_R1_LE)
91    "extr.w        %[r0],          $ac0,           31              \n\t"
92#else
93    "mfhi          %[r2]                                           \n\t"
94#endif
95   "4:                                                             \n\t"
96#if !defined(MIPS_DSP_R1_LE)
97    "mflo          %[r3]                                           \n\t"
98    "sll           %[r0],          %[r2],          1               \n\t"
99    "srl           %[r1],          %[r3],          31              \n\t"
100    "addu          %[r0],          %[r0],          %[r1]           \n\t"
101#endif
102    // Calculate scaling (the value of shifting).
103    "clz           %[r1],          %[r0]                           \n\t"
104    "addiu         %[r1],          %[r1],          -32             \n\t"
105    "subu          %[scaling],     $0,             %[r1]           \n\t"
106    "slti          %[r1],          %[r0],          0x1             \n\t"
107    "movn          %[scaling],     $0,             %[r1]           \n\t"
108#if defined(MIPS_DSP_R1_LE)
109    "extrv.w       %[r0],          $ac0,           %[scaling]      \n\t"
110    "mfhi          %[r2],          $ac0                            \n\t"
111#else
112    "addiu         %[r1],          %[scaling],     -32             \n\t"
113    "subu          %[r1],          $0,             %[r1]           \n\t"
114    "sllv          %[r1],          %[r2],          %[r1]           \n\t"
115    "srlv          %[r0],          %[r3],          %[scaling]      \n\t"
116    "addu          %[r0],          %[r0],          %[r1]           \n\t"
117#endif
118    "slti          %[r1],          %[scaling],     32              \n\t"
119    "movz          %[r0],          %[r2],          %[r1]           \n\t"
120    ".set          pop                                             \n\t"
121    : [loop_size] "+r" (loop_size), [in] "+r" (in), [r0] "=&r" (r0),
122      [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
123#if !defined(MIPS_DSP_R2_LE)
124      [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
125#endif
126      [count] "+r" (count), [scaling] "=r" (scaling)
127    : [N] "r" (N)
128    : "memory", "hi", "lo"
129  );
130  r[0] = r0;
131
132  // Correlation calculation is divided in 3 cases depending on the scaling
133  // value (different accumulator manipulation needed). Three slightly different
134  // loops are written in order to avoid branches inside the loop.
135  if (scaling == 0) {
136    // In this case, the result will be in low part of the accumulator.
137    for (i = 1; i < order + 1; i++) {
138      in = (int16_t*)x;
139      int16_t* in1 = (int16_t*)x + i;
140      count = N - i;
141      loop_size = (count) >> 2;
142      __asm  __volatile (
143        ".set        push                                          \n\t"
144        ".set        noreorder                                     \n\t"
145        "mult        $0,             $0                            \n\t"
146        "beqz        %[loop_size],   2f                            \n\t"
147        " andi       %[count],       %[count],       0x3           \n\t"
148        // Loop processing 4 pairs of samples per iteration.
149       "1:                                                         \n\t"
150#if defined(MIPS_DSP_R2_LE)
151        "ulw         %[r0],          0(%[in])                      \n\t"
152        "ulw         %[r1],          0(%[in1])                     \n\t"
153        "ulw         %[r2],          4(%[in])                      \n\t"
154        "ulw         %[r3],          4(%[in1])                     \n\t"
155#else
156        "lh          %[r0],          0(%[in])                      \n\t"
157        "lh          %[r1],          0(%[in1])                     \n\t"
158        "lh          %[r2],          2(%[in])                      \n\t"
159        "lh          %[r3],          2(%[in1])                     \n\t"
160        "lh          %[r4],          4(%[in])                      \n\t"
161        "lh          %[r5],          4(%[in1])                     \n\t"
162        "lh          %[r6],          6(%[in])                      \n\t"
163        "lh          %[r7],          6(%[in1])                     \n\t"
164#endif
165        "addiu       %[loop_size],   %[loop_size],   -1            \n\t"
166#if defined(MIPS_DSP_R2_LE)
167        "dpa.w.ph    $ac0,           %[r0],          %[r1]         \n\t"
168        "dpa.w.ph    $ac0,           %[r2],          %[r3]         \n\t"
169#else
170        "madd        %[r0],          %[r1]                         \n\t"
171        "madd        %[r2],          %[r3]                         \n\t"
172        "madd        %[r4],          %[r5]                         \n\t"
173        "madd        %[r6],          %[r7]                         \n\t"
174#endif
175        "addiu       %[in],          %[in],          8             \n\t"
176        "bnez        %[loop_size],   1b                            \n\t"
177        " addiu      %[in1],         %[in1],         8             \n\t"
178       "2:                                                         \n\t"
179        "beqz        %[count],       4f                            \n\t"
180        " mflo       %[r0]                                         \n\t"
181        // Process remaining samples (if any).
182       "3:                                                         \n\t"
183        "lh          %[r0],          0(%[in])                      \n\t"
184        "lh          %[r1],          0(%[in1])                     \n\t"
185        "addiu       %[count],       %[count],       -1            \n\t"
186        "addiu       %[in],          %[in],          2             \n\t"
187        "madd        %[r0],          %[r1]                         \n\t"
188        "bnez        %[count],       3b                            \n\t"
189        " addiu      %[in1],         %[in1],         2             \n\t"
190        "mflo        %[r0]                                         \n\t"
191       "4:                                                         \n\t"
192        ".set        pop                                           \n\t"
193        : [loop_size] "+r" (loop_size), [in] "+r" (in), [in1] "+r" (in1),
194#if !defined(MIPS_DSP_R2_LE)
195          [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
196#endif
197          [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
198          [count] "+r" (count)
199        :
200        : "memory", "hi", "lo"
201      );
202      r[i] = r0;
203    }
204  } else if (scaling == 32) {
205    // In this case, the result will be high part of the accumulator.
206    for (i = 1; i < order + 1; i++) {
207      in = (int16_t*)x;
208      int16_t* in1 = (int16_t*)x + i;
209      count = N - i;
210      loop_size = (count) >> 2;
211      __asm __volatile (
212        ".set        push                                          \n\t"
213        ".set        noreorder                                     \n\t"
214        "mult        $0,             $0                            \n\t"
215        "beqz        %[loop_size],   2f                            \n\t"
216        " andi       %[count],       %[count],       0x3           \n\t"
217        // Loop processing 4 pairs of samples per iteration.
218       "1:                                                         \n\t"
219#if defined(MIPS_DSP_R2_LE)
220        "ulw         %[r0],          0(%[in])                      \n\t"
221        "ulw         %[r1],          0(%[in1])                     \n\t"
222        "ulw         %[r2],          4(%[in])                      \n\t"
223        "ulw         %[r3],          4(%[in1])                     \n\t"
224#else
225        "lh          %[r0],          0(%[in])                      \n\t"
226        "lh          %[r1],          0(%[in1])                     \n\t"
227        "lh          %[r2],          2(%[in])                      \n\t"
228        "lh          %[r3],          2(%[in1])                     \n\t"
229        "lh          %[r4],          4(%[in])                      \n\t"
230        "lh          %[r5],          4(%[in1])                     \n\t"
231        "lh          %[r6],          6(%[in])                      \n\t"
232        "lh          %[r7],          6(%[in1])                     \n\t"
233#endif
234        "addiu       %[loop_size],   %[loop_size],   -1            \n\t"
235#if defined(MIPS_DSP_R2_LE)
236        "dpa.w.ph    $ac0,           %[r0],          %[r1]         \n\t"
237        "dpa.w.ph    $ac0,           %[r2],          %[r3]         \n\t"
238#else
239        "madd        %[r0],          %[r1]                         \n\t"
240        "madd        %[r2],          %[r3]                         \n\t"
241        "madd        %[r4],          %[r5]                         \n\t"
242        "madd        %[r6],          %[r7]                         \n\t"
243#endif
244        "addiu       %[in],          %[in],          8             \n\t"
245        "bnez        %[loop_size],   1b                            \n\t"
246        " addiu      %[in1],         %[in1],         8             \n\t"
247       "2:                                                         \n\t"
248        "beqz        %[count],       4f                            \n\t"
249        " mfhi       %[r0]                                         \n\t"
250        // Process remaining samples (if any).
251       "3:                                                         \n\t"
252        "lh          %[r0],          0(%[in])                      \n\t"
253        "lh          %[r1],          0(%[in1])                     \n\t"
254        "addiu       %[count],       %[count],       -1            \n\t"
255        "addiu       %[in],          %[in],          2             \n\t"
256        "madd        %[r0],          %[r1]                         \n\t"
257        "bnez        %[count],       3b                            \n\t"
258        " addiu      %[in1],         %[in1],         2             \n\t"
259        "mfhi        %[r0]                                         \n\t"
260       "4:                                                         \n\t"
261        ".set        pop                                           \n\t"
262        : [loop_size] "+r" (loop_size), [in] "+r" (in), [in1] "+r" (in1),
263#if !defined(MIPS_DSP_R2_LE)
264          [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
265#endif
266          [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
267          [count] "+r" (count)
268        :
269        : "memory", "hi", "lo"
270      );
271      r[i] = r0;
272    }
273  } else {
274    // In this case, the result is obtained by combining low and high parts
275    // of the accumulator.
276#if !defined(MIPS_DSP_R1_LE)
277    int32_t tmp_shift = 32 - scaling;
278#endif
279    for (i = 1; i < order + 1; i++) {
280      in = (int16_t*)x;
281      int16_t* in1 = (int16_t*)x + i;
282      count = N - i;
283      loop_size = (count) >> 2;
284      __asm __volatile (
285        ".set        push                                          \n\t"
286        ".set        noreorder                                     \n\t"
287        "mult        $0,             $0                            \n\t"
288        "beqz        %[loop_size],   2f                            \n\t"
289        " andi       %[count],       %[count],       0x3           \n\t"
290       "1:                                                         \n\t"
291#if defined(MIPS_DSP_R2_LE)
292        "ulw         %[r0],          0(%[in])                      \n\t"
293        "ulw         %[r1],          0(%[in1])                     \n\t"
294        "ulw         %[r2],          4(%[in])                      \n\t"
295        "ulw         %[r3],          4(%[in1])                     \n\t"
296#else
297        "lh          %[r0],          0(%[in])                      \n\t"
298        "lh          %[r1],          0(%[in1])                     \n\t"
299        "lh          %[r2],          2(%[in])                      \n\t"
300        "lh          %[r3],          2(%[in1])                     \n\t"
301        "lh          %[r4],          4(%[in])                      \n\t"
302        "lh          %[r5],          4(%[in1])                     \n\t"
303        "lh          %[r6],          6(%[in])                      \n\t"
304        "lh          %[r7],          6(%[in1])                     \n\t"
305#endif
306        "addiu       %[loop_size],   %[loop_size],   -1            \n\t"
307#if defined(MIPS_DSP_R2_LE)
308        "dpa.w.ph    $ac0,           %[r0],          %[r1]         \n\t"
309        "dpa.w.ph    $ac0,           %[r2],          %[r3]         \n\t"
310#else
311        "madd        %[r0],          %[r1]                         \n\t"
312        "madd        %[r2],          %[r3]                         \n\t"
313        "madd        %[r4],          %[r5]                         \n\t"
314        "madd        %[r6],          %[r7]                         \n\t"
315#endif
316        "addiu       %[in],          %[in],          8             \n\t"
317        "bnez        %[loop_size],   1b                            \n\t"
318        " addiu      %[in1],         %[in1],         8             \n\t"
319       "2:                                                         \n\t"
320        "beqz        %[count],       4f                            \n\t"
321#if defined(MIPS_DSP_R1_LE)
322        " extrv.w    %[r0],          $ac0,           %[scaling]    \n\t"
323#else
324        " mfhi       %[r0]                                         \n\t"
325#endif
326       "3:                                                         \n\t"
327        "lh          %[r0],          0(%[in])                      \n\t"
328        "lh          %[r1],          0(%[in1])                     \n\t"
329        "addiu       %[count],       %[count],       -1            \n\t"
330        "addiu       %[in],          %[in],          2             \n\t"
331        "madd        %[r0],          %[r1]                         \n\t"
332        "bnez        %[count],       3b                            \n\t"
333        " addiu      %[in1],         %[in1],         2             \n\t"
334#if defined(MIPS_DSP_R1_LE)
335        "extrv.w     %[r0],          $ac0,           %[scaling]    \n\t"
336#else
337        "mfhi        %[r0]                                         \n\t"
338#endif
339       "4:                                                         \n\t"
340#if !defined(MIPS_DSP_R1_LE)
341        "mflo        %[r1]                                         \n\t"
342        "sllv        %[r0],          %[r0],          %[tmp_shift]  \n\t"
343        "srlv        %[r1],          %[r1],          %[scaling]    \n\t"
344        "addu        %[r0],          %[r0],          %[r1]         \n\t"
345#endif
346        ".set        pop                                           \n\t"
347        : [loop_size] "+r" (loop_size), [in] "+r" (in), [in1] "+r" (in1),
348#if !defined(MIPS_DSP_R2_LE)
349          [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
350#endif
351          [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
352          [count] "+r" (count)
353        : [scaling] "r" (scaling)
354#if !defined(MIPS_DSP_R1_LE)
355        , [tmp_shift] "r" (tmp_shift)
356#endif
357        : "memory", "hi", "lo"
358      );
359      r[i] = r0;
360    }
361  }
362  *scale = scaling;
363
364  return (order + 1);
365}
366