1919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org/*
2919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org *
4919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org *  Use of this source code is governed by a BSD-style license
5919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org *  that can be found in the LICENSE file in the root of the source
6919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org *  tree. An additional intellectual property rights grant can be found
7919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org *  in the file PATENTS.  All contributing project authors may
8919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org *  be found in the AUTHORS file in the root of the source tree.
9919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org */
10919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org
11919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#include "webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h"
12919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org
13919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org// MIPS-optimized fixed-point implementation of the autocorrelation function.
14919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org// NOTE: This differs from the SPLIB version in how it scales the signal.
15919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.orgint WebRtcIsacfix_AutocorrMIPS(int32_t* __restrict r,
16919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org                               const int16_t* __restrict x,
17919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org                               int16_t N,
18919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org                               int16_t order,
19919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org                               int16_t* __restrict scale) {
20919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org  int i = 0;
21919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org  int16_t scaling = 0;
22919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org  int16_t* in = (int16_t*)x;
23919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org  int loop_size = (int)(N >> 3);
24919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org  int count = (int)(N & 7);
25919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org  // Declare temporary variables used as registry values.
26919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org  int32_t r0, r1, r2, r3;
27919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if !defined(MIPS_DSP_R2_LE)
28919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org  // For non-DSPR2 optimizations 4 more registers are used.
29919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org  int32_t r4, r5, r6, r7;
30919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif
31919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org
32919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org  // Calculate r[0] and scaling needed.
33919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org  __asm __volatile (
34919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    ".set          push                                            \n\t"
35919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    ".set          noreorder                                       \n\t"
36919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "mult          $0,             $0                              \n\t"
37919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    // Loop is unrolled 8 times, set accumulator to zero in branch delay slot.
38919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "beqz          %[loop_size],   2f                              \n\t"
39919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    " mult         $0,             $0                              \n\t"
40919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org   "1:                                                             \n\t"
41919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    // Load 8 samples per loop iteration.
42919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if defined(MIPS_DSP_R2_LE)
43919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "ulw           %[r0],          0(%[in])                        \n\t"
44919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "ulw           %[r1],          4(%[in])                        \n\t"
45919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "ulw           %[r2],          8(%[in])                        \n\t"
46919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "ulw           %[r3],          12(%[in])                       \n\t"
47919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#else
48919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "lh            %[r0],          0(%[in])                        \n\t"
49919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "lh            %[r1],          2(%[in])                        \n\t"
50919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "lh            %[r2],          4(%[in])                        \n\t"
51919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "lh            %[r3],          6(%[in])                        \n\t"
52919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "lh            %[r4],          8(%[in])                        \n\t"
53919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "lh            %[r5],          10(%[in])                       \n\t"
54919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "lh            %[r6],          12(%[in])                       \n\t"
55919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "lh            %[r7],          14(%[in])                       \n\t"
56919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif
57919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "addiu         %[loop_size],   %[loop_size],   -1              \n\t"
58919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    // Multiply and accumulate.
59919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if defined(MIPS_DSP_R2_LE)
60919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "dpa.w.ph      $ac0,           %[r0],          %[r0]           \n\t"
61919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "dpa.w.ph      $ac0,           %[r1],          %[r1]           \n\t"
62919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "dpa.w.ph      $ac0,           %[r2],          %[r2]           \n\t"
63919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "dpa.w.ph      $ac0,           %[r3],          %[r3]           \n\t"
64919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#else
65919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "madd          %[r0],          %[r0]                           \n\t"
66919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "madd          %[r1],          %[r1]                           \n\t"
67919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "madd          %[r2],          %[r2]                           \n\t"
68919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "madd          %[r3],          %[r3]                           \n\t"
69919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "madd          %[r4],          %[r4]                           \n\t"
70919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "madd          %[r5],          %[r5]                           \n\t"
71919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "madd          %[r6],          %[r6]                           \n\t"
72919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "madd          %[r7],          %[r7]                           \n\t"
73919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif
74919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "bnez          %[loop_size],   1b                              \n\t"
75919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    " addiu        %[in],          %[in],          16              \n\t"
76919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org   "2:                                                             \n\t"
77919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "beqz          %[count],       4f                              \n\t"
78919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if defined(MIPS_DSP_R1_LE)
79919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    " extr.w       %[r0],          $ac0,           31              \n\t"
80919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#else
81919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    " mfhi         %[r2]                                           \n\t"
82919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif
83919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    // Process remaining samples (if any).
84919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org   "3:                                                             \n\t"
85919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "lh            %[r0],          0(%[in])                        \n\t"
86919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "addiu         %[count],       %[count],       -1              \n\t"
87919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "madd          %[r0],          %[r0]                           \n\t"
88919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "bnez          %[count],       3b                              \n\t"
89919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    " addiu        %[in],          %[in],          2               \n\t"
90919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if defined(MIPS_DSP_R1_LE)
91919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "extr.w        %[r0],          $ac0,           31              \n\t"
92919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#else
93919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "mfhi          %[r2]                                           \n\t"
94919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif
95919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org   "4:                                                             \n\t"
96919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if !defined(MIPS_DSP_R1_LE)
97919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "mflo          %[r3]                                           \n\t"
98919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "sll           %[r0],          %[r2],          1               \n\t"
99919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "srl           %[r1],          %[r3],          31              \n\t"
100919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "addu          %[r0],          %[r0],          %[r1]           \n\t"
101919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif
102919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    // Calculate scaling (the value of shifting).
103919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "clz           %[r1],          %[r0]                           \n\t"
104919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "addiu         %[r1],          %[r1],          -32             \n\t"
105919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "subu          %[scaling],     $0,             %[r1]           \n\t"
106919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "slti          %[r1],          %[r0],          0x1             \n\t"
107919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "movn          %[scaling],     $0,             %[r1]           \n\t"
108919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if defined(MIPS_DSP_R1_LE)
109919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "extrv.w       %[r0],          $ac0,           %[scaling]      \n\t"
110919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "mfhi          %[r2],          $ac0                            \n\t"
111919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#else
112919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "addiu         %[r1],          %[scaling],     -32             \n\t"
113919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "subu          %[r1],          $0,             %[r1]           \n\t"
114919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "sllv          %[r1],          %[r2],          %[r1]           \n\t"
115919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "srlv          %[r0],          %[r3],          %[scaling]      \n\t"
116919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "addu          %[r0],          %[r0],          %[r1]           \n\t"
117919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif
118919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "slti          %[r1],          %[scaling],     32              \n\t"
119919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    "movz          %[r0],          %[r2],          %[r1]           \n\t"
120919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    ".set          pop                                             \n\t"
121919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    : [loop_size] "+r" (loop_size), [in] "+r" (in), [r0] "=&r" (r0),
122919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org      [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
123919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if !defined(MIPS_DSP_R2_LE)
124919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org      [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
125919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif
126919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org      [count] "+r" (count), [scaling] "=r" (scaling)
127919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    : [N] "r" (N)
128919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    : "memory", "hi", "lo"
129919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org  );
130919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org  r[0] = r0;
131919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org
132919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org  // Correlation calculation is divided in 3 cases depending on the scaling
133919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org  // value (different accumulator manipulation needed). Three slightly different
134919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org  // loops are written in order to avoid branches inside the loop.
135919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org  if (scaling == 0) {
136919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    // In this case, the result will be in low part of the accumulator.
137919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    for (i = 1; i < order + 1; i++) {
138919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org      in = (int16_t*)x;
139919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org      int16_t* in1 = (int16_t*)x + i;
140919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org      count = N - i;
141919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org      loop_size = (count) >> 2;
142919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org      __asm  __volatile (
143919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        ".set        push                                          \n\t"
144919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        ".set        noreorder                                     \n\t"
145919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "mult        $0,             $0                            \n\t"
146919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "beqz        %[loop_size],   2f                            \n\t"
147919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        " andi       %[count],       %[count],       0x3           \n\t"
148919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        // Loop processing 4 pairs of samples per iteration.
149919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org       "1:                                                         \n\t"
150919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if defined(MIPS_DSP_R2_LE)
151919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "ulw         %[r0],          0(%[in])                      \n\t"
152919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "ulw         %[r1],          0(%[in1])                     \n\t"
153919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "ulw         %[r2],          4(%[in])                      \n\t"
154919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "ulw         %[r3],          4(%[in1])                     \n\t"
155919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#else
156919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r0],          0(%[in])                      \n\t"
157919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r1],          0(%[in1])                     \n\t"
158919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r2],          2(%[in])                      \n\t"
159919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r3],          2(%[in1])                     \n\t"
160919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r4],          4(%[in])                      \n\t"
161919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r5],          4(%[in1])                     \n\t"
162919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r6],          6(%[in])                      \n\t"
163919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r7],          6(%[in1])                     \n\t"
164919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif
165919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "addiu       %[loop_size],   %[loop_size],   -1            \n\t"
166919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if defined(MIPS_DSP_R2_LE)
167919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "dpa.w.ph    $ac0,           %[r0],          %[r1]         \n\t"
168919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "dpa.w.ph    $ac0,           %[r2],          %[r3]         \n\t"
169919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#else
170919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "madd        %[r0],          %[r1]                         \n\t"
171919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "madd        %[r2],          %[r3]                         \n\t"
172919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "madd        %[r4],          %[r5]                         \n\t"
173919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "madd        %[r6],          %[r7]                         \n\t"
174919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif
175919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "addiu       %[in],          %[in],          8             \n\t"
176919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "bnez        %[loop_size],   1b                            \n\t"
177919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        " addiu      %[in1],         %[in1],         8             \n\t"
178919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org       "2:                                                         \n\t"
179919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "beqz        %[count],       4f                            \n\t"
180919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        " mflo       %[r0]                                         \n\t"
181919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        // Process remaining samples (if any).
182919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org       "3:                                                         \n\t"
183919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r0],          0(%[in])                      \n\t"
184919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r1],          0(%[in1])                     \n\t"
185919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "addiu       %[count],       %[count],       -1            \n\t"
186919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "addiu       %[in],          %[in],          2             \n\t"
187919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "madd        %[r0],          %[r1]                         \n\t"
188919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "bnez        %[count],       3b                            \n\t"
189919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        " addiu      %[in1],         %[in1],         2             \n\t"
190919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "mflo        %[r0]                                         \n\t"
191919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org       "4:                                                         \n\t"
192919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        ".set        pop                                           \n\t"
193919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        : [loop_size] "+r" (loop_size), [in] "+r" (in), [in1] "+r" (in1),
194919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if !defined(MIPS_DSP_R2_LE)
195919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org          [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
196919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif
197919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org          [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
198919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org          [count] "+r" (count)
199919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        :
200919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        : "memory", "hi", "lo"
201919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org      );
202919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org      r[i] = r0;
203919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    }
204919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org  } else if (scaling == 32) {
205919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    // In this case, the result will be high part of the accumulator.
206919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    for (i = 1; i < order + 1; i++) {
207919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org      in = (int16_t*)x;
208919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org      int16_t* in1 = (int16_t*)x + i;
209919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org      count = N - i;
210919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org      loop_size = (count) >> 2;
211919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org      __asm __volatile (
212919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        ".set        push                                          \n\t"
213919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        ".set        noreorder                                     \n\t"
214919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "mult        $0,             $0                            \n\t"
215919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "beqz        %[loop_size],   2f                            \n\t"
216919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        " andi       %[count],       %[count],       0x3           \n\t"
217919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        // Loop processing 4 pairs of samples per iteration.
218919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org       "1:                                                         \n\t"
219919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if defined(MIPS_DSP_R2_LE)
220919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "ulw         %[r0],          0(%[in])                      \n\t"
221919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "ulw         %[r1],          0(%[in1])                     \n\t"
222919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "ulw         %[r2],          4(%[in])                      \n\t"
223919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "ulw         %[r3],          4(%[in1])                     \n\t"
224919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#else
225919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r0],          0(%[in])                      \n\t"
226919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r1],          0(%[in1])                     \n\t"
227919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r2],          2(%[in])                      \n\t"
228919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r3],          2(%[in1])                     \n\t"
229919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r4],          4(%[in])                      \n\t"
230919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r5],          4(%[in1])                     \n\t"
231919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r6],          6(%[in])                      \n\t"
232919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r7],          6(%[in1])                     \n\t"
233919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif
234919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "addiu       %[loop_size],   %[loop_size],   -1            \n\t"
235919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if defined(MIPS_DSP_R2_LE)
236919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "dpa.w.ph    $ac0,           %[r0],          %[r1]         \n\t"
237919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "dpa.w.ph    $ac0,           %[r2],          %[r3]         \n\t"
238919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#else
239919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "madd        %[r0],          %[r1]                         \n\t"
240919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "madd        %[r2],          %[r3]                         \n\t"
241919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "madd        %[r4],          %[r5]                         \n\t"
242919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "madd        %[r6],          %[r7]                         \n\t"
243919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif
244919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "addiu       %[in],          %[in],          8             \n\t"
245919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "bnez        %[loop_size],   1b                            \n\t"
246919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        " addiu      %[in1],         %[in1],         8             \n\t"
247919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org       "2:                                                         \n\t"
248919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "beqz        %[count],       4f                            \n\t"
249919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        " mfhi       %[r0]                                         \n\t"
250919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        // Process remaining samples (if any).
251919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org       "3:                                                         \n\t"
252919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r0],          0(%[in])                      \n\t"
253919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r1],          0(%[in1])                     \n\t"
254919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "addiu       %[count],       %[count],       -1            \n\t"
255919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "addiu       %[in],          %[in],          2             \n\t"
256919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "madd        %[r0],          %[r1]                         \n\t"
257919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "bnez        %[count],       3b                            \n\t"
258919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        " addiu      %[in1],         %[in1],         2             \n\t"
259919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "mfhi        %[r0]                                         \n\t"
260919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org       "4:                                                         \n\t"
261919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        ".set        pop                                           \n\t"
262919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        : [loop_size] "+r" (loop_size), [in] "+r" (in), [in1] "+r" (in1),
263919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if !defined(MIPS_DSP_R2_LE)
264919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org          [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
265919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif
266919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org          [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
267919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org          [count] "+r" (count)
268919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        :
269919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        : "memory", "hi", "lo"
270919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org      );
271919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org      r[i] = r0;
272919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    }
273919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org  } else {
274919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    // In this case, the result is obtained by combining low and high parts
275919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    // of the accumulator.
276919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if !defined(MIPS_DSP_R1_LE)
277919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    int32_t tmp_shift = 32 - scaling;
278919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif
279919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    for (i = 1; i < order + 1; i++) {
280919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org      in = (int16_t*)x;
281919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org      int16_t* in1 = (int16_t*)x + i;
282919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org      count = N - i;
283919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org      loop_size = (count) >> 2;
284919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org      __asm __volatile (
285919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        ".set        push                                          \n\t"
286919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        ".set        noreorder                                     \n\t"
287919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "mult        $0,             $0                            \n\t"
288919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "beqz        %[loop_size],   2f                            \n\t"
289919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        " andi       %[count],       %[count],       0x3           \n\t"
290919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org       "1:                                                         \n\t"
291919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if defined(MIPS_DSP_R2_LE)
292919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "ulw         %[r0],          0(%[in])                      \n\t"
293919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "ulw         %[r1],          0(%[in1])                     \n\t"
294919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "ulw         %[r2],          4(%[in])                      \n\t"
295919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "ulw         %[r3],          4(%[in1])                     \n\t"
296919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#else
297919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r0],          0(%[in])                      \n\t"
298919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r1],          0(%[in1])                     \n\t"
299919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r2],          2(%[in])                      \n\t"
300919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r3],          2(%[in1])                     \n\t"
301919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r4],          4(%[in])                      \n\t"
302919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r5],          4(%[in1])                     \n\t"
303919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r6],          6(%[in])                      \n\t"
304919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r7],          6(%[in1])                     \n\t"
305919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif
306919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "addiu       %[loop_size],   %[loop_size],   -1            \n\t"
307919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if defined(MIPS_DSP_R2_LE)
308919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "dpa.w.ph    $ac0,           %[r0],          %[r1]         \n\t"
309919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "dpa.w.ph    $ac0,           %[r2],          %[r3]         \n\t"
310919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#else
311919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "madd        %[r0],          %[r1]                         \n\t"
312919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "madd        %[r2],          %[r3]                         \n\t"
313919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "madd        %[r4],          %[r5]                         \n\t"
314919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "madd        %[r6],          %[r7]                         \n\t"
315919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif
316919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "addiu       %[in],          %[in],          8             \n\t"
317919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "bnez        %[loop_size],   1b                            \n\t"
318919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        " addiu      %[in1],         %[in1],         8             \n\t"
319919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org       "2:                                                         \n\t"
320919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "beqz        %[count],       4f                            \n\t"
321919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if defined(MIPS_DSP_R1_LE)
322919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        " extrv.w    %[r0],          $ac0,           %[scaling]    \n\t"
323919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#else
324919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        " mfhi       %[r0]                                         \n\t"
325919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif
326919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org       "3:                                                         \n\t"
327919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r0],          0(%[in])                      \n\t"
328919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "lh          %[r1],          0(%[in1])                     \n\t"
329919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "addiu       %[count],       %[count],       -1            \n\t"
330919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "addiu       %[in],          %[in],          2             \n\t"
331919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "madd        %[r0],          %[r1]                         \n\t"
332919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "bnez        %[count],       3b                            \n\t"
333919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        " addiu      %[in1],         %[in1],         2             \n\t"
334919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if defined(MIPS_DSP_R1_LE)
335919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "extrv.w     %[r0],          $ac0,           %[scaling]    \n\t"
336919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#else
337919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "mfhi        %[r0]                                         \n\t"
338919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif
339919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org       "4:                                                         \n\t"
340919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if !defined(MIPS_DSP_R1_LE)
341919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "mflo        %[r1]                                         \n\t"
342919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "sllv        %[r0],          %[r0],          %[tmp_shift]  \n\t"
343919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "srlv        %[r1],          %[r1],          %[scaling]    \n\t"
344919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        "addu        %[r0],          %[r0],          %[r1]         \n\t"
345919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif
346919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        ".set        pop                                           \n\t"
347919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        : [loop_size] "+r" (loop_size), [in] "+r" (in), [in1] "+r" (in1),
348919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if !defined(MIPS_DSP_R2_LE)
349919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org          [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7),
350919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif
351919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org          [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3),
352919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org          [count] "+r" (count)
353919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        : [scaling] "r" (scaling)
354919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if !defined(MIPS_DSP_R1_LE)
355919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        , [tmp_shift] "r" (tmp_shift)
356919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif
357919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org        : "memory", "hi", "lo"
358919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org      );
359919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org      r[i] = r0;
360919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org    }
361919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org  }
362919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org  *scale = scaling;
363919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org
364919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org  return (order + 1);
365919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org}
366