1/* 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11#include "webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h" 12 13// MIPS optimized implementation of the Autocorrelation function in fixed point. 14// NOTE! Different from SPLIB-version in how it scales the signal. 15int WebRtcIsacfix_AutocorrMIPS(int32_t* __restrict r, 16 const int16_t* __restrict x, 17 int16_t N, 18 int16_t order, 19 int16_t* __restrict scale) { 20 int i = 0; 21 int16_t scaling = 0; 22 int16_t* in = (int16_t*)x; 23 int loop_size = (int)(N >> 3); 24 int count = (int)(N & 7); 25 // Declare temporary variables used as registry values. 26 int32_t r0, r1, r2, r3; 27#if !defined(MIPS_DSP_R2_LE) 28 // For non-DSPR2 optimizations 4 more registers are used. 29 int32_t r4, r5, r6, r7; 30#endif 31 32 // Calculate r[0] and scaling needed. 33 __asm __volatile ( 34 ".set push \n\t" 35 ".set noreorder \n\t" 36 "mult $0, $0 \n\t" 37 // Loop is unrolled 8 times, set accumulator to zero in branch delay slot. 38 "beqz %[loop_size], 2f \n\t" 39 " mult $0, $0 \n\t" 40 "1: \n\t" 41 // Load 8 samples per loop iteration. 42#if defined(MIPS_DSP_R2_LE) 43 "ulw %[r0], 0(%[in]) \n\t" 44 "ulw %[r1], 4(%[in]) \n\t" 45 "ulw %[r2], 8(%[in]) \n\t" 46 "ulw %[r3], 12(%[in]) \n\t" 47#else 48 "lh %[r0], 0(%[in]) \n\t" 49 "lh %[r1], 2(%[in]) \n\t" 50 "lh %[r2], 4(%[in]) \n\t" 51 "lh %[r3], 6(%[in]) \n\t" 52 "lh %[r4], 8(%[in]) \n\t" 53 "lh %[r5], 10(%[in]) \n\t" 54 "lh %[r6], 12(%[in]) \n\t" 55 "lh %[r7], 14(%[in]) \n\t" 56#endif 57 "addiu %[loop_size], %[loop_size], -1 \n\t" 58 // Multiply and accumulate. 59#if defined(MIPS_DSP_R2_LE) 60 "dpa.w.ph $ac0, %[r0], %[r0] \n\t" 61 "dpa.w.ph $ac0, %[r1], %[r1] \n\t" 62 "dpa.w.ph $ac0, %[r2], %[r2] \n\t" 63 "dpa.w.ph $ac0, %[r3], %[r3] \n\t" 64#else 65 "madd %[r0], %[r0] \n\t" 66 "madd %[r1], %[r1] \n\t" 67 "madd %[r2], %[r2] \n\t" 68 "madd %[r3], %[r3] \n\t" 69 "madd %[r4], %[r4] \n\t" 70 "madd %[r5], %[r5] \n\t" 71 "madd %[r6], %[r6] \n\t" 72 "madd %[r7], %[r7] \n\t" 73#endif 74 "bnez %[loop_size], 1b \n\t" 75 " addiu %[in], %[in], 16 \n\t" 76 "2: \n\t" 77 "beqz %[count], 4f \n\t" 78#if defined(MIPS_DSP_R1_LE) 79 " extr.w %[r0], $ac0, 31 \n\t" 80#else 81 " mfhi %[r2] \n\t" 82#endif 83 // Process remaining samples (if any). 84 "3: \n\t" 85 "lh %[r0], 0(%[in]) \n\t" 86 "addiu %[count], %[count], -1 \n\t" 87 "madd %[r0], %[r0] \n\t" 88 "bnez %[count], 3b \n\t" 89 " addiu %[in], %[in], 2 \n\t" 90#if defined(MIPS_DSP_R1_LE) 91 "extr.w %[r0], $ac0, 31 \n\t" 92#else 93 "mfhi %[r2] \n\t" 94#endif 95 "4: \n\t" 96#if !defined(MIPS_DSP_R1_LE) 97 "mflo %[r3] \n\t" 98 "sll %[r0], %[r2], 1 \n\t" 99 "srl %[r1], %[r3], 31 \n\t" 100 "addu %[r0], %[r0], %[r1] \n\t" 101#endif 102 // Calculate scaling (the value of shifting). 103 "clz %[r1], %[r0] \n\t" 104 "addiu %[r1], %[r1], -32 \n\t" 105 "subu %[scaling], $0, %[r1] \n\t" 106 "slti %[r1], %[r0], 0x1 \n\t" 107 "movn %[scaling], $0, %[r1] \n\t" 108#if defined(MIPS_DSP_R1_LE) 109 "extrv.w %[r0], $ac0, %[scaling] \n\t" 110 "mfhi %[r2], $ac0 \n\t" 111#else 112 "addiu %[r1], %[scaling], -32 \n\t" 113 "subu %[r1], $0, %[r1] \n\t" 114 "sllv %[r1], %[r2], %[r1] \n\t" 115 "srlv %[r0], %[r3], %[scaling] \n\t" 116 "addu %[r0], %[r0], %[r1] \n\t" 117#endif 118 "slti %[r1], %[scaling], 32 \n\t" 119 "movz %[r0], %[r2], %[r1] \n\t" 120 ".set pop \n\t" 121 : [loop_size] "+r" (loop_size), [in] "+r" (in), [r0] "=&r" (r0), 122 [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3), 123#if !defined(MIPS_DSP_R2_LE) 124 [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7), 125#endif 126 [count] "+r" (count), [scaling] "=r" (scaling) 127 : [N] "r" (N) 128 : "memory", "hi", "lo" 129 ); 130 r[0] = r0; 131 132 // Correlation calculation is divided in 3 cases depending on the scaling 133 // value (different accumulator manipulation needed). Three slightly different 134 // loops are written in order to avoid branches inside the loop. 135 if (scaling == 0) { 136 // In this case, the result will be in low part of the accumulator. 137 for (i = 1; i < order + 1; i++) { 138 in = (int16_t*)x; 139 int16_t* in1 = (int16_t*)x + i; 140 count = N - i; 141 loop_size = (count) >> 2; 142 __asm __volatile ( 143 ".set push \n\t" 144 ".set noreorder \n\t" 145 "mult $0, $0 \n\t" 146 "beqz %[loop_size], 2f \n\t" 147 " andi %[count], %[count], 0x3 \n\t" 148 // Loop processing 4 pairs of samples per iteration. 149 "1: \n\t" 150#if defined(MIPS_DSP_R2_LE) 151 "ulw %[r0], 0(%[in]) \n\t" 152 "ulw %[r1], 0(%[in1]) \n\t" 153 "ulw %[r2], 4(%[in]) \n\t" 154 "ulw %[r3], 4(%[in1]) \n\t" 155#else 156 "lh %[r0], 0(%[in]) \n\t" 157 "lh %[r1], 0(%[in1]) \n\t" 158 "lh %[r2], 2(%[in]) \n\t" 159 "lh %[r3], 2(%[in1]) \n\t" 160 "lh %[r4], 4(%[in]) \n\t" 161 "lh %[r5], 4(%[in1]) \n\t" 162 "lh %[r6], 6(%[in]) \n\t" 163 "lh %[r7], 6(%[in1]) \n\t" 164#endif 165 "addiu %[loop_size], %[loop_size], -1 \n\t" 166#if defined(MIPS_DSP_R2_LE) 167 "dpa.w.ph $ac0, %[r0], %[r1] \n\t" 168 "dpa.w.ph $ac0, %[r2], %[r3] \n\t" 169#else 170 "madd %[r0], %[r1] \n\t" 171 "madd %[r2], %[r3] \n\t" 172 "madd %[r4], %[r5] \n\t" 173 "madd %[r6], %[r7] \n\t" 174#endif 175 "addiu %[in], %[in], 8 \n\t" 176 "bnez %[loop_size], 1b \n\t" 177 " addiu %[in1], %[in1], 8 \n\t" 178 "2: \n\t" 179 "beqz %[count], 4f \n\t" 180 " mflo %[r0] \n\t" 181 // Process remaining samples (if any). 182 "3: \n\t" 183 "lh %[r0], 0(%[in]) \n\t" 184 "lh %[r1], 0(%[in1]) \n\t" 185 "addiu %[count], %[count], -1 \n\t" 186 "addiu %[in], %[in], 2 \n\t" 187 "madd %[r0], %[r1] \n\t" 188 "bnez %[count], 3b \n\t" 189 " addiu %[in1], %[in1], 2 \n\t" 190 "mflo %[r0] \n\t" 191 "4: \n\t" 192 ".set pop \n\t" 193 : [loop_size] "+r" (loop_size), [in] "+r" (in), [in1] "+r" (in1), 194#if !defined(MIPS_DSP_R2_LE) 195 [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7), 196#endif 197 [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3), 198 [count] "+r" (count) 199 : 200 : "memory", "hi", "lo" 201 ); 202 r[i] = r0; 203 } 204 } else if (scaling == 32) { 205 // In this case, the result will be high part of the accumulator. 206 for (i = 1; i < order + 1; i++) { 207 in = (int16_t*)x; 208 int16_t* in1 = (int16_t*)x + i; 209 count = N - i; 210 loop_size = (count) >> 2; 211 __asm __volatile ( 212 ".set push \n\t" 213 ".set noreorder \n\t" 214 "mult $0, $0 \n\t" 215 "beqz %[loop_size], 2f \n\t" 216 " andi %[count], %[count], 0x3 \n\t" 217 // Loop processing 4 pairs of samples per iteration. 218 "1: \n\t" 219#if defined(MIPS_DSP_R2_LE) 220 "ulw %[r0], 0(%[in]) \n\t" 221 "ulw %[r1], 0(%[in1]) \n\t" 222 "ulw %[r2], 4(%[in]) \n\t" 223 "ulw %[r3], 4(%[in1]) \n\t" 224#else 225 "lh %[r0], 0(%[in]) \n\t" 226 "lh %[r1], 0(%[in1]) \n\t" 227 "lh %[r2], 2(%[in]) \n\t" 228 "lh %[r3], 2(%[in1]) \n\t" 229 "lh %[r4], 4(%[in]) \n\t" 230 "lh %[r5], 4(%[in1]) \n\t" 231 "lh %[r6], 6(%[in]) \n\t" 232 "lh %[r7], 6(%[in1]) \n\t" 233#endif 234 "addiu %[loop_size], %[loop_size], -1 \n\t" 235#if defined(MIPS_DSP_R2_LE) 236 "dpa.w.ph $ac0, %[r0], %[r1] \n\t" 237 "dpa.w.ph $ac0, %[r2], %[r3] \n\t" 238#else 239 "madd %[r0], %[r1] \n\t" 240 "madd %[r2], %[r3] \n\t" 241 "madd %[r4], %[r5] \n\t" 242 "madd %[r6], %[r7] \n\t" 243#endif 244 "addiu %[in], %[in], 8 \n\t" 245 "bnez %[loop_size], 1b \n\t" 246 " addiu %[in1], %[in1], 8 \n\t" 247 "2: \n\t" 248 "beqz %[count], 4f \n\t" 249 " mfhi %[r0] \n\t" 250 // Process remaining samples (if any). 251 "3: \n\t" 252 "lh %[r0], 0(%[in]) \n\t" 253 "lh %[r1], 0(%[in1]) \n\t" 254 "addiu %[count], %[count], -1 \n\t" 255 "addiu %[in], %[in], 2 \n\t" 256 "madd %[r0], %[r1] \n\t" 257 "bnez %[count], 3b \n\t" 258 " addiu %[in1], %[in1], 2 \n\t" 259 "mfhi %[r0] \n\t" 260 "4: \n\t" 261 ".set pop \n\t" 262 : [loop_size] "+r" (loop_size), [in] "+r" (in), [in1] "+r" (in1), 263#if !defined(MIPS_DSP_R2_LE) 264 [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7), 265#endif 266 [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3), 267 [count] "+r" (count) 268 : 269 : "memory", "hi", "lo" 270 ); 271 r[i] = r0; 272 } 273 } else { 274 // In this case, the result is obtained by combining low and high parts 275 // of the accumulator. 276#if !defined(MIPS_DSP_R1_LE) 277 int32_t tmp_shift = 32 - scaling; 278#endif 279 for (i = 1; i < order + 1; i++) { 280 in = (int16_t*)x; 281 int16_t* in1 = (int16_t*)x + i; 282 count = N - i; 283 loop_size = (count) >> 2; 284 __asm __volatile ( 285 ".set push \n\t" 286 ".set noreorder \n\t" 287 "mult $0, $0 \n\t" 288 "beqz %[loop_size], 2f \n\t" 289 " andi %[count], %[count], 0x3 \n\t" 290 "1: \n\t" 291#if defined(MIPS_DSP_R2_LE) 292 "ulw %[r0], 0(%[in]) \n\t" 293 "ulw %[r1], 0(%[in1]) \n\t" 294 "ulw %[r2], 4(%[in]) \n\t" 295 "ulw %[r3], 4(%[in1]) \n\t" 296#else 297 "lh %[r0], 0(%[in]) \n\t" 298 "lh %[r1], 0(%[in1]) \n\t" 299 "lh %[r2], 2(%[in]) \n\t" 300 "lh %[r3], 2(%[in1]) \n\t" 301 "lh %[r4], 4(%[in]) \n\t" 302 "lh %[r5], 4(%[in1]) \n\t" 303 "lh %[r6], 6(%[in]) \n\t" 304 "lh %[r7], 6(%[in1]) \n\t" 305#endif 306 "addiu %[loop_size], %[loop_size], -1 \n\t" 307#if defined(MIPS_DSP_R2_LE) 308 "dpa.w.ph $ac0, %[r0], %[r1] \n\t" 309 "dpa.w.ph $ac0, %[r2], %[r3] \n\t" 310#else 311 "madd %[r0], %[r1] \n\t" 312 "madd %[r2], %[r3] \n\t" 313 "madd %[r4], %[r5] \n\t" 314 "madd %[r6], %[r7] \n\t" 315#endif 316 "addiu %[in], %[in], 8 \n\t" 317 "bnez %[loop_size], 1b \n\t" 318 " addiu %[in1], %[in1], 8 \n\t" 319 "2: \n\t" 320 "beqz %[count], 4f \n\t" 321#if defined(MIPS_DSP_R1_LE) 322 " extrv.w %[r0], $ac0, %[scaling] \n\t" 323#else 324 " mfhi %[r0] \n\t" 325#endif 326 "3: \n\t" 327 "lh %[r0], 0(%[in]) \n\t" 328 "lh %[r1], 0(%[in1]) \n\t" 329 "addiu %[count], %[count], -1 \n\t" 330 "addiu %[in], %[in], 2 \n\t" 331 "madd %[r0], %[r1] \n\t" 332 "bnez %[count], 3b \n\t" 333 " addiu %[in1], %[in1], 2 \n\t" 334#if defined(MIPS_DSP_R1_LE) 335 "extrv.w %[r0], $ac0, %[scaling] \n\t" 336#else 337 "mfhi %[r0] \n\t" 338#endif 339 "4: \n\t" 340#if !defined(MIPS_DSP_R1_LE) 341 "mflo %[r1] \n\t" 342 "sllv %[r0], %[r0], %[tmp_shift] \n\t" 343 "srlv %[r1], %[r1], %[scaling] \n\t" 344 "addu %[r0], %[r0], %[r1] \n\t" 345#endif 346 ".set pop \n\t" 347 : [loop_size] "+r" (loop_size), [in] "+r" (in), [in1] "+r" (in1), 348#if !defined(MIPS_DSP_R2_LE) 349 [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7), 350#endif 351 [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3), 352 [count] "+r" (count) 353 : [scaling] "r" (scaling) 354#if !defined(MIPS_DSP_R1_LE) 355 , [tmp_shift] "r" (tmp_shift) 356#endif 357 : "memory", "hi", "lo" 358 ); 359 r[i] = r0; 360 } 361 } 362 *scale = scaling; 363 364 return (order + 1); 365} 366