/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/audio_coding/codecs/isac/fix/source/codec.h"

// MIPS optimized implementation of the Autocorrelation function in fixed point.
// NOTE! Different from SPLIB-version in how it scales the signal.
15919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.orgint WebRtcIsacfix_AutocorrMIPS(int32_t* __restrict r, 16919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org const int16_t* __restrict x, 17919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org int16_t N, 18919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org int16_t order, 19919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org int16_t* __restrict scale) { 20919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org int i = 0; 21919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org int16_t scaling = 0; 22919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org int16_t* in = (int16_t*)x; 23919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org int loop_size = (int)(N >> 3); 24919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org int count = (int)(N & 7); 25919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org // Declare temporary variables used as registry values. 26919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org int32_t r0, r1, r2, r3; 27919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if !defined(MIPS_DSP_R2_LE) 28919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org // For non-DSPR2 optimizations 4 more registers are used. 29919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org int32_t r4, r5, r6, r7; 30919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif 31919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org 32919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org // Calculate r[0] and scaling needed. 
33919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org __asm __volatile ( 34919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org ".set push \n\t" 35919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org ".set noreorder \n\t" 36919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "mult $0, $0 \n\t" 37919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org // Loop is unrolled 8 times, set accumulator to zero in branch delay slot. 38919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "beqz %[loop_size], 2f \n\t" 39919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org " mult $0, $0 \n\t" 40919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "1: \n\t" 41919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org // Load 8 samples per loop iteration. 42919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if defined(MIPS_DSP_R2_LE) 43919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "ulw %[r0], 0(%[in]) \n\t" 44919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "ulw %[r1], 4(%[in]) \n\t" 45919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "ulw %[r2], 8(%[in]) \n\t" 46919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "ulw %[r3], 12(%[in]) \n\t" 47919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#else 48919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r0], 0(%[in]) \n\t" 49919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r1], 2(%[in]) \n\t" 50919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r2], 4(%[in]) \n\t" 51919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r3], 6(%[in]) \n\t" 52919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r4], 8(%[in]) \n\t" 53919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r5], 10(%[in]) \n\t" 54919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r6], 12(%[in]) \n\t" 55919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r7], 14(%[in]) \n\t" 
56919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif 57919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "addiu %[loop_size], %[loop_size], -1 \n\t" 58919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org // Multiply and accumulate. 59919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if defined(MIPS_DSP_R2_LE) 60919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "dpa.w.ph $ac0, %[r0], %[r0] \n\t" 61919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "dpa.w.ph $ac0, %[r1], %[r1] \n\t" 62919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "dpa.w.ph $ac0, %[r2], %[r2] \n\t" 63919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "dpa.w.ph $ac0, %[r3], %[r3] \n\t" 64919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#else 65919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "madd %[r0], %[r0] \n\t" 66919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "madd %[r1], %[r1] \n\t" 67919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "madd %[r2], %[r2] \n\t" 68919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "madd %[r3], %[r3] \n\t" 69919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "madd %[r4], %[r4] \n\t" 70919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "madd %[r5], %[r5] \n\t" 71919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "madd %[r6], %[r6] \n\t" 72919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "madd %[r7], %[r7] \n\t" 73919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif 74919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "bnez %[loop_size], 1b \n\t" 75919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org " addiu %[in], %[in], 16 \n\t" 76919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "2: \n\t" 77919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "beqz %[count], 4f \n\t" 78919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if defined(MIPS_DSP_R1_LE) 
79919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org " extr.w %[r0], $ac0, 31 \n\t" 80919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#else 81919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org " mfhi %[r2] \n\t" 82919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif 83919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org // Process remaining samples (if any). 84919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "3: \n\t" 85919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r0], 0(%[in]) \n\t" 86919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "addiu %[count], %[count], -1 \n\t" 87919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "madd %[r0], %[r0] \n\t" 88919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "bnez %[count], 3b \n\t" 89919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org " addiu %[in], %[in], 2 \n\t" 90919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if defined(MIPS_DSP_R1_LE) 91919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "extr.w %[r0], $ac0, 31 \n\t" 92919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#else 93919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "mfhi %[r2] \n\t" 94919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif 95919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "4: \n\t" 96919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if !defined(MIPS_DSP_R1_LE) 97919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "mflo %[r3] \n\t" 98919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "sll %[r0], %[r2], 1 \n\t" 99919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "srl %[r1], %[r3], 31 \n\t" 100919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "addu %[r0], %[r0], %[r1] \n\t" 101919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif 102919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org // Calculate scaling (the value of shifting). 
103919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "clz %[r1], %[r0] \n\t" 104919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "addiu %[r1], %[r1], -32 \n\t" 105919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "subu %[scaling], $0, %[r1] \n\t" 106919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "slti %[r1], %[r0], 0x1 \n\t" 107919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "movn %[scaling], $0, %[r1] \n\t" 108919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if defined(MIPS_DSP_R1_LE) 109919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "extrv.w %[r0], $ac0, %[scaling] \n\t" 110919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "mfhi %[r2], $ac0 \n\t" 111919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#else 112919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "addiu %[r1], %[scaling], -32 \n\t" 113919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "subu %[r1], $0, %[r1] \n\t" 114919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "sllv %[r1], %[r2], %[r1] \n\t" 115919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "srlv %[r0], %[r3], %[scaling] \n\t" 116919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "addu %[r0], %[r0], %[r1] \n\t" 117919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif 118919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "slti %[r1], %[scaling], 32 \n\t" 119919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "movz %[r0], %[r2], %[r1] \n\t" 120919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org ".set pop \n\t" 121919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org : [loop_size] "+r" (loop_size), [in] "+r" (in), [r0] "=&r" (r0), 122919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3), 123919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if !defined(MIPS_DSP_R2_LE) 124919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org [r4] "=&r" 
(r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7), 125919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif 126919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org [count] "+r" (count), [scaling] "=r" (scaling) 127919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org : [N] "r" (N) 128919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org : "memory", "hi", "lo" 129919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org ); 130919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org r[0] = r0; 131919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org 132919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org // Correlation calculation is divided in 3 cases depending on the scaling 133919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org // value (different accumulator manipulation needed). Three slightly different 134919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org // loops are written in order to avoid branches inside the loop. 135919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org if (scaling == 0) { 136919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org // In this case, the result will be in low part of the accumulator. 
137919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org for (i = 1; i < order + 1; i++) { 138919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org in = (int16_t*)x; 139919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org int16_t* in1 = (int16_t*)x + i; 140919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org count = N - i; 141919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org loop_size = (count) >> 2; 142919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org __asm __volatile ( 143919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org ".set push \n\t" 144919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org ".set noreorder \n\t" 145919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "mult $0, $0 \n\t" 146919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "beqz %[loop_size], 2f \n\t" 147919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org " andi %[count], %[count], 0x3 \n\t" 148919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org // Loop processing 4 pairs of samples per iteration. 
149919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "1: \n\t" 150919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if defined(MIPS_DSP_R2_LE) 151919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "ulw %[r0], 0(%[in]) \n\t" 152919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "ulw %[r1], 0(%[in1]) \n\t" 153919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "ulw %[r2], 4(%[in]) \n\t" 154919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "ulw %[r3], 4(%[in1]) \n\t" 155919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#else 156919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r0], 0(%[in]) \n\t" 157919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r1], 0(%[in1]) \n\t" 158919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r2], 2(%[in]) \n\t" 159919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r3], 2(%[in1]) \n\t" 160919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r4], 4(%[in]) \n\t" 161919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r5], 4(%[in1]) \n\t" 162919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r6], 6(%[in]) \n\t" 163919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r7], 6(%[in1]) \n\t" 164919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif 165919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "addiu %[loop_size], %[loop_size], -1 \n\t" 166919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if defined(MIPS_DSP_R2_LE) 167919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "dpa.w.ph $ac0, %[r0], %[r1] \n\t" 168919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "dpa.w.ph $ac0, %[r2], %[r3] \n\t" 169919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#else 170919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "madd %[r0], %[r1] \n\t" 171919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "madd %[r2], %[r3] \n\t" 
172919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "madd %[r4], %[r5] \n\t" 173919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "madd %[r6], %[r7] \n\t" 174919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif 175919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "addiu %[in], %[in], 8 \n\t" 176919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "bnez %[loop_size], 1b \n\t" 177919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org " addiu %[in1], %[in1], 8 \n\t" 178919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "2: \n\t" 179919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "beqz %[count], 4f \n\t" 180919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org " mflo %[r0] \n\t" 181919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org // Process remaining samples (if any). 182919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "3: \n\t" 183919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r0], 0(%[in]) \n\t" 184919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r1], 0(%[in1]) \n\t" 185919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "addiu %[count], %[count], -1 \n\t" 186919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "addiu %[in], %[in], 2 \n\t" 187919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "madd %[r0], %[r1] \n\t" 188919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "bnez %[count], 3b \n\t" 189919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org " addiu %[in1], %[in1], 2 \n\t" 190919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "mflo %[r0] \n\t" 191919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "4: \n\t" 192919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org ".set pop \n\t" 193919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org : [loop_size] "+r" (loop_size), [in] "+r" (in), [in1] "+r" (in1), 194919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if !defined(MIPS_DSP_R2_LE) 
195919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7), 196919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif 197919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3), 198919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org [count] "+r" (count) 199919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org : 200919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org : "memory", "hi", "lo" 201919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org ); 202919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org r[i] = r0; 203919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org } 204919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org } else if (scaling == 32) { 205919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org // In this case, the result will be high part of the accumulator. 206919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org for (i = 1; i < order + 1; i++) { 207919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org in = (int16_t*)x; 208919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org int16_t* in1 = (int16_t*)x + i; 209919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org count = N - i; 210919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org loop_size = (count) >> 2; 211919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org __asm __volatile ( 212919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org ".set push \n\t" 213919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org ".set noreorder \n\t" 214919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "mult $0, $0 \n\t" 215919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "beqz %[loop_size], 2f \n\t" 216919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org " andi %[count], %[count], 0x3 \n\t" 217919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org // Loop processing 4 pairs of 
samples per iteration. 218919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "1: \n\t" 219919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if defined(MIPS_DSP_R2_LE) 220919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "ulw %[r0], 0(%[in]) \n\t" 221919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "ulw %[r1], 0(%[in1]) \n\t" 222919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "ulw %[r2], 4(%[in]) \n\t" 223919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "ulw %[r3], 4(%[in1]) \n\t" 224919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#else 225919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r0], 0(%[in]) \n\t" 226919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r1], 0(%[in1]) \n\t" 227919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r2], 2(%[in]) \n\t" 228919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r3], 2(%[in1]) \n\t" 229919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r4], 4(%[in]) \n\t" 230919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r5], 4(%[in1]) \n\t" 231919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r6], 6(%[in]) \n\t" 232919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r7], 6(%[in1]) \n\t" 233919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif 234919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "addiu %[loop_size], %[loop_size], -1 \n\t" 235919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if defined(MIPS_DSP_R2_LE) 236919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "dpa.w.ph $ac0, %[r0], %[r1] \n\t" 237919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "dpa.w.ph $ac0, %[r2], %[r3] \n\t" 238919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#else 239919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "madd %[r0], %[r1] \n\t" 240919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "madd %[r2], %[r3] \n\t" 
241919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "madd %[r4], %[r5] \n\t" 242919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "madd %[r6], %[r7] \n\t" 243919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif 244919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "addiu %[in], %[in], 8 \n\t" 245919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "bnez %[loop_size], 1b \n\t" 246919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org " addiu %[in1], %[in1], 8 \n\t" 247919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "2: \n\t" 248919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "beqz %[count], 4f \n\t" 249919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org " mfhi %[r0] \n\t" 250919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org // Process remaining samples (if any). 251919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "3: \n\t" 252919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r0], 0(%[in]) \n\t" 253919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r1], 0(%[in1]) \n\t" 254919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "addiu %[count], %[count], -1 \n\t" 255919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "addiu %[in], %[in], 2 \n\t" 256919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "madd %[r0], %[r1] \n\t" 257919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "bnez %[count], 3b \n\t" 258919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org " addiu %[in1], %[in1], 2 \n\t" 259919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "mfhi %[r0] \n\t" 260919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "4: \n\t" 261919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org ".set pop \n\t" 262919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org : [loop_size] "+r" (loop_size), [in] "+r" (in), [in1] "+r" (in1), 263919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if !defined(MIPS_DSP_R2_LE) 
264919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7), 265919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif 266919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3), 267919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org [count] "+r" (count) 268919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org : 269919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org : "memory", "hi", "lo" 270919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org ); 271919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org r[i] = r0; 272919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org } 273919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org } else { 274919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org // In this case, the result is obtained by combining low and high parts 275919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org // of the accumulator. 
276919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if !defined(MIPS_DSP_R1_LE) 277919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org int32_t tmp_shift = 32 - scaling; 278919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif 279919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org for (i = 1; i < order + 1; i++) { 280919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org in = (int16_t*)x; 281919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org int16_t* in1 = (int16_t*)x + i; 282919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org count = N - i; 283919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org loop_size = (count) >> 2; 284919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org __asm __volatile ( 285919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org ".set push \n\t" 286919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org ".set noreorder \n\t" 287919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "mult $0, $0 \n\t" 288919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "beqz %[loop_size], 2f \n\t" 289919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org " andi %[count], %[count], 0x3 \n\t" 290919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "1: \n\t" 291919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if defined(MIPS_DSP_R2_LE) 292919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "ulw %[r0], 0(%[in]) \n\t" 293919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "ulw %[r1], 0(%[in1]) \n\t" 294919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "ulw %[r2], 4(%[in]) \n\t" 295919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "ulw %[r3], 4(%[in1]) \n\t" 296919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#else 297919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r0], 0(%[in]) \n\t" 298919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r1], 0(%[in1]) \n\t" 
299919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r2], 2(%[in]) \n\t" 300919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r3], 2(%[in1]) \n\t" 301919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r4], 4(%[in]) \n\t" 302919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r5], 4(%[in1]) \n\t" 303919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r6], 6(%[in]) \n\t" 304919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r7], 6(%[in1]) \n\t" 305919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif 306919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "addiu %[loop_size], %[loop_size], -1 \n\t" 307919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if defined(MIPS_DSP_R2_LE) 308919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "dpa.w.ph $ac0, %[r0], %[r1] \n\t" 309919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "dpa.w.ph $ac0, %[r2], %[r3] \n\t" 310919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#else 311919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "madd %[r0], %[r1] \n\t" 312919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "madd %[r2], %[r3] \n\t" 313919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "madd %[r4], %[r5] \n\t" 314919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "madd %[r6], %[r7] \n\t" 315919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif 316919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "addiu %[in], %[in], 8 \n\t" 317919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "bnez %[loop_size], 1b \n\t" 318919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org " addiu %[in1], %[in1], 8 \n\t" 319919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "2: \n\t" 320919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "beqz %[count], 4f \n\t" 321919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if defined(MIPS_DSP_R1_LE) 
322919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org " extrv.w %[r0], $ac0, %[scaling] \n\t" 323919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#else 324919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org " mfhi %[r0] \n\t" 325919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif 326919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "3: \n\t" 327919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r0], 0(%[in]) \n\t" 328919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "lh %[r1], 0(%[in1]) \n\t" 329919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "addiu %[count], %[count], -1 \n\t" 330919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "addiu %[in], %[in], 2 \n\t" 331919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "madd %[r0], %[r1] \n\t" 332919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "bnez %[count], 3b \n\t" 333919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org " addiu %[in1], %[in1], 2 \n\t" 334919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if defined(MIPS_DSP_R1_LE) 335919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "extrv.w %[r0], $ac0, %[scaling] \n\t" 336919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#else 337919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "mfhi %[r0] \n\t" 338919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif 339919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "4: \n\t" 340919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if !defined(MIPS_DSP_R1_LE) 341919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "mflo %[r1] \n\t" 342919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "sllv %[r0], %[r0], %[tmp_shift] \n\t" 343919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "srlv %[r1], %[r1], %[scaling] \n\t" 344919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org "addu %[r0], %[r0], %[r1] \n\t" 
345919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif 346919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org ".set pop \n\t" 347919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org : [loop_size] "+r" (loop_size), [in] "+r" (in), [in1] "+r" (in1), 348919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if !defined(MIPS_DSP_R2_LE) 349919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org [r4] "=&r" (r4), [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7), 350919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif 351919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), [r3] "=&r" (r3), 352919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org [count] "+r" (count) 353919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org : [scaling] "r" (scaling) 354919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#if !defined(MIPS_DSP_R1_LE) 355919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org , [tmp_shift] "r" (tmp_shift) 356919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org#endif 357919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org : "memory", "hi", "lo" 358919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org ); 359919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org r[i] = r0; 360919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org } 361919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org } 362919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org *scale = scaling; 363919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org 364919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org return (order + 1); 365919914d71becdf4ef3a322d0af0d997c7f458e7candrew@webrtc.org} 366