/*
 *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/audio_coding/codecs/isac/fix/source/pitch_estimator.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/system_wrappers/include/compile_assert_c.h"

// Defined in another translation unit. The call sites in this file treat the
// return value as a base-2 logarithm in Q8 format (per their comments);
// NOTE(review): confirm against the definition.
extern int32_t WebRtcIsacfix_Log2Q8(uint32_t x);

17void WebRtcIsacfix_PCorr2Q32(const int16_t* in, int32_t* logcorQ8) {
18  int16_t scaling,n,k;
19  int32_t ysum32,csum32, lys, lcs;
20  const int32_t oneQ8 = 1 << 8;  // 1.00 in Q8
21  const int16_t* x;
22  const int16_t* inptr;
23
24  x = in + PITCH_MAX_LAG / 2 + 2;
25  scaling = WebRtcSpl_GetScalingSquare((int16_t*)in,
26                                       PITCH_CORR_LEN2,
27                                       PITCH_CORR_LEN2);
28  ysum32 = 1;
29  csum32 = 0;
30  x = in + PITCH_MAX_LAG / 2 + 2;
31  {
32    const int16_t* tmp_x = x;
33    const int16_t* tmp_in = in;
34    int32_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
35    n = PITCH_CORR_LEN2;
36    COMPILE_ASSERT(PITCH_CORR_LEN2 % 4 == 0);
37    __asm __volatile (
38      ".set       push                                          \n\t"
39      ".set       noreorder                                     \n\t"
40     "1:                                                        \n\t"
41      "lh         %[tmp1],       0(%[tmp_in])                   \n\t"
42      "lh         %[tmp2],       2(%[tmp_in])                   \n\t"
43      "lh         %[tmp3],       4(%[tmp_in])                   \n\t"
44      "lh         %[tmp4],       6(%[tmp_in])                   \n\t"
45      "lh         %[tmp5],       0(%[tmp_x])                    \n\t"
46      "lh         %[tmp6],       2(%[tmp_x])                    \n\t"
47      "lh         %[tmp7],       4(%[tmp_x])                    \n\t"
48      "lh         %[tmp8],       6(%[tmp_x])                    \n\t"
49      "mul        %[tmp5],       %[tmp1],        %[tmp5]        \n\t"
50      "mul        %[tmp1],       %[tmp1],        %[tmp1]        \n\t"
51      "mul        %[tmp6],       %[tmp2],        %[tmp6]        \n\t"
52      "mul        %[tmp2],       %[tmp2],        %[tmp2]        \n\t"
53      "mul        %[tmp7],       %[tmp3],        %[tmp7]        \n\t"
54      "mul        %[tmp3],       %[tmp3],        %[tmp3]        \n\t"
55      "mul        %[tmp8],       %[tmp4],        %[tmp8]        \n\t"
56      "mul        %[tmp4],       %[tmp4],        %[tmp4]        \n\t"
57      "addiu      %[n],          %[n],           -4             \n\t"
58      "srav       %[tmp5],       %[tmp5],        %[scaling]     \n\t"
59      "srav       %[tmp1],       %[tmp1],        %[scaling]     \n\t"
60      "srav       %[tmp6],       %[tmp6],        %[scaling]     \n\t"
61      "srav       %[tmp2],       %[tmp2],        %[scaling]     \n\t"
62      "srav       %[tmp7],       %[tmp7],        %[scaling]     \n\t"
63      "srav       %[tmp3],       %[tmp3],        %[scaling]     \n\t"
64      "srav       %[tmp8],       %[tmp8],        %[scaling]     \n\t"
65      "srav       %[tmp4],       %[tmp4],        %[scaling]     \n\t"
66      "addu       %[ysum32],     %[ysum32],      %[tmp1]        \n\t"
67      "addu       %[csum32],     %[csum32],      %[tmp5]        \n\t"
68      "addu       %[ysum32],     %[ysum32],      %[tmp2]        \n\t"
69      "addu       %[csum32],     %[csum32],      %[tmp6]        \n\t"
70      "addu       %[ysum32],     %[ysum32],      %[tmp3]        \n\t"
71      "addu       %[csum32],     %[csum32],      %[tmp7]        \n\t"
72      "addu       %[ysum32],     %[ysum32],      %[tmp4]        \n\t"
73      "addu       %[csum32],     %[csum32],      %[tmp8]        \n\t"
74      "addiu      %[tmp_in],     %[tmp_in],      8              \n\t"
75      "bgtz       %[n],          1b                             \n\t"
76      " addiu     %[tmp_x],      %[tmp_x],       8              \n\t"
77      ".set       pop                                           \n\t"
78      : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
79        [tmp4] "=&r" (tmp4), [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6),
80        [tmp7] "=&r" (tmp7), [tmp8] "=&r" (tmp8), [tmp_in] "+r" (tmp_in),
81        [ysum32] "+r" (ysum32), [tmp_x] "+r" (tmp_x), [csum32] "+r" (csum32),
82        [n] "+r" (n)
83      : [scaling] "r" (scaling)
84      : "memory", "hi", "lo"
85    );
86  }
87  logcorQ8 += PITCH_LAG_SPAN2 - 1;
88  lys = WebRtcIsacfix_Log2Q8((uint32_t)ysum32) >> 1; // Q8, sqrt(ysum)
89  if (csum32 > 0) {
90    lcs = WebRtcIsacfix_Log2Q8((uint32_t)csum32);  // 2log(csum) in Q8
91    if (lcs > (lys + oneQ8)) {  // csum/sqrt(ysum) > 2 in Q8
92      *logcorQ8 = lcs - lys;  // log2(csum/sqrt(ysum))
93    } else {
94      *logcorQ8 = oneQ8;  // 1.00
95    }
96  } else {
97    *logcorQ8 = 0;
98  }
99
100  for (k = 1; k < PITCH_LAG_SPAN2; k++) {
101    inptr = &in[k];
102    const int16_t* tmp_in1 = &in[k - 1];
103    const int16_t* tmp_in2 = &in[PITCH_CORR_LEN2 + k - 1];
104    const int16_t* tmp_x = x;
105    int32_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
106    n = PITCH_CORR_LEN2;
107    csum32 = 0;
108    __asm __volatile (
109      ".set       push                                             \n\t"
110      ".set       noreorder                                        \n\t"
111      "lh         %[tmp1],        0(%[tmp_in1])                    \n\t"
112      "lh         %[tmp2],        0(%[tmp_in2])                    \n\t"
113      "mul        %[tmp1],        %[tmp1],         %[tmp1]         \n\t"
114      "mul        %[tmp2],        %[tmp2],         %[tmp2]         \n\t"
115      "srav       %[tmp1],        %[tmp1],         %[scaling]      \n\t"
116      "srav       %[tmp2],        %[tmp2],         %[scaling]      \n\t"
117      "subu       %[ysum32],      %[ysum32],       %[tmp1]         \n\t"
118      "bnez       %[scaling],     2f                               \n\t"
119      " addu      %[ysum32],      %[ysum32],       %[tmp2]         \n\t"
120     "1:                                                           \n\t"
121      "lh         %[tmp1],        0(%[inptr])                      \n\t"
122      "lh         %[tmp2],        0(%[tmp_x])                      \n\t"
123      "lh         %[tmp3],        2(%[inptr])                      \n\t"
124      "lh         %[tmp4],        2(%[tmp_x])                      \n\t"
125      "lh         %[tmp5],        4(%[inptr])                      \n\t"
126      "lh         %[tmp6],        4(%[tmp_x])                      \n\t"
127      "lh         %[tmp7],        6(%[inptr])                      \n\t"
128      "lh         %[tmp8],        6(%[tmp_x])                      \n\t"
129      "mul        %[tmp1],        %[tmp1],         %[tmp2]         \n\t"
130      "mul        %[tmp2],        %[tmp3],         %[tmp4]         \n\t"
131      "mul        %[tmp3],        %[tmp5],         %[tmp6]         \n\t"
132      "mul        %[tmp4],        %[tmp7],         %[tmp8]         \n\t"
133      "addiu      %[n],           %[n],            -4              \n\t"
134      "addiu      %[inptr],       %[inptr],        8               \n\t"
135      "addiu      %[tmp_x],       %[tmp_x],        8               \n\t"
136      "addu       %[csum32],      %[csum32],       %[tmp1]         \n\t"
137      "addu       %[csum32],      %[csum32],       %[tmp2]         \n\t"
138      "addu       %[csum32],      %[csum32],       %[tmp3]         \n\t"
139      "bgtz       %[n],           1b                               \n\t"
140      " addu      %[csum32],      %[csum32],       %[tmp4]         \n\t"
141      "b          3f                                               \n\t"
142      " nop                                                        \n\t"
143     "2:                                                           \n\t"
144      "lh         %[tmp1],        0(%[inptr])                      \n\t"
145      "lh         %[tmp2],        0(%[tmp_x])                      \n\t"
146      "lh         %[tmp3],        2(%[inptr])                      \n\t"
147      "lh         %[tmp4],        2(%[tmp_x])                      \n\t"
148      "lh         %[tmp5],        4(%[inptr])                      \n\t"
149      "lh         %[tmp6],        4(%[tmp_x])                      \n\t"
150      "lh         %[tmp7],        6(%[inptr])                      \n\t"
151      "lh         %[tmp8],        6(%[tmp_x])                      \n\t"
152      "mul        %[tmp1],        %[tmp1],         %[tmp2]         \n\t"
153      "mul        %[tmp2],        %[tmp3],         %[tmp4]         \n\t"
154      "mul        %[tmp3],        %[tmp5],         %[tmp6]         \n\t"
155      "mul        %[tmp4],        %[tmp7],         %[tmp8]         \n\t"
156      "addiu      %[n],           %[n],            -4              \n\t"
157      "addiu      %[inptr],       %[inptr],        8               \n\t"
158      "addiu      %[tmp_x],       %[tmp_x],        8               \n\t"
159      "srav       %[tmp1],        %[tmp1],         %[scaling]      \n\t"
160      "srav       %[tmp2],        %[tmp2],         %[scaling]      \n\t"
161      "srav       %[tmp3],        %[tmp3],         %[scaling]      \n\t"
162      "srav       %[tmp4],        %[tmp4],         %[scaling]      \n\t"
163      "addu       %[csum32],      %[csum32],       %[tmp1]         \n\t"
164      "addu       %[csum32],      %[csum32],       %[tmp2]         \n\t"
165      "addu       %[csum32],      %[csum32],       %[tmp3]         \n\t"
166      "bgtz       %[n],           2b                               \n\t"
167      " addu      %[csum32],      %[csum32],       %[tmp4]         \n\t"
168     "3:                                                           \n\t"
169      ".set       pop                                              \n\t"
170      : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
171        [tmp4] "=&r" (tmp4), [tmp5] "=&r" (tmp5), [tmp6] "=&r" (tmp6),
172        [tmp7] "=&r" (tmp7), [tmp8] "=&r" (tmp8), [inptr] "+r" (inptr),
173        [csum32] "+r" (csum32), [tmp_x] "+r" (tmp_x), [ysum32] "+r" (ysum32),
174        [n] "+r" (n)
175      : [tmp_in1] "r" (tmp_in1), [tmp_in2] "r" (tmp_in2),
176        [scaling] "r" (scaling)
177      : "memory", "hi", "lo"
178    );
179
180    logcorQ8--;
181    lys = WebRtcIsacfix_Log2Q8((uint32_t)ysum32) >> 1; // Q8, sqrt(ysum)
182    if (csum32 > 0) {
183      lcs = WebRtcIsacfix_Log2Q8((uint32_t)csum32); // 2log(csum) in Q8
184      if (lcs > (lys + oneQ8)) { // csum/sqrt(ysum) > 2
185        *logcorQ8 = lcs - lys;  // log2(csum/sqrt(ysum))
186      } else {
187        *logcorQ8 = oneQ8;  // 1.00
188      }
189    } else {
190      *logcorQ8 = 0;
191    }
192  }
193}
194