1/* Copyright (c) 2007-2008 CSIRO
2   Copyright (c) 2007-2009 Xiph.Org Foundation
3   Written by Jean-Marc Valin */
4/*
5   Redistribution and use in source and binary forms, with or without
6   modification, are permitted provided that the following conditions
7   are met:
8
9   - Redistributions of source code must retain the above copyright
10   notice, this list of conditions and the following disclaimer.
11
12   - Redistributions in binary form must reproduce the above copyright
13   notice, this list of conditions and the following disclaimer in the
14   documentation and/or other materials provided with the distribution.
15
16   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
20   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27*/
28
29#ifdef HAVE_CONFIG_H
30#include "config.h"
31#endif
32
33#include "mathops.h"
34#include "cwrs.h"
35#include "vq.h"
36#include "arch.h"
37#include "os_support.h"
38#include "bands.h"
39#include "rate.h"
40
41static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
42{
43   int i;
44   celt_norm *Xptr;
45   Xptr = X;
46   for (i=0;i<len-stride;i++)
47   {
48      celt_norm x1, x2;
49      x1 = Xptr[0];
50      x2 = Xptr[stride];
51      Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
52      *Xptr++      = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
53   }
54   Xptr = &X[len-2*stride-1];
55   for (i=len-2*stride-1;i>=0;i--)
56   {
57      celt_norm x1, x2;
58      x1 = Xptr[0];
59      x2 = Xptr[stride];
60      Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
61      *Xptr--      = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
62   }
63}
64
65static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
66{
67   static const int SPREAD_FACTOR[3]={15,10,5};
68   int i;
69   opus_val16 c, s;
70   opus_val16 gain, theta;
71   int stride2=0;
72   int factor;
73
74   if (2*K>=len || spread==SPREAD_NONE)
75      return;
76   factor = SPREAD_FACTOR[spread-1];
77
78   gain = celt_div((opus_val32)MULT16_16(Q15_ONE,len),(opus_val32)(len+factor*K));
79   theta = HALF16(MULT16_16_Q15(gain,gain));
80
81   c = celt_cos_norm(EXTEND32(theta));
82   s = celt_cos_norm(EXTEND32(SUB16(Q15ONE,theta))); /*  sin(theta) */
83
84   if (len>=8*stride)
85   {
86      stride2 = 1;
87      /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.
88         It's basically incrementing long as (stride2+0.5)^2 < len/stride. */
89      while ((stride2*stride2+stride2)*stride + (stride>>2) < len)
90         stride2++;
91   }
92   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
93      extract_collapse_mask().*/
94   len /= stride;
95   for (i=0;i<stride;i++)
96   {
97      if (dir < 0)
98      {
99         if (stride2)
100            exp_rotation1(X+i*len, len, stride2, s, c);
101         exp_rotation1(X+i*len, len, 1, c, s);
102      } else {
103         exp_rotation1(X+i*len, len, 1, c, -s);
104         if (stride2)
105            exp_rotation1(X+i*len, len, stride2, s, -c);
106      }
107   }
108}
109
110/** Takes the pitch vector and the decoded residual vector, computes the gain
111    that will give ||p+g*y||=1 and mixes the residual with the pitch. */
112static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X,
113      int N, opus_val32 Ryy, opus_val16 gain)
114{
115   int i;
116#ifdef FIXED_POINT
117   int k;
118#endif
119   opus_val32 t;
120   opus_val16 g;
121
122#ifdef FIXED_POINT
123   k = celt_ilog2(Ryy)>>1;
124#endif
125   t = VSHR32(Ryy, 2*(k-7));
126   g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
127
128   i=0;
129   do
130      X[i] = EXTRACT16(PSHR32(MULT16_16(g, iy[i]), k+1));
131   while (++i < N);
132}
133
134static unsigned extract_collapse_mask(int *iy, int N, int B)
135{
136   unsigned collapse_mask;
137   int N0;
138   int i;
139   if (B<=1)
140      return 1;
141   /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
142      exp_rotation().*/
143   N0 = N/B;
144   collapse_mask = 0;
145   i=0; do {
146      int j;
147      j=0; do {
148         collapse_mask |= (iy[i*N0+j]!=0)<<i;
149      } while (++j<N0);
150   } while (++i<B);
151   return collapse_mask;
152}
153
154unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
155#ifdef RESYNTH
156   , opus_val16 gain
157#endif
158   )
159{
160   VARDECL(celt_norm, y);
161   VARDECL(int, iy);
162   VARDECL(opus_val16, signx);
163   int i, j;
164   opus_val16 s;
165   int pulsesLeft;
166   opus_val32 sum;
167   opus_val32 xy;
168   opus_val16 yy;
169   unsigned collapse_mask;
170   SAVE_STACK;
171
172   celt_assert2(K>0, "alg_quant() needs at least one pulse");
173   celt_assert2(N>1, "alg_quant() needs at least two dimensions");
174
175   ALLOC(y, N, celt_norm);
176   ALLOC(iy, N, int);
177   ALLOC(signx, N, opus_val16);
178
179   exp_rotation(X, N, 1, B, K, spread);
180
181   /* Get rid of the sign */
182   sum = 0;
183   j=0; do {
184      if (X[j]>0)
185         signx[j]=1;
186      else {
187         signx[j]=-1;
188         X[j]=-X[j];
189      }
190      iy[j] = 0;
191      y[j] = 0;
192   } while (++j<N);
193
194   xy = yy = 0;
195
196   pulsesLeft = K;
197
198   /* Do a pre-search by projecting on the pyramid */
199   if (K > (N>>1))
200   {
201      opus_val16 rcp;
202      j=0; do {
203         sum += X[j];
204      }  while (++j<N);
205
206      /* If X is too small, just replace it with a pulse at 0 */
207#ifdef FIXED_POINT
208      if (sum <= K)
209#else
210      /* Prevents infinities and NaNs from causing too many pulses
211         to be allocated. 64 is an approximation of infinity here. */
212      if (!(sum > EPSILON && sum < 64))
213#endif
214      {
215         X[0] = QCONST16(1.f,14);
216         j=1; do
217            X[j]=0;
218         while (++j<N);
219         sum = QCONST16(1.f,14);
220      }
221      rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum)));
222      j=0; do {
223#ifdef FIXED_POINT
224         /* It's really important to round *towards zero* here */
225         iy[j] = MULT16_16_Q15(X[j],rcp);
226#else
227         iy[j] = (int)floor(rcp*X[j]);
228#endif
229         y[j] = (celt_norm)iy[j];
230         yy = MAC16_16(yy, y[j],y[j]);
231         xy = MAC16_16(xy, X[j],y[j]);
232         y[j] *= 2;
233         pulsesLeft -= iy[j];
234      }  while (++j<N);
235   }
236   celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass");
237
238   /* This should never happen, but just in case it does (e.g. on silence)
239      we fill the first bin with pulses. */
240#ifdef FIXED_POINT_DEBUG
241   celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass");
242#endif
243   if (pulsesLeft > N+3)
244   {
245      opus_val16 tmp = (opus_val16)pulsesLeft;
246      yy = MAC16_16(yy, tmp, tmp);
247      yy = MAC16_16(yy, tmp, y[0]);
248      iy[0] += pulsesLeft;
249      pulsesLeft=0;
250   }
251
252   s = 1;
253   for (i=0;i<pulsesLeft;i++)
254   {
255      int best_id;
256      opus_val32 best_num = -VERY_LARGE16;
257      opus_val16 best_den = 0;
258#ifdef FIXED_POINT
259      int rshift;
260#endif
261#ifdef FIXED_POINT
262      rshift = 1+celt_ilog2(K-pulsesLeft+i+1);
263#endif
264      best_id = 0;
265      /* The squared magnitude term gets added anyway, so we might as well
266         add it outside the loop */
267      yy = ADD32(yy, 1);
268      j=0;
269      do {
270         opus_val16 Rxy, Ryy;
271         /* Temporary sums of the new pulse(s) */
272         Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
273         /* We're multiplying y[j] by two so we don't have to do it here */
274         Ryy = ADD16(yy, y[j]);
275
276         /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
277            Rxy is positive because the sign is pre-computed) */
278         Rxy = MULT16_16_Q15(Rxy,Rxy);
279         /* The idea is to check for num/den >= best_num/best_den, but that way
280            we can do it without any division */
281         /* OPT: Make sure to use conditional moves here */
282         if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num))
283         {
284            best_den = Ryy;
285            best_num = Rxy;
286            best_id = j;
287         }
288      } while (++j<N);
289
290      /* Updating the sums of the new pulse(s) */
291      xy = ADD32(xy, EXTEND32(X[best_id]));
292      /* We're multiplying y[j] by two so we don't have to do it here */
293      yy = ADD16(yy, y[best_id]);
294
295      /* Only now that we've made the final choice, update y/iy */
296      /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
297      y[best_id] += 2*s;
298      iy[best_id]++;
299   }
300
301   /* Put the original sign back */
302   j=0;
303   do {
304      X[j] = MULT16_16(signx[j],X[j]);
305      if (signx[j] < 0)
306         iy[j] = -iy[j];
307   } while (++j<N);
308   encode_pulses(iy, N, K, enc);
309
310#ifdef RESYNTH
311   normalise_residual(iy, X, N, yy, gain);
312   exp_rotation(X, N, -1, B, K, spread);
313#endif
314
315   collapse_mask = extract_collapse_mask(iy, N, B);
316   RESTORE_STACK;
317   return collapse_mask;
318}
319
320/** Decode pulse vector and combine the result with the pitch vector to produce
321    the final normalised signal in the current band. */
322unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
323      ec_dec *dec, opus_val16 gain)
324{
325   int i;
326   opus_val32 Ryy;
327   unsigned collapse_mask;
328   VARDECL(int, iy);
329   SAVE_STACK;
330
331   celt_assert2(K>0, "alg_unquant() needs at least one pulse");
332   celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
333   ALLOC(iy, N, int);
334   decode_pulses(iy, N, K, dec);
335   Ryy = 0;
336   i=0;
337   do {
338      Ryy = MAC16_16(Ryy, iy[i], iy[i]);
339   } while (++i < N);
340   normalise_residual(iy, X, N, Ryy, gain);
341   exp_rotation(X, N, -1, B, K, spread);
342   collapse_mask = extract_collapse_mask(iy, N, B);
343   RESTORE_STACK;
344   return collapse_mask;
345}
346
347void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
348{
349   int i;
350#ifdef FIXED_POINT
351   int k;
352#endif
353   opus_val32 E = EPSILON;
354   opus_val16 g;
355   opus_val32 t;
356   celt_norm *xptr = X;
357   for (i=0;i<N;i++)
358   {
359      E = MAC16_16(E, *xptr, *xptr);
360      xptr++;
361   }
362#ifdef FIXED_POINT
363   k = celt_ilog2(E)>>1;
364#endif
365   t = VSHR32(E, 2*(k-7));
366   g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
367
368   xptr = X;
369   for (i=0;i<N;i++)
370   {
371      *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1));
372      xptr++;
373   }
374   /*return celt_sqrt(E);*/
375}
376
377int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N)
378{
379   int i;
380   int itheta;
381   opus_val16 mid, side;
382   opus_val32 Emid, Eside;
383
384   Emid = Eside = EPSILON;
385   if (stereo)
386   {
387      for (i=0;i<N;i++)
388      {
389         celt_norm m, s;
390         m = ADD16(SHR16(X[i],1),SHR16(Y[i],1));
391         s = SUB16(SHR16(X[i],1),SHR16(Y[i],1));
392         Emid = MAC16_16(Emid, m, m);
393         Eside = MAC16_16(Eside, s, s);
394      }
395   } else {
396      for (i=0;i<N;i++)
397      {
398         celt_norm m, s;
399         m = X[i];
400         s = Y[i];
401         Emid = MAC16_16(Emid, m, m);
402         Eside = MAC16_16(Eside, s, s);
403      }
404   }
405   mid = celt_sqrt(Emid);
406   side = celt_sqrt(Eside);
407#ifdef FIXED_POINT
408   /* 0.63662 = 2/pi */
409   itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid));
410#else
411   itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid));
412#endif
413
414   return itheta;
415}
416