1/* Copyright (c) 2007-2008 CSIRO
2   Copyright (c) 2007-2009 Xiph.Org Foundation
3   Copyright (c) 2008-2009 Gregory Maxwell
4   Written by Jean-Marc Valin and Gregory Maxwell */
5/*
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions
8   are met:
9
10   - Redistributions of source code must retain the above copyright
11   notice, this list of conditions and the following disclaimer.
12
13   - Redistributions in binary form must reproduce the above copyright
14   notice, this list of conditions and the following disclaimer in the
15   documentation and/or other materials provided with the distribution.
16
17   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
21   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28*/
29
30#ifdef HAVE_CONFIG_H
31#include "config.h"
32#endif
33
34#include <math.h>
35#include "bands.h"
36#include "modes.h"
37#include "vq.h"
38#include "cwrs.h"
39#include "stack_alloc.h"
40#include "os_support.h"
41#include "mathops.h"
42#include "rate.h"
43#include "quant_bands.h"
44#include "pitch.h"
45
46int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev)
47{
48   int i;
49   for (i=0;i<N;i++)
50   {
51      if (val < thresholds[i])
52         break;
53   }
54   if (i>prev && val < thresholds[prev]+hysteresis[prev])
55      i=prev;
56   if (i<prev && val > thresholds[prev-1]-hysteresis[prev-1])
57      i=prev;
58   return i;
59}
60
61opus_uint32 celt_lcg_rand(opus_uint32 seed)
62{
63   return 1664525 * seed + 1013904223;
64}
65
66/* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness
67   with this approximation is important because it has an impact on the bit allocation */
68static opus_int16 bitexact_cos(opus_int16 x)
69{
70   opus_int32 tmp;
71   opus_int16 x2;
72   tmp = (4096+((opus_int32)(x)*(x)))>>13;
73   celt_assert(tmp<=32767);
74   x2 = tmp;
75   x2 = (32767-x2) + FRAC_MUL16(x2, (-7651 + FRAC_MUL16(x2, (8277 + FRAC_MUL16(-626, x2)))));
76   celt_assert(x2<=32766);
77   return 1+x2;
78}
79
80static int bitexact_log2tan(int isin,int icos)
81{
82   int lc;
83   int ls;
84   lc=EC_ILOG(icos);
85   ls=EC_ILOG(isin);
86   icos<<=15-lc;
87   isin<<=15-ls;
88   return (ls-lc)*(1<<11)
89         +FRAC_MUL16(isin, FRAC_MUL16(isin, -2597) + 7932)
90         -FRAC_MUL16(icos, FRAC_MUL16(icos, -2597) + 7932);
91}
92
93#ifdef FIXED_POINT
94/* Compute the amplitude (sqrt energy) in each of the bands */
95void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM)
96{
97   int i, c, N;
98   const opus_int16 *eBands = m->eBands;
99   N = m->shortMdctSize<<LM;
100   c=0; do {
101      for (i=0;i<end;i++)
102      {
103         int j;
104         opus_val32 maxval=0;
105         opus_val32 sum = 0;
106
107         maxval = celt_maxabs32(&X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM);
108         if (maxval > 0)
109         {
110            int shift = celt_ilog2(maxval) - 14 + (((m->logN[i]>>BITRES)+LM+1)>>1);
111            j=eBands[i]<<LM;
112            if (shift>0)
113            {
114               do {
115                  sum = MAC16_16(sum, EXTRACT16(SHR32(X[j+c*N],shift)),
116                        EXTRACT16(SHR32(X[j+c*N],shift)));
117               } while (++j<eBands[i+1]<<LM);
118            } else {
119               do {
120                  sum = MAC16_16(sum, EXTRACT16(SHL32(X[j+c*N],-shift)),
121                        EXTRACT16(SHL32(X[j+c*N],-shift)));
122               } while (++j<eBands[i+1]<<LM);
123            }
124            /* We're adding one here to ensure the normalized band isn't larger than unity norm */
125            bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift);
126         } else {
127            bandE[i+c*m->nbEBands] = EPSILON;
128         }
129         /*printf ("%f ", bandE[i+c*m->nbEBands]);*/
130      }
131   } while (++c<C);
132   /*printf ("\n");*/
133}
134
135/* Normalise each band such that the energy is one. */
136void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M)
137{
138   int i, c, N;
139   const opus_int16 *eBands = m->eBands;
140   N = M*m->shortMdctSize;
141   c=0; do {
142      i=0; do {
143         opus_val16 g;
144         int j,shift;
145         opus_val16 E;
146         shift = celt_zlog2(bandE[i+c*m->nbEBands])-13;
147         E = VSHR32(bandE[i+c*m->nbEBands], shift);
148         g = EXTRACT16(celt_rcp(SHL32(E,3)));
149         j=M*eBands[i]; do {
150            X[j+c*N] = MULT16_16_Q15(VSHR32(freq[j+c*N],shift-1),g);
151         } while (++j<M*eBands[i+1]);
152      } while (++i<end);
153   } while (++c<C);
154}
155
156#else /* FIXED_POINT */
157/* Compute the amplitude (sqrt energy) in each of the bands */
158void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM)
159{
160   int i, c, N;
161   const opus_int16 *eBands = m->eBands;
162   N = m->shortMdctSize<<LM;
163   c=0; do {
164      for (i=0;i<end;i++)
165      {
166         opus_val32 sum;
167         sum = 1e-27f + celt_inner_prod_c(&X[c*N+(eBands[i]<<LM)], &X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM);
168         bandE[i+c*m->nbEBands] = celt_sqrt(sum);
169         /*printf ("%f ", bandE[i+c*m->nbEBands]);*/
170      }
171   } while (++c<C);
172   /*printf ("\n");*/
173}
174
175/* Normalise each band such that the energy is one. */
176void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, celt_norm * OPUS_RESTRICT X, const celt_ener *bandE, int end, int C, int M)
177{
178   int i, c, N;
179   const opus_int16 *eBands = m->eBands;
180   N = M*m->shortMdctSize;
181   c=0; do {
182      for (i=0;i<end;i++)
183      {
184         int j;
185         opus_val16 g = 1.f/(1e-27f+bandE[i+c*m->nbEBands]);
186         for (j=M*eBands[i];j<M*eBands[i+1];j++)
187            X[j+c*N] = freq[j+c*N]*g;
188      }
189   } while (++c<C);
190}
191
192#endif /* FIXED_POINT */
193
194/* De-normalise the energy to produce the synthesis from the unit-energy bands */
195void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
196      celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start,
197      int end, int M, int downsample, int silence)
198{
199   int i, N;
200   int bound;
201   celt_sig * OPUS_RESTRICT f;
202   const celt_norm * OPUS_RESTRICT x;
203   const opus_int16 *eBands = m->eBands;
204   N = M*m->shortMdctSize;
205   bound = M*eBands[end];
206   if (downsample!=1)
207      bound = IMIN(bound, N/downsample);
208   if (silence)
209   {
210      bound = 0;
211      start = end = 0;
212   }
213   f = freq;
214   x = X+M*eBands[start];
215   for (i=0;i<M*eBands[start];i++)
216      *f++ = 0;
217   for (i=start;i<end;i++)
218   {
219      int j, band_end;
220      opus_val16 g;
221      opus_val16 lg;
222#ifdef FIXED_POINT
223      int shift;
224#endif
225      j=M*eBands[i];
226      band_end = M*eBands[i+1];
227      lg = ADD16(bandLogE[i], SHL16((opus_val16)eMeans[i],6));
228#ifndef FIXED_POINT
229      g = celt_exp2(lg);
230#else
231      /* Handle the integer part of the log energy */
232      shift = 16-(lg>>DB_SHIFT);
233      if (shift>31)
234      {
235         shift=0;
236         g=0;
237      } else {
238         /* Handle the fractional part. */
239         g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1));
240      }
241      /* Handle extreme gains with negative shift. */
242      if (shift<0)
243      {
244         /* For shift < -2 we'd be likely to overflow, so we're capping
245               the gain here. This shouldn't happen unless the bitstream is
246               already corrupted. */
247         if (shift < -2)
248         {
249            g = 32767;
250            shift = -2;
251         }
252         do {
253            *f++ = SHL32(MULT16_16(*x++, g), -shift);
254         } while (++j<band_end);
255      } else
256#endif
257         /* Be careful of the fixed-point "else" just above when changing this code */
258         do {
259            *f++ = SHR32(MULT16_16(*x++, g), shift);
260         } while (++j<band_end);
261   }
262   celt_assert(start <= end);
263   OPUS_CLEAR(&freq[bound], N-bound);
264}
265
266/* This prevents energy collapse for transients with multiple short MDCTs */
267void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size,
268      int start, int end, const opus_val16 *logE, const opus_val16 *prev1logE,
269      const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed, int arch)
270{
271   int c, i, j, k;
272   for (i=start;i<end;i++)
273   {
274      int N0;
275      opus_val16 thresh, sqrt_1;
276      int depth;
277#ifdef FIXED_POINT
278      int shift;
279      opus_val32 thresh32;
280#endif
281
282      N0 = m->eBands[i+1]-m->eBands[i];
283      /* depth in 1/8 bits */
284      celt_assert(pulses[i]>=0);
285      depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM;
286
287#ifdef FIXED_POINT
288      thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1);
289      thresh = MULT16_32_Q15(QCONST16(0.5f, 15), MIN32(32767,thresh32));
290      {
291         opus_val32 t;
292         t = N0<<LM;
293         shift = celt_ilog2(t)>>1;
294         t = SHL32(t, (7-shift)<<1);
295         sqrt_1 = celt_rsqrt_norm(t);
296      }
297#else
298      thresh = .5f*celt_exp2(-.125f*depth);
299      sqrt_1 = celt_rsqrt(N0<<LM);
300#endif
301
302      c=0; do
303      {
304         celt_norm *X;
305         opus_val16 prev1;
306         opus_val16 prev2;
307         opus_val32 Ediff;
308         opus_val16 r;
309         int renormalize=0;
310         prev1 = prev1logE[c*m->nbEBands+i];
311         prev2 = prev2logE[c*m->nbEBands+i];
312         if (C==1)
313         {
314            prev1 = MAX16(prev1,prev1logE[m->nbEBands+i]);
315            prev2 = MAX16(prev2,prev2logE[m->nbEBands+i]);
316         }
317         Ediff = EXTEND32(logE[c*m->nbEBands+i])-EXTEND32(MIN16(prev1,prev2));
318         Ediff = MAX32(0, Ediff);
319
320#ifdef FIXED_POINT
321         if (Ediff < 16384)
322         {
323            opus_val32 r32 = SHR32(celt_exp2(-EXTRACT16(Ediff)),1);
324            r = 2*MIN16(16383,r32);
325         } else {
326            r = 0;
327         }
328         if (LM==3)
329            r = MULT16_16_Q14(23170, MIN32(23169, r));
330         r = SHR16(MIN16(thresh, r),1);
331         r = SHR32(MULT16_16_Q15(sqrt_1, r),shift);
332#else
333         /* r needs to be multiplied by 2 or 2*sqrt(2) depending on LM because
334            short blocks don't have the same energy as long */
335         r = 2.f*celt_exp2(-Ediff);
336         if (LM==3)
337            r *= 1.41421356f;
338         r = MIN16(thresh, r);
339         r = r*sqrt_1;
340#endif
341         X = X_+c*size+(m->eBands[i]<<LM);
342         for (k=0;k<1<<LM;k++)
343         {
344            /* Detect collapse */
345            if (!(collapse_masks[i*C+c]&1<<k))
346            {
347               /* Fill with noise */
348               for (j=0;j<N0;j++)
349               {
350                  seed = celt_lcg_rand(seed);
351                  X[(j<<LM)+k] = (seed&0x8000 ? r : -r);
352               }
353               renormalize = 1;
354            }
355         }
356         /* We just added some energy, so we need to renormalise */
357         if (renormalize)
358            renormalise_vector(X, N0<<LM, Q15ONE, arch);
359      } while (++c<C);
360   }
361}
362
363static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, const celt_norm * OPUS_RESTRICT Y, const celt_ener *bandE, int bandID, int N)
364{
365   int i = bandID;
366   int j;
367   opus_val16 a1, a2;
368   opus_val16 left, right;
369   opus_val16 norm;
370#ifdef FIXED_POINT
371   int shift = celt_zlog2(MAX32(bandE[i], bandE[i+m->nbEBands]))-13;
372#endif
373   left = VSHR32(bandE[i],shift);
374   right = VSHR32(bandE[i+m->nbEBands],shift);
375   norm = EPSILON + celt_sqrt(EPSILON+MULT16_16(left,left)+MULT16_16(right,right));
376   a1 = DIV32_16(SHL32(EXTEND32(left),14),norm);
377   a2 = DIV32_16(SHL32(EXTEND32(right),14),norm);
378   for (j=0;j<N;j++)
379   {
380      celt_norm r, l;
381      l = X[j];
382      r = Y[j];
383      X[j] = EXTRACT16(SHR32(MAC16_16(MULT16_16(a1, l), a2, r), 14));
384      /* Side is not encoded, no need to calculate */
385   }
386}
387
388static void stereo_split(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, int N)
389{
390   int j;
391   for (j=0;j<N;j++)
392   {
393      opus_val32 r, l;
394      l = MULT16_16(QCONST16(.70710678f, 15), X[j]);
395      r = MULT16_16(QCONST16(.70710678f, 15), Y[j]);
396      X[j] = EXTRACT16(SHR32(ADD32(l, r), 15));
397      Y[j] = EXTRACT16(SHR32(SUB32(r, l), 15));
398   }
399}
400
401static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, opus_val16 mid, int N, int arch)
402{
403   int j;
404   opus_val32 xp=0, side=0;
405   opus_val32 El, Er;
406   opus_val16 mid2;
407#ifdef FIXED_POINT
408   int kl, kr;
409#endif
410   opus_val32 t, lgain, rgain;
411
412   /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
413   dual_inner_prod(Y, X, Y, N, &xp, &side, arch);
414   /* Compensating for the mid normalization */
415   xp = MULT16_32_Q15(mid, xp);
416   /* mid and side are in Q15, not Q14 like X and Y */
417   mid2 = SHR16(mid, 1);
418   El = MULT16_16(mid2, mid2) + side - 2*xp;
419   Er = MULT16_16(mid2, mid2) + side + 2*xp;
420   if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28))
421   {
422      OPUS_COPY(Y, X, N);
423      return;
424   }
425
426#ifdef FIXED_POINT
427   kl = celt_ilog2(El)>>1;
428   kr = celt_ilog2(Er)>>1;
429#endif
430   t = VSHR32(El, (kl-7)<<1);
431   lgain = celt_rsqrt_norm(t);
432   t = VSHR32(Er, (kr-7)<<1);
433   rgain = celt_rsqrt_norm(t);
434
435#ifdef FIXED_POINT
436   if (kl < 7)
437      kl = 7;
438   if (kr < 7)
439      kr = 7;
440#endif
441
442   for (j=0;j<N;j++)
443   {
444      celt_norm r, l;
445      /* Apply mid scaling (side is already scaled) */
446      l = MULT16_16_P15(mid, X[j]);
447      r = Y[j];
448      X[j] = EXTRACT16(PSHR32(MULT16_16(lgain, SUB16(l,r)), kl+1));
449      Y[j] = EXTRACT16(PSHR32(MULT16_16(rgain, ADD16(l,r)), kr+1));
450   }
451}
452
453/* Decide whether we should spread the pulses in the current frame */
454int spreading_decision(const CELTMode *m, const celt_norm *X, int *average,
455      int last_decision, int *hf_average, int *tapset_decision, int update_hf,
456      int end, int C, int M)
457{
458   int i, c, N0;
459   int sum = 0, nbBands=0;
460   const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
461   int decision;
462   int hf_sum=0;
463
464   celt_assert(end>0);
465
466   N0 = M*m->shortMdctSize;
467
468   if (M*(eBands[end]-eBands[end-1]) <= 8)
469      return SPREAD_NONE;
470   c=0; do {
471      for (i=0;i<end;i++)
472      {
473         int j, N, tmp=0;
474         int tcount[3] = {0,0,0};
475         const celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0;
476         N = M*(eBands[i+1]-eBands[i]);
477         if (N<=8)
478            continue;
479         /* Compute rough CDF of |x[j]| */
480         for (j=0;j<N;j++)
481         {
482            opus_val32 x2N; /* Q13 */
483
484            x2N = MULT16_16(MULT16_16_Q15(x[j], x[j]), N);
485            if (x2N < QCONST16(0.25f,13))
486               tcount[0]++;
487            if (x2N < QCONST16(0.0625f,13))
488               tcount[1]++;
489            if (x2N < QCONST16(0.015625f,13))
490               tcount[2]++;
491         }
492
493         /* Only include four last bands (8 kHz and up) */
494         if (i>m->nbEBands-4)
495            hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N);
496         tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N);
497         sum += tmp*256;
498         nbBands++;
499      }
500   } while (++c<C);
501
502   if (update_hf)
503   {
504      if (hf_sum)
505         hf_sum = celt_udiv(hf_sum, C*(4-m->nbEBands+end));
506      *hf_average = (*hf_average+hf_sum)>>1;
507      hf_sum = *hf_average;
508      if (*tapset_decision==2)
509         hf_sum += 4;
510      else if (*tapset_decision==0)
511         hf_sum -= 4;
512      if (hf_sum > 22)
513         *tapset_decision=2;
514      else if (hf_sum > 18)
515         *tapset_decision=1;
516      else
517         *tapset_decision=0;
518   }
519   /*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/
520   celt_assert(nbBands>0); /* end has to be non-zero */
521   celt_assert(sum>=0);
522   sum = celt_udiv(sum, nbBands);
523   /* Recursive averaging */
524   sum = (sum+*average)>>1;
525   *average = sum;
526   /* Hysteresis */
527   sum = (3*sum + (((3-last_decision)<<7) + 64) + 2)>>2;
528   if (sum < 80)
529   {
530      decision = SPREAD_AGGRESSIVE;
531   } else if (sum < 256)
532   {
533      decision = SPREAD_NORMAL;
534   } else if (sum < 384)
535   {
536      decision = SPREAD_LIGHT;
537   } else {
538      decision = SPREAD_NONE;
539   }
540#ifdef FUZZING
541   decision = rand()&0x3;
542   *tapset_decision=rand()%3;
543#endif
544   return decision;
545}
546
547/* Indexing table for converting from natural Hadamard to ordery Hadamard
548   This is essentially a bit-reversed Gray, on top of which we've added
549   an inversion of the order because we want the DC at the end rather than
550   the beginning. The lines are for N=2, 4, 8, 16 */
551static const int ordery_table[] = {
552       1,  0,
553       3,  0,  2,  1,
554       7,  0,  4,  3,  6,  1,  5,  2,
555      15,  0,  8,  7, 12,  3, 11,  4, 14,  1,  9,  6, 13,  2, 10,  5,
556};
557
558static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
559{
560   int i,j;
561   VARDECL(celt_norm, tmp);
562   int N;
563   SAVE_STACK;
564   N = N0*stride;
565   ALLOC(tmp, N, celt_norm);
566   celt_assert(stride>0);
567   if (hadamard)
568   {
569      const int *ordery = ordery_table+stride-2;
570      for (i=0;i<stride;i++)
571      {
572         for (j=0;j<N0;j++)
573            tmp[ordery[i]*N0+j] = X[j*stride+i];
574      }
575   } else {
576      for (i=0;i<stride;i++)
577         for (j=0;j<N0;j++)
578            tmp[i*N0+j] = X[j*stride+i];
579   }
580   OPUS_COPY(X, tmp, N);
581   RESTORE_STACK;
582}
583
584static void interleave_hadamard(celt_norm *X, int N0, int stride, int hadamard)
585{
586   int i,j;
587   VARDECL(celt_norm, tmp);
588   int N;
589   SAVE_STACK;
590   N = N0*stride;
591   ALLOC(tmp, N, celt_norm);
592   if (hadamard)
593   {
594      const int *ordery = ordery_table+stride-2;
595      for (i=0;i<stride;i++)
596         for (j=0;j<N0;j++)
597            tmp[j*stride+i] = X[ordery[i]*N0+j];
598   } else {
599      for (i=0;i<stride;i++)
600         for (j=0;j<N0;j++)
601            tmp[j*stride+i] = X[i*N0+j];
602   }
603   OPUS_COPY(X, tmp, N);
604   RESTORE_STACK;
605}
606
607void haar1(celt_norm *X, int N0, int stride)
608{
609   int i, j;
610   N0 >>= 1;
611   for (i=0;i<stride;i++)
612      for (j=0;j<N0;j++)
613      {
614         opus_val32 tmp1, tmp2;
615         tmp1 = MULT16_16(QCONST16(.70710678f,15), X[stride*2*j+i]);
616         tmp2 = MULT16_16(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]);
617         X[stride*2*j+i] = EXTRACT16(PSHR32(ADD32(tmp1, tmp2), 15));
618         X[stride*(2*j+1)+i] = EXTRACT16(PSHR32(SUB32(tmp1, tmp2), 15));
619      }
620}
621
622static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo)
623{
624   static const opus_int16 exp2_table8[8] =
625      {16384, 17866, 19483, 21247, 23170, 25267, 27554, 30048};
626   int qn, qb;
627   int N2 = 2*N-1;
628   if (stereo && N==2)
629      N2--;
630   /* The upper limit ensures that in a stereo split with itheta==16384, we'll
631       always have enough bits left over to code at least one pulse in the
632       side; otherwise it would collapse, since it doesn't get folded. */
633   qb = celt_sudiv(b+N2*offset, N2);
634   qb = IMIN(b-pulse_cap-(4<<BITRES), qb);
635
636   qb = IMIN(8<<BITRES, qb);
637
638   if (qb<(1<<BITRES>>1)) {
639      qn = 1;
640   } else {
641      qn = exp2_table8[qb&0x7]>>(14-(qb>>BITRES));
642      qn = (qn+1)>>1<<1;
643   }
644   celt_assert(qn <= 256);
645   return qn;
646}
647
648struct band_ctx {
649   int encode;
650   const CELTMode *m;
651   int i;
652   int intensity;
653   int spread;
654   int tf_change;
655   ec_ctx *ec;
656   opus_int32 remaining_bits;
657   const celt_ener *bandE;
658   opus_uint32 seed;
659   int arch;
660};
661
662struct split_ctx {
663   int inv;
664   int imid;
665   int iside;
666   int delta;
667   int itheta;
668   int qalloc;
669};
670
671static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
672      celt_norm *X, celt_norm *Y, int N, int *b, int B, int B0,
673      int LM,
674      int stereo, int *fill)
675{
676   int qn;
677   int itheta=0;
678   int delta;
679   int imid, iside;
680   int qalloc;
681   int pulse_cap;
682   int offset;
683   opus_int32 tell;
684   int inv=0;
685   int encode;
686   const CELTMode *m;
687   int i;
688   int intensity;
689   ec_ctx *ec;
690   const celt_ener *bandE;
691
692   encode = ctx->encode;
693   m = ctx->m;
694   i = ctx->i;
695   intensity = ctx->intensity;
696   ec = ctx->ec;
697   bandE = ctx->bandE;
698
699   /* Decide on the resolution to give to the split parameter theta */
700   pulse_cap = m->logN[i]+LM*(1<<BITRES);
701   offset = (pulse_cap>>1) - (stereo&&N==2 ? QTHETA_OFFSET_TWOPHASE : QTHETA_OFFSET);
702   qn = compute_qn(N, *b, offset, pulse_cap, stereo);
703   if (stereo && i>=intensity)
704      qn = 1;
705   if (encode)
706   {
707      /* theta is the atan() of the ratio between the (normalized)
708         side and mid. With just that parameter, we can re-scale both
709         mid and side because we know that 1) they have unit norm and
710         2) they are orthogonal. */
711      itheta = stereo_itheta(X, Y, stereo, N, ctx->arch);
712   }
713   tell = ec_tell_frac(ec);
714   if (qn!=1)
715   {
716      if (encode)
717         itheta = (itheta*(opus_int32)qn+8192)>>14;
718
719      /* Entropy coding of the angle. We use a uniform pdf for the
720         time split, a step for stereo, and a triangular one for the rest. */
721      if (stereo && N>2)
722      {
723         int p0 = 3;
724         int x = itheta;
725         int x0 = qn/2;
726         int ft = p0*(x0+1) + x0;
727         /* Use a probability of p0 up to itheta=8192 and then use 1 after */
728         if (encode)
729         {
730            ec_encode(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
731         } else {
732            int fs;
733            fs=ec_decode(ec,ft);
734            if (fs<(x0+1)*p0)
735               x=fs/p0;
736            else
737               x=x0+1+(fs-(x0+1)*p0);
738            ec_dec_update(ec,x<=x0?p0*x:(x-1-x0)+(x0+1)*p0,x<=x0?p0*(x+1):(x-x0)+(x0+1)*p0,ft);
739            itheta = x;
740         }
741      } else if (B0>1 || stereo) {
742         /* Uniform pdf */
743         if (encode)
744            ec_enc_uint(ec, itheta, qn+1);
745         else
746            itheta = ec_dec_uint(ec, qn+1);
747      } else {
748         int fs=1, ft;
749         ft = ((qn>>1)+1)*((qn>>1)+1);
750         if (encode)
751         {
752            int fl;
753
754            fs = itheta <= (qn>>1) ? itheta + 1 : qn + 1 - itheta;
755            fl = itheta <= (qn>>1) ? itheta*(itheta + 1)>>1 :
756             ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
757
758            ec_encode(ec, fl, fl+fs, ft);
759         } else {
760            /* Triangular pdf */
761            int fl=0;
762            int fm;
763            fm = ec_decode(ec, ft);
764
765            if (fm < ((qn>>1)*((qn>>1) + 1)>>1))
766            {
767               itheta = (isqrt32(8*(opus_uint32)fm + 1) - 1)>>1;
768               fs = itheta + 1;
769               fl = itheta*(itheta + 1)>>1;
770            }
771            else
772            {
773               itheta = (2*(qn + 1)
774                - isqrt32(8*(opus_uint32)(ft - fm - 1) + 1))>>1;
775               fs = qn + 1 - itheta;
776               fl = ft - ((qn + 1 - itheta)*(qn + 2 - itheta)>>1);
777            }
778
779            ec_dec_update(ec, fl, fl+fs, ft);
780         }
781      }
782      celt_assert(itheta>=0);
783      itheta = celt_udiv((opus_int32)itheta*16384, qn);
784      if (encode && stereo)
785      {
786         if (itheta==0)
787            intensity_stereo(m, X, Y, bandE, i, N);
788         else
789            stereo_split(X, Y, N);
790      }
791      /* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate.
792               Let's do that at higher complexity */
793   } else if (stereo) {
794      if (encode)
795      {
796         inv = itheta > 8192;
797         if (inv)
798         {
799            int j;
800            for (j=0;j<N;j++)
801               Y[j] = -Y[j];
802         }
803         intensity_stereo(m, X, Y, bandE, i, N);
804      }
805      if (*b>2<<BITRES && ctx->remaining_bits > 2<<BITRES)
806      {
807         if (encode)
808            ec_enc_bit_logp(ec, inv, 2);
809         else
810            inv = ec_dec_bit_logp(ec, 2);
811      } else
812         inv = 0;
813      itheta = 0;
814   }
815   qalloc = ec_tell_frac(ec) - tell;
816   *b -= qalloc;
817
818   if (itheta == 0)
819   {
820      imid = 32767;
821      iside = 0;
822      *fill &= (1<<B)-1;
823      delta = -16384;
824   } else if (itheta == 16384)
825   {
826      imid = 0;
827      iside = 32767;
828      *fill &= ((1<<B)-1)<<B;
829      delta = 16384;
830   } else {
831      imid = bitexact_cos((opus_int16)itheta);
832      iside = bitexact_cos((opus_int16)(16384-itheta));
833      /* This is the mid vs side allocation that minimizes squared error
834         in that band. */
835      delta = FRAC_MUL16((N-1)<<7,bitexact_log2tan(iside,imid));
836   }
837
838   sctx->inv = inv;
839   sctx->imid = imid;
840   sctx->iside = iside;
841   sctx->delta = delta;
842   sctx->itheta = itheta;
843   sctx->qalloc = qalloc;
844}
845static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y, int b,
846      celt_norm *lowband_out)
847{
848#ifdef RESYNTH
849   int resynth = 1;
850#else
851   int resynth = !ctx->encode;
852#endif
853   int c;
854   int stereo;
855   celt_norm *x = X;
856   int encode;
857   ec_ctx *ec;
858
859   encode = ctx->encode;
860   ec = ctx->ec;
861
862   stereo = Y != NULL;
863   c=0; do {
864      int sign=0;
865      if (ctx->remaining_bits>=1<<BITRES)
866      {
867         if (encode)
868         {
869            sign = x[0]<0;
870            ec_enc_bits(ec, sign, 1);
871         } else {
872            sign = ec_dec_bits(ec, 1);
873         }
874         ctx->remaining_bits -= 1<<BITRES;
875         b-=1<<BITRES;
876      }
877      if (resynth)
878         x[0] = sign ? -NORM_SCALING : NORM_SCALING;
879      x = Y;
880   } while (++c<1+stereo);
881   if (lowband_out)
882      lowband_out[0] = SHR16(X[0],4);
883   return 1;
884}
885
886/* This function is responsible for encoding and decoding a mono partition.
887   It can split the band in two and transmit the energy difference with
888   the two half-bands. It can be called recursively so bands can end up being
889   split in 8 parts. */
890static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
891      int N, int b, int B, celt_norm *lowband,
892      int LM,
893      opus_val16 gain, int fill)
894{
895   const unsigned char *cache;
896   int q;
897   int curr_bits;
898   int imid=0, iside=0;
899   int B0=B;
900   opus_val16 mid=0, side=0;
901   unsigned cm=0;
902#ifdef RESYNTH
903   int resynth = 1;
904#else
905   int resynth = !ctx->encode;
906#endif
907   celt_norm *Y=NULL;
908   int encode;
909   const CELTMode *m;
910   int i;
911   int spread;
912   ec_ctx *ec;
913
914   encode = ctx->encode;
915   m = ctx->m;
916   i = ctx->i;
917   spread = ctx->spread;
918   ec = ctx->ec;
919
920   /* If we need 1.5 more bit than we can produce, split the band in two. */
921   cache = m->cache.bits + m->cache.index[(LM+1)*m->nbEBands+i];
922   if (LM != -1 && b > cache[cache[0]]+12 && N>2)
923   {
924      int mbits, sbits, delta;
925      int itheta;
926      int qalloc;
927      struct split_ctx sctx;
928      celt_norm *next_lowband2=NULL;
929      opus_int32 rebalance;
930
931      N >>= 1;
932      Y = X+N;
933      LM -= 1;
934      if (B==1)
935         fill = (fill&1)|(fill<<1);
936      B = (B+1)>>1;
937
938      compute_theta(ctx, &sctx, X, Y, N, &b, B, B0,
939            LM, 0, &fill);
940      imid = sctx.imid;
941      iside = sctx.iside;
942      delta = sctx.delta;
943      itheta = sctx.itheta;
944      qalloc = sctx.qalloc;
945#ifdef FIXED_POINT
946      mid = imid;
947      side = iside;
948#else
949      mid = (1.f/32768)*imid;
950      side = (1.f/32768)*iside;
951#endif
952
953      /* Give more bits to low-energy MDCTs than they would otherwise deserve */
954      if (B0>1 && (itheta&0x3fff))
955      {
956         if (itheta > 8192)
957            /* Rough approximation for pre-echo masking */
958            delta -= delta>>(4-LM);
959         else
960            /* Corresponds to a forward-masking slope of 1.5 dB per 10 ms */
961            delta = IMIN(0, delta + (N<<BITRES>>(5-LM)));
962      }
963      mbits = IMAX(0, IMIN(b, (b-delta)/2));
964      sbits = b-mbits;
965      ctx->remaining_bits -= qalloc;
966
967      if (lowband)
968         next_lowband2 = lowband+N; /* >32-bit split case */
969
970      rebalance = ctx->remaining_bits;
971      if (mbits >= sbits)
972      {
973         cm = quant_partition(ctx, X, N, mbits, B,
974               lowband, LM,
975               MULT16_16_P15(gain,mid), fill);
976         rebalance = mbits - (rebalance-ctx->remaining_bits);
977         if (rebalance > 3<<BITRES && itheta!=0)
978            sbits += rebalance - (3<<BITRES);
979         cm |= quant_partition(ctx, Y, N, sbits, B,
980               next_lowband2, LM,
981               MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
982      } else {
983         cm = quant_partition(ctx, Y, N, sbits, B,
984               next_lowband2, LM,
985               MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
986         rebalance = sbits - (rebalance-ctx->remaining_bits);
987         if (rebalance > 3<<BITRES && itheta!=16384)
988            mbits += rebalance - (3<<BITRES);
989         cm |= quant_partition(ctx, X, N, mbits, B,
990               lowband, LM,
991               MULT16_16_P15(gain,mid), fill);
992      }
993   } else {
994      /* This is the basic no-split case */
995      q = bits2pulses(m, i, LM, b);
996      curr_bits = pulses2bits(m, i, LM, q);
997      ctx->remaining_bits -= curr_bits;
998
999      /* Ensures we can never bust the budget */
1000      while (ctx->remaining_bits < 0 && q > 0)
1001      {
1002         ctx->remaining_bits += curr_bits;
1003         q--;
1004         curr_bits = pulses2bits(m, i, LM, q);
1005         ctx->remaining_bits -= curr_bits;
1006      }
1007
1008      if (q!=0)
1009      {
1010         int K = get_pulses(q);
1011
1012         /* Finally do the actual quantization */
1013         if (encode)
1014         {
1015            cm = alg_quant(X, N, K, spread, B, ec
1016#ifdef RESYNTH
1017                 , gain
1018#endif
1019                 );
1020         } else {
1021            cm = alg_unquant(X, N, K, spread, B, ec, gain);
1022         }
1023      } else {
1024         /* If there's no pulse, fill the band anyway */
1025         int j;
1026         if (resynth)
1027         {
1028            unsigned cm_mask;
1029            /* B can be as large as 16, so this shift might overflow an int on a
1030               16-bit platform; use a long to get defined behavior.*/
1031            cm_mask = (unsigned)(1UL<<B)-1;
1032            fill &= cm_mask;
1033            if (!fill)
1034            {
1035               OPUS_CLEAR(X, N);
1036            } else {
1037               if (lowband == NULL)
1038               {
1039                  /* Noise */
1040                  for (j=0;j<N;j++)
1041                  {
1042                     ctx->seed = celt_lcg_rand(ctx->seed);
1043                     X[j] = (celt_norm)((opus_int32)ctx->seed>>20);
1044                  }
1045                  cm = cm_mask;
1046               } else {
1047                  /* Folded spectrum */
1048                  for (j=0;j<N;j++)
1049                  {
1050                     opus_val16 tmp;
1051                     ctx->seed = celt_lcg_rand(ctx->seed);
1052                     /* About 48 dB below the "normal" folding level */
1053                     tmp = QCONST16(1.0f/256, 10);
1054                     tmp = (ctx->seed)&0x8000 ? tmp : -tmp;
1055                     X[j] = lowband[j]+tmp;
1056                  }
1057                  cm = fill;
1058               }
1059               renormalise_vector(X, N, gain, ctx->arch);
1060            }
1061         }
1062      }
1063   }
1064
1065   return cm;
1066}
1067
1068
1069/* This function is responsible for encoding and decoding a band for the mono case. */
1070static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
1071      int N, int b, int B, celt_norm *lowband,
1072      int LM, celt_norm *lowband_out,
1073      opus_val16 gain, celt_norm *lowband_scratch, int fill)
1074{
1075   int N0=N;
1076   int N_B=N;
1077   int N_B0;
1078   int B0=B;
1079   int time_divide=0;
1080   int recombine=0;
1081   int longBlocks;
1082   unsigned cm=0;
1083#ifdef RESYNTH
1084   int resynth = 1;
1085#else
1086   int resynth = !ctx->encode;
1087#endif
1088   int k;
1089   int encode;
1090   int tf_change;
1091
1092   encode = ctx->encode;
1093   tf_change = ctx->tf_change;
1094
1095   longBlocks = B0==1;
1096
1097   N_B = celt_udiv(N_B, B);
1098
1099   /* Special case for one sample */
1100   if (N==1)
1101   {
1102      return quant_band_n1(ctx, X, NULL, b, lowband_out);
1103   }
1104
1105   if (tf_change>0)
1106      recombine = tf_change;
1107   /* Band recombining to increase frequency resolution */
1108
1109   if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1))
1110   {
1111      OPUS_COPY(lowband_scratch, lowband, N);
1112      lowband = lowband_scratch;
1113   }
1114
1115   for (k=0;k<recombine;k++)
1116   {
1117      static const unsigned char bit_interleave_table[16]={
1118            0,1,1,1,2,3,3,3,2,3,3,3,2,3,3,3
1119      };
1120      if (encode)
1121         haar1(X, N>>k, 1<<k);
1122      if (lowband)
1123         haar1(lowband, N>>k, 1<<k);
1124      fill = bit_interleave_table[fill&0xF]|bit_interleave_table[fill>>4]<<2;
1125   }
1126   B>>=recombine;
1127   N_B<<=recombine;
1128
1129   /* Increasing the time resolution */
1130   while ((N_B&1) == 0 && tf_change<0)
1131   {
1132      if (encode)
1133         haar1(X, N_B, B);
1134      if (lowband)
1135         haar1(lowband, N_B, B);
1136      fill |= fill<<B;
1137      B <<= 1;
1138      N_B >>= 1;
1139      time_divide++;
1140      tf_change++;
1141   }
1142   B0=B;
1143   N_B0 = N_B;
1144
1145   /* Reorganize the samples in time order instead of frequency order */
1146   if (B0>1)
1147   {
1148      if (encode)
1149         deinterleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
1150      if (lowband)
1151         deinterleave_hadamard(lowband, N_B>>recombine, B0<<recombine, longBlocks);
1152   }
1153
1154   cm = quant_partition(ctx, X, N, b, B, lowband,
1155         LM, gain, fill);
1156
1157   /* This code is used by the decoder and by the resynthesis-enabled encoder */
1158   if (resynth)
1159   {
1160      /* Undo the sample reorganization going from time order to frequency order */
1161      if (B0>1)
1162         interleave_hadamard(X, N_B>>recombine, B0<<recombine, longBlocks);
1163
1164      /* Undo time-freq changes that we did earlier */
1165      N_B = N_B0;
1166      B = B0;
1167      for (k=0;k<time_divide;k++)
1168      {
1169         B >>= 1;
1170         N_B <<= 1;
1171         cm |= cm>>B;
1172         haar1(X, N_B, B);
1173      }
1174
1175      for (k=0;k<recombine;k++)
1176      {
1177         static const unsigned char bit_deinterleave_table[16]={
1178               0x00,0x03,0x0C,0x0F,0x30,0x33,0x3C,0x3F,
1179               0xC0,0xC3,0xCC,0xCF,0xF0,0xF3,0xFC,0xFF
1180         };
1181         cm = bit_deinterleave_table[cm];
1182         haar1(X, N0>>k, 1<<k);
1183      }
1184      B<<=recombine;
1185
1186      /* Scale output for later folding */
1187      if (lowband_out)
1188      {
1189         int j;
1190         opus_val16 n;
1191         n = celt_sqrt(SHL32(EXTEND32(N0),22));
1192         for (j=0;j<N0;j++)
1193            lowband_out[j] = MULT16_16_Q15(n,X[j]);
1194      }
1195      cm &= (1<<B)-1;
1196   }
1197   return cm;
1198}
1199
1200
1201/* This function is responsible for encoding and decoding a band for the stereo case. */
1202static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm *Y,
1203      int N, int b, int B, celt_norm *lowband,
1204      int LM, celt_norm *lowband_out,
1205      celt_norm *lowband_scratch, int fill)
1206{
1207   int imid=0, iside=0;
1208   int inv = 0;
1209   opus_val16 mid=0, side=0;
1210   unsigned cm=0;
1211#ifdef RESYNTH
1212   int resynth = 1;
1213#else
1214   int resynth = !ctx->encode;
1215#endif
1216   int mbits, sbits, delta;
1217   int itheta;
1218   int qalloc;
1219   struct split_ctx sctx;
1220   int orig_fill;
1221   int encode;
1222   ec_ctx *ec;
1223
1224   encode = ctx->encode;
1225   ec = ctx->ec;
1226
1227   /* Special case for one sample */
1228   if (N==1)
1229   {
1230      return quant_band_n1(ctx, X, Y, b, lowband_out);
1231   }
1232
1233   orig_fill = fill;
1234
1235   compute_theta(ctx, &sctx, X, Y, N, &b, B, B,
1236         LM, 1, &fill);
1237   inv = sctx.inv;
1238   imid = sctx.imid;
1239   iside = sctx.iside;
1240   delta = sctx.delta;
1241   itheta = sctx.itheta;
1242   qalloc = sctx.qalloc;
1243#ifdef FIXED_POINT
1244   mid = imid;
1245   side = iside;
1246#else
1247   mid = (1.f/32768)*imid;
1248   side = (1.f/32768)*iside;
1249#endif
1250
1251   /* This is a special case for N=2 that only works for stereo and takes
1252      advantage of the fact that mid and side are orthogonal to encode
1253      the side with just one bit. */
1254   if (N==2)
1255   {
1256      int c;
1257      int sign=0;
1258      celt_norm *x2, *y2;
1259      mbits = b;
1260      sbits = 0;
1261      /* Only need one bit for the side. */
1262      if (itheta != 0 && itheta != 16384)
1263         sbits = 1<<BITRES;
1264      mbits -= sbits;
1265      c = itheta > 8192;
1266      ctx->remaining_bits -= qalloc+sbits;
1267
1268      x2 = c ? Y : X;
1269      y2 = c ? X : Y;
1270      if (sbits)
1271      {
1272         if (encode)
1273         {
1274            /* Here we only need to encode a sign for the side. */
1275            sign = x2[0]*y2[1] - x2[1]*y2[0] < 0;
1276            ec_enc_bits(ec, sign, 1);
1277         } else {
1278            sign = ec_dec_bits(ec, 1);
1279         }
1280      }
1281      sign = 1-2*sign;
1282      /* We use orig_fill here because we want to fold the side, but if
1283         itheta==16384, we'll have cleared the low bits of fill. */
1284      cm = quant_band(ctx, x2, N, mbits, B, lowband,
1285            LM, lowband_out, Q15ONE, lowband_scratch, orig_fill);
1286      /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
1287         and there's no need to worry about mixing with the other channel. */
1288      y2[0] = -sign*x2[1];
1289      y2[1] = sign*x2[0];
1290      if (resynth)
1291      {
1292         celt_norm tmp;
1293         X[0] = MULT16_16_Q15(mid, X[0]);
1294         X[1] = MULT16_16_Q15(mid, X[1]);
1295         Y[0] = MULT16_16_Q15(side, Y[0]);
1296         Y[1] = MULT16_16_Q15(side, Y[1]);
1297         tmp = X[0];
1298         X[0] = SUB16(tmp,Y[0]);
1299         Y[0] = ADD16(tmp,Y[0]);
1300         tmp = X[1];
1301         X[1] = SUB16(tmp,Y[1]);
1302         Y[1] = ADD16(tmp,Y[1]);
1303      }
1304   } else {
1305      /* "Normal" split code */
1306      opus_int32 rebalance;
1307
1308      mbits = IMAX(0, IMIN(b, (b-delta)/2));
1309      sbits = b-mbits;
1310      ctx->remaining_bits -= qalloc;
1311
1312      rebalance = ctx->remaining_bits;
1313      if (mbits >= sbits)
1314      {
1315         /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
1316            mid for folding later. */
1317         cm = quant_band(ctx, X, N, mbits, B,
1318               lowband, LM, lowband_out,
1319               Q15ONE, lowband_scratch, fill);
1320         rebalance = mbits - (rebalance-ctx->remaining_bits);
1321         if (rebalance > 3<<BITRES && itheta!=0)
1322            sbits += rebalance - (3<<BITRES);
1323
1324         /* For a stereo split, the high bits of fill are always zero, so no
1325            folding will be done to the side. */
1326         cm |= quant_band(ctx, Y, N, sbits, B,
1327               NULL, LM, NULL,
1328               side, NULL, fill>>B);
1329      } else {
1330         /* For a stereo split, the high bits of fill are always zero, so no
1331            folding will be done to the side. */
1332         cm = quant_band(ctx, Y, N, sbits, B,
1333               NULL, LM, NULL,
1334               side, NULL, fill>>B);
1335         rebalance = sbits - (rebalance-ctx->remaining_bits);
1336         if (rebalance > 3<<BITRES && itheta!=16384)
1337            mbits += rebalance - (3<<BITRES);
1338         /* In stereo mode, we do not apply a scaling to the mid because we need the normalized
1339            mid for folding later. */
1340         cm |= quant_band(ctx, X, N, mbits, B,
1341               lowband, LM, lowband_out,
1342               Q15ONE, lowband_scratch, fill);
1343      }
1344   }
1345
1346
1347   /* This code is used by the decoder and by the resynthesis-enabled encoder */
1348   if (resynth)
1349   {
1350      if (N!=2)
1351         stereo_merge(X, Y, mid, N, ctx->arch);
1352      if (inv)
1353      {
1354         int j;
1355         for (j=0;j<N;j++)
1356            Y[j] = -Y[j];
1357      }
1358   }
1359   return cm;
1360}
1361
1362
1363void quant_all_bands(int encode, const CELTMode *m, int start, int end,
1364      celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks,
1365      const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
1366      int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits,
1367      opus_int32 balance, ec_ctx *ec, int LM, int codedBands,
1368      opus_uint32 *seed, int arch)
1369{
1370   int i;
1371   opus_int32 remaining_bits;
1372   const opus_int16 * OPUS_RESTRICT eBands = m->eBands;
1373   celt_norm * OPUS_RESTRICT norm, * OPUS_RESTRICT norm2;
1374   VARDECL(celt_norm, _norm);
1375   celt_norm *lowband_scratch;
1376   int B;
1377   int M;
1378   int lowband_offset;
1379   int update_lowband = 1;
1380   int C = Y_ != NULL ? 2 : 1;
1381   int norm_offset;
1382#ifdef RESYNTH
1383   int resynth = 1;
1384#else
1385   int resynth = !encode;
1386#endif
1387   struct band_ctx ctx;
1388   SAVE_STACK;
1389
1390   M = 1<<LM;
1391   B = shortBlocks ? M : 1;
1392   norm_offset = M*eBands[start];
1393   /* No need to allocate norm for the last band because we don't need an
1394      output in that band. */
1395   ALLOC(_norm, C*(M*eBands[m->nbEBands-1]-norm_offset), celt_norm);
1396   norm = _norm;
1397   norm2 = norm + M*eBands[m->nbEBands-1]-norm_offset;
1398   /* We can use the last band as scratch space because we don't need that
1399      scratch space for the last band. */
1400   lowband_scratch = X_+M*eBands[m->nbEBands-1];
1401
1402   lowband_offset = 0;
1403   ctx.bandE = bandE;
1404   ctx.ec = ec;
1405   ctx.encode = encode;
1406   ctx.intensity = intensity;
1407   ctx.m = m;
1408   ctx.seed = *seed;
1409   ctx.spread = spread;
1410   ctx.arch = arch;
1411   for (i=start;i<end;i++)
1412   {
1413      opus_int32 tell;
1414      int b;
1415      int N;
1416      opus_int32 curr_balance;
1417      int effective_lowband=-1;
1418      celt_norm * OPUS_RESTRICT X, * OPUS_RESTRICT Y;
1419      int tf_change=0;
1420      unsigned x_cm;
1421      unsigned y_cm;
1422      int last;
1423
1424      ctx.i = i;
1425      last = (i==end-1);
1426
1427      X = X_+M*eBands[i];
1428      if (Y_!=NULL)
1429         Y = Y_+M*eBands[i];
1430      else
1431         Y = NULL;
1432      N = M*eBands[i+1]-M*eBands[i];
1433      tell = ec_tell_frac(ec);
1434
1435      /* Compute how many bits we want to allocate to this band */
1436      if (i != start)
1437         balance -= tell;
1438      remaining_bits = total_bits-tell-1;
1439      ctx.remaining_bits = remaining_bits;
1440      if (i <= codedBands-1)
1441      {
1442         curr_balance = celt_sudiv(balance, IMIN(3, codedBands-i));
1443         b = IMAX(0, IMIN(16383, IMIN(remaining_bits+1,pulses[i]+curr_balance)));
1444      } else {
1445         b = 0;
1446      }
1447
1448      if (resynth && M*eBands[i]-N >= M*eBands[start] && (update_lowband || lowband_offset==0))
1449            lowband_offset = i;
1450
1451      tf_change = tf_res[i];
1452      ctx.tf_change = tf_change;
1453      if (i>=m->effEBands)
1454      {
1455         X=norm;
1456         if (Y_!=NULL)
1457            Y = norm;
1458         lowband_scratch = NULL;
1459      }
1460      if (i==end-1)
1461         lowband_scratch = NULL;
1462
1463      /* Get a conservative estimate of the collapse_mask's for the bands we're
1464         going to be folding from. */
1465      if (lowband_offset != 0 && (spread!=SPREAD_AGGRESSIVE || B>1 || tf_change<0))
1466      {
1467         int fold_start;
1468         int fold_end;
1469         int fold_i;
1470         /* This ensures we never repeat spectral content within one band */
1471         effective_lowband = IMAX(0, M*eBands[lowband_offset]-norm_offset-N);
1472         fold_start = lowband_offset;
1473         while(M*eBands[--fold_start] > effective_lowband+norm_offset);
1474         fold_end = lowband_offset-1;
1475         while(M*eBands[++fold_end] < effective_lowband+norm_offset+N);
1476         x_cm = y_cm = 0;
1477         fold_i = fold_start; do {
1478           x_cm |= collapse_masks[fold_i*C+0];
1479           y_cm |= collapse_masks[fold_i*C+C-1];
1480         } while (++fold_i<fold_end);
1481      }
1482      /* Otherwise, we'll be using the LCG to fold, so all blocks will (almost
1483         always) be non-zero. */
1484      else
1485         x_cm = y_cm = (1<<B)-1;
1486
1487      if (dual_stereo && i==intensity)
1488      {
1489         int j;
1490
1491         /* Switch off dual stereo to do intensity. */
1492         dual_stereo = 0;
1493         if (resynth)
1494            for (j=0;j<M*eBands[i]-norm_offset;j++)
1495               norm[j] = HALF32(norm[j]+norm2[j]);
1496      }
1497      if (dual_stereo)
1498      {
1499         x_cm = quant_band(&ctx, X, N, b/2, B,
1500               effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1501               last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm);
1502         y_cm = quant_band(&ctx, Y, N, b/2, B,
1503               effective_lowband != -1 ? norm2+effective_lowband : NULL, LM,
1504               last?NULL:norm2+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, y_cm);
1505      } else {
1506         if (Y!=NULL)
1507         {
1508            x_cm = quant_band_stereo(&ctx, X, Y, N, b, B,
1509                  effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1510                        last?NULL:norm+M*eBands[i]-norm_offset, lowband_scratch, x_cm|y_cm);
1511         } else {
1512            x_cm = quant_band(&ctx, X, N, b, B,
1513                  effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
1514                        last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm|y_cm);
1515         }
1516         y_cm = x_cm;
1517      }
1518      collapse_masks[i*C+0] = (unsigned char)x_cm;
1519      collapse_masks[i*C+C-1] = (unsigned char)y_cm;
1520      balance += pulses[i] + tell;
1521
1522      /* Update the folding position only as long as we have 1 bit/sample depth. */
1523      update_lowband = b>(N<<BITRES);
1524   }
1525   *seed = ctx.seed;
1526
1527   RESTORE_STACK;
1528}
1529
1530