1/* Copyright (c) 2007-2008 CSIRO
2   Copyright (c) 2007-2009 Xiph.Org Foundation
3   Copyright (c) 2007-2009 Timothy B. Terriberry
4   Written by Timothy B. Terriberry and Jean-Marc Valin */
5/*
6   Redistribution and use in source and binary forms, with or without
7   modification, are permitted provided that the following conditions
8   are met:
9
10   - Redistributions of source code must retain the above copyright
11   notice, this list of conditions and the following disclaimer.
12
13   - Redistributions in binary form must reproduce the above copyright
14   notice, this list of conditions and the following disclaimer in the
15   documentation and/or other materials provided with the distribution.
16
17   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
21   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28*/
29
30#ifdef HAVE_CONFIG_H
31#include "config.h"
32#endif
33
34#include "os_support.h"
35#include "cwrs.h"
36#include "mathops.h"
37#include "arch.h"
38
39#ifdef CUSTOM_MODES
40
41/*Guaranteed to return a conservatively large estimate of the binary logarithm
42   with frac bits of fractional precision.
43  Tested for all possible 32-bit inputs with frac=4, where the maximum
44   overestimation is 0.06254243 bits.*/
45int log2_frac(opus_uint32 val, int frac)
46{
47  int l;
48  l=EC_ILOG(val);
49  if(val&(val-1)){
50    /*This is (val>>l-16), but guaranteed to round up, even if adding a bias
51       before the shift would cause overflow (e.g., for 0xFFFFxxxx).
52       Doesn't work for val=0, but that case fails the test above.*/
53    if(l>16)val=((val-1)>>(l-16))+1;
54    else val<<=16-l;
55    l=(l-1)<<frac;
56    /*Note that we always need one iteration, since the rounding up above means
57       that we might need to adjust the integer part of the logarithm.*/
58    do{
59      int b;
60      b=(int)(val>>16);
61      l+=b<<frac;
62      val=(val+b)>>b;
63      val=(val*val+0x7FFF)>>15;
64    }
65    while(frac-->0);
66    /*If val is not exactly 0x8000, then we have to round up the remainder.*/
67    return l+(val>0x8000);
68  }
69  /*Exact powers of two require no rounding.*/
70  else return (l-1)<<frac;
71}
72#endif
73
74#ifndef SMALL_FOOTPRINT
75
76#define MASK32 (0xFFFFFFFF)
77
78/*INV_TABLE[i] holds the multiplicative inverse of (2*i+1) mod 2**32.*/
79static const opus_uint32 INV_TABLE[53]={
80  0x00000001,0xAAAAAAAB,0xCCCCCCCD,0xB6DB6DB7,
81  0x38E38E39,0xBA2E8BA3,0xC4EC4EC5,0xEEEEEEEF,
82  0xF0F0F0F1,0x286BCA1B,0x3CF3CF3D,0xE9BD37A7,
83  0xC28F5C29,0x684BDA13,0x4F72C235,0xBDEF7BDF,
84  0x3E0F83E1,0x8AF8AF8B,0x914C1BAD,0x96F96F97,
85  0xC18F9C19,0x2FA0BE83,0xA4FA4FA5,0x677D46CF,
86  0x1A1F58D1,0xFAFAFAFB,0x8C13521D,0x586FB587,
87  0xB823EE09,0xA08AD8F3,0xC10C9715,0xBEFBEFBF,
88  0xC0FC0FC1,0x07A44C6B,0xA33F128D,0xE327A977,
89  0xC7E3F1F9,0x962FC963,0x3F2B3885,0x613716AF,
90  0x781948B1,0x2B2E43DB,0xFCFCFCFD,0x6FD0EB67,
91  0xFA3F47E9,0xD2FD2FD3,0x3F4FD3F5,0xD4E25B9F,
92  0x5F02A3A1,0xBF5A814B,0x7C32B16D,0xD3431B57,
93  0xD8FD8FD9,
94};
95
96/*Computes (_a*_b-_c)/(2*_d+1) when the quotient is known to be exact.
97  _a, _b, _c, and _d may be arbitrary so long as the arbitrary precision result
98   fits in 32 bits, but currently the table for multiplicative inverses is only
99   valid for _d<=52.*/
100static inline opus_uint32 imusdiv32odd(opus_uint32 _a,opus_uint32 _b,
101 opus_uint32 _c,int _d){
102  celt_assert(_d<=52);
103  return (_a*_b-_c)*INV_TABLE[_d]&MASK32;
104}
105
106/*Computes (_a*_b-_c)/_d when the quotient is known to be exact.
107  _d does not actually have to be even, but imusdiv32odd will be faster when
108   it's odd, so you should use that instead.
109  _a and _d are assumed to be small (e.g., _a*_d fits in 32 bits; currently the
110   table for multiplicative inverses is only valid for _d<=54).
111  _b and _c may be arbitrary so long as the arbitrary precision reuslt fits in
112   32 bits.*/
113static inline opus_uint32 imusdiv32even(opus_uint32 _a,opus_uint32 _b,
114 opus_uint32 _c,int _d){
115  opus_uint32 inv;
116  int           mask;
117  int           shift;
118  int           one;
119  celt_assert(_d>0);
120  celt_assert(_d<=54);
121  shift=EC_ILOG(_d^(_d-1));
122  inv=INV_TABLE[(_d-1)>>shift];
123  shift--;
124  one=1<<shift;
125  mask=one-1;
126  return (_a*(_b>>shift)-(_c>>shift)+
127   ((_a*(_b&mask)+one-(_c&mask))>>shift)-1)*inv&MASK32;
128}
129
130#endif /* SMALL_FOOTPRINT */
131
132/*Although derived separately, the pulse vector coding scheme is equivalent to
133   a Pyramid Vector Quantizer \cite{Fis86}.
134  Some additional notes about an early version appear at
135   http://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering
136   and the definitions of some terms have evolved since that was written.
137
138  The conversion from a pulse vector to an integer index (encoding) and back
139   (decoding) is governed by two related functions, V(N,K) and U(N,K).
140
141  V(N,K) = the number of combinations, with replacement, of N items, taken K
142   at a time, when a sign bit is added to each item taken at least once (i.e.,
143   the number of N-dimensional unit pulse vectors with K pulses).
144  One way to compute this is via
145    V(N,K) = K>0 ? sum(k=1...K,2**k*choose(N,k)*choose(K-1,k-1)) : 1,
146   where choose() is the binomial function.
147  A table of values for N<10 and K<10 looks like:
148  V[10][10] = {
149    {1,  0,   0,    0,    0,     0,     0,      0,      0,       0},
150    {1,  2,   2,    2,    2,     2,     2,      2,      2,       2},
151    {1,  4,   8,   12,   16,    20,    24,     28,     32,      36},
152    {1,  6,  18,   38,   66,   102,   146,    198,    258,     326},
153    {1,  8,  32,   88,  192,   360,   608,    952,   1408,    1992},
154    {1, 10,  50,  170,  450,  1002,  1970,   3530,   5890,    9290},
155    {1, 12,  72,  292,  912,  2364,  5336,  10836,  20256,   35436},
156    {1, 14,  98,  462, 1666,  4942, 12642,  28814,  59906,  115598},
157    {1, 16, 128,  688, 2816,  9424, 27008,  68464, 157184,  332688},
158    {1, 18, 162,  978, 4482, 16722, 53154, 148626, 374274,  864146}
159  };
160
161  U(N,K) = the number of such combinations wherein N-1 objects are taken at
162   most K-1 at a time.
163  This is given by
164    U(N,K) = sum(k=0...K-1,V(N-1,k))
165           = K>0 ? (V(N-1,K-1) + V(N,K-1))/2 : 0.
166  The latter expression also makes clear that U(N,K) is half the number of such
167   combinations wherein the first object is taken at least once.
168  Although it may not be clear from either of these definitions, U(N,K) is the
169   natural function to work with when enumerating the pulse vector codebooks,
170   not V(N,K).
171  U(N,K) is not well-defined for N=0, but with the extension
172    U(0,K) = K>0 ? 0 : 1,
173   the function becomes symmetric: U(N,K) = U(K,N), with a similar table:
174  U[10][10] = {
175    {1, 0,  0,   0,    0,    0,     0,     0,      0,      0},
176    {0, 1,  1,   1,    1,    1,     1,     1,      1,      1},
177    {0, 1,  3,   5,    7,    9,    11,    13,     15,     17},
178    {0, 1,  5,  13,   25,   41,    61,    85,    113,    145},
179    {0, 1,  7,  25,   63,  129,   231,   377,    575,    833},
180    {0, 1,  9,  41,  129,  321,   681,  1289,   2241,   3649},
181    {0, 1, 11,  61,  231,  681,  1683,  3653,   7183,  13073},
182    {0, 1, 13,  85,  377, 1289,  3653,  8989,  19825,  40081},
183    {0, 1, 15, 113,  575, 2241,  7183, 19825,  48639, 108545},
184    {0, 1, 17, 145,  833, 3649, 13073, 40081, 108545, 265729}
185  };
186
187  With this extension, V(N,K) may be written in terms of U(N,K):
188    V(N,K) = U(N,K) + U(N,K+1)
189   for all N>=0, K>=0.
190  Thus U(N,K+1) represents the number of combinations where the first element
191   is positive or zero, and U(N,K) represents the number of combinations where
192   it is negative.
193  With a large enough table of U(N,K) values, we could write O(N) encoding
194   and O(min(N*log(K),N+K)) decoding routines, but such a table would be
195   prohibitively large for small embedded devices (K may be as large as 32767
196   for small N, and N may be as large as 200).
197
198  Both functions obey the same recurrence relation:
199    V(N,K) = V(N-1,K) + V(N,K-1) + V(N-1,K-1),
200    U(N,K) = U(N-1,K) + U(N,K-1) + U(N-1,K-1),
201   for all N>0, K>0, with different initial conditions at N=0 or K=0.
202  This allows us to construct a row of one of the tables above given the
203   previous row or the next row.
204  Thus we can derive O(NK) encoding and decoding routines with O(K) memory
205   using only addition and subtraction.
206
207  When encoding, we build up from the U(2,K) row and work our way forwards.
208  When decoding, we need to start at the U(N,K) row and work our way backwards,
209   which requires a means of computing U(N,K).
210  U(N,K) may be computed from two previous values with the same N:
211    U(N,K) = ((2*N-1)*U(N,K-1) - U(N,K-2))/(K-1) + U(N,K-2)
212   for all N>1, and since U(N,K) is symmetric, a similar relation holds for two
213   previous values with the same K:
214    U(N,K>1) = ((2*K-1)*U(N-1,K) - U(N-2,K))/(N-1) + U(N-2,K)
215   for all K>1.
216  This allows us to construct an arbitrary row of the U(N,K) table by starting
217   with the first two values, which are constants.
218  This saves roughly 2/3 the work in our O(NK) decoding routine, but costs O(K)
219   multiplications.
220  Similar relations can be derived for V(N,K), but are not used here.
221
222  For N>0 and K>0, U(N,K) and V(N,K) take on the form of an (N-1)-degree
223   polynomial for fixed N.
224  The first few are
225    U(1,K) = 1,
226    U(2,K) = 2*K-1,
227    U(3,K) = (2*K-2)*K+1,
228    U(4,K) = (((4*K-6)*K+8)*K-3)/3,
229    U(5,K) = ((((2*K-4)*K+10)*K-8)*K+3)/3,
230   and
231    V(1,K) = 2,
232    V(2,K) = 4*K,
233    V(3,K) = 4*K*K+2,
234    V(4,K) = 8*(K*K+2)*K/3,
235    V(5,K) = ((4*K*K+20)*K*K+6)/3,
236   for all K>0.
237  This allows us to derive O(N) encoding and O(N*log(K)) decoding routines for
238   small N (and indeed decoding is also O(N) for N<3).
239
240  @ARTICLE{Fis86,
241    author="Thomas R. Fischer",
242    title="A Pyramid Vector Quantizer",
243    journal="IEEE Transactions on Information Theory",
244    volume="IT-32",
245    number=4,
246    pages="568--583",
247    month=Jul,
248    year=1986
249  }*/
250
251#ifndef SMALL_FOOTPRINT
252/*Compute U(2,_k).
253  Note that this may be called with _k=32768 (maxK[2]+1).*/
254static inline unsigned ucwrs2(unsigned _k){
255  celt_assert(_k>0);
256  return _k+(_k-1);
257}
258
259/*Compute V(2,_k).*/
260static inline opus_uint32 ncwrs2(int _k){
261  celt_assert(_k>0);
262  return 4*(opus_uint32)_k;
263}
264
265/*Compute U(3,_k).
266  Note that this may be called with _k=32768 (maxK[3]+1).*/
267static inline opus_uint32 ucwrs3(unsigned _k){
268  celt_assert(_k>0);
269  return (2*(opus_uint32)_k-2)*_k+1;
270}
271
272/*Compute V(3,_k).*/
273static inline opus_uint32 ncwrs3(int _k){
274  celt_assert(_k>0);
275  return 2*(2*(unsigned)_k*(opus_uint32)_k+1);
276}
277
278/*Compute U(4,_k).*/
279static inline opus_uint32 ucwrs4(int _k){
280  celt_assert(_k>0);
281  return imusdiv32odd(2*_k,(2*_k-3)*(opus_uint32)_k+4,3,1);
282}
283
284/*Compute V(4,_k).*/
285static inline opus_uint32 ncwrs4(int _k){
286  celt_assert(_k>0);
287  return ((_k*(opus_uint32)_k+2)*_k)/3<<3;
288}
289
290#endif /* SMALL_FOOTPRINT */
291
292/*Computes the next row/column of any recurrence that obeys the relation
293   u[i][j]=u[i-1][j]+u[i][j-1]+u[i-1][j-1].
294  _ui0 is the base case for the new row/column.*/
295static inline void unext(opus_uint32 *_ui,unsigned _len,opus_uint32 _ui0){
296  opus_uint32 ui1;
297  unsigned      j;
298  /*This do-while will overrun the array if we don't have storage for at least
299     2 values.*/
300  j=1; do {
301    ui1=UADD32(UADD32(_ui[j],_ui[j-1]),_ui0);
302    _ui[j-1]=_ui0;
303    _ui0=ui1;
304  } while (++j<_len);
305  _ui[j-1]=_ui0;
306}
307
308/*Computes the previous row/column of any recurrence that obeys the relation
309   u[i-1][j]=u[i][j]-u[i][j-1]-u[i-1][j-1].
310  _ui0 is the base case for the new row/column.*/
311static inline void uprev(opus_uint32 *_ui,unsigned _n,opus_uint32 _ui0){
312  opus_uint32 ui1;
313  unsigned      j;
314  /*This do-while will overrun the array if we don't have storage for at least
315     2 values.*/
316  j=1; do {
317    ui1=USUB32(USUB32(_ui[j],_ui[j-1]),_ui0);
318    _ui[j-1]=_ui0;
319    _ui0=ui1;
320  } while (++j<_n);
321  _ui[j-1]=_ui0;
322}
323
324/*Compute V(_n,_k), as well as U(_n,0..._k+1).
325  _u: On exit, _u[i] contains U(_n,i) for i in [0..._k+1].*/
326static opus_uint32 ncwrs_urow(unsigned _n,unsigned _k,opus_uint32 *_u){
327  opus_uint32 um2;
328  unsigned      len;
329  unsigned      k;
330  len=_k+2;
331  /*We require storage at least 3 values (e.g., _k>0).*/
332  celt_assert(len>=3);
333  _u[0]=0;
334  _u[1]=um2=1;
335#ifndef SMALL_FOOTPRINT
336  /*_k>52 doesn't work in the false branch due to the limits of INV_TABLE,
337    but _k isn't tested here because k<=52 for n=7*/
338  if(_n<=6)
339#endif
340  {
341    /*If _n==0, _u[0] should be 1 and the rest should be 0.*/
342    /*If _n==1, _u[i] should be 1 for i>1.*/
343    celt_assert(_n>=2);
344    /*If _k==0, the following do-while loop will overflow the buffer.*/
345    celt_assert(_k>0);
346    k=2;
347    do _u[k]=(k<<1)-1;
348    while(++k<len);
349    for(k=2;k<_n;k++)unext(_u+1,_k+1,1);
350  }
351#ifndef SMALL_FOOTPRINT
352  else{
353    opus_uint32 um1;
354    opus_uint32 n2m1;
355    _u[2]=n2m1=um1=(_n<<1)-1;
356    for(k=3;k<len;k++){
357      /*U(N,K) = ((2*N-1)*U(N,K-1)-U(N,K-2))/(K-1) + U(N,K-2)*/
358      _u[k]=um2=imusdiv32even(n2m1,um1,um2,k-1)+um2;
359      if(++k>=len)break;
360      _u[k]=um1=imusdiv32odd(n2m1,um2,um1,(k-1)>>1)+um1;
361    }
362  }
363#endif /* SMALL_FOOTPRINT */
364  return _u[_k]+_u[_k+1];
365}
366
367#ifndef SMALL_FOOTPRINT
368
369/*Returns the _i'th combination of _k elements (at most 32767) chosen from a
370   set of size 1 with associated sign bits.
371  _y: Returns the vector of pulses.*/
372static inline void cwrsi1(int _k,opus_uint32 _i,int *_y){
373  int s;
374  s=-(int)_i;
375  _y[0]=(_k+s)^s;
376}
377
378/*Returns the _i'th combination of _k elements (at most 32767) chosen from a
379   set of size 2 with associated sign bits.
380  _y: Returns the vector of pulses.*/
381static inline void cwrsi2(int _k,opus_uint32 _i,int *_y){
382  opus_uint32 p;
383  int           s;
384  int           yj;
385  p=ucwrs2(_k+1U);
386  s=-(_i>=p);
387  _i-=p&s;
388  yj=_k;
389  _k=(_i+1)>>1;
390  p=_k?ucwrs2(_k):0;
391  _i-=p;
392  yj-=_k;
393  _y[0]=(yj+s)^s;
394  cwrsi1(_k,_i,_y+1);
395}
396
397/*Returns the _i'th combination of _k elements (at most 32767) chosen from a
398   set of size 3 with associated sign bits.
399  _y: Returns the vector of pulses.*/
400static void cwrsi3(int _k,opus_uint32 _i,int *_y){
401  opus_uint32 p;
402  int           s;
403  int           yj;
404  p=ucwrs3(_k+1U);
405  s=-(_i>=p);
406  _i-=p&s;
407  yj=_k;
408  /*Finds the maximum _k such that ucwrs3(_k)<=_i (tested for all
409     _i<2147418113=U(3,32768)).*/
410  _k=_i>0?(isqrt32(2*_i-1)+1)>>1:0;
411  p=_k?ucwrs3(_k):0;
412  _i-=p;
413  yj-=_k;
414  _y[0]=(yj+s)^s;
415  cwrsi2(_k,_i,_y+1);
416}
417
418/*Returns the _i'th combination of _k elements (at most 1172) chosen from a set
419   of size 4 with associated sign bits.
420  _y: Returns the vector of pulses.*/
421static void cwrsi4(int _k,opus_uint32 _i,int *_y){
422  opus_uint32 p;
423  int           s;
424  int           yj;
425  int           kl;
426  int           kr;
427  p=ucwrs4(_k+1);
428  s=-(_i>=p);
429  _i-=p&s;
430  yj=_k;
431  /*We could solve a cubic for k here, but the form of the direct solution does
432     not lend itself well to exact integer arithmetic.
433    Instead we do a binary search on U(4,K).*/
434  kl=0;
435  kr=_k;
436  for(;;){
437    _k=(kl+kr)>>1;
438    p=_k?ucwrs4(_k):0;
439    if(p<_i){
440      if(_k>=kr)break;
441      kl=_k+1;
442    }
443    else if(p>_i)kr=_k-1;
444    else break;
445  }
446  _i-=p;
447  yj-=_k;
448  _y[0]=(yj+s)^s;
449  cwrsi3(_k,_i,_y+1);
450}
451
452#endif /* SMALL_FOOTPRINT */
453
454/*Returns the _i'th combination of _k elements chosen from a set of size _n
455   with associated sign bits.
456  _y: Returns the vector of pulses.
457  _u: Must contain entries [0..._k+1] of row _n of U() on input.
458      Its contents will be destructively modified.*/
459static void cwrsi(int _n,int _k,opus_uint32 _i,int *_y,opus_uint32 *_u){
460  int j;
461  celt_assert(_n>0);
462  j=0;
463  do{
464    opus_uint32 p;
465    int           s;
466    int           yj;
467    p=_u[_k+1];
468    s=-(_i>=p);
469    _i-=p&s;
470    yj=_k;
471    p=_u[_k];
472    while(p>_i)p=_u[--_k];
473    _i-=p;
474    yj-=_k;
475    _y[j]=(yj+s)^s;
476    uprev(_u,_k+2,0);
477  }
478  while(++j<_n);
479}
480
481/*Returns the index of the given combination of K elements chosen from a set
482   of size 1 with associated sign bits.
483  _y: The vector of pulses, whose sum of absolute values is K.
484  _k: Returns K.*/
485static inline opus_uint32 icwrs1(const int *_y,int *_k){
486  *_k=abs(_y[0]);
487  return _y[0]<0;
488}
489
490#ifndef SMALL_FOOTPRINT
491
492/*Returns the index of the given combination of K elements chosen from a set
493   of size 2 with associated sign bits.
494  _y: The vector of pulses, whose sum of absolute values is K.
495  _k: Returns K.*/
496static inline opus_uint32 icwrs2(const int *_y,int *_k){
497  opus_uint32 i;
498  int           k;
499  i=icwrs1(_y+1,&k);
500  i+=k?ucwrs2(k):0;
501  k+=abs(_y[0]);
502  if(_y[0]<0)i+=ucwrs2(k+1U);
503  *_k=k;
504  return i;
505}
506
507/*Returns the index of the given combination of K elements chosen from a set
508   of size 3 with associated sign bits.
509  _y: The vector of pulses, whose sum of absolute values is K.
510  _k: Returns K.*/
511static inline opus_uint32 icwrs3(const int *_y,int *_k){
512  opus_uint32 i;
513  int           k;
514  i=icwrs2(_y+1,&k);
515  i+=k?ucwrs3(k):0;
516  k+=abs(_y[0]);
517  if(_y[0]<0)i+=ucwrs3(k+1U);
518  *_k=k;
519  return i;
520}
521
522/*Returns the index of the given combination of K elements chosen from a set
523   of size 4 with associated sign bits.
524  _y: The vector of pulses, whose sum of absolute values is K.
525  _k: Returns K.*/
526static inline opus_uint32 icwrs4(const int *_y,int *_k){
527  opus_uint32 i;
528  int           k;
529  i=icwrs3(_y+1,&k);
530  i+=k?ucwrs4(k):0;
531  k+=abs(_y[0]);
532  if(_y[0]<0)i+=ucwrs4(k+1);
533  *_k=k;
534  return i;
535}
536
537#endif /* SMALL_FOOTPRINT */
538
539/*Returns the index of the given combination of K elements chosen from a set
540   of size _n with associated sign bits.
541  _y:  The vector of pulses, whose sum of absolute values must be _k.
542  _nc: Returns V(_n,_k).*/
543static inline opus_uint32 icwrs(int _n,int _k,opus_uint32 *_nc,const int *_y,
544 opus_uint32 *_u){
545  opus_uint32 i;
546  int           j;
547  int           k;
548  /*We can't unroll the first two iterations of the loop unless _n>=2.*/
549  celt_assert(_n>=2);
550  _u[0]=0;
551  for(k=1;k<=_k+1;k++)_u[k]=(k<<1)-1;
552  i=icwrs1(_y+_n-1,&k);
553  j=_n-2;
554  i+=_u[k];
555  k+=abs(_y[j]);
556  if(_y[j]<0)i+=_u[k+1];
557  while(j-->0){
558    unext(_u,_k+2,0);
559    i+=_u[k];
560    k+=abs(_y[j]);
561    if(_y[j]<0)i+=_u[k+1];
562  }
563  *_nc=_u[k]+_u[k+1];
564  return i;
565}
566
567#ifdef CUSTOM_MODES
568void get_required_bits(opus_int16 *_bits,int _n,int _maxk,int _frac){
569  int k;
570  /*_maxk==0 => there's nothing to do.*/
571  celt_assert(_maxk>0);
572  _bits[0]=0;
573  if (_n==1)
574  {
575    for (k=1;k<=_maxk;k++)
576      _bits[k] = 1<<_frac;
577  }
578  else {
579    VARDECL(opus_uint32,u);
580    SAVE_STACK;
581    ALLOC(u,_maxk+2U,opus_uint32);
582    ncwrs_urow(_n,_maxk,u);
583    for(k=1;k<=_maxk;k++)
584      _bits[k]=log2_frac(u[k]+u[k+1],_frac);
585    RESTORE_STACK;
586  }
587}
588#endif /* CUSTOM_MODES */
589
590void encode_pulses(const int *_y,int _n,int _k,ec_enc *_enc){
591  opus_uint32 i;
592  celt_assert(_k>0);
593#ifndef SMALL_FOOTPRINT
594  switch(_n){
595    case 2:{
596      i=icwrs2(_y,&_k);
597      ec_enc_uint(_enc,i,ncwrs2(_k));
598    }break;
599    case 3:{
600      i=icwrs3(_y,&_k);
601      ec_enc_uint(_enc,i,ncwrs3(_k));
602    }break;
603    case 4:{
604      i=icwrs4(_y,&_k);
605      ec_enc_uint(_enc,i,ncwrs4(_k));
606    }break;
607     default:
608    {
609#endif
610      VARDECL(opus_uint32,u);
611      opus_uint32 nc;
612      SAVE_STACK;
613      ALLOC(u,_k+2U,opus_uint32);
614      i=icwrs(_n,_k,&nc,_y,u);
615      ec_enc_uint(_enc,i,nc);
616      RESTORE_STACK;
617#ifndef SMALL_FOOTPRINT
618    }
619    break;
620  }
621#endif
622}
623
624void decode_pulses(int *_y,int _n,int _k,ec_dec *_dec)
625{
626  celt_assert(_k>0);
627#ifndef SMALL_FOOTPRINT
628   switch(_n){
629    case 2:cwrsi2(_k,ec_dec_uint(_dec,ncwrs2(_k)),_y);break;
630    case 3:cwrsi3(_k,ec_dec_uint(_dec,ncwrs3(_k)),_y);break;
631    case 4:cwrsi4(_k,ec_dec_uint(_dec,ncwrs4(_k)),_y);break;
632    default:
633    {
634#endif
635      VARDECL(opus_uint32,u);
636      SAVE_STACK;
637      ALLOC(u,_k+2U,opus_uint32);
638      cwrsi(_n,_k,ec_dec_uint(_dec,ncwrs_urow(_n,_k,u)),_y,u);
639      RESTORE_STACK;
640#ifndef SMALL_FOOTPRINT
641    }
642    break;
643  }
644#endif
645}
646