12bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian/* Copyright (c) 2002-2008 Jean-Marc Valin
22bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   Copyright (c) 2007-2008 CSIRO
32bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   Copyright (c) 2007-2009 Xiph.Org Foundation
42bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   Written by Jean-Marc Valin */
52bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian/**
62bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   @file mathops.h
72bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   @brief Various math functions
82bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian*/
92bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian/*
102bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   Redistribution and use in source and binary forms, with or without
112bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   modification, are permitted provided that the following conditions
122bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   are met:
132bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
142bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   - Redistributions of source code must retain the above copyright
152bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   notice, this list of conditions and the following disclaimer.
162bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
172bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   - Redistributions in binary form must reproduce the above copyright
182bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   notice, this list of conditions and the following disclaimer in the
192bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   documentation and/or other materials provided with the distribution.
202bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
212bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
222bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
232bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
242bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
252bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
262bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
272bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
282bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
292bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
302bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
312bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
322bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian*/
332bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
342bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#ifdef HAVE_CONFIG_H
352bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "config.h"
362bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#endif
372bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
382bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#include "mathops.h"
392bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
402bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian/*Compute floor(sqrt(_val)) with exact arithmetic.
412bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian  This has been tested on all possible 32-bit inputs.*/
422bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanianunsigned isqrt32(opus_uint32 _val){
432bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian  unsigned b;
442bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian  unsigned g;
452bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian  int      bshift;
462bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian  /*Uses the second method from
472bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian     http://www.azillionmonkeys.com/qed/sqroot.html
482bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian    The main idea is to search for the largest binary digit b such that
492bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian     (g+b)*(g+b) <= _val, and add it to the solution g.*/
502bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian  g=0;
512bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian  bshift=(EC_ILOG(_val)-1)>>1;
522bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian  b=1U<<bshift;
532bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian  do{
542bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian    opus_uint32 t;
552bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian    t=(((opus_uint32)g<<1)+b)<<bshift;
562bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian    if(t<=_val){
572bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      g+=b;
582bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      _val-=t;
592bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian    }
602bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian    b>>=1;
612bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian    bshift--;
622bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian  }
632bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian  while(bshift>=0);
642bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian  return g;
652bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian}
662bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
672bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#ifdef FIXED_POINT
682bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
692bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanianopus_val32 frac_div32(opus_val32 a, opus_val32 b)
702bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian{
712bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   opus_val16 rcp;
722bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   opus_val32 result, rem;
732bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   int shift = celt_ilog2(b)-29;
742bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   a = VSHR32(a,shift);
752bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   b = VSHR32(b,shift);
762bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* 16-bit reciprocal */
772bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   rcp = ROUND16(celt_rcp(ROUND16(b,16)),3);
782bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   result = MULT16_32_Q15(rcp, a);
792bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   rem = PSHR32(a,2)-MULT32_32_Q31(result, b);
802bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   result = ADD32(result, SHL32(MULT16_32_Q15(rcp, rem),2));
812bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   if (result >= 536870912)       /*  2^29 */
822bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      return 2147483647;          /*  2^31 - 1 */
832bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   else if (result <= -536870912) /* -2^29 */
842bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      return -2147483647;         /* -2^31 */
852bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   else
862bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      return SHL32(result, 2);
872bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian}
882bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
892bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian/** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */
902bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanianopus_val16 celt_rsqrt_norm(opus_val32 x)
912bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian{
922bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   opus_val16 n;
932bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   opus_val16 r;
942bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   opus_val16 r2;
952bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   opus_val16 y;
962bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */
972bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   n = x-32768;
982bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* Get a rough initial guess for the root.
992bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      The optimal minimax quadratic approximation (using relative error) is
1002bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian       r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485).
1012bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      Coefficients here, and the final result r, are Q14.*/
1022bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713))));
1032bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14.
1042bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      We can compute the result from n and r using Q15 multiplies with some
1052bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian       adjustment, carefully done to avoid overflow.
1062bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      Range of y is [-1564,1594]. */
1072bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   r2 = MULT16_16_Q15(r, r);
1082bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1);
1092bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5).
1102bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      This yields the Q14 reciprocal square root of the Q16 x, with a maximum
1112bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian       relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a
1122bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian       peak absolute error of 2.26591/16384. */
1132bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y,
1142bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian              SUB16(MULT16_16_Q15(y, 12288), 16384))));
1152bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian}
1162bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
1172bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian/** Sqrt approximation (QX input, QX/2 output) */
1182bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanianopus_val32 celt_sqrt(opus_val32 x)
1192bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian{
1202bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   int k;
1212bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   opus_val16 n;
1222bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   opus_val32 rt;
1232bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   static const opus_val16 C[5] = {23175, 11561, -3011, 1699, -664};
1242bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   if (x==0)
1252bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      return 0;
1262bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   else if (x>=1073741824)
1272bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      return 32767;
1282bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   k = (celt_ilog2(x)>>1)-7;
1292bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   x = VSHR32(x, 2*k);
1302bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   n = x-32768;
1312bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2],
1322bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian              MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));
1332bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   rt = VSHR32(rt,7-k);
1342bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   return rt;
1352bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian}
1362bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
1372bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#define L1 32767
1382bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#define L2 -7651
1392bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#define L3 8277
1402bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#define L4 -626
1412bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
1422bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanianstatic OPUS_INLINE opus_val16 _celt_cos_pi_2(opus_val16 x)
1432bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian{
1442bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   opus_val16 x2;
1452bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
1462bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   x2 = MULT16_16_P15(x,x);
1472bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2
1482bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian                                                                                ))))))));
1492bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian}
1502bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
1512bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#undef L1
1522bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#undef L2
1532bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#undef L3
1542bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#undef L4
1552bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
1562bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanianopus_val16 celt_cos_norm(opus_val32 x)
1572bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian{
1582bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   x = x&0x0001ffff;
1592bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   if (x>SHL32(EXTEND32(1), 16))
1602bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      x = SUB32(SHL32(EXTEND32(1), 17),x);
1612bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   if (x&0x00007fff)
1622bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   {
1632bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      if (x<SHL32(EXTEND32(1), 15))
1642bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      {
1652bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         return _celt_cos_pi_2(EXTRACT16(x));
1662bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      } else {
1672bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         return NEG32(_celt_cos_pi_2(EXTRACT16(65536-x)));
1682bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      }
1692bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   } else {
1702bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      if (x&0x0000ffff)
1712bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         return 0;
1722bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      else if (x&0x0001ffff)
1732bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         return -32767;
1742bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      else
1752bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         return 32767;
1762bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   }
1772bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian}
1782bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
1792bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian/** Reciprocal approximation (Q15 input, Q16 output) */
1802bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanianopus_val32 celt_rcp(opus_val32 x)
1812bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian{
1822bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   int i;
1832bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   opus_val16 n;
1842bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   opus_val16 r;
1852bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   celt_assert2(x>0, "celt_rcp() only defined for positive values");
1862bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   i = celt_ilog2(x);
1872bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* n is Q15 with range [0,1). */
1882bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   n = VSHR32(x,i-15)-32768;
1892bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* Start with a linear approximation:
1902bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      r = 1.8823529411764706-0.9411764705882353*n.
1912bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      The coefficients and the result are Q14 in the range [15420,30840].*/
1922bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   r = ADD16(30840, MULT16_16_Q15(-15420, n));
1932bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* Perform two Newton iterations:
1942bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian      r -= r*((r*n)-1.Q15)
1952bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian         = r*((r*n)+(r-1.Q15)). */
1962bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   r = SUB16(r, MULT16_16_Q15(r,
1972bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian             ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))));
1982bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* We subtract an extra 1 in the second iteration to avoid overflow; it also
1992bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian       neatly compensates for truncation error in the rest of the process. */
2002bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   r = SUB16(r, ADD16(1, MULT16_16_Q15(r,
2012bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian             ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))));
2022bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   /* r is now the Q15 solution to 2/(n+1), with a maximum relative error
2032bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian       of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute
2042bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian       error of 1.24665/32768. */
2052bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian   return VSHR32(EXTEND32(r),i-16);
2062bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian}
2072bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian
2082bd8b54017b5320bc0c1df9bf86f4cdc9f8db242Vignesh Venkatasubramanian#endif
209