1/***********************************************************************
2Copyright (c) 2006-2011, Skype Limited. All rights reserved.
3Redistribution and use in source and binary forms, with or without
4modification, are permitted provided that the following conditions
5are met:
6- Redistributions of source code must retain the above copyright notice,
7this list of conditions and the following disclaimer.
8- Redistributions in binary form must reproduce the above copyright
9notice, this list of conditions and the following disclaimer in the
10documentation and/or other materials provided with the distribution.
11- Neither the name of Internet Society, IETF or IETF Trust, nor the
12names of specific contributors, may be used to endorse or promote
13products derived from this software without specific prior written
14permission.
15THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25POSSIBILITY OF SUCH DAMAGE.
26***********************************************************************/
27
28#ifdef HAVE_CONFIG_H
29#include "config.h"
30#endif
31#include "API.h"
32#include "main.h"
33#include "stack_alloc.h"
34
35/************************/
36/* Decoder Super Struct */
37/************************/
38typedef struct {
39    silk_decoder_state          channel_state[ DECODER_NUM_CHANNELS ];
40    stereo_dec_state                sStereo;
41    opus_int                         nChannelsAPI;
42    opus_int                         nChannelsInternal;
43    opus_int                         prev_decode_only_middle;
44} silk_decoder;
45
46/*********************/
47/* Decoder functions */
48/*********************/
49
50opus_int silk_Get_Decoder_Size(                         /* O    Returns error code                              */
51    opus_int                        *decSizeBytes       /* O    Number of bytes in SILK decoder state           */
52)
53{
54    opus_int ret = SILK_NO_ERROR;
55
56    *decSizeBytes = sizeof( silk_decoder );
57
58    return ret;
59}
60
61/* Reset decoder state */
62opus_int silk_InitDecoder(                              /* O    Returns error code                              */
63    void                            *decState           /* I/O  State                                           */
64)
65{
66    opus_int n, ret = SILK_NO_ERROR;
67    silk_decoder_state *channel_state = ((silk_decoder *)decState)->channel_state;
68
69    for( n = 0; n < DECODER_NUM_CHANNELS; n++ ) {
70        ret  = silk_init_decoder( &channel_state[ n ] );
71    }
72    silk_memset(&((silk_decoder *)decState)->sStereo, 0, sizeof(((silk_decoder *)decState)->sStereo));
73    /* Not strictly needed, but it's cleaner that way */
74    ((silk_decoder *)decState)->prev_decode_only_middle = 0;
75
76    return ret;
77}
78
79/* Decode a frame */
80opus_int silk_Decode(                                   /* O    Returns error code                              */
81    void*                           decState,           /* I/O  State                                           */
82    silk_DecControlStruct*          decControl,         /* I/O  Control Structure                               */
83    opus_int                        lostFlag,           /* I    0: no loss, 1 loss, 2 decode fec                */
84    opus_int                        newPacketFlag,      /* I    Indicates first decoder call for this packet    */
85    ec_dec                          *psRangeDec,        /* I/O  Compressor data structure                       */
86    opus_int16                      *samplesOut,        /* O    Decoded output speech vector                    */
87    opus_int32                      *nSamplesOut        /* O    Number of samples decoded                       */
88)
89{
90    opus_int   i, n, decode_only_middle = 0, ret = SILK_NO_ERROR;
91    opus_int32 nSamplesOutDec, LBRR_symbol;
92    opus_int16 *samplesOut1_tmp[ 2 ];
93    VARDECL( opus_int16, samplesOut1_tmp_storage );
94    VARDECL( opus_int16, samplesOut2_tmp );
95    opus_int32 MS_pred_Q13[ 2 ] = { 0 };
96    opus_int16 *resample_out_ptr;
97    silk_decoder *psDec = ( silk_decoder * )decState;
98    silk_decoder_state *channel_state = psDec->channel_state;
99    opus_int has_side;
100    opus_int stereo_to_mono;
101    SAVE_STACK;
102
103    silk_assert( decControl->nChannelsInternal == 1 || decControl->nChannelsInternal == 2 );
104
105    /**********************************/
106    /* Test if first frame in payload */
107    /**********************************/
108    if( newPacketFlag ) {
109        for( n = 0; n < decControl->nChannelsInternal; n++ ) {
110            channel_state[ n ].nFramesDecoded = 0;  /* Used to count frames in packet */
111        }
112    }
113
114    /* If Mono -> Stereo transition in bitstream: init state of second channel */
115    if( decControl->nChannelsInternal > psDec->nChannelsInternal ) {
116        ret += silk_init_decoder( &channel_state[ 1 ] );
117    }
118
119    stereo_to_mono = decControl->nChannelsInternal == 1 && psDec->nChannelsInternal == 2 &&
120                     ( decControl->internalSampleRate == 1000*channel_state[ 0 ].fs_kHz );
121
122    if( channel_state[ 0 ].nFramesDecoded == 0 ) {
123        for( n = 0; n < decControl->nChannelsInternal; n++ ) {
124            opus_int fs_kHz_dec;
125            if( decControl->payloadSize_ms == 0 ) {
126                /* Assuming packet loss, use 10 ms */
127                channel_state[ n ].nFramesPerPacket = 1;
128                channel_state[ n ].nb_subfr = 2;
129            } else if( decControl->payloadSize_ms == 10 ) {
130                channel_state[ n ].nFramesPerPacket = 1;
131                channel_state[ n ].nb_subfr = 2;
132            } else if( decControl->payloadSize_ms == 20 ) {
133                channel_state[ n ].nFramesPerPacket = 1;
134                channel_state[ n ].nb_subfr = 4;
135            } else if( decControl->payloadSize_ms == 40 ) {
136                channel_state[ n ].nFramesPerPacket = 2;
137                channel_state[ n ].nb_subfr = 4;
138            } else if( decControl->payloadSize_ms == 60 ) {
139                channel_state[ n ].nFramesPerPacket = 3;
140                channel_state[ n ].nb_subfr = 4;
141            } else {
142                silk_assert( 0 );
143                RESTORE_STACK;
144                return SILK_DEC_INVALID_FRAME_SIZE;
145            }
146            fs_kHz_dec = ( decControl->internalSampleRate >> 10 ) + 1;
147            if( fs_kHz_dec != 8 && fs_kHz_dec != 12 && fs_kHz_dec != 16 ) {
148                silk_assert( 0 );
149                RESTORE_STACK;
150                return SILK_DEC_INVALID_SAMPLING_FREQUENCY;
151            }
152            ret += silk_decoder_set_fs( &channel_state[ n ], fs_kHz_dec, decControl->API_sampleRate );
153        }
154    }
155
156    if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 && ( psDec->nChannelsAPI == 1 || psDec->nChannelsInternal == 1 ) ) {
157        silk_memset( psDec->sStereo.pred_prev_Q13, 0, sizeof( psDec->sStereo.pred_prev_Q13 ) );
158        silk_memset( psDec->sStereo.sSide, 0, sizeof( psDec->sStereo.sSide ) );
159        silk_memcpy( &channel_state[ 1 ].resampler_state, &channel_state[ 0 ].resampler_state, sizeof( silk_resampler_state_struct ) );
160    }
161    psDec->nChannelsAPI      = decControl->nChannelsAPI;
162    psDec->nChannelsInternal = decControl->nChannelsInternal;
163
164    if( decControl->API_sampleRate > (opus_int32)MAX_API_FS_KHZ * 1000 || decControl->API_sampleRate < 8000 ) {
165        ret = SILK_DEC_INVALID_SAMPLING_FREQUENCY;
166        RESTORE_STACK;
167        return( ret );
168    }
169
170    if( lostFlag != FLAG_PACKET_LOST && channel_state[ 0 ].nFramesDecoded == 0 ) {
171        /* First decoder call for this payload */
172        /* Decode VAD flags and LBRR flag */
173        for( n = 0; n < decControl->nChannelsInternal; n++ ) {
174            for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
175                channel_state[ n ].VAD_flags[ i ] = ec_dec_bit_logp(psRangeDec, 1);
176            }
177            channel_state[ n ].LBRR_flag = ec_dec_bit_logp(psRangeDec, 1);
178        }
179        /* Decode LBRR flags */
180        for( n = 0; n < decControl->nChannelsInternal; n++ ) {
181            silk_memset( channel_state[ n ].LBRR_flags, 0, sizeof( channel_state[ n ].LBRR_flags ) );
182            if( channel_state[ n ].LBRR_flag ) {
183                if( channel_state[ n ].nFramesPerPacket == 1 ) {
184                    channel_state[ n ].LBRR_flags[ 0 ] = 1;
185                } else {
186                    LBRR_symbol = ec_dec_icdf( psRangeDec, silk_LBRR_flags_iCDF_ptr[ channel_state[ n ].nFramesPerPacket - 2 ], 8 ) + 1;
187                    for( i = 0; i < channel_state[ n ].nFramesPerPacket; i++ ) {
188                        channel_state[ n ].LBRR_flags[ i ] = silk_RSHIFT( LBRR_symbol, i ) & 1;
189                    }
190                }
191            }
192        }
193
194        if( lostFlag == FLAG_DECODE_NORMAL ) {
195            /* Regular decoding: skip all LBRR data */
196            for( i = 0; i < channel_state[ 0 ].nFramesPerPacket; i++ ) {
197                for( n = 0; n < decControl->nChannelsInternal; n++ ) {
198                    if( channel_state[ n ].LBRR_flags[ i ] ) {
199                        opus_int pulses[ MAX_FRAME_LENGTH ];
200                        opus_int condCoding;
201
202                        if( decControl->nChannelsInternal == 2 && n == 0 ) {
203                            silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
204                            if( channel_state[ 1 ].LBRR_flags[ i ] == 0 ) {
205                                silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
206                            }
207                        }
208                        /* Use conditional coding if previous frame available */
209                        if( i > 0 && channel_state[ n ].LBRR_flags[ i - 1 ] ) {
210                            condCoding = CODE_CONDITIONALLY;
211                        } else {
212                            condCoding = CODE_INDEPENDENTLY;
213                        }
214                        silk_decode_indices( &channel_state[ n ], psRangeDec, i, 1, condCoding );
215                        silk_decode_pulses( psRangeDec, pulses, channel_state[ n ].indices.signalType,
216                            channel_state[ n ].indices.quantOffsetType, channel_state[ n ].frame_length );
217                    }
218                }
219            }
220        }
221    }
222
223    /* Get MS predictor index */
224    if( decControl->nChannelsInternal == 2 ) {
225        if(   lostFlag == FLAG_DECODE_NORMAL ||
226            ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 0 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 1 ) )
227        {
228            silk_stereo_decode_pred( psRangeDec, MS_pred_Q13 );
229            /* For LBRR data, decode mid-only flag only if side-channel's LBRR flag is false */
230            if( ( lostFlag == FLAG_DECODE_NORMAL && channel_state[ 1 ].VAD_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) ||
231                ( lostFlag == FLAG_DECODE_LBRR && channel_state[ 1 ].LBRR_flags[ channel_state[ 0 ].nFramesDecoded ] == 0 ) )
232            {
233                silk_stereo_decode_mid_only( psRangeDec, &decode_only_middle );
234            } else {
235                decode_only_middle = 0;
236            }
237        } else {
238            for( n = 0; n < 2; n++ ) {
239                MS_pred_Q13[ n ] = psDec->sStereo.pred_prev_Q13[ n ];
240            }
241        }
242    }
243
244    /* Reset side channel decoder prediction memory for first frame with side coding */
245    if( decControl->nChannelsInternal == 2 && decode_only_middle == 0 && psDec->prev_decode_only_middle == 1 ) {
246        silk_memset( psDec->channel_state[ 1 ].outBuf, 0, sizeof(psDec->channel_state[ 1 ].outBuf) );
247        silk_memset( psDec->channel_state[ 1 ].sLPC_Q14_buf, 0, sizeof(psDec->channel_state[ 1 ].sLPC_Q14_buf) );
248        psDec->channel_state[ 1 ].lagPrev        = 100;
249        psDec->channel_state[ 1 ].LastGainIndex  = 10;
250        psDec->channel_state[ 1 ].prevSignalType = TYPE_NO_VOICE_ACTIVITY;
251        psDec->channel_state[ 1 ].first_frame_after_reset = 1;
252    }
253
254    ALLOC( samplesOut1_tmp_storage,
255           decControl->nChannelsInternal*(
256               channel_state[ 0 ].frame_length + 2 ),
257           opus_int16 );
258    samplesOut1_tmp[ 0 ] = samplesOut1_tmp_storage;
259    samplesOut1_tmp[ 1 ] = samplesOut1_tmp_storage
260                           + channel_state[ 0 ].frame_length + 2;
261
262    if( lostFlag == FLAG_DECODE_NORMAL ) {
263        has_side = !decode_only_middle;
264    } else {
265        has_side = !psDec->prev_decode_only_middle
266              || (decControl->nChannelsInternal == 2 && lostFlag == FLAG_DECODE_LBRR && channel_state[1].LBRR_flags[ channel_state[1].nFramesDecoded ] == 1 );
267    }
268    /* Call decoder for one frame */
269    for( n = 0; n < decControl->nChannelsInternal; n++ ) {
270        if( n == 0 || has_side ) {
271            opus_int FrameIndex;
272            opus_int condCoding;
273
274            FrameIndex = channel_state[ 0 ].nFramesDecoded - n;
275            /* Use independent coding if no previous frame available */
276            if( FrameIndex <= 0 ) {
277                condCoding = CODE_INDEPENDENTLY;
278            } else if( lostFlag == FLAG_DECODE_LBRR ) {
279                condCoding = channel_state[ n ].LBRR_flags[ FrameIndex - 1 ] ? CODE_CONDITIONALLY : CODE_INDEPENDENTLY;
280            } else if( n > 0 && psDec->prev_decode_only_middle ) {
281                /* If we skipped a side frame in this packet, we don't
282                   need LTP scaling; the LTP state is well-defined. */
283                condCoding = CODE_INDEPENDENTLY_NO_LTP_SCALING;
284            } else {
285                condCoding = CODE_CONDITIONALLY;
286            }
287            ret += silk_decode_frame( &channel_state[ n ], psRangeDec, &samplesOut1_tmp[ n ][ 2 ], &nSamplesOutDec, lostFlag, condCoding);
288        } else {
289            silk_memset( &samplesOut1_tmp[ n ][ 2 ], 0, nSamplesOutDec * sizeof( opus_int16 ) );
290        }
291        channel_state[ n ].nFramesDecoded++;
292    }
293
294    if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 2 ) {
295        /* Convert Mid/Side to Left/Right */
296        silk_stereo_MS_to_LR( &psDec->sStereo, samplesOut1_tmp[ 0 ], samplesOut1_tmp[ 1 ], MS_pred_Q13, channel_state[ 0 ].fs_kHz, nSamplesOutDec );
297    } else {
298        /* Buffering */
299        silk_memcpy( samplesOut1_tmp[ 0 ], psDec->sStereo.sMid, 2 * sizeof( opus_int16 ) );
300        silk_memcpy( psDec->sStereo.sMid, &samplesOut1_tmp[ 0 ][ nSamplesOutDec ], 2 * sizeof( opus_int16 ) );
301    }
302
303    /* Number of output samples */
304    *nSamplesOut = silk_DIV32( nSamplesOutDec * decControl->API_sampleRate, silk_SMULBB( channel_state[ 0 ].fs_kHz, 1000 ) );
305
306    /* Set up pointers to temp buffers */
307    ALLOC( samplesOut2_tmp,
308           decControl->nChannelsAPI == 2 ? *nSamplesOut : ALLOC_NONE, opus_int16 );
309    if( decControl->nChannelsAPI == 2 ) {
310        resample_out_ptr = samplesOut2_tmp;
311    } else {
312        resample_out_ptr = samplesOut;
313    }
314
315    for( n = 0; n < silk_min( decControl->nChannelsAPI, decControl->nChannelsInternal ); n++ ) {
316
317        /* Resample decoded signal to API_sampleRate */
318        ret += silk_resampler( &channel_state[ n ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ n ][ 1 ], nSamplesOutDec );
319
320        /* Interleave if stereo output and stereo stream */
321        if( decControl->nChannelsAPI == 2 ) {
322            for( i = 0; i < *nSamplesOut; i++ ) {
323                samplesOut[ n + 2 * i ] = resample_out_ptr[ i ];
324            }
325        }
326    }
327
328    /* Create two channel output from mono stream */
329    if( decControl->nChannelsAPI == 2 && decControl->nChannelsInternal == 1 ) {
330        if ( stereo_to_mono ){
331            /* Resample right channel for newly collapsed stereo just in case
332               we weren't doing collapsing when switching to mono */
333            ret += silk_resampler( &channel_state[ 1 ].resampler_state, resample_out_ptr, &samplesOut1_tmp[ 0 ][ 1 ], nSamplesOutDec );
334
335            for( i = 0; i < *nSamplesOut; i++ ) {
336                samplesOut[ 1 + 2 * i ] = resample_out_ptr[ i ];
337            }
338        } else {
339            for( i = 0; i < *nSamplesOut; i++ ) {
340                samplesOut[ 1 + 2 * i ] = samplesOut[ 0 + 2 * i ];
341            }
342        }
343    }
344
345    /* Export pitch lag, measured at 48 kHz sampling rate */
346    if( channel_state[ 0 ].prevSignalType == TYPE_VOICED ) {
347        int mult_tab[ 3 ] = { 6, 4, 3 };
348        decControl->prevPitchLag = channel_state[ 0 ].lagPrev * mult_tab[ ( channel_state[ 0 ].fs_kHz - 8 ) >> 2 ];
349    } else {
350        decControl->prevPitchLag = 0;
351    }
352
353    if( lostFlag == FLAG_PACKET_LOST ) {
354       /* On packet loss, remove the gain clamping to prevent having the energy "bounce back"
355          if we lose packets when the energy is going down */
356       for ( i = 0; i < psDec->nChannelsInternal; i++ )
357          psDec->channel_state[ i ].LastGainIndex = 10;
358    } else {
359       psDec->prev_decode_only_middle = decode_only_middle;
360    }
361    RESTORE_STACK;
362    return ret;
363}
364
#if 0
/* Getting table of contents for a packet.                                       */
/* Disabled code, kept for reference. Reads the leading bits of payload[ 0 ] as  */
/* nFramesPerPayload VAD flags followed by one in-band FEC flag and stores them  */
/* in *Silk_TOC. Returns -1 when the payload is empty or nFramesPerPayload is    */
/* outside 0..3, SILK_NO_ERROR otherwise.                                        */
opus_int silk_get_TOC(
    const opus_uint8                *payload,           /* I    Payload data                                */
    const opus_int                  nBytesIn,           /* I    Number of input bytes                       */
    const opus_int                  nFramesPerPayload,  /* I    Number of SILK frames per payload           */
    silk_TOC_struct                 *Silk_TOC           /* O    Type of content                             */
)
{
    opus_int frame, bits;

    /* Reject empty payloads and unsupported frame counts */
    if( nBytesIn < 1 || nFramesPerPayload < 0 || nFramesPerPayload > 3 ) {
        return -1;
    }

    silk_memset( Silk_TOC, 0, sizeof( *Silk_TOC ) );

    /* For stereo, extract the flags for the mid channel: keep the top      */
    /* nFramesPerPayload + 1 bits of the first byte, right-aligned          */
    bits = silk_RSHIFT( payload[ 0 ], 7 - nFramesPerPayload ) & ( silk_LSHIFT( 1, nFramesPerPayload + 1 ) - 1 );

    /* Lowest bit is the FEC flag; the VAD flags follow, last frame first */
    Silk_TOC->inbandFECFlag = bits & 1;
    frame = nFramesPerPayload;
    while( frame-- > 0 ) {
        bits = silk_RSHIFT( bits, 1 );
        Silk_TOC->VADFlags[ frame ] = bits & 1;
        Silk_TOC->VADFlag |= bits & 1;
    }

    return SILK_NO_ERROR;
}
#endif
398