1/*********************************************************************** 2Copyright (c) 2006-2011, Skype Limited. All rights reserved. 3Redistribution and use in source and binary forms, with or without 4modification, are permitted provided that the following conditions 5are met: 6- Redistributions of source code must retain the above copyright notice, 7this list of conditions and the following disclaimer. 8- Redistributions in binary form must reproduce the above copyright 9notice, this list of conditions and the following disclaimer in the 10documentation and/or other materials provided with the distribution. 11- Neither the name of Internet Society, IETF or IETF Trust, nor the 12names of specific contributors, may be used to endorse or promote 13products derived from this software without specific prior written 14permission. 15THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 19LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 20CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 21SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 22INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 23CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 24ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25POSSIBILITY OF SUCH DAMAGE. 26***********************************************************************/ 27 28#ifdef HAVE_CONFIG_H 29#include "config.h" 30#endif 31 32#include <stdlib.h> 33#include "main_FLP.h" 34#include "tuning_parameters.h" 35 36/* Low Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode with lower bitrate */ 37static OPUS_INLINE void silk_LBRR_encode_FLP( 38 silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ 39 silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ 40 const silk_float xfw[], /* I Input signal */ 41 opus_int condCoding /* I The type of conditional coding used so far for this frame */ 42); 43 44void silk_encode_do_VAD_FLP( 45 silk_encoder_state_FLP *psEnc /* I/O Encoder state FLP */ 46) 47{ 48 /****************************/ 49 /* Voice Activity Detection */ 50 /****************************/ 51 silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.arch ); 52 53 /**************************************************/ 54 /* Convert speech activity into VAD and DTX flags */ 55 /**************************************************/ 56 if( psEnc->sCmn.speech_activity_Q8 < SILK_FIX_CONST( SPEECH_ACTIVITY_DTX_THRES, 8 ) ) { 57 psEnc->sCmn.indices.signalType = TYPE_NO_VOICE_ACTIVITY; 58 psEnc->sCmn.noSpeechCounter++; 59 if( psEnc->sCmn.noSpeechCounter < NB_SPEECH_FRAMES_BEFORE_DTX ) { 60 psEnc->sCmn.inDTX = 0; 61 } else if( psEnc->sCmn.noSpeechCounter > MAX_CONSECUTIVE_DTX + NB_SPEECH_FRAMES_BEFORE_DTX ) { 62 psEnc->sCmn.noSpeechCounter = NB_SPEECH_FRAMES_BEFORE_DTX; 63 psEnc->sCmn.inDTX = 0; 64 } 65 psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 0; 66 } else { 67 psEnc->sCmn.noSpeechCounter = 0; 68 psEnc->sCmn.inDTX = 0; 69 psEnc->sCmn.indices.signalType = TYPE_UNVOICED; 70 psEnc->sCmn.VAD_flags[ psEnc->sCmn.nFramesEncoded ] = 1; 71 } 72} 73 74/****************/ 75/* Encode frame */ 76/****************/ 77opus_int silk_encode_frame_FLP( 78 silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ 79 opus_int32 *pnBytesOut, /* O Number of payload bytes; */ 80 ec_enc *psRangeEnc, /* I/O compressor data structure */ 81 opus_int condCoding, /* I The type of conditional coding to use */ 82 opus_int maxBits, /* I If > 0: maximum number of output bits */ 83 opus_int useCBR /* I Flag to force constant-bitrate operation */ 84) 85{ 86 silk_encoder_control_FLP sEncCtrl; 87 opus_int i, iter, maxIter, found_upper, found_lower, ret = 0; 88 silk_float *x_frame, *res_pitch_frame; 89 silk_float res_pitch[ 2 * MAX_FRAME_LENGTH + LA_PITCH_MAX ]; 90 ec_enc sRangeEnc_copy, sRangeEnc_copy2; 91 silk_nsq_state sNSQ_copy, sNSQ_copy2; 92 opus_int32 seed_copy, nBits, nBits_lower, nBits_upper, gainMult_lower, gainMult_upper; 93 opus_int32 gainsID, gainsID_lower, gainsID_upper; 94 opus_int16 gainMult_Q8; 95 opus_int16 ec_prevLagIndex_copy; 96 opus_int ec_prevSignalType_copy; 97 opus_int8 LastGainIndex_copy2; 98 opus_int32 pGains_Q16[ MAX_NB_SUBFR ]; 99 opus_uint8 ec_buf_copy[ 1275 ]; 100 opus_int gain_lock[ MAX_NB_SUBFR ] = {0}; 101 opus_int16 best_gain_mult[ MAX_NB_SUBFR ]; 102 opus_int best_sum[ MAX_NB_SUBFR ]; 103 104 /* This is totally unnecessary but many compilers (including gcc) are too dumb to realise it */ 105 LastGainIndex_copy2 = nBits_lower = nBits_upper = gainMult_lower = gainMult_upper = 0; 106 107 psEnc->sCmn.indices.Seed = psEnc->sCmn.frameCounter++ & 3; 108 109 /**************************************************************/ 110 /* Set up Input Pointers, and insert frame in input buffer */ 111 /**************************************************************/ 112 /* pointers aligned with start of frame to encode */ 113 x_frame = psEnc->x_buf + psEnc->sCmn.ltp_mem_length; /* start of frame to encode */ 114 res_pitch_frame = res_pitch + psEnc->sCmn.ltp_mem_length; /* start of pitch LPC residual frame */ 115 116 /***************************************/ 117 /* Ensure smooth bandwidth transitions */ 118 /***************************************/ 119 silk_LP_variable_cutoff( &psEnc->sCmn.sLP, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length ); 120 121 /*******************************************/ 122 /* Copy new frame to front of input buffer */ 123 /*******************************************/ 124 silk_short2float_array( x_frame + LA_SHAPE_MS * psEnc->sCmn.fs_kHz, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.frame_length ); 125 126 /* Add tiny signal to avoid high CPU load from denormalized floating point numbers */ 127 for( i = 0; i < 8; i++ ) { 128 x_frame[ LA_SHAPE_MS * psEnc->sCmn.fs_kHz + i * ( psEnc->sCmn.frame_length >> 3 ) ] += ( 1 - ( i & 2 ) ) * 1e-6f; 129 } 130 131 if( !psEnc->sCmn.prefillFlag ) { 132 /*****************************************/ 133 /* Find pitch lags, initial LPC analysis */ 134 /*****************************************/ 135 silk_find_pitch_lags_FLP( psEnc, &sEncCtrl, res_pitch, x_frame, psEnc->sCmn.arch ); 136 137 /************************/ 138 /* Noise shape analysis */ 139 /************************/ 140 silk_noise_shape_analysis_FLP( psEnc, &sEncCtrl, res_pitch_frame, x_frame ); 141 142 /***************************************************/ 143 /* Find linear prediction coefficients (LPC + LTP) */ 144 /***************************************************/ 145 silk_find_pred_coefs_FLP( psEnc, &sEncCtrl, res_pitch_frame, x_frame, condCoding ); 146 147 /****************************************/ 148 /* Process gains */ 149 /****************************************/ 150 silk_process_gains_FLP( psEnc, &sEncCtrl, condCoding ); 151 152 /****************************************/ 153 /* Low Bitrate Redundant Encoding */ 154 /****************************************/ 155 silk_LBRR_encode_FLP( psEnc, &sEncCtrl, x_frame, condCoding ); 156 157 /* Loop over quantizer and entroy coding to control bitrate */ 158 maxIter = 6; 159 gainMult_Q8 = SILK_FIX_CONST( 1, 8 ); 160 found_lower = 0; 161 found_upper = 0; 162 gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr ); 163 gainsID_lower = -1; 164 gainsID_upper = -1; 165 /* Copy part of the input state */ 166 silk_memcpy( &sRangeEnc_copy, psRangeEnc, sizeof( ec_enc ) ); 167 silk_memcpy( &sNSQ_copy, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) ); 168 seed_copy = psEnc->sCmn.indices.Seed; 169 ec_prevLagIndex_copy = psEnc->sCmn.ec_prevLagIndex; 170 ec_prevSignalType_copy = psEnc->sCmn.ec_prevSignalType; 171 for( iter = 0; ; iter++ ) { 172 if( gainsID == gainsID_lower ) { 173 nBits = nBits_lower; 174 } else if( gainsID == gainsID_upper ) { 175 nBits = nBits_upper; 176 } else { 177 /* Restore part of the input state */ 178 if( iter > 0 ) { 179 silk_memcpy( psRangeEnc, &sRangeEnc_copy, sizeof( ec_enc ) ); 180 silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy, sizeof( silk_nsq_state ) ); 181 psEnc->sCmn.indices.Seed = seed_copy; 182 psEnc->sCmn.ec_prevLagIndex = ec_prevLagIndex_copy; 183 psEnc->sCmn.ec_prevSignalType = ec_prevSignalType_copy; 184 } 185 186 /*****************************************/ 187 /* Noise shaping quantization */ 188 /*****************************************/ 189 silk_NSQ_wrapper_FLP( psEnc, &sEncCtrl, &psEnc->sCmn.indices, &psEnc->sCmn.sNSQ, psEnc->sCmn.pulses, x_frame ); 190 191 if ( iter == maxIter && !found_lower ) { 192 silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) ); 193 } 194 195 /****************************************/ 196 /* Encode Parameters */ 197 /****************************************/ 198 silk_encode_indices( &psEnc->sCmn, psRangeEnc, psEnc->sCmn.nFramesEncoded, 0, condCoding ); 199 200 /****************************************/ 201 /* Encode Excitation Signal */ 202 /****************************************/ 203 silk_encode_pulses( psRangeEnc, psEnc->sCmn.indices.signalType, psEnc->sCmn.indices.quantOffsetType, 204 psEnc->sCmn.pulses, psEnc->sCmn.frame_length ); 205 206 nBits = ec_tell( psRangeEnc ); 207 208 /* If we still bust after the last iteration, do some damage control. */ 209 if ( iter == maxIter && !found_lower && nBits > maxBits ) { 210 silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) ); 211 212 /* Keep gains the same as the last frame. */ 213 psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev; 214 for ( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { 215 psEnc->sCmn.indices.GainsIndices[ i ] = 4; 216 } 217 if (condCoding != CODE_CONDITIONALLY) { 218 psEnc->sCmn.indices.GainsIndices[ 0 ] = sEncCtrl.lastGainIndexPrev; 219 } 220 psEnc->sCmn.ec_prevLagIndex = ec_prevLagIndex_copy; 221 psEnc->sCmn.ec_prevSignalType = ec_prevSignalType_copy; 222 /* Clear all pulses. */ 223 for ( i = 0; i < psEnc->sCmn.frame_length; i++ ) { 224 psEnc->sCmn.pulses[ i ] = 0; 225 } 226 227 silk_encode_indices( &psEnc->sCmn, psRangeEnc, psEnc->sCmn.nFramesEncoded, 0, condCoding ); 228 229 silk_encode_pulses( psRangeEnc, psEnc->sCmn.indices.signalType, psEnc->sCmn.indices.quantOffsetType, 230 psEnc->sCmn.pulses, psEnc->sCmn.frame_length ); 231 232 nBits = ec_tell( psRangeEnc ); 233 } 234 235 if( useCBR == 0 && iter == 0 && nBits <= maxBits ) { 236 break; 237 } 238 } 239 240 if( iter == maxIter ) { 241 if( found_lower && ( gainsID == gainsID_lower || nBits > maxBits ) ) { 242 /* Restore output state from earlier iteration that did meet the bitrate budget */ 243 silk_memcpy( psRangeEnc, &sRangeEnc_copy2, sizeof( ec_enc ) ); 244 silk_assert( sRangeEnc_copy2.offs <= 1275 ); 245 silk_memcpy( psRangeEnc->buf, ec_buf_copy, sRangeEnc_copy2.offs ); 246 silk_memcpy( &psEnc->sCmn.sNSQ, &sNSQ_copy2, sizeof( silk_nsq_state ) ); 247 psEnc->sShape.LastGainIndex = LastGainIndex_copy2; 248 } 249 break; 250 } 251 252 if( nBits > maxBits ) { 253 if( found_lower == 0 && iter >= 2 ) { 254 /* Adjust the quantizer's rate/distortion tradeoff and discard previous "upper" results */ 255 sEncCtrl.Lambda = silk_max_float(sEncCtrl.Lambda*1.5f, 1.5f); 256 /* Reducing dithering can help us hit the target. */ 257 psEnc->sCmn.indices.quantOffsetType = 0; 258 found_upper = 0; 259 gainsID_upper = -1; 260 } else { 261 found_upper = 1; 262 nBits_upper = nBits; 263 gainMult_upper = gainMult_Q8; 264 gainsID_upper = gainsID; 265 } 266 } else if( nBits < maxBits - 5 ) { 267 found_lower = 1; 268 nBits_lower = nBits; 269 gainMult_lower = gainMult_Q8; 270 if( gainsID != gainsID_lower ) { 271 gainsID_lower = gainsID; 272 /* Copy part of the output state */ 273 silk_memcpy( &sRangeEnc_copy2, psRangeEnc, sizeof( ec_enc ) ); 274 silk_assert( psRangeEnc->offs <= 1275 ); 275 silk_memcpy( ec_buf_copy, psRangeEnc->buf, psRangeEnc->offs ); 276 silk_memcpy( &sNSQ_copy2, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) ); 277 LastGainIndex_copy2 = psEnc->sShape.LastGainIndex; 278 } 279 } else { 280 /* Within 5 bits of budget: close enough */ 281 break; 282 } 283 284 if ( !found_lower && nBits > maxBits ) { 285 int j; 286 for ( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { 287 int sum=0; 288 for ( j = i*psEnc->sCmn.subfr_length; j < (i+1)*psEnc->sCmn.subfr_length; j++ ) { 289 sum += abs( psEnc->sCmn.pulses[j] ); 290 } 291 if ( iter == 0 || (sum < best_sum[i] && !gain_lock[i]) ) { 292 best_sum[i] = sum; 293 best_gain_mult[i] = gainMult_Q8; 294 } else { 295 gain_lock[i] = 1; 296 } 297 } 298 } 299 if( ( found_lower & found_upper ) == 0 ) { 300 /* Adjust gain according to high-rate rate/distortion curve */ 301 if( nBits > maxBits ) { 302 if (gainMult_Q8 < 16384) { 303 gainMult_Q8 *= 2; 304 } else { 305 gainMult_Q8 = 32767; 306 } 307 } else { 308 opus_int32 gain_factor_Q16; 309 gain_factor_Q16 = silk_log2lin( silk_LSHIFT( nBits - maxBits, 7 ) / psEnc->sCmn.frame_length + SILK_FIX_CONST( 16, 7 ) ); 310 gainMult_Q8 = silk_SMULWB( gain_factor_Q16, gainMult_Q8 ); 311 } 312 } else { 313 /* Adjust gain by interpolating */ 314 gainMult_Q8 = gainMult_lower + ( ( gainMult_upper - gainMult_lower ) * ( maxBits - nBits_lower ) ) / ( nBits_upper - nBits_lower ); 315 /* New gain multplier must be between 25% and 75% of old range (note that gainMult_upper < gainMult_lower) */ 316 if( gainMult_Q8 > silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 ) ) { 317 gainMult_Q8 = silk_ADD_RSHIFT32( gainMult_lower, gainMult_upper - gainMult_lower, 2 ); 318 } else 319 if( gainMult_Q8 < silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 ) ) { 320 gainMult_Q8 = silk_SUB_RSHIFT32( gainMult_upper, gainMult_upper - gainMult_lower, 2 ); 321 } 322 } 323 324 for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { 325 opus_int16 tmp; 326 if ( gain_lock[i] ) { 327 tmp = best_gain_mult[i]; 328 } else { 329 tmp = gainMult_Q8; 330 } 331 pGains_Q16[ i ] = silk_LSHIFT_SAT32( silk_SMULWB( sEncCtrl.GainsUnq_Q16[ i ], tmp ), 8 ); 332 } 333 334 /* Quantize gains */ 335 psEnc->sShape.LastGainIndex = sEncCtrl.lastGainIndexPrev; 336 silk_gains_quant( psEnc->sCmn.indices.GainsIndices, pGains_Q16, 337 &psEnc->sShape.LastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr ); 338 339 /* Unique identifier of gains vector */ 340 gainsID = silk_gains_ID( psEnc->sCmn.indices.GainsIndices, psEnc->sCmn.nb_subfr ); 341 342 /* Overwrite unquantized gains with quantized gains and convert back to Q0 from Q16 */ 343 for( i = 0; i < psEnc->sCmn.nb_subfr; i++ ) { 344 sEncCtrl.Gains[ i ] = pGains_Q16[ i ] / 65536.0f; 345 } 346 } 347 } 348 349 /* Update input buffer */ 350 silk_memmove( psEnc->x_buf, &psEnc->x_buf[ psEnc->sCmn.frame_length ], 351 ( psEnc->sCmn.ltp_mem_length + LA_SHAPE_MS * psEnc->sCmn.fs_kHz ) * sizeof( silk_float ) ); 352 353 /* Exit without entropy coding */ 354 if( psEnc->sCmn.prefillFlag ) { 355 /* No payload */ 356 *pnBytesOut = 0; 357 return ret; 358 } 359 360 /* Parameters needed for next frame */ 361 psEnc->sCmn.prevLag = sEncCtrl.pitchL[ psEnc->sCmn.nb_subfr - 1 ]; 362 psEnc->sCmn.prevSignalType = psEnc->sCmn.indices.signalType; 363 364 /****************************************/ 365 /* Finalize payload */ 366 /****************************************/ 367 psEnc->sCmn.first_frame_after_reset = 0; 368 /* Payload size */ 369 *pnBytesOut = silk_RSHIFT( ec_tell( psRangeEnc ) + 7, 3 ); 370 371 return ret; 372} 373 374/* Low-Bitrate Redundancy (LBRR) encoding. Reuse all parameters but encode excitation at lower bitrate */ 375static OPUS_INLINE void silk_LBRR_encode_FLP( 376 silk_encoder_state_FLP *psEnc, /* I/O Encoder state FLP */ 377 silk_encoder_control_FLP *psEncCtrl, /* I/O Encoder control FLP */ 378 const silk_float xfw[], /* I Input signal */ 379 opus_int condCoding /* I The type of conditional coding used so far for this frame */ 380) 381{ 382 opus_int k; 383 opus_int32 Gains_Q16[ MAX_NB_SUBFR ]; 384 silk_float TempGains[ MAX_NB_SUBFR ]; 385 SideInfoIndices *psIndices_LBRR = &psEnc->sCmn.indices_LBRR[ psEnc->sCmn.nFramesEncoded ]; 386 silk_nsq_state sNSQ_LBRR; 387 388 /*******************************************/ 389 /* Control use of inband LBRR */ 390 /*******************************************/ 391 if( psEnc->sCmn.LBRR_enabled && psEnc->sCmn.speech_activity_Q8 > SILK_FIX_CONST( LBRR_SPEECH_ACTIVITY_THRES, 8 ) ) { 392 psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded ] = 1; 393 394 /* Copy noise shaping quantizer state and quantization indices from regular encoding */ 395 silk_memcpy( &sNSQ_LBRR, &psEnc->sCmn.sNSQ, sizeof( silk_nsq_state ) ); 396 silk_memcpy( psIndices_LBRR, &psEnc->sCmn.indices, sizeof( SideInfoIndices ) ); 397 398 /* Save original gains */ 399 silk_memcpy( TempGains, psEncCtrl->Gains, psEnc->sCmn.nb_subfr * sizeof( silk_float ) ); 400 401 if( psEnc->sCmn.nFramesEncoded == 0 || psEnc->sCmn.LBRR_flags[ psEnc->sCmn.nFramesEncoded - 1 ] == 0 ) { 402 /* First frame in packet or previous frame not LBRR coded */ 403 psEnc->sCmn.LBRRprevLastGainIndex = psEnc->sShape.LastGainIndex; 404 405 /* Increase Gains to get target LBRR rate */ 406 psIndices_LBRR->GainsIndices[ 0 ] += psEnc->sCmn.LBRR_GainIncreases; 407 psIndices_LBRR->GainsIndices[ 0 ] = silk_min_int( psIndices_LBRR->GainsIndices[ 0 ], N_LEVELS_QGAIN - 1 ); 408 } 409 410 /* Decode to get gains in sync with decoder */ 411 silk_gains_dequant( Gains_Q16, psIndices_LBRR->GainsIndices, 412 &psEnc->sCmn.LBRRprevLastGainIndex, condCoding == CODE_CONDITIONALLY, psEnc->sCmn.nb_subfr ); 413 414 /* Overwrite unquantized gains with quantized gains and convert back to Q0 from Q16 */ 415 for( k = 0; k < psEnc->sCmn.nb_subfr; k++ ) { 416 psEncCtrl->Gains[ k ] = Gains_Q16[ k ] * ( 1.0f / 65536.0f ); 417 } 418 419 /*****************************************/ 420 /* Noise shaping quantization */ 421 /*****************************************/ 422 silk_NSQ_wrapper_FLP( psEnc, psEncCtrl, psIndices_LBRR, &sNSQ_LBRR, 423 psEnc->sCmn.pulses_LBRR[ psEnc->sCmn.nFramesEncoded ], xfw ); 424 425 /* Restore original gains */ 426 silk_memcpy( psEncCtrl->Gains, TempGains, psEnc->sCmn.nb_subfr * sizeof( silk_float ) ); 427 } 428} 429