151a8d8528135ba4e3e4cf7cd711a9e47b19078a3Chris Lattner/* 2ea61c358720aa6c7a159d51658b34276316aa841Misha Brukman * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 36fbcc26f1460eaee4e0eb8b426fc1ff0c7af11beJohn Criswell * 46fbcc26f1460eaee4e0eb8b426fc1ff0c7af11beJohn Criswell * Use of this source code is governed by a BSD-style license 57ed47a13356daed2a34cd2209a31f92552e3bdd8Chris Lattner * that can be found in the LICENSE file in the root of the source 67ed47a13356daed2a34cd2209a31f92552e3bdd8Chris Lattner * tree. An additional intellectual property rights grant can be found 7ea61c358720aa6c7a159d51658b34276316aa841Misha Brukman * in the file PATENTS. All contributing project authors may 86fbcc26f1460eaee4e0eb8b426fc1ff0c7af11beJohn Criswell * be found in the AUTHORS file in the root of the source tree. 9ea61c358720aa6c7a159d51658b34276316aa841Misha Brukman */ 10e8b5413e5d0c7c0fc5b384e975c4ca87f4c00699Chris Lattner 11e8b5413e5d0c7c0fc5b384e975c4ca87f4c00699Chris Lattner#include "webrtc/modules/audio_coding/neteq/audio_classifier.h" 1251a8d8528135ba4e3e4cf7cd711a9e47b19078a3Chris Lattner 13deb9654056939a12981446f6ed1139dca3412746Vikram S. Adve#include <assert.h> 14fce1143bcfa73f61845002fa50473d1a01384202Misha Brukman#include <string.h> 15fce1143bcfa73f61845002fa50473d1a01384202Misha Brukman 16deb9654056939a12981446f6ed1139dca3412746Vikram S. Advenamespace webrtc { 17551ccae044b0ff658fe629dd67edd5ffe75d10e8Reid Spencer 18255f89faee13dc491cb64fbeae3c763e7e2ea4e6Chandler Carruthstatic const int kDefaultSampleRateHz = 48000; 197cc2b07437a1243c33324549a1904fefc5f1845eJakub Staszakstatic const int kDefaultFrameRateHz = 50; 2013ba2dab631636e525a44bb259aaea56a860d1c7Jakob Stoklund Olesenstatic const int kDefaultFrameSizeSamples = 21d0fde30ce850b78371fd1386338350591f9ff494Brian Gaeke kDefaultSampleRateHz / kDefaultFrameRateHz; 22d0fde30ce850b78371fd1386338350591f9ff494Brian Gaekestatic const float kDefaultThreshold = 0.5f; 23fed90b6d097d50881afb45e4d79f430db66dd741Dan Gohman 24853d3fb8d24fab2258e9cd5dce3ec8ff4189eedaDan GohmanAudioClassifier::AudioClassifier() 25fed90b6d097d50881afb45e4d79f430db66dd741Dan Gohman : analysis_info_(), 26fed90b6d097d50881afb45e4d79f430db66dd741Dan Gohman is_music_(false), 27a655f088b88cf015fc48721fd9869787d1b8ce13Chris Lattner music_probability_(0), 28f4a1e1a69f0727762a73ef0d551e3bbd16b7c04eJakob Stoklund Olesen // This actually assigns the pointer to a static constant struct 29a655f088b88cf015fc48721fd9869787d1b8ce13Chris Lattner // rather than creates a struct and |celt_mode_| does not need 301cd1d98232c3c3a0bd3810c3bf6c2572ea02f208Daniel Dunbar // to be deleted. 317cc2b07437a1243c33324549a1904fefc5f1845eJakub Staszak celt_mode_(opus_custom_mode_create(kDefaultSampleRateHz, 32d0fde30ce850b78371fd1386338350591f9ff494Brian Gaeke kDefaultFrameSizeSamples, 3394dc07728f091c652f0a8059aba6dce5018485eeAlkis Evlogimenos NULL)), 348e4018e2de52c534405d7155c7009d0b35afb861Cedric Venet analysis_state_() { 358e4018e2de52c534405d7155c7009d0b35afb861Cedric Venet assert(celt_mode_); 367309be6735666143bd9835b275dc8501617a2591Gabor Greif} 37fed90b6d097d50881afb45e4d79f430db66dd741Dan Gohman 388e5f2c6f65841542e2a7092553fe42a00048e4c7Dan GohmanAudioClassifier::~AudioClassifier() {} 3994dc07728f091c652f0a8059aba6dce5018485eeAlkis Evlogimenos 408e5f2c6f65841542e2a7092553fe42a00048e4c7Dan Gohmanbool AudioClassifier::Analysis(const int16_t* input, 4194dc07728f091c652f0a8059aba6dce5018485eeAlkis Evlogimenos int input_length, 428e5f2c6f65841542e2a7092553fe42a00048e4c7Dan Gohman int channels) { 43c7f6b8c5d40e17bf43fd3a1549d7d89c9da735e1Gabor Greif // Must be 20 ms frames at 48 kHz sampling. 44c7f6b8c5d40e17bf43fd3a1549d7d89c9da735e1Gabor Greif assert((input_length / channels) == kDefaultFrameSizeSamples); 45c7f6b8c5d40e17bf43fd3a1549d7d89c9da735e1Gabor Greif 46fed90b6d097d50881afb45e4d79f430db66dd741Dan Gohman // Only mono or stereo are allowed. 4794dc07728f091c652f0a8059aba6dce5018485eeAlkis Evlogimenos assert(channels == 1 || channels == 2); 48c23b8719ef9d6b1220e854b37d40e9e1c48a82bcGabor Greif 49c23b8719ef9d6b1220e854b37d40e9e1c48a82bcGabor Greif // Call Opus' classifier, defined in 50f3841fcbd587c31aa9842b3f33bd57de40c9f443Gabor Greif // "third_party/opus/src/src/analysis.h", with lsb_depth = 16. 51c23b8719ef9d6b1220e854b37d40e9e1c48a82bcGabor Greif // Also uses a down-mixing function downmix_int, defined in 52aad5c0505183a5b7913f1a443a1f0650122551ccAlkis Evlogimenos // "third_party/opus/src/src/opus_private.h", with 53aad5c0505183a5b7913f1a443a1f0650122551ccAlkis Evlogimenos // constants c1 = 0, and c2 = -2. 54fed90b6d097d50881afb45e4d79f430db66dd741Dan Gohman run_analysis(&analysis_state_, 55fed90b6d097d50881afb45e4d79f430db66dd741Dan Gohman celt_mode_, 56fed90b6d097d50881afb45e4d79f430db66dd741Dan Gohman input, 578e5f2c6f65841542e2a7092553fe42a00048e4c7Dan Gohman kDefaultFrameSizeSamples, 58fed90b6d097d50881afb45e4d79f430db66dd741Dan Gohman kDefaultFrameSizeSamples, 59fed90b6d097d50881afb45e4d79f430db66dd741Dan Gohman 0, 6094dc07728f091c652f0a8059aba6dce5018485eeAlkis Evlogimenos -2, 6194dc07728f091c652f0a8059aba6dce5018485eeAlkis Evlogimenos channels, 62fed90b6d097d50881afb45e4d79f430db66dd741Dan Gohman kDefaultSampleRateHz, 63fed90b6d097d50881afb45e4d79f430db66dd741Dan Gohman 16, 64c0b9dc5be79f009d260edb5cd5e1d8346587aaa2Alkis Evlogimenos downmix_int, 651194e9501984daf0d3237ed1bf18a156173e7fd4Chris Lattner &analysis_info_); 66c07d8d8a26f63dfc54dbd0e1ff776763ec6443adBrian Gaeke music_probability_ = analysis_info_.music_prob; 67f20c1a497fe3922ac718429d65a5fe396890575eChris Lattner is_music_ = music_probability_ > kDefaultThreshold; 687cc2b07437a1243c33324549a1904fefc5f1845eJakub Staszak return is_music_; 6913d828567812041c1ca1817f4b66fce840903a1fEvan Cheng} 7013d828567812041c1ca1817f4b66fce840903a1fEvan Cheng 7113d828567812041c1ca1817f4b66fce840903a1fEvan Cheng} // namespace webrtc 7213d828567812041c1ca1817f4b66fce840903a1fEvan Cheng