/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/audio_coding/neteq/audio_classifier.h"

#include <assert.h>
#include <string.h>

16deb9654056939a12981446f6ed1139dca3412746Vikram S. Advenamespace webrtc {
17551ccae044b0ff658fe629dd67edd5ffe75d10e8Reid Spencer
18255f89faee13dc491cb64fbeae3c763e7e2ea4e6Chandler Carruthstatic const int kDefaultSampleRateHz = 48000;
197cc2b07437a1243c33324549a1904fefc5f1845eJakub Staszakstatic const int kDefaultFrameRateHz = 50;
2013ba2dab631636e525a44bb259aaea56a860d1c7Jakob Stoklund Olesenstatic const int kDefaultFrameSizeSamples =
21d0fde30ce850b78371fd1386338350591f9ff494Brian Gaeke    kDefaultSampleRateHz / kDefaultFrameRateHz;
22d0fde30ce850b78371fd1386338350591f9ff494Brian Gaekestatic const float kDefaultThreshold = 0.5f;
23fed90b6d097d50881afb45e4d79f430db66dd741Dan Gohman
24853d3fb8d24fab2258e9cd5dce3ec8ff4189eedaDan GohmanAudioClassifier::AudioClassifier()
25fed90b6d097d50881afb45e4d79f430db66dd741Dan Gohman    : analysis_info_(),
26fed90b6d097d50881afb45e4d79f430db66dd741Dan Gohman      is_music_(false),
27a655f088b88cf015fc48721fd9869787d1b8ce13Chris Lattner      music_probability_(0),
28f4a1e1a69f0727762a73ef0d551e3bbd16b7c04eJakob Stoklund Olesen      // This actually assigns the pointer to a static constant struct
29a655f088b88cf015fc48721fd9869787d1b8ce13Chris Lattner      // rather than creates a struct and |celt_mode_| does not need
301cd1d98232c3c3a0bd3810c3bf6c2572ea02f208Daniel Dunbar      // to be deleted.
317cc2b07437a1243c33324549a1904fefc5f1845eJakub Staszak      celt_mode_(opus_custom_mode_create(kDefaultSampleRateHz,
32d0fde30ce850b78371fd1386338350591f9ff494Brian Gaeke                                         kDefaultFrameSizeSamples,
3394dc07728f091c652f0a8059aba6dce5018485eeAlkis Evlogimenos                                         NULL)),
348e4018e2de52c534405d7155c7009d0b35afb861Cedric Venet      analysis_state_() {
358e4018e2de52c534405d7155c7009d0b35afb861Cedric Venet  assert(celt_mode_);
367309be6735666143bd9835b275dc8501617a2591Gabor Greif}
37fed90b6d097d50881afb45e4d79f430db66dd741Dan Gohman
388e5f2c6f65841542e2a7092553fe42a00048e4c7Dan GohmanAudioClassifier::~AudioClassifier() {}
3994dc07728f091c652f0a8059aba6dce5018485eeAlkis Evlogimenos
408e5f2c6f65841542e2a7092553fe42a00048e4c7Dan Gohmanbool AudioClassifier::Analysis(const int16_t* input,
4194dc07728f091c652f0a8059aba6dce5018485eeAlkis Evlogimenos                               int input_length,
428e5f2c6f65841542e2a7092553fe42a00048e4c7Dan Gohman                               int channels) {
43c7f6b8c5d40e17bf43fd3a1549d7d89c9da735e1Gabor Greif  // Must be 20 ms frames at 48 kHz sampling.
44c7f6b8c5d40e17bf43fd3a1549d7d89c9da735e1Gabor Greif  assert((input_length / channels) == kDefaultFrameSizeSamples);
45c7f6b8c5d40e17bf43fd3a1549d7d89c9da735e1Gabor Greif
46fed90b6d097d50881afb45e4d79f430db66dd741Dan Gohman  // Only mono or stereo are allowed.
4794dc07728f091c652f0a8059aba6dce5018485eeAlkis Evlogimenos  assert(channels == 1 || channels == 2);
48c23b8719ef9d6b1220e854b37d40e9e1c48a82bcGabor Greif
49c23b8719ef9d6b1220e854b37d40e9e1c48a82bcGabor Greif  // Call Opus' classifier, defined in
50f3841fcbd587c31aa9842b3f33bd57de40c9f443Gabor Greif  // "third_party/opus/src/src/analysis.h", with lsb_depth = 16.
51c23b8719ef9d6b1220e854b37d40e9e1c48a82bcGabor Greif  // Also uses a down-mixing function downmix_int, defined in
52aad5c0505183a5b7913f1a443a1f0650122551ccAlkis Evlogimenos  // "third_party/opus/src/src/opus_private.h", with
53aad5c0505183a5b7913f1a443a1f0650122551ccAlkis Evlogimenos  // constants c1 = 0, and c2 = -2.
54fed90b6d097d50881afb45e4d79f430db66dd741Dan Gohman  run_analysis(&analysis_state_,
55fed90b6d097d50881afb45e4d79f430db66dd741Dan Gohman               celt_mode_,
56fed90b6d097d50881afb45e4d79f430db66dd741Dan Gohman               input,
578e5f2c6f65841542e2a7092553fe42a00048e4c7Dan Gohman               kDefaultFrameSizeSamples,
58fed90b6d097d50881afb45e4d79f430db66dd741Dan Gohman               kDefaultFrameSizeSamples,
59fed90b6d097d50881afb45e4d79f430db66dd741Dan Gohman               0,
6094dc07728f091c652f0a8059aba6dce5018485eeAlkis Evlogimenos               -2,
6194dc07728f091c652f0a8059aba6dce5018485eeAlkis Evlogimenos               channels,
62fed90b6d097d50881afb45e4d79f430db66dd741Dan Gohman               kDefaultSampleRateHz,
63fed90b6d097d50881afb45e4d79f430db66dd741Dan Gohman               16,
64c0b9dc5be79f009d260edb5cd5e1d8346587aaa2Alkis Evlogimenos               downmix_int,
651194e9501984daf0d3237ed1bf18a156173e7fd4Chris Lattner               &analysis_info_);
66c07d8d8a26f63dfc54dbd0e1ff776763ec6443adBrian Gaeke  music_probability_ = analysis_info_.music_prob;
67f20c1a497fe3922ac718429d65a5fe396890575eChris Lattner  is_music_ = music_probability_ > kDefaultThreshold;
687cc2b07437a1243c33324549a1904fefc5f1845eJakub Staszak  return is_music_;
6913d828567812041c1ca1817f4b66fce840903a1fEvan Cheng}
7013d828567812041c1ca1817f4b66fce840903a1fEvan Cheng
7113d828567812041c1ca1817f4b66fce840903a1fEvan Cheng}  // namespace webrtc
7213d828567812041c1ca1817f4b66fce840903a1fEvan Cheng