1c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org/* 2c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. 3c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org * 4c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org * Use of this source code is governed by a BSD-style license 5c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org * that can be found in the LICENSE file in the root of the source 6c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org * tree. An additional intellectual property rights grant can be found 7c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org * in the file PATENTS. All contributing project authors may 8c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org * be found in the AUTHORS file in the root of the source tree. 9c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org */ 10c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org 119c55f0f957534144d2b8a64154f0a479249b34behenrik.lundin@webrtc.org#include "webrtc/modules/audio_coding/neteq/audio_classifier.h" 12c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org 13c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org#include <math.h> 14c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org#include <stdio.h> 15c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org#include <stdlib.h> 16c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org#include <string.h> 17c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org 18c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org#include <string> 19c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org#include <iostream> 20c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org 2100b8f6b3643332cce1ee711715f7fbb824d793cakwiberg@webrtc.org#include "webrtc/base/scoped_ptr.h" 22c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org 23c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.orgint main(int argc, char* argv[]) { 24c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org if (argc != 5) { 25c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org std::cout << "Usage: " << argv[0] << 26c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org " channels output_type <input file name> <output file name> " 27c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org << std::endl << std::endl; 28c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org std::cout << "Where channels can be 1 (mono) or 2 (interleaved stereo),"; 29c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org std::cout << " outputs can be 1 (classification (boolean)) or 2"; 30c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org std::cout << " (classification and music probability (float))," 31c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org << std::endl; 32c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org std::cout << "and the sampling frequency is assumed to be 48 kHz." 33c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org << std::endl; 34c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org return -1; 35c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org } 36c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org 37c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org const int kFrameSizeSamples = 960; 38c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org int channels = atoi(argv[1]); 39c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org if (channels < 1 || channels > 2) { 40c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org std::cout << "Disallowed number of channels " << channels << std::endl; 41c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org return -1; 42c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org } 43c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org 44c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org int outputs = atoi(argv[2]); 45c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org if (outputs < 1 || outputs > 2) { 46c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org std::cout << "Disallowed number of outputs " << outputs << std::endl; 47c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org return -1; 48c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org } 49c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org 50c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org const int data_size = channels * kFrameSizeSamples; 5100b8f6b3643332cce1ee711715f7fbb824d793cakwiberg@webrtc.org rtc::scoped_ptr<int16_t[]> in(new int16_t[data_size]); 52c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org 53c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org std::string input_filename = argv[3]; 54c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org std::string output_filename = argv[4]; 55c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org 56c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org std::cout << "Input file: " << input_filename << std::endl; 57c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org std::cout << "Output file: " << output_filename << std::endl; 58c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org 59c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org FILE* in_file = fopen(input_filename.c_str(), "rb"); 60c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org if (!in_file) { 61c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org std::cout << "Cannot open input file " << input_filename << std::endl; 62c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org return -1; 63c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org } 64c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org 65c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org FILE* out_file = fopen(output_filename.c_str(), "wb"); 66c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org if (!out_file) { 67c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org std::cout << "Cannot open output file " << output_filename << std::endl; 68c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org return -1; 69c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org } 70c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org 71c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org webrtc::AudioClassifier classifier; 72c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org int frame_counter = 0; 73c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org int music_counter = 0; 74c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org while (fread(in.get(), sizeof(*in.get()), 75c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org data_size, in_file) == (size_t) data_size) { 76c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org bool is_music = classifier.Analysis(in.get(), data_size, channels); 77c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org if (!fwrite(&is_music, sizeof(is_music), 1, out_file)) { 78c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org std::cout << "Error writing." << std::endl; 79c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org return -1; 80c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org } 81c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org if (is_music) { 82c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org music_counter++; 83c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org } 84c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org std::cout << "frame " << frame_counter << " decision " << is_music; 85c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org if (outputs == 2) { 86c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org float music_prob = classifier.music_probability(); 87c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org if (!fwrite(&music_prob, sizeof(music_prob), 1, out_file)) { 88c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org std::cout << "Error writing." << std::endl; 89c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org return -1; 90c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org } 91c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org std::cout << " music prob " << music_prob; 92c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org } 93c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org std::cout << std::endl; 94c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org frame_counter++; 95c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org } 96c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org std::cout << frame_counter << " frames processed." << std::endl; 97c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org if (frame_counter > 0) { 98c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org float music_percentage = music_counter / static_cast<float>(frame_counter); 99c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org std::cout << music_percentage << " percent music." << std::endl; 100c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org } 101c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org 102c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org fclose(in_file); 103c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org fclose(out_file); 104c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org return 0; 105c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org} 106