/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org
119c55f0f957534144d2b8a64154f0a479249b34behenrik.lundin@webrtc.org#include "webrtc/modules/audio_coding/neteq/audio_classifier.h"
12c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org
13c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org#include <math.h>
14c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org#include <stdio.h>
15c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org#include <stdlib.h>
16c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org#include <string.h>
17c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org
18c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org#include <string>
19c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org#include <iostream>
20c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org
2100b8f6b3643332cce1ee711715f7fbb824d793cakwiberg@webrtc.org#include "webrtc/base/scoped_ptr.h"
22c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org
23c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.orgint main(int argc, char* argv[]) {
24c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  if (argc != 5) {
25c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org    std::cout << "Usage: " << argv[0] <<
26c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org        " channels output_type <input file name> <output file name> "
27c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org        << std::endl << std::endl;
28c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org    std::cout << "Where channels can be 1 (mono) or 2 (interleaved stereo),";
29c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org    std::cout << " outputs can be 1 (classification (boolean)) or 2";
30c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org    std::cout << " (classification and music probability (float)),"
31c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org        << std::endl;
32c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org    std::cout << "and the sampling frequency is assumed to be 48 kHz."
33c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org        << std::endl;
34c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org    return -1;
35c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  }
36c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org
37c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  const int kFrameSizeSamples = 960;
38c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  int channels = atoi(argv[1]);
39c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  if (channels < 1 || channels > 2) {
40c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org    std::cout << "Disallowed number of channels  " << channels << std::endl;
41c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org    return -1;
42c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  }
43c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org
44c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  int outputs = atoi(argv[2]);
45c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  if (outputs < 1 || outputs > 2) {
46c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org    std::cout << "Disallowed number of outputs  " << outputs << std::endl;
47c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org    return -1;
48c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  }
49c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org
50c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  const int data_size = channels * kFrameSizeSamples;
5100b8f6b3643332cce1ee711715f7fbb824d793cakwiberg@webrtc.org  rtc::scoped_ptr<int16_t[]> in(new int16_t[data_size]);
52c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org
53c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  std::string input_filename = argv[3];
54c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  std::string output_filename = argv[4];
55c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org
56c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  std::cout << "Input file: " << input_filename << std::endl;
57c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  std::cout << "Output file: " << output_filename << std::endl;
58c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org
59c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  FILE* in_file = fopen(input_filename.c_str(), "rb");
60c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  if (!in_file) {
61c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org    std::cout << "Cannot open input file " << input_filename << std::endl;
62c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org    return -1;
63c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  }
64c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org
65c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  FILE* out_file = fopen(output_filename.c_str(), "wb");
66c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  if (!out_file) {
67c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org    std::cout << "Cannot open output file " << output_filename << std::endl;
68c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org    return -1;
69c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  }
70c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org
71c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  webrtc::AudioClassifier classifier;
72c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  int frame_counter = 0;
73c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  int music_counter = 0;
74c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  while (fread(in.get(), sizeof(*in.get()),
75c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org               data_size, in_file) == (size_t) data_size) {
76c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org    bool is_music = classifier.Analysis(in.get(), data_size, channels);
77c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org    if (!fwrite(&is_music, sizeof(is_music), 1, out_file)) {
78c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org       std::cout << "Error writing." << std::endl;
79c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org       return -1;
80c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org    }
81c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org    if (is_music) {
82c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org      music_counter++;
83c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org    }
84c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org    std::cout << "frame " << frame_counter << " decision " << is_music;
85c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org    if (outputs == 2) {
86c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org      float music_prob = classifier.music_probability();
87c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org      if (!fwrite(&music_prob, sizeof(music_prob), 1, out_file)) {
88c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org        std::cout << "Error writing." << std::endl;
89c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org        return -1;
90c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org      }
91c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org      std::cout << " music prob " << music_prob;
92c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org    }
93c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org    std::cout << std::endl;
94c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org    frame_counter++;
95c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  }
96c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  std::cout << frame_counter << " frames processed." << std::endl;
97c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  if (frame_counter > 0) {
98c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org    float music_percentage = music_counter / static_cast<float>(frame_counter);
99c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org    std::cout <<  music_percentage <<  " percent music." << std::endl;
100c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  }
101c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org
102c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  fclose(in_file);
103c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  fclose(out_file);
104c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org  return 0;
105c3d13d38f481e5217aa1209f55127ac1061bab39jan.skoglund@webrtc.org}
106