1/*
2 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include <math.h>
12#include <stdio.h>
13#include <string.h>
14#ifdef WEBRTC_ANDROID
15#include <sys/stat.h>
16#endif
17
18#include <algorithm>
19
20#include "webrtc/base/format_macros.h"
21#include "webrtc/base/scoped_ptr.h"
22#include "webrtc/common.h"
23#include "webrtc/modules/audio_processing/include/audio_processing.h"
24#include "webrtc/modules/audio_processing/test/protobuf_utils.h"
25#include "webrtc/modules/audio_processing/test/test_utils.h"
26#include "webrtc/modules/include/module_common_types.h"
27#include "webrtc/system_wrappers/include/cpu_features_wrapper.h"
28#include "webrtc/system_wrappers/include/tick_util.h"
29#include "webrtc/test/testsupport/fileutils.h"
30#include "webrtc/test/testsupport/perf_test.h"
31#ifdef WEBRTC_ANDROID_PLATFORM_BUILD
32#include "gtest/gtest.h"
33#include "external/webrtc/webrtc/modules/audio_processing/debug.pb.h"
34#else
35#include "testing/gtest/include/gtest/gtest.h"
36#include "webrtc/audio_processing/debug.pb.h"
37#endif
38
39namespace webrtc {
40
41using webrtc::audioproc::Event;
42using webrtc::audioproc::Init;
43using webrtc::audioproc::ReverseStream;
44using webrtc::audioproc::Stream;
45
46namespace {
47
48void PrintStat(const AudioProcessing::Statistic& stat) {
49  printf("%d, %d, %d\n", stat.average,
50                         stat.maximum,
51                         stat.minimum);
52}
53
// Prints the command-line help text for process_test to stdout.
//
// The flag list here is maintained by hand; keep it in sync with the argument
// parsing loop in void_main(). Every flag that loop accepts should be listed
// below (this includes --delay_agnostic and the --vad_* likelihood flags).
void usage() {
  printf(
  "Usage: process_test [options] [-pb PROTOBUF_FILE]\n"
  "  [-ir REVERSE_FILE] [-i PRIMARY_FILE] [-o OUT_FILE]\n");
  printf(
  "process_test is a test application for AudioProcessing.\n\n"
  "When a protobuf debug file is available, specify it with -pb. Alternately,\n"
  "when -ir or -i is used, the specified files will be processed directly in\n"
  "a simulation mode. Otherwise the full set of legacy test files is expected\n"
  "to be present in the working directory. OUT_FILE should be specified\n"
  "without extension to support both raw and wav output.\n\n");
  printf("Options\n");
  printf("General configuration (only used for the simulation mode):\n");
  printf("  -fs SAMPLE_RATE_HZ\n");
  printf("  -ch CHANNELS_IN CHANNELS_OUT\n");
  printf("  -rch REVERSE_CHANNELS\n");
  printf("\n");
  printf("Component configuration:\n");
  printf(
  "All components are disabled by default. Each block below begins with a\n"
  "flag to enable the component with default settings. The subsequent flags\n"
  "in the block are used to provide configuration settings.\n");

  // Echo cancellation.
  printf("\n  -aec     Echo cancellation\n");
  printf("  --drift_compensation\n");
  printf("  --no_drift_compensation\n");
  printf("  --no_echo_metrics\n");
  printf("  --no_delay_logging\n");
  printf("  --aec_suppression_level LEVEL  [0 - 2]\n");
  printf("  --extended_filter\n");
  printf("  --no_reported_delay\n");
  // The parser treats this flag exactly like --no_reported_delay.
  printf("  --delay_agnostic  Same as --no_reported_delay.\n");

  // Echo control mobile.
  printf("\n  -aecm    Echo control mobile\n");
  printf("  --aecm_echo_path_in_file FILE\n");
  printf("  --aecm_echo_path_out_file FILE\n");
  printf("  --no_comfort_noise\n");
  printf("  --routing_mode MODE  [0 - 4]\n");

  // Gain control.
  printf("\n  -agc     Gain control\n");
  printf("  --analog\n");
  printf("  --adaptive_digital\n");
  printf("  --fixed_digital\n");
  printf("  --target_level LEVEL\n");
  printf("  --compression_gain GAIN\n");
  printf("  --limiter\n");
  printf("  --no_limiter\n");

  // High pass filter and noise suppression.
  printf("\n  -hpf     High pass filter\n");
  printf("\n  -ns      Noise suppression\n");
  printf("  --ns_low\n");
  printf("  --ns_moderate\n");
  printf("  --ns_high\n");
  printf("  --ns_very_high\n");
  printf("  --ns_prob_file FILE\n");

  // Voice activity detection, including the likelihood flags the parser
  // accepts.
  printf("\n  -vad     Voice activity detection\n");
  printf("  --vad_very_low\n");
  printf("  --vad_low\n");
  printf("  --vad_moderate\n");
  printf("  --vad_high\n");
  printf("  --vad_out_file FILE\n");

  printf("\n  -expns   Experimental noise suppression\n");
  printf("\n Level metrics (enabled by default)\n");
  printf("  --no_level_metrics\n");
  printf("\n");

  // Flags that alter how the run is performed rather than which components
  // are enabled.
  printf("Modifiers:\n");
  printf("  --noasm            Disable SSE optimization.\n");
  printf("  --add_delay DELAY  Add DELAY ms to input value.\n");
  printf("  --delay DELAY      Override input delay with DELAY ms.\n");
  printf("  --perf             Measure performance.\n");
  printf("  --quiet            Suppress text output.\n");
  printf("  --no_progress      Suppress progress.\n");
  printf("  --raw_output       Raw output instead of WAV file.\n");
  printf("  --debug_file FILE  Dump a debug recording.\n");
}
120
// Maps a microphone level to a linear gain factor.
//
// The level is first converted to a dB value: level 127 is 0 dB and each
// step of 128 levels corresponds to 40 dB. The dB value is then converted to
// a linear amplitude factor via 10^(dB / 20), so level 127 yields 1.0.
static float MicLevel2Gain(int level) {
  const float offset_from_unity = level - 127.0f;
  const float gain_db = offset_from_unity / 128.0f * 40.0f;
  return pow(10.0f, gain_db / 20.0f);
}
124
125static void SimulateMic(int mic_level, AudioFrame* frame) {
126  mic_level = std::min(std::max(mic_level, 0), 255);
127  float mic_gain = MicLevel2Gain(mic_level);
128  int num_samples = frame->samples_per_channel_ * frame->num_channels_;
129  float v;
130  for (int n = 0; n < num_samples; n++) {
131    v = floor(frame->data_[n] * mic_gain + 0.5);
132    v = std::max(std::min(32767.0f, v), -32768.0f);
133    frame->data_[n] = static_cast<int16_t>(v);
134  }
135}
136
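// Body of the test. It is a separate function returning void because gtest's
// fatal assertions (ASSERT_*, FAIL) may only be used in void functions.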
138void void_main(int argc, char* argv[]) {
139  if (argc > 1 && strcmp(argv[1], "--help") == 0) {
140    usage();
141    return;
142  }
143
144  if (argc < 2) {
145    printf("Did you mean to run without arguments?\n");
146    printf("Try `process_test --help' for more information.\n\n");
147  }
148
149  rtc::scoped_ptr<AudioProcessing> apm(AudioProcessing::Create());
150  ASSERT_TRUE(apm.get() != NULL);
151
152  const char* pb_filename = NULL;
153  const char* far_filename = NULL;
154  const char* near_filename = NULL;
155  std::string out_filename;
156  const char* vad_out_filename = NULL;
157  const char* ns_prob_filename = NULL;
158  const char* aecm_echo_path_in_filename = NULL;
159  const char* aecm_echo_path_out_filename = NULL;
160
161  int32_t sample_rate_hz = 16000;
162
163  size_t num_capture_input_channels = 1;
164  size_t num_capture_output_channels = 1;
165  size_t num_render_channels = 1;
166
167  int samples_per_channel = sample_rate_hz / 100;
168
169  bool simulating = false;
170  bool perf_testing = false;
171  bool verbose = true;
172  bool progress = true;
173  bool raw_output = false;
174  int extra_delay_ms = 0;
175  int override_delay_ms = 0;
176  Config config;
177
178  ASSERT_EQ(apm->kNoError, apm->level_estimator()->Enable(true));
179  for (int i = 1; i < argc; i++) {
180    if (strcmp(argv[i], "-pb") == 0) {
181      i++;
182      ASSERT_LT(i, argc) << "Specify protobuf filename after -pb";
183      pb_filename = argv[i];
184
185    } else if (strcmp(argv[i], "-ir") == 0) {
186      i++;
187      ASSERT_LT(i, argc) << "Specify filename after -ir";
188      far_filename = argv[i];
189      simulating = true;
190
191    } else if (strcmp(argv[i], "-i") == 0) {
192      i++;
193      ASSERT_LT(i, argc) << "Specify filename after -i";
194      near_filename = argv[i];
195      simulating = true;
196
197    } else if (strcmp(argv[i], "-o") == 0) {
198      i++;
199      ASSERT_LT(i, argc) << "Specify filename without extension after -o";
200      out_filename = argv[i];
201
202    } else if (strcmp(argv[i], "-fs") == 0) {
203      i++;
204      ASSERT_LT(i, argc) << "Specify sample rate after -fs";
205      ASSERT_EQ(1, sscanf(argv[i], "%d", &sample_rate_hz));
206      samples_per_channel = sample_rate_hz / 100;
207
208    } else if (strcmp(argv[i], "-ch") == 0) {
209      i++;
210      ASSERT_LT(i + 1, argc) << "Specify number of channels after -ch";
211      ASSERT_EQ(1, sscanf(argv[i], "%" PRIuS, &num_capture_input_channels));
212      i++;
213      ASSERT_EQ(1, sscanf(argv[i], "%" PRIuS, &num_capture_output_channels));
214
215    } else if (strcmp(argv[i], "-rch") == 0) {
216      i++;
217      ASSERT_LT(i, argc) << "Specify number of channels after -rch";
218      ASSERT_EQ(1, sscanf(argv[i], "%" PRIuS, &num_render_channels));
219
220    } else if (strcmp(argv[i], "-aec") == 0) {
221      ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true));
222      ASSERT_EQ(apm->kNoError,
223                apm->echo_cancellation()->enable_metrics(true));
224      ASSERT_EQ(apm->kNoError,
225                apm->echo_cancellation()->enable_delay_logging(true));
226
227    } else if (strcmp(argv[i], "--drift_compensation") == 0) {
228      ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true));
229      // TODO(ajm): this is enabled in the VQE test app by default. Investigate
230      //            why it can give better performance despite passing zeros.
231      ASSERT_EQ(apm->kNoError,
232                apm->echo_cancellation()->enable_drift_compensation(true));
233    } else if (strcmp(argv[i], "--no_drift_compensation") == 0) {
234      ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true));
235      ASSERT_EQ(apm->kNoError,
236                apm->echo_cancellation()->enable_drift_compensation(false));
237
238    } else if (strcmp(argv[i], "--no_echo_metrics") == 0) {
239      ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true));
240      ASSERT_EQ(apm->kNoError,
241                apm->echo_cancellation()->enable_metrics(false));
242
243    } else if (strcmp(argv[i], "--no_delay_logging") == 0) {
244      ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true));
245      ASSERT_EQ(apm->kNoError,
246                apm->echo_cancellation()->enable_delay_logging(false));
247
248    } else if (strcmp(argv[i], "--no_level_metrics") == 0) {
249      ASSERT_EQ(apm->kNoError, apm->level_estimator()->Enable(false));
250
251    } else if (strcmp(argv[i], "--aec_suppression_level") == 0) {
252      i++;
253      ASSERT_LT(i, argc) << "Specify level after --aec_suppression_level";
254      int suppression_level;
255      ASSERT_EQ(1, sscanf(argv[i], "%d", &suppression_level));
256      ASSERT_EQ(apm->kNoError,
257                apm->echo_cancellation()->set_suppression_level(
258                    static_cast<webrtc::EchoCancellation::SuppressionLevel>(
259                        suppression_level)));
260
261    } else if (strcmp(argv[i], "--extended_filter") == 0) {
262      config.Set<ExtendedFilter>(new ExtendedFilter(true));
263
264    } else if (strcmp(argv[i], "--no_reported_delay") == 0) {
265      config.Set<DelayAgnostic>(new DelayAgnostic(true));
266
267    } else if (strcmp(argv[i], "--delay_agnostic") == 0) {
268      config.Set<DelayAgnostic>(new DelayAgnostic(true));
269
270    } else if (strcmp(argv[i], "-aecm") == 0) {
271      ASSERT_EQ(apm->kNoError, apm->echo_control_mobile()->Enable(true));
272
273    } else if (strcmp(argv[i], "--aecm_echo_path_in_file") == 0) {
274      i++;
275      ASSERT_LT(i, argc) << "Specify filename after --aecm_echo_path_in_file";
276      aecm_echo_path_in_filename = argv[i];
277
278    } else if (strcmp(argv[i], "--aecm_echo_path_out_file") == 0) {
279      i++;
280      ASSERT_LT(i, argc) << "Specify filename after --aecm_echo_path_out_file";
281      aecm_echo_path_out_filename = argv[i];
282
283    } else if (strcmp(argv[i], "--no_comfort_noise") == 0) {
284      ASSERT_EQ(apm->kNoError,
285                apm->echo_control_mobile()->enable_comfort_noise(false));
286
287    } else if (strcmp(argv[i], "--routing_mode") == 0) {
288      i++;
289      ASSERT_LT(i, argc) << "Specify mode after --routing_mode";
290      int routing_mode;
291      ASSERT_EQ(1, sscanf(argv[i], "%d", &routing_mode));
292      ASSERT_EQ(apm->kNoError,
293                apm->echo_control_mobile()->set_routing_mode(
294                    static_cast<webrtc::EchoControlMobile::RoutingMode>(
295                        routing_mode)));
296
297    } else if (strcmp(argv[i], "-agc") == 0) {
298      ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true));
299
300    } else if (strcmp(argv[i], "--analog") == 0) {
301      ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true));
302      ASSERT_EQ(apm->kNoError,
303                apm->gain_control()->set_mode(GainControl::kAdaptiveAnalog));
304
305    } else if (strcmp(argv[i], "--adaptive_digital") == 0) {
306      ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true));
307      ASSERT_EQ(apm->kNoError,
308                apm->gain_control()->set_mode(GainControl::kAdaptiveDigital));
309
310    } else if (strcmp(argv[i], "--fixed_digital") == 0) {
311      ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true));
312      ASSERT_EQ(apm->kNoError,
313                apm->gain_control()->set_mode(GainControl::kFixedDigital));
314
315    } else if (strcmp(argv[i], "--target_level") == 0) {
316      i++;
317      int level;
318      ASSERT_EQ(1, sscanf(argv[i], "%d", &level));
319
320      ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true));
321      ASSERT_EQ(apm->kNoError,
322                apm->gain_control()->set_target_level_dbfs(level));
323
324    } else if (strcmp(argv[i], "--compression_gain") == 0) {
325      i++;
326      int gain;
327      ASSERT_EQ(1, sscanf(argv[i], "%d", &gain));
328
329      ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true));
330      ASSERT_EQ(apm->kNoError,
331                apm->gain_control()->set_compression_gain_db(gain));
332
333    } else if (strcmp(argv[i], "--limiter") == 0) {
334      ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true));
335      ASSERT_EQ(apm->kNoError,
336                apm->gain_control()->enable_limiter(true));
337
338    } else if (strcmp(argv[i], "--no_limiter") == 0) {
339      ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true));
340      ASSERT_EQ(apm->kNoError,
341                apm->gain_control()->enable_limiter(false));
342
343    } else if (strcmp(argv[i], "-hpf") == 0) {
344      ASSERT_EQ(apm->kNoError, apm->high_pass_filter()->Enable(true));
345
346    } else if (strcmp(argv[i], "-ns") == 0) {
347      ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true));
348
349    } else if (strcmp(argv[i], "--ns_low") == 0) {
350      ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true));
351      ASSERT_EQ(apm->kNoError,
352          apm->noise_suppression()->set_level(NoiseSuppression::kLow));
353
354    } else if (strcmp(argv[i], "--ns_moderate") == 0) {
355      ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true));
356      ASSERT_EQ(apm->kNoError,
357          apm->noise_suppression()->set_level(NoiseSuppression::kModerate));
358
359    } else if (strcmp(argv[i], "--ns_high") == 0) {
360      ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true));
361      ASSERT_EQ(apm->kNoError,
362          apm->noise_suppression()->set_level(NoiseSuppression::kHigh));
363
364    } else if (strcmp(argv[i], "--ns_very_high") == 0) {
365      ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true));
366      ASSERT_EQ(apm->kNoError,
367          apm->noise_suppression()->set_level(NoiseSuppression::kVeryHigh));
368
369    } else if (strcmp(argv[i], "--ns_prob_file") == 0) {
370      i++;
371      ASSERT_LT(i, argc) << "Specify filename after --ns_prob_file";
372      ns_prob_filename = argv[i];
373
374    } else if (strcmp(argv[i], "-vad") == 0) {
375      ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true));
376
377    } else if (strcmp(argv[i], "--vad_very_low") == 0) {
378      ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true));
379      ASSERT_EQ(apm->kNoError,
380          apm->voice_detection()->set_likelihood(
381              VoiceDetection::kVeryLowLikelihood));
382
383    } else if (strcmp(argv[i], "--vad_low") == 0) {
384      ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true));
385      ASSERT_EQ(apm->kNoError,
386          apm->voice_detection()->set_likelihood(
387              VoiceDetection::kLowLikelihood));
388
389    } else if (strcmp(argv[i], "--vad_moderate") == 0) {
390      ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true));
391      ASSERT_EQ(apm->kNoError,
392          apm->voice_detection()->set_likelihood(
393              VoiceDetection::kModerateLikelihood));
394
395    } else if (strcmp(argv[i], "--vad_high") == 0) {
396      ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true));
397      ASSERT_EQ(apm->kNoError,
398          apm->voice_detection()->set_likelihood(
399              VoiceDetection::kHighLikelihood));
400
401    } else if (strcmp(argv[i], "--vad_out_file") == 0) {
402      i++;
403      ASSERT_LT(i, argc) << "Specify filename after --vad_out_file";
404      vad_out_filename = argv[i];
405
406    } else if (strcmp(argv[i], "-expns") == 0) {
407      config.Set<ExperimentalNs>(new ExperimentalNs(true));
408
409    } else if (strcmp(argv[i], "--noasm") == 0) {
410      WebRtc_GetCPUInfo = WebRtc_GetCPUInfoNoASM;
411      // We need to reinitialize here if components have already been enabled.
412      ASSERT_EQ(apm->kNoError, apm->Initialize());
413
414    } else if (strcmp(argv[i], "--add_delay") == 0) {
415      i++;
416      ASSERT_EQ(1, sscanf(argv[i], "%d", &extra_delay_ms));
417
418    } else if (strcmp(argv[i], "--delay") == 0) {
419      i++;
420      ASSERT_EQ(1, sscanf(argv[i], "%d", &override_delay_ms));
421
422    } else if (strcmp(argv[i], "--perf") == 0) {
423      perf_testing = true;
424
425    } else if (strcmp(argv[i], "--quiet") == 0) {
426      verbose = false;
427      progress = false;
428
429    } else if (strcmp(argv[i], "--no_progress") == 0) {
430      progress = false;
431
432    } else if (strcmp(argv[i], "--raw_output") == 0) {
433      raw_output = true;
434
435    } else if (strcmp(argv[i], "--debug_file") == 0) {
436      i++;
437      ASSERT_LT(i, argc) << "Specify filename after --debug_file";
438      ASSERT_EQ(apm->kNoError, apm->StartDebugRecording(argv[i]));
439    } else {
440      FAIL() << "Unrecognized argument " << argv[i];
441    }
442  }
443  apm->SetExtraOptions(config);
444
445  // If we're reading a protobuf file, ensure a simulation hasn't also
446  // been requested (which makes no sense...)
447  ASSERT_FALSE(pb_filename && simulating);
448
449  if (verbose) {
450    printf("Sample rate: %d Hz\n", sample_rate_hz);
451    printf("Primary channels: %" PRIuS " (in), %" PRIuS " (out)\n",
452           num_capture_input_channels,
453           num_capture_output_channels);
454    printf("Reverse channels: %" PRIuS "\n", num_render_channels);
455  }
456
457  const std::string out_path = webrtc::test::OutputPath();
458  const char far_file_default[] = "apm_far.pcm";
459  const char near_file_default[] = "apm_near.pcm";
460  const char event_filename[] = "apm_event.dat";
461  const char delay_filename[] = "apm_delay.dat";
462  const char drift_filename[] = "apm_drift.dat";
463  const std::string vad_file_default = out_path + "vad_out.dat";
464  const std::string ns_prob_file_default = out_path + "ns_prob.dat";
465
466  if (!simulating) {
467    far_filename = far_file_default;
468    near_filename = near_file_default;
469  }
470
471  if (out_filename.size() == 0) {
472    out_filename = out_path + "out";
473  }
474
475  if (!vad_out_filename) {
476    vad_out_filename = vad_file_default.c_str();
477  }
478
479  if (!ns_prob_filename) {
480    ns_prob_filename = ns_prob_file_default.c_str();
481  }
482
483  FILE* pb_file = NULL;
484  FILE* far_file = NULL;
485  FILE* near_file = NULL;
486  FILE* event_file = NULL;
487  FILE* delay_file = NULL;
488  FILE* drift_file = NULL;
489  FILE* vad_out_file = NULL;
490  FILE* ns_prob_file = NULL;
491  FILE* aecm_echo_path_in_file = NULL;
492  FILE* aecm_echo_path_out_file = NULL;
493
494  rtc::scoped_ptr<WavWriter> output_wav_file;
495  rtc::scoped_ptr<RawFile> output_raw_file;
496
497  if (pb_filename) {
498    pb_file = OpenFile(pb_filename, "rb");
499  } else {
500    if (far_filename) {
501      far_file = OpenFile(far_filename, "rb");
502    }
503
504    near_file = OpenFile(near_filename, "rb");
505    if (!simulating) {
506      event_file = OpenFile(event_filename, "rb");
507      delay_file = OpenFile(delay_filename, "rb");
508      drift_file = OpenFile(drift_filename, "rb");
509    }
510  }
511
512  int near_size_bytes = 0;
513  if (pb_file) {
514    struct stat st;
515    stat(pb_filename, &st);
516    // Crude estimate, but should be good enough.
517    near_size_bytes = st.st_size / 3;
518  } else {
519    struct stat st;
520    stat(near_filename, &st);
521    near_size_bytes = st.st_size;
522  }
523
524  if (apm->voice_detection()->is_enabled()) {
525    vad_out_file = OpenFile(vad_out_filename, "wb");
526  }
527
528  if (apm->noise_suppression()->is_enabled()) {
529    ns_prob_file = OpenFile(ns_prob_filename, "wb");
530  }
531
532  if (aecm_echo_path_in_filename != NULL) {
533    aecm_echo_path_in_file = OpenFile(aecm_echo_path_in_filename, "rb");
534
535    const size_t path_size =
536        apm->echo_control_mobile()->echo_path_size_bytes();
537    rtc::scoped_ptr<char[]> echo_path(new char[path_size]);
538    ASSERT_EQ(path_size, fread(echo_path.get(),
539                               sizeof(char),
540                               path_size,
541                               aecm_echo_path_in_file));
542    EXPECT_EQ(apm->kNoError,
543              apm->echo_control_mobile()->SetEchoPath(echo_path.get(),
544                                                      path_size));
545    fclose(aecm_echo_path_in_file);
546    aecm_echo_path_in_file = NULL;
547  }
548
549  if (aecm_echo_path_out_filename != NULL) {
550    aecm_echo_path_out_file = OpenFile(aecm_echo_path_out_filename, "wb");
551  }
552
553  size_t read_count = 0;
554  int reverse_count = 0;
555  int primary_count = 0;
556  int near_read_bytes = 0;
557  TickInterval acc_ticks;
558
559  AudioFrame far_frame;
560  AudioFrame near_frame;
561
562  int delay_ms = 0;
563  int drift_samples = 0;
564  int capture_level = 127;
565  int8_t stream_has_voice = 0;
566  float ns_speech_prob = 0.0f;
567
568  TickTime t0 = TickTime::Now();
569  TickTime t1 = t0;
570  int64_t max_time_us = 0;
571  int64_t max_time_reverse_us = 0;
572  int64_t min_time_us = 1e6;
573  int64_t min_time_reverse_us = 1e6;
574
575  // TODO(ajm): Ideally we would refactor this block into separate functions,
576  //            but for now we want to share the variables.
577  if (pb_file) {
578    Event event_msg;
579    rtc::scoped_ptr<ChannelBuffer<float> > reverse_cb;
580    rtc::scoped_ptr<ChannelBuffer<float> > primary_cb;
581    int output_sample_rate = 32000;
582    AudioProcessing::ChannelLayout output_layout = AudioProcessing::kMono;
583    while (ReadMessageFromFile(pb_file, &event_msg)) {
584      std::ostringstream trace_stream;
585      trace_stream << "Processed frames: " << reverse_count << " (reverse), "
586                   << primary_count << " (primary)";
587      SCOPED_TRACE(trace_stream.str());
588
589      if (event_msg.type() == Event::INIT) {
590        ASSERT_TRUE(event_msg.has_init());
591        const Init msg = event_msg.init();
592
593        ASSERT_TRUE(msg.has_sample_rate());
594        ASSERT_TRUE(msg.has_num_input_channels());
595        ASSERT_TRUE(msg.has_num_output_channels());
596        ASSERT_TRUE(msg.has_num_reverse_channels());
597        int reverse_sample_rate = msg.sample_rate();
598        if (msg.has_reverse_sample_rate()) {
599          reverse_sample_rate = msg.reverse_sample_rate();
600        }
601        output_sample_rate = msg.sample_rate();
602        if (msg.has_output_sample_rate()) {
603          output_sample_rate = msg.output_sample_rate();
604        }
605        output_layout =
606            LayoutFromChannels(static_cast<size_t>(msg.num_output_channels()));
607        ASSERT_EQ(kNoErr,
608                  apm->Initialize(
609                      msg.sample_rate(),
610                      output_sample_rate,
611                      reverse_sample_rate,
612                      LayoutFromChannels(
613                          static_cast<size_t>(msg.num_input_channels())),
614                      output_layout,
615                      LayoutFromChannels(
616                          static_cast<size_t>(msg.num_reverse_channels()))));
617
618        samples_per_channel = msg.sample_rate() / 100;
619        far_frame.sample_rate_hz_ = reverse_sample_rate;
620        far_frame.samples_per_channel_ = reverse_sample_rate / 100;
621        far_frame.num_channels_ = msg.num_reverse_channels();
622        near_frame.sample_rate_hz_ = msg.sample_rate();
623        near_frame.samples_per_channel_ = samples_per_channel;
624        near_frame.num_channels_ = msg.num_input_channels();
625        reverse_cb.reset(new ChannelBuffer<float>(
626            far_frame.samples_per_channel_,
627            msg.num_reverse_channels()));
628        primary_cb.reset(new ChannelBuffer<float>(samples_per_channel,
629                                                  msg.num_input_channels()));
630
631        if (verbose) {
632          printf("Init at frame: %d (primary), %d (reverse)\n",
633              primary_count, reverse_count);
634          printf("  Primary rates: %d Hz (in), %d Hz (out)\n",
635                 msg.sample_rate(), output_sample_rate);
636          printf("  Primary channels: %d (in), %d (out)\n",
637                 msg.num_input_channels(),
638                 msg.num_output_channels());
639          printf("  Reverse rate: %d\n", reverse_sample_rate);
640          printf("  Reverse channels: %d\n", msg.num_reverse_channels());
641        }
642
643        if (!raw_output) {
644          // The WAV file needs to be reset every time, because it can't change
645          // its sample rate or number of channels.
646          output_wav_file.reset(new WavWriter(
647              out_filename + ".wav", output_sample_rate,
648              static_cast<size_t>(msg.num_output_channels())));
649        }
650
651      } else if (event_msg.type() == Event::REVERSE_STREAM) {
652        ASSERT_TRUE(event_msg.has_reverse_stream());
653        ReverseStream msg = event_msg.reverse_stream();
654        reverse_count++;
655
656        ASSERT_TRUE(msg.has_data() ^ (msg.channel_size() > 0));
657        if (msg.has_data()) {
658          ASSERT_EQ(sizeof(int16_t) * far_frame.samples_per_channel_ *
659              far_frame.num_channels_, msg.data().size());
660          memcpy(far_frame.data_, msg.data().data(), msg.data().size());
661        } else {
662          for (int i = 0; i < msg.channel_size(); ++i) {
663            memcpy(reverse_cb->channels()[i],
664                   msg.channel(i).data(),
665                   reverse_cb->num_frames() *
666                       sizeof(reverse_cb->channels()[i][0]));
667          }
668        }
669
670        if (perf_testing) {
671          t0 = TickTime::Now();
672        }
673
674        if (msg.has_data()) {
675          ASSERT_EQ(apm->kNoError,
676                    apm->AnalyzeReverseStream(&far_frame));
677        } else {
678          ASSERT_EQ(apm->kNoError,
679                    apm->AnalyzeReverseStream(
680                        reverse_cb->channels(),
681                        far_frame.samples_per_channel_,
682                        far_frame.sample_rate_hz_,
683                        LayoutFromChannels(far_frame.num_channels_)));
684        }
685
686        if (perf_testing) {
687          t1 = TickTime::Now();
688          TickInterval tick_diff = t1 - t0;
689          acc_ticks += tick_diff;
690          if (tick_diff.Microseconds() > max_time_reverse_us) {
691            max_time_reverse_us = tick_diff.Microseconds();
692          }
693          if (tick_diff.Microseconds() < min_time_reverse_us) {
694            min_time_reverse_us = tick_diff.Microseconds();
695          }
696        }
697
698      } else if (event_msg.type() == Event::STREAM) {
699        ASSERT_TRUE(event_msg.has_stream());
700        const Stream msg = event_msg.stream();
701        primary_count++;
702
703        // ProcessStream could have changed this for the output frame.
704        near_frame.num_channels_ = apm->num_input_channels();
705
706        ASSERT_TRUE(msg.has_input_data() ^ (msg.input_channel_size() > 0));
707        if (msg.has_input_data()) {
708          ASSERT_EQ(sizeof(int16_t) * samples_per_channel *
709              near_frame.num_channels_, msg.input_data().size());
710          memcpy(near_frame.data_,
711                 msg.input_data().data(),
712                 msg.input_data().size());
713          near_read_bytes += msg.input_data().size();
714        } else {
715          for (int i = 0; i < msg.input_channel_size(); ++i) {
716            memcpy(primary_cb->channels()[i],
717                   msg.input_channel(i).data(),
718                   primary_cb->num_frames() *
719                       sizeof(primary_cb->channels()[i][0]));
720            near_read_bytes += msg.input_channel(i).size();
721          }
722        }
723
724        if (progress && primary_count % 100 == 0) {
725          near_read_bytes = std::min(near_read_bytes, near_size_bytes);
726          printf("%.0f%% complete\r",
727              (near_read_bytes * 100.0) / near_size_bytes);
728          fflush(stdout);
729        }
730
731        if (perf_testing) {
732          t0 = TickTime::Now();
733        }
734
735        ASSERT_EQ(apm->kNoError,
736                  apm->gain_control()->set_stream_analog_level(msg.level()));
737        delay_ms = msg.delay() + extra_delay_ms;
738        if (override_delay_ms) {
739          delay_ms = override_delay_ms;
740        }
741        ASSERT_EQ(apm->kNoError,
742                  apm->set_stream_delay_ms(delay_ms));
743        apm->echo_cancellation()->set_stream_drift_samples(msg.drift());
744
745        if (msg.has_keypress()) {
746          apm->set_stream_key_pressed(msg.keypress());
747        } else {
748          apm->set_stream_key_pressed(true);
749        }
750
751        int err = apm->kNoError;
752        if (msg.has_input_data()) {
753          err = apm->ProcessStream(&near_frame);
754          ASSERT_TRUE(near_frame.num_channels_ == apm->num_output_channels());
755        } else {
756          err = apm->ProcessStream(
757              primary_cb->channels(),
758              near_frame.samples_per_channel_,
759              near_frame.sample_rate_hz_,
760              LayoutFromChannels(near_frame.num_channels_),
761              output_sample_rate,
762              output_layout,
763              primary_cb->channels());
764        }
765
766        if (err == apm->kBadStreamParameterWarning) {
767          printf("Bad parameter warning. %s\n", trace_stream.str().c_str());
768        }
769        ASSERT_TRUE(err == apm->kNoError ||
770                    err == apm->kBadStreamParameterWarning);
771
772        stream_has_voice =
773            static_cast<int8_t>(apm->voice_detection()->stream_has_voice());
774        if (vad_out_file != NULL) {
775          ASSERT_EQ(1u, fwrite(&stream_has_voice,
776                               sizeof(stream_has_voice),
777                               1,
778                               vad_out_file));
779        }
780
781        if (ns_prob_file != NULL) {
782          ns_speech_prob = apm->noise_suppression()->speech_probability();
783          ASSERT_EQ(1u, fwrite(&ns_speech_prob,
784                               sizeof(ns_speech_prob),
785                               1,
786                               ns_prob_file));
787        }
788
789        if (perf_testing) {
790          t1 = TickTime::Now();
791          TickInterval tick_diff = t1 - t0;
792          acc_ticks += tick_diff;
793          if (tick_diff.Microseconds() > max_time_us) {
794            max_time_us = tick_diff.Microseconds();
795          }
796          if (tick_diff.Microseconds() < min_time_us) {
797            min_time_us = tick_diff.Microseconds();
798          }
799        }
800
801        const size_t samples_per_channel = output_sample_rate / 100;
802        if (msg.has_input_data()) {
803          if (raw_output && !output_raw_file) {
804            output_raw_file.reset(new RawFile(out_filename + ".pcm"));
805          }
806          WriteIntData(near_frame.data_,
807                       apm->num_output_channels() * samples_per_channel,
808                       output_wav_file.get(),
809                       output_raw_file.get());
810        } else {
811          if (raw_output && !output_raw_file) {
812            output_raw_file.reset(new RawFile(out_filename + ".float"));
813          }
814          WriteFloatData(primary_cb->channels(),
815                         samples_per_channel,
816                         apm->num_output_channels(),
817                         output_wav_file.get(),
818                         output_raw_file.get());
819        }
820      }
821    }
822
823    ASSERT_TRUE(feof(pb_file));
824
825  } else {
826    enum Events {
827      kInitializeEvent,
828      kRenderEvent,
829      kCaptureEvent,
830      kResetEventDeprecated
831    };
832    int16_t event = 0;
833    while (simulating || feof(event_file) == 0) {
834      std::ostringstream trace_stream;
835      trace_stream << "Processed frames: " << reverse_count << " (reverse), "
836                   << primary_count << " (primary)";
837      SCOPED_TRACE(trace_stream.str());
838
839      if (simulating) {
840        if (far_file == NULL) {
841          event = kCaptureEvent;
842        } else {
843          if (event == kRenderEvent) {
844            event = kCaptureEvent;
845          } else {
846            event = kRenderEvent;
847          }
848        }
849      } else {
850        read_count = fread(&event, sizeof(event), 1, event_file);
851        if (read_count != 1) {
852          break;
853        }
854      }
855
856      far_frame.sample_rate_hz_ = sample_rate_hz;
857      far_frame.samples_per_channel_ = samples_per_channel;
858      far_frame.num_channels_ = num_render_channels;
859      near_frame.sample_rate_hz_ = sample_rate_hz;
860      near_frame.samples_per_channel_ = samples_per_channel;
861
862      if (event == kInitializeEvent || event == kResetEventDeprecated) {
863        ASSERT_EQ(1u,
864            fread(&sample_rate_hz, sizeof(sample_rate_hz), 1, event_file));
865        samples_per_channel = sample_rate_hz / 100;
866
867        int32_t unused_device_sample_rate_hz;
868        ASSERT_EQ(1u,
869            fread(&unused_device_sample_rate_hz,
870                  sizeof(unused_device_sample_rate_hz),
871                  1,
872                  event_file));
873
874        ASSERT_EQ(kNoErr, apm->Initialize(
875                              sample_rate_hz,
876                              sample_rate_hz,
877                              sample_rate_hz,
878                              LayoutFromChannels(num_capture_input_channels),
879                              LayoutFromChannels(num_capture_output_channels),
880                              LayoutFromChannels(num_render_channels)));
881
882        far_frame.sample_rate_hz_ = sample_rate_hz;
883        far_frame.samples_per_channel_ = samples_per_channel;
884        far_frame.num_channels_ = num_render_channels;
885        near_frame.sample_rate_hz_ = sample_rate_hz;
886        near_frame.samples_per_channel_ = samples_per_channel;
887
888        if (!raw_output) {
          // The WAV file needs to be reset every time, because it can't change
          // its sample rate or number of channels.
891          output_wav_file.reset(new WavWriter(out_filename + ".wav",
892                                              sample_rate_hz,
893                                              num_capture_output_channels));
894        }
895
896        if (verbose) {
897          printf("Init at frame: %d (primary), %d (reverse)\n",
898              primary_count, reverse_count);
899          printf("  Sample rate: %d Hz\n", sample_rate_hz);
900        }
901
902      } else if (event == kRenderEvent) {
903        reverse_count++;
904
905        size_t size = samples_per_channel * num_render_channels;
906        read_count = fread(far_frame.data_,
907                           sizeof(int16_t),
908                           size,
909                           far_file);
910
911        if (simulating) {
912          if (read_count != size) {
913            // Read an equal amount from the near file to avoid errors due to
914            // not reaching end-of-file.
915            EXPECT_EQ(0, fseek(near_file, read_count * sizeof(int16_t),
916                      SEEK_CUR));
917            break;  // This is expected.
918          }
919        } else {
920          ASSERT_EQ(size, read_count);
921        }
922
923        if (perf_testing) {
924          t0 = TickTime::Now();
925        }
926
927        ASSERT_EQ(apm->kNoError,
928                  apm->AnalyzeReverseStream(&far_frame));
929
930        if (perf_testing) {
931          t1 = TickTime::Now();
932          TickInterval tick_diff = t1 - t0;
933          acc_ticks += tick_diff;
934          if (tick_diff.Microseconds() > max_time_reverse_us) {
935            max_time_reverse_us = tick_diff.Microseconds();
936          }
937          if (tick_diff.Microseconds() < min_time_reverse_us) {
938            min_time_reverse_us = tick_diff.Microseconds();
939          }
940        }
941
942      } else if (event == kCaptureEvent) {
943        primary_count++;
944        near_frame.num_channels_ = num_capture_input_channels;
945
946        size_t size = samples_per_channel * num_capture_input_channels;
947        read_count = fread(near_frame.data_,
948                           sizeof(int16_t),
949                           size,
950                           near_file);
951
952        near_read_bytes += read_count * sizeof(int16_t);
953        if (progress && primary_count % 100 == 0) {
954          printf("%.0f%% complete\r",
955              (near_read_bytes * 100.0) / near_size_bytes);
956          fflush(stdout);
957        }
958        if (simulating) {
959          if (read_count != size) {
960            break;  // This is expected.
961          }
962
963          delay_ms = 0;
964          drift_samples = 0;
965        } else {
966          ASSERT_EQ(size, read_count);
967
968          // TODO(ajm): sizeof(delay_ms) for current files?
969          ASSERT_EQ(1u,
970              fread(&delay_ms, 2, 1, delay_file));
971          ASSERT_EQ(1u,
972              fread(&drift_samples, sizeof(drift_samples), 1, drift_file));
973        }
974
975        if (apm->gain_control()->is_enabled() &&
976            apm->gain_control()->mode() == GainControl::kAdaptiveAnalog) {
977          SimulateMic(capture_level, &near_frame);
978        }
979
980        if (perf_testing) {
981          t0 = TickTime::Now();
982        }
983
984        const int capture_level_in = capture_level;
985        ASSERT_EQ(apm->kNoError,
986                  apm->gain_control()->set_stream_analog_level(capture_level));
987        delay_ms += extra_delay_ms;
988        if (override_delay_ms) {
989          delay_ms = override_delay_ms;
990        }
991        ASSERT_EQ(apm->kNoError,
992                  apm->set_stream_delay_ms(delay_ms));
993        apm->echo_cancellation()->set_stream_drift_samples(drift_samples);
994
995        apm->set_stream_key_pressed(true);
996
997        int err = apm->ProcessStream(&near_frame);
998        if (err == apm->kBadStreamParameterWarning) {
999          printf("Bad parameter warning. %s\n", trace_stream.str().c_str());
1000        }
1001        ASSERT_TRUE(err == apm->kNoError ||
1002                    err == apm->kBadStreamParameterWarning);
1003        ASSERT_TRUE(near_frame.num_channels_ == apm->num_output_channels());
1004
1005        capture_level = apm->gain_control()->stream_analog_level();
1006
1007        stream_has_voice =
1008            static_cast<int8_t>(apm->voice_detection()->stream_has_voice());
1009        if (vad_out_file != NULL) {
1010          ASSERT_EQ(1u, fwrite(&stream_has_voice,
1011                               sizeof(stream_has_voice),
1012                               1,
1013                               vad_out_file));
1014        }
1015
1016        if (ns_prob_file != NULL) {
1017          ns_speech_prob = apm->noise_suppression()->speech_probability();
1018          ASSERT_EQ(1u, fwrite(&ns_speech_prob,
1019                               sizeof(ns_speech_prob),
1020                               1,
1021                               ns_prob_file));
1022        }
1023
1024        if (apm->gain_control()->mode() != GainControl::kAdaptiveAnalog) {
1025          ASSERT_EQ(capture_level_in, capture_level);
1026        }
1027
1028        if (perf_testing) {
1029          t1 = TickTime::Now();
1030          TickInterval tick_diff = t1 - t0;
1031          acc_ticks += tick_diff;
1032          if (tick_diff.Microseconds() > max_time_us) {
1033            max_time_us = tick_diff.Microseconds();
1034          }
1035          if (tick_diff.Microseconds() < min_time_us) {
1036            min_time_us = tick_diff.Microseconds();
1037          }
1038        }
1039
1040        if (raw_output && !output_raw_file) {
1041          output_raw_file.reset(new RawFile(out_filename + ".pcm"));
1042        }
1043        if (!raw_output && !output_wav_file) {
1044          output_wav_file.reset(new WavWriter(out_filename + ".wav",
1045                                              sample_rate_hz,
1046                                              num_capture_output_channels));
1047        }
1048        WriteIntData(near_frame.data_,
1049                     size,
1050                     output_wav_file.get(),
1051                     output_raw_file.get());
1052      } else {
1053        FAIL() << "Event " << event << " is unrecognized";
1054      }
1055    }
1056  }
1057  if (progress) {
1058    printf("100%% complete\r");
1059  }
1060
1061  if (aecm_echo_path_out_file != NULL) {
1062    const size_t path_size =
1063        apm->echo_control_mobile()->echo_path_size_bytes();
1064    rtc::scoped_ptr<char[]> echo_path(new char[path_size]);
1065    apm->echo_control_mobile()->GetEchoPath(echo_path.get(), path_size);
1066    ASSERT_EQ(path_size, fwrite(echo_path.get(),
1067                                sizeof(char),
1068                                path_size,
1069                                aecm_echo_path_out_file));
1070    fclose(aecm_echo_path_out_file);
1071    aecm_echo_path_out_file = NULL;
1072  }
1073
1074  if (verbose) {
1075    printf("\nProcessed frames: %d (primary), %d (reverse)\n",
1076        primary_count, reverse_count);
1077
1078    if (apm->level_estimator()->is_enabled()) {
1079      printf("\n--Level metrics--\n");
1080      printf("RMS: %d dBFS\n", -apm->level_estimator()->RMS());
1081    }
1082    if (apm->echo_cancellation()->are_metrics_enabled()) {
1083      EchoCancellation::Metrics metrics;
1084      apm->echo_cancellation()->GetMetrics(&metrics);
1085      printf("\n--Echo metrics--\n");
1086      printf("(avg, max, min)\n");
1087      printf("ERL:  ");
1088      PrintStat(metrics.echo_return_loss);
1089      printf("ERLE: ");
1090      PrintStat(metrics.echo_return_loss_enhancement);
1091      printf("ANLP: ");
1092      PrintStat(metrics.a_nlp);
1093    }
1094    if (apm->echo_cancellation()->is_delay_logging_enabled()) {
1095      int median = 0;
1096      int std = 0;
1097      float fraction_poor_delays = 0;
1098      apm->echo_cancellation()->GetDelayMetrics(&median, &std,
1099                                                &fraction_poor_delays);
1100      printf("\n--Delay metrics--\n");
1101      printf("Median:             %3d\n", median);
1102      printf("Standard deviation: %3d\n", std);
1103      printf("Poor delay values:  %3.1f%%\n", fraction_poor_delays * 100);
1104    }
1105  }
1106
1107  if (!pb_file) {
1108    int8_t temp_int8;
1109    if (far_file) {
1110      read_count = fread(&temp_int8, sizeof(temp_int8), 1, far_file);
1111      EXPECT_NE(0, feof(far_file)) << "Far-end file not fully processed";
1112    }
1113
1114    read_count = fread(&temp_int8, sizeof(temp_int8), 1, near_file);
1115    EXPECT_NE(0, feof(near_file)) << "Near-end file not fully processed";
1116
1117    if (!simulating) {
1118      read_count = fread(&temp_int8, sizeof(temp_int8), 1, event_file);
1119      EXPECT_NE(0, feof(event_file)) << "Event file not fully processed";
1120      read_count = fread(&temp_int8, sizeof(temp_int8), 1, delay_file);
1121      EXPECT_NE(0, feof(delay_file)) << "Delay file not fully processed";
1122      read_count = fread(&temp_int8, sizeof(temp_int8), 1, drift_file);
1123      EXPECT_NE(0, feof(drift_file)) << "Drift file not fully processed";
1124    }
1125  }
1126
1127  if (perf_testing) {
1128    if (primary_count > 0) {
1129      int64_t exec_time = acc_ticks.Milliseconds();
1130      printf("\nTotal time: %.3f s, file time: %.2f s\n",
1131        exec_time * 0.001, primary_count * 0.01);
1132      printf("Time per frame: %.3f ms (average), %.3f ms (max),"
1133             " %.3f ms (min)\n",
1134          (exec_time * 1.0) / primary_count,
1135          (max_time_us + max_time_reverse_us) / 1000.0,
1136          (min_time_us + min_time_reverse_us) / 1000.0);
1137      // Record the results with Perf test tools.
1138      webrtc::test::PrintResult("audioproc", "", "time_per_10ms_frame",
1139          (exec_time * 1000) / primary_count, "us", false);
1140    } else {
1141      printf("Warning: no capture frames\n");
1142    }
1143  }
1144}
1145
1146}  // namespace
1147}  // namespace webrtc
1148
1149int main(int argc, char* argv[]) {
1150  webrtc::void_main(argc, argv);
1151
1152  // Optional, but removes memory leak noise from Valgrind.
1153  google::protobuf::ShutdownProtobufLibrary();
1154  return 0;
1155}
1156