1/*
2 *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "audio_processing_impl.h"
12
13#include <assert.h>
14
15#include "audio_buffer.h"
16#include "critical_section_wrapper.h"
17#include "echo_cancellation_impl.h"
18#include "echo_control_mobile_impl.h"
19#include "file_wrapper.h"
20#include "high_pass_filter_impl.h"
21#include "gain_control_impl.h"
22#include "level_estimator_impl.h"
23#include "module_common_types.h"
24#include "noise_suppression_impl.h"
25#include "processing_component.h"
26#include "splitting_filter.h"
27#include "voice_detection_impl.h"
28
29#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
30// Files generated at build-time by the protobuf compiler.
31#ifdef WEBRTC_ANDROID
32#include "external/webrtc/src/modules/audio_processing/debug.pb.h"
33#else
34#include "webrtc/audio_processing/debug.pb.h"
35#endif
36#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
37
38namespace webrtc {
39AudioProcessing* AudioProcessing::Create(int id) {
40  /*WEBRTC_TRACE(webrtc::kTraceModuleCall,
41             webrtc::kTraceAudioProcessing,
42             id,
43             "AudioProcessing::Create()");*/
44
45  AudioProcessingImpl* apm = new AudioProcessingImpl(id);
46  if (apm->Initialize() != kNoError) {
47    delete apm;
48    apm = NULL;
49  }
50
51  return apm;
52}
53
54void AudioProcessing::Destroy(AudioProcessing* apm) {
55  delete static_cast<AudioProcessingImpl*>(apm);
56}
57
58AudioProcessingImpl::AudioProcessingImpl(int id)
59    : id_(id),
60      echo_cancellation_(NULL),
61      echo_control_mobile_(NULL),
62      gain_control_(NULL),
63      high_pass_filter_(NULL),
64      level_estimator_(NULL),
65      noise_suppression_(NULL),
66      voice_detection_(NULL),
67      crit_(CriticalSectionWrapper::CreateCriticalSection()),
68      render_audio_(NULL),
69      capture_audio_(NULL),
70#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
71      debug_file_(FileWrapper::Create()),
72      event_msg_(new audioproc::Event()),
73#endif
74      sample_rate_hz_(kSampleRate16kHz),
75      split_sample_rate_hz_(kSampleRate16kHz),
76      samples_per_channel_(sample_rate_hz_ / 100),
77      stream_delay_ms_(0),
78      was_stream_delay_set_(false),
79      num_reverse_channels_(1),
80      num_input_channels_(1),
81      num_output_channels_(1) {
82
83  echo_cancellation_ = new EchoCancellationImpl(this);
84  component_list_.push_back(echo_cancellation_);
85
86  echo_control_mobile_ = new EchoControlMobileImpl(this);
87  component_list_.push_back(echo_control_mobile_);
88
89  gain_control_ = new GainControlImpl(this);
90  component_list_.push_back(gain_control_);
91
92  high_pass_filter_ = new HighPassFilterImpl(this);
93  component_list_.push_back(high_pass_filter_);
94
95  level_estimator_ = new LevelEstimatorImpl(this);
96  component_list_.push_back(level_estimator_);
97
98  noise_suppression_ = new NoiseSuppressionImpl(this);
99  component_list_.push_back(noise_suppression_);
100
101  voice_detection_ = new VoiceDetectionImpl(this);
102  component_list_.push_back(voice_detection_);
103}
104
105AudioProcessingImpl::~AudioProcessingImpl() {
106  while (!component_list_.empty()) {
107    ProcessingComponent* component = component_list_.front();
108    component->Destroy();
109    delete component;
110    component_list_.pop_front();
111  }
112
113#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
114  if (debug_file_->Open()) {
115    debug_file_->CloseFile();
116  }
117#endif
118
119  delete crit_;
120  crit_ = NULL;
121
122  if (render_audio_) {
123    delete render_audio_;
124    render_audio_ = NULL;
125  }
126
127  if (capture_audio_) {
128    delete capture_audio_;
129    capture_audio_ = NULL;
130  }
131}
132
133CriticalSectionWrapper* AudioProcessingImpl::crit() const {
134  return crit_;
135}
136
137int AudioProcessingImpl::split_sample_rate_hz() const {
138  return split_sample_rate_hz_;
139}
140
141int AudioProcessingImpl::Initialize() {
142  CriticalSectionScoped crit_scoped(*crit_);
143  return InitializeLocked();
144}
145
146int AudioProcessingImpl::InitializeLocked() {
147  if (render_audio_ != NULL) {
148    delete render_audio_;
149    render_audio_ = NULL;
150  }
151
152  if (capture_audio_ != NULL) {
153    delete capture_audio_;
154    capture_audio_ = NULL;
155  }
156
157  render_audio_ = new AudioBuffer(num_reverse_channels_,
158                                  samples_per_channel_);
159  capture_audio_ = new AudioBuffer(num_input_channels_,
160                                   samples_per_channel_);
161
162  was_stream_delay_set_ = false;
163
164  // Initialize all components.
165  std::list<ProcessingComponent*>::iterator it;
166  for (it = component_list_.begin(); it != component_list_.end(); it++) {
167    int err = (*it)->Initialize();
168    if (err != kNoError) {
169      return err;
170    }
171  }
172
173#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
174  if (debug_file_->Open()) {
175    int err = WriteInitMessage();
176    if (err != kNoError) {
177      return err;
178    }
179  }
180#endif
181
182  return kNoError;
183}
184
185int AudioProcessingImpl::set_sample_rate_hz(int rate) {
186  CriticalSectionScoped crit_scoped(*crit_);
187  if (rate != kSampleRate8kHz &&
188      rate != kSampleRate16kHz &&
189      rate != kSampleRate32kHz) {
190    return kBadParameterError;
191  }
192
193  sample_rate_hz_ = rate;
194  samples_per_channel_ = rate / 100;
195
196  if (sample_rate_hz_ == kSampleRate32kHz) {
197    split_sample_rate_hz_ = kSampleRate16kHz;
198  } else {
199    split_sample_rate_hz_ = sample_rate_hz_;
200  }
201
202  return InitializeLocked();
203}
204
205int AudioProcessingImpl::sample_rate_hz() const {
206  return sample_rate_hz_;
207}
208
209int AudioProcessingImpl::set_num_reverse_channels(int channels) {
210  CriticalSectionScoped crit_scoped(*crit_);
211  // Only stereo supported currently.
212  if (channels > 2 || channels < 1) {
213    return kBadParameterError;
214  }
215
216  num_reverse_channels_ = channels;
217
218  return InitializeLocked();
219}
220
221int AudioProcessingImpl::num_reverse_channels() const {
222  return num_reverse_channels_;
223}
224
225int AudioProcessingImpl::set_num_channels(
226    int input_channels,
227    int output_channels) {
228  CriticalSectionScoped crit_scoped(*crit_);
229  if (output_channels > input_channels) {
230    return kBadParameterError;
231  }
232
233  // Only stereo supported currently.
234  if (input_channels > 2 || input_channels < 1) {
235    return kBadParameterError;
236  }
237
238  if (output_channels > 2 || output_channels < 1) {
239    return kBadParameterError;
240  }
241
242  num_input_channels_ = input_channels;
243  num_output_channels_ = output_channels;
244
245  return InitializeLocked();
246}
247
248int AudioProcessingImpl::num_input_channels() const {
249  return num_input_channels_;
250}
251
252int AudioProcessingImpl::num_output_channels() const {
253  return num_output_channels_;
254}
255
256int AudioProcessingImpl::ProcessStream(AudioFrame* frame) {
257  CriticalSectionScoped crit_scoped(*crit_);
258  int err = kNoError;
259
260  if (frame == NULL) {
261    return kNullPointerError;
262  }
263
264  if (frame->_frequencyInHz != sample_rate_hz_) {
265    return kBadSampleRateError;
266  }
267
268  if (frame->_audioChannel != num_input_channels_) {
269    return kBadNumberChannelsError;
270  }
271
272  if (frame->_payloadDataLengthInSamples != samples_per_channel_) {
273    return kBadDataLengthError;
274  }
275
276#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
277  if (debug_file_->Open()) {
278    event_msg_->set_type(audioproc::Event::STREAM);
279    audioproc::Stream* msg = event_msg_->mutable_stream();
280    const size_t data_size = sizeof(int16_t) *
281                             frame->_payloadDataLengthInSamples *
282                             frame->_audioChannel;
283    msg->set_input_data(frame->_payloadData, data_size);
284    msg->set_delay(stream_delay_ms_);
285    msg->set_drift(echo_cancellation_->stream_drift_samples());
286    msg->set_level(gain_control_->stream_analog_level());
287  }
288#endif
289
290  capture_audio_->DeinterleaveFrom(frame);
291
292  // TODO(ajm): experiment with mixing and AEC placement.
293  if (num_output_channels_ < num_input_channels_) {
294    capture_audio_->Mix(num_output_channels_);
295    frame->_audioChannel = num_output_channels_;
296  }
297
298  bool data_changed = stream_data_changed();
299  if (analysis_needed(data_changed)) {
300    for (int i = 0; i < num_output_channels_; i++) {
301      // Split into a low and high band.
302      SplittingFilterAnalysis(capture_audio_->data(i),
303                              capture_audio_->low_pass_split_data(i),
304                              capture_audio_->high_pass_split_data(i),
305                              capture_audio_->analysis_filter_state1(i),
306                              capture_audio_->analysis_filter_state2(i));
307    }
308  }
309
310  err = high_pass_filter_->ProcessCaptureAudio(capture_audio_);
311  if (err != kNoError) {
312    return err;
313  }
314
315  err = gain_control_->AnalyzeCaptureAudio(capture_audio_);
316  if (err != kNoError) {
317    return err;
318  }
319
320  err = echo_cancellation_->ProcessCaptureAudio(capture_audio_);
321  if (err != kNoError) {
322    return err;
323  }
324
325  if (echo_control_mobile_->is_enabled() &&
326      noise_suppression_->is_enabled()) {
327    capture_audio_->CopyLowPassToReference();
328  }
329
330  err = noise_suppression_->ProcessCaptureAudio(capture_audio_);
331  if (err != kNoError) {
332    return err;
333  }
334
335  err = echo_control_mobile_->ProcessCaptureAudio(capture_audio_);
336  if (err != kNoError) {
337    return err;
338  }
339
340  err = voice_detection_->ProcessCaptureAudio(capture_audio_);
341  if (err != kNoError) {
342    return err;
343  }
344
345  err = gain_control_->ProcessCaptureAudio(capture_audio_);
346  if (err != kNoError) {
347    return err;
348  }
349
350  if (synthesis_needed(data_changed)) {
351    for (int i = 0; i < num_output_channels_; i++) {
352      // Recombine low and high bands.
353      SplittingFilterSynthesis(capture_audio_->low_pass_split_data(i),
354                               capture_audio_->high_pass_split_data(i),
355                               capture_audio_->data(i),
356                               capture_audio_->synthesis_filter_state1(i),
357                               capture_audio_->synthesis_filter_state2(i));
358    }
359  }
360
361  // The level estimator operates on the recombined data.
362  err = level_estimator_->ProcessStream(capture_audio_);
363  if (err != kNoError) {
364    return err;
365  }
366
367  capture_audio_->InterleaveTo(frame, data_changed);
368
369#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
370  if (debug_file_->Open()) {
371    audioproc::Stream* msg = event_msg_->mutable_stream();
372    const size_t data_size = sizeof(int16_t) *
373                             frame->_payloadDataLengthInSamples *
374                             frame->_audioChannel;
375    msg->set_output_data(frame->_payloadData, data_size);
376    err = WriteMessageToDebugFile();
377    if (err != kNoError) {
378      return err;
379    }
380  }
381#endif
382
383  was_stream_delay_set_ = false;
384  return kNoError;
385}
386
387int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) {
388  CriticalSectionScoped crit_scoped(*crit_);
389  int err = kNoError;
390
391  if (frame == NULL) {
392    return kNullPointerError;
393  }
394
395  if (frame->_frequencyInHz != sample_rate_hz_) {
396    return kBadSampleRateError;
397  }
398
399  if (frame->_audioChannel != num_reverse_channels_) {
400    return kBadNumberChannelsError;
401  }
402
403  if (frame->_payloadDataLengthInSamples != samples_per_channel_) {
404    return kBadDataLengthError;
405  }
406
407#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
408  if (debug_file_->Open()) {
409    event_msg_->set_type(audioproc::Event::REVERSE_STREAM);
410    audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream();
411    const size_t data_size = sizeof(int16_t) *
412                             frame->_payloadDataLengthInSamples *
413                             frame->_audioChannel;
414    msg->set_data(frame->_payloadData, data_size);
415    err = WriteMessageToDebugFile();
416    if (err != kNoError) {
417      return err;
418    }
419  }
420#endif
421
422  render_audio_->DeinterleaveFrom(frame);
423
424  // TODO(ajm): turn the splitting filter into a component?
425  if (sample_rate_hz_ == kSampleRate32kHz) {
426    for (int i = 0; i < num_reverse_channels_; i++) {
427      // Split into low and high band.
428      SplittingFilterAnalysis(render_audio_->data(i),
429                              render_audio_->low_pass_split_data(i),
430                              render_audio_->high_pass_split_data(i),
431                              render_audio_->analysis_filter_state1(i),
432                              render_audio_->analysis_filter_state2(i));
433    }
434  }
435
436  // TODO(ajm): warnings possible from components?
437  err = echo_cancellation_->ProcessRenderAudio(render_audio_);
438  if (err != kNoError) {
439    return err;
440  }
441
442  err = echo_control_mobile_->ProcessRenderAudio(render_audio_);
443  if (err != kNoError) {
444    return err;
445  }
446
447  err = gain_control_->ProcessRenderAudio(render_audio_);
448  if (err != kNoError) {
449    return err;
450  }
451
452  return err;  // TODO(ajm): this is for returning warnings; necessary?
453}
454
455int AudioProcessingImpl::set_stream_delay_ms(int delay) {
456  was_stream_delay_set_ = true;
457  if (delay < 0) {
458    return kBadParameterError;
459  }
460
461  // TODO(ajm): the max is rather arbitrarily chosen; investigate.
462  if (delay > 500) {
463    stream_delay_ms_ = 500;
464    return kBadStreamParameterWarning;
465  }
466
467  stream_delay_ms_ = delay;
468  return kNoError;
469}
470
471int AudioProcessingImpl::stream_delay_ms() const {
472  return stream_delay_ms_;
473}
474
475bool AudioProcessingImpl::was_stream_delay_set() const {
476  return was_stream_delay_set_;
477}
478
479int AudioProcessingImpl::StartDebugRecording(
480    const char filename[AudioProcessing::kMaxFilenameSize]) {
481  CriticalSectionScoped crit_scoped(*crit_);
482  assert(kMaxFilenameSize == FileWrapper::kMaxFileNameSize);
483
484  if (filename == NULL) {
485    return kNullPointerError;
486  }
487
488#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
489  // Stop any ongoing recording.
490  if (debug_file_->Open()) {
491    if (debug_file_->CloseFile() == -1) {
492      return kFileError;
493    }
494  }
495
496  if (debug_file_->OpenFile(filename, false) == -1) {
497    debug_file_->CloseFile();
498    return kFileError;
499  }
500
501  int err = WriteInitMessage();
502  if (err != kNoError) {
503    return err;
504  }
505  return kNoError;
506#else
507  return kUnsupportedFunctionError;
508#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
509}
510
511int AudioProcessingImpl::StopDebugRecording() {
512  CriticalSectionScoped crit_scoped(*crit_);
513
514#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
515  // We just return if recording hasn't started.
516  if (debug_file_->Open()) {
517    if (debug_file_->CloseFile() == -1) {
518      return kFileError;
519    }
520  }
521  return kNoError;
522#else
523  return kUnsupportedFunctionError;
524#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
525}
526
527EchoCancellation* AudioProcessingImpl::echo_cancellation() const {
528  return echo_cancellation_;
529}
530
531EchoControlMobile* AudioProcessingImpl::echo_control_mobile() const {
532  return echo_control_mobile_;
533}
534
535GainControl* AudioProcessingImpl::gain_control() const {
536  return gain_control_;
537}
538
539HighPassFilter* AudioProcessingImpl::high_pass_filter() const {
540  return high_pass_filter_;
541}
542
543LevelEstimator* AudioProcessingImpl::level_estimator() const {
544  return level_estimator_;
545}
546
547NoiseSuppression* AudioProcessingImpl::noise_suppression() const {
548  return noise_suppression_;
549}
550
551VoiceDetection* AudioProcessingImpl::voice_detection() const {
552  return voice_detection_;
553}
554
555WebRtc_Word32 AudioProcessingImpl::ChangeUniqueId(const WebRtc_Word32 id) {
556  CriticalSectionScoped crit_scoped(*crit_);
557  /*WEBRTC_TRACE(webrtc::kTraceModuleCall,
558             webrtc::kTraceAudioProcessing,
559             id_,
560             "ChangeUniqueId(new id = %d)",
561             id);*/
562  id_ = id;
563
564  return kNoError;
565}
566
567bool AudioProcessingImpl::stream_data_changed() const {
568  int enabled_count = 0;
569  std::list<ProcessingComponent*>::const_iterator it;
570  for (it = component_list_.begin(); it != component_list_.end(); it++) {
571    if ((*it)->is_component_enabled()) {
572      enabled_count++;
573    }
574  }
575
576  // Data is unchanged if no components are enabled, or if only level_estimator_
577  // or voice_detection_ is enabled.
578  if (enabled_count == 0) {
579    return false;
580  } else if (enabled_count == 1) {
581    if (level_estimator_->is_enabled() || voice_detection_->is_enabled()) {
582      return false;
583    }
584  } else if (enabled_count == 2) {
585    if (level_estimator_->is_enabled() && voice_detection_->is_enabled()) {
586      return false;
587    }
588  }
589  return true;
590}
591
592bool AudioProcessingImpl::synthesis_needed(bool stream_data_changed) const {
593  return (stream_data_changed && sample_rate_hz_ == kSampleRate32kHz);
594}
595
596bool AudioProcessingImpl::analysis_needed(bool stream_data_changed) const {
597  if (!stream_data_changed && !voice_detection_->is_enabled()) {
598    // Only level_estimator_ is enabled.
599    return false;
600  } else if (sample_rate_hz_ == kSampleRate32kHz) {
601    // Something besides level_estimator_ is enabled, and we have super-wb.
602    return true;
603  }
604  return false;
605}
606
607#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
608int AudioProcessingImpl::WriteMessageToDebugFile() {
609  int32_t size = event_msg_->ByteSize();
610  if (size <= 0) {
611    return kUnspecifiedError;
612  }
613#if defined(WEBRTC_BIG_ENDIAN)
614  // TODO(ajm): Use little-endian "on the wire". For the moment, we can be
615  //            pretty safe in assuming little-endian.
616#endif
617
618  if (!event_msg_->SerializeToString(&event_str_)) {
619    return kUnspecifiedError;
620  }
621
622  // Write message preceded by its size.
623  if (!debug_file_->Write(&size, sizeof(int32_t))) {
624    return kFileError;
625  }
626  if (!debug_file_->Write(event_str_.data(), event_str_.length())) {
627    return kFileError;
628  }
629
630  event_msg_->Clear();
631
632  return 0;
633}
634
635int AudioProcessingImpl::WriteInitMessage() {
636  event_msg_->set_type(audioproc::Event::INIT);
637  audioproc::Init* msg = event_msg_->mutable_init();
638  msg->set_sample_rate(sample_rate_hz_);
639  msg->set_device_sample_rate(echo_cancellation_->device_sample_rate_hz());
640  msg->set_num_input_channels(num_input_channels_);
641  msg->set_num_output_channels(num_output_channels_);
642  msg->set_num_reverse_channels(num_reverse_channels_);
643
644  int err = WriteMessageToDebugFile();
645  if (err != kNoError) {
646    return err;
647  }
648
649  return kNoError;
650}
651#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
652}  // namespace webrtc
653