media_stream_audio_processor.h revision 5d1f7b1de12d16ceb2c938c56701a3e8bfa558f7
1f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)// Copyright 2013 The Chromium Authors. All rights reserved. 2f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 3f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)// found in the LICENSE file. 4f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 5f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#ifndef CONTENT_RENDERER_MEDIA_MEDIA_STREAM_AUDIO_PROCESSOR_H_ 6f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#define CONTENT_RENDERER_MEDIA_MEDIA_STREAM_AUDIO_PROCESSOR_H_ 7f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 8f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "base/atomicops.h" 9f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "base/synchronization/lock.h" 10f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "base/threading/thread_checker.h" 11f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "base/time/time.h" 12f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "content/common/content_export.h" 13f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "media/base/audio_converter.h" 14f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "third_party/webrtc/modules/audio_processing/include/audio_processing.h" 15f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#include "third_party/webrtc/modules/interface/module_common_types.h" 16f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 175d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)namespace blink { 185d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)class WebMediaConstraints; 195d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)} 205d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 21f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)namespace media { 22f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)class AudioBus; 23f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)class AudioFifo; 24f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)class AudioParameters; 25f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)} // namespace media 26f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 27f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)namespace webrtc { 28f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)class AudioFrame; 29f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)} 30f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 31f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)namespace content { 32f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 335d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)class RTCMediaConstraints; 345d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 35f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)// This class owns an object of webrtc::AudioProcessing which contains signal 36f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)// processing components like AGC, AEC and NS. It enables the components based 37f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)// on the getUserMedia constraints, processes the data and outputs it in a unit 38f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)// of 10 ms data chunk. 395d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles)class CONTENT_EXPORT MediaStreamAudioProcessor : 405d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) public base::RefCountedThreadSafe<MediaStreamAudioProcessor> { 41f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) public: 425d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) MediaStreamAudioProcessor(const media::AudioParameters& source_params, 435d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) const blink::WebMediaConstraints& constraints, 445d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) int effects); 45f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 46f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Pushes capture data in |audio_source| to the internal FIFO. 47f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Called on the capture audio thread. 48f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) void PushCaptureData(media::AudioBus* audio_source); 49f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 50f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Push the render audio to webrtc::AudioProcessing for analysis. This is 51f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // needed iff echo processing is enabled. 52f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // |render_audio| is the pointer to the render audio data, its format 53f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // is specified by |sample_rate|, |number_of_channels| and |number_of_frames|. 54f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Called on the render audio thread. 55f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) void PushRenderData(const int16* render_audio, 56f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) int sample_rate, 57f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) int number_of_channels, 58f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) int number_of_frames, 59f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) base::TimeDelta render_delay); 60f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 61f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Processes a block of 10 ms data from the internal FIFO and outputs it via 62f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // |out|. |out| is the address of the pointer that will be pointed to 63f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // the post-processed data if the method is returning a true. The lifetime 64f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // of the data represeted by |out| is guaranteed to outlive the method call. 65f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // That also says *|out| won't change until this method is called again. 665d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) // |new_volume| receives the new microphone volume from the AGC. 675d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) // The new microphoen volume range is [0, 255], and the value will be 0 if 685d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) // the microphone volume should not be adjusted. 69f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Returns true if the internal FIFO has at least 10 ms data for processing, 70f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // otherwise false. 71f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // |capture_delay|, |volume| and |key_pressed| will be passed to 72f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // webrtc::AudioProcessing to help processing the data. 73f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Called on the capture audio thread. 74f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) bool ProcessAndConsumeData(base::TimeDelta capture_delay, 75f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) int volume, 76f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) bool key_pressed, 775d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) int* new_volume, 78f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) int16** out); 79f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 805d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 815d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) // The audio format of the input to the processor. 825d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) const media::AudioParameters& InputFormat() const; 83f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 84f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // The audio format of the output from the processor. 85f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) const media::AudioParameters& OutputFormat() const; 86f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 87f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Accessor to check if the audio processing is enabled or not. 885d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) bool has_audio_processing() const { return audio_processing_ != NULL; } 895d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 905d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) protected: 915d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) friend class base::RefCountedThreadSafe<MediaStreamAudioProcessor>; 925d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) virtual ~MediaStreamAudioProcessor(); 93f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 94f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) private: 955d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) friend class MediaStreamAudioProcessorTest; 965d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 97f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) class MediaStreamAudioConverter; 98f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 99f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Helper to initialize the WebRtc AudioProcessing. 100f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) void InitializeAudioProcessingModule( 1015d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) const blink::WebMediaConstraints& constraints, int effects); 1025d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 1035d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) // Helper to initialize the capture converter. 1045d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) void InitializeCaptureConverter(const media::AudioParameters& source_params); 105f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 106f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Helper to initialize the render converter. 107f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) void InitializeRenderConverterIfNeeded(int sample_rate, 108f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) int number_of_channels, 109f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) int frames_per_buffer); 110f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 111f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Called by ProcessAndConsumeData(). 1125d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) // Returns the new microphone volume in the range of |0, 255]. 1135d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) // When the volume does not need to be updated, it returns 0. 1145d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) int ProcessData(webrtc::AudioFrame* audio_frame, 1155d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) base::TimeDelta capture_delay, 1165d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) int volume, 1175d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) bool key_pressed); 118f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 119f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Called when the processor is going away. 120f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) void StopAudioProcessing(); 121f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 122f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Cached value for the render delay latency. This member is accessed by 123f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // both the capture audio thread and the render audio thread. 124f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) base::subtle::Atomic32 render_delay_ms_; 125f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 126f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // webrtc::AudioProcessing module which does AEC, AGC, NS, HighPass filter, 127f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // ..etc. 128f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) scoped_ptr<webrtc::AudioProcessing> audio_processing_; 129f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 130f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Converter used for the down-mixing and resampling of the capture data. 131f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) scoped_ptr<MediaStreamAudioConverter> capture_converter_; 132f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 133f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // AudioFrame used to hold the output of |capture_converter_|. 134f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) webrtc::AudioFrame capture_frame_; 135f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 136f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Converter used for the down-mixing and resampling of the render data when 137f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // the AEC is enabled. 138f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) scoped_ptr<MediaStreamAudioConverter> render_converter_; 139f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 140f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // AudioFrame used to hold the output of |render_converter_|. 141f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) webrtc::AudioFrame render_frame_; 142f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 143f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Data bus to help converting interleaved data to an AudioBus. 144f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) scoped_ptr<media::AudioBus> render_data_bus_; 145f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 146f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Used to DCHECK that some methods are called on the main render thread. 147f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) base::ThreadChecker main_thread_checker_; 148f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 149f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Used to DCHECK that some methods are called on the capture audio thread. 150f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) base::ThreadChecker capture_thread_checker_; 151f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 152f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) // Used to DCHECK that PushRenderData() is called on the render audio thread. 153f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) base::ThreadChecker render_thread_checker_; 1545d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) 1555d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) // Flag to enable the stereo channels mirroring. 1565d1f7b1de12d16ceb2c938c56701a3e8bfa558f7Torne (Richard Coles) bool audio_mirroring_; 157f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)}; 158f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 159f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)} // namespace content 160f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles) 161f2477e01787aa58f445919b809d89e252beef54fTorne (Richard Coles)#endif // CONTENT_RENDERER_MEDIA_MEDIA_STREAM_AUDIO_PROCESSOR_H_ 162