neteq_impl.h revision 6d92bf59f3f8c0ce8ad445c11aaaf955eae752cc
1/*
2 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_
12#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_
13
14#include "webrtc/base/constructormagic.h"
15#include "webrtc/base/scoped_ptr.h"
16#include "webrtc/base/thread_annotations.h"
17#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
18#include "webrtc/modules/audio_coding/neteq/defines.h"
19#include "webrtc/modules/audio_coding/neteq/interface/neteq.h"
20#include "webrtc/modules/audio_coding/neteq/packet.h"  // Declare PacketList.
21#include "webrtc/modules/audio_coding/neteq/random_vector.h"
22#include "webrtc/modules/audio_coding/neteq/rtcp.h"
23#include "webrtc/modules/audio_coding/neteq/statistics_calculator.h"
24#include "webrtc/typedefs.h"
25
26namespace webrtc {
27
28// Forward declarations. Within this header these types appear only as
// pointer/reference parameters, as a pointer return type, or as
// rtc::scoped_ptr<> element types of NetEqImpl members.
// NOTE(review): RandomVector is the exception -- NetEqImpl holds one by value
// (|random_vector_|), so its definition presumably comes from the
// random_vector.h include above and the forward declaration looks redundant.
29class Accelerate;
30class BackgroundNoise;
31class BufferLevelFilter;
32class ComfortNoise;
33class CriticalSectionWrapper;
34class DecisionLogic;
35class DecoderDatabase;
36class DelayManager;
37class DelayPeakDetector;
38class DtmfBuffer;
39class DtmfToneGenerator;
40class Expand;
41class Merge;
42class Normal;
43class PacketBuffer;
44class PayloadSplitter;
45class PostDecodeVad;
46class PreemptiveExpand;
47class RandomVector;
48class SyncBuffer;
49class TimestampScaler;
50struct AccelerateFactory;
51struct DtmfEvent;
52struct ExpandFactory;
53struct PreemptiveExpandFactory;
54
// Concrete implementation of the webrtc::NetEq interface. Every non-static
// data member other than |crit_sect_| is annotated GUARDED_BY(crit_sect_), and
// every protected helper method is annotated
// EXCLUSIVE_LOCKS_REQUIRED(crit_sect_). Copying and assignment are disabled
// through RTC_DISALLOW_COPY_AND_ASSIGN.
55class NetEqImpl : public webrtc::NetEq {
56 public:
57  // Creates a new NetEqImpl object. The object will assume ownership of all
58  // injected dependencies, and will delete them when done.
  // NOTE(review): |create_components| is not described anywhere in this
  // header; presumably it controls whether the internal components are built
  // during construction -- confirm against the implementation.
59  NetEqImpl(const NetEq::Config& config,
60            BufferLevelFilter* buffer_level_filter,
61            DecoderDatabase* decoder_database,
62            DelayManager* delay_manager,
63            DelayPeakDetector* delay_peak_detector,
64            DtmfBuffer* dtmf_buffer,
65            DtmfToneGenerator* dtmf_tone_generator,
66            PacketBuffer* packet_buffer,
67            PayloadSplitter* payload_splitter,
68            TimestampScaler* timestamp_scaler,
69            AccelerateFactory* accelerate_factory,
70            ExpandFactory* expand_factory,
71            PreemptiveExpandFactory* preemptive_expand_factory,
72            bool create_components = true);
73
74  ~NetEqImpl() override;
75
76  // Inserts a new packet into NetEq. The |receive_timestamp| is an indication
77  // of the time when the packet was received, and should be measured with
78  // the same tick rate as the RTP timestamp of the current payload.
79  // Returns 0 on success, -1 on failure.
80  int InsertPacket(const WebRtcRTPHeader& rtp_header,
81                   const uint8_t* payload,
82                   size_t length_bytes,
83                   uint32_t receive_timestamp) override;
84
85  // Inserts a sync-packet into packet queue. Sync-packets are decoded to
86  // silence and are intended to keep AV-sync intact in an event of long packet
87  // losses when Video NACK is enabled but Audio NACK is not. Clients of NetEq
88  // might insert sync-packet when they observe that buffer level of NetEq is
89  // decreasing below a certain threshold, defined by the application.
90  // Sync-packets should have the same payload type as the last audio payload
91  // type, i.e. they cannot have DTMF or CNG payload type, nor a codec change
92  // can be implied by inserting a sync-packet.
93  // Returns kOK on success, kFail on failure.
94  int InsertSyncPacket(const WebRtcRTPHeader& rtp_header,
95                       uint32_t receive_timestamp) override;
96
97  // Instructs NetEq to deliver 10 ms of audio data. The data is written to
98  // |output_audio|, which can hold (at least) |max_length| elements.
99  // The number of channels that were written to the output is provided in
100  // the output variable |num_channels|, and each channel contains
101  // |samples_per_channel| elements. If more than one channel is written,
102  // the samples are interleaved.
103  // The speech type is written to |type|, if |type| is not NULL.
104  // Returns kOK on success, or kFail in case of an error.
105  int GetAudio(size_t max_length,
106               int16_t* output_audio,
107               size_t* samples_per_channel,
108               int* num_channels,
109               NetEqOutputType* type) override;
110
111  // Associates |rtp_payload_type| with |codec| and stores the information in
112  // the codec database. Returns kOK on success, kFail on failure.
113  int RegisterPayloadType(enum NetEqDecoder codec,
114                          uint8_t rtp_payload_type) override;
115
116  // Provides an externally created decoder object |decoder| to insert in the
117  // decoder database. The decoder implements a decoder of type |codec| and
118  // associates it with |rtp_payload_type|. The decoder will produce samples
119  // at the rate |sample_rate_hz|. Returns kOK on success, kFail on failure.
120  int RegisterExternalDecoder(AudioDecoder* decoder,
121                              enum NetEqDecoder codec,
122                              uint8_t rtp_payload_type,
123                              int sample_rate_hz) override;
124
125  // Removes |rtp_payload_type| from the codec database. Returns 0 on success,
126  // -1 on failure.
127  int RemovePayloadType(uint8_t rtp_payload_type) override;
128
  // No contract is stated in this header for the six delay-related overrides
  // that follow; refer to the declarations in webrtc::NetEq that they
  // override.
129  bool SetMinimumDelay(int delay_ms) override;
130
131  bool SetMaximumDelay(int delay_ms) override;
132
133  int LeastRequiredDelayMs() const override;
134
135  int SetTargetDelay() override;
136
137  int TargetDelay() override;
138
139  int CurrentDelayMs() const override;
140
141  // Sets the playout mode to |mode|.
142  // Deprecated.
143  // TODO(henrik.lundin) Delete.
144  void SetPlayoutMode(NetEqPlayoutMode mode) override;
145
146  // Returns the current playout mode.
147  // Deprecated.
148  // TODO(henrik.lundin) Delete.
149  NetEqPlayoutMode PlayoutMode() const override;
150
151  // Writes the current network statistics to |stats|. The statistics are reset
152  // after the call.
153  int NetworkStatistics(NetEqNetworkStatistics* stats) override;
154
155  // Writes the current RTCP statistics to |stats|. The statistics are reset
156  // and a new report period is started with the call.
157  void GetRtcpStatistics(RtcpStatistics* stats) override;
158
159  // Same as GetRtcpStatistics(), but does not reset anything.
160  void GetRtcpStatisticsNoReset(RtcpStatistics* stats) override;
161
162  // Enables post-decode VAD. When enabled, GetAudio() will return
163  // kOutputVADPassive when the signal contains no speech.
164  void EnableVad() override;
165
166  // Disables post-decode VAD.
167  void DisableVad() override;
168
  // No contract is stated in this header for the next three overrides; refer
  // to the declarations in webrtc::NetEq that they override.
169  bool GetPlayoutTimestamp(uint32_t* timestamp) override;
170
171  int SetTargetNumberOfChannels() override;
172
173  int SetTargetSampleRate() override;
174
175  // Returns the error code for the last occurred error. If no error has
176  // occurred, 0 is returned.
177  int LastError() const override;
178
179  // Returns the error code last returned by a decoder (audio or comfort noise).
180  // When LastError() returns kDecoderErrorCode or kComfortNoiseErrorCode, check
181  // this method to get the decoder's error code.
182  int LastDecoderError() override;
183
184  // Flushes both the packet buffer and the sync buffer.
185  void FlushBuffers() override;
186
  // Writes packet-buffer statistics to the two output pointers.
  // NOTE(review): the exact meaning of the two values is not stated in this
  // header; see the webrtc::NetEq declaration this method overrides.
187  void PacketBufferStatistics(int* current_num_packets,
188                              int* max_num_packets) const override;
189
190  // Get sequence number and timestamp of the latest RTP.
191  // This method is to facilitate NACK.
192  int DecodedRtpInfo(int* sequence_number, uint32_t* timestamp) const override;
193
194  // This accessor method is only intended for testing purposes.
195  const SyncBuffer* sync_buffer_for_test() const;
196
197 protected:
  // NOTE(review): presumably the length in ms of the audio block delivered by
  // each GetAudio() call (documented above as 10 ms) -- confirm.
198  static const int kOutputSizeMs = 10;
199  static const size_t kMaxFrameSize = 2880;  // 60 ms @ 48 kHz.
200  // TODO(hlundin): Provide a better value for kSyncBufferSize.
201  static const size_t kSyncBufferSize = 2 * kMaxFrameSize;
202
203  // Inserts a new packet into NetEq. This is used by the InsertPacket method
204  // above. Returns 0 on success, otherwise an error code.
205  // TODO(hlundin): Merge this with InsertPacket above?
206  int InsertPacketInternal(const WebRtcRTPHeader& rtp_header,
207                           const uint8_t* payload,
208                           size_t length_bytes,
209                           uint32_t receive_timestamp,
210                           bool is_sync_packet)
211      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
212
213  // Delivers 10 ms of audio data. The data is written to |output|, which can
214  // hold (at least) |max_length| elements. The number of channels that were
215  // written to the output is provided in the output variable |num_channels|,
216  // and each channel contains |samples_per_channel| elements. If more than one
217  // channel is written, the samples are interleaved.
218  // Returns 0 on success, otherwise an error code.
219  int GetAudioInternal(size_t max_length,
220                       int16_t* output,
221                       size_t* samples_per_channel,
222                       int* num_channels) EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
223
224  // Provides a decision to the GetAudioInternal method. The decision what to
225  // do is written to |operation|. Packets to decode are written to
226  // |packet_list|, and a DTMF event to play is written to |dtmf_event|. When
227  // DTMF should be played, |play_dtmf| is set to true by the method.
228  // Returns 0 on success, otherwise an error code.
229  int GetDecision(Operations* operation,
230                  PacketList* packet_list,
231                  DtmfEvent* dtmf_event,
232                  bool* play_dtmf) EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
233
234  // Decodes the speech packets in |packet_list|, and writes the results to
235  // |decoded_buffer|, which is allocated to hold |decoded_buffer_length|
236  // elements. The length of the decoded data is written to |decoded_length|.
237  // The speech type -- speech or (codec-internal) comfort noise -- is written
238  // to |speech_type|. If |packet_list| contains any SID frames for RFC 3389
239  // comfort noise, those are not decoded.
  // NOTE(review): |decoded_buffer| and |decoded_buffer_length| are not
  // parameters of this method; the comment presumably refers to the members
  // |decoded_buffer_| and |decoded_buffer_length_| -- confirm. The role of
  // |operation| (passed by pointer) is not described here either.
240  int Decode(PacketList* packet_list,
241             Operations* operation,
242             int* decoded_length,
243             AudioDecoder::SpeechType* speech_type)
244      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
245
246  // Sub-method to Decode(). Performs codec internal CNG.
247  int DecodeCng(AudioDecoder* decoder, int* decoded_length,
248                AudioDecoder::SpeechType* speech_type)
249      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
250
251  // Sub-method to Decode(). Performs the actual decoding.
252  int DecodeLoop(PacketList* packet_list,
253                 const Operations& operation,
254                 AudioDecoder* decoder,
255                 int* decoded_length,
256                 AudioDecoder::SpeechType* speech_type)
257      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
258
259  // Sub-method which calls the Normal class to perform the normal operation.
260  void DoNormal(const int16_t* decoded_buffer,
261                size_t decoded_length,
262                AudioDecoder::SpeechType speech_type,
263                bool play_dtmf) EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
264
265  // Sub-method which calls the Merge class to perform the merge operation.
266  void DoMerge(int16_t* decoded_buffer,
267               size_t decoded_length,
268               AudioDecoder::SpeechType speech_type,
269               bool play_dtmf) EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
270
271  // Sub-method which calls the Expand class to perform the expand operation.
272  int DoExpand(bool play_dtmf) EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
273
274  // Sub-method which calls the Accelerate class to perform the accelerate
275  // operation.
  // NOTE(review): |fast_accelerate| is not described in this header;
  // presumably it is related to the |enable_fast_accelerate_| member --
  // confirm against the implementation.
276  int DoAccelerate(int16_t* decoded_buffer,
277                   size_t decoded_length,
278                   AudioDecoder::SpeechType speech_type,
279                   bool play_dtmf,
280                   bool fast_accelerate) EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
281
282  // Sub-method which calls the PreemptiveExpand class to perform the
283  // preemptive expand operation.
284  int DoPreemptiveExpand(int16_t* decoded_buffer,
285                         size_t decoded_length,
286                         AudioDecoder::SpeechType speech_type,
287                         bool play_dtmf) EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
288
289  // Sub-method which calls the ComfortNoise class to generate RFC 3389 comfort
290  // noise. |packet_list| can either contain one SID frame to update the
291  // noise parameters, or no payload at all, in which case the previously
292  // received parameters are used.
293  int DoRfc3389Cng(PacketList* packet_list, bool play_dtmf)
294      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
295
296  // Calls the audio decoder to generate codec-internal comfort noise when
297  // no packet was received.
298  void DoCodecInternalCng(const int16_t* decoded_buffer, size_t decoded_length)
299      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
300
301  // Calls the DtmfToneGenerator class to generate DTMF tones.
302  int DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf)
303      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
304
305  // Produces packet-loss concealment using alternative methods. If the codec
306  // has an internal PLC, it is called to generate samples. Otherwise, the
307  // method performs zero-stuffing.
308  void DoAlternativePlc(bool increase_timestamp)
309      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
310
311  // Overdub DTMF on top of |output|.
312  int DtmfOverdub(const DtmfEvent& dtmf_event,
313                  size_t num_channels,
314                  int16_t* output) const EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
315
316  // Extracts packets from |packet_buffer_| to produce at least
317  // |required_samples| samples. The packets are inserted into |packet_list|.
318  // Returns the number of samples that the packets in the list will produce, or
319  // -1 in case of an error.
320  int ExtractPackets(size_t required_samples, PacketList* packet_list)
321      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
322
323  // Resets various variables and objects to new values based on the sample rate
324  // |fs_hz| and |channels| number audio channels.
325  void SetSampleRateAndChannels(int fs_hz, size_t channels)
326      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
327
328  // Returns the output type for the audio produced by the latest call to
329  // GetAudio().
330  NetEqOutputType LastOutputType() EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
331
332  // Updates Expand and Merge.
333  virtual void UpdatePlcComponents(int fs_hz, size_t channels)
334      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
335
336  // Creates DecisionLogic object with the mode given by |playout_mode_|.
337  virtual void CreateDecisionLogic() EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
338
  // |crit_sect_| is the lock named by all GUARDED_BY and
  // EXCLUSIVE_LOCKS_REQUIRED annotations in this class. The members in this
  // first group are const scoped_ptrs: the pointers cannot be reseated after
  // construction, although the objects they point to are not const.
339  const rtc::scoped_ptr<CriticalSectionWrapper> crit_sect_;
340  const rtc::scoped_ptr<BufferLevelFilter> buffer_level_filter_
341      GUARDED_BY(crit_sect_);
342  const rtc::scoped_ptr<DecoderDatabase> decoder_database_
343      GUARDED_BY(crit_sect_);
344  const rtc::scoped_ptr<DelayManager> delay_manager_ GUARDED_BY(crit_sect_);
345  const rtc::scoped_ptr<DelayPeakDetector> delay_peak_detector_
346      GUARDED_BY(crit_sect_);
347  const rtc::scoped_ptr<DtmfBuffer> dtmf_buffer_ GUARDED_BY(crit_sect_);
348  const rtc::scoped_ptr<DtmfToneGenerator> dtmf_tone_generator_
349      GUARDED_BY(crit_sect_);
350  const rtc::scoped_ptr<PacketBuffer> packet_buffer_ GUARDED_BY(crit_sect_);
351  const rtc::scoped_ptr<PayloadSplitter> payload_splitter_
352      GUARDED_BY(crit_sect_);
353  const rtc::scoped_ptr<TimestampScaler> timestamp_scaler_
354      GUARDED_BY(crit_sect_);
355  const rtc::scoped_ptr<PostDecodeVad> vad_ GUARDED_BY(crit_sect_);
356  const rtc::scoped_ptr<ExpandFactory> expand_factory_ GUARDED_BY(crit_sect_);
357  const rtc::scoped_ptr<AccelerateFactory> accelerate_factory_
358      GUARDED_BY(crit_sect_);
359  const rtc::scoped_ptr<PreemptiveExpandFactory> preemptive_expand_factory_
360      GUARDED_BY(crit_sect_);
361
  // Remaining state; the scoped_ptr members here are non-const and can
  // therefore be reseated after construction.
362  rtc::scoped_ptr<BackgroundNoise> background_noise_ GUARDED_BY(crit_sect_);
363  rtc::scoped_ptr<DecisionLogic> decision_logic_ GUARDED_BY(crit_sect_);
364  rtc::scoped_ptr<AudioMultiVector> algorithm_buffer_ GUARDED_BY(crit_sect_);
365  rtc::scoped_ptr<SyncBuffer> sync_buffer_ GUARDED_BY(crit_sect_);
366  rtc::scoped_ptr<Expand> expand_ GUARDED_BY(crit_sect_);
367  rtc::scoped_ptr<Normal> normal_ GUARDED_BY(crit_sect_);
368  rtc::scoped_ptr<Merge> merge_ GUARDED_BY(crit_sect_);
369  rtc::scoped_ptr<Accelerate> accelerate_ GUARDED_BY(crit_sect_);
370  rtc::scoped_ptr<PreemptiveExpand> preemptive_expand_ GUARDED_BY(crit_sect_);
371  RandomVector random_vector_ GUARDED_BY(crit_sect_);
372  rtc::scoped_ptr<ComfortNoise> comfort_noise_ GUARDED_BY(crit_sect_);
373  Rtcp rtcp_ GUARDED_BY(crit_sect_);
374  StatisticsCalculator stats_ GUARDED_BY(crit_sect_);
375  int fs_hz_ GUARDED_BY(crit_sect_);
376  int fs_mult_ GUARDED_BY(crit_sect_);
377  size_t output_size_samples_ GUARDED_BY(crit_sect_);
378  size_t decoder_frame_length_ GUARDED_BY(crit_sect_);
379  Modes last_mode_ GUARDED_BY(crit_sect_);
380  rtc::scoped_ptr<int16_t[]> mute_factor_array_ GUARDED_BY(crit_sect_);
381  size_t decoded_buffer_length_ GUARDED_BY(crit_sect_);
382  rtc::scoped_ptr<int16_t[]> decoded_buffer_ GUARDED_BY(crit_sect_);
383  uint32_t playout_timestamp_ GUARDED_BY(crit_sect_);
384  bool new_codec_ GUARDED_BY(crit_sect_);
385  uint32_t timestamp_ GUARDED_BY(crit_sect_);
386  bool reset_decoder_ GUARDED_BY(crit_sect_);
387  uint8_t current_rtp_payload_type_ GUARDED_BY(crit_sect_);
388  uint8_t current_cng_rtp_payload_type_ GUARDED_BY(crit_sect_);
389  uint32_t ssrc_ GUARDED_BY(crit_sect_);
390  bool first_packet_ GUARDED_BY(crit_sect_);
391  int error_code_ GUARDED_BY(crit_sect_);  // Store last error code.
392  int decoder_error_code_ GUARDED_BY(crit_sect_);
393  const BackgroundNoiseMode background_noise_mode_ GUARDED_BY(crit_sect_);
394  NetEqPlayoutMode playout_mode_ GUARDED_BY(crit_sect_);
395  bool enable_fast_accelerate_ GUARDED_BY(crit_sect_);
396
397  // These values are used by NACK module to estimate time-to-play of
398  // a missing packet. Occasionally, NetEq might decide to decode more
399  // than one packet. Therefore, these values store sequence number and
400  // timestamp of the first packet pulled from the packet buffer. In
401  // such cases, these values do not exactly represent the sequence number
402  // or timestamp associated with a 10ms audio pulled from NetEq. NACK
403  // module is designed to compensate for this.
404  int decoded_packet_sequence_number_ GUARDED_BY(crit_sect_);
405  uint32_t decoded_packet_timestamp_ GUARDED_BY(crit_sect_);
406
407 private:
408  RTC_DISALLOW_COPY_AND_ASSIGN(NetEqImpl);
409};
410
411}  // namespace webrtc
412#endif  // WEBRTC_MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_
413