1/*
2 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_
12#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_
13
14#include <vector>
15
16#include "webrtc/base/constructormagic.h"
17#include "webrtc/base/thread_annotations.h"
18#include "webrtc/modules/audio_coding/neteq/audio_multi_vector.h"
19#include "webrtc/modules/audio_coding/neteq/defines.h"
20#include "webrtc/modules/audio_coding/neteq/interface/neteq.h"
21#include "webrtc/modules/audio_coding/neteq/packet.h"  // Declare PacketList.
22#include "webrtc/modules/audio_coding/neteq/random_vector.h"
23#include "webrtc/modules/audio_coding/neteq/rtcp.h"
24#include "webrtc/modules/audio_coding/neteq/statistics_calculator.h"
25#include "webrtc/system_wrappers/interface/scoped_ptr.h"
26#include "webrtc/typedefs.h"
27
28namespace webrtc {
29
30// Forward declarations.
// In this header NetEqImpl uses these types only as pointer/reference
// parameters, pointer return types or scoped_ptr members, so incomplete types
// are sufficient here. The one exception is RandomVector, which NetEqImpl also
// holds by value (|random_vector_|); its complete definition comes from the
// random_vector.h include at the top of this file.
31class Accelerate;
32class BackgroundNoise;
33class BufferLevelFilter;
34class ComfortNoise;
35class CriticalSectionWrapper;
36class DecisionLogic;
37class DecoderDatabase;
38class DelayManager;
39class DelayPeakDetector;
40class DtmfBuffer;
41class DtmfToneGenerator;
42class Expand;
43class Merge;
44class Normal;
45class PacketBuffer;
46class PayloadSplitter;
47class PostDecodeVad;
48class PreemptiveExpand;
49class RandomVector;
50class SyncBuffer;
51class TimestampScaler;
52struct AccelerateFactory;
53struct DtmfEvent;
54struct ExpandFactory;
55struct PreemptiveExpandFactory;
56
// NetEqImpl derives from the webrtc::NetEq interface and overrides its
// virtual methods. Every non-static data member other than |crit_sect_| itself
// is annotated GUARDED_BY(crit_sect_), and every protected helper method is
// annotated EXCLUSIVE_LOCKS_REQUIRED(crit_sect_), i.e. the helpers expect the
// caller to already hold that lock.
57class NetEqImpl : public webrtc::NetEq {
58 public:
59  // Creates a new NetEqImpl object. The object will assume ownership of all
60  // injected dependencies, and will delete them when done.
  // NOTE(review): the effect of |create_components| (default: true) is not
  // visible in this header; see the constructor definition.
61  NetEqImpl(const NetEq::Config& config,
62            BufferLevelFilter* buffer_level_filter,
63            DecoderDatabase* decoder_database,
64            DelayManager* delay_manager,
65            DelayPeakDetector* delay_peak_detector,
66            DtmfBuffer* dtmf_buffer,
67            DtmfToneGenerator* dtmf_tone_generator,
68            PacketBuffer* packet_buffer,
69            PayloadSplitter* payload_splitter,
70            TimestampScaler* timestamp_scaler,
71            AccelerateFactory* accelerate_factory,
72            ExpandFactory* expand_factory,
73            PreemptiveExpandFactory* preemptive_expand_factory,
74            bool create_components = true);
75
76  virtual ~NetEqImpl();
77
78  // Inserts a new packet into NetEq. The |receive_timestamp| is an indication
79  // of the time when the packet was received, and should be measured with
80  // the same tick rate as the RTP timestamp of the current payload.
81  // Returns 0 on success, -1 on failure.
82  virtual int InsertPacket(const WebRtcRTPHeader& rtp_header,
83                           const uint8_t* payload,
84                           int length_bytes,
85                           uint32_t receive_timestamp) OVERRIDE;
86
87  // Inserts a sync-packet into packet queue. Sync-packets are decoded to
88  // silence and are intended to keep AV-sync intact in an event of long packet
89  // losses when Video NACK is enabled but Audio NACK is not. Clients of NetEq
90  // might insert sync-packet when they observe that buffer level of NetEq is
91  // decreasing below a certain threshold, defined by the application.
92  // Sync-packets should have the same payload type as the last audio payload
93  // type, i.e. they cannot have DTMF or CNG payload type, nor a codec change
94  // can be implied by inserting a sync-packet.
95  // Returns kOK on success, kFail on failure.
96  virtual int InsertSyncPacket(const WebRtcRTPHeader& rtp_header,
97                               uint32_t receive_timestamp) OVERRIDE;
98
99  // Instructs NetEq to deliver 10 ms of audio data. The data is written to
100  // |output_audio|, which can hold (at least) |max_length| elements.
101  // The number of channels that were written to the output is provided in
102  // the output variable |num_channels|, and each channel contains
103  // |samples_per_channel| elements. If more than one channel is written,
104  // the samples are interleaved.
105  // The speech type is written to |type|, if |type| is not NULL.
106  // Returns kOK on success, or kFail in case of an error.
107  virtual int GetAudio(size_t max_length, int16_t* output_audio,
108                       int* samples_per_channel, int* num_channels,
109                       NetEqOutputType* type) OVERRIDE;
110
111  // Associates |rtp_payload_type| with |codec| and stores the information in
112  // the codec database. Returns kOK on success, kFail on failure.
113  virtual int RegisterPayloadType(enum NetEqDecoder codec,
114                                  uint8_t rtp_payload_type) OVERRIDE;
115
116  // Provides an externally created decoder object |decoder| to insert in the
117  // decoder database. The decoder implements a decoder of type |codec| and
118  // associates it with |rtp_payload_type|. Returns kOK on success, kFail on
119  // failure.
120  virtual int RegisterExternalDecoder(AudioDecoder* decoder,
121                                      enum NetEqDecoder codec,
122                                      uint8_t rtp_payload_type) OVERRIDE;
123
124  // Removes |rtp_payload_type| from the codec database. Returns 0 on success,
125  // -1 on failure.
126  virtual int RemovePayloadType(uint8_t rtp_payload_type) OVERRIDE;
127
  // Delay configuration/query overrides of the NetEq interface.
  // NOTE(review): their exact semantics (e.g. what the bool result signals)
  // are not visible in this header; confirm against the NetEq interface.
128  virtual bool SetMinimumDelay(int delay_ms) OVERRIDE;
129
130  virtual bool SetMaximumDelay(int delay_ms) OVERRIDE;
131
132  virtual int LeastRequiredDelayMs() const OVERRIDE;
133
  // Not implemented: always returns kNotImplemented.
134  virtual int SetTargetDelay() OVERRIDE { return kNotImplemented; }
135
  // Not implemented: always returns kNotImplemented.
136  virtual int TargetDelay() OVERRIDE { return kNotImplemented; }
137
  // Not implemented: always returns kNotImplemented.
138  virtual int CurrentDelay() OVERRIDE { return kNotImplemented; }
139
140  // Sets the playout mode to |mode|.
141  virtual void SetPlayoutMode(NetEqPlayoutMode mode) OVERRIDE;
142
143  // Returns the current playout mode.
144  virtual NetEqPlayoutMode PlayoutMode() const OVERRIDE;
145
146  // Writes the current network statistics to |stats|. The statistics are reset
147  // after the call.
148  virtual int NetworkStatistics(NetEqNetworkStatistics* stats) OVERRIDE;
149
150  // Writes the last packet waiting times (in ms) to |waiting_times|. The number
151  // of values written is no more than 100, but may be smaller if the interface
152  // is polled again before 100 packets have arrived.
153  virtual void WaitingTimes(std::vector<int>* waiting_times) OVERRIDE;
154
155  // Writes the current RTCP statistics to |stats|. The statistics are reset
156  // and a new report period is started with the call.
157  virtual void GetRtcpStatistics(RtcpStatistics* stats) OVERRIDE;
158
159  // Same as GetRtcpStatistics(), but does not reset anything.
160  virtual void GetRtcpStatisticsNoReset(RtcpStatistics* stats) OVERRIDE;
161
162  // Enables post-decode VAD. When enabled, GetAudio() will return
163  // kOutputVADPassive when the signal contains no speech.
164  virtual void EnableVad() OVERRIDE;
165
166  // Disables post-decode VAD.
167  virtual void DisableVad() OVERRIDE;
168
  // NOTE(review): presumably writes the current playout timestamp to
  // |timestamp| and uses the bool result to signal validity; confirm against
  // the NetEq interface and the implementation.
169  virtual bool GetPlayoutTimestamp(uint32_t* timestamp) OVERRIDE;
170
  // Not implemented: always returns kNotImplemented.
171  virtual int SetTargetNumberOfChannels() OVERRIDE { return kNotImplemented; }
172
  // Not implemented: always returns kNotImplemented.
173  virtual int SetTargetSampleRate() OVERRIDE { return kNotImplemented; }
174
175  // Returns the error code for the last occurred error. If no error has
176  // occurred, 0 is returned.
177  virtual int LastError() OVERRIDE;
178
179  // Returns the error code last returned by a decoder (audio or comfort noise).
180  // When LastError() returns kDecoderErrorCode or kComfortNoiseErrorCode, check
181  // this method to get the decoder's error code.
182  virtual int LastDecoderError() OVERRIDE;
183
184  // Flushes both the packet buffer and the sync buffer.
185  virtual void FlushBuffers() OVERRIDE;
186
  // Reports packet-buffer statistics through the two pointer arguments.
  // NOTE(review): judging by the names these are the current and the maximum
  // number of packets in the packet buffer; confirm against the implementation.
187  virtual void PacketBufferStatistics(int* current_num_packets,
188                                      int* max_num_packets) const OVERRIDE;
189
190  // Get sequence number and timestamp of the latest RTP.
191  // This method is to facilitate NACK.
192  virtual int DecodedRtpInfo(int* sequence_number,
193                             uint32_t* timestamp) const OVERRIDE;
194
195  // This accessor method is only intended for testing purposes.
196  const SyncBuffer* sync_buffer_for_test() const;
197
198 protected:
199  static const int kOutputSizeMs = 10;
200  static const int kMaxFrameSize = 2880;  // 60 ms @ 48 kHz.
201  // TODO(hlundin): Provide a better value for kSyncBufferSize.
202  static const int kSyncBufferSize = 2 * kMaxFrameSize;
203
204  // Inserts a new packet into NetEq. This is used by the InsertPacket method
205  // above. Returns 0 on success, otherwise an error code.
206  // TODO(hlundin): Merge this with InsertPacket above?
207  int InsertPacketInternal(const WebRtcRTPHeader& rtp_header,
208                           const uint8_t* payload,
209                           int length_bytes,
210                           uint32_t receive_timestamp,
211                           bool is_sync_packet)
212      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
213
214  // Delivers 10 ms of audio data. The data is written to |output|, which can
215  // hold (at least) |max_length| elements. The number of channels that were
216  // written to the output is provided in the output variable |num_channels|,
217  // and each channel contains |samples_per_channel| elements. If more than one
218  // channel is written, the samples are interleaved.
219  // Returns 0 on success, otherwise an error code.
220  int GetAudioInternal(size_t max_length,
221                       int16_t* output,
222                       int* samples_per_channel,
223                       int* num_channels) EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
224
225  // Provides a decision to the GetAudioInternal method. The decision what to
226  // do is written to |operation|. Packets to decode are written to
227  // |packet_list|, and a DTMF event to play is written to |dtmf_event|. When
228  // DTMF should be played, |play_dtmf| is set to true by the method.
229  // Returns 0 on success, otherwise an error code.
230  int GetDecision(Operations* operation,
231                  PacketList* packet_list,
232                  DtmfEvent* dtmf_event,
233                  bool* play_dtmf) EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
234
235  // Decodes the speech packets in |packet_list|, and writes the results to
236  // |decoded_buffer|, which is allocated to hold |decoded_buffer_length|
237  // elements. The length of the decoded data is written to |decoded_length|.
238  // The speech type -- speech or (codec-internal) comfort noise -- is written
239  // to |speech_type|. If |packet_list| contains any SID frames for RFC 3389
240  // comfort noise, those are not decoded.
  // NOTE(review): this method takes no |decoded_buffer| or
  // |decoded_buffer_length| parameter; the text above presumably refers to the
  // members |decoded_buffer_| and |decoded_buffer_length_|. The role of
  // |operation| is not described here; see the implementation.
241  int Decode(PacketList* packet_list,
242             Operations* operation,
243             int* decoded_length,
244             AudioDecoder::SpeechType* speech_type)
245      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
246
247  // Sub-method to Decode(). Performs the actual decoding.
248  int DecodeLoop(PacketList* packet_list,
249                 Operations* operation,
250                 AudioDecoder* decoder,
251                 int* decoded_length,
252                 AudioDecoder::SpeechType* speech_type)
253      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
254
255  // Sub-method which calls the Normal class to perform the normal operation.
256  void DoNormal(const int16_t* decoded_buffer,
257                size_t decoded_length,
258                AudioDecoder::SpeechType speech_type,
259                bool play_dtmf) EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
260
261  // Sub-method which calls the Merge class to perform the merge operation.
262  void DoMerge(int16_t* decoded_buffer,
263               size_t decoded_length,
264               AudioDecoder::SpeechType speech_type,
265               bool play_dtmf) EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
266
267  // Sub-method which calls the Expand class to perform the expand operation.
268  int DoExpand(bool play_dtmf) EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
269
270  // Sub-method which calls the Accelerate class to perform the accelerate
271  // operation.
272  int DoAccelerate(int16_t* decoded_buffer,
273                   size_t decoded_length,
274                   AudioDecoder::SpeechType speech_type,
275                   bool play_dtmf) EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
276
277  // Sub-method which calls the PreemptiveExpand class to perform the
278  // preemptive expand operation.
279  int DoPreemptiveExpand(int16_t* decoded_buffer,
280                         size_t decoded_length,
281                         AudioDecoder::SpeechType speech_type,
282                         bool play_dtmf) EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
283
284  // Sub-method which calls the ComfortNoise class to generate RFC 3389 comfort
285  // noise. |packet_list| can either contain one SID frame to update the
286  // noise parameters, or no payload at all, in which case the previously
287  // received parameters are used.
288  int DoRfc3389Cng(PacketList* packet_list, bool play_dtmf)
289      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
290
291  // Calls the audio decoder to generate codec-internal comfort noise when
292  // no packet was received.
293  void DoCodecInternalCng() EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
294
295  // Calls the DtmfToneGenerator class to generate DTMF tones.
296  int DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf)
297      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
298
299  // Produces packet-loss concealment using alternative methods. If the codec
300  // has an internal PLC, it is called to generate samples. Otherwise, the
301  // method performs zero-stuffing.
302  void DoAlternativePlc(bool increase_timestamp)
303      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
304
305  // Overdub DTMF on top of |output|.
306  int DtmfOverdub(const DtmfEvent& dtmf_event,
307                  size_t num_channels,
308                  int16_t* output) const EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
309
310  // Extracts packets from |packet_buffer_| to produce at least
311  // |required_samples| samples. The packets are inserted into |packet_list|.
312  // Returns the number of samples that the packets in the list will produce, or
313  // -1 in case of an error.
314  int ExtractPackets(int required_samples, PacketList* packet_list)
315      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
316
317  // Resets various variables and objects to new values based on the sample rate
318  // |fs_hz| and the number of audio channels |channels|.
319  void SetSampleRateAndChannels(int fs_hz, size_t channels)
320      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
321
322  // Returns the output type for the audio produced by the latest call to
323  // GetAudio().
324  NetEqOutputType LastOutputType() EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
325
326  // Updates Expand and Merge.
327  virtual void UpdatePlcComponents(int fs_hz, size_t channels)
328      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
329
330  // Creates DecisionLogic object for the given mode.
331  virtual void CreateDecisionLogic(NetEqPlayoutMode mode)
332      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
333
  // The scoped_ptr members in this first group are const, so the pointers
  // cannot be re-seated after construction. Apart from |crit_sect_| and
  // |vad_|, each one matches by name a dependency injected through the
  // constructor. |crit_sect_| is the lock named by all the GUARDED_BY /
  // EXCLUSIVE_LOCKS_REQUIRED annotations in this class.
334  const scoped_ptr<CriticalSectionWrapper> crit_sect_;
335  const scoped_ptr<BufferLevelFilter> buffer_level_filter_
336      GUARDED_BY(crit_sect_);
337  const scoped_ptr<DecoderDatabase> decoder_database_ GUARDED_BY(crit_sect_);
338  const scoped_ptr<DelayManager> delay_manager_ GUARDED_BY(crit_sect_);
339  const scoped_ptr<DelayPeakDetector> delay_peak_detector_
340      GUARDED_BY(crit_sect_);
341  const scoped_ptr<DtmfBuffer> dtmf_buffer_ GUARDED_BY(crit_sect_);
342  const scoped_ptr<DtmfToneGenerator> dtmf_tone_generator_
343      GUARDED_BY(crit_sect_);
344  const scoped_ptr<PacketBuffer> packet_buffer_ GUARDED_BY(crit_sect_);
345  const scoped_ptr<PayloadSplitter> payload_splitter_ GUARDED_BY(crit_sect_);
346  const scoped_ptr<TimestampScaler> timestamp_scaler_ GUARDED_BY(crit_sect_);
347  const scoped_ptr<PostDecodeVad> vad_ GUARDED_BY(crit_sect_);
348  const scoped_ptr<ExpandFactory> expand_factory_ GUARDED_BY(crit_sect_);
349  const scoped_ptr<AccelerateFactory> accelerate_factory_
350      GUARDED_BY(crit_sect_);
351  const scoped_ptr<PreemptiveExpandFactory> preemptive_expand_factory_
352      GUARDED_BY(crit_sect_);
353
  // The scoped_ptr members below are non-const, so (unlike the group above)
  // they can be reset to point at new objects after construction.
354  scoped_ptr<BackgroundNoise> background_noise_ GUARDED_BY(crit_sect_);
355  scoped_ptr<DecisionLogic> decision_logic_ GUARDED_BY(crit_sect_);
356  scoped_ptr<AudioMultiVector> algorithm_buffer_ GUARDED_BY(crit_sect_);
357  scoped_ptr<SyncBuffer> sync_buffer_ GUARDED_BY(crit_sect_);
358  scoped_ptr<Expand> expand_ GUARDED_BY(crit_sect_);
359  scoped_ptr<Normal> normal_ GUARDED_BY(crit_sect_);
360  scoped_ptr<Merge> merge_ GUARDED_BY(crit_sect_);
361  scoped_ptr<Accelerate> accelerate_ GUARDED_BY(crit_sect_);
362  scoped_ptr<PreemptiveExpand> preemptive_expand_ GUARDED_BY(crit_sect_);
363  RandomVector random_vector_ GUARDED_BY(crit_sect_);
364  scoped_ptr<ComfortNoise> comfort_noise_ GUARDED_BY(crit_sect_);
365  Rtcp rtcp_ GUARDED_BY(crit_sect_);
366  StatisticsCalculator stats_ GUARDED_BY(crit_sect_);
367  int fs_hz_ GUARDED_BY(crit_sect_);
368  int fs_mult_ GUARDED_BY(crit_sect_);
369  int output_size_samples_ GUARDED_BY(crit_sect_);
370  int decoder_frame_length_ GUARDED_BY(crit_sect_);
371  Modes last_mode_ GUARDED_BY(crit_sect_);
372  scoped_ptr<int16_t[]> mute_factor_array_ GUARDED_BY(crit_sect_);
373  size_t decoded_buffer_length_ GUARDED_BY(crit_sect_);
374  scoped_ptr<int16_t[]> decoded_buffer_ GUARDED_BY(crit_sect_);
375  uint32_t playout_timestamp_ GUARDED_BY(crit_sect_);
376  bool new_codec_ GUARDED_BY(crit_sect_);
377  uint32_t timestamp_ GUARDED_BY(crit_sect_);
378  bool reset_decoder_ GUARDED_BY(crit_sect_);
379  uint8_t current_rtp_payload_type_ GUARDED_BY(crit_sect_);
380  uint8_t current_cng_rtp_payload_type_ GUARDED_BY(crit_sect_);
381  uint32_t ssrc_ GUARDED_BY(crit_sect_);
382  bool first_packet_ GUARDED_BY(crit_sect_);
383  int error_code_ GUARDED_BY(crit_sect_);  // Store last error code.
384  int decoder_error_code_ GUARDED_BY(crit_sect_);
385  const BackgroundNoiseMode background_noise_mode_ GUARDED_BY(crit_sect_);
386
387  // These values are used by NACK module to estimate time-to-play of
388  // a missing packet. Occasionally, NetEq might decide to decode more
389  // than one packet. Therefore, these values store sequence number and
390  // timestamp of the first packet pulled from the packet buffer. In
391  // such cases, these values do not exactly represent the sequence number
392  // or timestamp associated with a 10ms audio pulled from NetEq. NACK
393  // module is designed to compensate for this.
394  int decoded_packet_sequence_number_ GUARDED_BY(crit_sect_);
395  uint32_t decoded_packet_timestamp_ GUARDED_BY(crit_sect_);
396
397 private:
  // Copy construction and assignment are disallowed for this class.
398  DISALLOW_COPY_AND_ASSIGN(NetEqImpl);
399};
400
401}  // namespace webrtc
402#endif  // WEBRTC_MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_
403