1/*
2 *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#ifndef WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RECEIVER_H_
12#define WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RECEIVER_H_
13
14#include <vector>
15
16#include "webrtc/base/thread_annotations.h"
17#include "webrtc/common_audio/vad/include/webrtc_vad.h"
18#include "webrtc/engine_configurations.h"
19#include "webrtc/modules/audio_coding/main/interface/audio_coding_module.h"
20#include "webrtc/modules/audio_coding/main/acm2/acm_codec_database.h"
21#include "webrtc/modules/audio_coding/main/acm2/acm_resampler.h"
22#include "webrtc/modules/audio_coding/main/acm2/call_statistics.h"
23#include "webrtc/modules/audio_coding/main/acm2/initial_delay_manager.h"
24#include "webrtc/modules/audio_coding/neteq/interface/neteq.h"
25#include "webrtc/modules/interface/module_common_types.h"
26#include "webrtc/system_wrappers/interface/scoped_ptr.h"
27#include "webrtc/typedefs.h"
28
29namespace webrtc {
30
31struct CodecInst;
32class CriticalSectionWrapper;
33class NetEq;
34
35namespace acm2 {
36
37class Nack;
38
39class AcmReceiver {
40 public:
41  struct Decoder {
42    bool registered;
43    uint8_t payload_type;
44    // This field is meaningful for codecs where both mono and
45    // stereo versions are registered under the same ID.
46    int channels;
47  };
48
49  // Constructor of the class
50  explicit AcmReceiver(const AudioCodingModule::Config& config);
51
52  // Destructor of the class.
53  ~AcmReceiver();
54
55  //
56  // Inserts a payload with its associated RTP-header into NetEq.
57  //
58  // Input:
59  //   - rtp_header           : RTP header for the incoming payload containing
60  //                            information about payload type, sequence number,
61  //                            timestamp, SSRC and marker bit.
62  //   - incoming_payload     : Incoming audio payload.
63  //   - length_payload       : Length of incoming audio payload in bytes.
64  //
65  // Return value             : 0 if OK.
66  //                           <0 if NetEq returned an error.
67  //
68  int InsertPacket(const WebRtcRTPHeader& rtp_header,
69                   const uint8_t* incoming_payload,
70                   int length_payload);
71
72  //
73  // Asks NetEq for 10 milliseconds of decoded audio.
74  //
75  // Input:
76  //   -desired_freq_hz       : specifies the sampling rate [Hz] of the output
77  //                            audio. If set -1 indicates to resampling is
78  //                            is required and the audio returned at the
79  //                            sampling rate of the decoder.
80  //
81  // Output:
82  //   -audio_frame           : an audio frame were output data and
83  //                            associated parameters are written to.
84  //
85  // Return value             : 0 if OK.
86  //                           -1 if NetEq returned an error.
87  //
88  int GetAudio(int desired_freq_hz, AudioFrame* audio_frame);
89
90  //
91  // Adds a new codec to the NetEq codec database.
92  //
93  // Input:
94  //   - acm_codec_id        : ACM codec ID.
95  //   - payload_type        : payload type.
96  //   - audio_decoder       : pointer to a decoder object. If it is NULL
97  //                           then NetEq will internally create the decoder
98  //                           object. Otherwise, NetEq will store this pointer
99  //                           as the decoder corresponding with the given
100  //                           payload type. NetEq won't acquire the ownership
101  //                           of this pointer. It is up to the client of this
102  //                           class (ACM) to delete it. By providing
103  //                           |audio_decoder| ACM will have control over the
104  //                           decoder instance of the codec. This is essential
105  //                           for a codec like iSAC which encoder/decoder
106  //                           encoder has to know about decoder (bandwidth
107  //                           estimator that is updated at decoding time).
108  //
109  // Return value             : 0 if OK.
110  //                           <0 if NetEq returned an error.
111  //
112  int AddCodec(int acm_codec_id,
113               uint8_t payload_type,
114               int channels,
115               AudioDecoder* audio_decoder);
116
117  //
118  // Sets a minimum delay for packet buffer. The given delay is maintained,
119  // unless channel condition dictates a higher delay.
120  //
121  // Input:
122  //   - delay_ms             : minimum delay in milliseconds.
123  //
124  // Return value             : 0 if OK.
125  //                           <0 if NetEq returned an error.
126  //
127  int SetMinimumDelay(int delay_ms);
128
129  //
130  // Sets a maximum delay [ms] for the packet buffer. The target delay does not
131  // exceed the given value, even if channel condition requires so.
132  //
133  // Input:
134  //   - delay_ms             : maximum delay in milliseconds.
135  //
136  // Return value             : 0 if OK.
137  //                           <0 if NetEq returned an error.
138  //
139  int SetMaximumDelay(int delay_ms);
140
141  //
142  // Get least required delay computed based on channel conditions. Note that
143  // this is before applying any user-defined limits (specified by calling
144  // (SetMinimumDelay() and/or SetMaximumDelay()).
145  //
146  int LeastRequiredDelayMs() const;
147
148  //
149  // Sets an initial delay of |delay_ms| milliseconds. This introduces a playout
150  // delay. Silence (zero signal) is played out until equivalent of |delay_ms|
151  // millisecond of audio is buffered. Then, NetEq maintains the delay.
152  //
153  // Input:
154  //   - delay_ms             : initial delay in milliseconds.
155  //
156  // Return value             : 0 if OK.
157  //                           <0 if NetEq returned an error.
158  //
159  int SetInitialDelay(int delay_ms);
160
161  //
162  // Resets the initial delay to zero.
163  //
164  void ResetInitialDelay();
165
166  //
167  // Get the current sampling frequency in Hz.
168  //
169  // Return value             : Sampling frequency in Hz.
170  //
171  int current_sample_rate_hz() const;
172
173  //
174  // Sets the playout mode.
175  //
176  // Input:
177  //   - mode                 : an enumerator specifying the playout mode.
178  //
179  void SetPlayoutMode(AudioPlayoutMode mode);
180
181  //
182  // Get the current playout mode.
183  //
184  // Return value             : The current playout mode.
185  //
186  AudioPlayoutMode PlayoutMode() const;
187
188  //
189  // Get the current network statistics from NetEq.
190  //
191  // Output:
192  //   - statistics           : The current network statistics.
193  //
194  void NetworkStatistics(ACMNetworkStatistics* statistics);
195
196  //
197  // Enable post-decoding VAD.
198  //
199  void EnableVad();
200
201  //
202  // Disable post-decoding VAD.
203  //
204  void DisableVad();
205
206  //
207  // Returns whether post-decoding VAD is enabled (true) or disabled (false).
208  //
209  bool vad_enabled() const { return vad_enabled_; }
210
211  //
212  // Flushes the NetEq packet and speech buffers.
213  //
214  void FlushBuffers();
215
216  //
217  // Removes a payload-type from the NetEq codec database.
218  //
219  // Input:
220  //   - payload_type         : the payload-type to be removed.
221  //
222  // Return value             : 0 if OK.
223  //                           -1 if an error occurred.
224  //
225  int RemoveCodec(uint8_t payload_type);
226
227  //
228  // Remove all registered codecs.
229  //
230  int RemoveAllCodecs();
231
232  //
233  // Set ID.
234  //
235  void set_id(int id);  // TODO(turajs): can be inline.
236
237  //
238  // Gets the RTP timestamp of the last sample delivered by GetAudio().
239  // Returns true if the RTP timestamp is valid, otherwise false.
240  //
241  bool GetPlayoutTimestamp(uint32_t* timestamp);
242
243  //
244  // Return the index of the codec associated with the last non-CNG/non-DTMF
245  // received payload. If no non-CNG/non-DTMF payload is received -1 is
246  // returned.
247  //
248  int last_audio_codec_id() const;  // TODO(turajs): can be inline.
249
250  //
251  // Return the payload-type of the last non-CNG/non-DTMF RTP packet. If no
252  // non-CNG/non-DTMF packet is received -1 is returned.
253  //
254  int last_audio_payload_type() const;  // TODO(turajs): can be inline.
255
256  //
257  // Get the audio codec associated with the last non-CNG/non-DTMF received
258  // payload. If no non-CNG/non-DTMF packet is received -1 is returned,
259  // otherwise return 0.
260  //
261  int LastAudioCodec(CodecInst* codec) const;
262
263  //
264  // Return payload type of RED if it is registered, otherwise return -1;
265  //
266  int RedPayloadType() const;
267
268  //
269  // Get a decoder given its registered payload-type.
270  //
271  // Input:
272  //    -payload_type         : the payload-type of the codec to be retrieved.
273  //
274  // Output:
275  //    -codec                : codec associated with the given payload-type.
276  //
277  // Return value             : 0 if succeeded.
278  //                           -1 if failed, e.g. given payload-type is not
279  //                              registered.
280  //
281  int DecoderByPayloadType(uint8_t payload_type,
282                           CodecInst* codec) const;
283
284  //
285  // Enable NACK and set the maximum size of the NACK list. If NACK is already
286  // enabled then the maximum NACK list size is modified accordingly.
287  //
288  // Input:
289  //    -max_nack_list_size  : maximum NACK list size
290  //                           should be positive (none zero) and less than or
291  //                           equal to |Nack::kNackListSizeLimit|
292  // Return value
293  //                         : 0 if succeeded.
294  //                          -1 if failed
295  //
296  int EnableNack(size_t max_nack_list_size);
297
298  // Disable NACK.
299  void DisableNack();
300
301  //
302  // Get a list of packets to be retransmitted.
303  //
304  // Input:
305  //    -round_trip_time_ms : estimate of the round-trip-time (in milliseconds).
306  // Return value           : list of packets to be retransmitted.
307  //
308  std::vector<uint16_t> GetNackList(int round_trip_time_ms) const;
309
310  //
311  // Get statistics of calls to GetAudio().
312  void GetDecodingCallStatistics(AudioDecodingCallStats* stats) const;
313
314 private:
315  int PayloadType2CodecIndex(uint8_t payload_type) const;
316
317  bool GetSilence(int desired_sample_rate_hz, AudioFrame* frame)
318      EXCLUSIVE_LOCKS_REQUIRED(crit_sect_);
319
320  int GetNumSyncPacketToInsert(uint16_t received_squence_number);
321
322  int RtpHeaderToCodecIndex(
323      const RTPHeader& rtp_header, const uint8_t* payload) const;
324
325  uint32_t NowInTimestamp(int decoder_sampling_rate) const;
326
327  void InsertStreamOfSyncPackets(InitialDelayManager::SyncStream* sync_stream);
328
329  scoped_ptr<CriticalSectionWrapper> crit_sect_;
330  int id_;  // TODO(henrik.lundin) Make const.
331  int last_audio_decoder_ GUARDED_BY(crit_sect_);
332  AudioFrame::VADActivity previous_audio_activity_ GUARDED_BY(crit_sect_);
333  int current_sample_rate_hz_ GUARDED_BY(crit_sect_);
334  ACMResampler resampler_ GUARDED_BY(crit_sect_);
335  // Used in GetAudio, declared as member to avoid allocating every 10ms.
336  // TODO(henrik.lundin) Stack-allocate in GetAudio instead?
337  int16_t audio_buffer_[AudioFrame::kMaxDataSizeSamples] GUARDED_BY(crit_sect_);
338  scoped_ptr<Nack> nack_ GUARDED_BY(crit_sect_);
339  bool nack_enabled_ GUARDED_BY(crit_sect_);
340  CallStatistics call_stats_ GUARDED_BY(crit_sect_);
341  NetEq* neteq_;
342  Decoder decoders_[ACMCodecDB::kMaxNumCodecs];
343  bool vad_enabled_;
344  Clock* clock_;  // TODO(henrik.lundin) Make const if possible.
345
346  // Indicates if a non-zero initial delay is set, and the receiver is in
347  // AV-sync mode.
348  bool av_sync_;
349  scoped_ptr<InitialDelayManager> initial_delay_manager_;
350
351  // The following are defined as members to avoid creating them in every
352  // iteration. |missing_packets_sync_stream_| is *ONLY* used in InsertPacket().
353  // |late_packets_sync_stream_| is only used in GetAudio(). Both of these
354  // member variables are allocated only when we AV-sync is enabled, i.e.
355  // initial delay is set.
356  scoped_ptr<InitialDelayManager::SyncStream> missing_packets_sync_stream_;
357  scoped_ptr<InitialDelayManager::SyncStream> late_packets_sync_stream_;
358};
359
360}  // namespace acm2
361
362}  // namespace webrtc
363
364#endif  // WEBRTC_MODULES_AUDIO_CODING_MAIN_ACM2_ACM_RECEIVER_H_
365