1/*
2 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_INTERFACE_NETEQ_H_
12#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_INTERFACE_NETEQ_H_
13
14#include <string.h>  // Provide access to size_t.
15
16#include <vector>
17
18#include "webrtc/base/constructormagic.h"
19#include "webrtc/common_types.h"
20#include "webrtc/modules/audio_coding/neteq/interface/audio_decoder.h"
21#include "webrtc/typedefs.h"
22
23namespace webrtc {
24
25// Forward declarations.
26struct WebRtcRTPHeader;
27
// Network statistics reported by NetEq; see NetEq::NetworkStatistics().
// Fields documented as "Q14" are fixed-point values (presumably with 14
// fractional bits, i.e. 1.0 == 1 << 14 -- confirm against the implementation).
struct NetEqNetworkStatistics {
  // Current jitter buffer size in ms.
  uint16_t current_buffer_size_ms;

  // Target buffer size in ms.
  uint16_t preferred_buffer_size_ms;

  // 1 if adding extra delay due to peaky jitter; 0 otherwise.
  uint16_t jitter_peaks_found;

  // Loss rate (network + late) in Q14.
  uint16_t packet_loss_rate;

  // Late loss rate in Q14.
  uint16_t packet_discard_rate;

  // Fraction (of original stream) of synthesized speech inserted through
  // expansion (in Q14).
  uint16_t expand_rate;

  // Fraction of data inserted through pre-emptive expansion (in Q14).
  uint16_t preemptive_rate;

  // Fraction of data removed through acceleration (in Q14).
  uint16_t accelerate_rate;

  // Average clock-drift in parts-per-million (positive or negative).
  int32_t clockdrift_ppm;

  // Number of zero samples added in "off" mode.
  int added_zero_samples;
};
45
// Speech type reported through the |type| argument of NetEq::GetAudio().
// The numeric values are spelled out; they match the implicit numbering.
enum NetEqOutputType {
  kOutputNormal = 0,
  // NOTE(review): presumably packet-loss concealment output -- confirm.
  kOutputPLC = 1,
  // NOTE(review): presumably comfort-noise output -- confirm.
  kOutputCNG = 2,
  kOutputPLCtoCNG = 3,
  // Returned when post-decode VAD is enabled and the signal contains no
  // speech (see NetEq::EnableVad()).
  kOutputVADPassive = 4
};
53
// Playout modes accepted by NetEq::SetPlayoutMode() and reported by
// NetEq::PlayoutMode(). The numeric values are spelled out; they match the
// implicit numbering.
enum NetEqPlayoutMode {
  kPlayoutOn = 0,
  // NOTE(review): presumably the "off" mode referred to by
  // NetEqNetworkStatistics::added_zero_samples -- confirm.
  kPlayoutOff = 1,
  kPlayoutFax = 2,
  kPlayoutStreaming = 3
};
60
61// This is the interface class for NetEq.
62class NetEq {
63 public:
64  enum BackgroundNoiseMode {
65    kBgnOn,    // Default behavior with eternal noise.
66    kBgnFade,  // Noise fades to zero after some time.
67    kBgnOff    // Background noise is always zero.
68  };
69
70  struct Config {
71    Config()
72        : sample_rate_hz(16000),
73          enable_audio_classifier(false),
74          max_packets_in_buffer(50),
75          // |max_delay_ms| has the same effect as calling SetMaximumDelay().
76          max_delay_ms(2000),
77          background_noise_mode(kBgnOff) {}
78
79    int sample_rate_hz;  // Initial vale. Will change with input data.
80    bool enable_audio_classifier;
81    int max_packets_in_buffer;
82    int max_delay_ms;
83    BackgroundNoiseMode background_noise_mode;
84  };
85
86  enum ReturnCodes {
87    kOK = 0,
88    kFail = -1,
89    kNotImplemented = -2
90  };
91
92  enum ErrorCodes {
93    kNoError = 0,
94    kOtherError,
95    kInvalidRtpPayloadType,
96    kUnknownRtpPayloadType,
97    kCodecNotSupported,
98    kDecoderExists,
99    kDecoderNotFound,
100    kInvalidSampleRate,
101    kInvalidPointer,
102    kAccelerateError,
103    kPreemptiveExpandError,
104    kComfortNoiseErrorCode,
105    kDecoderErrorCode,
106    kOtherDecoderError,
107    kInvalidOperation,
108    kDtmfParameterError,
109    kDtmfParsingError,
110    kDtmfInsertError,
111    kStereoNotSupported,
112    kSampleUnderrun,
113    kDecodedTooMuch,
114    kFrameSplitError,
115    kRedundancySplitError,
116    kPacketBufferCorruption,
117    kSyncPacketNotAccepted
118  };
119
120  // Creates a new NetEq object, with parameters set in |config|. The |config|
121  // object will only have to be valid for the duration of the call to this
122  // method.
123  static NetEq* Create(const NetEq::Config& config);
124
125  virtual ~NetEq() {}
126
127  // Inserts a new packet into NetEq. The |receive_timestamp| is an indication
128  // of the time when the packet was received, and should be measured with
129  // the same tick rate as the RTP timestamp of the current payload.
130  // Returns 0 on success, -1 on failure.
131  virtual int InsertPacket(const WebRtcRTPHeader& rtp_header,
132                           const uint8_t* payload,
133                           int length_bytes,
134                           uint32_t receive_timestamp) = 0;
135
136  // Inserts a sync-packet into packet queue. Sync-packets are decoded to
137  // silence and are intended to keep AV-sync intact in an event of long packet
138  // losses when Video NACK is enabled but Audio NACK is not. Clients of NetEq
139  // might insert sync-packet when they observe that buffer level of NetEq is
140  // decreasing below a certain threshold, defined by the application.
141  // Sync-packets should have the same payload type as the last audio payload
142  // type, i.e. they cannot have DTMF or CNG payload type, nor a codec change
143  // can be implied by inserting a sync-packet.
144  // Returns kOk on success, kFail on failure.
145  virtual int InsertSyncPacket(const WebRtcRTPHeader& rtp_header,
146                               uint32_t receive_timestamp) = 0;
147
148  // Instructs NetEq to deliver 10 ms of audio data. The data is written to
149  // |output_audio|, which can hold (at least) |max_length| elements.
150  // The number of channels that were written to the output is provided in
151  // the output variable |num_channels|, and each channel contains
152  // |samples_per_channel| elements. If more than one channel is written,
153  // the samples are interleaved.
154  // The speech type is written to |type|, if |type| is not NULL.
155  // Returns kOK on success, or kFail in case of an error.
156  virtual int GetAudio(size_t max_length, int16_t* output_audio,
157                       int* samples_per_channel, int* num_channels,
158                       NetEqOutputType* type) = 0;
159
160  // Associates |rtp_payload_type| with |codec| and stores the information in
161  // the codec database. Returns 0 on success, -1 on failure.
162  virtual int RegisterPayloadType(enum NetEqDecoder codec,
163                                  uint8_t rtp_payload_type) = 0;
164
165  // Provides an externally created decoder object |decoder| to insert in the
166  // decoder database. The decoder implements a decoder of type |codec| and
167  // associates it with |rtp_payload_type|. Returns kOK on success,
168  // kFail on failure.
169  virtual int RegisterExternalDecoder(AudioDecoder* decoder,
170                                      enum NetEqDecoder codec,
171                                      uint8_t rtp_payload_type) = 0;
172
173  // Removes |rtp_payload_type| from the codec database. Returns 0 on success,
174  // -1 on failure.
175  virtual int RemovePayloadType(uint8_t rtp_payload_type) = 0;
176
177  // Sets a minimum delay in millisecond for packet buffer. The minimum is
178  // maintained unless a higher latency is dictated by channel condition.
179  // Returns true if the minimum is successfully applied, otherwise false is
180  // returned.
181  virtual bool SetMinimumDelay(int delay_ms) = 0;
182
183  // Sets a maximum delay in milliseconds for packet buffer. The latency will
184  // not exceed the given value, even required delay (given the channel
185  // conditions) is higher. Calling this method has the same effect as setting
186  // the |max_delay_ms| value in the NetEq::Config struct.
187  virtual bool SetMaximumDelay(int delay_ms) = 0;
188
189  // The smallest latency required. This is computed bases on inter-arrival
190  // time and internal NetEq logic. Note that in computing this latency none of
191  // the user defined limits (applied by calling setMinimumDelay() and/or
192  // SetMaximumDelay()) are applied.
193  virtual int LeastRequiredDelayMs() const = 0;
194
195  // Not implemented.
196  virtual int SetTargetDelay() = 0;
197
198  // Not implemented.
199  virtual int TargetDelay() = 0;
200
201  // Not implemented.
202  virtual int CurrentDelay() = 0;
203
204  // Sets the playout mode to |mode|.
205  virtual void SetPlayoutMode(NetEqPlayoutMode mode) = 0;
206
207  // Returns the current playout mode.
208  virtual NetEqPlayoutMode PlayoutMode() const = 0;
209
210  // Writes the current network statistics to |stats|. The statistics are reset
211  // after the call.
212  virtual int NetworkStatistics(NetEqNetworkStatistics* stats) = 0;
213
214  // Writes the last packet waiting times (in ms) to |waiting_times|. The number
215  // of values written is no more than 100, but may be smaller if the interface
216  // is polled again before 100 packets has arrived.
217  virtual void WaitingTimes(std::vector<int>* waiting_times) = 0;
218
219  // Writes the current RTCP statistics to |stats|. The statistics are reset
220  // and a new report period is started with the call.
221  virtual void GetRtcpStatistics(RtcpStatistics* stats) = 0;
222
223  // Same as RtcpStatistics(), but does not reset anything.
224  virtual void GetRtcpStatisticsNoReset(RtcpStatistics* stats) = 0;
225
226  // Enables post-decode VAD. When enabled, GetAudio() will return
227  // kOutputVADPassive when the signal contains no speech.
228  virtual void EnableVad() = 0;
229
230  // Disables post-decode VAD.
231  virtual void DisableVad() = 0;
232
233  // Gets the RTP timestamp for the last sample delivered by GetAudio().
234  // Returns true if the RTP timestamp is valid, otherwise false.
235  virtual bool GetPlayoutTimestamp(uint32_t* timestamp) = 0;
236
237  // Not implemented.
238  virtual int SetTargetNumberOfChannels() = 0;
239
240  // Not implemented.
241  virtual int SetTargetSampleRate() = 0;
242
243  // Returns the error code for the last occurred error. If no error has
244  // occurred, 0 is returned.
245  virtual int LastError() = 0;
246
247  // Returns the error code last returned by a decoder (audio or comfort noise).
248  // When LastError() returns kDecoderErrorCode or kComfortNoiseErrorCode, check
249  // this method to get the decoder's error code.
250  virtual int LastDecoderError() = 0;
251
252  // Flushes both the packet buffer and the sync buffer.
253  virtual void FlushBuffers() = 0;
254
255  // Current usage of packet-buffer and it's limits.
256  virtual void PacketBufferStatistics(int* current_num_packets,
257                                      int* max_num_packets) const = 0;
258
259  // Get sequence number and timestamp of the latest RTP.
260  // This method is to facilitate NACK.
261  virtual int DecodedRtpInfo(int* sequence_number,
262                             uint32_t* timestamp) const = 0;
263
264 protected:
265  NetEq() {}
266
267 private:
268  DISALLOW_COPY_AND_ASSIGN(NetEq);
269};
270
271}  // namespace webrtc
272#endif  // WEBRTC_MODULES_AUDIO_CODING_NETEQ_INTERFACE_NETEQ_H_
273