1/*
2 *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3 *
4 *  Use of this source code is governed by a BSD-style license
5 *  that can be found in the LICENSE file in the root of the source
6 *  tree. An additional intellectual property rights grant can be found
7 *  in the file PATENTS.  All contributing project authors may
8 *  be found in the AUTHORS file in the root of the source tree.
9 */
10
11/*
12 * This file includes unit tests for NetEQ.
13 */
14
15#include "webrtc/modules/audio_coding/neteq/interface/neteq.h"
16
17#include <math.h>
18#include <stdlib.h>
19#include <string.h>  // memset
20
21#include <algorithm>
22#include <set>
23#include <string>
24#include <vector>
25
26#include "gflags/gflags.h"
27#include "testing/gtest/include/gtest/gtest.h"
28#include "webrtc/modules/audio_coding/neteq/test/NETEQTEST_RTPpacket.h"
29#include "webrtc/modules/audio_coding/neteq/tools/audio_loop.h"
30#include "webrtc/modules/audio_coding/codecs/pcm16b/include/pcm16b.h"
31#include "webrtc/test/testsupport/fileutils.h"
32#include "webrtc/test/testsupport/gtest_disable.h"
33#include "webrtc/typedefs.h"
34
// Command-line flag --gen_ref. When set, TestBitExactness passes empty
// reference-file names to DecodeAndCompare, which then writes its output to
// new files instead of comparing against existing reference files.
DEFINE_bool(gen_ref, false, "Generate reference files.");
36
37namespace webrtc {
38
// Returns true iff each of the first |buf_length| samples of |buf| equals
// zero. A zero or negative |buf_length| examines nothing and yields true.
static bool IsAllZero(const int16_t* buf, int buf_length) {
  for (int i = 0; i < buf_length; ++i) {
    if (buf[i] != 0)
      return false;  // Found a non-zero sample; no need to look further.
  }
  return true;
}
45
// Returns true iff none of the first |buf_length| samples of |buf| is zero.
// A zero or negative |buf_length| examines nothing and yields true.
static bool IsAllNonZero(const int16_t* buf, int buf_length) {
  for (int i = 0; i < buf_length; ++i) {
    if (buf[i] == 0)
      return false;  // Found a zero sample; no need to look further.
  }
  return true;
}
52
// Helper that handles reference data for a test. Results can be written to an
// output file, compared against the contents of an input (reference) file, or
// both. Each step is skipped when the corresponding file handle is NULL.
// NOTE(review): the class holds raw FILE* handles and declares no copy
// constructor or assignment operator; copying an instance would close the
// handles twice.
class RefFiles {
 public:
  // Opens |input_file| for binary reading and |output_file| for binary
  // writing. An empty name leaves the corresponding handle NULL.
  RefFiles(const std::string& input_file, const std::string& output_file);
  // Expects the input file (if open) to be at end-of-file, then closes both
  // files.
  ~RefFiles();
  // Writes |test_results| to the output file and then compares it with the
  // next record of the input file.
  template<class T> void ProcessReference(const T& test_results);
  // Same as above for the first |length| elements of an array.
  template<typename T, size_t n> void ProcessReference(
      const T (&test_results)[n],
      size_t length);
  // Writes the first |length| elements of |test_results| to the output file.
  template<typename T, size_t n> void WriteToFile(
      const T (&test_results)[n],
      size_t length);
  // Reads |length| elements from the input file and asserts that they are
  // bytewise equal to the first |length| elements of |test_results|.
  template<typename T, size_t n> void ReadFromFileAndCompare(
      const T (&test_results)[n],
      size_t length);
  // Writes / compares a NetEqNetworkStatistics struct as one raw record.
  void WriteToFile(const NetEqNetworkStatistics& stats);
  void ReadFromFileAndCompare(const NetEqNetworkStatistics& stats);
  // Writes / compares the fraction_lost, cumulative_lost,
  // extended_max_sequence_number and jitter members of an RtcpStatistics,
  // one field at a time.
  void WriteToFile(const RtcpStatistics& stats);
  void ReadFromFileAndCompare(const RtcpStatistics& stats);

  FILE* input_fp_;   // Reference file to compare against; NULL if unused.
  FILE* output_fp_;  // File receiving generated data; NULL if unused.
};
75
76RefFiles::RefFiles(const std::string &input_file,
77                   const std::string &output_file)
78    : input_fp_(NULL),
79      output_fp_(NULL) {
80  if (!input_file.empty()) {
81    input_fp_ = fopen(input_file.c_str(), "rb");
82    EXPECT_TRUE(input_fp_ != NULL);
83  }
84  if (!output_file.empty()) {
85    output_fp_ = fopen(output_file.c_str(), "wb");
86    EXPECT_TRUE(output_fp_ != NULL);
87  }
88}
89
90RefFiles::~RefFiles() {
91  if (input_fp_) {
92    EXPECT_EQ(EOF, fgetc(input_fp_));  // Make sure that we reached the end.
93    fclose(input_fp_);
94  }
95  if (output_fp_) fclose(output_fp_);
96}
97
// Handles one result object: first writes it to the output file, then
// compares it against the next record in the input file. Each step is
// performed by the matching WriteToFile / ReadFromFileAndCompare overload for
// |T|, which is a no-op when its file is not open.
template<class T>
void RefFiles::ProcessReference(const T& test_results) {
  WriteToFile(test_results);
  ReadFromFileAndCompare(test_results);
}
103
// Array variant: writes the first |length| elements of |test_results| to the
// output file, then compares them against the next |length| elements of the
// input file. |n| is the deduced extent of the array.
template<typename T, size_t n>
void RefFiles::ProcessReference(const T (&test_results)[n], size_t length) {
  WriteToFile(test_results, length);
  ReadFromFileAndCompare(test_results, length);
}
109
110template<typename T, size_t n>
111void RefFiles::WriteToFile(const T (&test_results)[n], size_t length) {
112  if (output_fp_) {
113    ASSERT_EQ(length, fwrite(&test_results, sizeof(T), length, output_fp_));
114  }
115}
116
117template<typename T, size_t n>
118void RefFiles::ReadFromFileAndCompare(const T (&test_results)[n],
119                                      size_t length) {
120  if (input_fp_) {
121    // Read from ref file.
122    T* ref = new T[length];
123    ASSERT_EQ(length, fread(ref, sizeof(T), length, input_fp_));
124    // Compare
125    ASSERT_EQ(0, memcmp(&test_results, ref, sizeof(T) * length));
126    delete [] ref;
127  }
128}
129
130void RefFiles::WriteToFile(const NetEqNetworkStatistics& stats) {
131  if (output_fp_) {
132    ASSERT_EQ(1u, fwrite(&stats, sizeof(NetEqNetworkStatistics), 1,
133                         output_fp_));
134  }
135}
136
137void RefFiles::ReadFromFileAndCompare(
138    const NetEqNetworkStatistics& stats) {
139  if (input_fp_) {
140    // Read from ref file.
141    size_t stat_size = sizeof(NetEqNetworkStatistics);
142    NetEqNetworkStatistics ref_stats;
143    ASSERT_EQ(1u, fread(&ref_stats, stat_size, 1, input_fp_));
144    // Compare
145    ASSERT_EQ(0, memcmp(&stats, &ref_stats, stat_size));
146  }
147}
148
149void RefFiles::WriteToFile(const RtcpStatistics& stats) {
150  if (output_fp_) {
151    ASSERT_EQ(1u, fwrite(&(stats.fraction_lost), sizeof(stats.fraction_lost), 1,
152                         output_fp_));
153    ASSERT_EQ(1u, fwrite(&(stats.cumulative_lost),
154                         sizeof(stats.cumulative_lost), 1, output_fp_));
155    ASSERT_EQ(1u, fwrite(&(stats.extended_max_sequence_number),
156                         sizeof(stats.extended_max_sequence_number), 1,
157                         output_fp_));
158    ASSERT_EQ(1u, fwrite(&(stats.jitter), sizeof(stats.jitter), 1,
159                         output_fp_));
160  }
161}
162
163void RefFiles::ReadFromFileAndCompare(
164    const RtcpStatistics& stats) {
165  if (input_fp_) {
166    // Read from ref file.
167    RtcpStatistics ref_stats;
168    ASSERT_EQ(1u, fread(&(ref_stats.fraction_lost),
169                        sizeof(ref_stats.fraction_lost), 1, input_fp_));
170    ASSERT_EQ(1u, fread(&(ref_stats.cumulative_lost),
171                        sizeof(ref_stats.cumulative_lost), 1, input_fp_));
172    ASSERT_EQ(1u, fread(&(ref_stats.extended_max_sequence_number),
173                        sizeof(ref_stats.extended_max_sequence_number), 1,
174                        input_fp_));
175    ASSERT_EQ(1u, fread(&(ref_stats.jitter), sizeof(ref_stats.jitter), 1,
176                        input_fp_));
177    // Compare
178    ASSERT_EQ(ref_stats.fraction_lost, stats.fraction_lost);
179    ASSERT_EQ(ref_stats.cumulative_lost, stats.cumulative_lost);
180    ASSERT_EQ(ref_stats.extended_max_sequence_number,
181              stats.extended_max_sequence_number);
182    ASSERT_EQ(ref_stats.jitter, stats.jitter);
183  }
184}
185
// Test fixture shared by the NetEq decoding tests in this file. It owns a
// NetEq instance created from |config_|, an optional RTP input file, a
// simulated clock and a buffer that receives the audio pulled from NetEq.
class NetEqDecodingTest : public ::testing::Test {
 protected:
  // NetEQ must be polled for data once every 10 ms. Thus, neither of the
  // constants below can be changed.
  static const int kTimeStepMs = 10;
  static const int kBlockSize8kHz = kTimeStepMs * 8;
  static const int kBlockSize16kHz = kTimeStepMs * 16;
  static const int kBlockSize32kHz = kTimeStepMs * 32;
  static const int kMaxBlockSize = kBlockSize32kHz;
  static const int kInitSampleRateHz = 8000;

  NetEqDecodingTest();
  // Creates the NetEq instance, records its initial buffer size as the
  // algorithmic delay and registers the decoders.
  virtual void SetUp();
  // Deletes the NetEq instance and closes the RTP file if one is open.
  virtual void TearDown();
  // NOTE(review): declared here, but no definition is visible in this part of
  // the file.
  void SelectDecoders(NetEqDecoder* used_codec);
  // Registers the payload types used by the tests.
  void LoadDecoders();
  // Opens |rtp_file| for reading and skips its file header.
  void OpenInputFile(const std::string &rtp_file);
  // Inserts all packets that are due at the current simulated time, pulls one
  // block of audio into |out_data_| and advances the clock by kTimeStepMs.
  void Process(NETEQTEST_RTPpacket* rtp_ptr, int* out_len);
  // Runs |rtp_file| through NetEq and writes or compares audio, network
  // statistics and RTCP statistics. An empty reference name means that the
  // corresponding output is written to a new file instead of being compared.
  void DecodeAndCompare(const std::string& rtp_file,
                        const std::string& ref_file,
                        const std::string& stat_ref_file,
                        const std::string& rtcp_ref_file);
  // Fills in |rtp_info| for a packet with payload type 94.
  static void PopulateRtpInfo(int frame_index,
                              int timestamp,
                              WebRtcRTPHeader* rtp_info);
  // Fills in |rtp_info| and a one-byte |payload| for a packet with payload
  // type 98 (comfort noise).
  static void PopulateCng(int frame_index,
                          int timestamp,
                          WebRtcRTPHeader* rtp_info,
                          uint8_t* payload,
                          int* payload_len);

  // Test helpers; WrapTest and DuplicateCng are defined outside this part of
  // the file.
  void WrapTest(uint16_t start_seq_no, uint32_t start_timestamp,
                const std::set<uint16_t>& drop_seq_numbers,
                bool expect_seq_no_wrap, bool expect_timestamp_wrap);

  // Speech, then a long period of comfort noise with the given clock drift
  // and optional network freeze, then speech again; checks how quickly speech
  // returns and how much the delay changed.
  void LongCngWithClockDrift(double drift_factor,
                             double network_freeze_ms,
                             bool pull_audio_during_freeze,
                             int delay_tolerance_ms,
                             int max_time_to_speech_ms);

  void DuplicateCng();

  uint32_t PlayoutTimestamp();

  NetEq* neteq_;                     // Created in SetUp, deleted in TearDown.
  NetEq::Config config_;             // Configuration passed to NetEq::Create.
  FILE* rtp_fp_;                     // RTP input file; NULL until opened.
  unsigned int sim_clock_;           // Advanced by kTimeStepMs in Process().
  int16_t out_data_[kMaxBlockSize];  // Receives the output of GetAudio.
  int output_sample_rate_;           // Derived from the last block size.
  int algorithmic_delay_ms_;         // Buffer size reported right after
                                     // creation.
};
239
// Out-of-class definitions of the static const members declared (with their
// values) inside NetEqDecodingTest. Defining them here allocates storage, so
// that the constants can be passed by reference.
const int NetEqDecodingTest::kTimeStepMs;
const int NetEqDecodingTest::kBlockSize8kHz;
const int NetEqDecodingTest::kBlockSize16kHz;
const int NetEqDecodingTest::kBlockSize32kHz;
const int NetEqDecodingTest::kMaxBlockSize;
const int NetEqDecodingTest::kInitSampleRateHz;
247
// Puts the fixture in a known state: no NetEq instance and no RTP file yet,
// simulated clock at zero, output buffer cleared, and both the configured and
// the assumed output sample rate set to kInitSampleRateHz. The NetEq instance
// itself is created later, in SetUp().
NetEqDecodingTest::NetEqDecodingTest()
    : neteq_(NULL),
      config_(),
      rtp_fp_(NULL),
      sim_clock_(0),
      output_sample_rate_(kInitSampleRateHz),
      algorithmic_delay_ms_(0) {
  config_.sample_rate_hz = kInitSampleRateHz;
  memset(out_data_, 0, sizeof(out_data_));
}
258
259void NetEqDecodingTest::SetUp() {
260  neteq_ = NetEq::Create(config_);
261  NetEqNetworkStatistics stat;
262  ASSERT_EQ(0, neteq_->NetworkStatistics(&stat));
263  algorithmic_delay_ms_ = stat.current_buffer_size_ms;
264  ASSERT_TRUE(neteq_);
265  LoadDecoders();
266}
267
268void NetEqDecodingTest::TearDown() {
269  delete neteq_;
270  if (rtp_fp_)
271    fclose(rtp_fp_);
272}
273
// Registers every decoder used by the tests with its RTP payload type. Each
// registration must return 0; the first failure is fatal and stops the
// remaining registrations. iLBC and the iSAC SWB/FB decoders are not
// registered when WEBRTC_ANDROID is defined.
void NetEqDecodingTest::LoadDecoders() {
  // Load PCMu.
  ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderPCMu, 0));
  // Load PCMa.
  ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderPCMa, 8));
#ifndef WEBRTC_ANDROID
  // Load iLBC.
  ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderILBC, 102));
#endif  // WEBRTC_ANDROID
  // Load iSAC.
  ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderISAC, 103));
#ifndef WEBRTC_ANDROID
  // Load iSAC SWB.
  ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderISACswb, 104));
  // Load iSAC FB.
  ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderISACfb, 105));
#endif  // WEBRTC_ANDROID
  // Load PCM16B nb.
  ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderPCM16B, 93));
  // Load PCM16B wb.
  ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderPCM16Bwb, 94));
  // Load PCM16B swb32.
  ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderPCM16Bswb32kHz, 95));
  // Load CNG 8 kHz.
  ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderCNGnb, 13));
  // Load CNG 16 kHz.
  ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderCNGwb, 98));
}
302
// Opens |rtp_file| for binary reading, stores the handle in |rtp_fp_| and
// skips the file header. Failure to open the file or to skip the header is a
// fatal failure, which only returns from this function; callers that must
// stop as well have to check for it.
// NOTE(review): a handle already stored in |rtp_fp_| is overwritten without
// being closed.
void NetEqDecodingTest::OpenInputFile(const std::string &rtp_file) {
  rtp_fp_ = fopen(rtp_file.c_str(), "rb");
  ASSERT_TRUE(rtp_fp_ != NULL);
  ASSERT_EQ(0, NETEQTEST_RTPpacket::skipFileHeader(rtp_fp_));
}
308
// Performs one simulation step. All packets whose time stamp has been reached
// by |sim_clock_| are inserted into NetEq, each followed by reading the next
// packet from |rtp_fp_| into |rtp|. Then one block of audio is pulled into
// |out_data_|, with its length stored in |*out_len|, and the clock advances
// by kTimeStepMs. Every ASSERT_* failure returns from this function only.
void NetEqDecodingTest::Process(NETEQTEST_RTPpacket* rtp, int* out_len) {
  // Check if time to receive.
  // A negative dataLen() ends the loop (presumably it marks the end of the
  // input file -- confirm against NETEQTEST_RTPpacket).
  while ((sim_clock_ >= rtp->time()) &&
         (rtp->dataLen() >= 0)) {
    // Packets with a data length of zero are not inserted.
    if (rtp->dataLen() > 0) {
      WebRtcRTPHeader rtpInfo;
      rtp->parseHeader(&rtpInfo);
      // The receive time is the packet time scaled by the number of output
      // samples per millisecond (assumes time() is in ms -- TODO confirm).
      ASSERT_EQ(0, neteq_->InsertPacket(
          rtpInfo,
          rtp->payload(),
          rtp->payloadLen(),
          rtp->time() * (output_sample_rate_ / 1000)));
    }
    // Get next packet.
    ASSERT_NE(-1, rtp->readFromFile(rtp_fp_));
  }

  // Get audio from NetEq.
  NetEqOutputType type;
  int num_channels;
  ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, out_len,
                                &num_channels, &type));
  // The block must have one of the three supported 10 ms sizes.
  ASSERT_TRUE((*out_len == kBlockSize8kHz) ||
              (*out_len == kBlockSize16kHz) ||
              (*out_len == kBlockSize32kHz));
  // Derive the sample rate in Hz from the size of the 10 ms block.
  output_sample_rate_ = *out_len / 10 * 1000;

  // Increase time.
  sim_clock_ += kTimeStepMs;
}
339
340void NetEqDecodingTest::DecodeAndCompare(const std::string& rtp_file,
341                                         const std::string& ref_file,
342                                         const std::string& stat_ref_file,
343                                         const std::string& rtcp_ref_file) {
344  OpenInputFile(rtp_file);
345
346  std::string ref_out_file = "";
347  if (ref_file.empty()) {
348    ref_out_file = webrtc::test::OutputPath() + "neteq_universal_ref.pcm";
349  }
350  RefFiles ref_files(ref_file, ref_out_file);
351
352  std::string stat_out_file = "";
353  if (stat_ref_file.empty()) {
354    stat_out_file = webrtc::test::OutputPath() + "neteq_network_stats.dat";
355  }
356  RefFiles network_stat_files(stat_ref_file, stat_out_file);
357
358  std::string rtcp_out_file = "";
359  if (rtcp_ref_file.empty()) {
360    rtcp_out_file = webrtc::test::OutputPath() + "neteq_rtcp_stats.dat";
361  }
362  RefFiles rtcp_stat_files(rtcp_ref_file, rtcp_out_file);
363
364  NETEQTEST_RTPpacket rtp;
365  ASSERT_GT(rtp.readFromFile(rtp_fp_), 0);
366  int i = 0;
367  while (rtp.dataLen() >= 0) {
368    std::ostringstream ss;
369    ss << "Lap number " << i++ << " in DecodeAndCompare while loop";
370    SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
371    int out_len = 0;
372    ASSERT_NO_FATAL_FAILURE(Process(&rtp, &out_len));
373    ASSERT_NO_FATAL_FAILURE(ref_files.ProcessReference(out_data_, out_len));
374
375    // Query the network statistics API once per second
376    if (sim_clock_ % 1000 == 0) {
377      // Process NetworkStatistics.
378      NetEqNetworkStatistics network_stats;
379      ASSERT_EQ(0, neteq_->NetworkStatistics(&network_stats));
380      ASSERT_NO_FATAL_FAILURE(
381          network_stat_files.ProcessReference(network_stats));
382
383      // Process RTCPstat.
384      RtcpStatistics rtcp_stats;
385      neteq_->GetRtcpStatistics(&rtcp_stats);
386      ASSERT_NO_FATAL_FAILURE(rtcp_stat_files.ProcessReference(rtcp_stats));
387    }
388  }
389}
390
// Fills in the header of |rtp_info| for a packet with payload type 94, which
// LoadDecoders() registers as PCM16B wb. |frame_index| becomes the sequence
// number and |timestamp| the RTP timestamp; the SSRC is fixed and the marker
// bit is cleared. Only the five header fields below are written.
void NetEqDecodingTest::PopulateRtpInfo(int frame_index,
                                        int timestamp,
                                        WebRtcRTPHeader* rtp_info) {
  rtp_info->header.sequenceNumber = frame_index;
  rtp_info->header.timestamp = timestamp;
  rtp_info->header.ssrc = 0x1234;  // Just an arbitrary SSRC.
  rtp_info->header.payloadType = 94;  // PCM16b WB codec.
  rtp_info->header.markerBit = 0;
}
400
// Builds a comfort-noise packet with payload type 98, which LoadDecoders()
// registers as CNG 16 kHz. The header of |rtp_info| is filled in the same way
// as in PopulateRtpInfo() apart from the payload type. The payload is the
// single byte 64, so |payload| must have room for at least one byte, and
// |*payload_len| is set to 1.
void NetEqDecodingTest::PopulateCng(int frame_index,
                                    int timestamp,
                                    WebRtcRTPHeader* rtp_info,
                                    uint8_t* payload,
                                    int* payload_len) {
  rtp_info->header.sequenceNumber = frame_index;
  rtp_info->header.timestamp = timestamp;
  rtp_info->header.ssrc = 0x1234;  // Just an arbitrary SSRC.
  rtp_info->header.payloadType = 98;  // WB CNG.
  rtp_info->header.markerBit = 0;
  payload[0] = 64;  // Noise level -64 dBov, quite arbitrarily chosen.
  *payload_len = 1;  // Only noise level, no spectral parameters.
}
414
415TEST_F(NetEqDecodingTest, DISABLED_ON_ANDROID(TestBitExactness)) {
416  const std::string input_rtp_file = webrtc::test::ProjectRootPath() +
417      "resources/audio_coding/neteq_universal_new.rtp";
418  // Note that neteq4_universal_ref.pcm and neteq4_universal_ref_win_32.pcm
419  // are identical. The latter could have been removed, but if clients still
420  // have a copy of the file, the test will fail.
421  const std::string input_ref_file =
422      webrtc::test::ResourcePath("audio_coding/neteq4_universal_ref", "pcm");
423#if defined(_MSC_VER) && (_MSC_VER >= 1700)
424  // For Visual Studio 2012 and later, we will have to use the generic reference
425  // file, rather than the windows-specific one.
426  const std::string network_stat_ref_file = webrtc::test::ProjectRootPath() +
427      "resources/audio_coding/neteq4_network_stats.dat";
428#else
429  const std::string network_stat_ref_file =
430      webrtc::test::ResourcePath("audio_coding/neteq4_network_stats", "dat");
431#endif
432  const std::string rtcp_stat_ref_file =
433      webrtc::test::ResourcePath("audio_coding/neteq4_rtcp_stats", "dat");
434
435  if (FLAGS_gen_ref) {
436    DecodeAndCompare(input_rtp_file, "", "", "");
437  } else {
438    DecodeAndCompare(input_rtp_file,
439                     input_ref_file,
440                     network_stat_ref_file,
441                     rtcp_stat_ref_file);
442  }
443}
444
445// TODO(hlundin): Re-enable test once the statistics interface is up and again.
446TEST_F(NetEqDecodingTest, TestFrameWaitingTimeStatistics) {
447  // Use fax mode to avoid time-scaling. This is to simplify the testing of
448  // packet waiting times in the packet buffer.
449  neteq_->SetPlayoutMode(kPlayoutFax);
450  ASSERT_EQ(kPlayoutFax, neteq_->PlayoutMode());
451  // Insert 30 dummy packets at once. Each packet contains 10 ms 16 kHz audio.
452  size_t num_frames = 30;
453  const int kSamples = 10 * 16;
454  const int kPayloadBytes = kSamples * 2;
455  for (size_t i = 0; i < num_frames; ++i) {
456    uint16_t payload[kSamples] = {0};
457    WebRtcRTPHeader rtp_info;
458    rtp_info.header.sequenceNumber = i;
459    rtp_info.header.timestamp = i * kSamples;
460    rtp_info.header.ssrc = 0x1234;  // Just an arbitrary SSRC.
461    rtp_info.header.payloadType = 94;  // PCM16b WB codec.
462    rtp_info.header.markerBit = 0;
463    ASSERT_EQ(0, neteq_->InsertPacket(
464        rtp_info,
465        reinterpret_cast<uint8_t*>(payload),
466        kPayloadBytes, 0));
467  }
468  // Pull out all data.
469  for (size_t i = 0; i < num_frames; ++i) {
470    int out_len;
471    int num_channels;
472    NetEqOutputType type;
473    ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len,
474                                  &num_channels, &type));
475    ASSERT_EQ(kBlockSize16kHz, out_len);
476  }
477
478  std::vector<int> waiting_times;
479  neteq_->WaitingTimes(&waiting_times);
480  EXPECT_EQ(num_frames, waiting_times.size());
481  // Since all frames are dumped into NetEQ at once, but pulled out with 10 ms
482  // spacing (per definition), we expect the delay to increase with 10 ms for
483  // each packet.
484  for (size_t i = 0; i < waiting_times.size(); ++i) {
485    EXPECT_EQ(static_cast<int>(i + 1) * 10, waiting_times[i]);
486  }
487
488  // Check statistics again and make sure it's been reset.
489  neteq_->WaitingTimes(&waiting_times);
490  int len = waiting_times.size();
491  EXPECT_EQ(0, len);
492
493  // Process > 100 frames, and make sure that that we get statistics
494  // only for 100 frames. Note the new SSRC, causing NetEQ to reset.
495  num_frames = 110;
496  for (size_t i = 0; i < num_frames; ++i) {
497    uint16_t payload[kSamples] = {0};
498    WebRtcRTPHeader rtp_info;
499    rtp_info.header.sequenceNumber = i;
500    rtp_info.header.timestamp = i * kSamples;
501    rtp_info.header.ssrc = 0x1235;  // Just an arbitrary SSRC.
502    rtp_info.header.payloadType = 94;  // PCM16b WB codec.
503    rtp_info.header.markerBit = 0;
504    ASSERT_EQ(0, neteq_->InsertPacket(
505        rtp_info,
506        reinterpret_cast<uint8_t*>(payload),
507        kPayloadBytes, 0));
508    int out_len;
509    int num_channels;
510    NetEqOutputType type;
511    ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len,
512                                  &num_channels, &type));
513    ASSERT_EQ(kBlockSize16kHz, out_len);
514  }
515
516  neteq_->WaitingTimes(&waiting_times);
517  EXPECT_EQ(100u, waiting_times.size());
518}
519
520TEST_F(NetEqDecodingTest, TestAverageInterArrivalTimeNegative) {
521  const int kNumFrames = 3000;  // Needed for convergence.
522  int frame_index = 0;
523  const int kSamples = 10 * 16;
524  const int kPayloadBytes = kSamples * 2;
525  while (frame_index < kNumFrames) {
526    // Insert one packet each time, except every 10th time where we insert two
527    // packets at once. This will create a negative clock-drift of approx. 10%.
528    int num_packets = (frame_index % 10 == 0 ? 2 : 1);
529    for (int n = 0; n < num_packets; ++n) {
530      uint8_t payload[kPayloadBytes] = {0};
531      WebRtcRTPHeader rtp_info;
532      PopulateRtpInfo(frame_index, frame_index * kSamples, &rtp_info);
533      ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes, 0));
534      ++frame_index;
535    }
536
537    // Pull out data once.
538    int out_len;
539    int num_channels;
540    NetEqOutputType type;
541    ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len,
542                                  &num_channels, &type));
543    ASSERT_EQ(kBlockSize16kHz, out_len);
544  }
545
546  NetEqNetworkStatistics network_stats;
547  ASSERT_EQ(0, neteq_->NetworkStatistics(&network_stats));
548  EXPECT_EQ(-103196, network_stats.clockdrift_ppm);
549}
550
551TEST_F(NetEqDecodingTest, TestAverageInterArrivalTimePositive) {
552  const int kNumFrames = 5000;  // Needed for convergence.
553  int frame_index = 0;
554  const int kSamples = 10 * 16;
555  const int kPayloadBytes = kSamples * 2;
556  for (int i = 0; i < kNumFrames; ++i) {
557    // Insert one packet each time, except every 10th time where we don't insert
558    // any packet. This will create a positive clock-drift of approx. 11%.
559    int num_packets = (i % 10 == 9 ? 0 : 1);
560    for (int n = 0; n < num_packets; ++n) {
561      uint8_t payload[kPayloadBytes] = {0};
562      WebRtcRTPHeader rtp_info;
563      PopulateRtpInfo(frame_index, frame_index * kSamples, &rtp_info);
564      ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes, 0));
565      ++frame_index;
566    }
567
568    // Pull out data once.
569    int out_len;
570    int num_channels;
571    NetEqOutputType type;
572    ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len,
573                                  &num_channels, &type));
574    ASSERT_EQ(kBlockSize16kHz, out_len);
575  }
576
577  NetEqNetworkStatistics network_stats;
578  ASSERT_EQ(0, neteq_->NetworkStatistics(&network_stats));
579  EXPECT_EQ(110946, network_stats.clockdrift_ppm);
580}
581
// Simulates a call with a long comfort-noise period and a drifting sender
// clock, in four phases:
//  1. 5 seconds of speech (30 ms packets), pulling 10 ms of audio per step.
//  2. 60 seconds of CNG packets, one per 100 ms.
//  3. Optional network freeze (only if |network_freeze_ms| > 0): audio is
//     pulled for that long without inserting anything, then the missed CNG
//     packets are inserted back-to-back.
//  4. Speech packets again until the output type is kOutputNormal.
// |drift_factor| scales the spacing between inserted packets relative to the
// nominal spacing. If |pull_audio_during_freeze| is true, GetAudio is called
// once during the catch-up in phase 3. The test expects speech to return in
// less than |max_time_to_speech_ms| and the delay after phase 4 to be within
// |delay_tolerance_ms| of the delay measured after phase 1.
void NetEqDecodingTest::LongCngWithClockDrift(double drift_factor,
                                              double network_freeze_ms,
                                              bool pull_audio_during_freeze,
                                              int delay_tolerance_ms,
                                              int max_time_to_speech_ms) {
  uint16_t seq_no = 0;
  uint32_t timestamp = 0;
  const int kFrameSizeMs = 30;
  const int kSamples = kFrameSizeMs * 16;
  const int kPayloadBytes = kSamples * 2;
  double next_input_time_ms = 0.0;
  double t_ms;
  int out_len;
  int num_channels;
  NetEqOutputType type;

  // Insert speech for 5 seconds.
  const int kSpeechDurationMs = 5000;
  for (t_ms = 0; t_ms < kSpeechDurationMs; t_ms += 10) {
    // Each turn in this for loop is 10 ms.
    while (next_input_time_ms <= t_ms) {
      // Insert one 30 ms speech frame.
      uint8_t payload[kPayloadBytes] = {0};
      WebRtcRTPHeader rtp_info;
      PopulateRtpInfo(seq_no, timestamp, &rtp_info);
      ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes, 0));
      ++seq_no;
      timestamp += kSamples;
      next_input_time_ms += static_cast<double>(kFrameSizeMs) * drift_factor;
    }
    // Pull out data once.
    ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len,
                                  &num_channels, &type));
    ASSERT_EQ(kBlockSize16kHz, out_len);
  }

  EXPECT_EQ(kOutputNormal, type);
  // Delay, in timestamp units, between the next timestamp to be inserted and
  // the current playout timestamp.
  int32_t delay_before = timestamp - PlayoutTimestamp();

  // Insert CNG for 1 minute (= 60000 ms).
  const int kCngPeriodMs = 100;
  const int kCngPeriodSamples = kCngPeriodMs * 16;  // Period in 16 kHz samples.
  const int kCngDurationMs = 60000;
  for (; t_ms < kSpeechDurationMs + kCngDurationMs; t_ms += 10) {
    // Each turn in this for loop is 10 ms.
    while (next_input_time_ms <= t_ms) {
      // Insert one CNG frame each 100 ms.
      uint8_t payload[kPayloadBytes];
      int payload_len;
      WebRtcRTPHeader rtp_info;
      PopulateCng(seq_no, timestamp, &rtp_info, payload, &payload_len);
      ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, payload_len, 0));
      ++seq_no;
      timestamp += kCngPeriodSamples;
      next_input_time_ms += static_cast<double>(kCngPeriodMs) * drift_factor;
    }
    // Pull out data once.
    ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len,
                                  &num_channels, &type));
    ASSERT_EQ(kBlockSize16kHz, out_len);
  }

  EXPECT_EQ(kOutputCNG, type);

  if (network_freeze_ms > 0) {
    // First keep pulling audio for |network_freeze_ms| without inserting
    // any data, then insert CNG data corresponding to |network_freeze_ms|
    // without pulling any output audio.
    const double loop_end_time = t_ms + network_freeze_ms;
    for (; t_ms < loop_end_time; t_ms += 10) {
      // Pull out data once.
      ASSERT_EQ(0,
                neteq_->GetAudio(
                    kMaxBlockSize, out_data_, &out_len, &num_channels, &type));
      ASSERT_EQ(kBlockSize16kHz, out_len);
      EXPECT_EQ(kOutputCNG, type);
    }
    bool pull_once = pull_audio_during_freeze;
    // If |pull_once| is true, GetAudio will be called once half-way through
    // the network recovery period.
    double pull_time_ms = (t_ms + next_input_time_ms) / 2;
    while (next_input_time_ms <= t_ms) {
      if (pull_once && next_input_time_ms >= pull_time_ms) {
        pull_once = false;
        // Pull out data once.
        ASSERT_EQ(
            0,
            neteq_->GetAudio(
                kMaxBlockSize, out_data_, &out_len, &num_channels, &type));
        ASSERT_EQ(kBlockSize16kHz, out_len);
        EXPECT_EQ(kOutputCNG, type);
        // The extra pull consumes 10 ms of simulated time.
        t_ms += 10;
      }
      // Insert one CNG frame each 100 ms.
      uint8_t payload[kPayloadBytes];
      int payload_len;
      WebRtcRTPHeader rtp_info;
      PopulateCng(seq_no, timestamp, &rtp_info, payload, &payload_len);
      ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, payload_len, 0));
      ++seq_no;
      timestamp += kCngPeriodSamples;
      next_input_time_ms += kCngPeriodMs * drift_factor;
    }
  }

  // Insert speech again until output type is speech.
  double speech_restart_time_ms = t_ms;
  while (type != kOutputNormal) {
    // Each turn in this while loop is 10 ms.
    while (next_input_time_ms <= t_ms) {
      // Insert one 30 ms speech frame.
      uint8_t payload[kPayloadBytes] = {0};
      WebRtcRTPHeader rtp_info;
      PopulateRtpInfo(seq_no, timestamp, &rtp_info);
      ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes, 0));
      ++seq_no;
      timestamp += kSamples;
      next_input_time_ms += kFrameSizeMs * drift_factor;
    }
    // Pull out data once.
    ASSERT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_, &out_len,
                                  &num_channels, &type));
    ASSERT_EQ(kBlockSize16kHz, out_len);
    // Increase clock.
    t_ms += 10;
  }

  // Check that the speech starts again within reasonable time.
  double time_until_speech_returns_ms = t_ms - speech_restart_time_ms;
  EXPECT_LT(time_until_speech_returns_ms, max_time_to_speech_ms);
  int32_t delay_after = timestamp - PlayoutTimestamp();
  // Compare delay before and after, and make sure it differs by at most
  // |delay_tolerance_ms| (converted to samples at 16 samples per ms).
  EXPECT_LE(delay_after, delay_before + delay_tolerance_ms * 16);
  EXPECT_GE(delay_after, delay_before - delay_tolerance_ms * 16);
}
717
718TEST_F(NetEqDecodingTest, LongCngWithNegativeClockDrift) {
719  // Apply a clock drift of -25 ms / s (sender faster than receiver).
720  const double kDriftFactor = 1000.0 / (1000.0 + 25.0);
721  const double kNetworkFreezeTimeMs = 0.0;
722  const bool kGetAudioDuringFreezeRecovery = false;
723  const int kDelayToleranceMs = 20;
724  const int kMaxTimeToSpeechMs = 100;
725  LongCngWithClockDrift(kDriftFactor,
726                        kNetworkFreezeTimeMs,
727                        kGetAudioDuringFreezeRecovery,
728                        kDelayToleranceMs,
729                        kMaxTimeToSpeechMs);
730}
731
732TEST_F(NetEqDecodingTest, LongCngWithPositiveClockDrift) {
733  // Apply a clock drift of +25 ms / s (sender slower than receiver).
734  const double kDriftFactor = 1000.0 / (1000.0 - 25.0);
735  const double kNetworkFreezeTimeMs = 0.0;
736  const bool kGetAudioDuringFreezeRecovery = false;
737  const int kDelayToleranceMs = 20;
738  const int kMaxTimeToSpeechMs = 100;
739  LongCngWithClockDrift(kDriftFactor,
740                        kNetworkFreezeTimeMs,
741                        kGetAudioDuringFreezeRecovery,
742                        kDelayToleranceMs,
743                        kMaxTimeToSpeechMs);
744}
745
746TEST_F(NetEqDecodingTest, LongCngWithNegativeClockDriftNetworkFreeze) {
747  // Apply a clock drift of -25 ms / s (sender faster than receiver).
748  const double kDriftFactor = 1000.0 / (1000.0 + 25.0);
749  const double kNetworkFreezeTimeMs = 5000.0;
750  const bool kGetAudioDuringFreezeRecovery = false;
751  const int kDelayToleranceMs = 50;
752  const int kMaxTimeToSpeechMs = 200;
753  LongCngWithClockDrift(kDriftFactor,
754                        kNetworkFreezeTimeMs,
755                        kGetAudioDuringFreezeRecovery,
756                        kDelayToleranceMs,
757                        kMaxTimeToSpeechMs);
758}
759
760TEST_F(NetEqDecodingTest, LongCngWithPositiveClockDriftNetworkFreeze) {
761  // Apply a clock drift of +25 ms / s (sender slower than receiver).
762  const double kDriftFactor = 1000.0 / (1000.0 - 25.0);
763  const double kNetworkFreezeTimeMs = 5000.0;
764  const bool kGetAudioDuringFreezeRecovery = false;
765  const int kDelayToleranceMs = 20;
766  const int kMaxTimeToSpeechMs = 100;
767  LongCngWithClockDrift(kDriftFactor,
768                        kNetworkFreezeTimeMs,
769                        kGetAudioDuringFreezeRecovery,
770                        kDelayToleranceMs,
771                        kMaxTimeToSpeechMs);
772}
773
774TEST_F(NetEqDecodingTest, LongCngWithPositiveClockDriftNetworkFreezeExtraPull) {
775  // Apply a clock drift of +25 ms / s (sender slower than receiver).
776  const double kDriftFactor = 1000.0 / (1000.0 - 25.0);
777  const double kNetworkFreezeTimeMs = 5000.0;
778  const bool kGetAudioDuringFreezeRecovery = true;
779  const int kDelayToleranceMs = 20;
780  const int kMaxTimeToSpeechMs = 100;
781  LongCngWithClockDrift(kDriftFactor,
782                        kNetworkFreezeTimeMs,
783                        kGetAudioDuringFreezeRecovery,
784                        kDelayToleranceMs,
785                        kMaxTimeToSpeechMs);
786}
787
788TEST_F(NetEqDecodingTest, LongCngWithoutClockDrift) {
789  const double kDriftFactor = 1.0;  // No drift.
790  const double kNetworkFreezeTimeMs = 0.0;
791  const bool kGetAudioDuringFreezeRecovery = false;
792  const int kDelayToleranceMs = 10;
793  const int kMaxTimeToSpeechMs = 50;
794  LongCngWithClockDrift(kDriftFactor,
795                        kNetworkFreezeTimeMs,
796                        kGetAudioDuringFreezeRecovery,
797                        kDelayToleranceMs,
798                        kMaxTimeToSpeechMs);
799}
800
801TEST_F(NetEqDecodingTest, UnknownPayloadType) {
802  const int kPayloadBytes = 100;
803  uint8_t payload[kPayloadBytes] = {0};
804  WebRtcRTPHeader rtp_info;
805  PopulateRtpInfo(0, 0, &rtp_info);
806  rtp_info.header.payloadType = 1;  // Not registered as a decoder.
807  EXPECT_EQ(NetEq::kFail,
808            neteq_->InsertPacket(rtp_info, payload, kPayloadBytes, 0));
809  EXPECT_EQ(NetEq::kUnknownRtpPayloadType, neteq_->LastError());
810}
811
812TEST_F(NetEqDecodingTest, DISABLED_ON_ANDROID(DecoderError)) {
813  const int kPayloadBytes = 100;
814  uint8_t payload[kPayloadBytes] = {0};
815  WebRtcRTPHeader rtp_info;
816  PopulateRtpInfo(0, 0, &rtp_info);
817  rtp_info.header.payloadType = 103;  // iSAC, but the payload is invalid.
818  EXPECT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes, 0));
819  NetEqOutputType type;
820  // Set all of |out_data_| to 1, and verify that it was set to 0 by the call
821  // to GetAudio.
822  for (int i = 0; i < kMaxBlockSize; ++i) {
823    out_data_[i] = 1;
824  }
825  int num_channels;
826  int samples_per_channel;
827  EXPECT_EQ(NetEq::kFail,
828            neteq_->GetAudio(kMaxBlockSize, out_data_,
829                             &samples_per_channel, &num_channels, &type));
830  // Verify that there is a decoder error to check.
831  EXPECT_EQ(NetEq::kDecoderErrorCode, neteq_->LastError());
832  // Code 6730 is an iSAC error code.
833  EXPECT_EQ(6730, neteq_->LastDecoderError());
834  // Verify that the first 160 samples are set to 0, and that the remaining
835  // samples are left unmodified.
836  static const int kExpectedOutputLength = 160;  // 10 ms at 16 kHz sample rate.
837  for (int i = 0; i < kExpectedOutputLength; ++i) {
838    std::ostringstream ss;
839    ss << "i = " << i;
840    SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
841    EXPECT_EQ(0, out_data_[i]);
842  }
843  for (int i = kExpectedOutputLength; i < kMaxBlockSize; ++i) {
844    std::ostringstream ss;
845    ss << "i = " << i;
846    SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
847    EXPECT_EQ(1, out_data_[i]);
848  }
849}
850
851TEST_F(NetEqDecodingTest, GetAudioBeforeInsertPacket) {
852  NetEqOutputType type;
853  // Set all of |out_data_| to 1, and verify that it was set to 0 by the call
854  // to GetAudio.
855  for (int i = 0; i < kMaxBlockSize; ++i) {
856    out_data_[i] = 1;
857  }
858  int num_channels;
859  int samples_per_channel;
860  EXPECT_EQ(0, neteq_->GetAudio(kMaxBlockSize, out_data_,
861                                &samples_per_channel,
862                                &num_channels, &type));
863  // Verify that the first block of samples is set to 0.
864  static const int kExpectedOutputLength =
865      kInitSampleRateHz / 100;  // 10 ms at initial sample rate.
866  for (int i = 0; i < kExpectedOutputLength; ++i) {
867    std::ostringstream ss;
868    ss << "i = " << i;
869    SCOPED_TRACE(ss.str());  // Print out the parameter values on failure.
870    EXPECT_EQ(0, out_data_[i]);
871  }
872}
873
874class NetEqBgnTest : public NetEqDecodingTest {
875 protected:
876  virtual void TestCondition(double sum_squared_noise,
877                             bool should_be_faded) = 0;
878
879  void CheckBgn(int sampling_rate_hz) {
880    int expected_samples_per_channel = 0;
881    uint8_t payload_type = 0xFF;  // Invalid.
882    if (sampling_rate_hz == 8000) {
883      expected_samples_per_channel = kBlockSize8kHz;
884      payload_type = 93;  // PCM 16, 8 kHz.
885    } else if (sampling_rate_hz == 16000) {
886      expected_samples_per_channel = kBlockSize16kHz;
887      payload_type = 94;  // PCM 16, 16 kHZ.
888    } else if (sampling_rate_hz == 32000) {
889      expected_samples_per_channel = kBlockSize32kHz;
890      payload_type = 95;  // PCM 16, 32 kHz.
891    } else {
892      ASSERT_TRUE(false);  // Unsupported test case.
893    }
894
895    NetEqOutputType type;
896    int16_t output[kBlockSize32kHz];  // Maximum size is chosen.
897    test::AudioLoop input;
898    // We are using the same 32 kHz input file for all tests, regardless of
899    // |sampling_rate_hz|. The output may sound weird, but the test is still
900    // valid.
901    ASSERT_TRUE(input.Init(
902        webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm"),
903        10 * sampling_rate_hz,  // Max 10 seconds loop length.
904        expected_samples_per_channel));
905
906    // Payload of 10 ms of PCM16 32 kHz.
907    uint8_t payload[kBlockSize32kHz * sizeof(int16_t)];
908    WebRtcRTPHeader rtp_info;
909    PopulateRtpInfo(0, 0, &rtp_info);
910    rtp_info.header.payloadType = payload_type;
911
912    int number_channels = 0;
913    int samples_per_channel = 0;
914
915    uint32_t receive_timestamp = 0;
916    for (int n = 0; n < 10; ++n) {  // Insert few packets and get audio.
917      int enc_len_bytes =
918          WebRtcPcm16b_EncodeW16(input.GetNextBlock(),
919                                 expected_samples_per_channel,
920                                 reinterpret_cast<int16_t*>(payload));
921      ASSERT_EQ(enc_len_bytes, expected_samples_per_channel * 2);
922
923      number_channels = 0;
924      samples_per_channel = 0;
925      ASSERT_EQ(0,
926                neteq_->InsertPacket(
927                    rtp_info, payload, enc_len_bytes, receive_timestamp));
928      ASSERT_EQ(0,
929                neteq_->GetAudio(kBlockSize32kHz,
930                                 output,
931                                 &samples_per_channel,
932                                 &number_channels,
933                                 &type));
934      ASSERT_EQ(1, number_channels);
935      ASSERT_EQ(expected_samples_per_channel, samples_per_channel);
936      ASSERT_EQ(kOutputNormal, type);
937
938      // Next packet.
939      rtp_info.header.timestamp += expected_samples_per_channel;
940      rtp_info.header.sequenceNumber++;
941      receive_timestamp += expected_samples_per_channel;
942    }
943
944    number_channels = 0;
945    samples_per_channel = 0;
946
947    // Get audio without inserting packets, expecting PLC and PLC-to-CNG. Pull
948    // one frame without checking speech-type. This is the first frame pulled
949    // without inserting any packet, and might not be labeled as PLC.
950    ASSERT_EQ(0,
951              neteq_->GetAudio(kBlockSize32kHz,
952                               output,
953                               &samples_per_channel,
954                               &number_channels,
955                               &type));
956    ASSERT_EQ(1, number_channels);
957    ASSERT_EQ(expected_samples_per_channel, samples_per_channel);
958
959    // To be able to test the fading of background noise we need at lease to
960    // pull 611 frames.
961    const int kFadingThreshold = 611;
962
963    // Test several CNG-to-PLC packet for the expected behavior. The number 20
964    // is arbitrary, but sufficiently large to test enough number of frames.
965    const int kNumPlcToCngTestFrames = 20;
966    bool plc_to_cng = false;
967    for (int n = 0; n < kFadingThreshold + kNumPlcToCngTestFrames; ++n) {
968      number_channels = 0;
969      samples_per_channel = 0;
970      memset(output, 1, sizeof(output));  // Set to non-zero.
971      ASSERT_EQ(0,
972                neteq_->GetAudio(kBlockSize32kHz,
973                                 output,
974                                 &samples_per_channel,
975                                 &number_channels,
976                                 &type));
977      ASSERT_EQ(1, number_channels);
978      ASSERT_EQ(expected_samples_per_channel, samples_per_channel);
979      if (type == kOutputPLCtoCNG) {
980        plc_to_cng = true;
981        double sum_squared = 0;
982        for (int k = 0; k < number_channels * samples_per_channel; ++k)
983          sum_squared += output[k] * output[k];
984        TestCondition(sum_squared, n > kFadingThreshold);
985      } else {
986        EXPECT_EQ(kOutputPLC, type);
987      }
988    }
989    EXPECT_TRUE(plc_to_cng);  // Just to be sure that PLC-to-CNG has occurred.
990  }
991};
992
993class NetEqBgnTestOn : public NetEqBgnTest {
994 protected:
995  NetEqBgnTestOn() : NetEqBgnTest() {
996    config_.background_noise_mode = NetEq::kBgnOn;
997  }
998
999  void TestCondition(double sum_squared_noise, bool /*should_be_faded*/) {
1000    EXPECT_NE(0, sum_squared_noise);
1001  }
1002};
1003
1004class NetEqBgnTestOff : public NetEqBgnTest {
1005 protected:
1006  NetEqBgnTestOff() : NetEqBgnTest() {
1007    config_.background_noise_mode = NetEq::kBgnOff;
1008  }
1009
1010  void TestCondition(double sum_squared_noise, bool /*should_be_faded*/) {
1011    EXPECT_EQ(0, sum_squared_noise);
1012  }
1013};
1014
1015class NetEqBgnTestFade : public NetEqBgnTest {
1016 protected:
1017  NetEqBgnTestFade() : NetEqBgnTest() {
1018    config_.background_noise_mode = NetEq::kBgnFade;
1019  }
1020
1021  void TestCondition(double sum_squared_noise, bool should_be_faded) {
1022    if (should_be_faded)
1023      EXPECT_EQ(0, sum_squared_noise);
1024  }
1025};
1026
1027TEST_F(NetEqBgnTestOn, RunTest) {
1028  CheckBgn(8000);
1029  CheckBgn(16000);
1030  CheckBgn(32000);
1031}
1032
1033TEST_F(NetEqBgnTestOff, RunTest) {
1034  CheckBgn(8000);
1035  CheckBgn(16000);
1036  CheckBgn(32000);
1037}
1038
1039TEST_F(NetEqBgnTestFade, RunTest) {
1040  CheckBgn(8000);
1041  CheckBgn(16000);
1042  CheckBgn(32000);
1043}
1044
1045TEST_F(NetEqDecodingTest, SyncPacketInsert) {
1046  WebRtcRTPHeader rtp_info;
1047  uint32_t receive_timestamp = 0;
1048  // For the readability use the following payloads instead of the defaults of
1049  // this test.
1050  uint8_t kPcm16WbPayloadType = 1;
1051  uint8_t kCngNbPayloadType = 2;
1052  uint8_t kCngWbPayloadType = 3;
1053  uint8_t kCngSwb32PayloadType = 4;
1054  uint8_t kCngSwb48PayloadType = 5;
1055  uint8_t kAvtPayloadType = 6;
1056  uint8_t kRedPayloadType = 7;
1057  uint8_t kIsacPayloadType = 9;  // Payload type 8 is already registered.
1058
1059  // Register decoders.
1060  ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderPCM16Bwb,
1061                                           kPcm16WbPayloadType));
1062  ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderCNGnb, kCngNbPayloadType));
1063  ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderCNGwb, kCngWbPayloadType));
1064  ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderCNGswb32kHz,
1065                                           kCngSwb32PayloadType));
1066  ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderCNGswb48kHz,
1067                                           kCngSwb48PayloadType));
1068  ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderAVT, kAvtPayloadType));
1069  ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderRED, kRedPayloadType));
1070  ASSERT_EQ(0, neteq_->RegisterPayloadType(kDecoderISAC, kIsacPayloadType));
1071
1072  PopulateRtpInfo(0, 0, &rtp_info);
1073  rtp_info.header.payloadType = kPcm16WbPayloadType;
1074
1075  // The first packet injected cannot be sync-packet.
1076  EXPECT_EQ(-1, neteq_->InsertSyncPacket(rtp_info, receive_timestamp));
1077
1078  // Payload length of 10 ms PCM16 16 kHz.
1079  const int kPayloadBytes = kBlockSize16kHz * sizeof(int16_t);
1080  uint8_t payload[kPayloadBytes] = {0};
1081  ASSERT_EQ(0, neteq_->InsertPacket(
1082      rtp_info, payload, kPayloadBytes, receive_timestamp));
1083
1084  // Next packet. Last packet contained 10 ms audio.
1085  rtp_info.header.sequenceNumber++;
1086  rtp_info.header.timestamp += kBlockSize16kHz;
1087  receive_timestamp += kBlockSize16kHz;
1088
1089  // Unacceptable payload types CNG, AVT (DTMF), RED.
1090  rtp_info.header.payloadType = kCngNbPayloadType;
1091  EXPECT_EQ(-1, neteq_->InsertSyncPacket(rtp_info, receive_timestamp));
1092
1093  rtp_info.header.payloadType = kCngWbPayloadType;
1094  EXPECT_EQ(-1, neteq_->InsertSyncPacket(rtp_info, receive_timestamp));
1095
1096  rtp_info.header.payloadType = kCngSwb32PayloadType;
1097  EXPECT_EQ(-1, neteq_->InsertSyncPacket(rtp_info, receive_timestamp));
1098
1099  rtp_info.header.payloadType = kCngSwb48PayloadType;
1100  EXPECT_EQ(-1, neteq_->InsertSyncPacket(rtp_info, receive_timestamp));
1101
1102  rtp_info.header.payloadType = kAvtPayloadType;
1103  EXPECT_EQ(-1, neteq_->InsertSyncPacket(rtp_info, receive_timestamp));
1104
1105  rtp_info.header.payloadType = kRedPayloadType;
1106  EXPECT_EQ(-1, neteq_->InsertSyncPacket(rtp_info, receive_timestamp));
1107
1108  // Change of codec cannot be initiated with a sync packet.
1109  rtp_info.header.payloadType = kIsacPayloadType;
1110  EXPECT_EQ(-1, neteq_->InsertSyncPacket(rtp_info, receive_timestamp));
1111
1112  // Change of SSRC is not allowed with a sync packet.
1113  rtp_info.header.payloadType = kPcm16WbPayloadType;
1114  ++rtp_info.header.ssrc;
1115  EXPECT_EQ(-1, neteq_->InsertSyncPacket(rtp_info, receive_timestamp));
1116
1117  --rtp_info.header.ssrc;
1118  EXPECT_EQ(0, neteq_->InsertSyncPacket(rtp_info, receive_timestamp));
1119}
1120
1121// First insert several noise like packets, then sync-packets. Decoding all
1122// packets should not produce error, statistics should not show any packet loss
1123// and sync-packets should decode to zero.
1124// TODO(turajs) we will have a better test if we have a referece NetEq, and
1125// when Sync packets are inserted in "test" NetEq we insert all-zero payload
1126// in reference NetEq and compare the output of those two.
1127TEST_F(NetEqDecodingTest, SyncPacketDecode) {
1128  WebRtcRTPHeader rtp_info;
1129  PopulateRtpInfo(0, 0, &rtp_info);
1130  const int kPayloadBytes = kBlockSize16kHz * sizeof(int16_t);
1131  uint8_t payload[kPayloadBytes];
1132  int16_t decoded[kBlockSize16kHz];
1133  int algorithmic_frame_delay = algorithmic_delay_ms_ / 10 + 1;
1134  for (int n = 0; n < kPayloadBytes; ++n) {
1135    payload[n] = (rand() & 0xF0) + 1;  // Non-zero random sequence.
1136  }
1137  // Insert some packets which decode to noise. We are not interested in
1138  // actual decoded values.
1139  NetEqOutputType output_type;
1140  int num_channels;
1141  int samples_per_channel;
1142  uint32_t receive_timestamp = 0;
1143  for (int n = 0; n < 100; ++n) {
1144    ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes,
1145                                      receive_timestamp));
1146    ASSERT_EQ(0, neteq_->GetAudio(kBlockSize16kHz, decoded,
1147                                  &samples_per_channel, &num_channels,
1148                                  &output_type));
1149    ASSERT_EQ(kBlockSize16kHz, samples_per_channel);
1150    ASSERT_EQ(1, num_channels);
1151
1152    rtp_info.header.sequenceNumber++;
1153    rtp_info.header.timestamp += kBlockSize16kHz;
1154    receive_timestamp += kBlockSize16kHz;
1155  }
1156  const int kNumSyncPackets = 10;
1157
1158  // Make sure sufficient number of sync packets are inserted that we can
1159  // conduct a test.
1160  ASSERT_GT(kNumSyncPackets, algorithmic_frame_delay);
1161  // Insert sync-packets, the decoded sequence should be all-zero.
1162  for (int n = 0; n < kNumSyncPackets; ++n) {
1163    ASSERT_EQ(0, neteq_->InsertSyncPacket(rtp_info, receive_timestamp));
1164    ASSERT_EQ(0, neteq_->GetAudio(kBlockSize16kHz, decoded,
1165                                  &samples_per_channel, &num_channels,
1166                                  &output_type));
1167    ASSERT_EQ(kBlockSize16kHz, samples_per_channel);
1168    ASSERT_EQ(1, num_channels);
1169    if (n > algorithmic_frame_delay) {
1170      EXPECT_TRUE(IsAllZero(decoded, samples_per_channel * num_channels));
1171    }
1172    rtp_info.header.sequenceNumber++;
1173    rtp_info.header.timestamp += kBlockSize16kHz;
1174    receive_timestamp += kBlockSize16kHz;
1175  }
1176
1177  // We insert regular packets, if sync packet are not correctly buffered then
1178  // network statistics would show some packet loss.
1179  for (int n = 0; n <= algorithmic_frame_delay + 10; ++n) {
1180    ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes,
1181                                      receive_timestamp));
1182    ASSERT_EQ(0, neteq_->GetAudio(kBlockSize16kHz, decoded,
1183                                  &samples_per_channel, &num_channels,
1184                                  &output_type));
1185    if (n >= algorithmic_frame_delay + 1) {
1186      // Expect that this frame contain samples from regular RTP.
1187      EXPECT_TRUE(IsAllNonZero(decoded, samples_per_channel * num_channels));
1188    }
1189    rtp_info.header.sequenceNumber++;
1190    rtp_info.header.timestamp += kBlockSize16kHz;
1191    receive_timestamp += kBlockSize16kHz;
1192  }
1193  NetEqNetworkStatistics network_stats;
1194  ASSERT_EQ(0, neteq_->NetworkStatistics(&network_stats));
1195  // Expecting a "clean" network.
1196  EXPECT_EQ(0, network_stats.packet_loss_rate);
1197  EXPECT_EQ(0, network_stats.expand_rate);
1198  EXPECT_EQ(0, network_stats.accelerate_rate);
1199  EXPECT_LE(network_stats.preemptive_rate, 150);
1200}
1201
1202// Test if the size of the packet buffer reported correctly when containing
1203// sync packets. Also, test if network packets override sync packets. That is to
1204// prefer decoding a network packet to a sync packet, if both have same sequence
1205// number and timestamp.
1206TEST_F(NetEqDecodingTest, SyncPacketBufferSizeAndOverridenByNetworkPackets) {
1207  WebRtcRTPHeader rtp_info;
1208  PopulateRtpInfo(0, 0, &rtp_info);
1209  const int kPayloadBytes = kBlockSize16kHz * sizeof(int16_t);
1210  uint8_t payload[kPayloadBytes];
1211  int16_t decoded[kBlockSize16kHz];
1212  for (int n = 0; n < kPayloadBytes; ++n) {
1213    payload[n] = (rand() & 0xF0) + 1;  // Non-zero random sequence.
1214  }
1215  // Insert some packets which decode to noise. We are not interested in
1216  // actual decoded values.
1217  NetEqOutputType output_type;
1218  int num_channels;
1219  int samples_per_channel;
1220  uint32_t receive_timestamp = 0;
1221  int algorithmic_frame_delay = algorithmic_delay_ms_ / 10 + 1;
1222  for (int n = 0; n < algorithmic_frame_delay; ++n) {
1223    ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes,
1224                                      receive_timestamp));
1225    ASSERT_EQ(0, neteq_->GetAudio(kBlockSize16kHz, decoded,
1226                                  &samples_per_channel, &num_channels,
1227                                  &output_type));
1228    ASSERT_EQ(kBlockSize16kHz, samples_per_channel);
1229    ASSERT_EQ(1, num_channels);
1230    rtp_info.header.sequenceNumber++;
1231    rtp_info.header.timestamp += kBlockSize16kHz;
1232    receive_timestamp += kBlockSize16kHz;
1233  }
1234  const int kNumSyncPackets = 10;
1235
1236  WebRtcRTPHeader first_sync_packet_rtp_info;
1237  memcpy(&first_sync_packet_rtp_info, &rtp_info, sizeof(rtp_info));
1238
1239  // Insert sync-packets, but no decoding.
1240  for (int n = 0; n < kNumSyncPackets; ++n) {
1241    ASSERT_EQ(0, neteq_->InsertSyncPacket(rtp_info, receive_timestamp));
1242    rtp_info.header.sequenceNumber++;
1243    rtp_info.header.timestamp += kBlockSize16kHz;
1244    receive_timestamp += kBlockSize16kHz;
1245  }
1246  NetEqNetworkStatistics network_stats;
1247  ASSERT_EQ(0, neteq_->NetworkStatistics(&network_stats));
1248  EXPECT_EQ(kNumSyncPackets * 10 + algorithmic_delay_ms_,
1249            network_stats.current_buffer_size_ms);
1250
1251  // Rewind |rtp_info| to that of the first sync packet.
1252  memcpy(&rtp_info, &first_sync_packet_rtp_info, sizeof(rtp_info));
1253
1254  // Insert.
1255  for (int n = 0; n < kNumSyncPackets; ++n) {
1256    ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes,
1257                                      receive_timestamp));
1258    rtp_info.header.sequenceNumber++;
1259    rtp_info.header.timestamp += kBlockSize16kHz;
1260    receive_timestamp += kBlockSize16kHz;
1261  }
1262
1263  // Decode.
1264  for (int n = 0; n < kNumSyncPackets; ++n) {
1265    ASSERT_EQ(0, neteq_->GetAudio(kBlockSize16kHz, decoded,
1266                                  &samples_per_channel, &num_channels,
1267                                  &output_type));
1268    ASSERT_EQ(kBlockSize16kHz, samples_per_channel);
1269    ASSERT_EQ(1, num_channels);
1270    EXPECT_TRUE(IsAllNonZero(decoded, samples_per_channel * num_channels));
1271  }
1272}
1273
1274void NetEqDecodingTest::WrapTest(uint16_t start_seq_no,
1275                                 uint32_t start_timestamp,
1276                                 const std::set<uint16_t>& drop_seq_numbers,
1277                                 bool expect_seq_no_wrap,
1278                                 bool expect_timestamp_wrap) {
1279  uint16_t seq_no = start_seq_no;
1280  uint32_t timestamp = start_timestamp;
1281  const int kBlocksPerFrame = 3;  // Number of 10 ms blocks per frame.
1282  const int kFrameSizeMs = kBlocksPerFrame * kTimeStepMs;
1283  const int kSamples = kBlockSize16kHz * kBlocksPerFrame;
1284  const int kPayloadBytes = kSamples * sizeof(int16_t);
1285  double next_input_time_ms = 0.0;
1286  int16_t decoded[kBlockSize16kHz];
1287  int num_channels;
1288  int samples_per_channel;
1289  NetEqOutputType output_type;
1290  uint32_t receive_timestamp = 0;
1291
1292  // Insert speech for 2 seconds.
1293  const int kSpeechDurationMs = 2000;
1294  int packets_inserted = 0;
1295  uint16_t last_seq_no;
1296  uint32_t last_timestamp;
1297  bool timestamp_wrapped = false;
1298  bool seq_no_wrapped = false;
1299  for (double t_ms = 0; t_ms < kSpeechDurationMs; t_ms += 10) {
1300    // Each turn in this for loop is 10 ms.
1301    while (next_input_time_ms <= t_ms) {
1302      // Insert one 30 ms speech frame.
1303      uint8_t payload[kPayloadBytes] = {0};
1304      WebRtcRTPHeader rtp_info;
1305      PopulateRtpInfo(seq_no, timestamp, &rtp_info);
1306      if (drop_seq_numbers.find(seq_no) == drop_seq_numbers.end()) {
1307        // This sequence number was not in the set to drop. Insert it.
1308        ASSERT_EQ(0,
1309                  neteq_->InsertPacket(rtp_info, payload, kPayloadBytes,
1310                                       receive_timestamp));
1311        ++packets_inserted;
1312      }
1313      NetEqNetworkStatistics network_stats;
1314      ASSERT_EQ(0, neteq_->NetworkStatistics(&network_stats));
1315
1316      // Due to internal NetEq logic, preferred buffer-size is about 4 times the
1317      // packet size for first few packets. Therefore we refrain from checking
1318      // the criteria.
1319      if (packets_inserted > 4) {
1320        // Expect preferred and actual buffer size to be no more than 2 frames.
1321        EXPECT_LE(network_stats.preferred_buffer_size_ms, kFrameSizeMs * 2);
1322        EXPECT_LE(network_stats.current_buffer_size_ms, kFrameSizeMs * 2 +
1323                  algorithmic_delay_ms_);
1324      }
1325      last_seq_no = seq_no;
1326      last_timestamp = timestamp;
1327
1328      ++seq_no;
1329      timestamp += kSamples;
1330      receive_timestamp += kSamples;
1331      next_input_time_ms += static_cast<double>(kFrameSizeMs);
1332
1333      seq_no_wrapped |= seq_no < last_seq_no;
1334      timestamp_wrapped |= timestamp < last_timestamp;
1335    }
1336    // Pull out data once.
1337    ASSERT_EQ(0, neteq_->GetAudio(kBlockSize16kHz, decoded,
1338                                  &samples_per_channel, &num_channels,
1339                                  &output_type));
1340    ASSERT_EQ(kBlockSize16kHz, samples_per_channel);
1341    ASSERT_EQ(1, num_channels);
1342
1343    // Expect delay (in samples) to be less than 2 packets.
1344    EXPECT_LE(timestamp - PlayoutTimestamp(),
1345              static_cast<uint32_t>(kSamples * 2));
1346  }
1347  // Make sure we have actually tested wrap-around.
1348  ASSERT_EQ(expect_seq_no_wrap, seq_no_wrapped);
1349  ASSERT_EQ(expect_timestamp_wrap, timestamp_wrapped);
1350}
1351
1352TEST_F(NetEqDecodingTest, SequenceNumberWrap) {
1353  // Start with a sequence number that will soon wrap.
1354  std::set<uint16_t> drop_seq_numbers;  // Don't drop any packets.
1355  WrapTest(0xFFFF - 10, 0, drop_seq_numbers, true, false);
1356}
1357
1358TEST_F(NetEqDecodingTest, SequenceNumberWrapAndDrop) {
1359  // Start with a sequence number that will soon wrap.
1360  std::set<uint16_t> drop_seq_numbers;
1361  drop_seq_numbers.insert(0xFFFF);
1362  drop_seq_numbers.insert(0x0);
1363  WrapTest(0xFFFF - 10, 0, drop_seq_numbers, true, false);
1364}
1365
1366TEST_F(NetEqDecodingTest, TimestampWrap) {
1367  // Start with a timestamp that will soon wrap.
1368  std::set<uint16_t> drop_seq_numbers;
1369  WrapTest(0, 0xFFFFFFFF - 3000, drop_seq_numbers, false, true);
1370}
1371
1372TEST_F(NetEqDecodingTest, TimestampAndSequenceNumberWrap) {
1373  // Start with a timestamp and a sequence number that will wrap at the same
1374  // time.
1375  std::set<uint16_t> drop_seq_numbers;
1376  WrapTest(0xFFFF - 10, 0xFFFFFFFF - 5000, drop_seq_numbers, true, true);
1377}
1378
1379void NetEqDecodingTest::DuplicateCng() {
1380  uint16_t seq_no = 0;
1381  uint32_t timestamp = 0;
1382  const int kFrameSizeMs = 10;
1383  const int kSampleRateKhz = 16;
1384  const int kSamples = kFrameSizeMs * kSampleRateKhz;
1385  const int kPayloadBytes = kSamples * 2;
1386
1387  const int algorithmic_delay_samples = std::max(
1388      algorithmic_delay_ms_ * kSampleRateKhz, 5 * kSampleRateKhz / 8);
1389  // Insert three speech packet. Three are needed to get the frame length
1390  // correct.
1391  int out_len;
1392  int num_channels;
1393  NetEqOutputType type;
1394  uint8_t payload[kPayloadBytes] = {0};
1395  WebRtcRTPHeader rtp_info;
1396  for (int i = 0; i < 3; ++i) {
1397    PopulateRtpInfo(seq_no, timestamp, &rtp_info);
1398    ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes, 0));
1399    ++seq_no;
1400    timestamp += kSamples;
1401
1402    // Pull audio once.
1403    ASSERT_EQ(0,
1404              neteq_->GetAudio(
1405                  kMaxBlockSize, out_data_, &out_len, &num_channels, &type));
1406    ASSERT_EQ(kBlockSize16kHz, out_len);
1407  }
1408  // Verify speech output.
1409  EXPECT_EQ(kOutputNormal, type);
1410
1411  // Insert same CNG packet twice.
1412  const int kCngPeriodMs = 100;
1413  const int kCngPeriodSamples = kCngPeriodMs * kSampleRateKhz;
1414  int payload_len;
1415  PopulateCng(seq_no, timestamp, &rtp_info, payload, &payload_len);
1416  // This is the first time this CNG packet is inserted.
1417  ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, payload_len, 0));
1418
1419  // Pull audio once and make sure CNG is played.
1420  ASSERT_EQ(0,
1421            neteq_->GetAudio(
1422                kMaxBlockSize, out_data_, &out_len, &num_channels, &type));
1423  ASSERT_EQ(kBlockSize16kHz, out_len);
1424  EXPECT_EQ(kOutputCNG, type);
1425  EXPECT_EQ(timestamp - algorithmic_delay_samples, PlayoutTimestamp());
1426
1427  // Insert the same CNG packet again. Note that at this point it is old, since
1428  // we have already decoded the first copy of it.
1429  ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, payload_len, 0));
1430
1431  // Pull audio until we have played |kCngPeriodMs| of CNG. Start at 10 ms since
1432  // we have already pulled out CNG once.
1433  for (int cng_time_ms = 10; cng_time_ms < kCngPeriodMs; cng_time_ms += 10) {
1434    ASSERT_EQ(0,
1435              neteq_->GetAudio(
1436                  kMaxBlockSize, out_data_, &out_len, &num_channels, &type));
1437    ASSERT_EQ(kBlockSize16kHz, out_len);
1438    EXPECT_EQ(kOutputCNG, type);
1439    EXPECT_EQ(timestamp - algorithmic_delay_samples,
1440              PlayoutTimestamp());
1441  }
1442
1443  // Insert speech again.
1444  ++seq_no;
1445  timestamp += kCngPeriodSamples;
1446  PopulateRtpInfo(seq_no, timestamp, &rtp_info);
1447  ASSERT_EQ(0, neteq_->InsertPacket(rtp_info, payload, kPayloadBytes, 0));
1448
1449  // Pull audio once and verify that the output is speech again.
1450  ASSERT_EQ(0,
1451            neteq_->GetAudio(
1452                kMaxBlockSize, out_data_, &out_len, &num_channels, &type));
1453  ASSERT_EQ(kBlockSize16kHz, out_len);
1454  EXPECT_EQ(kOutputNormal, type);
1455  EXPECT_EQ(timestamp + kSamples - algorithmic_delay_samples,
1456            PlayoutTimestamp());
1457}
1458
1459uint32_t NetEqDecodingTest::PlayoutTimestamp() {
1460  uint32_t playout_timestamp = 0;
1461  EXPECT_TRUE(neteq_->GetPlayoutTimestamp(&playout_timestamp));
1462  return playout_timestamp;
1463}
1464
1465TEST_F(NetEqDecodingTest, DiscardDuplicateCng) { DuplicateCng(); }
1466}  // namespace webrtc
1467