// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// ffmpeg_unittests verify that the parts of the FFmpeg API that Chromium uses
// function as advertised for each media format that Chromium supports.  This
// mostly includes stuff like reporting proper timestamps, seeking to
// keyframes, and supporting certain features like reordered_opaque.
//

#include <algorithm>
#include <limits>
#include <queue>
#include <vector>

#include "base/base_paths.h"
#include "base/files/file_path.h"
#include "base/files/file_util.h"
#include "base/files/memory_mapped_file.h"
#include "base/memory/scoped_ptr.h"
#include "base/path_service.h"
#include "base/strings/string_util.h"
#include "base/test/perf_test_suite.h"
#include "base/test/perf_time_logger.h"
#include "media/base/media.h"
#include "media/ffmpeg/ffmpeg_common.h"
#include "media/filters/ffmpeg_glue.h"
#include "media/filters/in_memory_url_protocol.h"
#include "testing/gtest/include/gtest/gtest.h"

int main(int argc, char** argv) {
  return base::PerfTestSuite(argc, argv).Run();
}

namespace media {

// Mirror setting in ffmpeg_video_decoder.
static const int kDecodeThreads = 2;

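// FIFO of demuxed packets. The queue owns its packets and frees any that
// remain when it is flushed or destroyed.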
class AVPacketQueue {
 public:
  AVPacketQueue() {
  }

  ~AVPacketQueue() {
    flush();
  }

  bool empty() {
    return packets_.empty();
  }

  AVPacket* peek() {
    return packets_.front();
  }

  void pop() {
    AVPacket* packet = packets_.front();
    packets_.pop();
    av_free_packet(packet);
    delete packet;
  }

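  // av_dup_packet() gives the queue its own copy of the packet data, since
  // the demuxer may reuse or free the original buffer after the next read.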
  void push(AVPacket* packet) {
    av_dup_packet(packet);
    packets_.push(packet);
  }

  void flush() {
    while (!empty()) {
      pop();
    }
  }

 private:
  std::queue<AVPacket*> packets_;

  DISALLOW_COPY_AND_ASSIGN(AVPacketQueue);
};

// TODO(dalecurtis): We should really just use PipelineIntegrationTests instead
// of a one-off step decoder so we're exercising the real pipeline.
class FFmpegTest : public testing::TestWithParam<const char*> {
 protected:
  FFmpegTest()
      : av_format_context_(NULL),
        audio_stream_index_(-1),
        video_stream_index_(-1),
        decoded_audio_time_(AV_NOPTS_VALUE),
        decoded_audio_duration_(AV_NOPTS_VALUE),
        decoded_video_time_(AV_NOPTS_VALUE),
        decoded_video_duration_(AV_NOPTS_VALUE),
        duration_(AV_NOPTS_VALUE) {
    InitializeFFmpeg();

    audio_buffer_.reset(av_frame_alloc());
    video_buffer_.reset(av_frame_alloc());
  }

  virtual ~FFmpegTest() {
  }

  void OpenAndReadFile(const std::string& name) {
    OpenFile(name);
    OpenCodecs();
    ReadRemainingFile();
  }

  void OpenFile(const std::string& name) {
    base::FilePath path;
    PathService::Get(base::DIR_SOURCE_ROOT, &path);
    path = path.AppendASCII("media")
        .AppendASCII("test")
        .AppendASCII("data")
        .AppendASCII("content")
        .AppendASCII(name.c_str());
    EXPECT_TRUE(base::PathExists(path));

    CHECK(file_data_.Initialize(path));
    protocol_.reset(new InMemoryUrlProtocol(
        file_data_.data(), file_data_.length(), false));
    glue_.reset(new FFmpegGlue(protocol_.get()));

    ASSERT_TRUE(glue_->OpenContext()) << "Could not open " << path.value();
    av_format_context_ = glue_->format_context();
    ASSERT_LE(0, avformat_find_stream_info(av_format_context_, NULL))
        << "Could not find stream information for " << path.value();

    // Determine duration by picking max stream duration.
    for (unsigned int i = 0; i < av_format_context_->nb_streams; ++i) {
      AVStream* av_stream = av_format_context_->streams[i];
      int64 duration = ConvertFromTimeBase(
          av_stream->time_base, av_stream->duration).InMicroseconds();
      duration_ = std::max(duration_, duration);
    }

    // Final check to see if the container itself specifies a duration.
    AVRational av_time_base = {1, AV_TIME_BASE};
    int64 duration =
        ConvertFromTimeBase(av_time_base,
                            av_format_context_->duration).InMicroseconds();
    duration_ = std::max(duration_, duration);
  }

  void OpenCodecs() {
    for (unsigned int i = 0; i < av_format_context_->nb_streams; ++i) {
      AVStream* av_stream = av_format_context_->streams[i];
      AVCodecContext* av_codec_context = av_stream->codec;
      AVCodec* av_codec = avcodec_find_decoder(av_codec_context->codec_id);

      EXPECT_TRUE(av_codec)
          << "Could not find AVCodec with CodecID "
          << av_codec_context->codec_id;

      av_codec_context->error_concealment = FF_EC_GUESS_MVS | FF_EC_DEBLOCK;
      av_codec_context->thread_count = kDecodeThreads;

      EXPECT_EQ(0, avcodec_open2(av_codec_context, av_codec, NULL))
          << "Could not open AVCodecContext with CodecID "
          << av_codec_context->codec_id;

      if (av_codec->type == AVMEDIA_TYPE_AUDIO) {
        EXPECT_EQ(-1, audio_stream_index_) << "Found multiple audio streams.";
        audio_stream_index_ = static_cast<int>(i);
      } else if (av_codec->type == AVMEDIA_TYPE_VIDEO) {
        EXPECT_EQ(-1, video_stream_index_) << "Found multiple video streams.";
        video_stream_index_ = static_cast<int>(i);
      } else {
        ADD_FAILURE() << "Found unknown stream type.";
      }
    }
  }

  void Flush() {
    if (has_audio()) {
      audio_packets_.flush();
      avcodec_flush_buffers(av_audio_context());
    }
    if (has_video()) {
      video_packets_.flush();
      avcodec_flush_buffers(av_video_context());
    }
  }

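  // Demultiplexes packets into the audio and video queues until a packet with
  // a timestamp later than |time| is read or the end of the file is reached.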
  void ReadUntil(int64 time) {
    while (true) {
      scoped_ptr<AVPacket> packet(new AVPacket());
      if (av_read_frame(av_format_context_, packet.get()) < 0) {
        break;
      }

      int stream_index = static_cast<int>(packet->stream_index);
      int64 packet_time = AV_NOPTS_VALUE;
      if (stream_index == audio_stream_index_) {
        packet_time =
            ConvertFromTimeBase(av_audio_stream()->time_base, packet->pts)
                .InMicroseconds();
        audio_packets_.push(packet.release());
      } else if (stream_index == video_stream_index_) {
        packet_time =
            ConvertFromTimeBase(av_video_stream()->time_base, packet->pts)
                .InMicroseconds();
        video_packets_.push(packet.release());
      } else {
        ADD_FAILURE() << "Found packet that belongs to unknown stream.";
      }

      if (packet_time > time) {
        break;
      }
    }
  }

  void ReadRemainingFile() {
    ReadUntil(std::numeric_limits<int64>::max());
  }

  bool StepDecodeAudio() {
    EXPECT_TRUE(has_audio());
    if (!has_audio() || audio_packets_.empty()) {
      return false;
    }

    // Decode until output is produced, end of stream, or error.
    while (true) {
      int result = 0;
      int got_audio = 0;
      bool end_of_stream = false;

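      // Once the queue is drained, feed the decoder an empty packet so it
      // flushes any frames it has buffered internally.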
      AVPacket packet;
      if (audio_packets_.empty()) {
        av_init_packet(&packet);
        end_of_stream = true;
      } else {
        memcpy(&packet, audio_packets_.peek(), sizeof(packet));
      }

      av_frame_unref(audio_buffer_.get());
      result = avcodec_decode_audio4(av_audio_context(), audio_buffer_.get(),
                                     &got_audio, &packet);
      if (!audio_packets_.empty()) {
        audio_packets_.pop();
      }

      EXPECT_GE(result, 0) << "Audio decode error.";
      if (result < 0 || (got_audio == 0 && end_of_stream)) {
        return false;
      }

      if (got_audio) {
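        // Frame duration in microseconds: decoded sample count divided by the
        // sample rate.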
        double microseconds = 1.0L * audio_buffer_->nb_samples /
            av_audio_context()->sample_rate *
            base::Time::kMicrosecondsPerSecond;
        decoded_audio_duration_ = static_cast<int64>(microseconds);

        if (packet.pts == static_cast<int64>(AV_NOPTS_VALUE)) {
          EXPECT_NE(decoded_audio_time_, static_cast<int64>(AV_NOPTS_VALUE))
              << "We never received an initial timestamped audio packet! "
              << "Looks like there's a seeking/parsing bug in FFmpeg.";
          decoded_audio_time_ += decoded_audio_duration_;
        } else {
          decoded_audio_time_ =
              ConvertFromTimeBase(av_audio_stream()->time_base, packet.pts)
                  .InMicroseconds();
        }
        return true;
      }
    }
    return true;
  }

  bool StepDecodeVideo() {
    EXPECT_TRUE(has_video());
    if (!has_video() || video_packets_.empty()) {
      return false;
    }

    // Decode until output is produced, end of stream, or error.
    while (true) {
      int result = 0;
      int got_picture = 0;
      bool end_of_stream = false;

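      // As with audio, an empty packet at end of stream tells the decoder to
      // flush any frames it has buffered internally.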
      AVPacket packet;
      if (video_packets_.empty()) {
        av_init_packet(&packet);
        end_of_stream = true;
      } else {
        memcpy(&packet, video_packets_.peek(), sizeof(packet));
      }

      av_frame_unref(video_buffer_.get());
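      // Pass the packet's pts through reordered_opaque so the presentation
      // time of the frame actually returned (which may differ due to frame
      // reordering) can be recovered from the decoded frame below.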
      av_video_context()->reordered_opaque = packet.pts;
      result = avcodec_decode_video2(av_video_context(), video_buffer_.get(),
                                     &got_picture, &packet);
      if (!video_packets_.empty()) {
        video_packets_.pop();
      }

      EXPECT_GE(result, 0) << "Video decode error.";
      if (result < 0 || (got_picture == 0 && end_of_stream)) {
        return false;
      }

      if (got_picture) {
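        // Frame duration is (2 + repeat_pict) in units of half a frame
        // interval, so build a time base of 1 / (2 * frame rate).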
        AVRational doubled_time_base;
        doubled_time_base.den = av_video_stream()->r_frame_rate.num;
        doubled_time_base.num = av_video_stream()->r_frame_rate.den;
        doubled_time_base.den *= 2;

        decoded_video_time_ =
            ConvertFromTimeBase(av_video_stream()->time_base,
                                video_buffer_->reordered_opaque)
                .InMicroseconds();
        decoded_video_duration_ =
            ConvertFromTimeBase(doubled_time_base,
                                2 + video_buffer_->repeat_pict)
                .InMicroseconds();
        return true;
      }
    }
  }

  void DecodeRemainingAudio() {
    while (StepDecodeAudio()) {}
  }

  void DecodeRemainingVideo() {
    while (StepDecodeVideo()) {}
  }

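  // Seeks to |position| (in seconds), then flushes the packet queues and
  // decoder buffers so decoding resumes cleanly from the new position.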
  void SeekTo(double position) {
    int64 seek_time =
        static_cast<int64>(position * base::Time::kMicrosecondsPerSecond);
    int flags = AVSEEK_FLAG_BACKWARD;

    // Passing -1 as our stream index lets FFmpeg pick a default stream.
    // FFmpeg will attempt to use the lowest-index video stream, if present,
    // followed by the lowest-index audio stream.
    EXPECT_LE(0, av_seek_frame(av_format_context_, -1, seek_time, flags))
        << "Failed to seek to position " << position;
    Flush();
  }

  bool has_audio() { return audio_stream_index_ >= 0; }
  bool has_video() { return video_stream_index_ >= 0; }
  int64 decoded_audio_time() { return decoded_audio_time_; }
  int64 decoded_audio_duration() { return decoded_audio_duration_; }
  int64 decoded_video_time() { return decoded_video_time_; }
  int64 decoded_video_duration() { return decoded_video_duration_; }
  int64 duration() { return duration_; }

  AVStream* av_audio_stream() {
    return av_format_context_->streams[audio_stream_index_];
  }
  AVStream* av_video_stream() {
    return av_format_context_->streams[video_stream_index_];
  }
  AVCodecContext* av_audio_context() {
    return av_audio_stream()->codec;
  }
  AVCodecContext* av_video_context() {
    return av_video_stream()->codec;
  }

 private:
  void InitializeFFmpeg() {
    static bool initialized = false;
    if (initialized) {
      return;
    }

    base::FilePath path;
    PathService::Get(base::DIR_MODULE, &path);
    EXPECT_TRUE(InitializeMediaLibrary(path))
        << "Could not initialize media library.";

    initialized = true;
  }

  AVFormatContext* av_format_context_;
  int audio_stream_index_;
  int video_stream_index_;
  AVPacketQueue audio_packets_;
  AVPacketQueue video_packets_;

  scoped_ptr<AVFrame, media::ScopedPtrAVFreeFrame> audio_buffer_;
  scoped_ptr<AVFrame, media::ScopedPtrAVFreeFrame> video_buffer_;

  int64 decoded_audio_time_;
  int64 decoded_audio_duration_;
  int64 decoded_video_time_;
  int64 decoded_video_duration_;
  int64 duration_;

  base::MemoryMappedFile file_data_;
  scoped_ptr<InMemoryUrlProtocol> protocol_;
  scoped_ptr<FFmpegGlue> glue_;

  DISALLOW_COPY_AND_ASSIGN(FFmpegTest);
};

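// Instantiates the parameterized tests above for a single file named
// "<name>.<extension>" under media/test/data/content.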
#define FFMPEG_TEST_CASE(name, extension) \
    INSTANTIATE_TEST_CASE_P(name##_##extension, FFmpegTest, \
                            testing::Values(#name "." #extension));

// Covers all our basic formats.
FFMPEG_TEST_CASE(sync0, mp4);
FFMPEG_TEST_CASE(sync0, ogv);
FFMPEG_TEST_CASE(sync0, webm);
FFMPEG_TEST_CASE(sync1, m4a);
FFMPEG_TEST_CASE(sync1, mp3);
FFMPEG_TEST_CASE(sync1, mp4);
FFMPEG_TEST_CASE(sync1, ogg);
FFMPEG_TEST_CASE(sync1, ogv);
FFMPEG_TEST_CASE(sync1, webm);
FFMPEG_TEST_CASE(sync2, m4a);
FFMPEG_TEST_CASE(sync2, mp3);
FFMPEG_TEST_CASE(sync2, mp4);
FFMPEG_TEST_CASE(sync2, ogg);
FFMPEG_TEST_CASE(sync2, ogv);
FFMPEG_TEST_CASE(sync2, webm);

// Covers our LayoutTest file.
FFMPEG_TEST_CASE(counting, ogv);

TEST_P(FFmpegTest, Perf) {
  {
    base::PerfTimeLogger timer("Opening file");
    OpenFile(GetParam());
  }
  {
    base::PerfTimeLogger timer("Opening codecs");
    OpenCodecs();
  }
  {
    base::PerfTimeLogger timer("Reading file");
    ReadRemainingFile();
  }
  if (has_audio()) {
    base::PerfTimeLogger timer("Decoding audio");
    DecodeRemainingAudio();
  }
  if (has_video()) {
    base::PerfTimeLogger timer("Decoding video");
    DecodeRemainingVideo();
  }
  {
    base::PerfTimeLogger timer("Seeking to zero");
    SeekTo(0);
  }
}

TEST_P(FFmpegTest, Loop_Audio) {
  OpenAndReadFile(GetParam());
  if (!has_audio()) {
    return;
  }

  const int kSteps = 4;
  std::vector<int64> expected_timestamps_;
  for (int i = 0; i < kSteps; ++i) {
    EXPECT_TRUE(StepDecodeAudio());
    expected_timestamps_.push_back(decoded_audio_time());
  }

  SeekTo(0);
  ReadRemainingFile();

  for (int i = 0; i < kSteps; ++i) {
    EXPECT_TRUE(StepDecodeAudio());
    EXPECT_EQ(expected_timestamps_[i], decoded_audio_time())
        << "Frame " << i << " had a mismatched timestamp.";
  }
}

TEST_P(FFmpegTest, Loop_Video) {
  OpenAndReadFile(GetParam());
  if (!has_video()) {
    return;
  }

  const int kSteps = 4;
  std::vector<int64> expected_timestamps_;
  for (int i = 0; i < kSteps; ++i) {
    EXPECT_TRUE(StepDecodeVideo());
    expected_timestamps_.push_back(decoded_video_time());
  }

  SeekTo(0);
  ReadRemainingFile();

  for (int i = 0; i < kSteps; ++i) {
    EXPECT_TRUE(StepDecodeVideo());
    EXPECT_EQ(expected_timestamps_[i], decoded_video_time())
        << "Frame " << i << " had a mismatched timestamp.";
  }
}

TEST_P(FFmpegTest, Seek_Audio) {
  OpenAndReadFile(GetParam());
  if (!has_audio() || duration() < 0.5 * base::Time::kMicrosecondsPerSecond) {
    return;
  }

  // duration() is in microseconds while SeekTo() takes seconds.
  const double duration_in_seconds =
      duration() / static_cast<double>(base::Time::kMicrosecondsPerSecond);
  SeekTo(duration_in_seconds - 0.5);
  ReadRemainingFile();

  EXPECT_TRUE(StepDecodeAudio());
  EXPECT_NE(static_cast<int64>(AV_NOPTS_VALUE), decoded_audio_time());
}

TEST_P(FFmpegTest, Seek_Video) {
  OpenAndReadFile(GetParam());
  if (!has_video() || duration() < 0.5 * base::Time::kMicrosecondsPerSecond) {
    return;
  }

  // duration() is in microseconds while SeekTo() takes seconds.
  const double duration_in_seconds =
      duration() / static_cast<double>(base::Time::kMicrosecondsPerSecond);
  SeekTo(duration_in_seconds - 0.5);
  ReadRemainingFile();

  EXPECT_TRUE(StepDecodeVideo());
  EXPECT_NE(static_cast<int64>(AV_NOPTS_VALUE), decoded_video_time());
}

TEST_P(FFmpegTest, Decode_Audio) {
  OpenAndReadFile(GetParam());
  if (!has_audio()) {
    return;
  }

  int64 last_audio_time = AV_NOPTS_VALUE;
  while (StepDecodeAudio()) {
    ASSERT_GT(decoded_audio_time(), last_audio_time);
    last_audio_time = decoded_audio_time();
  }
}

TEST_P(FFmpegTest, Decode_Video) {
  OpenAndReadFile(GetParam());
  if (!has_video()) {
    return;
  }

  int64 last_video_time = AV_NOPTS_VALUE;
  while (StepDecodeVideo()) {
    ASSERT_GT(decoded_video_time(), last_video_time);
    last_video_time = decoded_video_time();
  }
}

TEST_P(FFmpegTest, Duration) {
  OpenAndReadFile(GetParam());

  if (has_audio()) {
    DecodeRemainingAudio();
  }

  if (has_video()) {
    DecodeRemainingVideo();
  }

  double expected = static_cast<double>(duration());
  double actual = static_cast<double>(
      std::max(decoded_audio_time() + decoded_audio_duration(),
               decoded_video_time() + decoded_video_duration()));
  EXPECT_NEAR(expected, actual, 500000)
      << "Duration is off by more than 0.5 seconds.";
}

TEST_F(FFmpegTest, VideoPlayedCollapse) {
  OpenFile("test.ogv");
  OpenCodecs();

  SeekTo(0.5);
  ReadRemainingFile();
  EXPECT_TRUE(StepDecodeVideo());
  VLOG(1) << decoded_video_time();

  SeekTo(2.83);
  ReadRemainingFile();
  EXPECT_TRUE(StepDecodeVideo());
  VLOG(1) << decoded_video_time();

  SeekTo(0.4);
  ReadRemainingFile();
  EXPECT_TRUE(StepDecodeVideo());
  VLOG(1) << decoded_video_time();
}

}  // namespace media