ffmpeg_unittest.cc revision 868fa2fe829687343ffae624259930155e16dbd8
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// ffmpeg_unittests verify that the parts of the FFmpeg API that Chromium uses
// function as advertised for each media format that Chromium supports.  This
// covers behavior such as reporting proper timestamps, seeking to keyframes,
// and supporting features like reordered_opaque.
//

#include <algorithm>
#include <limits>
#include <queue>
#include <string>
#include <vector>

#include "base/base_paths.h"
#include "base/basictypes.h"
#include "base/file_util.h"
#include "base/files/file_path.h"
#include "base/files/memory_mapped_file.h"
#include "base/logging.h"
#include "base/memory/scoped_ptr.h"
#include "base/path_service.h"
#include "base/perftimer.h"
#include "base/strings/string_util.h"
#include "base/test/perf_test_suite.h"
#include "media/base/media.h"
#include "media/ffmpeg/ffmpeg_common.h"
#include "media/filters/ffmpeg_glue.h"
#include "media/filters/in_memory_url_protocol.h"
#include "testing/gtest/include/gtest/gtest.h"

int main(int argc, char** argv) {
  return base::PerfTestSuite(argc, argv).Run();
}

namespace media {

// Mirror setting in ffmpeg_video_decoder.
static const int kDecodeThreads = 2;

class AVPacketQueue {
 public:
  AVPacketQueue() {
  }

  ~AVPacketQueue() {
    flush();
  }

  bool empty() {
    return packets_.empty();
  }

  AVPacket* peek() {
    return packets_.front();
  }

  void pop() {
    AVPacket* packet = packets_.front();
    packets_.pop();
    av_free_packet(packet);
    delete packet;
  }

  void push(AVPacket* packet) {
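    // av_dup_packet() ensures the queued packet owns its payload, so the data
    // stays valid until av_free_packet() releases it in pop().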
    av_dup_packet(packet);
    packets_.push(packet);
  }

  void flush() {
    while (!empty()) {
      pop();
    }
  }

 private:
  std::queue<AVPacket*> packets_;

  DISALLOW_COPY_AND_ASSIGN(AVPacketQueue);
};

// TODO(dalecurtis): We should really just use PipelineIntegrationTests instead
// of a one-off step decoder so we're exercising the real pipeline.
class FFmpegTest : public testing::TestWithParam<const char*> {
 protected:
  FFmpegTest()
      : av_format_context_(NULL),
        audio_stream_index_(-1),
        video_stream_index_(-1),
        audio_buffer_(NULL),
        video_buffer_(NULL),
        decoded_audio_time_(AV_NOPTS_VALUE),
        decoded_audio_duration_(AV_NOPTS_VALUE),
        decoded_video_time_(AV_NOPTS_VALUE),
        decoded_video_duration_(AV_NOPTS_VALUE),
        duration_(AV_NOPTS_VALUE) {
    InitializeFFmpeg();

    audio_buffer_.reset(avcodec_alloc_frame());
    video_buffer_.reset(avcodec_alloc_frame());
  }

  virtual ~FFmpegTest() {
  }

  void OpenAndReadFile(const std::string& name) {
    OpenFile(name);
    OpenCodecs();
    ReadRemainingFile();
  }

  void OpenFile(const std::string& name) {
    base::FilePath path;
    PathService::Get(base::DIR_SOURCE_ROOT, &path);
    path = path.AppendASCII("media")
        .AppendASCII("test")
        .AppendASCII("data")
        .AppendASCII("content")
        .AppendASCII(name.c_str());
    EXPECT_TRUE(file_util::PathExists(path));

    CHECK(file_data_.Initialize(path));
    protocol_.reset(new InMemoryUrlProtocol(
        file_data_.data(), file_data_.length(), false));
    glue_.reset(new FFmpegGlue(protocol_.get()));

    ASSERT_TRUE(glue_->OpenContext()) << "Could not open " << path.value();
    av_format_context_ = glue_->format_context();
    ASSERT_LE(0, avformat_find_stream_info(av_format_context_, NULL))
        << "Could not find stream information for " << path.value();

    // Determine duration by picking max stream duration.
    for (unsigned int i = 0; i < av_format_context_->nb_streams; ++i) {
      AVStream* av_stream = av_format_context_->streams[i];
      int64 duration = ConvertFromTimeBase(
          av_stream->time_base, av_stream->duration).InMicroseconds();
      duration_ = std::max(duration_, duration);
    }

    // Final check to see if the container itself specifies a duration.
    AVRational av_time_base = {1, AV_TIME_BASE};
    int64 duration =
        ConvertFromTimeBase(av_time_base,
                            av_format_context_->duration).InMicroseconds();
    duration_ = std::max(duration_, duration);
  }

  void OpenCodecs() {
    for (unsigned int i = 0; i < av_format_context_->nb_streams; ++i) {
      AVStream* av_stream = av_format_context_->streams[i];
      AVCodecContext* av_codec_context = av_stream->codec;
      AVCodec* av_codec = avcodec_find_decoder(av_codec_context->codec_id);

      EXPECT_TRUE(av_codec)
          << "Could not find AVCodec with CodecID "
          << av_codec_context->codec_id;

      av_codec_context->error_concealment = FF_EC_GUESS_MVS | FF_EC_DEBLOCK;
      av_codec_context->thread_count = kDecodeThreads;

      EXPECT_EQ(0, avcodec_open2(av_codec_context, av_codec, NULL))
          << "Could not open AVCodecContext with CodecID "
          << av_codec_context->codec_id;

      if (av_codec->type == AVMEDIA_TYPE_AUDIO) {
        EXPECT_EQ(-1, audio_stream_index_) << "Found multiple audio streams.";
        audio_stream_index_ = static_cast<int>(i);
      } else if (av_codec->type == AVMEDIA_TYPE_VIDEO) {
        EXPECT_EQ(-1, video_stream_index_) << "Found multiple video streams.";
        video_stream_index_ = static_cast<int>(i);
      } else {
        ADD_FAILURE() << "Found unknown stream type.";
      }
    }
  }

  void Flush() {
    if (has_audio()) {
      audio_packets_.flush();
      avcodec_flush_buffers(av_audio_context());
    }
    if (has_video()) {
      video_packets_.flush();
      avcodec_flush_buffers(av_video_context());
    }
  }

  void ReadUntil(int64 time) {
    while (true) {
      scoped_ptr<AVPacket> packet(new AVPacket());
      if (av_read_frame(av_format_context_, packet.get()) < 0) {
        break;
      }

      int stream_index = static_cast<int>(packet->stream_index);
      int64 packet_time = AV_NOPTS_VALUE;
      if (stream_index == audio_stream_index_) {
        packet_time =
            ConvertFromTimeBase(av_audio_stream()->time_base, packet->pts)
                .InMicroseconds();
        audio_packets_.push(packet.release());
      } else if (stream_index == video_stream_index_) {
        packet_time =
            ConvertFromTimeBase(av_video_stream()->time_base, packet->pts)
                .InMicroseconds();
        video_packets_.push(packet.release());
      } else {
        ADD_FAILURE() << "Found packet that belongs to unknown stream.";
      }

      if (packet_time > time) {
        break;
      }
    }
  }

  void ReadRemainingFile() {
    ReadUntil(std::numeric_limits<int64>::max());
  }

  bool StepDecodeAudio() {
    EXPECT_TRUE(has_audio());
    if (!has_audio() || audio_packets_.empty()) {
      return false;
    }

    // Decode until output is produced, end of stream, or error.
    while (true) {
      int result = 0;
      int got_audio = 0;
      bool end_of_stream = false;

      AVPacket packet;
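      // With no packets left, feed the decoder an empty packet to signal end
      // of stream and drain any frames it is still buffering.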
      if (audio_packets_.empty()) {
        av_init_packet(&packet);
        packet.data = NULL;
        packet.size = 0;
        end_of_stream = true;
      } else {
        memcpy(&packet, audio_packets_.peek(), sizeof(packet));
      }

      avcodec_get_frame_defaults(audio_buffer_.get());
      result = avcodec_decode_audio4(av_audio_context(), audio_buffer_.get(),
                                     &got_audio, &packet);
      if (!audio_packets_.empty()) {
        audio_packets_.pop();
      }

      EXPECT_GE(result, 0) << "Audio decode error.";
      if (result < 0 || (got_audio == 0 && end_of_stream)) {
        return false;
      }

      // Only report a decode once the decoder has produced an output frame; a
      // packet may be consumed without yielding any samples.
      if (got_audio) {
        double microseconds = 1.0L * audio_buffer_->nb_samples /
            av_audio_context()->sample_rate *
            base::Time::kMicrosecondsPerSecond;
        decoded_audio_duration_ = static_cast<int64>(microseconds);

        if (packet.pts == static_cast<int64>(AV_NOPTS_VALUE)) {
          EXPECT_NE(decoded_audio_time_, static_cast<int64>(AV_NOPTS_VALUE))
              << "We never received an initial timestamped audio packet! "
              << "Looks like there's a seeking/parsing bug in FFmpeg.";
          decoded_audio_time_ += decoded_audio_duration_;
        } else {
          decoded_audio_time_ =
              ConvertFromTimeBase(av_audio_stream()->time_base, packet.pts)
                  .InMicroseconds();
        }
        return true;
      }
    }
    return true;
  }

  bool StepDecodeVideo() {
    EXPECT_TRUE(has_video());
    if (!has_video() || video_packets_.empty()) {
      return false;
    }

    // Decode until output is produced, end of stream, or error.
    while (true) {
      int result = 0;
      int got_picture = 0;
      bool end_of_stream = false;

      AVPacket packet;
      if (video_packets_.empty()) {
        // As with audio, an empty packet drains any buffered frames at end of
        // stream.
        av_init_packet(&packet);
        packet.data = NULL;
        packet.size = 0;
        end_of_stream = true;
      } else {
        memcpy(&packet, video_packets_.peek(), sizeof(packet));
      }

      avcodec_get_frame_defaults(video_buffer_.get());
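      // Route the packet's pts through the decoder via reordered_opaque so the
      // decoded frame comes back tagged with its presentation timestamp, even
      // when the codec reorders frames (e.g. B-frames).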
      av_video_context()->reordered_opaque = packet.pts;
      result = avcodec_decode_video2(av_video_context(), video_buffer_.get(),
                                     &got_picture, &packet);
      if (!video_packets_.empty()) {
        video_packets_.pop();
      }

      EXPECT_GE(result, 0) << "Video decode error.";
      if (result < 0 || (got_picture == 0 && end_of_stream)) {
        return false;
      }

      if (got_picture) {
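        // FFmpeg expresses a picture's display duration in half-frame units:
        // (2 + repeat_pict) ticks of a 1 / (2 * frame_rate) time base, so
        // build that doubled time base here.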
        AVRational doubled_time_base;
        doubled_time_base.den = av_video_stream()->r_frame_rate.num;
        doubled_time_base.num = av_video_stream()->r_frame_rate.den;
        doubled_time_base.den *= 2;

        decoded_video_time_ =
            ConvertFromTimeBase(av_video_stream()->time_base,
                                video_buffer_->reordered_opaque)
                .InMicroseconds();
        decoded_video_duration_ =
            ConvertFromTimeBase(doubled_time_base,
                                2 + video_buffer_->repeat_pict)
                .InMicroseconds();
        return true;
      }
    }
  }

  void DecodeRemainingAudio() {
    while (StepDecodeAudio()) {}
  }

  void DecodeRemainingVideo() {
    while (StepDecodeVideo()) {}
  }

  void SeekTo(double position) {
    int64 seek_time =
        static_cast<int64>(position * base::Time::kMicrosecondsPerSecond);
    int flags = AVSEEK_FLAG_BACKWARD;
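    // AVSEEK_FLAG_BACKWARD makes av_seek_frame() land on the nearest keyframe
    // at or before seek_time, so decoding can resume cleanly from there.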

    // Passing -1 as our stream index lets FFmpeg pick a default stream.
    // FFmpeg will attempt to use the lowest-index video stream, if present,
    // followed by the lowest-index audio stream.
    EXPECT_LE(0, av_seek_frame(av_format_context_, -1, seek_time, flags))
        << "Failed to seek to position " << position;
    Flush();
  }

  bool has_audio() { return audio_stream_index_ >= 0; }
  bool has_video() { return video_stream_index_ >= 0; }
  int64 decoded_audio_time() { return decoded_audio_time_; }
  int64 decoded_audio_duration() { return decoded_audio_duration_; }
  int64 decoded_video_time() { return decoded_video_time_; }
  int64 decoded_video_duration() { return decoded_video_duration_; }
  int64 duration() { return duration_; }

  AVStream* av_audio_stream() {
    return av_format_context_->streams[audio_stream_index_];
  }
  AVStream* av_video_stream() {
    return av_format_context_->streams[video_stream_index_];
  }
  AVCodecContext* av_audio_context() {
    return av_audio_stream()->codec;
  }
  AVCodecContext* av_video_context() {
    return av_video_stream()->codec;
  }

 private:
  void InitializeFFmpeg() {
    static bool initialized = false;
    if (initialized) {
      return;
    }

    base::FilePath path;
    PathService::Get(base::DIR_MODULE, &path);
    EXPECT_TRUE(InitializeMediaLibrary(path))
        << "Could not initialize media library.";

    initialized = true;
  }

  AVFormatContext* av_format_context_;
  int audio_stream_index_;
  int video_stream_index_;
  AVPacketQueue audio_packets_;
  AVPacketQueue video_packets_;

  scoped_ptr_malloc<AVFrame, media::ScopedPtrAVFree> audio_buffer_;
  scoped_ptr_malloc<AVFrame, media::ScopedPtrAVFree> video_buffer_;

  int64 decoded_audio_time_;
  int64 decoded_audio_duration_;
  int64 decoded_video_time_;
  int64 decoded_video_duration_;
  int64 duration_;

  base::MemoryMappedFile file_data_;
  scoped_ptr<InMemoryUrlProtocol> protocol_;
  scoped_ptr<FFmpegGlue> glue_;

  DISALLOW_COPY_AND_ASSIGN(FFmpegTest);
};

#define FFMPEG_TEST_CASE(name, extension) \
    INSTANTIATE_TEST_CASE_P(name##_##extension, FFmpegTest, \
                            testing::Values(#name "." #extension));
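// FFMPEG_TEST_CASE(name, extension) runs every TEST_P in this file against
// media/test/data/content/<name>.<extension>.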

// Covers all our basic formats.
FFMPEG_TEST_CASE(sync0, mp4);
FFMPEG_TEST_CASE(sync0, ogv);
FFMPEG_TEST_CASE(sync0, webm);
FFMPEG_TEST_CASE(sync1, m4a);
FFMPEG_TEST_CASE(sync1, mp3);
FFMPEG_TEST_CASE(sync1, mp4);
FFMPEG_TEST_CASE(sync1, ogg);
FFMPEG_TEST_CASE(sync1, ogv);
FFMPEG_TEST_CASE(sync1, webm);
FFMPEG_TEST_CASE(sync2, m4a);
FFMPEG_TEST_CASE(sync2, mp3);
FFMPEG_TEST_CASE(sync2, mp4);
FFMPEG_TEST_CASE(sync2, ogg);
FFMPEG_TEST_CASE(sync2, ogv);
FFMPEG_TEST_CASE(sync2, webm);

// Covers our LayoutTest file.
FFMPEG_TEST_CASE(counting, ogv);

TEST_P(FFmpegTest, Perf) {
  {
    PerfTimeLogger timer("Opening file");
    OpenFile(GetParam());
  }
  {
    PerfTimeLogger timer("Opening codecs");
    OpenCodecs();
  }
  {
    PerfTimeLogger timer("Reading file");
    ReadRemainingFile();
  }
  if (has_audio()) {
    PerfTimeLogger timer("Decoding audio");
    DecodeRemainingAudio();
  }
  if (has_video()) {
    PerfTimeLogger timer("Decoding video");
    DecodeRemainingVideo();
  }
  {
    PerfTimeLogger timer("Seeking to zero");
    SeekTo(0);
  }
}

TEST_P(FFmpegTest, Loop_Audio) {
  OpenAndReadFile(GetParam());
  if (!has_audio()) {
    return;
  }

  const int kSteps = 4;
  std::vector<int64> expected_timestamps;
  for (int i = 0; i < kSteps; ++i) {
    EXPECT_TRUE(StepDecodeAudio());
    expected_timestamps.push_back(decoded_audio_time());
  }

  SeekTo(0);
  ReadRemainingFile();

  for (int i = 0; i < kSteps; ++i) {
    EXPECT_TRUE(StepDecodeAudio());
    EXPECT_EQ(expected_timestamps[i], decoded_audio_time())
        << "Frame " << i << " had a mismatched timestamp.";
  }
}

TEST_P(FFmpegTest, Loop_Video) {
  OpenAndReadFile(GetParam());
  if (!has_video()) {
    return;
  }

  const int kSteps = 4;
  std::vector<int64> expected_timestamps;
  for (int i = 0; i < kSteps; ++i) {
    EXPECT_TRUE(StepDecodeVideo());
    expected_timestamps.push_back(decoded_video_time());
  }

  SeekTo(0);
  ReadRemainingFile();

  for (int i = 0; i < kSteps; ++i) {
    EXPECT_TRUE(StepDecodeVideo());
    EXPECT_EQ(expected_timestamps[i], decoded_video_time())
        << "Frame " << i << " had a mismatched timestamp.";
  }
}

TEST_P(FFmpegTest, Seek_Audio) {
  OpenAndReadFile(GetParam());

  // duration() is in microseconds; skip files with no audio or too little
  // content to seek half a second before the end.
  const double duration_seconds =
      duration() / static_cast<double>(base::Time::kMicrosecondsPerSecond);
  if (!has_audio() || duration_seconds < 0.5) {
    return;
  }

  SeekTo(duration_seconds - 0.5);
  ReadRemainingFile();

  EXPECT_TRUE(StepDecodeAudio());
  EXPECT_NE(static_cast<int64>(AV_NOPTS_VALUE), decoded_audio_time());
}

TEST_P(FFmpegTest, Seek_Video) {
  OpenAndReadFile(GetParam());

  // As above, skip files with no video or less than half a second of content.
  const double duration_seconds =
      duration() / static_cast<double>(base::Time::kMicrosecondsPerSecond);
  if (!has_video() || duration_seconds < 0.5) {
    return;
  }

  SeekTo(duration_seconds - 0.5);
  ReadRemainingFile();

  EXPECT_TRUE(StepDecodeVideo());
  EXPECT_NE(static_cast<int64>(AV_NOPTS_VALUE), decoded_video_time());
}

TEST_P(FFmpegTest, Decode_Audio) {
  OpenAndReadFile(GetParam());
  if (!has_audio()) {
    return;
  }

  int64 last_audio_time = AV_NOPTS_VALUE;
  while (StepDecodeAudio()) {
    ASSERT_GT(decoded_audio_time(), last_audio_time);
    last_audio_time = decoded_audio_time();
  }
}

TEST_P(FFmpegTest, Decode_Video) {
  OpenAndReadFile(GetParam());
  if (!has_video()) {
    return;
  }

  int64 last_video_time = AV_NOPTS_VALUE;
  while (StepDecodeVideo()) {
    ASSERT_GT(decoded_video_time(), last_video_time);
    last_video_time = decoded_video_time();
  }
}

TEST_P(FFmpegTest, Duration) {
  OpenAndReadFile(GetParam());

  if (has_audio()) {
    DecodeRemainingAudio();
  }

  if (has_video()) {
    DecodeRemainingVideo();
  }

  double expected = static_cast<double>(duration());
  double actual = static_cast<double>(
      std::max(decoded_audio_time() + decoded_audio_duration(),
               decoded_video_time() + decoded_video_duration()));
  EXPECT_NEAR(expected, actual, 500000)
      << "Duration is off by more than 0.5 seconds.";
}

TEST_F(FFmpegTest, VideoPlayedCollapse) {
  OpenFile("test.ogv");
  OpenCodecs();

  SeekTo(0.5);
  ReadRemainingFile();
  EXPECT_TRUE(StepDecodeVideo());
  VLOG(1) << decoded_video_time();

  SeekTo(2.83);
  ReadRemainingFile();
  EXPECT_TRUE(StepDecodeVideo());
  VLOG(1) << decoded_video_time();

  SeekTo(0.4);
  ReadRemainingFile();
  EXPECT_TRUE(StepDecodeVideo());
  VLOG(1) << decoded_video_time();

}

}  // namespace media
