1// Copyright (c) 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "media/base/container_names.h"
6
7#include <cctype>
8#include <limits>
9
10#include "base/basictypes.h"
11#include "base/logging.h"
12#include "media/base/bit_reader.h"
13
14namespace media {
15
16namespace container_names {
17
// Packs four 8-bit values into one 32-bit tag. |a| ends up in the most
// significant byte and |d| in the least significant byte. Each argument is
// truncated to 8 bits before being widened.
#define TAG(a, b, c, d)                                   \
    ((static_cast<uint32>(static_cast<uint8>(d))) |       \
     (static_cast<uint32>(static_cast<uint8>(c)) << 8) |  \
     (static_cast<uint32>(static_cast<uint8>(b)) << 16) | \
     (static_cast<uint32>(static_cast<uint8>(a)) << 24))

// Makes the enclosing function return false as soon as condition |x| does not
// hold. Wrapped in do/while(0) so it behaves like a single statement.
#define RCHECK(x)       \
    do {                \
      if (!(x)) {       \
        return false;   \
      }                 \
    } while (0)

// The three bytes of a UTF-8 byte order mark, as a string literal.
#define UTF8_BYTE_ORDER_MARK "\xef\xbb\xbf"
31
// Decodes the two bytes at |p| as a big-endian 16-bit quantity. |p[0]| is the
// high byte and |p[1]| is the low byte.
static int Read16(const uint8* p) {
  const int high_byte = p[0];
  const int low_byte = p[1];
  return (high_byte << 8) | low_byte;
}
36
// Decodes the three bytes at |p| as a big-endian 24-bit quantity, with |p[0]|
// as the most significant byte.
static uint32 Read24(const uint8* p) {
  uint32 value = p[0];
  value = (value << 8) | p[1];
  value = (value << 8) | p[2];
  return value;
}
41
// Helper function to read 4 bytes (32 bits, big endian) from a buffer.
// |p[0]| is the most significant byte. The value is assembled in an unsigned
// 32-bit accumulator so that a leading byte of 0x80 or above is never shifted
// into the sign bit of a promoted (signed) int.
static uint32 Read32(const uint8* p) {
  uint32 value = p[0];
  value = (value << 8) | p[1];
  value = (value << 8) | p[2];
  value = (value << 8) | p[3];
  return value;
}
46
// Helper function to read 4 bytes (32 bits, little endian) from a buffer.
// |p[3]| is the most significant byte. The value is assembled in an unsigned
// 32-bit accumulator so that a top byte of 0x80 or above is never shifted
// into the sign bit of a promoted (signed) int.
static uint32 Read32LE(const uint8* p) {
  uint32 value = p[3];
  value = (value << 8) | p[2];
  value = (value << 8) | p[1];
  value = (value << 8) | p[0];
  return value;
}
51
// Reports whether the first bytes of |buffer| equal the NUL-terminated string
// |prefix| (terminator excluded). Never reads more than |buffer_size| bytes
// from |buffer|: a prefix longer than the buffer is simply not a match.
static bool StartsWith(const uint8* buffer,
                       size_t buffer_size,
                       const char* prefix) {
  const size_t prefix_length = strlen(prefix);
  if (buffer_size < prefix_length)
    return false;
  return memcmp(buffer, prefix, prefix_length) == 0;
}
61
// Reports whether the first |prefix_size| bytes of |buffer| equal the bytes at
// |prefix|. Taking an explicit length lets the prefix contain embedded \0
// bytes. Never reads more than |buffer_size| bytes from |buffer|: a prefix
// longer than the buffer is simply not a match.
static bool StartsWith(const uint8* buffer,
                       size_t buffer_size,
                       const uint8* prefix,
                       size_t prefix_size) {
  if (buffer_size < prefix_size)
    return false;
  return memcmp(buffer, prefix, prefix_size) == 0;
}
71
72// Helper function to read up to 64 bits from a bit stream.
73static uint64 ReadBits(BitReader* reader, int num_bits) {
74  DCHECK_GE(reader->bits_available(), num_bits);
75  DCHECK((num_bits > 0) && (num_bits <= 64));
76  uint64 value;
77  reader->ReadBits(num_bits, &value);
78  return value;
79}
80
// AC-3 frame sizes in bytes, from ATSC A/52 Table 5.18 (the spec lists 16-bit
// words; values here are doubled). Rows are indexed by frmsizecod (0..37) and
// columns by fscod (0 = 48 kHz, 1 = 44.1 kHz, 2 = 32 kHz).
const int kAc3FrameSizeTable[38][3] = {
  { 128, 138, 192 }, { 128, 140, 192 }, { 160, 174, 240 }, { 160, 176, 240 },
  { 192, 208, 288 }, { 192, 210, 288 }, { 224, 242, 336 }, { 224, 244, 336 },
  { 256, 278, 384 }, { 256, 280, 384 }, { 320, 348, 480 }, { 320, 350, 480 },
  { 384, 416, 576 }, { 384, 418, 576 }, { 448, 486, 672 }, { 448, 488, 672 },
  { 512, 556, 768 }, { 512, 558, 768 }, { 640, 696, 960 }, { 640, 698, 960 },
  { 768, 834, 1152 }, { 768, 836, 1152 }, { 896, 974, 1344 },
  { 896, 976, 1344 }, { 1024, 1114, 1536 }, { 1024, 1116, 1536 },
  { 1280, 1392, 1920 }, { 1280, 1394, 1920 }, { 1536, 1670, 2304 },
  { 1536, 1672, 2304 }, { 1792, 1950, 2688 }, { 1792, 1952, 2688 },
  { 2048, 2228, 3072 }, { 2048, 2230, 3072 }, { 2304, 2506, 3456 },
  // 640 kbps at 44.1 kHz is 1393/1394 words, i.e. 2786/2788 bytes.
  { 2304, 2508, 3456 }, { 2560, 2786, 3840 }, { 2560, 2788, 3840 }
};
94
95// Checks for an ADTS AAC container.
96static bool CheckAac(const uint8* buffer, int buffer_size) {
97  // Audio Data Transport Stream (ADTS) header is 7 or 9 bytes
98  // (from http://wiki.multimedia.cx/index.php?title=ADTS)
99  RCHECK(buffer_size > 6);
100
101  int offset = 0;
102  while (offset + 6 < buffer_size) {
103    BitReader reader(buffer + offset, 6);
104
105    // Syncword must be 0xfff.
106    RCHECK(ReadBits(&reader, 12) == 0xfff);
107
108    // Skip MPEG version.
109    reader.SkipBits(1);
110
111    // Layer is always 0.
112    RCHECK(ReadBits(&reader, 2) == 0);
113
114    // Skip protection + profile.
115    reader.SkipBits(1 + 2);
116
117    // Check sampling frequency index.
118    RCHECK(ReadBits(&reader, 4) != 15);  // Forbidden.
119
120    // Skip private stream, channel configuration, originality, home,
121    // copyrighted stream, and copyright_start.
122    reader.SkipBits(1 + 3 + 1 + 1 + 1 + 1);
123
124    // Get frame length (includes header).
125    int size = ReadBits(&reader, 13);
126    RCHECK(size > 0);
127    offset += size;
128  }
129  return true;
130}
131
132const uint16 kAc3SyncWord = 0x0b77;
133
134// Checks for an AC3 container.
135static bool CheckAc3(const uint8* buffer, int buffer_size) {
136  // Reference: ATSC Standard: Digital Audio Compression (AC-3, E-AC-3)
137  //            Doc. A/52:2012
138  // (http://www.atsc.org/cms/standards/A52-2012(12-17).pdf)
139
140  // AC3 container looks like syncinfo | bsi | audblk * 6 | aux | check.
141  RCHECK(buffer_size > 6);
142
143  int offset = 0;
144  while (offset + 6 < buffer_size) {
145    BitReader reader(buffer + offset, 6);
146
147    // Check syncinfo.
148    RCHECK(ReadBits(&reader, 16) == kAc3SyncWord);
149
150    // Skip crc1.
151    reader.SkipBits(16);
152
153    // Verify fscod.
154    int sample_rate_code = ReadBits(&reader, 2);
155    RCHECK(sample_rate_code != 3);  // Reserved.
156
157    // Verify frmsizecod.
158    int frame_size_code = ReadBits(&reader, 6);
159    RCHECK(frame_size_code < 38);  // Undefined.
160
161    // Verify bsid.
162    RCHECK(ReadBits(&reader, 5) < 10);  // Normally 8 or 6, 16 used by EAC3.
163
164    offset += kAc3FrameSizeTable[frame_size_code][sample_rate_code];
165  }
166  return true;
167}
168
169// Checks for an EAC3 container (very similar to AC3)
170static bool CheckEac3(const uint8* buffer, int buffer_size) {
171  // Reference: ATSC Standard: Digital Audio Compression (AC-3, E-AC-3)
172  //            Doc. A/52:2012
173  // (http://www.atsc.org/cms/standards/A52-2012(12-17).pdf)
174
175  // EAC3 container looks like syncinfo | bsi | audfrm | audblk* | aux | check.
176  RCHECK(buffer_size > 6);
177
178  int offset = 0;
179  while (offset + 6 < buffer_size) {
180    BitReader reader(buffer + offset, 6);
181
182    // Check syncinfo.
183    RCHECK(ReadBits(&reader, 16) == kAc3SyncWord);
184
185    // Verify strmtyp.
186    RCHECK(ReadBits(&reader, 2) != 3);
187
188    // Skip substreamid.
189    reader.SkipBits(3);
190
191    // Get frmsize. Include syncinfo size and convert to bytes.
192    int frame_size = (ReadBits(&reader, 11) + 1) * 2;
193    RCHECK(frame_size >= 7);
194
195    // Skip fscod, fscod2, acmod, and lfeon.
196    reader.SkipBits(2 + 2 + 3 + 1);
197
198    // Verify bsid.
199    int bit_stream_id = ReadBits(&reader, 5);
200    RCHECK(bit_stream_id >= 11 && bit_stream_id <= 16);
201
202    offset += frame_size;
203  }
204  return true;
205}
206
207// Additional checks for a BINK container.
208static bool CheckBink(const uint8* buffer, int buffer_size) {
209  // Reference: http://wiki.multimedia.cx/index.php?title=Bink_Container
210  RCHECK(buffer_size >= 44);
211
212  // Verify number of frames specified.
213  RCHECK(Read32LE(buffer + 8) > 0);
214
215  // Verify width in range.
216  int width = Read32LE(buffer + 20);
217  RCHECK(width > 0 && width <= 32767);
218
219  // Verify height in range.
220  int height = Read32LE(buffer + 24);
221  RCHECK(height > 0 && height <= 32767);
222
223  // Verify frames per second specified.
224  RCHECK(Read32LE(buffer + 28) > 0);
225
226  // Verify video frames per second specified.
227  RCHECK(Read32LE(buffer + 32) > 0);
228
229  // Number of audio tracks must be 256 or less.
230  return (Read32LE(buffer + 40) <= 256);
231}
232
233// Additional checks for a CAF container.
234static bool CheckCaf(const uint8* buffer, int buffer_size) {
235  // Reference: Apple Core Audio Format Specification 1.0
236  // (https://developer.apple.com/library/mac/#documentation/MusicAudio/Reference/CAFSpec/CAF_spec/CAF_spec.html)
237  RCHECK(buffer_size >= 52);
238  BitReader reader(buffer, buffer_size);
239
240  // mFileType should be "caff".
241  RCHECK(ReadBits(&reader, 32) == TAG('c', 'a', 'f', 'f'));
242
243  // mFileVersion should be 1.
244  RCHECK(ReadBits(&reader, 16) == 1);
245
246  // Skip mFileFlags.
247  reader.SkipBits(16);
248
249  // First chunk should be Audio Description chunk, size 32l.
250  RCHECK(ReadBits(&reader, 32) == TAG('d', 'e', 's', 'c'));
251  RCHECK(ReadBits(&reader, 64) == 32);
252
253  // CAFAudioFormat.mSampleRate(float64) not 0
254  RCHECK(ReadBits(&reader, 64) != 0);
255
256  // CAFAudioFormat.mFormatID not 0
257  RCHECK(ReadBits(&reader, 32) != 0);
258
259  // Skip CAFAudioFormat.mBytesPerPacket and mFramesPerPacket.
260  reader.SkipBits(32 + 32);
261
262  // CAFAudioFormat.mChannelsPerFrame not 0
263  RCHECK(ReadBits(&reader, 32) != 0);
264  return true;
265}
266
// Lookup tables used by CheckDts(). Both are read-only, so they are declared
// const to keep them from being modified by accident.

// Indexed by the 4-bit core audio sampling frequency code; true when the code
// is an allowed value.
static const bool kSamplingFrequencyValid[16] = { false, true, true, true,
                                                  false, false, true, true,
                                                  true, false, false, true,
                                                  true, true, false, false };

// Indexed by the 3-bit extension audio descriptor flag; true when the value is
// an allowed one.
static const bool kExtAudioIdValid[8] = { true, false, true, false, false,
                                          false, true, false };
273
274// Additional checks for a DTS container.
275static bool CheckDts(const uint8* buffer, int buffer_size) {
276  // Reference: ETSI TS 102 114 V1.3.1 (2011-08)
277  // (http://www.etsi.org/deliver/etsi_ts/102100_102199/102114/01.03.01_60/ts_102114v010301p.pdf)
278  RCHECK(buffer_size > 11);
279
280  int offset = 0;
281  while (offset + 11 < buffer_size) {
282    BitReader reader(buffer + offset, 11);
283
284    // Verify sync word.
285    RCHECK(ReadBits(&reader, 32) == 0x7ffe8001);
286
287    // Skip frame type and deficit sample count.
288    reader.SkipBits(1 + 5);
289
290    // Verify CRC present flag.
291    RCHECK(ReadBits(&reader, 1) == 0);  // CPF must be 0.
292
293    // Verify number of PCM sample blocks.
294    RCHECK(ReadBits(&reader, 7) >= 5);
295
296    // Verify primary frame byte size.
297    int frame_size = ReadBits(&reader, 14);
298    RCHECK(frame_size >= 95);
299
300    // Skip audio channel arrangement.
301    reader.SkipBits(6);
302
303    // Verify core audio sampling frequency is an allowed value.
304    RCHECK(kSamplingFrequencyValid[ReadBits(&reader, 4)]);
305
306    // Verify transmission bit rate is valid.
307    RCHECK(ReadBits(&reader, 5) <= 25);
308
309    // Verify reserved field is 0.
310    RCHECK(ReadBits(&reader, 1) == 0);
311
312    // Skip dynamic range flag, time stamp flag, auxiliary data flag, and HDCD.
313    reader.SkipBits(1 + 1 + 1 + 1);
314
315    // Verify extension audio descriptor flag is an allowed value.
316    RCHECK(kExtAudioIdValid[ReadBits(&reader, 3)]);
317
318    // Skip extended coding flag and audio sync word insertion flag.
319    reader.SkipBits(1 + 1);
320
321    // Verify low frequency effects flag is an allowed value.
322    RCHECK(ReadBits(&reader, 2) != 3);
323
324    offset += frame_size + 1;
325  }
326  return true;
327}
328
329// Checks for a DV container.
330static bool CheckDV(const uint8* buffer, int buffer_size) {
331  // Reference: SMPTE 314M (Annex A has differences with IEC 61834).
332  // (http://standards.smpte.org/content/978-1-61482-454-1/st-314-2005/SEC1.body.pdf)
333  RCHECK(buffer_size > 11);
334
335  int offset = 0;
336  int current_sequence_number = -1;
337  int last_block_number[6];
338  while (offset + 11 < buffer_size) {
339    BitReader reader(buffer + offset, 11);
340
341    // Decode ID data. Sections 5, 6, and 7 are reserved.
342    int section = ReadBits(&reader, 3);
343    RCHECK(section < 5);
344
345    // Next bit must be 1.
346    RCHECK(ReadBits(&reader, 1) == 1);
347
348    // Skip arbitrary bits.
349    reader.SkipBits(4);
350
351    int sequence_number = ReadBits(&reader, 4);
352
353    // Skip FSC.
354    reader.SkipBits(1);
355
356    // Next 3 bits must be 1.
357    RCHECK(ReadBits(&reader, 3) == 7);
358
359    int block_number = ReadBits(&reader, 8);
360
361    if (section == 0) {  // Header.
362      // Validate the reserved bits in the next 8 bytes.
363      reader.SkipBits(1);
364      RCHECK(ReadBits(&reader, 1) == 0);
365      RCHECK(ReadBits(&reader, 11) == 0x7ff);
366      reader.SkipBits(4);
367      RCHECK(ReadBits(&reader, 4) == 0xf);
368      reader.SkipBits(4);
369      RCHECK(ReadBits(&reader, 4) == 0xf);
370      reader.SkipBits(4);
371      RCHECK(ReadBits(&reader, 4) == 0xf);
372      reader.SkipBits(3);
373      RCHECK(ReadBits(&reader, 24) == 0xffffff);
374      current_sequence_number = sequence_number;
375      for (size_t i = 0; i < arraysize(last_block_number); ++i)
376        last_block_number[i] = -1;
377    } else {
378      // Sequence number must match (this will also fail if no header seen).
379      RCHECK(sequence_number == current_sequence_number);
380      // Block number should be increasing.
381      RCHECK(block_number > last_block_number[section]);
382      last_block_number[section] = block_number;
383    }
384
385    // Move to next block.
386    offset += 80;
387  }
388  return true;
389}
390
391
392// Checks for a GSM container.
393static bool CheckGsm(const uint8* buffer, int buffer_size) {
394  // Reference: ETSI EN 300 961 V8.1.1
395  // (http://www.etsi.org/deliver/etsi_en/300900_300999/300961/08.01.01_60/en_300961v080101p.pdf)
396  // also http://tools.ietf.org/html/rfc3551#page-24
397  // GSM files have a 33 byte block, only first 4 bits are fixed.
398  RCHECK(buffer_size >= 1024);  // Need enough data to do a decent check.
399
400  int offset = 0;
401  while (offset < buffer_size) {
402    // First 4 bits of each block are xD.
403    RCHECK((buffer[offset] & 0xf0) == 0xd0);
404    offset += 33;
405  }
406  return true;
407}
408
409// Advance to the first set of |num_bits| bits that match |start_code|. |offset|
410// is the current location in the buffer, and is updated. |bytes_needed| is the
411// number of bytes that must remain in the buffer when |start_code| is found.
412// Returns true if start_code found (and enough space in the buffer after it),
413// false otherwise.
414static bool AdvanceToStartCode(const uint8* buffer,
415                               int buffer_size,
416                               int* offset,
417                               int bytes_needed,
418                               int num_bits,
419                               uint32 start_code) {
420  DCHECK_GE(bytes_needed, 3);
421  DCHECK_LE(num_bits, 24);  // Only supports up to 24 bits.
422
423  // Create a mask to isolate |num_bits| bits, once shifted over.
424  uint32 bits_to_shift = 24 - num_bits;
425  uint32 mask = (1 << num_bits) - 1;
426  while (*offset + bytes_needed < buffer_size) {
427    uint32 next = Read24(buffer + *offset);
428    if (((next >> bits_to_shift) & mask) == start_code)
429      return true;
430    ++(*offset);
431  }
432  return false;
433}
434
435// Checks for an H.261 container.
436static bool CheckH261(const uint8* buffer, int buffer_size) {
437  // Reference: ITU-T Recommendation H.261 (03/1993)
438  // (http://www.itu.int/rec/T-REC-H.261-199303-I/en)
439  RCHECK(buffer_size > 16);
440
441  int offset = 0;
442  bool seen_start_code = false;
443  while (true) {
444    // Advance to picture_start_code, if there is one.
445    if (!AdvanceToStartCode(buffer, buffer_size, &offset, 4, 20, 0x10)) {
446      // No start code found (or off end of buffer), so success if
447      // there was at least one valid header.
448      return seen_start_code;
449    }
450
451    // Now verify the block. AdvanceToStartCode() made sure that there are
452    // at least 4 bytes remaining in the buffer.
453    BitReader reader(buffer + offset, buffer_size - offset);
454    RCHECK(ReadBits(&reader, 20) == 0x10);
455
456    // Skip the temporal reference and PTYPE.
457    reader.SkipBits(5 + 6);
458
459    // Skip any extra insertion information. Since this is open-ended, if we run
460    // out of bits assume that the buffer is correctly formatted.
461    int extra = ReadBits(&reader, 1);
462    while (extra == 1) {
463      if (!reader.SkipBits(8))
464        return seen_start_code;
465      if (!reader.ReadBits(1, &extra))
466        return seen_start_code;
467    }
468
469    // Next should be a Group of Blocks start code. Again, if we run out of
470    // bits, then assume that the buffer up to here is correct, and the buffer
471    // just happened to end in the middle of a header.
472    int next;
473    if (!reader.ReadBits(16, &next))
474      return seen_start_code;
475    RCHECK(next == 1);
476
477    // Move to the next block.
478    seen_start_code = true;
479    offset += 4;
480  }
481}
482
483// Checks for an H.263 container.
484static bool CheckH263(const uint8* buffer, int buffer_size) {
485  // Reference: ITU-T Recommendation H.263 (01/2005)
486  // (http://www.itu.int/rec/T-REC-H.263-200501-I/en)
487  // header is PSC(22b) + TR(8b) + PTYPE(8+b).
488  RCHECK(buffer_size > 16);
489
490  int offset = 0;
491  bool seen_start_code = false;
492  while (true) {
493    // Advance to picture_start_code, if there is one.
494    if (!AdvanceToStartCode(buffer, buffer_size, &offset, 9, 22, 0x20)) {
495      // No start code found (or off end of buffer), so success if
496      // there was at least one valid header.
497      return seen_start_code;
498    }
499
500    // Now verify the block. AdvanceToStartCode() made sure that there are
501    // at least 9 bytes remaining in the buffer.
502    BitReader reader(buffer + offset, 9);
503    RCHECK(ReadBits(&reader, 22) == 0x20);
504
505    // Skip the temporal reference.
506    reader.SkipBits(8);
507
508    // Verify that the first 2 bits of PTYPE are 10b.
509    RCHECK(ReadBits(&reader, 2) == 2);
510
511    // Skip the split screen indicator, document camera indicator, and full
512    // picture freeze release.
513    reader.SkipBits(1 + 1 + 1);
514
515    // Verify Source Format.
516    int format = ReadBits(&reader, 3);
517    RCHECK(format != 0 && format != 6);  // Forbidden or reserved.
518
519    if (format == 7) {
520      // Verify full extended PTYPE.
521      int ufep = ReadBits(&reader, 3);
522      if (ufep == 1) {
523        // Verify the optional part of PLUSPTYPE.
524        format = ReadBits(&reader, 3);
525        RCHECK(format != 0 && format != 7);  // Reserved.
526        reader.SkipBits(11);
527        // Next 4 bits should be b1000.
528        RCHECK(ReadBits(&reader, 4) == 8);  // Not allowed.
529      } else {
530        RCHECK(ufep == 0);  // Only 0 and 1 allowed.
531      }
532
533      // Verify picture type code is not a reserved value.
534      int picture_type_code = ReadBits(&reader, 3);
535      RCHECK(picture_type_code != 6 && picture_type_code != 7);  // Reserved.
536
537      // Skip picture resampling mode, reduced resolution mode,
538      // and rounding type.
539      reader.SkipBits(1 + 1 + 1);
540
541      // Next 3 bits should be b001.
542      RCHECK(ReadBits(&reader, 3) == 1);  // Not allowed.
543    }
544
545    // Move to the next block.
546    seen_start_code = true;
547    offset += 9;
548  }
549}
550
551// Checks for an H.264 container.
552static bool CheckH264(const uint8* buffer, int buffer_size) {
553  // Reference: ITU-T Recommendation H.264 (01/2012)
554  // (http://www.itu.int/rec/T-REC-H.264)
555  // Section B.1: Byte stream NAL unit syntax and semantics.
556  RCHECK(buffer_size > 4);
557
558  int offset = 0;
559  int parameter_count = 0;
560  while (true) {
561    // Advance to picture_start_code, if there is one.
562    if (!AdvanceToStartCode(buffer, buffer_size, &offset, 4, 24, 1)) {
563      // No start code found (or off end of buffer), so success if
564      // there was at least one valid header.
565      return parameter_count > 0;
566    }
567
568    // Now verify the block. AdvanceToStartCode() made sure that there are
569    // at least 4 bytes remaining in the buffer.
570    BitReader reader(buffer + offset, 4);
571    RCHECK(ReadBits(&reader, 24) == 1);
572
573    // Verify forbidden_zero_bit.
574    RCHECK(ReadBits(&reader, 1) == 0);
575
576    // Extract nal_ref_idc and nal_unit_type.
577    int nal_ref_idc = ReadBits(&reader, 2);
578    int nal_unit_type = ReadBits(&reader, 5);
579
580    switch (nal_unit_type) {
581      case 5:  // Coded slice of an IDR picture.
582        RCHECK(nal_ref_idc != 0);
583        break;
584      case 6:   // Supplemental enhancement information (SEI).
585      case 9:   // Access unit delimiter.
586      case 10:  // End of sequence.
587      case 11:  // End of stream.
588      case 12:  // Filler data.
589        RCHECK(nal_ref_idc == 0);
590        break;
591      case 7:  // Sequence parameter set.
592      case 8:  // Picture parameter set.
593        ++parameter_count;
594        break;
595    }
596
597    // Skip the current start_code_prefix and move to the next.
598    offset += 4;
599  }
600}
601
602static const char kHlsSignature[] = "#EXTM3U";
603static const char kHls1[] = "#EXT-X-STREAM-INF:";
604static const char kHls2[] = "#EXT-X-TARGETDURATION:";
605static const char kHls3[] = "#EXT-X-MEDIA-SEQUENCE:";
606
607// Additional checks for a HLS container.
608static bool CheckHls(const uint8* buffer, int buffer_size) {
609  // HLS is simply a play list used for Apple HTTP Live Streaming.
610  // Reference: Apple HTTP Live Streaming Overview
611  // (http://goo.gl/MIwxj)
612
613  if (StartsWith(buffer, buffer_size, kHlsSignature)) {
614    // Need to find "#EXT-X-STREAM-INF:", "#EXT-X-TARGETDURATION:", or
615    // "#EXT-X-MEDIA-SEQUENCE:" somewhere in the buffer. Other playlists (like
616    // WinAmp) only have additional lines with #EXTINF
617    // (http://en.wikipedia.org/wiki/M3U).
618    int offset = strlen(kHlsSignature);
619    while (offset < buffer_size) {
620      if (buffer[offset] == '#') {
621        if (StartsWith(buffer + offset, buffer_size - offset, kHls1) ||
622            StartsWith(buffer + offset, buffer_size - offset, kHls2) ||
623            StartsWith(buffer + offset, buffer_size - offset, kHls3)) {
624          return true;
625        }
626      }
627      ++offset;
628    }
629  }
630  return false;
631}
632
633// Checks for a MJPEG stream.
634static bool CheckMJpeg(const uint8* buffer, int buffer_size) {
635  // Reference: ISO/IEC 10918-1 : 1993(E), Annex B
636  // (http://www.w3.org/Graphics/JPEG/itu-t81.pdf)
637  RCHECK(buffer_size >= 16);
638
639  int offset = 0;
640  int last_restart = -1;
641  int num_codes = 0;
642  while (offset + 5 < buffer_size) {
643    // Marker codes are always a two byte code with the first byte xFF.
644    RCHECK(buffer[offset] == 0xff);
645    uint8 code = buffer[offset + 1];
646    RCHECK(code >= 0xc0 || code == 1);
647
648    // Skip sequences of xFF.
649    if (code == 0xff) {
650      ++offset;
651      continue;
652    }
653
654    // Success if the next marker code is EOI (end of image)
655    if (code == 0xd9)
656      return true;
657
658    // Check remaining codes.
659    if (code == 0xd8 || code == 1) {
660      // SOI (start of image) / TEM (private use). No other data with header.
661      offset += 2;
662    } else if (code >= 0xd0 && code <= 0xd7) {
663      // RST (restart) codes must be in sequence. No other data with header.
664      int restart = code & 0x07;
665      if (last_restart >= 0)
666        RCHECK(restart == (last_restart + 1) % 8);
667      last_restart = restart;
668      offset += 2;
669    } else {
670      // All remaining marker codes are followed by a length of the header.
671      int length = Read16(buffer + offset + 2) + 2;
672
673      // Special handling of SOS (start of scan) marker since the entropy
674      // coded data follows the SOS. Any xFF byte in the data block must be
675      // followed by x00 in the data.
676      if (code == 0xda) {
677        int number_components = buffer[offset + 4];
678        RCHECK(length == 8 + 2 * number_components);
679
680        // Advance to the next marker.
681        offset += length;
682        while (offset + 2 < buffer_size) {
683          if (buffer[offset] == 0xff && buffer[offset + 1] != 0)
684            break;
685          ++offset;
686        }
687      } else {
688        // Skip over the marker data for the other marker codes.
689        offset += length;
690      }
691    }
692    ++num_codes;
693  }
694  return (num_codes > 1);
695}
696
697enum Mpeg2StartCodes {
698  PROGRAM_END_CODE = 0xb9,
699  PACK_START_CODE = 0xba
700};
701
702// Checks for a MPEG2 Program Stream.
703static bool CheckMpeg2ProgramStream(const uint8* buffer, int buffer_size) {
704  // Reference: ISO/IEC 13818-1 : 2000 (E) / ITU-T Rec. H.222.0 (2000 E).
705  RCHECK(buffer_size > 14);
706
707  int offset = 0;
708  while (offset + 14 < buffer_size) {
709    BitReader reader(buffer + offset, 14);
710
711    // Must start with pack_start_code.
712    RCHECK(ReadBits(&reader, 24) == 1);
713    RCHECK(ReadBits(&reader, 8) == PACK_START_CODE);
714
715    // Determine MPEG version (MPEG1 has b0010, while MPEG2 has b01).
716    int mpeg_version = ReadBits(&reader, 2);
717    if (mpeg_version == 0) {
718      // MPEG1, 10 byte header
719      // Validate rest of version code
720      RCHECK(ReadBits(&reader, 2) == 2);
721    } else {
722      RCHECK(mpeg_version == 1);
723    }
724
725    // Skip system_clock_reference_base [32..30].
726    reader.SkipBits(3);
727
728    // Verify marker bit.
729    RCHECK(ReadBits(&reader, 1) == 1);
730
731    // Skip system_clock_reference_base [29..15].
732    reader.SkipBits(15);
733
734    // Verify next marker bit.
735    RCHECK(ReadBits(&reader, 1) == 1);
736
737    // Skip system_clock_reference_base [14..0].
738    reader.SkipBits(15);
739
740    // Verify next marker bit.
741    RCHECK(ReadBits(&reader, 1) == 1);
742
743    if (mpeg_version == 0) {
744      // Verify second marker bit.
745      RCHECK(ReadBits(&reader, 1) == 1);
746
747      // Skip mux_rate.
748      reader.SkipBits(22);
749
750      // Verify next marker bit.
751      RCHECK(ReadBits(&reader, 1) == 1);
752
753      // Update offset to be after this header.
754      offset += 12;
755    } else {
756      // Must be MPEG2.
757      // Skip program_mux_rate.
758      reader.SkipBits(22);
759
760      // Verify pair of marker bits.
761      RCHECK(ReadBits(&reader, 2) == 3);
762
763      // Skip reserved.
764      reader.SkipBits(5);
765
766      // Update offset to be after this header.
767      int pack_stuffing_length = ReadBits(&reader, 3);
768      offset += 14 + pack_stuffing_length;
769    }
770
771    // Check for system headers and PES_packets.
772    while (offset + 6 < buffer_size && Read24(buffer + offset) == 1) {
773      // Next 8 bits determine stream type.
774      int stream_id = buffer[offset + 3];
775
776      // Some stream types are reserved and shouldn't occur.
777      if (mpeg_version == 0)
778        RCHECK(stream_id != 0xbc && stream_id < 0xf0);
779      else
780        RCHECK(stream_id != 0xfc && stream_id != 0xfd && stream_id != 0xfe);
781
782      // Some stream types are used for pack headers.
783      if (stream_id == PACK_START_CODE)  // back to outer loop.
784        break;
785      if (stream_id == PROGRAM_END_CODE)  // end of stream.
786        return true;
787
788      int pes_length = Read16(buffer + offset + 4);
789      RCHECK(pes_length > 0);
790      offset = offset + 6 + pes_length;
791    }
792  }
793  // Success as we are off the end of the buffer and liked everything
794  // in the buffer.
795  return true;
796}
797
798const uint8 kMpeg2SyncWord = 0x47;
799
800// Checks for a MPEG2 Transport Stream.
801static bool CheckMpeg2TransportStream(const uint8* buffer, int buffer_size) {
802  // Spec: ISO/IEC 13818-1 : 2000 (E) / ITU-T Rec. H.222.0 (2000 E).
803  // Normal packet size is 188 bytes. However, some systems add various error
804  // correction data at the end, resulting in packet of length 192/204/208
805  // (https://en.wikipedia.org/wiki/MPEG_transport_stream). Determine the
806  // length with the first packet.
807  RCHECK(buffer_size >= 250);  // Want more than 1 packet to check.
808
809  int offset = 0;
810  int packet_length = -1;
811  while (buffer[offset] != kMpeg2SyncWord && offset < 20) {
812    // Skip over any header in the first 20 bytes.
813    ++offset;
814  }
815
816  while (offset + 6 < buffer_size) {
817    BitReader reader(buffer + offset, 6);
818
819    // Must start with sync byte.
820    RCHECK(ReadBits(&reader, 8) == kMpeg2SyncWord);
821
822    // Skip transport_error_indicator, payload_unit_start_indicator, and
823    // transport_priority.
824    reader.SkipBits(1 + 1 + 1);
825
826    // Verify the pid is not a reserved value.
827    int pid = ReadBits(&reader, 13);
828    RCHECK(pid < 3 || pid > 15);
829
830    // Skip transport_scrambling_control.
831    reader.SkipBits(2);
832
833    // Adaptation_field_control can not be 0.
834    int adaptation_field_control = ReadBits(&reader, 2);
835    RCHECK(adaptation_field_control != 0);
836
837    // If there is an adaptation_field, verify it.
838    if (adaptation_field_control >= 2) {
839      // Skip continuity_counter.
840      reader.SkipBits(4);
841
842      // Get adaptation_field_length and verify it.
843      int adaptation_field_length = ReadBits(&reader, 8);
844      if (adaptation_field_control == 2)
845        RCHECK(adaptation_field_length == 183);
846      else
847        RCHECK(adaptation_field_length <= 182);
848    }
849
850    // Attempt to determine the packet length on the first packet.
851    if (packet_length < 0) {
852      if (buffer[offset + 188] == kMpeg2SyncWord)
853        packet_length = 188;
854      else if (buffer[offset + 192] == kMpeg2SyncWord)
855        packet_length = 192;
856      else if (buffer[offset + 204] == kMpeg2SyncWord)
857        packet_length = 204;
858      else
859        packet_length = 208;
860    }
861    offset += packet_length;
862  }
863  return true;
864}
865
866enum Mpeg4StartCodes {
867  VISUAL_OBJECT_SEQUENCE_START_CODE = 0xb0,
868  VISUAL_OBJECT_SEQUENCE_END_CODE = 0xb1,
869  VISUAL_OBJECT_START_CODE = 0xb5,
870  VOP_START_CODE = 0xb6
871};
872
873// Checks for a raw MPEG4 bitstream container.
874static bool CheckMpeg4BitStream(const uint8* buffer, int buffer_size) {
875  // Defined in ISO/IEC 14496-2:2001.
876  // However, no length ... simply scan for start code values.
877  // Note tags are very similar to H.264.
878  RCHECK(buffer_size > 4);
879
880  int offset = 0;
881  int sequence_start_count = 0;
882  int sequence_end_count = 0;
883  int visual_object_count = 0;
884  int vop_count = 0;
885  while (true) {
886    // Advance to start_code, if there is one.
887    if (!AdvanceToStartCode(buffer, buffer_size, &offset, 6, 24, 1)) {
888      // Not a complete sequence in memory, so return true if we've seen a
889      // visual_object_sequence_start_code and a visual_object_start_code.
890      return (sequence_start_count > 0 && visual_object_count > 0);
891    }
892
893    // Now verify the block. AdvanceToStartCode() made sure that there are
894    // at least 6 bytes remaining in the buffer.
895    BitReader reader(buffer + offset, 6);
896    RCHECK(ReadBits(&reader, 24) == 1);
897
898    int start_code = ReadBits(&reader, 8);
899    RCHECK(start_code < 0x30 || start_code > 0xaf);  // 30..AF and
900    RCHECK(start_code < 0xb7 || start_code > 0xb9);  // B7..B9 reserved
901
902    switch (start_code) {
903      case VISUAL_OBJECT_SEQUENCE_START_CODE: {
904        ++sequence_start_count;
905        // Verify profile in not one of many reserved values.
906        int profile = ReadBits(&reader, 8);
907        RCHECK(profile > 0);
908        RCHECK(profile < 0x04 || profile > 0x10);
909        RCHECK(profile < 0x13 || profile > 0x20);
910        RCHECK(profile < 0x23 || profile > 0x31);
911        RCHECK(profile < 0x35 || profile > 0x41);
912        RCHECK(profile < 0x43 || profile > 0x60);
913        RCHECK(profile < 0x65 || profile > 0x70);
914        RCHECK(profile < 0x73 || profile > 0x80);
915        RCHECK(profile < 0x83 || profile > 0x90);
916        RCHECK(profile < 0x95 || profile > 0xa0);
917        RCHECK(profile < 0xa4 || profile > 0xb0);
918        RCHECK(profile < 0xb5 || profile > 0xc0);
919        RCHECK(profile < 0xc3 || profile > 0xd0);
920        RCHECK(profile < 0xe4);
921        break;
922      }
923
924      case VISUAL_OBJECT_SEQUENCE_END_CODE:
925        RCHECK(++sequence_end_count == sequence_start_count);
926        break;
927
928      case VISUAL_OBJECT_START_CODE: {
929        ++visual_object_count;
930        if (ReadBits(&reader, 1) == 1) {
931          int visual_object_verid = ReadBits(&reader, 4);
932          RCHECK(visual_object_verid > 0 && visual_object_verid < 3);
933          RCHECK(ReadBits(&reader, 3) != 0);
934        }
935        int visual_object_type = ReadBits(&reader, 4);
936        RCHECK(visual_object_type > 0 && visual_object_type < 6);
937        break;
938      }
939
940      case VOP_START_CODE:
941        RCHECK(++vop_count <= visual_object_count);
942        break;
943    }
944    // Skip this block.
945    offset += 6;
946  }
947}
948
949// Additional checks for a MOV/QuickTime/MPEG4 container.
950static bool CheckMov(const uint8* buffer, int buffer_size) {
951  // Reference: ISO/IEC 14496-12:2005(E).
952  // (http://standards.iso.org/ittf/PubliclyAvailableStandards/c061988_ISO_IEC_14496-12_2012.zip)
953  RCHECK(buffer_size > 8);
954
955  int offset = 0;
956  while (offset + 8 < buffer_size) {
957    uint32 atomsize = Read32(buffer + offset);
958    uint32 atomtype = Read32(buffer + offset + 4);
959    // Only need to check for ones that are valid at the top level.
960    switch (atomtype) {
961      case TAG('f','t','y','p'):
962      case TAG('p','d','i','n'):
963      case TAG('m','o','o','v'):
964      case TAG('m','o','o','f'):
965      case TAG('m','f','r','a'):
966      case TAG('m','d','a','t'):
967      case TAG('f','r','e','e'):
968      case TAG('s','k','i','p'):
969      case TAG('m','e','t','a'):
970      case TAG('m','e','c','o'):
971      case TAG('s','t','y','p'):
972      case TAG('s','i','d','x'):
973      case TAG('s','s','i','x'):
974      case TAG('p','r','f','t'):
975      case TAG('b','l','o','c'):
976        break;
977      default:
978        return false;
979    }
980    if (atomsize == 1) {
981      // Indicates that the length is the next 64bits.
982      if (offset + 16 > buffer_size)
983        break;
984      if (Read32(buffer + offset + 8) != 0)
985        break;  // Offset is way past buffer size.
986      atomsize = Read32(buffer + offset + 12);
987    }
988    if (atomsize == 0 || atomsize > static_cast<size_t>(buffer_size))
989      break;  // Indicates the last atom or length too big.
990    offset += atomsize;
991  }
992  return true;
993}
994
// Values of the 2-bit version field in an MPEG audio frame header. They are
// also used as the row index into kSampleRateTable.
enum MPEGVersion {
  VERSION_25 = 0,        // v2.5
  VERSION_RESERVED = 1,  // not used
  VERSION_2 = 2,         // v2
  VERSION_1 = 3          // v1
};
// Values of the 2-bit layer field in an MPEG audio frame header. Note that
// the numbering runs backwards: the largest value is Layer 1.
enum MPEGLayer {
  L_RESERVED = 0,
  LAYER_3 = 1,
  LAYER_2 = 2,
  LAYER_1 = 3
};
1007
// Sample rates in Hz, indexed by [MPEG version field][sampling rate index].
// A 0 entry marks a combination that has no valid sample rate. The tables
// below are read-only lookup data, hence const.
static const int kSampleRateTable[4][4] = {
  { 11025, 12000, 8000, 0 },   // v2.5
  { 0, 0, 0, 0 },              // not used
  { 22050, 24000, 16000, 0 },  // v2
  { 44100, 48000, 32000, 0 }   // v1
};

// Bit rates indexed by the 4-bit bitrate index of the frame header, one table
// per version/layer combination (V1 = version 1, V2 = versions 2 and 2.5;
// L1/L2/L3 = layer). The values are multiplied by 1000 when a frame size is
// computed. A 0 entry is treated as invalid by ValidMpegAudioFrameHeader().
static const int kBitRateTableV1L1[16] = { 0, 32, 64, 96, 128, 160, 192, 224,
                                           256, 288, 320, 352, 384, 416, 448,
                                           0 };
static const int kBitRateTableV1L2[16] = { 0, 32, 48, 56, 64, 80, 96, 112, 128,
                                           160, 192, 224, 256, 320, 384, 0 };
static const int kBitRateTableV1L3[16] = { 0, 32, 40, 48, 56, 64, 80, 96, 112,
                                           128, 160, 192, 224, 256, 320, 0 };
static const int kBitRateTableV2L1[16] = { 0, 32, 48, 56, 64, 80, 96, 112, 128,
                                           144, 160, 176, 192, 224, 256, 0 };
static const int kBitRateTableV2L23[16] = { 0, 8, 16, 24, 32, 40, 48, 56, 64,
                                            80, 96, 112, 128, 144, 160, 0 };
1024
1025static bool ValidMpegAudioFrameHeader(const uint8* header,
1026                                      int header_size,
1027                                      int* framesize) {
1028  // Reference: http://mpgedit.org/mpgedit/mpeg_format/mpeghdr.htm.
1029  DCHECK_GE(header_size, 4);
1030  *framesize = 0;
1031  BitReader reader(header, 4);  // Header can only be 4 bytes long.
1032
1033  // Verify frame sync (11 bits) are all set.
1034  RCHECK(ReadBits(&reader, 11) == 0x7ff);
1035
1036  // Verify MPEG audio version id.
1037  int version = ReadBits(&reader, 2);
1038  RCHECK(version != 1);  // Reserved.
1039
1040  // Verify layer.
1041  int layer = ReadBits(&reader, 2);
1042  RCHECK(layer != 0);
1043
1044  // Skip protection bit.
1045  reader.SkipBits(1);
1046
1047  // Verify bitrate index.
1048  int bitrate_index = ReadBits(&reader, 4);
1049  RCHECK(bitrate_index != 0xf);
1050
1051  // Verify sampling rate frequency index.
1052  int sampling_index = ReadBits(&reader, 2);
1053  RCHECK(sampling_index != 3);
1054
1055  // Get padding bit.
1056  int padding = ReadBits(&reader, 1);
1057
1058  // Frame size:
1059  // For Layer I files = (12 * BitRate / SampleRate + Padding) * 4
1060  // For others = 144 * BitRate / SampleRate + Padding
1061  // Unfortunately, BitRate and SampleRate are coded.
1062  int sampling_rate = kSampleRateTable[version][sampling_index];
1063  int bitrate;
1064  if (version == VERSION_1) {
1065    if (layer == LAYER_1)
1066      bitrate = kBitRateTableV1L1[bitrate_index];
1067    else if (layer == LAYER_2)
1068      bitrate = kBitRateTableV1L2[bitrate_index];
1069    else
1070      bitrate = kBitRateTableV1L3[bitrate_index];
1071  } else {
1072    if (layer == LAYER_1)
1073      bitrate = kBitRateTableV2L1[bitrate_index];
1074    else
1075      bitrate = kBitRateTableV2L23[bitrate_index];
1076  }
1077  if (layer == LAYER_1)
1078    *framesize = ((12000 * bitrate) / sampling_rate + padding) * 4;
1079  else
1080    *framesize = (144000 * bitrate) / sampling_rate + padding;
1081  return (bitrate > 0 && sampling_rate > 0);
1082}
1083
1084// Extract a size encoded the MP3 way.
1085static int GetMp3HeaderSize(const uint8* buffer, int buffer_size) {
1086  DCHECK_GE(buffer_size, 9);
1087  int size = ((buffer[6] & 0x7f) << 21) + ((buffer[7] & 0x7f) << 14) +
1088             ((buffer[8] & 0x7f) << 7) + (buffer[9] & 0x7f) + 10;
1089  if (buffer[5] & 0x10)  // Footer added?
1090    size += 10;
1091  return size;
1092}
1093
1094// Additional checks for a MP3 container.
1095static bool CheckMp3(const uint8* buffer, int buffer_size, bool seenHeader) {
1096  RCHECK(buffer_size >= 10);  // Must be enough to read the initial header.
1097
1098  int framesize;
1099  int numSeen = 0;
1100  int offset = 0;
1101  if (seenHeader) {
1102    offset = GetMp3HeaderSize(buffer, buffer_size);
1103  } else {
1104    // Skip over leading 0's.
1105    while (offset < buffer_size && buffer[offset] == 0)
1106      ++offset;
1107  }
1108
1109  while (offset + 3 < buffer_size) {
1110    RCHECK(ValidMpegAudioFrameHeader(
1111        buffer + offset, buffer_size - offset, &framesize));
1112
1113    // Have we seen enough valid headers?
1114    if (++numSeen > 10)
1115      return true;
1116    offset += framesize;
1117  }
1118  // Off the end of the buffer, return success if a few valid headers seen.
1119  return numSeen > 2;
1120}
1121
1122// Check that the next characters in |buffer| represent a number. The format
1123// accepted is optional whitespace followed by 1 or more digits. |max_digits|
1124// specifies the maximum number of digits to process. Returns true if a valid
1125// number is found, false otherwise.
1126static bool VerifyNumber(const uint8* buffer,
1127                         int buffer_size,
1128                         int* offset,
1129                         int max_digits) {
1130  RCHECK(*offset < buffer_size);
1131
1132  // Skip over any leading space.
1133  while (isspace(buffer[*offset])) {
1134    ++(*offset);
1135    RCHECK(*offset < buffer_size);
1136  }
1137
1138  // Need to process up to max_digits digits.
1139  int numSeen = 0;
1140  while (--max_digits >= 0 && isdigit(buffer[*offset])) {
1141    ++numSeen;
1142    ++(*offset);
1143    if (*offset >= buffer_size)
1144      return true;  // Out of space but seen a digit.
1145  }
1146
1147  // Success if at least one digit seen.
1148  return (numSeen > 0);
1149}
1150
1151// Check that the next character in |buffer| is one of |c1| or |c2|. |c2| is
1152// optional. Returns true if there is a match, false if no match or out of
1153// space.
1154static inline bool VerifyCharacters(const uint8* buffer,
1155                                    int buffer_size,
1156                                    int* offset,
1157                                    char c1,
1158                                    char c2) {
1159  RCHECK(*offset < buffer_size);
1160  char c = static_cast<char>(buffer[(*offset)++]);
1161  return (c == c1 || (c == c2 && c2 != 0));
1162}
1163
1164// Checks for a SRT container.
1165static bool CheckSrt(const uint8* buffer, int buffer_size) {
1166  // Reference: http://en.wikipedia.org/wiki/SubRip
1167  RCHECK(buffer_size > 20);
1168
1169  // First line should just be the subtitle sequence number.
1170  int offset = StartsWith(buffer, buffer_size, UTF8_BYTE_ORDER_MARK) ? 3 : 0;
1171  RCHECK(VerifyNumber(buffer, buffer_size, &offset, 100));
1172  RCHECK(VerifyCharacters(buffer, buffer_size, &offset, '\n', '\r'));
1173
1174  // Skip any additional \n\r.
1175  while (VerifyCharacters(buffer, buffer_size, &offset, '\n', '\r')) {}
1176  --offset;  // Since VerifyCharacters() gobbled up the next non-CR/LF.
1177
1178  // Second line should look like the following:
1179  //   00:00:10,500 --> 00:00:13,000
1180  // Units separator can be , or .
1181  RCHECK(VerifyNumber(buffer, buffer_size, &offset, 100));
1182  RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ':', 0));
1183  RCHECK(VerifyNumber(buffer, buffer_size, &offset, 2));
1184  RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ':', 0));
1185  RCHECK(VerifyNumber(buffer, buffer_size, &offset, 2));
1186  RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ',', '.'));
1187  RCHECK(VerifyNumber(buffer, buffer_size, &offset, 3));
1188  RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ' ', 0));
1189  RCHECK(VerifyCharacters(buffer, buffer_size, &offset, '-', 0));
1190  RCHECK(VerifyCharacters(buffer, buffer_size, &offset, '-', 0));
1191  RCHECK(VerifyCharacters(buffer, buffer_size, &offset, '>', 0));
1192  RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ' ', 0));
1193  RCHECK(VerifyNumber(buffer, buffer_size, &offset, 100));
1194  RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ':', 0));
1195  RCHECK(VerifyNumber(buffer, buffer_size, &offset, 2));
1196  RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ':', 0));
1197  RCHECK(VerifyNumber(buffer, buffer_size, &offset, 2));
1198  RCHECK(VerifyCharacters(buffer, buffer_size, &offset, ',', '.'));
1199  RCHECK(VerifyNumber(buffer, buffer_size, &offset, 3));
1200  return true;
1201}
1202
1203// Read a Matroska Element Id.
1204static int GetElementId(BitReader* reader) {
1205  // Element ID is coded with the leading zero bits (max 3) determining size.
1206  // If it is an invalid encoding or the end of the buffer is reached,
1207  // return -1 as a tag that won't be expected.
1208  if (reader->bits_available() >= 8) {
1209    int num_bits_to_read = 0;
1210    static int prefix[] = { 0x80, 0x4000, 0x200000, 0x10000000 };
1211    for (int i = 0; i < 4; ++i) {
1212      num_bits_to_read += 7;
1213      if (ReadBits(reader, 1) == 1) {
1214        if (reader->bits_available() < num_bits_to_read)
1215          break;
1216        // prefix[] adds back the bits read individually.
1217        return ReadBits(reader, num_bits_to_read) | prefix[i];
1218      }
1219    }
1220  }
1221  // Invalid encoding, return something not expected.
1222  return -1;
1223}
1224
1225// Read a Matroska Unsigned Integer (VINT).
1226static uint64 GetVint(BitReader* reader) {
1227  // Values are coded with the leading zero bits (max 7) determining size.
1228  // If it is an invalid coding or the end of the buffer is reached,
1229  // return something that will go off the end of the buffer.
1230  if (reader->bits_available() >= 8) {
1231    int num_bits_to_read = 0;
1232    for (int i = 0; i < 8; ++i) {
1233      num_bits_to_read += 7;
1234      if (ReadBits(reader, 1) == 1) {
1235        if (reader->bits_available() < num_bits_to_read)
1236          break;
1237        return ReadBits(reader, num_bits_to_read);
1238      }
1239    }
1240  }
1241  // Incorrect format (more than 7 leading 0's) or off the end of the buffer.
1242  // Since the return value is used as a byte size, return a value that will
1243  // cause a failure when used.
1244  return (reader->bits_available() / 8) + 2;
1245}
1246
1247// Additional checks for a WEBM container.
1248static bool CheckWebm(const uint8* buffer, int buffer_size) {
1249  // Reference: http://www.matroska.org/technical/specs/index.html
1250  RCHECK(buffer_size > 12);
1251
1252  BitReader reader(buffer, buffer_size);
1253
1254  // Verify starting Element Id.
1255  RCHECK(GetElementId(&reader) == 0x1a45dfa3);
1256
1257  // Get the header size, and ensure there are enough bits to check.
1258  int header_size = GetVint(&reader);
1259  RCHECK(reader.bits_available() / 8 >= header_size);
1260
1261  // Loop through the header.
1262  while (reader.bits_available() > 0) {
1263    int tag = GetElementId(&reader);
1264    int tagsize = GetVint(&reader);
1265    switch (tag) {
1266      case 0x4286:  // EBMLVersion
1267      case 0x42f7:  // EBMLReadVersion
1268      case 0x42f2:  // EBMLMaxIdLength
1269      case 0x42f3:  // EBMLMaxSizeLength
1270      case 0x4287:  // DocTypeVersion
1271      case 0x4285:  // DocTypeReadVersion
1272      case 0xec:    // void
1273      case 0xbf:    // CRC32
1274        RCHECK(reader.SkipBits(tagsize * 8));
1275        break;
1276
1277      case 0x4282:  // EBMLDocType
1278        // Need to see "webm" or "matroska" next.
1279        switch (ReadBits(&reader, 32)) {
1280          case TAG('w', 'e', 'b', 'm') :
1281            return true;
1282          case TAG('m', 'a', 't', 'r') :
1283            return (ReadBits(&reader, 32) == TAG('o', 's', 'k', 'a'));
1284        }
1285        return false;
1286
1287      default:  // Unrecognized tag
1288        return false;
1289    }
1290  }
1291  return false;
1292}
1293
// Start code values (the byte that follows the 00 00 01 prefix) that
// CheckVC1() counts while scanning a buffer, listed in the order
// SEQ | ENTRY | FRAME in which CheckVC1() expects to see them.
enum VC1StartCodes {
  VC1_SEQUENCE_START_CODE = 0x0F,
  VC1_ENTRY_POINT_START_CODE = 0x0E,
  VC1_FRAME_START_CODE = 0x0D
};
1299
1300// Checks for a VC1 bitstream container.
1301static bool CheckVC1(const uint8* buffer, int buffer_size) {
1302  // Reference: SMPTE 421M
1303  // (http://standards.smpte.org/content/978-1-61482-555-5/st-421-2006/SEC1.body.pdf)
1304  // However, no length ... simply scan for start code values.
1305  // Expect to see SEQ | [ [ ENTRY ] PIC* ]*
1306  // Note tags are very similar to H.264.
1307
1308  RCHECK(buffer_size >= 24);
1309
1310  // First check for Bitstream Metadata Serialization (Annex L)
1311  if (buffer[0] == 0xc5 &&
1312      Read32(buffer + 4) == 0x04 &&
1313      Read32(buffer + 20) == 0x0c) {
1314    // Verify settings in STRUCT_C and STRUCT_A
1315    BitReader reader(buffer + 8, 12);
1316
1317    int profile = ReadBits(&reader, 4);
1318    if (profile == 0 || profile == 4) {  // simple or main
1319      // Skip FRMRTQ_POSTPROC, BITRTQ_POSTPROC, and LOOPFILTER.
1320      reader.SkipBits(3 + 5 + 1);
1321
1322      // Next bit must be 0.
1323      RCHECK(ReadBits(&reader, 1) == 0);
1324
1325      // Skip MULTIRES.
1326      reader.SkipBits(1);
1327
1328      // Next bit must be 1.
1329      RCHECK(ReadBits(&reader, 1) == 1);
1330
1331      // Skip FASTUVMC, EXTENDED_MV, DQUANT, and VSTRANSFORM.
1332      reader.SkipBits(1 + 1 + 2 + 1);
1333
1334      // Next bit must be 0.
1335      RCHECK(ReadBits(&reader, 1) == 0);
1336
1337      // Skip OVERLAP, SYNCMARKER, RANGERED, MAXBFRAMES, QUANTIZER, and
1338      // FINTERPFLAG.
1339      reader.SkipBits(1 + 1 + 1 + 3 + 2 + 1);
1340
1341      // Next bit must be 1.
1342      RCHECK(ReadBits(&reader, 1) == 1);
1343
1344    } else {
1345      RCHECK(profile == 12);  // Other profile values not allowed.
1346      RCHECK(ReadBits(&reader, 28) == 0);
1347    }
1348
1349    // Now check HORIZ_SIZE and VERT_SIZE, which must be 8192 or less.
1350    RCHECK(ReadBits(&reader, 32) <= 8192);
1351    RCHECK(ReadBits(&reader, 32) <= 8192);
1352    return true;
1353  }
1354
1355  // Buffer isn't Bitstream Metadata, so scan for start codes.
1356  int offset = 0;
1357  int sequence_start_code = 0;
1358  int frame_start_code = 0;
1359  while (true) {
1360    // Advance to start_code, if there is one.
1361    if (!AdvanceToStartCode(buffer, buffer_size, &offset, 5, 24, 1)) {
1362      // Not a complete sequence in memory, so return true if we've seen a
1363      // sequence start and a frame start (not checking entry points since
1364      // they only occur in advanced profiles).
1365      return (sequence_start_code > 0 && frame_start_code > 0);
1366    }
1367
1368    // Now verify the block. AdvanceToStartCode() made sure that there are
1369    // at least 5 bytes remaining in the buffer.
1370    BitReader reader(buffer + offset, 5);
1371    RCHECK(ReadBits(&reader, 24) == 1);
1372
1373    // Keep track of the number of certain types received.
1374    switch (ReadBits(&reader, 8)) {
1375      case VC1_SEQUENCE_START_CODE: {
1376        ++sequence_start_code;
1377        switch (ReadBits(&reader, 2)) {
1378          case 0:  // simple
1379          case 1:  // main
1380            RCHECK(ReadBits(&reader, 2) == 0);
1381            break;
1382          case 2:  // complex
1383            return false;
1384          case 3:  // advanced
1385            RCHECK(ReadBits(&reader, 3) <= 4);  // Verify level = 0..4
1386            RCHECK(ReadBits(&reader, 2) == 1);  // Verify colordiff_format = 1
1387            break;
1388        }
1389        break;
1390      }
1391
1392      case VC1_ENTRY_POINT_START_CODE:
1393        // No fields in entry data to check. However, it must occur after
1394        // sequence header.
1395        RCHECK(sequence_start_code > 0);
1396        break;
1397
1398      case VC1_FRAME_START_CODE:
1399        ++frame_start_code;
1400        break;
1401    }
1402    offset += 5;
1403  }
1404}
1405
1406// For some formats the signature is a bunch of characters. They are defined
1407// below. Note that the first 4 characters of the string may be used as a TAG
1408// in LookupContainerByFirst4. For signatures that contain embedded \0, use
1409// uint8[].
1410static const char kAmrSignature[] = "#!AMR";
1411static const uint8 kAsfSignature[] = { 0x30, 0x26, 0xb2, 0x75, 0x8e, 0x66, 0xcf,
1412                                       0x11, 0xa6, 0xd9, 0x00, 0xaa, 0x00, 0x62,
1413                                       0xce, 0x6c };
1414static const char kAssSignature[] = "[Script Info]";
1415static const char kAssBomSignature[] = UTF8_BYTE_ORDER_MARK "[Script Info]";
1416static const uint8 kWtvSignature[] = { 0xb7, 0xd8, 0x00, 0x20, 0x37, 0x49, 0xda,
1417                                       0x11, 0xa6, 0x4e, 0x00, 0x07, 0xe9, 0x5e,
1418                                       0xad, 0x8d };
1419
1420// Attempt to determine the container type from the buffer provided. This is
1421// a simple pass, that uses the first 4 bytes of the buffer as an index to get
1422// a rough idea of the container format.
1423static MediaContainerName LookupContainerByFirst4(const uint8* buffer,
1424                                                  int buffer_size) {
1425  // Minimum size that the code expects to exist without checking size.
1426  if (buffer_size < 12)
1427    return CONTAINER_UNKNOWN;
1428
1429  uint32 first4 = Read32(buffer);
1430  switch (first4) {
1431    case 0x1a45dfa3:
1432      if (CheckWebm(buffer, buffer_size))
1433        return CONTAINER_WEBM;
1434      break;
1435
1436    case 0x3026b275:
1437      if (StartsWith(buffer,
1438                     buffer_size,
1439                     kAsfSignature,
1440                     sizeof(kAsfSignature))) {
1441        return CONTAINER_ASF;
1442      }
1443      break;
1444
1445    case TAG('#','!','A','M'):
1446      if (StartsWith(buffer, buffer_size, kAmrSignature))
1447        return CONTAINER_AMR;
1448      break;
1449
1450    case TAG('#','E','X','T'):
1451      if (CheckHls(buffer, buffer_size))
1452        return CONTAINER_HLS;
1453      break;
1454
1455    case TAG('.','R','M','F'):
1456      if (buffer[4] == 0 && buffer[5] == 0)
1457        return CONTAINER_RM;
1458      break;
1459
1460    case TAG('.','r','a','\xfd'):
1461      return CONTAINER_RM;
1462
1463    case TAG('B','I','K','b'):
1464    case TAG('B','I','K','d'):
1465    case TAG('B','I','K','f'):
1466    case TAG('B','I','K','g'):
1467    case TAG('B','I','K','h'):
1468    case TAG('B','I','K','i'):
1469      if (CheckBink(buffer, buffer_size))
1470        return CONTAINER_BINK;
1471      break;
1472
1473    case TAG('c','a','f','f'):
1474      if (CheckCaf(buffer, buffer_size))
1475        return CONTAINER_CAF;
1476      break;
1477
1478    case TAG('D','E','X','A'):
1479      if (buffer_size > 15 &&
1480          Read16(buffer + 11) <= 2048 &&
1481          Read16(buffer + 13) <= 2048) {
1482        return CONTAINER_DXA;
1483      }
1484      break;
1485
1486    case TAG('D','T','S','H'):
1487      if (Read32(buffer + 4) == TAG('D','H','D','R'))
1488        return CONTAINER_DTSHD;
1489      break;
1490
1491    case 0x64a30100:
1492    case 0x64a30200:
1493    case 0x64a30300:
1494    case 0x64a30400:
1495    case 0x0001a364:
1496    case 0x0002a364:
1497    case 0x0003a364:
1498      if (Read32(buffer + 4) != 0 && Read32(buffer + 8) != 0)
1499        return CONTAINER_IRCAM;
1500      break;
1501
1502    case TAG('f','L','a','C'):
1503      return CONTAINER_FLAC;
1504
1505    case TAG('F','L','V',0):
1506    case TAG('F','L','V',1):
1507    case TAG('F','L','V',2):
1508    case TAG('F','L','V',3):
1509    case TAG('F','L','V',4):
1510      if (buffer[5] == 0 && Read32(buffer + 5) > 8)
1511        return CONTAINER_FLV;
1512      break;
1513
1514    case TAG('F','O','R','M'):
1515      switch (Read32(buffer + 8)) {
1516        case TAG('A','I','F','F'):
1517        case TAG('A','I','F','C'):
1518          return CONTAINER_AIFF;
1519      }
1520      break;
1521
1522    case TAG('M','A','C',' '):
1523      return CONTAINER_APE;
1524
1525    case TAG('O','N','2',' '):
1526      if (Read32(buffer + 8) == TAG('O','N','2','f'))
1527        return CONTAINER_AVI;
1528      break;
1529
1530    case TAG('O','g','g','S'):
1531      if (buffer[5] <= 7)
1532        return CONTAINER_OGG;
1533      break;
1534
1535    case TAG('R','F','6','4'):
1536      if (buffer_size > 16 && Read32(buffer + 12) == TAG('d','s','6','4'))
1537        return CONTAINER_WAV;
1538      break;
1539
1540    case TAG('R','I','F','F'):
1541      switch (Read32(buffer + 8)) {
1542        case TAG('A','V','I',' '):
1543        case TAG('A','V','I','X'):
1544        case TAG('A','V','I','\x19'):
1545        case TAG('A','M','V',' '):
1546          return CONTAINER_AVI;
1547        case TAG('W','A','V','E'):
1548          return CONTAINER_WAV;
1549      }
1550      break;
1551
1552    case TAG('[','S','c','r'):
1553      if (StartsWith(buffer, buffer_size, kAssSignature))
1554        return CONTAINER_ASS;
1555      break;
1556
1557    case TAG('\xef','\xbb','\xbf','['):
1558      if (StartsWith(buffer, buffer_size, kAssBomSignature))
1559        return CONTAINER_ASS;
1560      break;
1561
1562    case 0x7ffe8001:
1563    case 0xfe7f0180:
1564    case 0x1fffe800:
1565    case 0xff1f00e8:
1566      if (CheckDts(buffer, buffer_size))
1567        return CONTAINER_DTS;
1568      break;
1569
1570    case 0xb7d80020:
1571      if (StartsWith(buffer,
1572                     buffer_size,
1573                     kWtvSignature,
1574                     sizeof(kWtvSignature))) {
1575        return CONTAINER_WTV;
1576      }
1577      break;
1578  }
1579
1580  // Now try a few different ones that look at something other
1581  // than the first 4 bytes.
1582  uint32 first3 = first4 & 0xffffff00;
1583  switch (first3) {
1584    case TAG('C','W','S',0):
1585    case TAG('F','W','S',0):
1586      return CONTAINER_SWF;
1587
1588    case TAG('I','D','3',0):
1589      if (CheckMp3(buffer, buffer_size, true))
1590        return CONTAINER_MP3;
1591      break;
1592  }
1593
1594  // Maybe the first 2 characters are something we can use.
1595  uint32 first2 = Read16(buffer);
1596  switch (first2) {
1597    case kAc3SyncWord:
1598      if (CheckAc3(buffer, buffer_size))
1599        return CONTAINER_AC3;
1600      if (CheckEac3(buffer, buffer_size))
1601        return CONTAINER_EAC3;
1602      break;
1603
1604    case 0xfff0:
1605    case 0xfff1:
1606    case 0xfff8:
1607    case 0xfff9:
1608      if (CheckAac(buffer, buffer_size))
1609        return CONTAINER_AAC;
1610      break;
1611  }
1612
1613  // Check if the file is in MP3 format without the header.
1614  if (CheckMp3(buffer, buffer_size, false))
1615    return CONTAINER_MP3;
1616
1617  return CONTAINER_UNKNOWN;
1618}
1619
1620// Attempt to determine the container name from the buffer provided.
1621MediaContainerName DetermineContainer(const uint8* buffer, int buffer_size) {
1622  DCHECK(buffer);
1623
1624  // Since MOV/QuickTime/MPEG4 streams are common, check for them first.
1625  if (CheckMov(buffer, buffer_size))
1626    return CONTAINER_MOV;
1627
1628  // Next attempt the simple checks, that typically look at just the
1629  // first few bytes of the file.
1630  MediaContainerName result = LookupContainerByFirst4(buffer, buffer_size);
1631  if (result != CONTAINER_UNKNOWN)
1632    return result;
1633
1634  // Additional checks that may scan a portion of the buffer.
1635  if (CheckMpeg2ProgramStream(buffer, buffer_size))
1636    return CONTAINER_MPEG2PS;
1637  if (CheckMpeg2TransportStream(buffer, buffer_size))
1638    return CONTAINER_MPEG2TS;
1639  if (CheckMJpeg(buffer, buffer_size))
1640    return CONTAINER_MJPEG;
1641  if (CheckDV(buffer, buffer_size))
1642    return CONTAINER_DV;
1643  if (CheckH261(buffer, buffer_size))
1644    return CONTAINER_H261;
1645  if (CheckH263(buffer, buffer_size))
1646    return CONTAINER_H263;
1647  if (CheckH264(buffer, buffer_size))
1648    return CONTAINER_H264;
1649  if (CheckMpeg4BitStream(buffer, buffer_size))
1650    return CONTAINER_MPEG4BS;
1651  if (CheckVC1(buffer, buffer_size))
1652    return CONTAINER_VC1;
1653  if (CheckSrt(buffer, buffer_size))
1654    return CONTAINER_SRT;
1655  if (CheckGsm(buffer, buffer_size))
1656    return CONTAINER_GSM;
1657
1658  // AC3/EAC3 might not start at the beginning of the stream,
1659  // so scan for a start code.
1660  int offset = 1;  // No need to start at byte 0 due to First4 check.
1661  if (AdvanceToStartCode(buffer, buffer_size, &offset, 4, 16, kAc3SyncWord)) {
1662    if (CheckAc3(buffer + offset, buffer_size - offset))
1663      return CONTAINER_AC3;
1664    if (CheckEac3(buffer + offset, buffer_size - offset))
1665      return CONTAINER_EAC3;
1666  }
1667
1668  return CONTAINER_UNKNOWN;
1669}
1670
1671}  // namespace container_names
1672
1673}  // namespace media
1674