1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "media/formats/webm/webm_parser.h"
6
7// This file contains code to parse WebM file elements. It was created
8// from information in the Matroska spec.
9// http://www.matroska.org/technical/specs/index.html
// This file also contains code for encrypted WebM. The current WebM
// encryption request-for-comments (RFC) specification is here:
// http://wiki.webmproject.org/encryption/webm-encryption-rfc
13
#include <string.h>

#include <iomanip>

#include "base/logging.h"
#include "base/numerics/safe_conversions.h"
#include "media/formats/webm/webm_constants.h"
19
20namespace media {
21
// Payload kinds the parser distinguishes. Every entry of the ID tables below
// pairs an element ID with one of these values.
enum ElementType {
  UNKNOWN,  // Returned by FindIdType() when an ID is not in the table.
  LIST,     // Referred to as Master Element in the Matroska spec.
  UINT,     // Big-endian unsigned integer of 1 to 8 bytes.
  FLOAT,    // Big-endian floating point value of 4 or 8 bytes.
  BINARY,   // Raw bytes passed to the client unchanged.
  STRING,   // Text, cut at the first NUL byte if one is present.
  SKIP,     // Valid element whose payload is ignored.
};
31
32struct ElementIdInfo {
33  ElementType type_;
34  int id_;
35};
36
37struct ListElementInfo {
38  int id_;
39  int level_;
40  const ElementIdInfo* id_info_;
41  int id_info_count_;
42};
43
44// The following are tables indicating what IDs are valid sub-elements
45// of particular elements. If an element is encountered that doesn't
46// appear in the list, a parsing error is signalled. Some elements are
47// marked as SKIP because they are valid, but we don't care about them
48// right now.
49static const ElementIdInfo kEBMLHeaderIds[] = {
50  {UINT, kWebMIdEBMLVersion},
51  {UINT, kWebMIdEBMLReadVersion},
52  {UINT, kWebMIdEBMLMaxIDLength},
53  {UINT, kWebMIdEBMLMaxSizeLength},
54  {STRING, kWebMIdDocType},
55  {UINT, kWebMIdDocTypeVersion},
56  {UINT, kWebMIdDocTypeReadVersion},
57};
58
59static const ElementIdInfo kSegmentIds[] = {
60  {LIST, kWebMIdSeekHead},
61  {LIST, kWebMIdInfo},
62  {LIST, kWebMIdCluster},
63  {LIST, kWebMIdTracks},
64  {LIST, kWebMIdCues},
65  {LIST, kWebMIdAttachments},
66  {LIST, kWebMIdChapters},
67  {LIST, kWebMIdTags},
68};
69
70static const ElementIdInfo kSeekHeadIds[] = {
71  {LIST, kWebMIdSeek},
72};
73
74static const ElementIdInfo kSeekIds[] = {
75  {BINARY, kWebMIdSeekID},
76  {UINT, kWebMIdSeekPosition},
77};
78
79static const ElementIdInfo kInfoIds[] = {
80  {BINARY, kWebMIdSegmentUID},
81  {STRING, kWebMIdSegmentFilename},
82  {BINARY, kWebMIdPrevUID},
83  {STRING, kWebMIdPrevFilename},
84  {BINARY, kWebMIdNextUID},
85  {STRING, kWebMIdNextFilename},
86  {BINARY, kWebMIdSegmentFamily},
87  {LIST, kWebMIdChapterTranslate},
88  {UINT, kWebMIdTimecodeScale},
89  {FLOAT, kWebMIdDuration},
90  {BINARY, kWebMIdDateUTC},
91  {STRING, kWebMIdTitle},
92  {STRING, kWebMIdMuxingApp},
93  {STRING, kWebMIdWritingApp},
94};
95
96static const ElementIdInfo kChapterTranslateIds[] = {
97  {UINT, kWebMIdChapterTranslateEditionUID},
98  {UINT, kWebMIdChapterTranslateCodec},
99  {BINARY, kWebMIdChapterTranslateID},
100};
101
102static const ElementIdInfo kClusterIds[] = {
103  {BINARY, kWebMIdSimpleBlock},
104  {UINT, kWebMIdTimecode},
105  {LIST, kWebMIdSilentTracks},
106  {UINT, kWebMIdPosition},
107  {UINT, kWebMIdPrevSize},
108  {LIST, kWebMIdBlockGroup},
109};
110
111static const ElementIdInfo kSilentTracksIds[] = {
112  {UINT, kWebMIdSilentTrackNumber},
113};
114
115static const ElementIdInfo kBlockGroupIds[] = {
116  {BINARY, kWebMIdBlock},
117  {LIST, kWebMIdBlockAdditions},
118  {UINT, kWebMIdBlockDuration},
119  {UINT, kWebMIdReferencePriority},
120  {BINARY, kWebMIdReferenceBlock},
121  {BINARY, kWebMIdCodecState},
122  {BINARY, kWebMIdDiscardPadding},
123  {LIST, kWebMIdSlices},
124};
125
126static const ElementIdInfo kBlockAdditionsIds[] = {
127  {LIST, kWebMIdBlockMore},
128};
129
130static const ElementIdInfo kBlockMoreIds[] = {
131  {UINT, kWebMIdBlockAddID},
132  {BINARY, kWebMIdBlockAdditional},
133};
134
135static const ElementIdInfo kSlicesIds[] = {
136  {LIST, kWebMIdTimeSlice},
137};
138
139static const ElementIdInfo kTimeSliceIds[] = {
140  {UINT, kWebMIdLaceNumber},
141};
142
143static const ElementIdInfo kTracksIds[] = {
144  {LIST, kWebMIdTrackEntry},
145};
146
147static const ElementIdInfo kTrackEntryIds[] = {
148  {UINT, kWebMIdTrackNumber},
149  {BINARY, kWebMIdTrackUID},
150  {UINT, kWebMIdTrackType},
151  {UINT, kWebMIdFlagEnabled},
152  {UINT, kWebMIdFlagDefault},
153  {UINT, kWebMIdFlagForced},
154  {UINT, kWebMIdFlagLacing},
155  {UINT, kWebMIdMinCache},
156  {UINT, kWebMIdMaxCache},
157  {UINT, kWebMIdDefaultDuration},
158  {FLOAT, kWebMIdTrackTimecodeScale},
159  {UINT, kWebMIdMaxBlockAdditionId},
160  {STRING, kWebMIdName},
161  {STRING, kWebMIdLanguage},
162  {STRING, kWebMIdCodecID},
163  {BINARY, kWebMIdCodecPrivate},
164  {STRING, kWebMIdCodecName},
165  {UINT, kWebMIdAttachmentLink},
166  {UINT, kWebMIdCodecDecodeAll},
167  {UINT, kWebMIdTrackOverlay},
168  {UINT, kWebMIdCodecDelay},
169  {UINT, kWebMIdSeekPreRoll},
170  {LIST, kWebMIdTrackTranslate},
171  {LIST, kWebMIdVideo},
172  {LIST, kWebMIdAudio},
173  {LIST, kWebMIdTrackOperation},
174  {LIST, kWebMIdContentEncodings},
175};
176
177static const ElementIdInfo kTrackTranslateIds[] = {
178  {UINT, kWebMIdTrackTranslateEditionUID},
179  {UINT, kWebMIdTrackTranslateCodec},
180  {BINARY, kWebMIdTrackTranslateTrackID},
181};
182
183static const ElementIdInfo kVideoIds[] = {
184  {UINT, kWebMIdFlagInterlaced},
185  {UINT, kWebMIdStereoMode},
186  {UINT, kWebMIdAlphaMode},
187  {UINT, kWebMIdPixelWidth},
188  {UINT, kWebMIdPixelHeight},
189  {UINT, kWebMIdPixelCropBottom},
190  {UINT, kWebMIdPixelCropTop},
191  {UINT, kWebMIdPixelCropLeft},
192  {UINT, kWebMIdPixelCropRight},
193  {UINT, kWebMIdDisplayWidth},
194  {UINT, kWebMIdDisplayHeight},
195  {UINT, kWebMIdDisplayUnit},
196  {UINT, kWebMIdAspectRatioType},
197  {BINARY, kWebMIdColorSpace},
198  {FLOAT, kWebMIdFrameRate},
199};
200
201static const ElementIdInfo kAudioIds[] = {
202  {FLOAT, kWebMIdSamplingFrequency},
203  {FLOAT, kWebMIdOutputSamplingFrequency},
204  {UINT, kWebMIdChannels},
205  {UINT, kWebMIdBitDepth},
206};
207
208static const ElementIdInfo kTrackOperationIds[] = {
209  {LIST, kWebMIdTrackCombinePlanes},
210  {LIST, kWebMIdJoinBlocks},
211};
212
213static const ElementIdInfo kTrackCombinePlanesIds[] = {
214  {LIST, kWebMIdTrackPlane},
215};
216
217static const ElementIdInfo kTrackPlaneIds[] = {
218  {UINT, kWebMIdTrackPlaneUID},
219  {UINT, kWebMIdTrackPlaneType},
220};
221
222static const ElementIdInfo kJoinBlocksIds[] = {
223  {UINT, kWebMIdTrackJoinUID},
224};
225
226static const ElementIdInfo kContentEncodingsIds[] = {
227  {LIST, kWebMIdContentEncoding},
228};
229
230static const ElementIdInfo kContentEncodingIds[] = {
231  {UINT, kWebMIdContentEncodingOrder},
232  {UINT, kWebMIdContentEncodingScope},
233  {UINT, kWebMIdContentEncodingType},
234  {LIST, kWebMIdContentCompression},
235  {LIST, kWebMIdContentEncryption},
236};
237
238static const ElementIdInfo kContentCompressionIds[] = {
239  {UINT, kWebMIdContentCompAlgo},
240  {BINARY, kWebMIdContentCompSettings},
241};
242
243static const ElementIdInfo kContentEncryptionIds[] = {
244  {LIST, kWebMIdContentEncAESSettings},
245  {UINT, kWebMIdContentEncAlgo},
246  {BINARY, kWebMIdContentEncKeyID},
247  {BINARY, kWebMIdContentSignature},
248  {BINARY, kWebMIdContentSigKeyID},
249  {UINT, kWebMIdContentSigAlgo},
250  {UINT, kWebMIdContentSigHashAlgo},
251};
252
253static const ElementIdInfo kContentEncAESSettingsIds[] = {
254  {UINT, kWebMIdAESSettingsCipherMode},
255};
256
257static const ElementIdInfo kCuesIds[] = {
258  {LIST, kWebMIdCuePoint},
259};
260
261static const ElementIdInfo kCuePointIds[] = {
262  {UINT, kWebMIdCueTime},
263  {LIST, kWebMIdCueTrackPositions},
264};
265
266static const ElementIdInfo kCueTrackPositionsIds[] = {
267  {UINT, kWebMIdCueTrack},
268  {UINT, kWebMIdCueClusterPosition},
269  {UINT, kWebMIdCueBlockNumber},
270  {UINT, kWebMIdCueCodecState},
271  {LIST, kWebMIdCueReference},
272};
273
274static const ElementIdInfo kCueReferenceIds[] = {
275  {UINT, kWebMIdCueRefTime},
276};
277
278static const ElementIdInfo kAttachmentsIds[] = {
279  {LIST, kWebMIdAttachedFile},
280};
281
282static const ElementIdInfo kAttachedFileIds[] = {
283  {STRING, kWebMIdFileDescription},
284  {STRING, kWebMIdFileName},
285  {STRING, kWebMIdFileMimeType},
286  {BINARY, kWebMIdFileData},
287  {UINT, kWebMIdFileUID},
288};
289
290static const ElementIdInfo kChaptersIds[] = {
291  {LIST, kWebMIdEditionEntry},
292};
293
294static const ElementIdInfo kEditionEntryIds[] = {
295  {UINT, kWebMIdEditionUID},
296  {UINT, kWebMIdEditionFlagHidden},
297  {UINT, kWebMIdEditionFlagDefault},
298  {UINT, kWebMIdEditionFlagOrdered},
299  {LIST, kWebMIdChapterAtom},
300};
301
302static const ElementIdInfo kChapterAtomIds[] = {
303  {UINT, kWebMIdChapterUID},
304  {UINT, kWebMIdChapterTimeStart},
305  {UINT, kWebMIdChapterTimeEnd},
306  {UINT, kWebMIdChapterFlagHidden},
307  {UINT, kWebMIdChapterFlagEnabled},
308  {BINARY, kWebMIdChapterSegmentUID},
309  {UINT, kWebMIdChapterSegmentEditionUID},
310  {UINT, kWebMIdChapterPhysicalEquiv},
311  {LIST, kWebMIdChapterTrack},
312  {LIST, kWebMIdChapterDisplay},
313  {LIST, kWebMIdChapProcess},
314};
315
316static const ElementIdInfo kChapterTrackIds[] = {
317  {UINT, kWebMIdChapterTrackNumber},
318};
319
320static const ElementIdInfo kChapterDisplayIds[] = {
321  {STRING, kWebMIdChapString},
322  {STRING, kWebMIdChapLanguage},
323  {STRING, kWebMIdChapCountry},
324};
325
326static const ElementIdInfo kChapProcessIds[] = {
327  {UINT, kWebMIdChapProcessCodecID},
328  {BINARY, kWebMIdChapProcessPrivate},
329  {LIST, kWebMIdChapProcessCommand},
330};
331
332static const ElementIdInfo kChapProcessCommandIds[] = {
333  {UINT, kWebMIdChapProcessTime},
334  {BINARY, kWebMIdChapProcessData},
335};
336
337static const ElementIdInfo kTagsIds[] = {
338  {LIST, kWebMIdTag},
339};
340
341static const ElementIdInfo kTagIds[] = {
342  {LIST, kWebMIdTargets},
343  {LIST, kWebMIdSimpleTag},
344};
345
346static const ElementIdInfo kTargetsIds[] = {
347  {UINT, kWebMIdTargetTypeValue},
348  {STRING, kWebMIdTargetType},
349  {UINT, kWebMIdTagTrackUID},
350  {UINT, kWebMIdTagEditionUID},
351  {UINT, kWebMIdTagChapterUID},
352  {UINT, kWebMIdTagAttachmentUID},
353};
354
355static const ElementIdInfo kSimpleTagIds[] = {
356  {STRING, kWebMIdTagName},
357  {STRING, kWebMIdTagLanguage},
358  {UINT, kWebMIdTagDefault},
359  {STRING, kWebMIdTagString},
360  {BINARY, kWebMIdTagBinary},
361};
362
363#define LIST_ELEMENT_INFO(id, level, id_info) \
364    { (id), (level), (id_info), arraysize(id_info) }
365
366static const ListElementInfo kListElementInfo[] = {
367  LIST_ELEMENT_INFO(kWebMIdCluster, 1, kClusterIds),
368  LIST_ELEMENT_INFO(kWebMIdEBMLHeader, 0, kEBMLHeaderIds),
369  LIST_ELEMENT_INFO(kWebMIdSegment, 0, kSegmentIds),
370  LIST_ELEMENT_INFO(kWebMIdSeekHead, 1, kSeekHeadIds),
371  LIST_ELEMENT_INFO(kWebMIdSeek, 2, kSeekIds),
372  LIST_ELEMENT_INFO(kWebMIdInfo, 1, kInfoIds),
373  LIST_ELEMENT_INFO(kWebMIdChapterTranslate, 2, kChapterTranslateIds),
374  LIST_ELEMENT_INFO(kWebMIdSilentTracks, 2, kSilentTracksIds),
375  LIST_ELEMENT_INFO(kWebMIdBlockGroup, 2, kBlockGroupIds),
376  LIST_ELEMENT_INFO(kWebMIdBlockAdditions, 3, kBlockAdditionsIds),
377  LIST_ELEMENT_INFO(kWebMIdBlockMore, 4, kBlockMoreIds),
378  LIST_ELEMENT_INFO(kWebMIdSlices, 3, kSlicesIds),
379  LIST_ELEMENT_INFO(kWebMIdTimeSlice, 4, kTimeSliceIds),
380  LIST_ELEMENT_INFO(kWebMIdTracks, 1, kTracksIds),
381  LIST_ELEMENT_INFO(kWebMIdTrackEntry, 2, kTrackEntryIds),
382  LIST_ELEMENT_INFO(kWebMIdTrackTranslate, 3, kTrackTranslateIds),
383  LIST_ELEMENT_INFO(kWebMIdVideo, 3, kVideoIds),
384  LIST_ELEMENT_INFO(kWebMIdAudio, 3, kAudioIds),
385  LIST_ELEMENT_INFO(kWebMIdTrackOperation, 3, kTrackOperationIds),
386  LIST_ELEMENT_INFO(kWebMIdTrackCombinePlanes, 4, kTrackCombinePlanesIds),
387  LIST_ELEMENT_INFO(kWebMIdTrackPlane, 5, kTrackPlaneIds),
388  LIST_ELEMENT_INFO(kWebMIdJoinBlocks, 4, kJoinBlocksIds),
389  LIST_ELEMENT_INFO(kWebMIdContentEncodings, 3, kContentEncodingsIds),
390  LIST_ELEMENT_INFO(kWebMIdContentEncoding, 4, kContentEncodingIds),
391  LIST_ELEMENT_INFO(kWebMIdContentCompression, 5, kContentCompressionIds),
392  LIST_ELEMENT_INFO(kWebMIdContentEncryption, 5, kContentEncryptionIds),
393  LIST_ELEMENT_INFO(kWebMIdContentEncAESSettings, 6, kContentEncAESSettingsIds),
394  LIST_ELEMENT_INFO(kWebMIdCues, 1, kCuesIds),
395  LIST_ELEMENT_INFO(kWebMIdCuePoint, 2, kCuePointIds),
396  LIST_ELEMENT_INFO(kWebMIdCueTrackPositions, 3, kCueTrackPositionsIds),
397  LIST_ELEMENT_INFO(kWebMIdCueReference, 4, kCueReferenceIds),
398  LIST_ELEMENT_INFO(kWebMIdAttachments, 1, kAttachmentsIds),
399  LIST_ELEMENT_INFO(kWebMIdAttachedFile, 2, kAttachedFileIds),
400  LIST_ELEMENT_INFO(kWebMIdChapters, 1, kChaptersIds),
401  LIST_ELEMENT_INFO(kWebMIdEditionEntry, 2, kEditionEntryIds),
402  LIST_ELEMENT_INFO(kWebMIdChapterAtom, 3, kChapterAtomIds),
403  LIST_ELEMENT_INFO(kWebMIdChapterTrack, 4, kChapterTrackIds),
404  LIST_ELEMENT_INFO(kWebMIdChapterDisplay, 4, kChapterDisplayIds),
405  LIST_ELEMENT_INFO(kWebMIdChapProcess, 4, kChapProcessIds),
406  LIST_ELEMENT_INFO(kWebMIdChapProcessCommand, 5, kChapProcessCommandIds),
407  LIST_ELEMENT_INFO(kWebMIdTags, 1, kTagsIds),
408  LIST_ELEMENT_INFO(kWebMIdTag, 2, kTagIds),
409  LIST_ELEMENT_INFO(kWebMIdTargets, 3, kTargetsIds),
410  LIST_ELEMENT_INFO(kWebMIdSimpleTag, 3, kSimpleTagIds),
411};
412
413// Parses an element header id or size field. These fields are variable length
414// encoded. The first byte indicates how many bytes the field occupies.
415// |buf|  - The buffer to parse.
416// |size| - The number of bytes in |buf|
417// |max_bytes| - The maximum number of bytes the field can be. ID fields
418//               set this to 4 & element size fields set this to 8. If the
419//               first byte indicates a larger field size than this it is a
420//               parser error.
421// |mask_first_byte| - For element size fields the field length encoding bits
422//                     need to be masked off. This parameter is true for
423//                     element size fields and is false for ID field values.
424//
425// Returns: The number of bytes parsed on success. -1 on error.
426static int ParseWebMElementHeaderField(const uint8* buf, int size,
427                                       int max_bytes, bool mask_first_byte,
428                                       int64* num) {
429  DCHECK(buf);
430  DCHECK(num);
431
432  if (size < 0)
433    return -1;
434
435  if (size == 0)
436    return 0;
437
438  int mask = 0x80;
439  uint8 ch = buf[0];
440  int extra_bytes = -1;
441  bool all_ones = false;
442  for (int i = 0; i < max_bytes; ++i) {
443    if ((ch & mask) != 0) {
444      mask = ~mask & 0xff;
445      *num = mask_first_byte ? ch & mask : ch;
446      all_ones = (ch & mask) == mask;
447      extra_bytes = i;
448      break;
449    }
450    mask = 0x80 | mask >> 1;
451  }
452
453  if (extra_bytes == -1)
454    return -1;
455
456  // Return 0 if we need more data.
457  if ((1 + extra_bytes) > size)
458    return 0;
459
460  int bytes_used = 1;
461
462  for (int i = 0; i < extra_bytes; ++i) {
463    ch = buf[bytes_used++];
464    all_ones &= (ch == 0xff);
465    *num = (*num << 8) | ch;
466  }
467
468  if (all_ones)
469    *num = kint64max;
470
471  return bytes_used;
472}
473
474int WebMParseElementHeader(const uint8* buf, int size,
475                           int* id, int64* element_size) {
476  DCHECK(buf);
477  DCHECK_GE(size, 0);
478  DCHECK(id);
479  DCHECK(element_size);
480
481  if (size == 0)
482    return 0;
483
484  int64 tmp = 0;
485  int num_id_bytes = ParseWebMElementHeaderField(buf, size, 4, false, &tmp);
486
487  if (num_id_bytes <= 0)
488    return num_id_bytes;
489
490  if (tmp == kint64max)
491    tmp = kWebMReservedId;
492
493  *id = static_cast<int>(tmp);
494
495  int num_size_bytes = ParseWebMElementHeaderField(buf + num_id_bytes,
496                                                   size - num_id_bytes,
497                                                   8, true, &tmp);
498
499  if (num_size_bytes <= 0)
500    return num_size_bytes;
501
502  if (tmp == kint64max)
503    tmp = kWebMUnknownSize;
504
505  *element_size = tmp;
506  DVLOG(3) << "WebMParseElementHeader() : id " << std::hex << *id << std::dec
507           << " size " << *element_size;
508  return num_id_bytes + num_size_bytes;
509}
510
511// Finds ElementType for a specific ID.
512static ElementType FindIdType(int id,
513                              const ElementIdInfo* id_info,
514                              int id_info_count) {
515
516  // Check for global element IDs that can be anywhere.
517  if (id == kWebMIdVoid || id == kWebMIdCRC32)
518    return SKIP;
519
520  for (int i = 0; i < id_info_count; ++i) {
521    if (id == id_info[i].id_)
522      return id_info[i].type_;
523  }
524
525  return UNKNOWN;
526}
527
528// Finds ListElementInfo for a specific ID.
529static const ListElementInfo* FindListInfo(int id) {
530  for (size_t i = 0; i < arraysize(kListElementInfo); ++i) {
531    if (id == kListElementInfo[i].id_)
532      return &kListElementInfo[i];
533  }
534
535  return NULL;
536}
537
538static int FindListLevel(int id) {
539  const ListElementInfo* list_info = FindListInfo(id);
540  if (list_info)
541    return list_info->level_;
542
543  return -1;
544}
545
546static int ParseUInt(const uint8* buf, int size, int id,
547                     WebMParserClient* client) {
548  if ((size <= 0) || (size > 8))
549    return -1;
550
551  // Read in the big-endian integer.
552  uint64 value = 0;
553  for (int i = 0; i < size; ++i)
554    value = (value << 8) | buf[i];
555
556  // We use int64 in place of uint64 everywhere for convenience.  See this bug
557  // for more details: http://crbug.com/366750#c3
558  if (!base::IsValueInRangeForNumericType<int64>(value))
559    return -1;
560
561  if (!client->OnUInt(id, value))
562    return -1;
563
564  return size;
565}
566
567static int ParseFloat(const uint8* buf, int size, int id,
568                      WebMParserClient* client) {
569
570  if ((size != 4) && (size != 8))
571    return -1;
572
573  double value = -1;
574
575  // Read the bytes from big-endian form into a native endian integer.
576  int64 tmp = 0;
577  for (int i = 0; i < size; ++i)
578    tmp = (tmp << 8) | buf[i];
579
580  // Use a union to convert the integer bit pattern into a floating point
581  // number.
582  if (size == 4) {
583    union {
584      int32 src;
585      float dst;
586    } tmp2;
587    tmp2.src = static_cast<int32>(tmp);
588    value = tmp2.dst;
589  } else if (size == 8) {
590    union {
591      int64 src;
592      double dst;
593    } tmp2;
594    tmp2.src = tmp;
595    value = tmp2.dst;
596  } else {
597    return -1;
598  }
599
600  if (!client->OnFloat(id, value))
601    return -1;
602
603  return size;
604}
605
606static int ParseBinary(const uint8* buf, int size, int id,
607                       WebMParserClient* client) {
608  return client->OnBinary(id, buf, size) ? size : -1;
609}
610
611static int ParseString(const uint8* buf, int size, int id,
612                       WebMParserClient* client) {
613  const uint8* end = static_cast<const uint8*>(memchr(buf, '\0', size));
614  int length = (end != NULL) ? static_cast<int>(end - buf) : size;
615  std::string str(reinterpret_cast<const char*>(buf), length);
616  return client->OnString(id, str) ? size : -1;
617}
618
619static int ParseNonListElement(ElementType type, int id, int64 element_size,
620                               const uint8* buf, int size,
621                               WebMParserClient* client) {
622  DCHECK_GE(size, element_size);
623
624  int result = -1;
625  switch(type) {
626    case LIST:
627      NOTIMPLEMENTED();
628      result = -1;
629      break;
630    case UINT:
631      result = ParseUInt(buf, element_size, id, client);
632      break;
633    case FLOAT:
634      result = ParseFloat(buf, element_size, id, client);
635      break;
636    case BINARY:
637      result = ParseBinary(buf, element_size, id, client);
638      break;
639    case STRING:
640      result = ParseString(buf, element_size, id, client);
641      break;
642    case SKIP:
643      result = element_size;
644      break;
645    default:
646      DVLOG(1) << "Unhandled ID type " << type;
647      return -1;
648  };
649
650  DCHECK_LE(result, size);
651  return result;
652}
653
654WebMParserClient::WebMParserClient() {}
655WebMParserClient::~WebMParserClient() {}
656
657WebMParserClient* WebMParserClient::OnListStart(int id) {
658  DVLOG(1) << "Unexpected list element start with ID " << std::hex << id;
659  return NULL;
660}
661
662bool WebMParserClient::OnListEnd(int id) {
663  DVLOG(1) << "Unexpected list element end with ID " << std::hex << id;
664  return false;
665}
666
667bool WebMParserClient::OnUInt(int id, int64 val) {
668  DVLOG(1) << "Unexpected unsigned integer element with ID " << std::hex << id;
669  return false;
670}
671
672bool WebMParserClient::OnFloat(int id, double val) {
673  DVLOG(1) << "Unexpected float element with ID " << std::hex << id;
674  return false;
675}
676
677bool WebMParserClient::OnBinary(int id, const uint8* data, int size) {
678  DVLOG(1) << "Unexpected binary element with ID " << std::hex << id;
679  return false;
680}
681
682bool WebMParserClient::OnString(int id, const std::string& str) {
683  DVLOG(1) << "Unexpected string element with ID " << std::hex << id;
684  return false;
685}
686
687WebMListParser::WebMListParser(int id, WebMParserClient* client)
688    : state_(NEED_LIST_HEADER),
689      root_id_(id),
690      root_level_(FindListLevel(id)),
691      root_client_(client) {
692  DCHECK_GE(root_level_, 0);
693  DCHECK(client);
694}
695
696WebMListParser::~WebMListParser() {}
697
698void WebMListParser::Reset() {
699  ChangeState(NEED_LIST_HEADER);
700  list_state_stack_.clear();
701}
702
703int WebMListParser::Parse(const uint8* buf, int size) {
704  DCHECK(buf);
705
706  if (size < 0 || state_ == PARSE_ERROR || state_ == DONE_PARSING_LIST)
707    return -1;
708
709  if (size == 0)
710    return 0;
711
712  const uint8* cur = buf;
713  int cur_size = size;
714  int bytes_parsed = 0;
715
716  while (cur_size > 0 && state_ != PARSE_ERROR && state_ != DONE_PARSING_LIST) {
717    int element_id = 0;
718    int64 element_size = 0;
719    int result = WebMParseElementHeader(cur, cur_size, &element_id,
720                                        &element_size);
721
722    if (result < 0)
723      return result;
724
725    if (result == 0)
726      return bytes_parsed;
727
728    switch(state_) {
729      case NEED_LIST_HEADER: {
730        if (element_id != root_id_) {
731          ChangeState(PARSE_ERROR);
732          return -1;
733        }
734
735        // Only allow Segment & Cluster to have an unknown size.
736        if (element_size == kWebMUnknownSize &&
737            (element_id != kWebMIdSegment) &&
738            (element_id != kWebMIdCluster)) {
739          ChangeState(PARSE_ERROR);
740          return -1;
741        }
742
743        ChangeState(INSIDE_LIST);
744        if (!OnListStart(root_id_, element_size))
745          return -1;
746
747        break;
748      }
749
750      case INSIDE_LIST: {
751        int header_size = result;
752        const uint8* element_data = cur + header_size;
753        int element_data_size = cur_size - header_size;
754
755        if (element_size < element_data_size)
756          element_data_size = element_size;
757
758        result = ParseListElement(header_size, element_id, element_size,
759                                  element_data, element_data_size);
760
761        DCHECK_LE(result, header_size + element_data_size);
762        if (result < 0) {
763          ChangeState(PARSE_ERROR);
764          return -1;
765        }
766
767        if (result == 0)
768          return bytes_parsed;
769
770        break;
771      }
772      case DONE_PARSING_LIST:
773      case PARSE_ERROR:
774        // Shouldn't be able to get here.
775        NOTIMPLEMENTED();
776        break;
777    }
778
779    cur += result;
780    cur_size -= result;
781    bytes_parsed += result;
782  }
783
784  return (state_ == PARSE_ERROR) ? -1 : bytes_parsed;
785}
786
787bool WebMListParser::IsParsingComplete() const {
788  return state_ == DONE_PARSING_LIST;
789}
790
791void WebMListParser::ChangeState(State new_state) {
792  state_ = new_state;
793}
794
795int WebMListParser::ParseListElement(int header_size,
796                                     int id, int64 element_size,
797                                     const uint8* data, int size) {
798  DCHECK_GT(list_state_stack_.size(), 0u);
799
800  ListState& list_state = list_state_stack_.back();
801  DCHECK(list_state.element_info_);
802
803  const ListElementInfo* element_info = list_state.element_info_;
804  ElementType id_type =
805      FindIdType(id, element_info->id_info_, element_info->id_info_count_);
806
807  // Unexpected ID.
808  if (id_type == UNKNOWN) {
809    if (list_state.size_ != kWebMUnknownSize ||
810        !IsSiblingOrAncestor(list_state.id_, id)) {
811      DVLOG(1) << "No ElementType info for ID 0x" << std::hex << id;
812      return -1;
813    }
814
815    // We've reached the end of a list of unknown size. Update the size now that
816    // we know it and dispatch the end of list calls.
817    list_state.size_ = list_state.bytes_parsed_;
818
819    if (!OnListEnd())
820      return -1;
821
822    // Check to see if all open lists have ended.
823    if (list_state_stack_.size() == 0)
824      return 0;
825
826    list_state = list_state_stack_.back();
827  }
828
829  // Make sure the whole element can fit inside the current list.
830  int64 total_element_size = header_size + element_size;
831  if (list_state.size_ != kWebMUnknownSize &&
832      list_state.size_ < list_state.bytes_parsed_ + total_element_size) {
833    return -1;
834  }
835
836  if (id_type == LIST) {
837    list_state.bytes_parsed_ += header_size;
838
839    if (!OnListStart(id, element_size))
840      return -1;
841    return header_size;
842  }
843
844  // Make sure we have the entire element before trying to parse a non-list
845  // element.
846  if (size < element_size)
847    return 0;
848
849  int bytes_parsed = ParseNonListElement(id_type, id, element_size,
850                                         data, size, list_state.client_);
851  DCHECK_LE(bytes_parsed, size);
852
853  // Return if an error occurred or we need more data.
854  // Note: bytes_parsed is 0 for a successful parse of a size 0 element. We
855  // need to check the element_size to disambiguate the "need more data" case
856  // from a successful parse.
857  if (bytes_parsed < 0 || (bytes_parsed == 0 && element_size != 0))
858    return bytes_parsed;
859
860  int result = header_size + bytes_parsed;
861  list_state.bytes_parsed_ += result;
862
863  // See if we have reached the end of the current list.
864  if (list_state.bytes_parsed_ == list_state.size_) {
865    if (!OnListEnd())
866      return -1;
867  }
868
869  return result;
870}
871
872bool WebMListParser::OnListStart(int id, int64 size) {
873  const ListElementInfo* element_info = FindListInfo(id);
874  if (!element_info)
875    return false;
876
877  int current_level = root_level_ + list_state_stack_.size() - 1;
878  if (current_level + 1 != element_info->level_)
879    return false;
880
881  WebMParserClient* current_list_client = NULL;
882  if (!list_state_stack_.empty()) {
883    // Make sure the new list doesn't go past the end of the current list.
884    ListState current_list_state = list_state_stack_.back();
885    if (current_list_state.size_ != kWebMUnknownSize &&
886        current_list_state.size_ < current_list_state.bytes_parsed_ + size)
887      return false;
888    current_list_client = current_list_state.client_;
889  } else {
890    current_list_client = root_client_;
891  }
892
893  WebMParserClient* new_list_client = current_list_client->OnListStart(id);
894  if (!new_list_client)
895    return false;
896
897  ListState new_list_state = { id, size, 0, element_info, new_list_client };
898  list_state_stack_.push_back(new_list_state);
899
900  if (size == 0)
901    return OnListEnd();
902
903  return true;
904}
905
906bool WebMListParser::OnListEnd() {
907  int lists_ended = 0;
908  for (; !list_state_stack_.empty(); ++lists_ended) {
909    const ListState& list_state = list_state_stack_.back();
910    int64 bytes_parsed = list_state.bytes_parsed_;
911    int id = list_state.id_;
912
913    if (bytes_parsed != list_state.size_)
914      break;
915
916    list_state_stack_.pop_back();
917
918    WebMParserClient* client = NULL;
919    if (!list_state_stack_.empty()) {
920      // Update the bytes_parsed_ for the parent element.
921      list_state_stack_.back().bytes_parsed_ += bytes_parsed;
922      client = list_state_stack_.back().client_;
923    } else {
924      client = root_client_;
925    }
926
927    if (!client->OnListEnd(id))
928      return false;
929  }
930
931  DCHECK_GE(lists_ended, 1);
932
933  if (list_state_stack_.empty())
934    ChangeState(DONE_PARSING_LIST);
935
936  return true;
937}
938
939bool WebMListParser::IsSiblingOrAncestor(int id_a, int id_b) const {
940  DCHECK((id_a == kWebMIdSegment) || (id_a == kWebMIdCluster));
941
942  if (id_a == kWebMIdCluster) {
943    // kWebMIdCluster siblings.
944    for (size_t i = 0; i < arraysize(kSegmentIds); i++) {
945      if (kSegmentIds[i].id_ == id_b)
946        return true;
947    }
948  }
949
950  // kWebMIdSegment siblings.
951  return ((id_b == kWebMIdSegment) || (id_b == kWebMIdEBMLHeader));
952}
953
954}  // namespace media
955