Shaka Packager SDK
webm_parser.cc
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 // This file contains code to parse WebM file elements. It was created
6 // from information in the Matroska spec.
7 // http://www.matroska.org/technical/specs/index.html
8 //
9 // This file contains code for encrypted WebM. Current WebM
10 // encrypted request for comments specification is here
11 // http://wiki.webmproject.org/encryption/webm-encryption-rfc
12 
#include <packager/media/formats/webm/webm_parser.h>

#include <cstring>
#include <limits>

#include <absl/log/check.h>
#include <absl/log/log.h>

#include <packager/macros/logging.h>
#include <packager/media/formats/webm/webm_constants.h>
22 
23 namespace shaka {
24 namespace media {
25 
// Payload kinds used by the ID tables below to decide how an element is
// handled.
enum ElementType {
  UNKNOWN,  // ID is not listed for the enclosing list.
  LIST,     // Referred to as Master Element in the Matroska spec.
  UINT,     // Handled by ParseUInt().
  FLOAT,    // Handled by ParseFloat().
  BINARY,   // Handled by ParseBinary().
  STRING,   // Handled by ParseString().
  SKIP,     // Valid element whose payload is stepped over.
};
35 
36 struct ElementIdInfo {
37  ElementType type_;
38  int id_;
39 };
40 
41 struct ListElementInfo {
42  int id_;
43  int level_;
44  const ElementIdInfo* id_info_;
45  int id_info_count_;
46 };
47 
48 // The following are tables indicating what IDs are valid sub-elements
49 // of particular elements. If an element is encountered that doesn't
50 // appear in the list, a parsing error is signalled. Some elements are
51 // marked as SKIP because they are valid, but we don't care about them
52 // right now.
53 static const ElementIdInfo kEBMLHeaderIds[] = {
54  {UINT, kWebMIdEBMLVersion},
55  {UINT, kWebMIdEBMLReadVersion},
56  {UINT, kWebMIdEBMLMaxIDLength},
57  {UINT, kWebMIdEBMLMaxSizeLength},
58  {STRING, kWebMIdDocType},
59  {UINT, kWebMIdDocTypeVersion},
60  {UINT, kWebMIdDocTypeReadVersion},
61 };
62 
63 static const ElementIdInfo kSegmentIds[] = {
64  {LIST, kWebMIdSeekHead},
65  {LIST, kWebMIdInfo},
66  {LIST, kWebMIdCluster},
67  {LIST, kWebMIdTracks},
68  {LIST, kWebMIdCues},
69  {LIST, kWebMIdAttachments},
70  {LIST, kWebMIdChapters},
71  {LIST, kWebMIdTags},
72 };
73 
74 static const ElementIdInfo kSeekHeadIds[] = {
75  {LIST, kWebMIdSeek},
76 };
77 
78 static const ElementIdInfo kSeekIds[] = {
79  {BINARY, kWebMIdSeekID},
80  {UINT, kWebMIdSeekPosition},
81 };
82 
83 static const ElementIdInfo kInfoIds[] = {
84  {BINARY, kWebMIdSegmentUID},
85  {STRING, kWebMIdSegmentFilename},
86  {BINARY, kWebMIdPrevUID},
87  {STRING, kWebMIdPrevFilename},
88  {BINARY, kWebMIdNextUID},
89  {STRING, kWebMIdNextFilename},
90  {BINARY, kWebMIdSegmentFamily},
91  {LIST, kWebMIdChapterTranslate},
92  {UINT, kWebMIdTimecodeScale},
93  {FLOAT, kWebMIdDuration},
94  {BINARY, kWebMIdDateUTC},
95  {STRING, kWebMIdTitle},
96  {STRING, kWebMIdMuxingApp},
97  {STRING, kWebMIdWritingApp},
98 };
99 
100 static const ElementIdInfo kChapterTranslateIds[] = {
101  {UINT, kWebMIdChapterTranslateEditionUID},
102  {UINT, kWebMIdChapterTranslateCodec},
103  {BINARY, kWebMIdChapterTranslateID},
104 };
105 
106 static const ElementIdInfo kClusterIds[] = {
107  {BINARY, kWebMIdSimpleBlock},
108  {UINT, kWebMIdTimecode},
109  {LIST, kWebMIdSilentTracks},
110  {UINT, kWebMIdPosition},
111  {UINT, kWebMIdPrevSize},
112  {LIST, kWebMIdBlockGroup},
113 };
114 
115 static const ElementIdInfo kSilentTracksIds[] = {
116  {UINT, kWebMIdSilentTrackNumber},
117 };
118 
119 static const ElementIdInfo kBlockGroupIds[] = {
120  {BINARY, kWebMIdBlock},
121  {LIST, kWebMIdBlockAdditions},
122  {UINT, kWebMIdBlockDuration},
123  {UINT, kWebMIdReferencePriority},
124  {BINARY, kWebMIdReferenceBlock},
125  {BINARY, kWebMIdCodecState},
126  {BINARY, kWebMIdDiscardPadding},
127  {LIST, kWebMIdSlices},
128 };
129 
130 static const ElementIdInfo kBlockAdditionsIds[] = {
131  {LIST, kWebMIdBlockMore},
132 };
133 
134 static const ElementIdInfo kBlockMoreIds[] = {
135  {UINT, kWebMIdBlockAddID},
136  {BINARY, kWebMIdBlockAdditional},
137 };
138 
139 static const ElementIdInfo kSlicesIds[] = {
140  {LIST, kWebMIdTimeSlice},
141 };
142 
143 static const ElementIdInfo kTimeSliceIds[] = {
144  {UINT, kWebMIdLaceNumber},
145 };
146 
147 static const ElementIdInfo kTracksIds[] = {
148  {LIST, kWebMIdTrackEntry},
149 };
150 
151 static const ElementIdInfo kTrackEntryIds[] = {
152  {UINT, kWebMIdTrackNumber},
153  {BINARY, kWebMIdTrackUID},
154  {UINT, kWebMIdTrackType},
155  {UINT, kWebMIdFlagEnabled},
156  {UINT, kWebMIdFlagDefault},
157  {UINT, kWebMIdFlagForced},
158  {UINT, kWebMIdFlagLacing},
159  {UINT, kWebMIdMinCache},
160  {UINT, kWebMIdMaxCache},
161  {UINT, kWebMIdDefaultDuration},
162  {FLOAT, kWebMIdTrackTimecodeScale},
163  {UINT, kWebMIdMaxBlockAdditionId},
164  {STRING, kWebMIdName},
165  {STRING, kWebMIdLanguage},
166  {STRING, kWebMIdCodecID},
167  {BINARY, kWebMIdCodecPrivate},
168  {STRING, kWebMIdCodecName},
169  {UINT, kWebMIdAttachmentLink},
170  {UINT, kWebMIdCodecDecodeAll},
171  {UINT, kWebMIdTrackOverlay},
172  {UINT, kWebMIdCodecDelay},
173  {UINT, kWebMIdSeekPreRoll},
174  {LIST, kWebMIdTrackTranslate},
175  {LIST, kWebMIdVideo},
176  {LIST, kWebMIdAudio},
177  {LIST, kWebMIdTrackOperation},
178  {LIST, kWebMIdContentEncodings},
179 };
180 
181 static const ElementIdInfo kTrackTranslateIds[] = {
182  {UINT, kWebMIdTrackTranslateEditionUID},
183  {UINT, kWebMIdTrackTranslateCodec},
184  {BINARY, kWebMIdTrackTranslateTrackID},
185 };
186 
187 static const ElementIdInfo kVideoIds[] = {
188  {UINT, kWebMIdFlagInterlaced},
189  {UINT, kWebMIdStereoMode},
190  {UINT, kWebMIdAlphaMode},
191  {UINT, kWebMIdPixelWidth},
192  {UINT, kWebMIdPixelHeight},
193  {UINT, kWebMIdPixelCropBottom},
194  {UINT, kWebMIdPixelCropTop},
195  {UINT, kWebMIdPixelCropLeft},
196  {UINT, kWebMIdPixelCropRight},
197  {UINT, kWebMIdDisplayWidth},
198  {UINT, kWebMIdDisplayHeight},
199  {UINT, kWebMIdDisplayUnit},
200  {UINT, kWebMIdAspectRatioType},
201  {BINARY, kWebMIdColorSpace},
202  {FLOAT, kWebMIdFrameRate},
203  {LIST, kWebMIdColor},
204  {LIST, kWebMIdProjection},
205 };
206 
207 static const ElementIdInfo kColorIds[] = {
208  {UINT, kWebMIdColorMatrixCoefficients},
209  {UINT, kWebMIdColorBitsPerChannel},
210  {UINT, kWebMIdColorChromaSubsamplingHorz},
211  {UINT, kWebMIdColorChromaSubsamplingVert},
212  {UINT, kWebMIdColorCbSamplingHorz},
213  {UINT, kWebMIdColorCbSamplingVert},
214  {UINT, kWebMIdColorChromaSitingHorz},
215  {UINT, kWebMIdColorChromaSitingVert},
216  {UINT, kWebMIdColorRange},
217  {UINT, kWebMIdColorTransferCharacteristics},
218  {UINT, kWebMIdColorPrimaries},
219  {UINT, kWebMIdColorMaxCLL},
220  {UINT, kWebMIdColorMaxFALL},
221  {LIST, kWebMIdColorMasteringMetadata},
222 };
223 
224 static const ElementIdInfo kProjectionIds[] = {
225  {UINT, kWebMIdProjectionType},
226 };
227 
228 static const ElementIdInfo kAudioIds[] = {
229  {FLOAT, kWebMIdSamplingFrequency},
230  {FLOAT, kWebMIdOutputSamplingFrequency},
231  {UINT, kWebMIdChannels},
232  {UINT, kWebMIdBitDepth},
233 };
234 
235 static const ElementIdInfo kTrackOperationIds[] = {
236  {LIST, kWebMIdTrackCombinePlanes},
237  {LIST, kWebMIdJoinBlocks},
238 };
239 
240 static const ElementIdInfo kTrackCombinePlanesIds[] = {
241  {LIST, kWebMIdTrackPlane},
242 };
243 
244 static const ElementIdInfo kTrackPlaneIds[] = {
245  {UINT, kWebMIdTrackPlaneUID},
246  {UINT, kWebMIdTrackPlaneType},
247 };
248 
249 static const ElementIdInfo kJoinBlocksIds[] = {
250  {UINT, kWebMIdTrackJoinUID},
251 };
252 
253 static const ElementIdInfo kContentEncodingsIds[] = {
254  {LIST, kWebMIdContentEncoding},
255 };
256 
257 static const ElementIdInfo kContentEncodingIds[] = {
258  {UINT, kWebMIdContentEncodingOrder},
259  {UINT, kWebMIdContentEncodingScope},
260  {UINT, kWebMIdContentEncodingType},
261  {LIST, kWebMIdContentCompression},
262  {LIST, kWebMIdContentEncryption},
263 };
264 
265 static const ElementIdInfo kContentCompressionIds[] = {
266  {UINT, kWebMIdContentCompAlgo},
267  {BINARY, kWebMIdContentCompSettings},
268 };
269 
270 static const ElementIdInfo kContentEncryptionIds[] = {
271  {LIST, kWebMIdContentEncAESSettings},
272  {UINT, kWebMIdContentEncAlgo},
273  {BINARY, kWebMIdContentEncKeyID},
274  {BINARY, kWebMIdContentSignature},
275  {BINARY, kWebMIdContentSigKeyID},
276  {UINT, kWebMIdContentSigAlgo},
277  {UINT, kWebMIdContentSigHashAlgo},
278 };
279 
280 static const ElementIdInfo kContentEncAESSettingsIds[] = {
281  {UINT, kWebMIdAESSettingsCipherMode},
282 };
283 
284 static const ElementIdInfo kCuesIds[] = {
285  {LIST, kWebMIdCuePoint},
286 };
287 
288 static const ElementIdInfo kCuePointIds[] = {
289  {UINT, kWebMIdCueTime},
290  {LIST, kWebMIdCueTrackPositions},
291 };
292 
293 static const ElementIdInfo kCueTrackPositionsIds[] = {
294  {UINT, kWebMIdCueTrack},
295  {UINT, kWebMIdCueClusterPosition},
296  {UINT, kWebMIdCueBlockNumber},
297  {UINT, kWebMIdCueCodecState},
298  {LIST, kWebMIdCueReference},
299 };
300 
301 static const ElementIdInfo kCueReferenceIds[] = {
302  {UINT, kWebMIdCueRefTime},
303 };
304 
305 static const ElementIdInfo kAttachmentsIds[] = {
306  {LIST, kWebMIdAttachedFile},
307 };
308 
309 static const ElementIdInfo kAttachedFileIds[] = {
310  {STRING, kWebMIdFileDescription},
311  {STRING, kWebMIdFileName},
312  {STRING, kWebMIdFileMimeType},
313  {BINARY, kWebMIdFileData},
314  {UINT, kWebMIdFileUID},
315 };
316 
317 static const ElementIdInfo kChaptersIds[] = {
318  {LIST, kWebMIdEditionEntry},
319 };
320 
321 static const ElementIdInfo kEditionEntryIds[] = {
322  {UINT, kWebMIdEditionUID},
323  {UINT, kWebMIdEditionFlagHidden},
324  {UINT, kWebMIdEditionFlagDefault},
325  {UINT, kWebMIdEditionFlagOrdered},
326  {LIST, kWebMIdChapterAtom},
327 };
328 
329 static const ElementIdInfo kChapterAtomIds[] = {
330  {UINT, kWebMIdChapterUID},
331  {UINT, kWebMIdChapterTimeStart},
332  {UINT, kWebMIdChapterTimeEnd},
333  {UINT, kWebMIdChapterFlagHidden},
334  {UINT, kWebMIdChapterFlagEnabled},
335  {BINARY, kWebMIdChapterSegmentUID},
336  {UINT, kWebMIdChapterSegmentEditionUID},
337  {UINT, kWebMIdChapterPhysicalEquiv},
338  {LIST, kWebMIdChapterTrack},
339  {LIST, kWebMIdChapterDisplay},
340  {LIST, kWebMIdChapProcess},
341 };
342 
343 static const ElementIdInfo kChapterTrackIds[] = {
344  {UINT, kWebMIdChapterTrackNumber},
345 };
346 
347 static const ElementIdInfo kChapterDisplayIds[] = {
348  {STRING, kWebMIdChapString},
349  {STRING, kWebMIdChapLanguage},
350  {STRING, kWebMIdChapCountry},
351 };
352 
353 static const ElementIdInfo kChapProcessIds[] = {
354  {UINT, kWebMIdChapProcessCodecID},
355  {BINARY, kWebMIdChapProcessPrivate},
356  {LIST, kWebMIdChapProcessCommand},
357 };
358 
359 static const ElementIdInfo kChapProcessCommandIds[] = {
360  {UINT, kWebMIdChapProcessTime},
361  {BINARY, kWebMIdChapProcessData},
362 };
363 
364 static const ElementIdInfo kTagsIds[] = {
365  {LIST, kWebMIdTag},
366 };
367 
368 static const ElementIdInfo kTagIds[] = {
369  {LIST, kWebMIdTargets},
370  {LIST, kWebMIdSimpleTag},
371 };
372 
373 static const ElementIdInfo kTargetsIds[] = {
374  {UINT, kWebMIdTargetTypeValue},
375  {STRING, kWebMIdTargetType},
376  {UINT, kWebMIdTagTrackUID},
377  {UINT, kWebMIdTagEditionUID},
378  {UINT, kWebMIdTagChapterUID},
379  {UINT, kWebMIdTagAttachmentUID},
380 };
381 
382 static const ElementIdInfo kSimpleTagIds[] = {
383  {STRING, kWebMIdTagName},
384  {STRING, kWebMIdTagLanguage},
385  {UINT, kWebMIdTagDefault},
386  {STRING, kWebMIdTagString},
387  {BINARY, kWebMIdTagBinary},
388 };
389 
390 #define LIST_ELEMENT_INFO(id, level, id_info) \
391  { (id), (level), (id_info), std::size(id_info) }
392 
393 static const ListElementInfo kListElementInfo[] = {
394  LIST_ELEMENT_INFO(kWebMIdCluster, 1, kClusterIds),
395  LIST_ELEMENT_INFO(kWebMIdEBMLHeader, 0, kEBMLHeaderIds),
396  LIST_ELEMENT_INFO(kWebMIdSegment, 0, kSegmentIds),
397  LIST_ELEMENT_INFO(kWebMIdSeekHead, 1, kSeekHeadIds),
398  LIST_ELEMENT_INFO(kWebMIdSeek, 2, kSeekIds),
399  LIST_ELEMENT_INFO(kWebMIdInfo, 1, kInfoIds),
400  LIST_ELEMENT_INFO(kWebMIdChapterTranslate, 2, kChapterTranslateIds),
401  LIST_ELEMENT_INFO(kWebMIdSilentTracks, 2, kSilentTracksIds),
402  LIST_ELEMENT_INFO(kWebMIdBlockGroup, 2, kBlockGroupIds),
403  LIST_ELEMENT_INFO(kWebMIdBlockAdditions, 3, kBlockAdditionsIds),
404  LIST_ELEMENT_INFO(kWebMIdBlockMore, 4, kBlockMoreIds),
405  LIST_ELEMENT_INFO(kWebMIdSlices, 3, kSlicesIds),
406  LIST_ELEMENT_INFO(kWebMIdTimeSlice, 4, kTimeSliceIds),
407  LIST_ELEMENT_INFO(kWebMIdTracks, 1, kTracksIds),
408  LIST_ELEMENT_INFO(kWebMIdTrackEntry, 2, kTrackEntryIds),
409  LIST_ELEMENT_INFO(kWebMIdTrackTranslate, 3, kTrackTranslateIds),
410  LIST_ELEMENT_INFO(kWebMIdVideo, 3, kVideoIds),
411  LIST_ELEMENT_INFO(kWebMIdColor, 4, kColorIds),
412  LIST_ELEMENT_INFO(kWebMIdProjection, 4, kProjectionIds),
413  LIST_ELEMENT_INFO(kWebMIdAudio, 3, kAudioIds),
414  LIST_ELEMENT_INFO(kWebMIdTrackOperation, 3, kTrackOperationIds),
415  LIST_ELEMENT_INFO(kWebMIdTrackCombinePlanes, 4, kTrackCombinePlanesIds),
416  LIST_ELEMENT_INFO(kWebMIdTrackPlane, 5, kTrackPlaneIds),
417  LIST_ELEMENT_INFO(kWebMIdJoinBlocks, 4, kJoinBlocksIds),
418  LIST_ELEMENT_INFO(kWebMIdContentEncodings, 3, kContentEncodingsIds),
419  LIST_ELEMENT_INFO(kWebMIdContentEncoding, 4, kContentEncodingIds),
420  LIST_ELEMENT_INFO(kWebMIdContentCompression, 5, kContentCompressionIds),
421  LIST_ELEMENT_INFO(kWebMIdContentEncryption, 5, kContentEncryptionIds),
422  LIST_ELEMENT_INFO(kWebMIdContentEncAESSettings, 6, kContentEncAESSettingsIds),
423  LIST_ELEMENT_INFO(kWebMIdCues, 1, kCuesIds),
424  LIST_ELEMENT_INFO(kWebMIdCuePoint, 2, kCuePointIds),
425  LIST_ELEMENT_INFO(kWebMIdCueTrackPositions, 3, kCueTrackPositionsIds),
426  LIST_ELEMENT_INFO(kWebMIdCueReference, 4, kCueReferenceIds),
427  LIST_ELEMENT_INFO(kWebMIdAttachments, 1, kAttachmentsIds),
428  LIST_ELEMENT_INFO(kWebMIdAttachedFile, 2, kAttachedFileIds),
429  LIST_ELEMENT_INFO(kWebMIdChapters, 1, kChaptersIds),
430  LIST_ELEMENT_INFO(kWebMIdEditionEntry, 2, kEditionEntryIds),
431  LIST_ELEMENT_INFO(kWebMIdChapterAtom, 3, kChapterAtomIds),
432  LIST_ELEMENT_INFO(kWebMIdChapterTrack, 4, kChapterTrackIds),
433  LIST_ELEMENT_INFO(kWebMIdChapterDisplay, 4, kChapterDisplayIds),
434  LIST_ELEMENT_INFO(kWebMIdChapProcess, 4, kChapProcessIds),
435  LIST_ELEMENT_INFO(kWebMIdChapProcessCommand, 5, kChapProcessCommandIds),
436  LIST_ELEMENT_INFO(kWebMIdTags, 1, kTagsIds),
437  LIST_ELEMENT_INFO(kWebMIdTag, 2, kTagIds),
438  LIST_ELEMENT_INFO(kWebMIdTargets, 3, kTargetsIds),
439  LIST_ELEMENT_INFO(kWebMIdSimpleTag, 3, kSimpleTagIds),
440 };
441 
442 // Parses an element header id or size field. These fields are variable length
443 // encoded. The first byte indicates how many bytes the field occupies.
444 // |buf| - The buffer to parse.
445 // |size| - The number of bytes in |buf|
446 // |max_bytes| - The maximum number of bytes the field can be. ID fields
447 // set this to 4 & element size fields set this to 8. If the
448 // first byte indicates a larger field size than this it is a
449 // parser error.
450 // |mask_first_byte| - For element size fields the field length encoding bits
451 // need to be masked off. This parameter is true for
452 // element size fields and is false for ID field values.
453 //
454 // Returns: The number of bytes parsed on success. -1 on error.
455 static int ParseWebMElementHeaderField(const uint8_t* buf,
456  int size,
457  int max_bytes,
458  bool mask_first_byte,
459  int64_t* num) {
460  DCHECK(buf);
461  DCHECK(num);
462 
463  if (size < 0)
464  return -1;
465 
466  if (size == 0)
467  return 0;
468 
469  int mask = 0x80;
470  uint8_t ch = buf[0];
471  int extra_bytes = -1;
472  bool all_ones = false;
473  for (int i = 0; i < max_bytes; ++i) {
474  if ((ch & mask) != 0) {
475  mask = ~mask & 0xff;
476  *num = mask_first_byte ? ch & mask : ch;
477  all_ones = (ch & mask) == mask;
478  extra_bytes = i;
479  break;
480  }
481  mask = 0x80 | mask >> 1;
482  }
483 
484  if (extra_bytes == -1)
485  return -1;
486 
487  // Return 0 if we need more data.
488  if ((1 + extra_bytes) > size)
489  return 0;
490 
491  int bytes_used = 1;
492 
493  for (int i = 0; i < extra_bytes; ++i) {
494  ch = buf[bytes_used++];
495  all_ones &= (ch == 0xff);
496  *num = (*num << 8) | ch;
497  }
498 
499  if (all_ones)
500  *num = std::numeric_limits<int64_t>::max();
501 
502  return bytes_used;
503 }
504 
505 int WebMParseElementHeader(const uint8_t* buf,
506  int size,
507  int* id,
508  int64_t* element_size) {
509  DCHECK(buf);
510  DCHECK_GE(size, 0);
511  DCHECK(id);
512  DCHECK(element_size);
513 
514  if (size == 0)
515  return 0;
516 
517  int64_t tmp = 0;
518  int num_id_bytes = ParseWebMElementHeaderField(buf, size, 4, false, &tmp);
519 
520  if (num_id_bytes <= 0)
521  return num_id_bytes;
522 
523  if (tmp == std::numeric_limits<int64_t>::max())
524  tmp = kWebMReservedId;
525 
526  *id = static_cast<int>(tmp);
527 
528  int num_size_bytes = ParseWebMElementHeaderField(buf + num_id_bytes,
529  size - num_id_bytes,
530  8, true, &tmp);
531 
532  if (num_size_bytes <= 0)
533  return num_size_bytes;
534 
535  if (tmp == std::numeric_limits<int64_t>::max())
536  tmp = kWebMUnknownSize;
537 
538  *element_size = tmp;
539  DVLOG(3) << "WebMParseElementHeader() : id " << std::hex << *id << std::dec
540  << " size " << *element_size;
541  return num_id_bytes + num_size_bytes;
542 }
543 
544 // Finds ElementType for a specific ID.
545 static ElementType FindIdType(int id,
546  const ElementIdInfo* id_info,
547  int id_info_count) {
548 
549  // Check for global element IDs that can be anywhere.
550  if (id == kWebMIdVoid || id == kWebMIdCRC32)
551  return SKIP;
552 
553  for (int i = 0; i < id_info_count; ++i) {
554  if (id == id_info[i].id_)
555  return id_info[i].type_;
556  }
557 
558  return UNKNOWN;
559 }
560 
561 // Finds ListElementInfo for a specific ID.
562 static const ListElementInfo* FindListInfo(int id) {
563  for (size_t i = 0; i < std::size(kListElementInfo); ++i) {
564  if (id == kListElementInfo[i].id_)
565  return &kListElementInfo[i];
566  }
567 
568  return NULL;
569 }
570 
571 static int FindListLevel(int id) {
572  const ListElementInfo* list_info = FindListInfo(id);
573  if (list_info)
574  return list_info->level_;
575 
576  return -1;
577 }
578 
579 static int ParseUInt(const uint8_t* buf,
580  int size,
581  int id,
582  WebMParserClient* client) {
583  if ((size <= 0) || (size > 8))
584  return -1;
585 
586  // Read in the big-endian integer.
587  uint64_t value = 0;
588  for (int i = 0; i < size; ++i)
589  value = (value << 8) | buf[i];
590 
591  // We use int64_t in place of uint64_t everywhere for convenience. See this
592  // bug
593  // for more details: http://crbug.com/366750#c3
594  if (value > static_cast<uint64_t>(std::numeric_limits<int64_t>::max()))
595  return -1;
596 
597  if (!client->OnUInt(id, value))
598  return -1;
599 
600  return size;
601 }
602 
603 static int ParseFloat(const uint8_t* buf,
604  int size,
605  int id,
606  WebMParserClient* client) {
607  if ((size != 4) && (size != 8))
608  return -1;
609 
610  double value = -1;
611 
612  // Read the bytes from big-endian form into a native endian integer.
613  int64_t tmp = 0;
614  for (int i = 0; i < size; ++i)
615  tmp = (tmp << 8) | buf[i];
616 
617  // Use a union to convert the integer bit pattern into a floating point
618  // number.
619  if (size == 4) {
620  union {
621  int32_t src;
622  float dst;
623  } tmp2;
624  tmp2.src = static_cast<int32_t>(tmp);
625  value = tmp2.dst;
626  } else if (size == 8) {
627  union {
628  int64_t src;
629  double dst;
630  } tmp2;
631  tmp2.src = tmp;
632  value = tmp2.dst;
633  } else {
634  return -1;
635  }
636 
637  if (!client->OnFloat(id, value))
638  return -1;
639 
640  return size;
641 }
642 
643 static int ParseBinary(const uint8_t* buf,
644  int size,
645  int id,
646  WebMParserClient* client) {
647  return client->OnBinary(id, buf, size) ? size : -1;
648 }
649 
650 static int ParseString(const uint8_t* buf,
651  int size,
652  int id,
653  WebMParserClient* client) {
654  const uint8_t* end = static_cast<const uint8_t*>(memchr(buf, '\0', size));
655  int length = (end != NULL) ? static_cast<int>(end - buf) : size;
656  std::string str(reinterpret_cast<const char*>(buf), length);
657  return client->OnString(id, str) ? size : -1;
658 }
659 
660 static int ParseNonListElement(ElementType type,
661  int id,
662  int64_t element_size,
663  const uint8_t* buf,
664  int size,
665  WebMParserClient* client) {
666  DCHECK_GE(size, element_size);
667 
668  int result = -1;
669  switch(type) {
670  case LIST:
671  NOTIMPLEMENTED();
672  result = -1;
673  break;
674  case UINT:
675  result = ParseUInt(buf, element_size, id, client);
676  break;
677  case FLOAT:
678  result = ParseFloat(buf, element_size, id, client);
679  break;
680  case BINARY:
681  result = ParseBinary(buf, element_size, id, client);
682  break;
683  case STRING:
684  result = ParseString(buf, element_size, id, client);
685  break;
686  case SKIP:
687  result = element_size;
688  break;
689  default:
690  DVLOG(1) << "Unhandled ID type " << type;
691  return -1;
692  };
693 
694  DCHECK_LE(result, size);
695  return result;
696 }
697 
698 WebMParserClient::WebMParserClient() {}
699 WebMParserClient::~WebMParserClient() {}
700 
701 WebMParserClient* WebMParserClient::OnListStart(int id) {
702  DVLOG(1) << "Unexpected list element start with ID " << std::hex << id;
703  return NULL;
704 }
705 
706 bool WebMParserClient::OnListEnd(int id) {
707  DVLOG(1) << "Unexpected list element end with ID " << std::hex << id;
708  return false;
709 }
710 
711 bool WebMParserClient::OnUInt(int id, int64_t /*val*/) {
712  DVLOG(1) << "Unexpected unsigned integer element with ID " << std::hex << id;
713  return false;
714 }
715 
716 bool WebMParserClient::OnFloat(int id, double /*val*/) {
717  DVLOG(1) << "Unexpected float element with ID " << std::hex << id;
718  return false;
719 }
720 
721 bool WebMParserClient::OnBinary(int id, const uint8_t* /*data*/, int /*size*/) {
722  DVLOG(1) << "Unexpected binary element with ID " << std::hex << id;
723  return false;
724 }
725 
726 bool WebMParserClient::OnString(int id, const std::string& /*str*/) {
727  DVLOG(1) << "Unexpected string element with ID " << std::hex << id;
728  return false;
729 }
730 
732  : state_(NEED_LIST_HEADER),
733  root_id_(id),
734  root_level_(FindListLevel(id)),
735  root_client_(client) {
736  DCHECK_GE(root_level_, 0);
737  DCHECK(client);
738 }
739 
740 WebMListParser::~WebMListParser() {}
741 
743  ChangeState(NEED_LIST_HEADER);
744  list_state_stack_.clear();
745 }
746 
747 int WebMListParser::Parse(const uint8_t* buf, int size) {
748  DCHECK(buf);
749 
750  if (size < 0 || state_ == PARSE_ERROR || state_ == DONE_PARSING_LIST)
751  return -1;
752 
753  if (size == 0)
754  return 0;
755 
756  const uint8_t* cur = buf;
757  int cur_size = size;
758  int bytes_parsed = 0;
759 
760  while (cur_size > 0 && state_ != PARSE_ERROR && state_ != DONE_PARSING_LIST) {
761  int element_id = 0;
762  int64_t element_size = 0;
763  int result = WebMParseElementHeader(cur, cur_size, &element_id,
764  &element_size);
765 
766  if (result < 0)
767  return result;
768 
769  if (result == 0)
770  return bytes_parsed;
771 
772  switch(state_) {
773  case NEED_LIST_HEADER: {
774  if (element_id != root_id_) {
775  ChangeState(PARSE_ERROR);
776  return -1;
777  }
778 
779  // Only allow Segment & Cluster to have an unknown size.
780  if (element_size == kWebMUnknownSize &&
781  (element_id != kWebMIdSegment) &&
782  (element_id != kWebMIdCluster)) {
783  ChangeState(PARSE_ERROR);
784  return -1;
785  }
786 
787  ChangeState(INSIDE_LIST);
788  if (!OnListStart(root_id_, element_size))
789  return -1;
790 
791  break;
792  }
793 
794  case INSIDE_LIST: {
795  int header_size = result;
796  const uint8_t* element_data = cur + header_size;
797  int element_data_size = cur_size - header_size;
798 
799  if (element_size < element_data_size)
800  element_data_size = element_size;
801 
802  result = ParseListElement(header_size, element_id, element_size,
803  element_data, element_data_size);
804 
805  DCHECK_LE(result, header_size + element_data_size);
806  if (result < 0) {
807  ChangeState(PARSE_ERROR);
808  return -1;
809  }
810 
811  if (result == 0)
812  return bytes_parsed;
813 
814  break;
815  }
816  case DONE_PARSING_LIST:
817  case PARSE_ERROR:
818  // Shouldn't be able to get here.
819  NOTIMPLEMENTED();
820  break;
821  }
822 
823  cur += result;
824  cur_size -= result;
825  bytes_parsed += result;
826  }
827 
828  return (state_ == PARSE_ERROR) ? -1 : bytes_parsed;
829 }
830 
832  return state_ == DONE_PARSING_LIST;
833 }
834 
835 void WebMListParser::ChangeState(State new_state) {
836  state_ = new_state;
837 }
838 
839 int WebMListParser::ParseListElement(int header_size,
840  int id,
841  int64_t element_size,
842  const uint8_t* data,
843  int size) {
844  DCHECK_GT(list_state_stack_.size(), 0u);
845 
846  ListState& list_state = list_state_stack_.back();
847  DCHECK(list_state.element_info_);
848 
849  const ListElementInfo* element_info = list_state.element_info_;
850  ElementType id_type =
851  FindIdType(id, element_info->id_info_, element_info->id_info_count_);
852 
853  // Unexpected ID.
854  if (id_type == UNKNOWN) {
855  if (list_state.size_ != kWebMUnknownSize ||
856  !IsSiblingOrAncestor(list_state.id_, id)) {
857  DVLOG(1) << "No ElementType info for ID 0x" << std::hex << id;
858  return -1;
859  }
860 
861  // We've reached the end of a list of unknown size. Update the size now that
862  // we know it and dispatch the end of list calls.
863  list_state.size_ = list_state.bytes_parsed_;
864 
865  if (!OnListEnd())
866  return -1;
867 
868  // Check to see if all open lists have ended.
869  if (list_state_stack_.size() == 0)
870  return 0;
871 
872  list_state = list_state_stack_.back();
873  }
874 
875  // Make sure the whole element can fit inside the current list.
876  int64_t total_element_size = header_size + element_size;
877  if (list_state.size_ != kWebMUnknownSize &&
878  list_state.size_ < list_state.bytes_parsed_ + total_element_size) {
879  return -1;
880  }
881 
882  if (id_type == LIST) {
883  list_state.bytes_parsed_ += header_size;
884 
885  if (!OnListStart(id, element_size))
886  return -1;
887  return header_size;
888  }
889 
890  // Make sure we have the entire element before trying to parse a non-list
891  // element.
892  if (size < element_size)
893  return 0;
894 
895  int bytes_parsed = ParseNonListElement(id_type, id, element_size,
896  data, size, list_state.client_);
897  DCHECK_LE(bytes_parsed, size);
898 
899  // Return if an error occurred or we need more data.
900  // Note: bytes_parsed is 0 for a successful parse of a size 0 element. We
901  // need to check the element_size to disambiguate the "need more data" case
902  // from a successful parse.
903  if (bytes_parsed < 0 || (bytes_parsed == 0 && element_size != 0))
904  return bytes_parsed;
905 
906  int result = header_size + bytes_parsed;
907  list_state.bytes_parsed_ += result;
908 
909  // See if we have reached the end of the current list.
910  if (list_state.bytes_parsed_ == list_state.size_) {
911  if (!OnListEnd())
912  return -1;
913  }
914 
915  return result;
916 }
917 
918 bool WebMListParser::OnListStart(int id, int64_t size) {
919  const ListElementInfo* element_info = FindListInfo(id);
920  if (!element_info)
921  return false;
922 
923  int current_level =
924  root_level_ + static_cast<int>(list_state_stack_.size()) - 1;
925  if (current_level + 1 != element_info->level_)
926  return false;
927 
928  WebMParserClient* current_list_client = NULL;
929  if (!list_state_stack_.empty()) {
930  // Make sure the new list doesn't go past the end of the current list.
931  ListState current_list_state = list_state_stack_.back();
932  if (current_list_state.size_ != kWebMUnknownSize &&
933  current_list_state.size_ < current_list_state.bytes_parsed_ + size)
934  return false;
935  current_list_client = current_list_state.client_;
936  } else {
937  current_list_client = root_client_;
938  }
939 
940  WebMParserClient* new_list_client = current_list_client->OnListStart(id);
941  if (!new_list_client)
942  return false;
943 
944  ListState new_list_state = { id, size, 0, element_info, new_list_client };
945  list_state_stack_.push_back(new_list_state);
946 
947  if (size == 0)
948  return OnListEnd();
949 
950  return true;
951 }
952 
953 bool WebMListParser::OnListEnd() {
954  int lists_ended = 0;
955  for (; !list_state_stack_.empty(); ++lists_ended) {
956  const ListState& list_state = list_state_stack_.back();
957  int64_t bytes_parsed = list_state.bytes_parsed_;
958  int id = list_state.id_;
959 
960  if (bytes_parsed != list_state.size_)
961  break;
962 
963  list_state_stack_.pop_back();
964 
965  WebMParserClient* client = NULL;
966  if (!list_state_stack_.empty()) {
967  // Update the bytes_parsed_ for the parent element.
968  list_state_stack_.back().bytes_parsed_ += bytes_parsed;
969  client = list_state_stack_.back().client_;
970  } else {
971  client = root_client_;
972  }
973 
974  if (!client->OnListEnd(id))
975  return false;
976  }
977 
978  DCHECK_GE(lists_ended, 1);
979 
980  if (list_state_stack_.empty())
981  ChangeState(DONE_PARSING_LIST);
982 
983  return true;
984 }
985 
986 bool WebMListParser::IsSiblingOrAncestor(int id_a, int id_b) const {
987  DCHECK((id_a == kWebMIdSegment) || (id_a == kWebMIdCluster));
988 
989  if (id_a == kWebMIdCluster) {
990  // kWebMIdCluster siblings.
991  for (size_t i = 0; i < std::size(kSegmentIds); i++) {
992  if (kSegmentIds[i].id_ == id_b)
993  return true;
994  }
995  }
996 
997  // kWebMIdSegment siblings.
998  return ((id_b == kWebMIdSegment) || (id_b == kWebMIdEBMLHeader));
999 }
1000 
1001 } // namespace media
1002 } // namespace shaka
void Reset()
Resets the state of the parser so it can start parsing a new list.
Definition: webm_parser.cc:742
WebMListParser(int id, WebMParserClient *client)
Definition: webm_parser.cc:731
int Parse(const uint8_t *buf, int size)
Definition: webm_parser.cc:747
All the methods that are virtual are virtual for mocking.
Definition: crypto_flags.cc:66