Shaka Packager SDK
Loading...
Searching...
No Matches
webm_parser.cc
1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// This file contains code to parse WebM file elements. It was created
6// from information in the Matroska spec.
7// http://www.matroska.org/technical/specs/index.html
8//
9// This file contains code for encrypted WebM. Current WebM
10// encrypted request for comments specification is here
11// http://wiki.webmproject.org/encryption/webm-encryption-rfc
12
#include <packager/media/formats/webm/webm_parser.h>

#include <cstring>
#include <limits>

#include <absl/log/check.h>
#include <absl/log/log.h>

#include <packager/macros/logging.h>
#include <packager/media/formats/webm/webm_constants.h>
22
23namespace shaka {
24namespace media {
25
// How the payload of an element is interpreted by the parser.
enum ElementType {
  UNKNOWN,  // Returned by FindIdType() when an ID is not in the given table.
  LIST,     // Referred to as Master Element in the Matroska spec.
  UINT,     // Big-endian unsigned integer, handled by ParseUInt().
  FLOAT,    // 4- or 8-byte big-endian float, handled by ParseFloat().
  BINARY,   // Opaque bytes, handled by ParseBinary().
  STRING,   // Text, handled by ParseString().
  SKIP,     // Valid element whose payload is consumed but ignored.
};

// One row of a "valid children" table: an element ID and how to parse it.
struct ElementIdInfo {
  ElementType type_;
  int id_;
};

// Describes a list element: its ID, its nesting level, and the table of IDs
// that may appear directly inside it.
struct ListElementInfo {
  int id_;
  int level_;
  const ElementIdInfo* id_info_;  // Table of permitted child IDs.
  int id_info_count_;             // Number of entries in |id_info_|.
};
47
48// The following are tables indicating what IDs are valid sub-elements
49// of particular elements. If an element is encountered that doesn't
50// appear in the list, a parsing error is signalled. Some elements are
51// marked as SKIP because they are valid, but we don't care about them
52// right now.
53static const ElementIdInfo kEBMLHeaderIds[] = {
54 {UINT, kWebMIdEBMLVersion},
55 {UINT, kWebMIdEBMLReadVersion},
56 {UINT, kWebMIdEBMLMaxIDLength},
57 {UINT, kWebMIdEBMLMaxSizeLength},
58 {STRING, kWebMIdDocType},
59 {UINT, kWebMIdDocTypeVersion},
60 {UINT, kWebMIdDocTypeReadVersion},
61};
62
63static const ElementIdInfo kSegmentIds[] = {
64 {LIST, kWebMIdSeekHead},
65 {LIST, kWebMIdInfo},
66 {LIST, kWebMIdCluster},
67 {LIST, kWebMIdTracks},
68 {LIST, kWebMIdCues},
69 {LIST, kWebMIdAttachments},
70 {LIST, kWebMIdChapters},
71 {LIST, kWebMIdTags},
72};
73
74static const ElementIdInfo kSeekHeadIds[] = {
75 {LIST, kWebMIdSeek},
76};
77
78static const ElementIdInfo kSeekIds[] = {
79 {BINARY, kWebMIdSeekID},
80 {UINT, kWebMIdSeekPosition},
81};
82
83static const ElementIdInfo kInfoIds[] = {
84 {BINARY, kWebMIdSegmentUID},
85 {STRING, kWebMIdSegmentFilename},
86 {BINARY, kWebMIdPrevUID},
87 {STRING, kWebMIdPrevFilename},
88 {BINARY, kWebMIdNextUID},
89 {STRING, kWebMIdNextFilename},
90 {BINARY, kWebMIdSegmentFamily},
91 {LIST, kWebMIdChapterTranslate},
92 {UINT, kWebMIdTimecodeScale},
93 {FLOAT, kWebMIdDuration},
94 {BINARY, kWebMIdDateUTC},
95 {STRING, kWebMIdTitle},
96 {STRING, kWebMIdMuxingApp},
97 {STRING, kWebMIdWritingApp},
98};
99
100static const ElementIdInfo kChapterTranslateIds[] = {
101 {UINT, kWebMIdChapterTranslateEditionUID},
102 {UINT, kWebMIdChapterTranslateCodec},
103 {BINARY, kWebMIdChapterTranslateID},
104};
105
106static const ElementIdInfo kClusterIds[] = {
107 {BINARY, kWebMIdSimpleBlock},
108 {UINT, kWebMIdTimecode},
109 {LIST, kWebMIdSilentTracks},
110 {UINT, kWebMIdPosition},
111 {UINT, kWebMIdPrevSize},
112 {LIST, kWebMIdBlockGroup},
113};
114
115static const ElementIdInfo kSilentTracksIds[] = {
116 {UINT, kWebMIdSilentTrackNumber},
117};
118
119static const ElementIdInfo kBlockGroupIds[] = {
120 {BINARY, kWebMIdBlock},
121 {LIST, kWebMIdBlockAdditions},
122 {UINT, kWebMIdBlockDuration},
123 {UINT, kWebMIdReferencePriority},
124 {BINARY, kWebMIdReferenceBlock},
125 {BINARY, kWebMIdCodecState},
126 {BINARY, kWebMIdDiscardPadding},
127 {LIST, kWebMIdSlices},
128};
129
130static const ElementIdInfo kBlockAdditionsIds[] = {
131 {LIST, kWebMIdBlockMore},
132};
133
134static const ElementIdInfo kBlockMoreIds[] = {
135 {UINT, kWebMIdBlockAddID},
136 {BINARY, kWebMIdBlockAdditional},
137};
138
139static const ElementIdInfo kSlicesIds[] = {
140 {LIST, kWebMIdTimeSlice},
141};
142
143static const ElementIdInfo kTimeSliceIds[] = {
144 {UINT, kWebMIdLaceNumber},
145};
146
147static const ElementIdInfo kTracksIds[] = {
148 {LIST, kWebMIdTrackEntry},
149};
150
151static const ElementIdInfo kTrackEntryIds[] = {
152 {UINT, kWebMIdTrackNumber},
153 {BINARY, kWebMIdTrackUID},
154 {UINT, kWebMIdTrackType},
155 {UINT, kWebMIdFlagEnabled},
156 {UINT, kWebMIdFlagDefault},
157 {UINT, kWebMIdFlagForced},
158 {UINT, kWebMIdFlagLacing},
159 {UINT, kWebMIdMinCache},
160 {UINT, kWebMIdMaxCache},
161 {UINT, kWebMIdDefaultDuration},
162 {FLOAT, kWebMIdTrackTimecodeScale},
163 {UINT, kWebMIdMaxBlockAdditionId},
164 {STRING, kWebMIdName},
165 {STRING, kWebMIdLanguage},
166 {STRING, kWebMIdCodecID},
167 {BINARY, kWebMIdCodecPrivate},
168 {STRING, kWebMIdCodecName},
169 {UINT, kWebMIdAttachmentLink},
170 {UINT, kWebMIdCodecDecodeAll},
171 {UINT, kWebMIdTrackOverlay},
172 {UINT, kWebMIdCodecDelay},
173 {UINT, kWebMIdSeekPreRoll},
174 {LIST, kWebMIdTrackTranslate},
175 {LIST, kWebMIdVideo},
176 {LIST, kWebMIdAudio},
177 {LIST, kWebMIdTrackOperation},
178 {LIST, kWebMIdContentEncodings},
179};
180
181static const ElementIdInfo kTrackTranslateIds[] = {
182 {UINT, kWebMIdTrackTranslateEditionUID},
183 {UINT, kWebMIdTrackTranslateCodec},
184 {BINARY, kWebMIdTrackTranslateTrackID},
185};
186
187static const ElementIdInfo kVideoIds[] = {
188 {UINT, kWebMIdFlagInterlaced},
189 {UINT, kWebMIdStereoMode},
190 {UINT, kWebMIdAlphaMode},
191 {UINT, kWebMIdPixelWidth},
192 {UINT, kWebMIdPixelHeight},
193 {UINT, kWebMIdPixelCropBottom},
194 {UINT, kWebMIdPixelCropTop},
195 {UINT, kWebMIdPixelCropLeft},
196 {UINT, kWebMIdPixelCropRight},
197 {UINT, kWebMIdDisplayWidth},
198 {UINT, kWebMIdDisplayHeight},
199 {UINT, kWebMIdDisplayUnit},
200 {UINT, kWebMIdAspectRatioType},
201 {BINARY, kWebMIdColorSpace},
202 {FLOAT, kWebMIdFrameRate},
203 {LIST, kWebMIdColor},
204 {LIST, kWebMIdProjection},
205};
206
207static const ElementIdInfo kColorIds[] = {
208 {UINT, kWebMIdColorMatrixCoefficients},
209 {UINT, kWebMIdColorBitsPerChannel},
210 {UINT, kWebMIdColorChromaSubsamplingHorz},
211 {UINT, kWebMIdColorChromaSubsamplingVert},
212 {UINT, kWebMIdColorCbSamplingHorz},
213 {UINT, kWebMIdColorCbSamplingVert},
214 {UINT, kWebMIdColorChromaSitingHorz},
215 {UINT, kWebMIdColorChromaSitingVert},
216 {UINT, kWebMIdColorRange},
217 {UINT, kWebMIdColorTransferCharacteristics},
218 {UINT, kWebMIdColorPrimaries},
219 {UINT, kWebMIdColorMaxCLL},
220 {UINT, kWebMIdColorMaxFALL},
221 {LIST, kWebMIdColorMasteringMetadata},
222};
223
224static const ElementIdInfo kProjectionIds[] = {
225 {UINT, kWebMIdProjectionType},
226};
227
228static const ElementIdInfo kAudioIds[] = {
229 {FLOAT, kWebMIdSamplingFrequency},
230 {FLOAT, kWebMIdOutputSamplingFrequency},
231 {UINT, kWebMIdChannels},
232 {UINT, kWebMIdBitDepth},
233};
234
235static const ElementIdInfo kTrackOperationIds[] = {
236 {LIST, kWebMIdTrackCombinePlanes},
237 {LIST, kWebMIdJoinBlocks},
238};
239
240static const ElementIdInfo kTrackCombinePlanesIds[] = {
241 {LIST, kWebMIdTrackPlane},
242};
243
244static const ElementIdInfo kTrackPlaneIds[] = {
245 {UINT, kWebMIdTrackPlaneUID},
246 {UINT, kWebMIdTrackPlaneType},
247};
248
249static const ElementIdInfo kJoinBlocksIds[] = {
250 {UINT, kWebMIdTrackJoinUID},
251};
252
253static const ElementIdInfo kContentEncodingsIds[] = {
254 {LIST, kWebMIdContentEncoding},
255};
256
257static const ElementIdInfo kContentEncodingIds[] = {
258 {UINT, kWebMIdContentEncodingOrder},
259 {UINT, kWebMIdContentEncodingScope},
260 {UINT, kWebMIdContentEncodingType},
261 {LIST, kWebMIdContentCompression},
262 {LIST, kWebMIdContentEncryption},
263};
264
265static const ElementIdInfo kContentCompressionIds[] = {
266 {UINT, kWebMIdContentCompAlgo},
267 {BINARY, kWebMIdContentCompSettings},
268};
269
270static const ElementIdInfo kContentEncryptionIds[] = {
271 {LIST, kWebMIdContentEncAESSettings},
272 {UINT, kWebMIdContentEncAlgo},
273 {BINARY, kWebMIdContentEncKeyID},
274 {BINARY, kWebMIdContentSignature},
275 {BINARY, kWebMIdContentSigKeyID},
276 {UINT, kWebMIdContentSigAlgo},
277 {UINT, kWebMIdContentSigHashAlgo},
278};
279
280static const ElementIdInfo kContentEncAESSettingsIds[] = {
281 {UINT, kWebMIdAESSettingsCipherMode},
282};
283
284static const ElementIdInfo kCuesIds[] = {
285 {LIST, kWebMIdCuePoint},
286};
287
288static const ElementIdInfo kCuePointIds[] = {
289 {UINT, kWebMIdCueTime},
290 {LIST, kWebMIdCueTrackPositions},
291};
292
293static const ElementIdInfo kCueTrackPositionsIds[] = {
294 {UINT, kWebMIdCueTrack},
295 {UINT, kWebMIdCueClusterPosition},
296 {UINT, kWebMIdCueBlockNumber},
297 {UINT, kWebMIdCueCodecState},
298 {LIST, kWebMIdCueReference},
299};
300
301static const ElementIdInfo kCueReferenceIds[] = {
302 {UINT, kWebMIdCueRefTime},
303};
304
305static const ElementIdInfo kAttachmentsIds[] = {
306 {LIST, kWebMIdAttachedFile},
307};
308
309static const ElementIdInfo kAttachedFileIds[] = {
310 {STRING, kWebMIdFileDescription},
311 {STRING, kWebMIdFileName},
312 {STRING, kWebMIdFileMimeType},
313 {BINARY, kWebMIdFileData},
314 {UINT, kWebMIdFileUID},
315};
316
317static const ElementIdInfo kChaptersIds[] = {
318 {LIST, kWebMIdEditionEntry},
319};
320
321static const ElementIdInfo kEditionEntryIds[] = {
322 {UINT, kWebMIdEditionUID},
323 {UINT, kWebMIdEditionFlagHidden},
324 {UINT, kWebMIdEditionFlagDefault},
325 {UINT, kWebMIdEditionFlagOrdered},
326 {LIST, kWebMIdChapterAtom},
327};
328
329static const ElementIdInfo kChapterAtomIds[] = {
330 {UINT, kWebMIdChapterUID},
331 {UINT, kWebMIdChapterTimeStart},
332 {UINT, kWebMIdChapterTimeEnd},
333 {UINT, kWebMIdChapterFlagHidden},
334 {UINT, kWebMIdChapterFlagEnabled},
335 {BINARY, kWebMIdChapterSegmentUID},
336 {UINT, kWebMIdChapterSegmentEditionUID},
337 {UINT, kWebMIdChapterPhysicalEquiv},
338 {LIST, kWebMIdChapterTrack},
339 {LIST, kWebMIdChapterDisplay},
340 {LIST, kWebMIdChapProcess},
341};
342
343static const ElementIdInfo kChapterTrackIds[] = {
344 {UINT, kWebMIdChapterTrackNumber},
345};
346
347static const ElementIdInfo kChapterDisplayIds[] = {
348 {STRING, kWebMIdChapString},
349 {STRING, kWebMIdChapLanguage},
350 {STRING, kWebMIdChapCountry},
351};
352
353static const ElementIdInfo kChapProcessIds[] = {
354 {UINT, kWebMIdChapProcessCodecID},
355 {BINARY, kWebMIdChapProcessPrivate},
356 {LIST, kWebMIdChapProcessCommand},
357};
358
359static const ElementIdInfo kChapProcessCommandIds[] = {
360 {UINT, kWebMIdChapProcessTime},
361 {BINARY, kWebMIdChapProcessData},
362};
363
364static const ElementIdInfo kTagsIds[] = {
365 {LIST, kWebMIdTag},
366};
367
368static const ElementIdInfo kTagIds[] = {
369 {LIST, kWebMIdTargets},
370 {LIST, kWebMIdSimpleTag},
371};
372
373static const ElementIdInfo kTargetsIds[] = {
374 {UINT, kWebMIdTargetTypeValue},
375 {STRING, kWebMIdTargetType},
376 {UINT, kWebMIdTagTrackUID},
377 {UINT, kWebMIdTagEditionUID},
378 {UINT, kWebMIdTagChapterUID},
379 {UINT, kWebMIdTagAttachmentUID},
380};
381
382static const ElementIdInfo kSimpleTagIds[] = {
383 {STRING, kWebMIdTagName},
384 {STRING, kWebMIdTagLanguage},
385 {UINT, kWebMIdTagDefault},
386 {STRING, kWebMIdTagString},
387 {BINARY, kWebMIdTagBinary},
388};
389
390#define LIST_ELEMENT_INFO(id, level, id_info) \
391 { (id), (level), (id_info), std::size(id_info) }
392
393static const ListElementInfo kListElementInfo[] = {
394 LIST_ELEMENT_INFO(kWebMIdCluster, 1, kClusterIds),
395 LIST_ELEMENT_INFO(kWebMIdEBMLHeader, 0, kEBMLHeaderIds),
396 LIST_ELEMENT_INFO(kWebMIdSegment, 0, kSegmentIds),
397 LIST_ELEMENT_INFO(kWebMIdSeekHead, 1, kSeekHeadIds),
398 LIST_ELEMENT_INFO(kWebMIdSeek, 2, kSeekIds),
399 LIST_ELEMENT_INFO(kWebMIdInfo, 1, kInfoIds),
400 LIST_ELEMENT_INFO(kWebMIdChapterTranslate, 2, kChapterTranslateIds),
401 LIST_ELEMENT_INFO(kWebMIdSilentTracks, 2, kSilentTracksIds),
402 LIST_ELEMENT_INFO(kWebMIdBlockGroup, 2, kBlockGroupIds),
403 LIST_ELEMENT_INFO(kWebMIdBlockAdditions, 3, kBlockAdditionsIds),
404 LIST_ELEMENT_INFO(kWebMIdBlockMore, 4, kBlockMoreIds),
405 LIST_ELEMENT_INFO(kWebMIdSlices, 3, kSlicesIds),
406 LIST_ELEMENT_INFO(kWebMIdTimeSlice, 4, kTimeSliceIds),
407 LIST_ELEMENT_INFO(kWebMIdTracks, 1, kTracksIds),
408 LIST_ELEMENT_INFO(kWebMIdTrackEntry, 2, kTrackEntryIds),
409 LIST_ELEMENT_INFO(kWebMIdTrackTranslate, 3, kTrackTranslateIds),
410 LIST_ELEMENT_INFO(kWebMIdVideo, 3, kVideoIds),
411 LIST_ELEMENT_INFO(kWebMIdColor, 4, kColorIds),
412 LIST_ELEMENT_INFO(kWebMIdProjection, 4, kProjectionIds),
413 LIST_ELEMENT_INFO(kWebMIdAudio, 3, kAudioIds),
414 LIST_ELEMENT_INFO(kWebMIdTrackOperation, 3, kTrackOperationIds),
415 LIST_ELEMENT_INFO(kWebMIdTrackCombinePlanes, 4, kTrackCombinePlanesIds),
416 LIST_ELEMENT_INFO(kWebMIdTrackPlane, 5, kTrackPlaneIds),
417 LIST_ELEMENT_INFO(kWebMIdJoinBlocks, 4, kJoinBlocksIds),
418 LIST_ELEMENT_INFO(kWebMIdContentEncodings, 3, kContentEncodingsIds),
419 LIST_ELEMENT_INFO(kWebMIdContentEncoding, 4, kContentEncodingIds),
420 LIST_ELEMENT_INFO(kWebMIdContentCompression, 5, kContentCompressionIds),
421 LIST_ELEMENT_INFO(kWebMIdContentEncryption, 5, kContentEncryptionIds),
422 LIST_ELEMENT_INFO(kWebMIdContentEncAESSettings, 6, kContentEncAESSettingsIds),
423 LIST_ELEMENT_INFO(kWebMIdCues, 1, kCuesIds),
424 LIST_ELEMENT_INFO(kWebMIdCuePoint, 2, kCuePointIds),
425 LIST_ELEMENT_INFO(kWebMIdCueTrackPositions, 3, kCueTrackPositionsIds),
426 LIST_ELEMENT_INFO(kWebMIdCueReference, 4, kCueReferenceIds),
427 LIST_ELEMENT_INFO(kWebMIdAttachments, 1, kAttachmentsIds),
428 LIST_ELEMENT_INFO(kWebMIdAttachedFile, 2, kAttachedFileIds),
429 LIST_ELEMENT_INFO(kWebMIdChapters, 1, kChaptersIds),
430 LIST_ELEMENT_INFO(kWebMIdEditionEntry, 2, kEditionEntryIds),
431 LIST_ELEMENT_INFO(kWebMIdChapterAtom, 3, kChapterAtomIds),
432 LIST_ELEMENT_INFO(kWebMIdChapterTrack, 4, kChapterTrackIds),
433 LIST_ELEMENT_INFO(kWebMIdChapterDisplay, 4, kChapterDisplayIds),
434 LIST_ELEMENT_INFO(kWebMIdChapProcess, 4, kChapProcessIds),
435 LIST_ELEMENT_INFO(kWebMIdChapProcessCommand, 5, kChapProcessCommandIds),
436 LIST_ELEMENT_INFO(kWebMIdTags, 1, kTagsIds),
437 LIST_ELEMENT_INFO(kWebMIdTag, 2, kTagIds),
438 LIST_ELEMENT_INFO(kWebMIdTargets, 3, kTargetsIds),
439 LIST_ELEMENT_INFO(kWebMIdSimpleTag, 3, kSimpleTagIds),
440};
441
442// Parses an element header id or size field. These fields are variable length
443// encoded. The first byte indicates how many bytes the field occupies.
444// |buf| - The buffer to parse.
445// |size| - The number of bytes in |buf|
446// |max_bytes| - The maximum number of bytes the field can be. ID fields
447// set this to 4 & element size fields set this to 8. If the
448// first byte indicates a larger field size than this it is a
449// parser error.
450// |mask_first_byte| - For element size fields the field length encoding bits
451// need to be masked off. This parameter is true for
452// element size fields and is false for ID field values.
453//
454// Returns: The number of bytes parsed on success. -1 on error.
455static int ParseWebMElementHeaderField(const uint8_t* buf,
456 int size,
457 int max_bytes,
458 bool mask_first_byte,
459 int64_t* num) {
460 DCHECK(buf);
461 DCHECK(num);
462
463 if (size < 0)
464 return -1;
465
466 if (size == 0)
467 return 0;
468
469 int mask = 0x80;
470 uint8_t ch = buf[0];
471 int extra_bytes = -1;
472 bool all_ones = false;
473 for (int i = 0; i < max_bytes; ++i) {
474 if ((ch & mask) != 0) {
475 mask = ~mask & 0xff;
476 *num = mask_first_byte ? ch & mask : ch;
477 all_ones = (ch & mask) == mask;
478 extra_bytes = i;
479 break;
480 }
481 mask = 0x80 | mask >> 1;
482 }
483
484 if (extra_bytes == -1)
485 return -1;
486
487 // Return 0 if we need more data.
488 if ((1 + extra_bytes) > size)
489 return 0;
490
491 int bytes_used = 1;
492
493 for (int i = 0; i < extra_bytes; ++i) {
494 ch = buf[bytes_used++];
495 all_ones &= (ch == 0xff);
496 *num = (*num << 8) | ch;
497 }
498
499 if (all_ones)
500 *num = std::numeric_limits<int64_t>::max();
501
502 return bytes_used;
503}
504
505int WebMParseElementHeader(const uint8_t* buf,
506 int size,
507 int* id,
508 int64_t* element_size) {
509 DCHECK(buf);
510 DCHECK_GE(size, 0);
511 DCHECK(id);
512 DCHECK(element_size);
513
514 if (size == 0)
515 return 0;
516
517 int64_t tmp = 0;
518 int num_id_bytes = ParseWebMElementHeaderField(buf, size, 4, false, &tmp);
519
520 if (num_id_bytes <= 0)
521 return num_id_bytes;
522
523 if (tmp == std::numeric_limits<int64_t>::max())
524 tmp = kWebMReservedId;
525
526 *id = static_cast<int>(tmp);
527
528 int num_size_bytes = ParseWebMElementHeaderField(buf + num_id_bytes,
529 size - num_id_bytes,
530 8, true, &tmp);
531
532 if (num_size_bytes <= 0)
533 return num_size_bytes;
534
535 if (tmp == std::numeric_limits<int64_t>::max())
536 tmp = kWebMUnknownSize;
537
538 *element_size = tmp;
539 DVLOG(3) << "WebMParseElementHeader() : id " << std::hex << *id << std::dec
540 << " size " << *element_size;
541 return num_id_bytes + num_size_bytes;
542}
543
544// Finds ElementType for a specific ID.
545static ElementType FindIdType(int id,
546 const ElementIdInfo* id_info,
547 int id_info_count) {
548
549 // Check for global element IDs that can be anywhere.
550 if (id == kWebMIdVoid || id == kWebMIdCRC32)
551 return SKIP;
552
553 for (int i = 0; i < id_info_count; ++i) {
554 if (id == id_info[i].id_)
555 return id_info[i].type_;
556 }
557
558 return UNKNOWN;
559}
560
561// Finds ListElementInfo for a specific ID.
562static const ListElementInfo* FindListInfo(int id) {
563 for (size_t i = 0; i < std::size(kListElementInfo); ++i) {
564 if (id == kListElementInfo[i].id_)
565 return &kListElementInfo[i];
566 }
567
568 return NULL;
569}
570
571static int FindListLevel(int id) {
572 const ListElementInfo* list_info = FindListInfo(id);
573 if (list_info)
574 return list_info->level_;
575
576 return -1;
577}
578
579static int ParseUInt(const uint8_t* buf,
580 int size,
581 int id,
582 WebMParserClient* client) {
583 if ((size <= 0) || (size > 8))
584 return -1;
585
586 // Read in the big-endian integer.
587 uint64_t value = 0;
588 for (int i = 0; i < size; ++i)
589 value = (value << 8) | buf[i];
590
591 // We use int64_t in place of uint64_t everywhere for convenience. See this
592 // bug
593 // for more details: http://crbug.com/366750#c3
594 if (value > static_cast<uint64_t>(std::numeric_limits<int64_t>::max()))
595 return -1;
596
597 if (!client->OnUInt(id, value))
598 return -1;
599
600 return size;
601}
602
603static int ParseFloat(const uint8_t* buf,
604 int size,
605 int id,
606 WebMParserClient* client) {
607 if ((size != 4) && (size != 8))
608 return -1;
609
610 double value = -1;
611
612 // Read the bytes from big-endian form into a native endian integer.
613 int64_t tmp = 0;
614 for (int i = 0; i < size; ++i)
615 tmp = (tmp << 8) | buf[i];
616
617 // Use a union to convert the integer bit pattern into a floating point
618 // number.
619 if (size == 4) {
620 union {
621 int32_t src;
622 float dst;
623 } tmp2;
624 tmp2.src = static_cast<int32_t>(tmp);
625 value = tmp2.dst;
626 } else if (size == 8) {
627 union {
628 int64_t src;
629 double dst;
630 } tmp2;
631 tmp2.src = tmp;
632 value = tmp2.dst;
633 } else {
634 return -1;
635 }
636
637 if (!client->OnFloat(id, value))
638 return -1;
639
640 return size;
641}
642
643static int ParseBinary(const uint8_t* buf,
644 int size,
645 int id,
646 WebMParserClient* client) {
647 return client->OnBinary(id, buf, size) ? size : -1;
648}
649
650static int ParseString(const uint8_t* buf,
651 int size,
652 int id,
653 WebMParserClient* client) {
654 const uint8_t* end = static_cast<const uint8_t*>(memchr(buf, '\0', size));
655 int length = (end != NULL) ? static_cast<int>(end - buf) : size;
656 std::string str(reinterpret_cast<const char*>(buf), length);
657 return client->OnString(id, str) ? size : -1;
658}
659
660static int ParseNonListElement(ElementType type,
661 int id,
662 int64_t element_size,
663 const uint8_t* buf,
664 int size,
665 WebMParserClient* client) {
666 DCHECK_GE(size, element_size);
667
668 int result = -1;
669 switch(type) {
670 case LIST:
671 NOTIMPLEMENTED();
672 result = -1;
673 break;
674 case UINT:
675 result = ParseUInt(buf, element_size, id, client);
676 break;
677 case FLOAT:
678 result = ParseFloat(buf, element_size, id, client);
679 break;
680 case BINARY:
681 result = ParseBinary(buf, element_size, id, client);
682 break;
683 case STRING:
684 result = ParseString(buf, element_size, id, client);
685 break;
686 case SKIP:
687 result = element_size;
688 break;
689 default:
690 DVLOG(1) << "Unhandled ID type " << type;
691 return -1;
692 };
693
694 DCHECK_LE(result, size);
695 return result;
696}
697
698WebMParserClient::WebMParserClient() {}
699WebMParserClient::~WebMParserClient() {}
700
701WebMParserClient* WebMParserClient::OnListStart(int id) {
702 DVLOG(1) << "Unexpected list element start with ID " << std::hex << id;
703 return NULL;
704}
705
706bool WebMParserClient::OnListEnd(int id) {
707 DVLOG(1) << "Unexpected list element end with ID " << std::hex << id;
708 return false;
709}
710
711bool WebMParserClient::OnUInt(int id, int64_t /*val*/) {
712 DVLOG(1) << "Unexpected unsigned integer element with ID " << std::hex << id;
713 return false;
714}
715
716bool WebMParserClient::OnFloat(int id, double /*val*/) {
717 DVLOG(1) << "Unexpected float element with ID " << std::hex << id;
718 return false;
719}
720
721bool WebMParserClient::OnBinary(int id, const uint8_t* /*data*/, int /*size*/) {
722 DVLOG(1) << "Unexpected binary element with ID " << std::hex << id;
723 return false;
724}
725
726bool WebMParserClient::OnString(int id, const std::string& /*str*/) {
727 DVLOG(1) << "Unexpected string element with ID " << std::hex << id;
728 return false;
729}
730
732 : state_(NEED_LIST_HEADER),
733 root_id_(id),
734 root_level_(FindListLevel(id)),
735 root_client_(client) {
736 DCHECK_GE(root_level_, 0);
737 DCHECK(client);
738}
739
740WebMListParser::~WebMListParser() {}
741
743 ChangeState(NEED_LIST_HEADER);
744 list_state_stack_.clear();
745}
746
747int WebMListParser::Parse(const uint8_t* buf, int size) {
748 DCHECK(buf);
749
750 if (size < 0 || state_ == PARSE_ERROR || state_ == DONE_PARSING_LIST)
751 return -1;
752
753 if (size == 0)
754 return 0;
755
756 const uint8_t* cur = buf;
757 int cur_size = size;
758 int bytes_parsed = 0;
759
760 while (cur_size > 0 && state_ != PARSE_ERROR && state_ != DONE_PARSING_LIST) {
761 int element_id = 0;
762 int64_t element_size = 0;
763 int result = WebMParseElementHeader(cur, cur_size, &element_id,
764 &element_size);
765
766 if (result < 0)
767 return result;
768
769 if (result == 0)
770 return bytes_parsed;
771
772 switch(state_) {
773 case NEED_LIST_HEADER: {
774 if (element_id != root_id_) {
775 ChangeState(PARSE_ERROR);
776 return -1;
777 }
778
779 // Only allow Segment & Cluster to have an unknown size.
780 if (element_size == kWebMUnknownSize &&
781 (element_id != kWebMIdSegment) &&
782 (element_id != kWebMIdCluster)) {
783 ChangeState(PARSE_ERROR);
784 return -1;
785 }
786
787 ChangeState(INSIDE_LIST);
788 if (!OnListStart(root_id_, element_size))
789 return -1;
790
791 break;
792 }
793
794 case INSIDE_LIST: {
795 int header_size = result;
796 const uint8_t* element_data = cur + header_size;
797 int element_data_size = cur_size - header_size;
798
799 if (element_size < element_data_size)
800 element_data_size = element_size;
801
802 result = ParseListElement(header_size, element_id, element_size,
803 element_data, element_data_size);
804
805 DCHECK_LE(result, header_size + element_data_size);
806 if (result < 0) {
807 ChangeState(PARSE_ERROR);
808 return -1;
809 }
810
811 if (result == 0)
812 return bytes_parsed;
813
814 break;
815 }
816 case DONE_PARSING_LIST:
817 case PARSE_ERROR:
818 // Shouldn't be able to get here.
819 NOTIMPLEMENTED();
820 break;
821 }
822
823 cur += result;
824 cur_size -= result;
825 bytes_parsed += result;
826 }
827
828 return (state_ == PARSE_ERROR) ? -1 : bytes_parsed;
829}
830
832 return state_ == DONE_PARSING_LIST;
833}
834
835void WebMListParser::ChangeState(State new_state) {
836 state_ = new_state;
837}
838
839int WebMListParser::ParseListElement(int header_size,
840 int id,
841 int64_t element_size,
842 const uint8_t* data,
843 int size) {
844 DCHECK_GT(list_state_stack_.size(), 0u);
845
846 ListState& list_state = list_state_stack_.back();
847 DCHECK(list_state.element_info_);
848
849 const ListElementInfo* element_info = list_state.element_info_;
850 ElementType id_type =
851 FindIdType(id, element_info->id_info_, element_info->id_info_count_);
852
853 // Unexpected ID.
854 if (id_type == UNKNOWN) {
855 if (list_state.size_ != kWebMUnknownSize ||
856 !IsSiblingOrAncestor(list_state.id_, id)) {
857 DVLOG(1) << "No ElementType info for ID 0x" << std::hex << id;
858 return -1;
859 }
860
861 // We've reached the end of a list of unknown size. Update the size now that
862 // we know it and dispatch the end of list calls.
863 list_state.size_ = list_state.bytes_parsed_;
864
865 if (!OnListEnd())
866 return -1;
867
868 // Check to see if all open lists have ended.
869 if (list_state_stack_.size() == 0)
870 return 0;
871
872 list_state = list_state_stack_.back();
873 }
874
875 // Make sure the whole element can fit inside the current list.
876 int64_t total_element_size = header_size + element_size;
877 if (list_state.size_ != kWebMUnknownSize &&
878 list_state.size_ < list_state.bytes_parsed_ + total_element_size) {
879 return -1;
880 }
881
882 if (id_type == LIST) {
883 list_state.bytes_parsed_ += header_size;
884
885 if (!OnListStart(id, element_size))
886 return -1;
887 return header_size;
888 }
889
890 // Make sure we have the entire element before trying to parse a non-list
891 // element.
892 if (size < element_size)
893 return 0;
894
895 int bytes_parsed = ParseNonListElement(id_type, id, element_size,
896 data, size, list_state.client_);
897 DCHECK_LE(bytes_parsed, size);
898
899 // Return if an error occurred or we need more data.
900 // Note: bytes_parsed is 0 for a successful parse of a size 0 element. We
901 // need to check the element_size to disambiguate the "need more data" case
902 // from a successful parse.
903 if (bytes_parsed < 0 || (bytes_parsed == 0 && element_size != 0))
904 return bytes_parsed;
905
906 int result = header_size + bytes_parsed;
907 list_state.bytes_parsed_ += result;
908
909 // See if we have reached the end of the current list.
910 if (list_state.bytes_parsed_ == list_state.size_) {
911 if (!OnListEnd())
912 return -1;
913 }
914
915 return result;
916}
917
918bool WebMListParser::OnListStart(int id, int64_t size) {
919 const ListElementInfo* element_info = FindListInfo(id);
920 if (!element_info)
921 return false;
922
923 int current_level =
924 root_level_ + static_cast<int>(list_state_stack_.size()) - 1;
925 if (current_level + 1 != element_info->level_)
926 return false;
927
928 WebMParserClient* current_list_client = NULL;
929 if (!list_state_stack_.empty()) {
930 // Make sure the new list doesn't go past the end of the current list.
931 ListState current_list_state = list_state_stack_.back();
932 if (current_list_state.size_ != kWebMUnknownSize &&
933 current_list_state.size_ < current_list_state.bytes_parsed_ + size)
934 return false;
935 current_list_client = current_list_state.client_;
936 } else {
937 current_list_client = root_client_;
938 }
939
940 WebMParserClient* new_list_client = current_list_client->OnListStart(id);
941 if (!new_list_client)
942 return false;
943
944 ListState new_list_state = { id, size, 0, element_info, new_list_client };
945 list_state_stack_.push_back(new_list_state);
946
947 if (size == 0)
948 return OnListEnd();
949
950 return true;
951}
952
953bool WebMListParser::OnListEnd() {
954 int lists_ended = 0;
955 for (; !list_state_stack_.empty(); ++lists_ended) {
956 const ListState& list_state = list_state_stack_.back();
957 int64_t bytes_parsed = list_state.bytes_parsed_;
958 int id = list_state.id_;
959
960 if (bytes_parsed != list_state.size_)
961 break;
962
963 list_state_stack_.pop_back();
964
965 WebMParserClient* client = NULL;
966 if (!list_state_stack_.empty()) {
967 // Update the bytes_parsed_ for the parent element.
968 list_state_stack_.back().bytes_parsed_ += bytes_parsed;
969 client = list_state_stack_.back().client_;
970 } else {
971 client = root_client_;
972 }
973
974 if (!client->OnListEnd(id))
975 return false;
976 }
977
978 DCHECK_GE(lists_ended, 1);
979
980 if (list_state_stack_.empty())
981 ChangeState(DONE_PARSING_LIST);
982
983 return true;
984}
985
986bool WebMListParser::IsSiblingOrAncestor(int id_a, int id_b) const {
987 DCHECK((id_a == kWebMIdSegment) || (id_a == kWebMIdCluster));
988
989 if (id_a == kWebMIdCluster) {
990 // kWebMIdCluster siblings.
991 for (size_t i = 0; i < std::size(kSegmentIds); i++) {
992 if (kSegmentIds[i].id_ == id_b)
993 return true;
994 }
995 }
996
997 // kWebMIdSegment siblings.
998 return ((id_b == kWebMIdSegment) || (id_b == kWebMIdEBMLHeader));
999}
1000
1001} // namespace media
1002} // namespace shaka
void Reset()
Resets the state of the parser so it can start parsing a new list.
WebMListParser(int id, WebMParserClient *client)
int Parse(const uint8_t *buf, int size)
All the methods that are virtual are virtual for mocking.