Shaka Packager SDK
Loading...
Searching...
No Matches
webm_parser.cc
1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// This file contains code to parse WebM file elements. It was created
6// from information in the Matroska spec.
7// http://www.matroska.org/technical/specs/index.html
8//
9// This file contains code for encrypted WebM. Current WebM
10// encrypted request for comments specification is here
11// http://wiki.webmproject.org/encryption/webm-encryption-rfc
12
#include <packager/media/formats/webm/webm_parser.h>

#include <cstring>
#include <limits>

#include <absl/log/check.h>
#include <absl/log/log.h>

#include <packager/macros/logging.h>
#include <packager/media/formats/webm/webm_constants.h>
22
23namespace shaka {
24namespace media {
25
// How the payload of an element is handled by the parser.
enum ElementType {
  UNKNOWN,
  LIST,  // Referred to as Master Element in the Matroska spec.
  UINT,
  FLOAT,
  BINARY,
  STRING,
  SKIP,
};

// One row of a child-ID table: an element ID and the payload type used to
// parse it.
struct ElementIdInfo {
  ElementType type_;
  int id_;
};

// Description of a list element: its own ID, its nesting level, and the table
// (pointer + entry count) of IDs that may appear directly inside it.
struct ListElementInfo {
  int id_;
  int level_;
  const ElementIdInfo* id_info_;
  int id_info_count_;
};
47
48// The following are tables indicating what IDs are valid sub-elements
49// of particular elements. If an element is encountered that doesn't
50// appear in the list, a parsing error is signalled. Some elements are
51// marked as SKIP because they are valid, but we don't care about them
52// right now.
53static const ElementIdInfo kEBMLHeaderIds[] = {
54 {UINT, kWebMIdEBMLVersion}, {UINT, kWebMIdEBMLReadVersion},
55 {UINT, kWebMIdEBMLMaxIDLength}, {UINT, kWebMIdEBMLMaxSizeLength},
56 {STRING, kWebMIdDocType}, {UINT, kWebMIdDocTypeVersion},
57 {UINT, kWebMIdDocTypeReadVersion},
58};
59
60static const ElementIdInfo kSegmentIds[] = {
61 {LIST, kWebMIdSeekHead}, {LIST, kWebMIdInfo}, {LIST, kWebMIdCluster},
62 {LIST, kWebMIdTracks}, {LIST, kWebMIdCues}, {LIST, kWebMIdAttachments},
63 {LIST, kWebMIdChapters}, {LIST, kWebMIdTags},
64};
65
66static const ElementIdInfo kSeekHeadIds[] = {
67 {LIST, kWebMIdSeek},
68};
69
70static const ElementIdInfo kSeekIds[] = {
71 {BINARY, kWebMIdSeekID},
72 {UINT, kWebMIdSeekPosition},
73};
74
75static const ElementIdInfo kInfoIds[] = {
76 {BINARY, kWebMIdSegmentUID}, {STRING, kWebMIdSegmentFilename},
77 {BINARY, kWebMIdPrevUID}, {STRING, kWebMIdPrevFilename},
78 {BINARY, kWebMIdNextUID}, {STRING, kWebMIdNextFilename},
79 {BINARY, kWebMIdSegmentFamily}, {LIST, kWebMIdChapterTranslate},
80 {UINT, kWebMIdTimecodeScale}, {FLOAT, kWebMIdDuration},
81 {BINARY, kWebMIdDateUTC}, {STRING, kWebMIdTitle},
82 {STRING, kWebMIdMuxingApp}, {STRING, kWebMIdWritingApp},
83};
84
85static const ElementIdInfo kChapterTranslateIds[] = {
86 {UINT, kWebMIdChapterTranslateEditionUID},
87 {UINT, kWebMIdChapterTranslateCodec},
88 {BINARY, kWebMIdChapterTranslateID},
89};
90
91static const ElementIdInfo kClusterIds[] = {
92 {BINARY, kWebMIdSimpleBlock}, {UINT, kWebMIdTimecode},
93 {LIST, kWebMIdSilentTracks}, {UINT, kWebMIdPosition},
94 {UINT, kWebMIdPrevSize}, {LIST, kWebMIdBlockGroup},
95};
96
97static const ElementIdInfo kSilentTracksIds[] = {
98 {UINT, kWebMIdSilentTrackNumber},
99};
100
101static const ElementIdInfo kBlockGroupIds[] = {
102 {BINARY, kWebMIdBlock}, {LIST, kWebMIdBlockAdditions},
103 {UINT, kWebMIdBlockDuration}, {UINT, kWebMIdReferencePriority},
104 {BINARY, kWebMIdReferenceBlock}, {BINARY, kWebMIdCodecState},
105 {BINARY, kWebMIdDiscardPadding}, {LIST, kWebMIdSlices},
106};
107
108static const ElementIdInfo kBlockAdditionsIds[] = {
109 {LIST, kWebMIdBlockMore},
110};
111
112static const ElementIdInfo kBlockMoreIds[] = {
113 {UINT, kWebMIdBlockAddID},
114 {BINARY, kWebMIdBlockAdditional},
115};
116
117static const ElementIdInfo kSlicesIds[] = {
118 {LIST, kWebMIdTimeSlice},
119};
120
121static const ElementIdInfo kTimeSliceIds[] = {
122 {UINT, kWebMIdLaceNumber},
123};
124
125static const ElementIdInfo kTracksIds[] = {
126 {LIST, kWebMIdTrackEntry},
127};
128
129static const ElementIdInfo kTrackEntryIds[] = {
130 {UINT, kWebMIdTrackNumber},
131 {BINARY, kWebMIdTrackUID},
132 {UINT, kWebMIdTrackType},
133 {UINT, kWebMIdFlagEnabled},
134 {UINT, kWebMIdFlagDefault},
135 {UINT, kWebMIdFlagForced},
136 {UINT, kWebMIdFlagLacing},
137 {UINT, kWebMIdMinCache},
138 {UINT, kWebMIdMaxCache},
139 {UINT, kWebMIdDefaultDuration},
140 {FLOAT, kWebMIdTrackTimecodeScale},
141 {UINT, kWebMIdMaxBlockAdditionId},
142 {STRING, kWebMIdName},
143 {STRING, kWebMIdLanguage},
144 {STRING, kWebMIdCodecID},
145 {BINARY, kWebMIdCodecPrivate},
146 {STRING, kWebMIdCodecName},
147 {UINT, kWebMIdAttachmentLink},
148 {UINT, kWebMIdCodecDecodeAll},
149 {UINT, kWebMIdTrackOverlay},
150 {UINT, kWebMIdCodecDelay},
151 {UINT, kWebMIdSeekPreRoll},
152 {LIST, kWebMIdTrackTranslate},
153 {LIST, kWebMIdVideo},
154 {LIST, kWebMIdAudio},
155 {LIST, kWebMIdTrackOperation},
156 {LIST, kWebMIdContentEncodings},
157};
158
159static const ElementIdInfo kTrackTranslateIds[] = {
160 {UINT, kWebMIdTrackTranslateEditionUID},
161 {UINT, kWebMIdTrackTranslateCodec},
162 {BINARY, kWebMIdTrackTranslateTrackID},
163};
164
165static const ElementIdInfo kVideoIds[] = {
166 {UINT, kWebMIdFlagInterlaced}, {UINT, kWebMIdStereoMode},
167 {UINT, kWebMIdAlphaMode}, {UINT, kWebMIdPixelWidth},
168 {UINT, kWebMIdPixelHeight}, {UINT, kWebMIdPixelCropBottom},
169 {UINT, kWebMIdPixelCropTop}, {UINT, kWebMIdPixelCropLeft},
170 {UINT, kWebMIdPixelCropRight}, {UINT, kWebMIdDisplayWidth},
171 {UINT, kWebMIdDisplayHeight}, {UINT, kWebMIdDisplayUnit},
172 {UINT, kWebMIdAspectRatioType}, {BINARY, kWebMIdColorSpace},
173 {FLOAT, kWebMIdFrameRate}, {LIST, kWebMIdColor},
174 {LIST, kWebMIdProjection},
175};
176
177static const ElementIdInfo kColorIds[] = {
178 {UINT, kWebMIdColorMatrixCoefficients},
179 {UINT, kWebMIdColorBitsPerChannel},
180 {UINT, kWebMIdColorChromaSubsamplingHorz},
181 {UINT, kWebMIdColorChromaSubsamplingVert},
182 {UINT, kWebMIdColorCbSamplingHorz},
183 {UINT, kWebMIdColorCbSamplingVert},
184 {UINT, kWebMIdColorChromaSitingHorz},
185 {UINT, kWebMIdColorChromaSitingVert},
186 {UINT, kWebMIdColorRange},
187 {UINT, kWebMIdColorTransferCharacteristics},
188 {UINT, kWebMIdColorPrimaries},
189 {UINT, kWebMIdColorMaxCLL},
190 {UINT, kWebMIdColorMaxFALL},
191 {LIST, kWebMIdColorMasteringMetadata},
192};
193
194static const ElementIdInfo kProjectionIds[] = {
195 {UINT, kWebMIdProjectionType},
196};
197
198static const ElementIdInfo kAudioIds[] = {
199 {FLOAT, kWebMIdSamplingFrequency},
200 {FLOAT, kWebMIdOutputSamplingFrequency},
201 {UINT, kWebMIdChannels},
202 {UINT, kWebMIdBitDepth},
203};
204
205static const ElementIdInfo kTrackOperationIds[] = {
206 {LIST, kWebMIdTrackCombinePlanes},
207 {LIST, kWebMIdJoinBlocks},
208};
209
210static const ElementIdInfo kTrackCombinePlanesIds[] = {
211 {LIST, kWebMIdTrackPlane},
212};
213
214static const ElementIdInfo kTrackPlaneIds[] = {
215 {UINT, kWebMIdTrackPlaneUID},
216 {UINT, kWebMIdTrackPlaneType},
217};
218
219static const ElementIdInfo kJoinBlocksIds[] = {
220 {UINT, kWebMIdTrackJoinUID},
221};
222
223static const ElementIdInfo kContentEncodingsIds[] = {
224 {LIST, kWebMIdContentEncoding},
225};
226
227static const ElementIdInfo kContentEncodingIds[] = {
228 {UINT, kWebMIdContentEncodingOrder}, {UINT, kWebMIdContentEncodingScope},
229 {UINT, kWebMIdContentEncodingType}, {LIST, kWebMIdContentCompression},
230 {LIST, kWebMIdContentEncryption},
231};
232
233static const ElementIdInfo kContentCompressionIds[] = {
234 {UINT, kWebMIdContentCompAlgo},
235 {BINARY, kWebMIdContentCompSettings},
236};
237
238static const ElementIdInfo kContentEncryptionIds[] = {
239 {LIST, kWebMIdContentEncAESSettings}, {UINT, kWebMIdContentEncAlgo},
240 {BINARY, kWebMIdContentEncKeyID}, {BINARY, kWebMIdContentSignature},
241 {BINARY, kWebMIdContentSigKeyID}, {UINT, kWebMIdContentSigAlgo},
242 {UINT, kWebMIdContentSigHashAlgo},
243};
244
245static const ElementIdInfo kContentEncAESSettingsIds[] = {
246 {UINT, kWebMIdAESSettingsCipherMode},
247};
248
249static const ElementIdInfo kCuesIds[] = {
250 {LIST, kWebMIdCuePoint},
251};
252
253static const ElementIdInfo kCuePointIds[] = {
254 {UINT, kWebMIdCueTime},
255 {LIST, kWebMIdCueTrackPositions},
256};
257
258static const ElementIdInfo kCueTrackPositionsIds[] = {
259 {UINT, kWebMIdCueTrack}, {UINT, kWebMIdCueClusterPosition},
260 {UINT, kWebMIdCueBlockNumber}, {UINT, kWebMIdCueCodecState},
261 {LIST, kWebMIdCueReference},
262};
263
264static const ElementIdInfo kCueReferenceIds[] = {
265 {UINT, kWebMIdCueRefTime},
266};
267
268static const ElementIdInfo kAttachmentsIds[] = {
269 {LIST, kWebMIdAttachedFile},
270};
271
272static const ElementIdInfo kAttachedFileIds[] = {
273 {STRING, kWebMIdFileDescription}, {STRING, kWebMIdFileName},
274 {STRING, kWebMIdFileMimeType}, {BINARY, kWebMIdFileData},
275 {UINT, kWebMIdFileUID},
276};
277
278static const ElementIdInfo kChaptersIds[] = {
279 {LIST, kWebMIdEditionEntry},
280};
281
282static const ElementIdInfo kEditionEntryIds[] = {
283 {UINT, kWebMIdEditionUID}, {UINT, kWebMIdEditionFlagHidden},
284 {UINT, kWebMIdEditionFlagDefault}, {UINT, kWebMIdEditionFlagOrdered},
285 {LIST, kWebMIdChapterAtom},
286};
287
288static const ElementIdInfo kChapterAtomIds[] = {
289 {UINT, kWebMIdChapterUID},
290 {UINT, kWebMIdChapterTimeStart},
291 {UINT, kWebMIdChapterTimeEnd},
292 {UINT, kWebMIdChapterFlagHidden},
293 {UINT, kWebMIdChapterFlagEnabled},
294 {BINARY, kWebMIdChapterSegmentUID},
295 {UINT, kWebMIdChapterSegmentEditionUID},
296 {UINT, kWebMIdChapterPhysicalEquiv},
297 {LIST, kWebMIdChapterTrack},
298 {LIST, kWebMIdChapterDisplay},
299 {LIST, kWebMIdChapProcess},
300};
301
302static const ElementIdInfo kChapterTrackIds[] = {
303 {UINT, kWebMIdChapterTrackNumber},
304};
305
306static const ElementIdInfo kChapterDisplayIds[] = {
307 {STRING, kWebMIdChapString},
308 {STRING, kWebMIdChapLanguage},
309 {STRING, kWebMIdChapCountry},
310};
311
312static const ElementIdInfo kChapProcessIds[] = {
313 {UINT, kWebMIdChapProcessCodecID},
314 {BINARY, kWebMIdChapProcessPrivate},
315 {LIST, kWebMIdChapProcessCommand},
316};
317
318static const ElementIdInfo kChapProcessCommandIds[] = {
319 {UINT, kWebMIdChapProcessTime},
320 {BINARY, kWebMIdChapProcessData},
321};
322
323static const ElementIdInfo kTagsIds[] = {
324 {LIST, kWebMIdTag},
325};
326
327static const ElementIdInfo kTagIds[] = {
328 {LIST, kWebMIdTargets},
329 {LIST, kWebMIdSimpleTag},
330};
331
332static const ElementIdInfo kTargetsIds[] = {
333 {UINT, kWebMIdTargetTypeValue}, {STRING, kWebMIdTargetType},
334 {UINT, kWebMIdTagTrackUID}, {UINT, kWebMIdTagEditionUID},
335 {UINT, kWebMIdTagChapterUID}, {UINT, kWebMIdTagAttachmentUID},
336};
337
338static const ElementIdInfo kSimpleTagIds[] = {
339 {STRING, kWebMIdTagName}, {STRING, kWebMIdTagLanguage},
340 {UINT, kWebMIdTagDefault}, {STRING, kWebMIdTagString},
341 {BINARY, kWebMIdTagBinary},
342};
343
344#define LIST_ELEMENT_INFO(id, level, id_info) \
345 {(id), (level), (id_info), std::size(id_info)}
346
347static const ListElementInfo kListElementInfo[] = {
348 LIST_ELEMENT_INFO(kWebMIdCluster, 1, kClusterIds),
349 LIST_ELEMENT_INFO(kWebMIdEBMLHeader, 0, kEBMLHeaderIds),
350 LIST_ELEMENT_INFO(kWebMIdSegment, 0, kSegmentIds),
351 LIST_ELEMENT_INFO(kWebMIdSeekHead, 1, kSeekHeadIds),
352 LIST_ELEMENT_INFO(kWebMIdSeek, 2, kSeekIds),
353 LIST_ELEMENT_INFO(kWebMIdInfo, 1, kInfoIds),
354 LIST_ELEMENT_INFO(kWebMIdChapterTranslate, 2, kChapterTranslateIds),
355 LIST_ELEMENT_INFO(kWebMIdSilentTracks, 2, kSilentTracksIds),
356 LIST_ELEMENT_INFO(kWebMIdBlockGroup, 2, kBlockGroupIds),
357 LIST_ELEMENT_INFO(kWebMIdBlockAdditions, 3, kBlockAdditionsIds),
358 LIST_ELEMENT_INFO(kWebMIdBlockMore, 4, kBlockMoreIds),
359 LIST_ELEMENT_INFO(kWebMIdSlices, 3, kSlicesIds),
360 LIST_ELEMENT_INFO(kWebMIdTimeSlice, 4, kTimeSliceIds),
361 LIST_ELEMENT_INFO(kWebMIdTracks, 1, kTracksIds),
362 LIST_ELEMENT_INFO(kWebMIdTrackEntry, 2, kTrackEntryIds),
363 LIST_ELEMENT_INFO(kWebMIdTrackTranslate, 3, kTrackTranslateIds),
364 LIST_ELEMENT_INFO(kWebMIdVideo, 3, kVideoIds),
365 LIST_ELEMENT_INFO(kWebMIdColor, 4, kColorIds),
366 LIST_ELEMENT_INFO(kWebMIdProjection, 4, kProjectionIds),
367 LIST_ELEMENT_INFO(kWebMIdAudio, 3, kAudioIds),
368 LIST_ELEMENT_INFO(kWebMIdTrackOperation, 3, kTrackOperationIds),
369 LIST_ELEMENT_INFO(kWebMIdTrackCombinePlanes, 4, kTrackCombinePlanesIds),
370 LIST_ELEMENT_INFO(kWebMIdTrackPlane, 5, kTrackPlaneIds),
371 LIST_ELEMENT_INFO(kWebMIdJoinBlocks, 4, kJoinBlocksIds),
372 LIST_ELEMENT_INFO(kWebMIdContentEncodings, 3, kContentEncodingsIds),
373 LIST_ELEMENT_INFO(kWebMIdContentEncoding, 4, kContentEncodingIds),
374 LIST_ELEMENT_INFO(kWebMIdContentCompression, 5, kContentCompressionIds),
375 LIST_ELEMENT_INFO(kWebMIdContentEncryption, 5, kContentEncryptionIds),
376 LIST_ELEMENT_INFO(kWebMIdContentEncAESSettings,
377 6,
378 kContentEncAESSettingsIds),
379 LIST_ELEMENT_INFO(kWebMIdCues, 1, kCuesIds),
380 LIST_ELEMENT_INFO(kWebMIdCuePoint, 2, kCuePointIds),
381 LIST_ELEMENT_INFO(kWebMIdCueTrackPositions, 3, kCueTrackPositionsIds),
382 LIST_ELEMENT_INFO(kWebMIdCueReference, 4, kCueReferenceIds),
383 LIST_ELEMENT_INFO(kWebMIdAttachments, 1, kAttachmentsIds),
384 LIST_ELEMENT_INFO(kWebMIdAttachedFile, 2, kAttachedFileIds),
385 LIST_ELEMENT_INFO(kWebMIdChapters, 1, kChaptersIds),
386 LIST_ELEMENT_INFO(kWebMIdEditionEntry, 2, kEditionEntryIds),
387 LIST_ELEMENT_INFO(kWebMIdChapterAtom, 3, kChapterAtomIds),
388 LIST_ELEMENT_INFO(kWebMIdChapterTrack, 4, kChapterTrackIds),
389 LIST_ELEMENT_INFO(kWebMIdChapterDisplay, 4, kChapterDisplayIds),
390 LIST_ELEMENT_INFO(kWebMIdChapProcess, 4, kChapProcessIds),
391 LIST_ELEMENT_INFO(kWebMIdChapProcessCommand, 5, kChapProcessCommandIds),
392 LIST_ELEMENT_INFO(kWebMIdTags, 1, kTagsIds),
393 LIST_ELEMENT_INFO(kWebMIdTag, 2, kTagIds),
394 LIST_ELEMENT_INFO(kWebMIdTargets, 3, kTargetsIds),
395 LIST_ELEMENT_INFO(kWebMIdSimpleTag, 3, kSimpleTagIds),
396};
397
398// Parses an element header id or size field. These fields are variable length
399// encoded. The first byte indicates how many bytes the field occupies.
400// |buf| - The buffer to parse.
401// |size| - The number of bytes in |buf|
402// |max_bytes| - The maximum number of bytes the field can be. ID fields
403// set this to 4 & element size fields set this to 8. If the
404// first byte indicates a larger field size than this it is a
405// parser error.
406// |mask_first_byte| - For element size fields the field length encoding bits
407// need to be masked off. This parameter is true for
408// element size fields and is false for ID field values.
409//
410// Returns: The number of bytes parsed on success. -1 on error.
411static int ParseWebMElementHeaderField(const uint8_t* buf,
412 int size,
413 int max_bytes,
414 bool mask_first_byte,
415 int64_t* num) {
416 DCHECK(buf);
417 DCHECK(num);
418
419 if (size < 0)
420 return -1;
421
422 if (size == 0)
423 return 0;
424
425 int mask = 0x80;
426 uint8_t ch = buf[0];
427 int extra_bytes = -1;
428 bool all_ones = false;
429 for (int i = 0; i < max_bytes; ++i) {
430 if ((ch & mask) != 0) {
431 mask = ~mask & 0xff;
432 *num = mask_first_byte ? ch & mask : ch;
433 all_ones = (ch & mask) == mask;
434 extra_bytes = i;
435 break;
436 }
437 mask = 0x80 | mask >> 1;
438 }
439
440 if (extra_bytes == -1)
441 return -1;
442
443 // Return 0 if we need more data.
444 if ((1 + extra_bytes) > size)
445 return 0;
446
447 int bytes_used = 1;
448
449 for (int i = 0; i < extra_bytes; ++i) {
450 ch = buf[bytes_used++];
451 all_ones &= (ch == 0xff);
452 *num = (*num << 8) | ch;
453 }
454
455 if (all_ones)
456 *num = std::numeric_limits<int64_t>::max();
457
458 return bytes_used;
459}
460
461int WebMParseElementHeader(const uint8_t* buf,
462 int size,
463 int* id,
464 int64_t* element_size) {
465 DCHECK(buf);
466 DCHECK_GE(size, 0);
467 DCHECK(id);
468 DCHECK(element_size);
469
470 if (size == 0)
471 return 0;
472
473 int64_t tmp = 0;
474 int num_id_bytes = ParseWebMElementHeaderField(buf, size, 4, false, &tmp);
475
476 if (num_id_bytes <= 0)
477 return num_id_bytes;
478
479 if (tmp == std::numeric_limits<int64_t>::max())
480 tmp = kWebMReservedId;
481
482 *id = static_cast<int>(tmp);
483
484 int num_size_bytes = ParseWebMElementHeaderField(
485 buf + num_id_bytes, size - num_id_bytes, 8, true, &tmp);
486
487 if (num_size_bytes <= 0)
488 return num_size_bytes;
489
490 if (tmp == std::numeric_limits<int64_t>::max())
491 tmp = kWebMUnknownSize;
492
493 *element_size = tmp;
494 DVLOG(3) << "WebMParseElementHeader() : id " << std::hex << *id << std::dec
495 << " size " << *element_size;
496 return num_id_bytes + num_size_bytes;
497}
498
499// Finds ElementType for a specific ID.
500static ElementType FindIdType(int id,
501 const ElementIdInfo* id_info,
502 int id_info_count) {
503 // Check for global element IDs that can be anywhere.
504 if (id == kWebMIdVoid || id == kWebMIdCRC32)
505 return SKIP;
506
507 for (int i = 0; i < id_info_count; ++i) {
508 if (id == id_info[i].id_)
509 return id_info[i].type_;
510 }
511
512 return UNKNOWN;
513}
514
515// Finds ListElementInfo for a specific ID.
516static const ListElementInfo* FindListInfo(int id) {
517 for (size_t i = 0; i < std::size(kListElementInfo); ++i) {
518 if (id == kListElementInfo[i].id_)
519 return &kListElementInfo[i];
520 }
521
522 return NULL;
523}
524
525static int FindListLevel(int id) {
526 const ListElementInfo* list_info = FindListInfo(id);
527 if (list_info)
528 return list_info->level_;
529
530 return -1;
531}
532
533static int ParseUInt(const uint8_t* buf,
534 int size,
535 int id,
536 WebMParserClient* client) {
537 if ((size <= 0) || (size > 8))
538 return -1;
539
540 // Read in the big-endian integer.
541 uint64_t value = 0;
542 for (int i = 0; i < size; ++i)
543 value = (value << 8) | buf[i];
544
545 // We use int64_t in place of uint64_t everywhere for convenience. See this
546 // bug
547 // for more details: http://crbug.com/366750#c3
548 if (value > static_cast<uint64_t>(std::numeric_limits<int64_t>::max()))
549 return -1;
550
551 if (!client->OnUInt(id, value))
552 return -1;
553
554 return size;
555}
556
557static int ParseFloat(const uint8_t* buf,
558 int size,
559 int id,
560 WebMParserClient* client) {
561 if ((size != 4) && (size != 8))
562 return -1;
563
564 double value = -1;
565
566 // Read the bytes from big-endian form into a native endian integer.
567 int64_t tmp = 0;
568 for (int i = 0; i < size; ++i)
569 tmp = (tmp << 8) | buf[i];
570
571 // Use a union to convert the integer bit pattern into a floating point
572 // number.
573 if (size == 4) {
574 union {
575 int32_t src;
576 float dst;
577 } tmp2;
578 tmp2.src = static_cast<int32_t>(tmp);
579 value = tmp2.dst;
580 } else if (size == 8) {
581 union {
582 int64_t src;
583 double dst;
584 } tmp2;
585 tmp2.src = tmp;
586 value = tmp2.dst;
587 } else {
588 return -1;
589 }
590
591 if (!client->OnFloat(id, value))
592 return -1;
593
594 return size;
595}
596
597static int ParseBinary(const uint8_t* buf,
598 int size,
599 int id,
600 WebMParserClient* client) {
601 return client->OnBinary(id, buf, size) ? size : -1;
602}
603
604static int ParseString(const uint8_t* buf,
605 int size,
606 int id,
607 WebMParserClient* client) {
608 const uint8_t* end = static_cast<const uint8_t*>(memchr(buf, '\0', size));
609 int length = (end != NULL) ? static_cast<int>(end - buf) : size;
610 std::string str(reinterpret_cast<const char*>(buf), length);
611 return client->OnString(id, str) ? size : -1;
612}
613
614static int ParseNonListElement(ElementType type,
615 int id,
616 int64_t element_size,
617 const uint8_t* buf,
618 int size,
619 WebMParserClient* client) {
620 DCHECK_GE(size, element_size);
621
622 int result = -1;
623 switch (type) {
624 case LIST:
625 NOTIMPLEMENTED();
626 result = -1;
627 break;
628 case UINT:
629 result = ParseUInt(buf, element_size, id, client);
630 break;
631 case FLOAT:
632 result = ParseFloat(buf, element_size, id, client);
633 break;
634 case BINARY:
635 result = ParseBinary(buf, element_size, id, client);
636 break;
637 case STRING:
638 result = ParseString(buf, element_size, id, client);
639 break;
640 case SKIP:
641 result = element_size;
642 break;
643 default:
644 DVLOG(1) << "Unhandled ID type " << type;
645 return -1;
646 };
647
648 DCHECK_LE(result, size);
649 return result;
650}
651
652WebMParserClient::WebMParserClient() {}
653WebMParserClient::~WebMParserClient() {}
654
655WebMParserClient* WebMParserClient::OnListStart(int id) {
656 DVLOG(1) << "Unexpected list element start with ID " << std::hex << id;
657 return NULL;
658}
659
660bool WebMParserClient::OnListEnd(int id) {
661 DVLOG(1) << "Unexpected list element end with ID " << std::hex << id;
662 return false;
663}
664
665bool WebMParserClient::OnUInt(int id, int64_t /*val*/) {
666 DVLOG(1) << "Unexpected unsigned integer element with ID " << std::hex << id;
667 return false;
668}
669
670bool WebMParserClient::OnFloat(int id, double /*val*/) {
671 DVLOG(1) << "Unexpected float element with ID " << std::hex << id;
672 return false;
673}
674
675bool WebMParserClient::OnBinary(int id, const uint8_t* /*data*/, int /*size*/) {
676 DVLOG(1) << "Unexpected binary element with ID " << std::hex << id;
677 return false;
678}
679
680bool WebMParserClient::OnString(int id, const std::string& /*str*/) {
681 DVLOG(1) << "Unexpected string element with ID " << std::hex << id;
682 return false;
683}
684
686 : state_(NEED_LIST_HEADER),
687 root_id_(id),
688 root_level_(FindListLevel(id)),
689 root_client_(client) {
690 DCHECK_GE(root_level_, 0);
691 DCHECK(client);
692}
693
694WebMListParser::~WebMListParser() {}
695
697 ChangeState(NEED_LIST_HEADER);
698 list_state_stack_.clear();
699}
700
701int WebMListParser::Parse(const uint8_t* buf, int size) {
702 DCHECK(buf);
703
704 if (size < 0 || state_ == PARSE_ERROR || state_ == DONE_PARSING_LIST)
705 return -1;
706
707 if (size == 0)
708 return 0;
709
710 const uint8_t* cur = buf;
711 int cur_size = size;
712 int bytes_parsed = 0;
713
714 while (cur_size > 0 && state_ != PARSE_ERROR && state_ != DONE_PARSING_LIST) {
715 int element_id = 0;
716 int64_t element_size = 0;
717 int result =
718 WebMParseElementHeader(cur, cur_size, &element_id, &element_size);
719
720 if (result < 0)
721 return result;
722
723 if (result == 0)
724 return bytes_parsed;
725
726 switch (state_) {
727 case NEED_LIST_HEADER: {
728 if (element_id != root_id_) {
729 ChangeState(PARSE_ERROR);
730 return -1;
731 }
732
733 // Only allow Segment & Cluster to have an unknown size.
734 if (element_size == kWebMUnknownSize &&
735 (element_id != kWebMIdSegment) && (element_id != kWebMIdCluster)) {
736 ChangeState(PARSE_ERROR);
737 return -1;
738 }
739
740 ChangeState(INSIDE_LIST);
741 if (!OnListStart(root_id_, element_size))
742 return -1;
743
744 break;
745 }
746
747 case INSIDE_LIST: {
748 int header_size = result;
749 const uint8_t* element_data = cur + header_size;
750 int element_data_size = cur_size - header_size;
751
752 if (element_size < element_data_size)
753 element_data_size = element_size;
754
755 result = ParseListElement(header_size, element_id, element_size,
756 element_data, element_data_size);
757
758 DCHECK_LE(result, header_size + element_data_size);
759 if (result < 0) {
760 ChangeState(PARSE_ERROR);
761 return -1;
762 }
763
764 if (result == 0)
765 return bytes_parsed;
766
767 break;
768 }
769 case DONE_PARSING_LIST:
770 case PARSE_ERROR:
771 // Shouldn't be able to get here.
772 NOTIMPLEMENTED();
773 break;
774 }
775
776 cur += result;
777 cur_size -= result;
778 bytes_parsed += result;
779 }
780
781 return (state_ == PARSE_ERROR) ? -1 : bytes_parsed;
782}
783
785 return state_ == DONE_PARSING_LIST;
786}
787
788void WebMListParser::ChangeState(State new_state) {
789 state_ = new_state;
790}
791
792int WebMListParser::ParseListElement(int header_size,
793 int id,
794 int64_t element_size,
795 const uint8_t* data,
796 int size) {
797 DCHECK_GT(list_state_stack_.size(), 0u);
798
799 ListState& list_state = list_state_stack_.back();
800 DCHECK(list_state.element_info_);
801
802 const ListElementInfo* element_info = list_state.element_info_;
803 ElementType id_type =
804 FindIdType(id, element_info->id_info_, element_info->id_info_count_);
805
806 // Unexpected ID.
807 if (id_type == UNKNOWN) {
808 if (list_state.size_ != kWebMUnknownSize ||
809 !IsSiblingOrAncestor(list_state.id_, id)) {
810 DVLOG(1) << "No ElementType info for ID 0x" << std::hex << id;
811 return -1;
812 }
813
814 // We've reached the end of a list of unknown size. Update the size now that
815 // we know it and dispatch the end of list calls.
816 list_state.size_ = list_state.bytes_parsed_;
817
818 if (!OnListEnd())
819 return -1;
820
821 // Check to see if all open lists have ended.
822 if (list_state_stack_.size() == 0)
823 return 0;
824
825 list_state = list_state_stack_.back();
826 }
827
828 // Make sure the whole element can fit inside the current list.
829 int64_t total_element_size = header_size + element_size;
830 if (list_state.size_ != kWebMUnknownSize &&
831 list_state.size_ < list_state.bytes_parsed_ + total_element_size) {
832 return -1;
833 }
834
835 if (id_type == LIST) {
836 list_state.bytes_parsed_ += header_size;
837
838 if (!OnListStart(id, element_size))
839 return -1;
840 return header_size;
841 }
842
843 // Make sure we have the entire element before trying to parse a non-list
844 // element.
845 if (size < element_size)
846 return 0;
847
848 int bytes_parsed = ParseNonListElement(id_type, id, element_size, data, size,
849 list_state.client_);
850 DCHECK_LE(bytes_parsed, size);
851
852 // Return if an error occurred or we need more data.
853 // Note: bytes_parsed is 0 for a successful parse of a size 0 element. We
854 // need to check the element_size to disambiguate the "need more data" case
855 // from a successful parse.
856 if (bytes_parsed < 0 || (bytes_parsed == 0 && element_size != 0))
857 return bytes_parsed;
858
859 int result = header_size + bytes_parsed;
860 list_state.bytes_parsed_ += result;
861
862 // See if we have reached the end of the current list.
863 if (list_state.bytes_parsed_ == list_state.size_) {
864 if (!OnListEnd())
865 return -1;
866 }
867
868 return result;
869}
870
871bool WebMListParser::OnListStart(int id, int64_t size) {
872 const ListElementInfo* element_info = FindListInfo(id);
873 if (!element_info)
874 return false;
875
876 int current_level =
877 root_level_ + static_cast<int>(list_state_stack_.size()) - 1;
878 if (current_level + 1 != element_info->level_)
879 return false;
880
881 WebMParserClient* current_list_client = NULL;
882 if (!list_state_stack_.empty()) {
883 // Make sure the new list doesn't go past the end of the current list.
884 ListState current_list_state = list_state_stack_.back();
885 if (current_list_state.size_ != kWebMUnknownSize &&
886 current_list_state.size_ < current_list_state.bytes_parsed_ + size)
887 return false;
888 current_list_client = current_list_state.client_;
889 } else {
890 current_list_client = root_client_;
891 }
892
893 WebMParserClient* new_list_client = current_list_client->OnListStart(id);
894 if (!new_list_client)
895 return false;
896
897 ListState new_list_state = {id, size, 0, element_info, new_list_client};
898 list_state_stack_.push_back(new_list_state);
899
900 if (size == 0)
901 return OnListEnd();
902
903 return true;
904}
905
906bool WebMListParser::OnListEnd() {
907 int lists_ended = 0;
908 for (; !list_state_stack_.empty(); ++lists_ended) {
909 const ListState& list_state = list_state_stack_.back();
910 int64_t bytes_parsed = list_state.bytes_parsed_;
911 int id = list_state.id_;
912
913 if (bytes_parsed != list_state.size_)
914 break;
915
916 list_state_stack_.pop_back();
917
918 WebMParserClient* client = NULL;
919 if (!list_state_stack_.empty()) {
920 // Update the bytes_parsed_ for the parent element.
921 list_state_stack_.back().bytes_parsed_ += bytes_parsed;
922 client = list_state_stack_.back().client_;
923 } else {
924 client = root_client_;
925 }
926
927 if (!client->OnListEnd(id))
928 return false;
929 }
930
931 DCHECK_GE(lists_ended, 1);
932
933 if (list_state_stack_.empty())
934 ChangeState(DONE_PARSING_LIST);
935
936 return true;
937}
938
939bool WebMListParser::IsSiblingOrAncestor(int id_a, int id_b) const {
940 DCHECK((id_a == kWebMIdSegment) || (id_a == kWebMIdCluster));
941
942 if (id_a == kWebMIdCluster) {
943 // kWebMIdCluster siblings.
944 for (size_t i = 0; i < std::size(kSegmentIds); i++) {
945 if (kSegmentIds[i].id_ == id_b)
946 return true;
947 }
948 }
949
950 // kWebMIdSegment siblings.
951 return ((id_b == kWebMIdSegment) || (id_b == kWebMIdEBMLHeader));
952}
953
954} // namespace media
955} // namespace shaka
void Reset()
Resets the state of the parser so it can start parsing a new list.
WebMListParser(int id, WebMParserClient *client)
int Parse(const uint8_t *buf, int size)
All the methods that are virtual are virtual for mocking.