Shaka Packager SDK
Loading...
Searching...
No Matches
mp4_media_parser.cc
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <packager/media/formats/mp4/mp4_media_parser.h>
6
7#include <algorithm>
8#include <functional>
9#include <limits>
10
11#include <absl/log/check.h>
12#include <absl/log/log.h>
13#include <absl/strings/numbers.h>
14
15#include <packager/file.h>
16#include <packager/file/file_closer.h>
17#include <packager/macros/compiler.h>
18#include <packager/macros/logging.h>
19#include <packager/media/base/audio_stream_info.h>
20#include <packager/media/base/buffer_reader.h>
21#include <packager/media/base/decrypt_config.h>
22#include <packager/media/base/key_source.h>
23#include <packager/media/base/media_sample.h>
24#include <packager/media/base/rcheck.h>
25#include <packager/media/base/video_stream_info.h>
26#include <packager/media/base/video_util.h>
27#include <packager/media/codecs/ac3_audio_util.h>
28#include <packager/media/codecs/ac4_audio_util.h>
29#include <packager/media/codecs/av1_codec_configuration_record.h>
30#include <packager/media/codecs/avc_decoder_configuration_record.h>
31#include <packager/media/codecs/dovi_decoder_configuration_record.h>
32#include <packager/media/codecs/ec3_audio_util.h>
33#include <packager/media/codecs/es_descriptor.h>
34#include <packager/media/codecs/hevc_decoder_configuration_record.h>
35#include <packager/media/codecs/iamf_audio_util.h>
36#include <packager/media/codecs/vp_codec_configuration_record.h>
37#include <packager/media/formats/mp4/box_definitions.h>
38#include <packager/media/formats/mp4/box_reader.h>
39#include <packager/media/formats/mp4/track_run_iterator.h>
40
41ABSL_FLAG(bool,
42 use_dovi_supplemental_codecs,
43 false,
44 "Set to true to signal DolbyVision using the modern supplemental "
45 "codecs approach instead of the legacy "
46 "duplicate representations approach");
47
48namespace shaka {
49namespace media {
50namespace mp4 {
51namespace {
52
// Converts |time_in_old_scale|, expressed in units of 1/|old_scale| seconds,
// into units of 1/|new_scale| seconds. The computation is carried out in
// double precision and the result is truncated towards zero.
//
// Returns 0 when |old_scale| is 0: dividing by a zero scale would produce
// inf/NaN, and converting such a value to an integer is undefined behavior.
int64_t Rescale(int64_t time_in_old_scale,
                int32_t old_scale,
                int32_t new_scale) {
  if (old_scale == 0)
    return 0;
  const double seconds = static_cast<double>(time_in_old_scale) / old_scale;
  return static_cast<int64_t>(seconds * new_scale);
}
58
59H26xStreamFormat GetH26xStreamFormat(FourCC fourcc) {
60 switch (fourcc) {
61 case FOURCC_avc1:
62 case FOURCC_dvh1:
63 case FOURCC_hvc1:
64 return H26xStreamFormat::kNalUnitStreamWithoutParameterSetNalus;
65 case FOURCC_avc3:
66 case FOURCC_dvhe:
67 case FOURCC_hev1:
68 return H26xStreamFormat::kNalUnitStreamWithParameterSetNalus;
69 default:
70 return H26xStreamFormat::kUnSpecified;
71 }
72}
73
74Codec FourCCToCodec(FourCC fourcc) {
75 switch (fourcc) {
76 case FOURCC_av01:
77 return kCodecAV1;
78 case FOURCC_avc1:
79 case FOURCC_avc3:
80 return kCodecH264;
81 case FOURCC_dvh1:
82 case FOURCC_dvhe:
83 return kCodecH265DolbyVision;
84 case FOURCC_hev1:
85 case FOURCC_hvc1:
86 return kCodecH265;
87 case FOURCC_vp08:
88 return kCodecVP8;
89 case FOURCC_vp09:
90 return kCodecVP9;
91 case FOURCC_Opus:
92 return kCodecOpus;
93 case FOURCC_dtsc:
94 return kCodecDTSC;
95 case FOURCC_dtsh:
96 return kCodecDTSH;
97 case FOURCC_dtsl:
98 return kCodecDTSL;
99 case FOURCC_dtse:
100 return kCodecDTSE;
101 case FOURCC_dtsx:
102 return kCodecDTSX;
103 case FOURCC_dtsp:
104 return kCodecDTSP;
105 case FOURCC_dtsm:
106 return kCodecDTSM;
107 case FOURCC_ac_3:
108 return kCodecAC3;
109 case FOURCC_ec_3:
110 return kCodecEAC3;
111 case FOURCC_ac_4:
112 return kCodecAC4;
113 case FOURCC_alac:
114 return kCodecALAC;
115 case FOURCC_fLaC:
116 return kCodecFlac;
117 case FOURCC_iamf:
118 return kCodecIAMF;
119 case FOURCC_ipcm:
120 return kCodecPcm;
121 case FOURCC_mha1:
122 return kCodecMha1;
123 case FOURCC_mhm1:
124 return kCodecMhm1;
125 default:
126 return kUnknownCodec;
127 }
128}
129
130Codec ObjectTypeToCodec(ObjectType object_type) {
131 switch (object_type) {
132 case ObjectType::kISO_14496_3:
133 case ObjectType::kISO_13818_7_AAC_LC:
134 return kCodecAAC;
135 case ObjectType::kDTSC:
136 return kCodecDTSC;
137 case ObjectType::kDTSE:
138 return kCodecDTSE;
139 case ObjectType::kDTSH:
140 return kCodecDTSH;
141 case ObjectType::kDTSL:
142 return kCodecDTSL;
143 default:
144 return kUnknownCodec;
145 }
146}
147
148std::vector<uint8_t> GetDOVIDecoderConfig(
149 const std::vector<CodecConfiguration>& configs) {
150 for (const CodecConfiguration& config : configs) {
151 if (config.box_type == FOURCC_dvcC || config.box_type == FOURCC_dvvC) {
152 return config.data;
153 }
154 }
155 return std::vector<uint8_t>();
156}
157
158std::vector<uint8_t> GetLHEVCDecoderConfig(
159 const std::vector<CodecConfiguration>& configs) {
160 for (const CodecConfiguration& config : configs) {
161 if (config.box_type == FOURCC_lhvC) {
162 return config.data;
163 }
164 }
165 return std::vector<uint8_t>();
166}
167
168bool UpdateCodecStringForDolbyVision(
169 FourCC actual_format,
170 const std::vector<CodecConfiguration>& configs,
171 std::string* codec_string) {
172 DOVIDecoderConfigurationRecord dovi_config;
173 if (!dovi_config.Parse(GetDOVIDecoderConfig(configs))) {
174 LOG(ERROR) << "Failed to parse Dolby Vision decoder "
175 "configuration record.";
176 return false;
177 }
178 switch (actual_format) {
179 case FOURCC_dvh1:
180 case FOURCC_dvhe:
181 case FOURCC_dav1:
182 // Non-Backward compatibility mode. Replace the code string with
183 // Dolby Vision only.
184 *codec_string = dovi_config.GetCodecString(actual_format);
185 break;
186 case FOURCC_hev1:
187 // Backward compatibility mode. Two codecs are signalled: base codec
188 // without Dolby Vision and HDR with Dolby Vision.
189 *codec_string += ";" + dovi_config.GetCodecString(FOURCC_dvhe);
190 break;
191 case FOURCC_hvc1:
192 // See above.
193 *codec_string += ";" + dovi_config.GetCodecString(FOURCC_dvh1);
194 break;
195 case FOURCC_av01:
196 *codec_string += ";" + dovi_config.GetCodecString(FOURCC_dav1);
197 break;
198 default:
199 LOG(ERROR) << "Unsupported format with extra codec "
200 << FourCCToString(actual_format);
201 return false;
202 }
203 return true;
204}
205
206bool UpdateDolbyVisionInfo(FourCC actual_format,
207 const std::vector<CodecConfiguration>& configs,
208 uint8_t transfer_characteristics,
209 std::string* codec_string,
210 std::string* dovi_supplemental_codec_string,
211 FourCC* dovi_compatible_brand) {
212 DOVIDecoderConfigurationRecord dovi_config;
213 if (!dovi_config.Parse(GetDOVIDecoderConfig(configs))) {
214 LOG(ERROR) << "Failed to parse Dolby Vision decoder "
215 "configuration record.";
216 return false;
217 }
218 switch (actual_format) {
219 case FOURCC_dvh1:
220 case FOURCC_dvhe:
221 case FOURCC_dav1:
222 // Non-Backward compatibility mode. Replace the code string with
223 // Dolby Vision only.
224 *codec_string = dovi_config.GetCodecString(actual_format);
225 break;
226 case FOURCC_hev1:
227 // Backward compatibility mode. Use supplemental codec indicating Dolby
228 // Dolby Vision content.
229 *dovi_supplemental_codec_string = dovi_config.GetCodecString(FOURCC_dvhe);
230 break;
231 case FOURCC_hvc1:
232 // See above.
233 *dovi_supplemental_codec_string = dovi_config.GetCodecString(FOURCC_dvh1);
234 break;
235 case FOURCC_av01:
236 *dovi_supplemental_codec_string = dovi_config.GetCodecString(FOURCC_dav1);
237 break;
238 default:
239 LOG(ERROR) << "Unsupported format with extra codec "
240 << FourCCToString(actual_format);
241 return false;
242 }
243 *dovi_compatible_brand =
244 dovi_config.GetDoViCompatibleBrand(transfer_characteristics);
245 return true;
246}
247
248bool UpdateLHEVCInfo(FourCC actual_format,
249 HEVCDecoderConfigurationRecord& hevc_config,
250 const std::vector<CodecConfiguration>& configs,
251 std::string* codec_string) {
252 if (!hevc_config.ParseLHEVCConfig(GetLHEVCDecoderConfig(configs))) {
253 LOG(ERROR) << "Failed to parse L-HEVC decoder "
254 "configuration record.";
255 return false;
256 }
257 *codec_string = hevc_config.GetCodecString(actual_format);
258 return true;
259}
260
// Number of nanoseconds in one second; used to express sample-count based
// delays (codec delay, seek pre-roll) in nanoseconds.
constexpr uint64_t kNanosecondsPerSecond = 1000ull * 1000ull * 1000ull;
262
263} // namespace
264
265MP4MediaParser::MP4MediaParser()
266 : state_(kWaitingForInit),
267 decryption_key_source_(NULL),
268 moof_head_(0),
269 mdat_tail_(0) {}
270
271MP4MediaParser::~MP4MediaParser() {}
272
273void MP4MediaParser::Init(const InitCB& init_cb,
274 const NewMediaSampleCB& new_media_sample_cb,
275 const NewTextSampleCB& new_text_sample_cb,
276 KeySource* decryption_key_source) {
277 DCHECK_EQ(state_, kWaitingForInit);
278 DCHECK(init_cb_ == nullptr);
279 DCHECK(init_cb != nullptr);
280 DCHECK(new_media_sample_cb != nullptr);
281
282 ChangeState(kParsingBoxes);
283 init_cb_ = init_cb;
284 new_sample_cb_ = new_media_sample_cb;
285 decryption_key_source_ = decryption_key_source;
286 if (decryption_key_source)
287 decryptor_source_.reset(new DecryptorSource(decryption_key_source));
288}
289
290void MP4MediaParser::Reset() {
291 queue_.Reset();
292 runs_.reset();
293 moof_head_ = 0;
294 mdat_tail_ = 0;
295}
296
// Discards any buffered data via Reset() and returns the parser to the
// kParsingBoxes state. Must not be called before Init() (debug-checked).
// Always returns true.
297bool MP4MediaParser::Flush() {
298 DCHECK_NE(state_, kWaitingForInit);
299 Reset();
300 ChangeState(kParsingBoxes);
301 return true;
302}
303
304bool MP4MediaParser::Parse(const uint8_t* buf, int size) {
305 DCHECK_NE(state_, kWaitingForInit);
306
307 if (state_ == kError)
308 return false;
309
310 queue_.Push(buf, size);
311
312 bool result, err = false;
313
314 do {
315 if (state_ == kParsingBoxes) {
316 result = ParseBox(&err);
317 } else {
318 DCHECK_EQ(kEmittingSamples, state_);
319 result = EnqueueSample(&err);
320 if (result) {
321 int64_t max_clear = runs_->GetMaxClearOffset() + moof_head_;
322 err = !ReadAndDiscardMDATsUntil(max_clear);
323 }
324 }
325 } while (result && !err);
326
327 if (err) {
328 DLOG(ERROR) << "Error while parsing MP4";
329 moov_.reset();
330 Reset();
331 ChangeState(kError);
332 return false;
333 }
334
335 return true;
336}
337
338bool MP4MediaParser::LoadMoov(const std::string& file_path) {
339 std::unique_ptr<File, FileCloser> file(
340 File::OpenWithNoBuffering(file_path.c_str(), "r"));
341 if (!file) {
342 LOG(ERROR) << "Unable to open media file '" << file_path << "'";
343 return false;
344 }
345 if (!file->Seek(0)) {
346 LOG(WARNING) << "Filesystem does not support seeking on file '" << file_path
347 << "'";
348 return false;
349 }
350
351 uint64_t file_position(0);
352 bool mdat_seen(false);
353 while (true) {
354 const uint32_t kBoxHeaderReadSize(16);
355 std::vector<uint8_t> buffer(kBoxHeaderReadSize);
356 int64_t bytes_read = file->Read(&buffer[0], kBoxHeaderReadSize);
357 if (bytes_read == 0) {
358 LOG(ERROR) << "Could not find 'moov' box in file '" << file_path << "'";
359 return false;
360 }
361 if (bytes_read < kBoxHeaderReadSize) {
362 LOG(ERROR) << "Error reading media file '" << file_path << "'";
363 return false;
364 }
365 uint64_t box_size;
366 FourCC box_type;
367 bool err;
368 if (!BoxReader::StartBox(&buffer[0], kBoxHeaderReadSize, &box_type,
369 &box_size, &err)) {
370 LOG(ERROR) << "Could not start box from file '" << file_path << "'";
371 return false;
372 }
373 if (box_type == FOURCC_mdat) {
374 mdat_seen = true;
375 } else if (box_type == FOURCC_moov) {
376 if (!mdat_seen) {
377 // 'moov' is before 'mdat'. Nothing to do.
378 break;
379 }
380 // 'mdat' before 'moov'. Read and parse 'moov'.
381 if (!Parse(&buffer[0], bytes_read)) {
382 LOG(ERROR) << "Error parsing mp4 file '" << file_path << "'";
383 return false;
384 }
385 uint64_t bytes_to_read = box_size - bytes_read;
386 buffer.resize(bytes_to_read);
387 while (bytes_to_read > 0) {
388 bytes_read = file->Read(&buffer[0], bytes_to_read);
389 if (bytes_read <= 0) {
390 LOG(ERROR) << "Error reading 'moov' contents from file '" << file_path
391 << "'";
392 return false;
393 }
394 if (!Parse(&buffer[0], bytes_read)) {
395 LOG(ERROR) << "Error parsing mp4 file '" << file_path << "'";
396 return false;
397 }
398 bytes_to_read -= bytes_read;
399 }
400 queue_.Reset(); // So that we don't need to adjust data offsets.
401 mdat_tail_ = 0; // So it will skip boxes until mdat.
402 break; // Done.
403 }
404 file_position += box_size;
405 if (!file->Seek(file_position)) {
406 LOG(ERROR) << "Error skipping box in mp4 file '" << file_path << "'";
407 return false;
408 }
409 }
410 return true;
411}
412
413bool MP4MediaParser::ParseBox(bool* err) {
414 const uint8_t* buf;
415 int size;
416 queue_.Peek(&buf, &size);
417 if (!size)
418 return false;
419
420 std::unique_ptr<BoxReader> reader(BoxReader::ReadBox(buf, size, err));
421 if (reader.get() == NULL)
422 return false;
423
424 if (reader->type() == FOURCC_mdat) {
425 if (!moov_) {
426 // For seekable files, we seek to the 'moov' and load the 'moov' first
427 // then seek back (see LoadMoov function for details); we do not support
428 // having 'mdat' before 'moov' for non-seekable files. The code ends up
429 // here only if it is a non-seekable file.
430 NOTIMPLEMENTED() << " Non-seekable Files with 'mdat' box before 'moov' "
431 "box is not supported.";
432 *err = true;
433 return false;
434 } else {
435 // This can happen if there are unused 'mdat' boxes, which is unusual
436 // but allowed by the spec. Ignore the 'mdat' and proceed.
437 LOG(INFO)
438 << "Ignore unused 'mdat' box - this could be as a result of extra "
439 "not usable 'mdat' or 'mdat' associated with unrecognized track.";
440 }
441 }
442
443 // Set up mdat offset for ReadMDATsUntil().
444 mdat_tail_ = queue_.head() + reader->size();
445
446 if (reader->type() == FOURCC_moov) {
447 *err = !ParseMoov(reader.get());
448 } else if (reader->type() == FOURCC_moof) {
449 moof_head_ = queue_.head();
450 *err = !ParseMoof(reader.get());
451
452 // Return early to avoid evicting 'moof' data from queue. Auxiliary info may
453 // be located anywhere in the file, including inside the 'moof' itself.
454 // (Since 'default-base-is-moof' is mandated, no data references can come
455 // before the head of the 'moof', so keeping this box around is sufficient.)
456 return !(*err);
457 } else {
458 VLOG(2) << "Skipping top-level box: " << FourCCToString(reader->type());
459 }
460
461 queue_.Pop(static_cast<int>(reader->size()));
462 return !(*err);
463}
464
465bool MP4MediaParser::ParseMoov(BoxReader* reader) {
466 if (moov_)
467 return true; // Already parsed the 'moov' box.
468
469 moov_.reset(new Movie);
470 RCHECK(moov_->Parse(reader));
471 runs_.reset();
472
473 std::vector<std::shared_ptr<StreamInfo>> streams;
474
475 bool use_dovi_supplemental =
476 absl::GetFlag(FLAGS_use_dovi_supplemental_codecs);
477
478 for (std::vector<Track>::const_iterator track = moov_->tracks.begin();
479 track != moov_->tracks.end(); ++track) {
480 const int32_t timescale = track->media.header.timescale;
481
482 // Calculate duration (based on timescale).
483 int64_t duration = 0;
484 if (track->media.header.duration > 0) {
485 duration = track->media.header.duration;
486 } else if (moov_->extends.header.fragment_duration > 0) {
487 DCHECK(moov_->header.timescale != 0);
488 duration = Rescale(moov_->extends.header.fragment_duration,
489 moov_->header.timescale,
490 timescale);
491 } else if (moov_->header.duration > 0 &&
492 moov_->header.duration != std::numeric_limits<uint64_t>::max()) {
493 DCHECK(moov_->header.timescale != 0);
494 duration =
495 Rescale(moov_->header.duration, moov_->header.timescale, timescale);
496 }
497
498 const SampleDescription& samp_descr =
499 track->media.information.sample_table.description;
500
501 size_t desc_idx = 0;
502
503 // Read sample description index from mvex if it exists otherwise read
504 // from the first entry in Sample To Chunk box.
505 if (moov_->extends.tracks.size() > 0) {
506 for (size_t t = 0; t < moov_->extends.tracks.size(); t++) {
507 const TrackExtends& trex = moov_->extends.tracks[t];
508 if (trex.track_id == track->header.track_id) {
509 desc_idx = trex.default_sample_description_index;
510 break;
511 }
512 }
513 } else {
514 const std::vector<ChunkInfo>& chunk_info =
515 track->media.information.sample_table.sample_to_chunk.chunk_info;
516 RCHECK(chunk_info.size() > 0);
517 desc_idx = chunk_info[0].sample_description_index;
518 }
519 RCHECK(desc_idx > 0);
520 desc_idx -= 1; // BMFF descriptor index is one-based
521
522 if (samp_descr.type == kAudio) {
523 RCHECK(!samp_descr.audio_entries.empty());
524
525 // It is not uncommon to find otherwise-valid files with incorrect sample
526 // description indices, so we fail gracefully in that case.
527 if (desc_idx >= samp_descr.audio_entries.size())
528 desc_idx = 0;
529
530 const AudioSampleEntry& entry = samp_descr.audio_entries[desc_idx];
531 const FourCC actual_format = entry.GetActualFormat();
532 Codec codec = FourCCToCodec(actual_format);
533 uint8_t num_channels = entry.channelcount;
534 uint32_t sampling_frequency = entry.samplerate;
535 uint64_t codec_delay_ns = 0;
536 uint8_t audio_object_type = 0;
537 uint32_t max_bitrate = 0;
538 uint32_t avg_bitrate = 0;
539 std::vector<uint8_t> codec_config;
540
541 switch (actual_format) {
542 case FOURCC_mp4a: {
543 const DecoderConfigDescriptor& decoder_config =
544 entry.esds.es_descriptor.decoder_config_descriptor();
545 max_bitrate = decoder_config.max_bitrate();
546 avg_bitrate = decoder_config.avg_bitrate();
547
548 codec = ObjectTypeToCodec(decoder_config.object_type());
549 if (codec == kCodecAAC) {
550 const AACAudioSpecificConfig& aac_audio_specific_config =
551 entry.esds.aac_audio_specific_config;
552 num_channels = aac_audio_specific_config.GetNumChannels();
553 sampling_frequency =
554 aac_audio_specific_config.GetSamplesPerSecond();
555 audio_object_type = aac_audio_specific_config.GetAudioObjectType();
556 codec_config =
557 decoder_config.decoder_specific_info_descriptor().data();
558 } else if (codec == kUnknownCodec) {
559 // Intentionally not to fail in the parser as there may be multiple
560 // streams in the source content, which allows the supported stream
561 // to be packaged. An error will be returned if the unsupported
562 // stream is passed to the muxer.
563 LOG(WARNING) << "Unsupported audio object type "
564 << static_cast<int>(decoder_config.object_type())
565 << " in stsd.es_desriptor.";
566 }
567 break;
568 }
569 case FOURCC_dtsc:
570 FALLTHROUGH_INTENDED;
571 case FOURCC_dtse:
572 FALLTHROUGH_INTENDED;
573 case FOURCC_dtsh:
574 FALLTHROUGH_INTENDED;
575 case FOURCC_dtsl:
576 FALLTHROUGH_INTENDED;
577 case FOURCC_dtsm:
578 codec_config = entry.ddts.extra_data;
579 max_bitrate = entry.ddts.max_bitrate;
580 avg_bitrate = entry.ddts.avg_bitrate;
581 break;
582 case FOURCC_dtsx:
583 codec_config = entry.udts.data;
584 break;
585 case FOURCC_ac_3:
586 codec_config = entry.dac3.data;
587 num_channels = static_cast<uint8_t>(GetAc3NumChannels(codec_config));
588 break;
589 case FOURCC_ec_3:
590 codec_config = entry.dec3.data;
591 num_channels = static_cast<uint8_t>(GetEc3NumChannels(codec_config));
592 break;
593 case FOURCC_ac_4:
594 codec_config = entry.dac4.data;
595 // Stop the process if have errors when parsing AC-4 dac4 box,
596 // bitstream version 0 (has beed deprecated) and contains multiple
597 // presentations in single AC-4 stream (only used for broadcast).
598 if (!GetAc4CodecInfo(codec_config, &audio_object_type)) {
599 LOG(ERROR) << "Failed to parse dac4.";
600 return false;
601 }
602 break;
603 case FOURCC_alac:
604 codec_config = entry.alac.data;
605 break;
606 case FOURCC_fLaC:
607 codec_config = entry.dfla.data;
608 break;
609 case FOURCC_Opus:
610 codec_config = entry.dops.opus_identification_header;
611 codec_delay_ns =
612 entry.dops.preskip * kNanosecondsPerSecond / sampling_frequency;
613 break;
614 case FOURCC_iamf:
615 codec_config = entry.iacb.data;
616 if (!GetIamfCodecStringInfo(codec_config, audio_object_type)) {
617 LOG(ERROR) << "Failed to parse iamf.";
618 return false;
619 }
620 break;
621 case FOURCC_mha1:
622 case FOURCC_mhm1:
623 codec_config = entry.mhac.data;
624 audio_object_type = entry.mhac.mpeg_h_3da_profile_level_indication;
625 break;
626 default:
627 // Intentionally not to fail in the parser as there may be multiple
628 // streams in the source content, which allows the supported stream to
629 // be packaged.
630 // An error will be returned if the unsupported stream is passed to
631 // the muxer.
632 LOG(WARNING) << "Unsupported audio format '"
633 << FourCCToString(actual_format) << "' in stsd box.";
634 break;
635 }
636
637 // Extract possible seek preroll.
638 uint64_t seek_preroll_ns = 0;
639 for (const auto& sample_group_description :
640 track->media.information.sample_table.sample_group_descriptions) {
641 if (sample_group_description.grouping_type != FOURCC_roll)
642 continue;
643 const auto& audio_roll_recovery_entries =
644 sample_group_description.audio_roll_recovery_entries;
645 if (audio_roll_recovery_entries.size() != 1) {
646 LOG(WARNING) << "Unexpected number of entries in "
647 "SampleGroupDescription table with grouping type "
648 "'roll'.";
649 break;
650 }
651 const int16_t roll_distance_in_samples =
652 audio_roll_recovery_entries[0].roll_distance;
653 if (roll_distance_in_samples < 0) {
654 // IAMF requires the `samplerate` field to be set to 0.
655 // (https://aomediacodec.github.io/iamf/#iasampleentry-section)
656 if (actual_format == FOURCC_iamf)
657 continue;
658
659 RCHECK((sampling_frequency != 0));
660 seek_preroll_ns = kNanosecondsPerSecond *
661 (-roll_distance_in_samples) / sampling_frequency;
662 } else {
663 LOG(WARNING)
664 << "Roll distance is supposed to be negative, but seeing "
665 << roll_distance_in_samples;
666 }
667 break;
668 }
669
670 // The stream will be decrypted if a |decryptor_source_| is available.
671 const bool is_encrypted =
672 decryptor_source_
673 ? false
674 : entry.sinf.info.track_encryption.default_is_protected == 1;
675 DVLOG(1) << "is_audio_track_encrypted_: " << is_encrypted;
676 streams.emplace_back(new AudioStreamInfo(
677 track->header.track_id, timescale, duration, codec,
678 AudioStreamInfo::GetCodecString(codec, audio_object_type),
679 codec_config.data(), codec_config.size(), entry.samplesize,
680 num_channels, sampling_frequency, seek_preroll_ns, codec_delay_ns,
681 max_bitrate, avg_bitrate, track->media.header.language.code,
682 is_encrypted));
683 }
684
685 if (samp_descr.type == kVideo) {
686 RCHECK(!samp_descr.video_entries.empty());
687 if (desc_idx >= samp_descr.video_entries.size())
688 desc_idx = 0;
689 const VideoSampleEntry& entry = samp_descr.video_entries[desc_idx];
690 std::vector<uint8_t> codec_configuration_data =
691 entry.codec_configuration.data;
692
693 uint32_t coded_width = entry.width;
694 uint32_t coded_height = entry.height;
695 uint32_t pixel_width = entry.pixel_aspect.h_spacing;
696 uint32_t pixel_height = entry.pixel_aspect.v_spacing;
697 if (pixel_width == 0 && pixel_height == 0) {
698 DerivePixelWidthHeight(coded_width, coded_height, track->header.width,
699 track->header.height, &pixel_width,
700 &pixel_height);
701 }
702 std::string codec_string;
703 std::string dovi_supplemental_codec_string("");
704 FourCC dovi_compatible_brand = FOURCC_NULL;
705 uint8_t nalu_length_size = 0;
706 uint8_t transfer_characteristics = 0;
707 uint8_t color_primaries = 0;
708 uint8_t matrix_coefficients = 0;
709
710 const FourCC actual_format = entry.GetActualFormat();
711 const Codec video_codec = FourCCToCodec(actual_format);
712 switch (actual_format) {
713 case FOURCC_av01: {
714 AV1CodecConfigurationRecord av1_config;
715 if (!av1_config.Parse(codec_configuration_data)) {
716 LOG(ERROR) << "Failed to parse av1c.";
717 return false;
718 }
719 // Generate the full codec string if the colr atom is present.
720 if (entry.colr.color_parameter_type != FOURCC_NULL) {
721 transfer_characteristics = entry.colr.transfer_characteristics;
722 color_primaries = entry.colr.color_primaries;
723 matrix_coefficients = entry.colr.matrix_coefficients;
724 codec_string = av1_config.GetCodecString(
725 color_primaries, transfer_characteristics, matrix_coefficients,
726 entry.colr.video_full_range_flag);
727 } else {
728 codec_string = av1_config.GetCodecString();
729 }
730
731 if (!entry.extra_codec_configs.empty()) {
732 // |extra_codec_configs| is present only for Dolby Vision.
733 if (use_dovi_supplemental) {
734 if (!UpdateDolbyVisionInfo(
735 actual_format, entry.extra_codec_configs,
736 transfer_characteristics, &codec_string,
737 &dovi_supplemental_codec_string,
738 &dovi_compatible_brand)) {
739 return false;
740 }
741 } else {
742 if (!UpdateCodecStringForDolbyVision(actual_format,
743 entry.extra_codec_configs,
744 &codec_string)) {
745 return false;
746 }
747 }
748 }
749 break;
750 }
751 case FOURCC_avc1:
752 case FOURCC_avc3: {
753 AVCDecoderConfigurationRecord avc_config;
754 if (!avc_config.Parse(codec_configuration_data)) {
755 LOG(ERROR) << "Failed to parse avcc.";
756 return false;
757 }
758 codec_string = avc_config.GetCodecString(actual_format);
759 nalu_length_size = avc_config.nalu_length_size();
760 transfer_characteristics = avc_config.transfer_characteristics();
761 color_primaries = avc_config.color_primaries();
762 matrix_coefficients = avc_config.matrix_coefficients();
763
764 // Use configurations from |avc_config| if it is valid.
765 if (avc_config.coded_width() != 0) {
766 DCHECK_NE(avc_config.coded_height(), 0u);
767 if (coded_width != avc_config.coded_width() ||
768 coded_height != avc_config.coded_height()) {
769 LOG(WARNING) << "Resolution in VisualSampleEntry (" << coded_width
770 << "," << coded_height
771 << ") does not match with resolution in "
772 "AVCDecoderConfigurationRecord ("
773 << avc_config.coded_width() << ","
774 << avc_config.coded_height()
775 << "). Use AVCDecoderConfigurationRecord.";
776 coded_width = avc_config.coded_width();
777 coded_height = avc_config.coded_height();
778 }
779
780 DCHECK_NE(avc_config.pixel_width(), 0u);
781 DCHECK_NE(avc_config.pixel_height(), 0u);
782 if (pixel_width != avc_config.pixel_width() ||
783 pixel_height != avc_config.pixel_height()) {
784 LOG_IF(WARNING, pixel_width != 1 || pixel_height != 1)
785 << "Pixel aspect ratio in PASP box (" << pixel_width << ","
786 << pixel_height
787 << ") does not match with SAR in "
788 "AVCDecoderConfigurationRecord "
789 "("
790 << avc_config.pixel_width() << ","
791 << avc_config.pixel_height()
792 << "). Use AVCDecoderConfigurationRecord.";
793 pixel_width = avc_config.pixel_width();
794 pixel_height = avc_config.pixel_height();
795 }
796 }
797 break;
798 }
799 case FOURCC_dvh1:
800 case FOURCC_dvhe:
801 case FOURCC_hev1:
802 case FOURCC_hvc1: {
803 HEVCDecoderConfigurationRecord hevc_config;
804 if (!hevc_config.Parse(codec_configuration_data)) {
805 LOG(ERROR) << "Failed to parse hevc.";
806 return false;
807 }
808 codec_string = hevc_config.GetCodecString(actual_format);
809 nalu_length_size = hevc_config.nalu_length_size();
810 transfer_characteristics = hevc_config.transfer_characteristics();
811 color_primaries = hevc_config.color_primaries();
812 matrix_coefficients = hevc_config.matrix_coefficients();
813
814 if (!entry.extra_codec_configs.empty()) {
815 // |extra_codec_configs| is present for Dolby Vision and/or
816 // stereo MV-HEVC.
817 if (entry.HaveDolbyVisionConfig()) {
818 if (use_dovi_supplemental) {
819 if (!UpdateDolbyVisionInfo(
820 actual_format, entry.extra_codec_configs,
821 transfer_characteristics, &codec_string,
822 &dovi_supplemental_codec_string,
823 &dovi_compatible_brand)) {
824 return false;
825 }
826 } else {
827 if (!UpdateCodecStringForDolbyVision(actual_format,
828 entry.extra_codec_configs,
829 &codec_string)) {
830 return false;
831 }
832 }
833 }
834 if (entry.HaveLHEVCConfig()) {
835 if (!UpdateLHEVCInfo(actual_format, hevc_config,
836 entry.extra_codec_configs, &codec_string)) {
837 return false;
838 }
839 }
840 }
841 break;
842 }
843 case FOURCC_vp08:
844 case FOURCC_vp09: {
845 VPCodecConfigurationRecord vp_config;
846 if (!vp_config.ParseMP4(codec_configuration_data)) {
847 LOG(ERROR) << "Failed to parse vpcc.";
848 return false;
849 }
850 if (actual_format == FOURCC_vp09 &&
851 (!vp_config.is_level_set() || vp_config.level() == 0)) {
852 const double kUnknownSampleDuration = 0.0;
853 vp_config.SetVP9Level(coded_width, coded_height,
854 kUnknownSampleDuration);
855 vp_config.WriteMP4(&codec_configuration_data);
856 }
857 codec_string = vp_config.GetCodecString(video_codec);
858 break;
859 }
860 default:
861 // Intentionally not to fail in the parser as there may be multiple
862 // streams in the source content, which allows the supported stream to
863 // be packaged.
864 // An error will be returned if the unsupported stream is passed to
865 // the muxer.
866 LOG(WARNING) << "Unsupported video format '"
867 << FourCCToString(actual_format) << "' in stsd box.";
868 break;
869 }
870
871 // The stream will be decrypted if a |decryptor_source_| is available.
872 const bool is_encrypted =
873 decryptor_source_
874 ? false
875 : entry.sinf.info.track_encryption.default_is_protected == 1;
876 DVLOG(1) << "is_video_track_encrypted_: " << is_encrypted;
877 std::shared_ptr<VideoStreamInfo> video_stream_info(new VideoStreamInfo(
878 track->header.track_id, timescale, duration, video_codec,
879 GetH26xStreamFormat(actual_format), codec_string,
880 codec_configuration_data.data(), codec_configuration_data.size(),
881 coded_width, coded_height, pixel_width, pixel_height, color_primaries,
882 matrix_coefficients, transfer_characteristics,
883 0, // trick_play_factor
884 nalu_length_size, track->media.header.language.code, is_encrypted));
885
886 if (use_dovi_supplemental) {
887 video_stream_info->set_supplemental_codec(
888 dovi_supplemental_codec_string);
889 video_stream_info->set_compatible_brand(dovi_compatible_brand);
890 }
891 video_stream_info->set_layered_codec_config(
892 GetLHEVCDecoderConfig(entry.extra_codec_configs));
893 video_stream_info->set_extra_config(entry.ExtraCodecConfigsAsVector());
894 video_stream_info->set_colr_data((entry.colr.raw_box).data(),
895 (entry.colr.raw_box).size());
896
897 // Set pssh raw data if it has.
898 if (moov_->pssh.size() > 0) {
899 std::vector<uint8_t> pssh_raw_data;
900 for (const auto& pssh : moov_->pssh) {
901 pssh_raw_data.insert(pssh_raw_data.end(), pssh.raw_box.begin(),
902 pssh.raw_box.end());
903 }
904 video_stream_info->set_eme_init_data(pssh_raw_data.data(),
905 pssh_raw_data.size());
906 }
907
908 streams.push_back(video_stream_info);
909 }
910 }
911
912 init_cb_(streams);
913 if (!FetchKeysIfNecessary(moov_->pssh))
914 return false;
915 runs_.reset(new TrackRunIterator(moov_.get()));
916 RCHECK(runs_->Init());
917 ChangeState(kEmittingSamples);
918 return true;
919}
920
921bool MP4MediaParser::ParseMoof(BoxReader* reader) {
922 // Must already have initialization segment.
923 RCHECK(moov_.get());
924 MovieFragment moof;
925 RCHECK(moof.Parse(reader));
926 if (!runs_)
927 runs_.reset(new TrackRunIterator(moov_.get()));
928 RCHECK(runs_->Init(moof));
929 if (!FetchKeysIfNecessary(moof.pssh))
930 return false;
931 ChangeState(kEmittingSamples);
932 return true;
933}
934
935bool MP4MediaParser::FetchKeysIfNecessary(
936 const std::vector<ProtectionSystemSpecificHeader>& headers) {
937 if (headers.empty())
938 return true;
939
940 // An error will be returned later if the samples need to be decrypted.
941 if (!decryption_key_source_)
942 return true;
943
944 std::vector<uint8_t> pssh_raw_data;
945 for (const auto& header : headers) {
946 pssh_raw_data.insert(pssh_raw_data.end(), header.raw_box.begin(),
947 header.raw_box.end());
948 }
949 Status status =
950 decryption_key_source_->FetchKeys(EmeInitDataType::CENC, pssh_raw_data);
951 if (!status.ok()) {
952 LOG(ERROR) << "Error fetching decryption keys: " << status;
953 return false;
954 }
955 return true;
956}
957
958bool MP4MediaParser::EnqueueSample(bool* err) {
959 if (!runs_->IsRunValid()) {
960 // Remain in kEnqueueingSamples state, discarding data, until the end of
961 // the current 'mdat' box has been appended to the queue.
962 if (!queue_.Trim(mdat_tail_))
963 return false;
964
965 ChangeState(kParsingBoxes);
966 return true;
967 }
968
969 if (!runs_->IsSampleValid()) {
970 runs_->AdvanceRun();
971 return true;
972 }
973
974 DCHECK(!(*err));
975
976 const uint8_t* buf;
977 int buf_size;
978 queue_.Peek(&buf, &buf_size);
979 if (!buf_size)
980 return false;
981
982 // Skip this entire track if it is not audio nor video.
983 if (!runs_->is_audio() && !runs_->is_video())
984 runs_->AdvanceRun();
985
986 // Attempt to cache the auxiliary information first. Aux info is usually
987 // placed in a contiguous block before the sample data, rather than being
988 // interleaved. If we didn't cache it, this would require that we retain the
989 // start of the segment buffer while reading samples. Aux info is typically
990 // quite small compared to sample data, so this pattern is useful on
991 // memory-constrained devices where the source buffer consumes a substantial
992 // portion of the total system memory.
993 if (runs_->AuxInfoNeedsToBeCached()) {
994 queue_.PeekAt(runs_->aux_info_offset() + moof_head_, &buf, &buf_size);
995 if (buf_size < runs_->aux_info_size())
996 return false;
997 *err = !runs_->CacheAuxInfo(buf, buf_size);
998 return !*err;
999 }
1000
1001 int64_t sample_offset = runs_->sample_offset() + moof_head_;
1002 queue_.PeekAt(sample_offset, &buf, &buf_size);
1003 if (buf_size < runs_->sample_size()) {
1004 if (sample_offset < queue_.head()) {
1005 LOG(ERROR) << "Incorrect sample offset " << sample_offset
1006 << " < " << queue_.head();
1007 *err = true;
1008 }
1009 return false;
1010 }
1011
1012 const uint8_t* media_data = buf;
1013 const size_t media_data_size = runs_->sample_size();
1014 // Use a dummy data size of 0 to avoid copying overhead.
1015 // Actual media data is set later.
1016 const size_t kDummyDataSize = 0;
1017 std::shared_ptr<MediaSample> stream_sample(
1018 MediaSample::CopyFrom(media_data, kDummyDataSize, runs_->is_keyframe()));
1019
1020 if (runs_->is_encrypted()) {
1021 std::shared_ptr<uint8_t> decrypted_media_data(
1022 new uint8_t[media_data_size], std::default_delete<uint8_t[]>());
1023 std::unique_ptr<DecryptConfig> decrypt_config = runs_->GetDecryptConfig();
1024 if (!decrypt_config) {
1025 *err = true;
1026 LOG(ERROR) << "Missing decrypt config.";
1027 return false;
1028 }
1029
1030 if (!decryptor_source_) {
1031 stream_sample->SetData(media_data, media_data_size);
1032 // If the demuxer does not have the decryptor_source_, store
1033 // decrypt_config so that the demuxed sample can be decrypted later.
1034 stream_sample->set_decrypt_config(std::move(decrypt_config));
1035 stream_sample->set_is_encrypted(true);
1036 } else {
1037 if (!decryptor_source_->DecryptSampleBuffer(decrypt_config.get(),
1038 media_data, media_data_size,
1039 decrypted_media_data.get())) {
1040 *err = true;
1041 LOG(ERROR) << "Cannot decrypt samples.";
1042 return false;
1043 }
1044 stream_sample->TransferData(std::move(decrypted_media_data),
1045 media_data_size);
1046 }
1047 } else {
1048 stream_sample->SetData(media_data, media_data_size);
1049 }
1050
1051 stream_sample->set_dts(runs_->dts());
1052 stream_sample->set_pts(runs_->cts());
1053 stream_sample->set_duration(runs_->duration());
1054
1055 DVLOG(3) << "Pushing frame: "
1056 << ", key=" << runs_->is_keyframe()
1057 << ", dur=" << runs_->duration()
1058 << ", dts=" << runs_->dts()
1059 << ", cts=" << runs_->cts()
1060 << ", size=" << runs_->sample_size();
1061
1062 if (!new_sample_cb_(runs_->track_id(), stream_sample)) {
1063 *err = true;
1064 LOG(ERROR) << "Failed to process the sample.";
1065 return false;
1066 }
1067
1068 runs_->AdvanceSample();
1069 return true;
1070}
1071
1072bool MP4MediaParser::ReadAndDiscardMDATsUntil(const int64_t offset) {
1073 bool err = false;
1074 while (mdat_tail_ < offset) {
1075 const uint8_t* buf;
1076 int size;
1077 queue_.PeekAt(mdat_tail_, &buf, &size);
1078
1079 FourCC type;
1080 uint64_t box_sz;
1081 if (!BoxReader::StartBox(buf, size, &type, &box_sz, &err))
1082 break;
1083
1084 mdat_tail_ += box_sz;
1085 }
1086 queue_.Trim(std::min(mdat_tail_, offset));
1087 return !err;
1088}
1089
1090void MP4MediaParser::ChangeState(State new_state) {
1091 DVLOG(2) << "Changing state: " << new_state;
1092 state_ = new_state;
1093}
1094
1095} // namespace mp4
1096} // namespace media
1097} // namespace shaka
DecryptorSource wraps KeySource and is responsible for decryptor management.
KeySource is responsible for encryption key acquisition.
Definition key_source.h:53
std::function< bool(uint32_t track_id, std::shared_ptr< MediaSample > media_sample)> NewMediaSampleCB
std::function< bool(uint32_t track_id, std::shared_ptr< TextSample > text_sample)> NewTextSampleCB
std::function< void(const std::vector< std::shared_ptr< StreamInfo > > &stream_info)> InitCB
All the methods that are virtual are virtual for mocking.