Shaka Packager SDK
Loading...
Searching...
No Matches
mp4_media_parser.cc
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <packager/media/formats/mp4/mp4_media_parser.h>
6
7#include <algorithm>
8#include <functional>
9#include <limits>
10
11#include <absl/log/check.h>
12#include <absl/log/log.h>
13#include <absl/strings/numbers.h>
14
15#include <packager/file.h>
16#include <packager/file/file_closer.h>
17#include <packager/macros/compiler.h>
18#include <packager/macros/logging.h>
19#include <packager/media/base/audio_stream_info.h>
20#include <packager/media/base/buffer_reader.h>
21#include <packager/media/base/decrypt_config.h>
22#include <packager/media/base/key_source.h>
23#include <packager/media/base/media_sample.h>
24#include <packager/media/base/rcheck.h>
25#include <packager/media/base/video_stream_info.h>
26#include <packager/media/base/video_util.h>
27#include <packager/media/codecs/ac3_audio_util.h>
28#include <packager/media/codecs/ac4_audio_util.h>
29#include <packager/media/codecs/av1_codec_configuration_record.h>
30#include <packager/media/codecs/avc_decoder_configuration_record.h>
31#include <packager/media/codecs/dovi_decoder_configuration_record.h>
32#include <packager/media/codecs/ec3_audio_util.h>
33#include <packager/media/codecs/es_descriptor.h>
34#include <packager/media/codecs/hevc_decoder_configuration_record.h>
35#include <packager/media/codecs/iamf_audio_util.h>
36#include <packager/media/codecs/vp_codec_configuration_record.h>
37#include <packager/media/formats/mp4/box_definitions.h>
38#include <packager/media/formats/mp4/box_reader.h>
39#include <packager/media/formats/mp4/track_run_iterator.h>
40
41ABSL_FLAG(bool,
42 use_dovi_supplemental_codecs,
43 false,
44 "Set to true to signal DolbyVision using the modern supplemental "
45 "codecs approach instead of the legacy "
46 "duplicate representations approach");
47
48namespace shaka {
49namespace media {
50namespace mp4 {
51namespace {
52
// Rescales |time_in_old_scale| by the ratio |new_scale| / |old_scale|,
// truncating the result toward zero.
//
// The computation is done entirely in integer arithmetic. Going through
// double, as this function used to, is inexact in two ways: values above 2^53
// lose their low bits, and the divide-then-multiply round trip can land just
// below the exact answer (e.g. 1.0 / 49 * 49 < 1.0), so truncation produced a
// result that was one too small.
//
// Returns 0 when |old_scale| is 0, since no meaningful conversion exists.
int64_t Rescale(int64_t time_in_old_scale,
                int32_t old_scale,
                int32_t new_scale) {
  // Avoid dividing by zero when the source scale is missing or corrupt.
  if (old_scale == 0)
    return 0;
  // INT64_MIN / -1 is not representable and traps on common hardware.
  if (old_scale == -1 &&
      time_in_old_scale == std::numeric_limits<int64_t>::min()) {
    return 0;
  }

  const int64_t from = old_scale;
  const int64_t to = new_scale;

  // time * to / from == whole * to + remainder * to / from, where
  // |remainder| < |from| <= 2^31, so |remainder * to| always fits in 64 bits.
  const int64_t whole = time_in_old_scale / from;
  const int64_t remainder = time_in_old_scale % from;
  return whole * to + remainder * to / from;
}
58
59H26xStreamFormat GetH26xStreamFormat(FourCC fourcc) {
60 switch (fourcc) {
61 case FOURCC_avc1:
62 case FOURCC_dvh1:
63 case FOURCC_hvc1:
64 return H26xStreamFormat::kNalUnitStreamWithoutParameterSetNalus;
65 case FOURCC_avc3:
66 case FOURCC_dvhe:
67 case FOURCC_hev1:
68 return H26xStreamFormat::kNalUnitStreamWithParameterSetNalus;
69 default:
70 return H26xStreamFormat::kUnSpecified;
71 }
72}
73
74Codec FourCCToCodec(FourCC fourcc) {
75 switch (fourcc) {
76 case FOURCC_av01:
77 return kCodecAV1;
78 case FOURCC_avc1:
79 case FOURCC_avc3:
80 return kCodecH264;
81 case FOURCC_dvh1:
82 case FOURCC_dvhe:
83 return kCodecH265DolbyVision;
84 case FOURCC_hev1:
85 case FOURCC_hvc1:
86 return kCodecH265;
87 case FOURCC_vp08:
88 return kCodecVP8;
89 case FOURCC_vp09:
90 return kCodecVP9;
91 case FOURCC_Opus:
92 return kCodecOpus;
93 case FOURCC_dtsc:
94 return kCodecDTSC;
95 case FOURCC_dtsh:
96 return kCodecDTSH;
97 case FOURCC_dtsl:
98 return kCodecDTSL;
99 case FOURCC_dtse:
100 return kCodecDTSE;
101 case FOURCC_dtsx:
102 return kCodecDTSX;
103 case FOURCC_dtsp:
104 return kCodecDTSP;
105 case FOURCC_dtsm:
106 return kCodecDTSM;
107 case FOURCC_ac_3:
108 return kCodecAC3;
109 case FOURCC_ec_3:
110 return kCodecEAC3;
111 case FOURCC_ac_4:
112 return kCodecAC4;
113 case FOURCC_alac:
114 return kCodecALAC;
115 case FOURCC_fLaC:
116 return kCodecFlac;
117 case FOURCC_iamf:
118 return kCodecIAMF;
119 case FOURCC_ipcm:
120 return kCodecPcm;
121 case FOURCC_mha1:
122 return kCodecMha1;
123 case FOURCC_mhm1:
124 return kCodecMhm1;
125 default:
126 return kUnknownCodec;
127 }
128}
129
130Codec ObjectTypeToCodec(ObjectType object_type) {
131 switch (object_type) {
132 case ObjectType::kISO_14496_3:
133 case ObjectType::kISO_13818_7_AAC_LC:
134 return kCodecAAC;
135 case ObjectType::kDTSC:
136 return kCodecDTSC;
137 case ObjectType::kDTSE:
138 return kCodecDTSE;
139 case ObjectType::kDTSH:
140 return kCodecDTSH;
141 case ObjectType::kDTSL:
142 return kCodecDTSL;
143 default:
144 return kUnknownCodec;
145 }
146}
147
148std::vector<uint8_t> GetDOVIDecoderConfig(
149 const std::vector<CodecConfiguration>& configs) {
150 for (const CodecConfiguration& config : configs) {
151 if (config.box_type == FOURCC_dvcC || config.box_type == FOURCC_dvvC) {
152 return config.data;
153 }
154 }
155 return std::vector<uint8_t>();
156}
157
158bool UpdateCodecStringForDolbyVision(
159 FourCC actual_format,
160 const std::vector<CodecConfiguration>& configs,
161 std::string* codec_string) {
162 DOVIDecoderConfigurationRecord dovi_config;
163 if (!dovi_config.Parse(GetDOVIDecoderConfig(configs))) {
164 LOG(ERROR) << "Failed to parse Dolby Vision decoder "
165 "configuration record.";
166 return false;
167 }
168 switch (actual_format) {
169 case FOURCC_dvh1:
170 case FOURCC_dvhe:
171 case FOURCC_dav1:
172 // Non-Backward compatibility mode. Replace the code string with
173 // Dolby Vision only.
174 *codec_string = dovi_config.GetCodecString(actual_format);
175 break;
176 case FOURCC_hev1:
177 // Backward compatibility mode. Two codecs are signalled: base codec
178 // without Dolby Vision and HDR with Dolby Vision.
179 *codec_string += ";" + dovi_config.GetCodecString(FOURCC_dvhe);
180 break;
181 case FOURCC_hvc1:
182 // See above.
183 *codec_string += ";" + dovi_config.GetCodecString(FOURCC_dvh1);
184 break;
185 case FOURCC_av01:
186 *codec_string += ";" + dovi_config.GetCodecString(FOURCC_dav1);
187 break;
188 default:
189 LOG(ERROR) << "Unsupported format with extra codec "
190 << FourCCToString(actual_format);
191 return false;
192 }
193 return true;
194}
195
196bool UpdateDolbyVisionInfo(FourCC actual_format,
197 const std::vector<CodecConfiguration>& configs,
198 uint8_t transfer_characteristics,
199 std::string* codec_string,
200 std::string* dovi_supplemental_codec_string,
201 FourCC* dovi_compatible_brand) {
202 DOVIDecoderConfigurationRecord dovi_config;
203 if (!dovi_config.Parse(GetDOVIDecoderConfig(configs))) {
204 LOG(ERROR) << "Failed to parse Dolby Vision decoder "
205 "configuration record.";
206 return false;
207 }
208 switch (actual_format) {
209 case FOURCC_dvh1:
210 case FOURCC_dvhe:
211 case FOURCC_dav1:
212 // Non-Backward compatibility mode. Replace the code string with
213 // Dolby Vision only.
214 *codec_string = dovi_config.GetCodecString(actual_format);
215 break;
216 case FOURCC_hev1:
217 // Backward compatibility mode. Use supplemental codec indicating Dolby
218 // Dolby Vision content.
219 *dovi_supplemental_codec_string = dovi_config.GetCodecString(FOURCC_dvhe);
220 break;
221 case FOURCC_hvc1:
222 // See above.
223 *dovi_supplemental_codec_string = dovi_config.GetCodecString(FOURCC_dvh1);
224 break;
225 case FOURCC_av01:
226 *dovi_supplemental_codec_string = dovi_config.GetCodecString(FOURCC_dav1);
227 break;
228 default:
229 LOG(ERROR) << "Unsupported format with extra codec "
230 << FourCCToString(actual_format);
231 return false;
232 }
233 *dovi_compatible_brand =
234 dovi_config.GetDoViCompatibleBrand(transfer_characteristics);
235 return true;
236}
237
// Number of nanoseconds in one second; used when converting sample counts to
// nanosecond delays.
constexpr uint64_t kNanosecondsPerSecond = 1'000'000'000ull;
239
240} // namespace
241
242MP4MediaParser::MP4MediaParser()
243 : state_(kWaitingForInit),
244 decryption_key_source_(NULL),
245 moof_head_(0),
246 mdat_tail_(0) {}
247
248MP4MediaParser::~MP4MediaParser() {}
249
250void MP4MediaParser::Init(const InitCB& init_cb,
251 const NewMediaSampleCB& new_media_sample_cb,
252 const NewTextSampleCB& new_text_sample_cb,
253 KeySource* decryption_key_source) {
254 DCHECK_EQ(state_, kWaitingForInit);
255 DCHECK(init_cb_ == nullptr);
256 DCHECK(init_cb != nullptr);
257 DCHECK(new_media_sample_cb != nullptr);
258
259 ChangeState(kParsingBoxes);
260 init_cb_ = init_cb;
261 new_sample_cb_ = new_media_sample_cb;
262 decryption_key_source_ = decryption_key_source;
263 if (decryption_key_source)
264 decryptor_source_.reset(new DecryptorSource(decryption_key_source));
265}
266
267void MP4MediaParser::Reset() {
268 queue_.Reset();
269 runs_.reset();
270 moof_head_ = 0;
271 mdat_tail_ = 0;
272}
273
274bool MP4MediaParser::Flush() {
275 DCHECK_NE(state_, kWaitingForInit);
276 Reset();
277 ChangeState(kParsingBoxes);
278 return true;
279}
280
281bool MP4MediaParser::Parse(const uint8_t* buf, int size) {
282 DCHECK_NE(state_, kWaitingForInit);
283
284 if (state_ == kError)
285 return false;
286
287 queue_.Push(buf, size);
288
289 bool result, err = false;
290
291 do {
292 if (state_ == kParsingBoxes) {
293 result = ParseBox(&err);
294 } else {
295 DCHECK_EQ(kEmittingSamples, state_);
296 result = EnqueueSample(&err);
297 if (result) {
298 int64_t max_clear = runs_->GetMaxClearOffset() + moof_head_;
299 err = !ReadAndDiscardMDATsUntil(max_clear);
300 }
301 }
302 } while (result && !err);
303
304 if (err) {
305 DLOG(ERROR) << "Error while parsing MP4";
306 moov_.reset();
307 Reset();
308 ChangeState(kError);
309 return false;
310 }
311
312 return true;
313}
314
315bool MP4MediaParser::LoadMoov(const std::string& file_path) {
316 std::unique_ptr<File, FileCloser> file(
317 File::OpenWithNoBuffering(file_path.c_str(), "r"));
318 if (!file) {
319 LOG(ERROR) << "Unable to open media file '" << file_path << "'";
320 return false;
321 }
322 if (!file->Seek(0)) {
323 LOG(WARNING) << "Filesystem does not support seeking on file '" << file_path
324 << "'";
325 return false;
326 }
327
328 uint64_t file_position(0);
329 bool mdat_seen(false);
330 while (true) {
331 const uint32_t kBoxHeaderReadSize(16);
332 std::vector<uint8_t> buffer(kBoxHeaderReadSize);
333 int64_t bytes_read = file->Read(&buffer[0], kBoxHeaderReadSize);
334 if (bytes_read == 0) {
335 LOG(ERROR) << "Could not find 'moov' box in file '" << file_path << "'";
336 return false;
337 }
338 if (bytes_read < kBoxHeaderReadSize) {
339 LOG(ERROR) << "Error reading media file '" << file_path << "'";
340 return false;
341 }
342 uint64_t box_size;
343 FourCC box_type;
344 bool err;
345 if (!BoxReader::StartBox(&buffer[0], kBoxHeaderReadSize, &box_type,
346 &box_size, &err)) {
347 LOG(ERROR) << "Could not start box from file '" << file_path << "'";
348 return false;
349 }
350 if (box_type == FOURCC_mdat) {
351 mdat_seen = true;
352 } else if (box_type == FOURCC_moov) {
353 if (!mdat_seen) {
354 // 'moov' is before 'mdat'. Nothing to do.
355 break;
356 }
357 // 'mdat' before 'moov'. Read and parse 'moov'.
358 if (!Parse(&buffer[0], bytes_read)) {
359 LOG(ERROR) << "Error parsing mp4 file '" << file_path << "'";
360 return false;
361 }
362 uint64_t bytes_to_read = box_size - bytes_read;
363 buffer.resize(bytes_to_read);
364 while (bytes_to_read > 0) {
365 bytes_read = file->Read(&buffer[0], bytes_to_read);
366 if (bytes_read <= 0) {
367 LOG(ERROR) << "Error reading 'moov' contents from file '" << file_path
368 << "'";
369 return false;
370 }
371 if (!Parse(&buffer[0], bytes_read)) {
372 LOG(ERROR) << "Error parsing mp4 file '" << file_path << "'";
373 return false;
374 }
375 bytes_to_read -= bytes_read;
376 }
377 queue_.Reset(); // So that we don't need to adjust data offsets.
378 mdat_tail_ = 0; // So it will skip boxes until mdat.
379 break; // Done.
380 }
381 file_position += box_size;
382 if (!file->Seek(file_position)) {
383 LOG(ERROR) << "Error skipping box in mp4 file '" << file_path << "'";
384 return false;
385 }
386 }
387 return true;
388}
389
390bool MP4MediaParser::ParseBox(bool* err) {
391 const uint8_t* buf;
392 int size;
393 queue_.Peek(&buf, &size);
394 if (!size)
395 return false;
396
397 std::unique_ptr<BoxReader> reader(BoxReader::ReadBox(buf, size, err));
398 if (reader.get() == NULL)
399 return false;
400
401 if (reader->type() == FOURCC_mdat) {
402 if (!moov_) {
403 // For seekable files, we seek to the 'moov' and load the 'moov' first
404 // then seek back (see LoadMoov function for details); we do not support
405 // having 'mdat' before 'moov' for non-seekable files. The code ends up
406 // here only if it is a non-seekable file.
407 NOTIMPLEMENTED() << " Non-seekable Files with 'mdat' box before 'moov' "
408 "box is not supported.";
409 *err = true;
410 return false;
411 } else {
412 // This can happen if there are unused 'mdat' boxes, which is unusual
413 // but allowed by the spec. Ignore the 'mdat' and proceed.
414 LOG(INFO)
415 << "Ignore unused 'mdat' box - this could be as a result of extra "
416 "not usable 'mdat' or 'mdat' associated with unrecognized track.";
417 }
418 }
419
420 // Set up mdat offset for ReadMDATsUntil().
421 mdat_tail_ = queue_.head() + reader->size();
422
423 if (reader->type() == FOURCC_moov) {
424 *err = !ParseMoov(reader.get());
425 } else if (reader->type() == FOURCC_moof) {
426 moof_head_ = queue_.head();
427 *err = !ParseMoof(reader.get());
428
429 // Return early to avoid evicting 'moof' data from queue. Auxiliary info may
430 // be located anywhere in the file, including inside the 'moof' itself.
431 // (Since 'default-base-is-moof' is mandated, no data references can come
432 // before the head of the 'moof', so keeping this box around is sufficient.)
433 return !(*err);
434 } else {
435 VLOG(2) << "Skipping top-level box: " << FourCCToString(reader->type());
436 }
437
438 queue_.Pop(static_cast<int>(reader->size()));
439 return !(*err);
440}
441
442bool MP4MediaParser::ParseMoov(BoxReader* reader) {
443 if (moov_)
444 return true; // Already parsed the 'moov' box.
445
446 moov_.reset(new Movie);
447 RCHECK(moov_->Parse(reader));
448 runs_.reset();
449
450 std::vector<std::shared_ptr<StreamInfo>> streams;
451
452 bool use_dovi_supplemental =
453 absl::GetFlag(FLAGS_use_dovi_supplemental_codecs);
454
455 for (std::vector<Track>::const_iterator track = moov_->tracks.begin();
456 track != moov_->tracks.end(); ++track) {
457 const int32_t timescale = track->media.header.timescale;
458
459 // Calculate duration (based on timescale).
460 int64_t duration = 0;
461 if (track->media.header.duration > 0) {
462 duration = track->media.header.duration;
463 } else if (moov_->extends.header.fragment_duration > 0) {
464 DCHECK(moov_->header.timescale != 0);
465 duration = Rescale(moov_->extends.header.fragment_duration,
466 moov_->header.timescale,
467 timescale);
468 } else if (moov_->header.duration > 0 &&
469 moov_->header.duration != std::numeric_limits<uint64_t>::max()) {
470 DCHECK(moov_->header.timescale != 0);
471 duration =
472 Rescale(moov_->header.duration, moov_->header.timescale, timescale);
473 }
474
475 const SampleDescription& samp_descr =
476 track->media.information.sample_table.description;
477
478 size_t desc_idx = 0;
479
480 // Read sample description index from mvex if it exists otherwise read
481 // from the first entry in Sample To Chunk box.
482 if (moov_->extends.tracks.size() > 0) {
483 for (size_t t = 0; t < moov_->extends.tracks.size(); t++) {
484 const TrackExtends& trex = moov_->extends.tracks[t];
485 if (trex.track_id == track->header.track_id) {
486 desc_idx = trex.default_sample_description_index;
487 break;
488 }
489 }
490 } else {
491 const std::vector<ChunkInfo>& chunk_info =
492 track->media.information.sample_table.sample_to_chunk.chunk_info;
493 RCHECK(chunk_info.size() > 0);
494 desc_idx = chunk_info[0].sample_description_index;
495 }
496 RCHECK(desc_idx > 0);
497 desc_idx -= 1; // BMFF descriptor index is one-based
498
499 if (samp_descr.type == kAudio) {
500 RCHECK(!samp_descr.audio_entries.empty());
501
502 // It is not uncommon to find otherwise-valid files with incorrect sample
503 // description indices, so we fail gracefully in that case.
504 if (desc_idx >= samp_descr.audio_entries.size())
505 desc_idx = 0;
506
507 const AudioSampleEntry& entry = samp_descr.audio_entries[desc_idx];
508 const FourCC actual_format = entry.GetActualFormat();
509 Codec codec = FourCCToCodec(actual_format);
510 uint8_t num_channels = entry.channelcount;
511 uint32_t sampling_frequency = entry.samplerate;
512 uint64_t codec_delay_ns = 0;
513 uint8_t audio_object_type = 0;
514 uint32_t max_bitrate = 0;
515 uint32_t avg_bitrate = 0;
516 std::vector<uint8_t> codec_config;
517
518 switch (actual_format) {
519 case FOURCC_mp4a: {
520 const DecoderConfigDescriptor& decoder_config =
521 entry.esds.es_descriptor.decoder_config_descriptor();
522 max_bitrate = decoder_config.max_bitrate();
523 avg_bitrate = decoder_config.avg_bitrate();
524
525 codec = ObjectTypeToCodec(decoder_config.object_type());
526 if (codec == kCodecAAC) {
527 const AACAudioSpecificConfig& aac_audio_specific_config =
528 entry.esds.aac_audio_specific_config;
529 num_channels = aac_audio_specific_config.GetNumChannels();
530 sampling_frequency =
531 aac_audio_specific_config.GetSamplesPerSecond();
532 audio_object_type = aac_audio_specific_config.GetAudioObjectType();
533 codec_config =
534 decoder_config.decoder_specific_info_descriptor().data();
535 } else if (codec == kUnknownCodec) {
536 // Intentionally not to fail in the parser as there may be multiple
537 // streams in the source content, which allows the supported stream
538 // to be packaged. An error will be returned if the unsupported
539 // stream is passed to the muxer.
540 LOG(WARNING) << "Unsupported audio object type "
541 << static_cast<int>(decoder_config.object_type())
542 << " in stsd.es_desriptor.";
543 }
544 break;
545 }
546 case FOURCC_dtsc:
547 FALLTHROUGH_INTENDED;
548 case FOURCC_dtse:
549 FALLTHROUGH_INTENDED;
550 case FOURCC_dtsh:
551 FALLTHROUGH_INTENDED;
552 case FOURCC_dtsl:
553 FALLTHROUGH_INTENDED;
554 case FOURCC_dtsm:
555 codec_config = entry.ddts.extra_data;
556 max_bitrate = entry.ddts.max_bitrate;
557 avg_bitrate = entry.ddts.avg_bitrate;
558 break;
559 case FOURCC_dtsx:
560 codec_config = entry.udts.data;
561 break;
562 case FOURCC_ac_3:
563 codec_config = entry.dac3.data;
564 num_channels = static_cast<uint8_t>(GetAc3NumChannels(codec_config));
565 break;
566 case FOURCC_ec_3:
567 codec_config = entry.dec3.data;
568 num_channels = static_cast<uint8_t>(GetEc3NumChannels(codec_config));
569 break;
570 case FOURCC_ac_4:
571 codec_config = entry.dac4.data;
572 // Stop the process if have errors when parsing AC-4 dac4 box,
573 // bitstream version 0 (has beed deprecated) and contains multiple
574 // presentations in single AC-4 stream (only used for broadcast).
575 if (!GetAc4CodecInfo(codec_config, &audio_object_type)) {
576 LOG(ERROR) << "Failed to parse dac4.";
577 return false;
578 }
579 break;
580 case FOURCC_alac:
581 codec_config = entry.alac.data;
582 break;
583 case FOURCC_fLaC:
584 codec_config = entry.dfla.data;
585 break;
586 case FOURCC_Opus:
587 codec_config = entry.dops.opus_identification_header;
588 codec_delay_ns =
589 entry.dops.preskip * kNanosecondsPerSecond / sampling_frequency;
590 break;
591 case FOURCC_iamf:
592 codec_config = entry.iacb.data;
593 if (!GetIamfCodecStringInfo(codec_config, audio_object_type)) {
594 LOG(ERROR) << "Failed to parse iamf.";
595 return false;
596 }
597 break;
598 case FOURCC_mha1:
599 case FOURCC_mhm1:
600 codec_config = entry.mhac.data;
601 audio_object_type = entry.mhac.mpeg_h_3da_profile_level_indication;
602 break;
603 default:
604 // Intentionally not to fail in the parser as there may be multiple
605 // streams in the source content, which allows the supported stream to
606 // be packaged.
607 // An error will be returned if the unsupported stream is passed to
608 // the muxer.
609 LOG(WARNING) << "Unsupported audio format '"
610 << FourCCToString(actual_format) << "' in stsd box.";
611 break;
612 }
613
614 // Extract possible seek preroll.
615 uint64_t seek_preroll_ns = 0;
616 for (const auto& sample_group_description :
617 track->media.information.sample_table.sample_group_descriptions) {
618 if (sample_group_description.grouping_type != FOURCC_roll)
619 continue;
620 const auto& audio_roll_recovery_entries =
621 sample_group_description.audio_roll_recovery_entries;
622 if (audio_roll_recovery_entries.size() != 1) {
623 LOG(WARNING) << "Unexpected number of entries in "
624 "SampleGroupDescription table with grouping type "
625 "'roll'.";
626 break;
627 }
628 const int16_t roll_distance_in_samples =
629 audio_roll_recovery_entries[0].roll_distance;
630 if (roll_distance_in_samples < 0) {
631 // IAMF requires the `samplerate` field to be set to 0.
632 // (https://aomediacodec.github.io/iamf/#iasampleentry-section)
633 if (actual_format == FOURCC_iamf)
634 continue;
635
636 RCHECK((sampling_frequency != 0));
637 seek_preroll_ns = kNanosecondsPerSecond *
638 (-roll_distance_in_samples) / sampling_frequency;
639 } else {
640 LOG(WARNING)
641 << "Roll distance is supposed to be negative, but seeing "
642 << roll_distance_in_samples;
643 }
644 break;
645 }
646
647 // The stream will be decrypted if a |decryptor_source_| is available.
648 const bool is_encrypted =
649 decryptor_source_
650 ? false
651 : entry.sinf.info.track_encryption.default_is_protected == 1;
652 DVLOG(1) << "is_audio_track_encrypted_: " << is_encrypted;
653 streams.emplace_back(new AudioStreamInfo(
654 track->header.track_id, timescale, duration, codec,
655 AudioStreamInfo::GetCodecString(codec, audio_object_type),
656 codec_config.data(), codec_config.size(), entry.samplesize,
657 num_channels, sampling_frequency, seek_preroll_ns, codec_delay_ns,
658 max_bitrate, avg_bitrate, track->media.header.language.code,
659 is_encrypted));
660 }
661
662 if (samp_descr.type == kVideo) {
663 RCHECK(!samp_descr.video_entries.empty());
664 if (desc_idx >= samp_descr.video_entries.size())
665 desc_idx = 0;
666 const VideoSampleEntry& entry = samp_descr.video_entries[desc_idx];
667 std::vector<uint8_t> codec_configuration_data =
668 entry.codec_configuration.data;
669
670 uint32_t coded_width = entry.width;
671 uint32_t coded_height = entry.height;
672 uint32_t pixel_width = entry.pixel_aspect.h_spacing;
673 uint32_t pixel_height = entry.pixel_aspect.v_spacing;
674 if (pixel_width == 0 && pixel_height == 0) {
675 DerivePixelWidthHeight(coded_width, coded_height, track->header.width,
676 track->header.height, &pixel_width,
677 &pixel_height);
678 }
679 std::string codec_string;
680 std::string dovi_supplemental_codec_string("");
681 FourCC dovi_compatible_brand = FOURCC_NULL;
682 uint8_t nalu_length_size = 0;
683 uint8_t transfer_characteristics = 0;
684 uint8_t color_primaries = 0;
685 uint8_t matrix_coefficients = 0;
686
687 const FourCC actual_format = entry.GetActualFormat();
688 const Codec video_codec = FourCCToCodec(actual_format);
689 switch (actual_format) {
690 case FOURCC_av01: {
691 AV1CodecConfigurationRecord av1_config;
692 if (!av1_config.Parse(codec_configuration_data)) {
693 LOG(ERROR) << "Failed to parse av1c.";
694 return false;
695 }
696 // Generate the full codec string if the colr atom is present.
697 if (entry.colr.color_parameter_type != FOURCC_NULL) {
698 transfer_characteristics = entry.colr.transfer_characteristics;
699 color_primaries = entry.colr.color_primaries;
700 matrix_coefficients = entry.colr.matrix_coefficients;
701 codec_string = av1_config.GetCodecString(
702 color_primaries, transfer_characteristics, matrix_coefficients,
703 entry.colr.video_full_range_flag);
704 } else {
705 codec_string = av1_config.GetCodecString();
706 }
707
708 if (!entry.extra_codec_configs.empty()) {
709 // |extra_codec_configs| is present only for Dolby Vision.
710 if (use_dovi_supplemental) {
711 if (!UpdateDolbyVisionInfo(
712 actual_format, entry.extra_codec_configs,
713 transfer_characteristics, &codec_string,
714 &dovi_supplemental_codec_string,
715 &dovi_compatible_brand)) {
716 return false;
717 }
718 } else {
719 if (!UpdateCodecStringForDolbyVision(actual_format,
720 entry.extra_codec_configs,
721 &codec_string)) {
722 return false;
723 }
724 }
725 }
726 break;
727 }
728 case FOURCC_avc1:
729 case FOURCC_avc3: {
730 AVCDecoderConfigurationRecord avc_config;
731 if (!avc_config.Parse(codec_configuration_data)) {
732 LOG(ERROR) << "Failed to parse avcc.";
733 return false;
734 }
735 codec_string = avc_config.GetCodecString(actual_format);
736 nalu_length_size = avc_config.nalu_length_size();
737 transfer_characteristics = avc_config.transfer_characteristics();
738 color_primaries = avc_config.color_primaries();
739 matrix_coefficients = avc_config.matrix_coefficients();
740
741 // Use configurations from |avc_config| if it is valid.
742 if (avc_config.coded_width() != 0) {
743 DCHECK_NE(avc_config.coded_height(), 0u);
744 if (coded_width != avc_config.coded_width() ||
745 coded_height != avc_config.coded_height()) {
746 LOG(WARNING) << "Resolution in VisualSampleEntry (" << coded_width
747 << "," << coded_height
748 << ") does not match with resolution in "
749 "AVCDecoderConfigurationRecord ("
750 << avc_config.coded_width() << ","
751 << avc_config.coded_height()
752 << "). Use AVCDecoderConfigurationRecord.";
753 coded_width = avc_config.coded_width();
754 coded_height = avc_config.coded_height();
755 }
756
757 DCHECK_NE(avc_config.pixel_width(), 0u);
758 DCHECK_NE(avc_config.pixel_height(), 0u);
759 if (pixel_width != avc_config.pixel_width() ||
760 pixel_height != avc_config.pixel_height()) {
761 LOG_IF(WARNING, pixel_width != 1 || pixel_height != 1)
762 << "Pixel aspect ratio in PASP box (" << pixel_width << ","
763 << pixel_height
764 << ") does not match with SAR in "
765 "AVCDecoderConfigurationRecord "
766 "("
767 << avc_config.pixel_width() << ","
768 << avc_config.pixel_height()
769 << "). Use AVCDecoderConfigurationRecord.";
770 pixel_width = avc_config.pixel_width();
771 pixel_height = avc_config.pixel_height();
772 }
773 }
774 break;
775 }
776 case FOURCC_dvh1:
777 case FOURCC_dvhe:
778 case FOURCC_hev1:
779 case FOURCC_hvc1: {
780 HEVCDecoderConfigurationRecord hevc_config;
781 if (!hevc_config.Parse(codec_configuration_data)) {
782 LOG(ERROR) << "Failed to parse hevc.";
783 return false;
784 }
785 codec_string = hevc_config.GetCodecString(actual_format);
786 nalu_length_size = hevc_config.nalu_length_size();
787 transfer_characteristics = hevc_config.transfer_characteristics();
788 color_primaries = hevc_config.color_primaries();
789 matrix_coefficients = hevc_config.matrix_coefficients();
790
791 if (!entry.extra_codec_configs.empty()) {
792 // |extra_codec_configs| is present only for Dolby Vision.
793 if (use_dovi_supplemental) {
794 if (!UpdateDolbyVisionInfo(
795 actual_format, entry.extra_codec_configs,
796 transfer_characteristics, &codec_string,
797 &dovi_supplemental_codec_string,
798 &dovi_compatible_brand)) {
799 return false;
800 }
801 } else {
802 if (!UpdateCodecStringForDolbyVision(actual_format,
803 entry.extra_codec_configs,
804 &codec_string)) {
805 return false;
806 }
807 }
808 }
809 break;
810 }
811 case FOURCC_vp08:
812 case FOURCC_vp09: {
813 VPCodecConfigurationRecord vp_config;
814 if (!vp_config.ParseMP4(codec_configuration_data)) {
815 LOG(ERROR) << "Failed to parse vpcc.";
816 return false;
817 }
818 if (actual_format == FOURCC_vp09 &&
819 (!vp_config.is_level_set() || vp_config.level() == 0)) {
820 const double kUnknownSampleDuration = 0.0;
821 vp_config.SetVP9Level(coded_width, coded_height,
822 kUnknownSampleDuration);
823 vp_config.WriteMP4(&codec_configuration_data);
824 }
825 codec_string = vp_config.GetCodecString(video_codec);
826 break;
827 }
828 default:
829 // Intentionally not to fail in the parser as there may be multiple
830 // streams in the source content, which allows the supported stream to
831 // be packaged.
832 // An error will be returned if the unsupported stream is passed to
833 // the muxer.
834 LOG(WARNING) << "Unsupported video format '"
835 << FourCCToString(actual_format) << "' in stsd box.";
836 break;
837 }
838
839 // The stream will be decrypted if a |decryptor_source_| is available.
840 const bool is_encrypted =
841 decryptor_source_
842 ? false
843 : entry.sinf.info.track_encryption.default_is_protected == 1;
844 DVLOG(1) << "is_video_track_encrypted_: " << is_encrypted;
845 std::shared_ptr<VideoStreamInfo> video_stream_info(new VideoStreamInfo(
846 track->header.track_id, timescale, duration, video_codec,
847 GetH26xStreamFormat(actual_format), codec_string,
848 codec_configuration_data.data(), codec_configuration_data.size(),
849 coded_width, coded_height, pixel_width, pixel_height, color_primaries,
850 matrix_coefficients, transfer_characteristics,
851 0, // trick_play_factor
852 nalu_length_size, track->media.header.language.code, is_encrypted));
853
854 if (use_dovi_supplemental) {
855 video_stream_info->set_supplemental_codec(
856 dovi_supplemental_codec_string);
857 video_stream_info->set_compatible_brand(dovi_compatible_brand);
858 }
859 video_stream_info->set_extra_config(entry.ExtraCodecConfigsAsVector());
860 video_stream_info->set_colr_data((entry.colr.raw_box).data(),
861 (entry.colr.raw_box).size());
862
863 // Set pssh raw data if it has.
864 if (moov_->pssh.size() > 0) {
865 std::vector<uint8_t> pssh_raw_data;
866 for (const auto& pssh : moov_->pssh) {
867 pssh_raw_data.insert(pssh_raw_data.end(), pssh.raw_box.begin(),
868 pssh.raw_box.end());
869 }
870 video_stream_info->set_eme_init_data(pssh_raw_data.data(),
871 pssh_raw_data.size());
872 }
873
874 streams.push_back(video_stream_info);
875 }
876 }
877
878 init_cb_(streams);
879 if (!FetchKeysIfNecessary(moov_->pssh))
880 return false;
881 runs_.reset(new TrackRunIterator(moov_.get()));
882 RCHECK(runs_->Init());
883 ChangeState(kEmittingSamples);
884 return true;
885}
886
887bool MP4MediaParser::ParseMoof(BoxReader* reader) {
888 // Must already have initialization segment.
889 RCHECK(moov_.get());
890 MovieFragment moof;
891 RCHECK(moof.Parse(reader));
892 if (!runs_)
893 runs_.reset(new TrackRunIterator(moov_.get()));
894 RCHECK(runs_->Init(moof));
895 if (!FetchKeysIfNecessary(moof.pssh))
896 return false;
897 ChangeState(kEmittingSamples);
898 return true;
899}
900
901bool MP4MediaParser::FetchKeysIfNecessary(
902 const std::vector<ProtectionSystemSpecificHeader>& headers) {
903 if (headers.empty())
904 return true;
905
906 // An error will be returned later if the samples need to be decrypted.
907 if (!decryption_key_source_)
908 return true;
909
910 std::vector<uint8_t> pssh_raw_data;
911 for (const auto& header : headers) {
912 pssh_raw_data.insert(pssh_raw_data.end(), header.raw_box.begin(),
913 header.raw_box.end());
914 }
915 Status status =
916 decryption_key_source_->FetchKeys(EmeInitDataType::CENC, pssh_raw_data);
917 if (!status.ok()) {
918 LOG(ERROR) << "Error fetching decryption keys: " << status;
919 return false;
920 }
921 return true;
922}
923
924bool MP4MediaParser::EnqueueSample(bool* err) {
925 if (!runs_->IsRunValid()) {
926 // Remain in kEnqueueingSamples state, discarding data, until the end of
927 // the current 'mdat' box has been appended to the queue.
928 if (!queue_.Trim(mdat_tail_))
929 return false;
930
931 ChangeState(kParsingBoxes);
932 return true;
933 }
934
935 if (!runs_->IsSampleValid()) {
936 runs_->AdvanceRun();
937 return true;
938 }
939
940 DCHECK(!(*err));
941
942 const uint8_t* buf;
943 int buf_size;
944 queue_.Peek(&buf, &buf_size);
945 if (!buf_size)
946 return false;
947
948 // Skip this entire track if it is not audio nor video.
949 if (!runs_->is_audio() && !runs_->is_video())
950 runs_->AdvanceRun();
951
952 // Attempt to cache the auxiliary information first. Aux info is usually
953 // placed in a contiguous block before the sample data, rather than being
954 // interleaved. If we didn't cache it, this would require that we retain the
955 // start of the segment buffer while reading samples. Aux info is typically
956 // quite small compared to sample data, so this pattern is useful on
957 // memory-constrained devices where the source buffer consumes a substantial
958 // portion of the total system memory.
959 if (runs_->AuxInfoNeedsToBeCached()) {
960 queue_.PeekAt(runs_->aux_info_offset() + moof_head_, &buf, &buf_size);
961 if (buf_size < runs_->aux_info_size())
962 return false;
963 *err = !runs_->CacheAuxInfo(buf, buf_size);
964 return !*err;
965 }
966
967 int64_t sample_offset = runs_->sample_offset() + moof_head_;
968 queue_.PeekAt(sample_offset, &buf, &buf_size);
969 if (buf_size < runs_->sample_size()) {
970 if (sample_offset < queue_.head()) {
971 LOG(ERROR) << "Incorrect sample offset " << sample_offset
972 << " < " << queue_.head();
973 *err = true;
974 }
975 return false;
976 }
977
978 const uint8_t* media_data = buf;
979 const size_t media_data_size = runs_->sample_size();
980 // Use a dummy data size of 0 to avoid copying overhead.
981 // Actual media data is set later.
982 const size_t kDummyDataSize = 0;
983 std::shared_ptr<MediaSample> stream_sample(
984 MediaSample::CopyFrom(media_data, kDummyDataSize, runs_->is_keyframe()));
985
986 if (runs_->is_encrypted()) {
987 std::shared_ptr<uint8_t> decrypted_media_data(
988 new uint8_t[media_data_size], std::default_delete<uint8_t[]>());
989 std::unique_ptr<DecryptConfig> decrypt_config = runs_->GetDecryptConfig();
990 if (!decrypt_config) {
991 *err = true;
992 LOG(ERROR) << "Missing decrypt config.";
993 return false;
994 }
995
996 if (!decryptor_source_) {
997 stream_sample->SetData(media_data, media_data_size);
998 // If the demuxer does not have the decryptor_source_, store
999 // decrypt_config so that the demuxed sample can be decrypted later.
1000 stream_sample->set_decrypt_config(std::move(decrypt_config));
1001 stream_sample->set_is_encrypted(true);
1002 } else {
1003 if (!decryptor_source_->DecryptSampleBuffer(decrypt_config.get(),
1004 media_data, media_data_size,
1005 decrypted_media_data.get())) {
1006 *err = true;
1007 LOG(ERROR) << "Cannot decrypt samples.";
1008 return false;
1009 }
1010 stream_sample->TransferData(std::move(decrypted_media_data),
1011 media_data_size);
1012 }
1013 } else {
1014 stream_sample->SetData(media_data, media_data_size);
1015 }
1016
1017 stream_sample->set_dts(runs_->dts());
1018 stream_sample->set_pts(runs_->cts());
1019 stream_sample->set_duration(runs_->duration());
1020
1021 DVLOG(3) << "Pushing frame: "
1022 << ", key=" << runs_->is_keyframe()
1023 << ", dur=" << runs_->duration()
1024 << ", dts=" << runs_->dts()
1025 << ", cts=" << runs_->cts()
1026 << ", size=" << runs_->sample_size();
1027
1028 if (!new_sample_cb_(runs_->track_id(), stream_sample)) {
1029 *err = true;
1030 LOG(ERROR) << "Failed to process the sample.";
1031 return false;
1032 }
1033
1034 runs_->AdvanceSample();
1035 return true;
1036}
1037
1038bool MP4MediaParser::ReadAndDiscardMDATsUntil(const int64_t offset) {
1039 bool err = false;
1040 while (mdat_tail_ < offset) {
1041 const uint8_t* buf;
1042 int size;
1043 queue_.PeekAt(mdat_tail_, &buf, &size);
1044
1045 FourCC type;
1046 uint64_t box_sz;
1047 if (!BoxReader::StartBox(buf, size, &type, &box_sz, &err))
1048 break;
1049
1050 mdat_tail_ += box_sz;
1051 }
1052 queue_.Trim(std::min(mdat_tail_, offset));
1053 return !err;
1054}
1055
1056void MP4MediaParser::ChangeState(State new_state) {
1057 DVLOG(2) << "Changing state: " << new_state;
1058 state_ = new_state;
1059}
1060
1061} // namespace mp4
1062} // namespace media
1063} // namespace shaka
DecryptorSource wraps KeySource and is responsible for decryptor management.
KeySource is responsible for encryption key acquisition.
Definition key_source.h:52
std::function< bool(uint32_t track_id, std::shared_ptr< MediaSample > media_sample)> NewMediaSampleCB
std::function< bool(uint32_t track_id, std::shared_ptr< TextSample > text_sample)> NewTextSampleCB
std::function< void(const std::vector< std::shared_ptr< StreamInfo > > &stream_info)> InitCB
All the methods that are virtual are virtual for mocking.