Shaka Packager SDK
mp4_media_parser.cc
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <packager/media/formats/mp4/mp4_media_parser.h>
6 
7 #include <algorithm>
8 #include <functional>
9 #include <limits>
10 
11 #include <absl/log/check.h>
12 #include <absl/log/log.h>
13 #include <absl/strings/numbers.h>
14 
15 #include <packager/file.h>
16 #include <packager/file/file_closer.h>
17 #include <packager/macros/compiler.h>
18 #include <packager/macros/logging.h>
19 #include <packager/media/base/audio_stream_info.h>
20 #include <packager/media/base/buffer_reader.h>
21 #include <packager/media/base/decrypt_config.h>
22 #include <packager/media/base/key_source.h>
23 #include <packager/media/base/media_sample.h>
24 #include <packager/media/base/rcheck.h>
25 #include <packager/media/base/video_stream_info.h>
26 #include <packager/media/base/video_util.h>
27 #include <packager/media/codecs/ac3_audio_util.h>
28 #include <packager/media/codecs/ac4_audio_util.h>
29 #include <packager/media/codecs/av1_codec_configuration_record.h>
30 #include <packager/media/codecs/avc_decoder_configuration_record.h>
31 #include <packager/media/codecs/dovi_decoder_configuration_record.h>
32 #include <packager/media/codecs/ec3_audio_util.h>
33 #include <packager/media/codecs/es_descriptor.h>
34 #include <packager/media/codecs/hevc_decoder_configuration_record.h>
35 #include <packager/media/codecs/iamf_audio_util.h>
36 #include <packager/media/codecs/vp_codec_configuration_record.h>
37 #include <packager/media/formats/mp4/box_definitions.h>
38 #include <packager/media/formats/mp4/box_reader.h>
39 #include <packager/media/formats/mp4/track_run_iterator.h>
40 
// Command-line flag choosing how Dolby Vision is signalled. It is read in
// MP4MediaParser::ParseMoov(): when true, UpdateDolbyVisionInfo() is used
// (supplemental codec string + compatible brand); when false,
// UpdateCodecStringForDolbyVision() is used instead.
ABSL_FLAG(bool,
 use_dovi_supplemental_codecs,
 false,
 "Set to true to signal DolbyVision using the modern supplemental "
 "codecs approach instead of the legacy "
 "duplicate representations approach");
47 
48 namespace shaka {
49 namespace media {
50 namespace mp4 {
51 namespace {
52 
// Converts |time_in_old_scale|, expressed in units of 1/|old_scale|, into
// units of 1/|new_scale|, truncating toward zero.
//
// The conversion is done in exact integer arithmetic. Going through floating
// point (as in `(t / old) * new`) silently loses a tick for ordinary inputs
// (e.g. 29 * 100 / 100 evaluates to 28) and cannot represent values above
// 2^53 exactly.
//
// Returns 0 when |old_scale| is 0, since no meaningful conversion exists.
int64_t Rescale(int64_t time_in_old_scale,
                int32_t old_scale,
                int32_t new_scale) {
  if (old_scale == 0)
    return 0;

  // Split into whole units and a remainder so the intermediate product
  // |remainder * new_scale| always fits in 64 bits (both factors are below
  // 2^31 in magnitude).
  const int64_t whole_units = time_in_old_scale / old_scale;
  const int64_t remainder = time_in_old_scale % old_scale;
  return whole_units * new_scale + remainder * new_scale / old_scale;
}
58 
59 H26xStreamFormat GetH26xStreamFormat(FourCC fourcc) {
60  switch (fourcc) {
61  case FOURCC_avc1:
62  case FOURCC_dvh1:
63  case FOURCC_hvc1:
64  return H26xStreamFormat::kNalUnitStreamWithoutParameterSetNalus;
65  case FOURCC_avc3:
66  case FOURCC_dvhe:
67  case FOURCC_hev1:
68  return H26xStreamFormat::kNalUnitStreamWithParameterSetNalus;
69  default:
70  return H26xStreamFormat::kUnSpecified;
71  }
72 }
73 
74 Codec FourCCToCodec(FourCC fourcc) {
75  switch (fourcc) {
76  case FOURCC_av01:
77  return kCodecAV1;
78  case FOURCC_avc1:
79  case FOURCC_avc3:
80  return kCodecH264;
81  case FOURCC_dvh1:
82  case FOURCC_dvhe:
83  return kCodecH265DolbyVision;
84  case FOURCC_hev1:
85  case FOURCC_hvc1:
86  return kCodecH265;
87  case FOURCC_vp08:
88  return kCodecVP8;
89  case FOURCC_vp09:
90  return kCodecVP9;
91  case FOURCC_Opus:
92  return kCodecOpus;
93  case FOURCC_dtsc:
94  return kCodecDTSC;
95  case FOURCC_dtsh:
96  return kCodecDTSH;
97  case FOURCC_dtsl:
98  return kCodecDTSL;
99  case FOURCC_dtse:
100  return kCodecDTSE;
101  case FOURCC_dtsx:
102  return kCodecDTSX;
103  case FOURCC_dtsp:
104  return kCodecDTSP;
105  case FOURCC_dtsm:
106  return kCodecDTSM;
107  case FOURCC_ac_3:
108  return kCodecAC3;
109  case FOURCC_ec_3:
110  return kCodecEAC3;
111  case FOURCC_ac_4:
112  return kCodecAC4;
113  case FOURCC_alac:
114  return kCodecALAC;
115  case FOURCC_fLaC:
116  return kCodecFlac;
117  case FOURCC_iamf:
118  return kCodecIAMF;
119  case FOURCC_ipcm:
120  return kCodecPcm;
121  case FOURCC_mha1:
122  return kCodecMha1;
123  case FOURCC_mhm1:
124  return kCodecMhm1;
125  default:
126  return kUnknownCodec;
127  }
128 }
129 
130 Codec ObjectTypeToCodec(ObjectType object_type) {
131  switch (object_type) {
132  case ObjectType::kISO_14496_3:
133  case ObjectType::kISO_13818_7_AAC_LC:
134  return kCodecAAC;
135  case ObjectType::kDTSC:
136  return kCodecDTSC;
137  case ObjectType::kDTSE:
138  return kCodecDTSE;
139  case ObjectType::kDTSH:
140  return kCodecDTSH;
141  case ObjectType::kDTSL:
142  return kCodecDTSL;
143  default:
144  return kUnknownCodec;
145  }
146 }
147 
148 std::vector<uint8_t> GetDOVIDecoderConfig(
149  const std::vector<CodecConfiguration>& configs) {
150  for (const CodecConfiguration& config : configs) {
151  if (config.box_type == FOURCC_dvcC || config.box_type == FOURCC_dvvC) {
152  return config.data;
153  }
154  }
155  return std::vector<uint8_t>();
156 }
157 
158 bool UpdateCodecStringForDolbyVision(
159  FourCC actual_format,
160  const std::vector<CodecConfiguration>& configs,
161  std::string* codec_string) {
162  DOVIDecoderConfigurationRecord dovi_config;
163  if (!dovi_config.Parse(GetDOVIDecoderConfig(configs))) {
164  LOG(ERROR) << "Failed to parse Dolby Vision decoder "
165  "configuration record.";
166  return false;
167  }
168  switch (actual_format) {
169  case FOURCC_dvh1:
170  case FOURCC_dvhe:
171  case FOURCC_dav1:
172  // Non-Backward compatibility mode. Replace the code string with
173  // Dolby Vision only.
174  *codec_string = dovi_config.GetCodecString(actual_format);
175  break;
176  case FOURCC_hev1:
177  // Backward compatibility mode. Two codecs are signalled: base codec
178  // without Dolby Vision and HDR with Dolby Vision.
179  *codec_string += ";" + dovi_config.GetCodecString(FOURCC_dvhe);
180  break;
181  case FOURCC_hvc1:
182  // See above.
183  *codec_string += ";" + dovi_config.GetCodecString(FOURCC_dvh1);
184  break;
185  case FOURCC_av01:
186  *codec_string += ";" + dovi_config.GetCodecString(FOURCC_dav1);
187  break;
188  default:
189  LOG(ERROR) << "Unsupported format with extra codec "
190  << FourCCToString(actual_format);
191  return false;
192  }
193  return true;
194 }
195 
196 bool UpdateDolbyVisionInfo(FourCC actual_format,
197  const std::vector<CodecConfiguration>& configs,
198  uint8_t transfer_characteristics,
199  std::string* codec_string,
200  std::string* dovi_supplemental_codec_string,
201  FourCC* dovi_compatible_brand) {
202  DOVIDecoderConfigurationRecord dovi_config;
203  if (!dovi_config.Parse(GetDOVIDecoderConfig(configs))) {
204  LOG(ERROR) << "Failed to parse Dolby Vision decoder "
205  "configuration record.";
206  return false;
207  }
208  switch (actual_format) {
209  case FOURCC_dvh1:
210  case FOURCC_dvhe:
211  case FOURCC_dav1:
212  // Non-Backward compatibility mode. Replace the code string with
213  // Dolby Vision only.
214  *codec_string = dovi_config.GetCodecString(actual_format);
215  break;
216  case FOURCC_hev1:
217  // Backward compatibility mode. Use supplemental codec indicating Dolby
218  // Dolby Vision content.
219  *dovi_supplemental_codec_string = dovi_config.GetCodecString(FOURCC_dvhe);
220  break;
221  case FOURCC_hvc1:
222  // See above.
223  *dovi_supplemental_codec_string = dovi_config.GetCodecString(FOURCC_dvh1);
224  break;
225  case FOURCC_av01:
226  *dovi_supplemental_codec_string = dovi_config.GetCodecString(FOURCC_dav1);
227  break;
228  default:
229  LOG(ERROR) << "Unsupported format with extra codec "
230  << FourCCToString(actual_format);
231  return false;
232  }
233  *dovi_compatible_brand =
234  dovi_config.GetDoViCompatibleBrand(transfer_characteristics);
235  return true;
236 }
237 
// Number of nanoseconds in one second; used when converting sample counts to
// nanosecond durations.
constexpr uint64_t kNanosecondsPerSecond = 1000000000ull;
239 
240 } // namespace
241 
242 MP4MediaParser::MP4MediaParser()
243  : state_(kWaitingForInit),
244  decryption_key_source_(NULL),
245  moof_head_(0),
246  mdat_tail_(0) {}
247 
248 MP4MediaParser::~MP4MediaParser() {}
249 
250 void MP4MediaParser::Init(const InitCB& init_cb,
251  const NewMediaSampleCB& new_media_sample_cb,
252  const NewTextSampleCB& new_text_sample_cb,
253  KeySource* decryption_key_source) {
254  DCHECK_EQ(state_, kWaitingForInit);
255  DCHECK(init_cb_ == nullptr);
256  DCHECK(init_cb != nullptr);
257  DCHECK(new_media_sample_cb != nullptr);
258 
259  ChangeState(kParsingBoxes);
260  init_cb_ = init_cb;
261  new_sample_cb_ = new_media_sample_cb;
262  decryption_key_source_ = decryption_key_source;
263  if (decryption_key_source)
264  decryptor_source_.reset(new DecryptorSource(decryption_key_source));
265 }
266 
267 void MP4MediaParser::Reset() {
268  queue_.Reset();
269  runs_.reset();
270  moof_head_ = 0;
271  mdat_tail_ = 0;
272 }
273 
274 bool MP4MediaParser::Flush() {
275  DCHECK_NE(state_, kWaitingForInit);
276  Reset();
277  ChangeState(kParsingBoxes);
278  return true;
279 }
280 
281 bool MP4MediaParser::Parse(const uint8_t* buf, int size) {
282  DCHECK_NE(state_, kWaitingForInit);
283 
284  if (state_ == kError)
285  return false;
286 
287  queue_.Push(buf, size);
288 
289  bool result, err = false;
290 
291  do {
292  if (state_ == kParsingBoxes) {
293  result = ParseBox(&err);
294  } else {
295  DCHECK_EQ(kEmittingSamples, state_);
296  result = EnqueueSample(&err);
297  if (result) {
298  int64_t max_clear = runs_->GetMaxClearOffset() + moof_head_;
299  err = !ReadAndDiscardMDATsUntil(max_clear);
300  }
301  }
302  } while (result && !err);
303 
304  if (err) {
305  DLOG(ERROR) << "Error while parsing MP4";
306  moov_.reset();
307  Reset();
308  ChangeState(kError);
309  return false;
310  }
311 
312  return true;
313 }
314 
315 bool MP4MediaParser::LoadMoov(const std::string& file_path) {
316  std::unique_ptr<File, FileCloser> file(
317  File::OpenWithNoBuffering(file_path.c_str(), "r"));
318  if (!file) {
319  LOG(ERROR) << "Unable to open media file '" << file_path << "'";
320  return false;
321  }
322  if (!file->Seek(0)) {
323  LOG(WARNING) << "Filesystem does not support seeking on file '" << file_path
324  << "'";
325  return false;
326  }
327 
328  uint64_t file_position(0);
329  bool mdat_seen(false);
330  while (true) {
331  const uint32_t kBoxHeaderReadSize(16);
332  std::vector<uint8_t> buffer(kBoxHeaderReadSize);
333  int64_t bytes_read = file->Read(&buffer[0], kBoxHeaderReadSize);
334  if (bytes_read == 0) {
335  LOG(ERROR) << "Could not find 'moov' box in file '" << file_path << "'";
336  return false;
337  }
338  if (bytes_read < kBoxHeaderReadSize) {
339  LOG(ERROR) << "Error reading media file '" << file_path << "'";
340  return false;
341  }
342  uint64_t box_size;
343  FourCC box_type;
344  bool err;
345  if (!BoxReader::StartBox(&buffer[0], kBoxHeaderReadSize, &box_type,
346  &box_size, &err)) {
347  LOG(ERROR) << "Could not start box from file '" << file_path << "'";
348  return false;
349  }
350  if (box_type == FOURCC_mdat) {
351  mdat_seen = true;
352  } else if (box_type == FOURCC_moov) {
353  if (!mdat_seen) {
354  // 'moov' is before 'mdat'. Nothing to do.
355  break;
356  }
357  // 'mdat' before 'moov'. Read and parse 'moov'.
358  if (!Parse(&buffer[0], bytes_read)) {
359  LOG(ERROR) << "Error parsing mp4 file '" << file_path << "'";
360  return false;
361  }
362  uint64_t bytes_to_read = box_size - bytes_read;
363  buffer.resize(bytes_to_read);
364  while (bytes_to_read > 0) {
365  bytes_read = file->Read(&buffer[0], bytes_to_read);
366  if (bytes_read <= 0) {
367  LOG(ERROR) << "Error reading 'moov' contents from file '" << file_path
368  << "'";
369  return false;
370  }
371  if (!Parse(&buffer[0], bytes_read)) {
372  LOG(ERROR) << "Error parsing mp4 file '" << file_path << "'";
373  return false;
374  }
375  bytes_to_read -= bytes_read;
376  }
377  queue_.Reset(); // So that we don't need to adjust data offsets.
378  mdat_tail_ = 0; // So it will skip boxes until mdat.
379  break; // Done.
380  }
381  file_position += box_size;
382  if (!file->Seek(file_position)) {
383  LOG(ERROR) << "Error skipping box in mp4 file '" << file_path << "'";
384  return false;
385  }
386  }
387  return true;
388 }
389 
390 bool MP4MediaParser::ParseBox(bool* err) {
391  const uint8_t* buf;
392  int size;
393  queue_.Peek(&buf, &size);
394  if (!size)
395  return false;
396 
397  std::unique_ptr<BoxReader> reader(BoxReader::ReadBox(buf, size, err));
398  if (reader.get() == NULL)
399  return false;
400 
401  if (reader->type() == FOURCC_mdat) {
402  if (!moov_) {
403  // For seekable files, we seek to the 'moov' and load the 'moov' first
404  // then seek back (see LoadMoov function for details); we do not support
405  // having 'mdat' before 'moov' for non-seekable files. The code ends up
406  // here only if it is a non-seekable file.
407  NOTIMPLEMENTED() << " Non-seekable Files with 'mdat' box before 'moov' "
408  "box is not supported.";
409  *err = true;
410  return false;
411  } else {
412  // This can happen if there are unused 'mdat' boxes, which is unusual
413  // but allowed by the spec. Ignore the 'mdat' and proceed.
414  LOG(INFO)
415  << "Ignore unused 'mdat' box - this could be as a result of extra "
416  "not usable 'mdat' or 'mdat' associated with unrecognized track.";
417  }
418  }
419 
420  // Set up mdat offset for ReadMDATsUntil().
421  mdat_tail_ = queue_.head() + reader->size();
422 
423  if (reader->type() == FOURCC_moov) {
424  *err = !ParseMoov(reader.get());
425  } else if (reader->type() == FOURCC_moof) {
426  moof_head_ = queue_.head();
427  *err = !ParseMoof(reader.get());
428 
429  // Return early to avoid evicting 'moof' data from queue. Auxiliary info may
430  // be located anywhere in the file, including inside the 'moof' itself.
431  // (Since 'default-base-is-moof' is mandated, no data references can come
432  // before the head of the 'moof', so keeping this box around is sufficient.)
433  return !(*err);
434  } else {
435  VLOG(2) << "Skipping top-level box: " << FourCCToString(reader->type());
436  }
437 
438  queue_.Pop(static_cast<int>(reader->size()));
439  return !(*err);
440 }
441 
442 bool MP4MediaParser::ParseMoov(BoxReader* reader) {
443  if (moov_)
444  return true; // Already parsed the 'moov' box.
445 
446  moov_.reset(new Movie);
447  RCHECK(moov_->Parse(reader));
448  runs_.reset();
449 
450  std::vector<std::shared_ptr<StreamInfo>> streams;
451 
452  bool use_dovi_supplemental =
453  absl::GetFlag(FLAGS_use_dovi_supplemental_codecs);
454 
455  for (std::vector<Track>::const_iterator track = moov_->tracks.begin();
456  track != moov_->tracks.end(); ++track) {
457  const int32_t timescale = track->media.header.timescale;
458 
459  // Calculate duration (based on timescale).
460  int64_t duration = 0;
461  if (track->media.header.duration > 0) {
462  duration = track->media.header.duration;
463  } else if (moov_->extends.header.fragment_duration > 0) {
464  DCHECK(moov_->header.timescale != 0);
465  duration = Rescale(moov_->extends.header.fragment_duration,
466  moov_->header.timescale,
467  timescale);
468  } else if (moov_->header.duration > 0 &&
469  moov_->header.duration != std::numeric_limits<uint64_t>::max()) {
470  DCHECK(moov_->header.timescale != 0);
471  duration =
472  Rescale(moov_->header.duration, moov_->header.timescale, timescale);
473  }
474 
475  const SampleDescription& samp_descr =
476  track->media.information.sample_table.description;
477 
478  size_t desc_idx = 0;
479 
480  // Read sample description index from mvex if it exists otherwise read
481  // from the first entry in Sample To Chunk box.
482  if (moov_->extends.tracks.size() > 0) {
483  for (size_t t = 0; t < moov_->extends.tracks.size(); t++) {
484  const TrackExtends& trex = moov_->extends.tracks[t];
485  if (trex.track_id == track->header.track_id) {
486  desc_idx = trex.default_sample_description_index;
487  break;
488  }
489  }
490  } else {
491  const std::vector<ChunkInfo>& chunk_info =
492  track->media.information.sample_table.sample_to_chunk.chunk_info;
493  RCHECK(chunk_info.size() > 0);
494  desc_idx = chunk_info[0].sample_description_index;
495  }
496  RCHECK(desc_idx > 0);
497  desc_idx -= 1; // BMFF descriptor index is one-based
498 
499  if (samp_descr.type == kAudio) {
500  RCHECK(!samp_descr.audio_entries.empty());
501 
502  // It is not uncommon to find otherwise-valid files with incorrect sample
503  // description indices, so we fail gracefully in that case.
504  if (desc_idx >= samp_descr.audio_entries.size())
505  desc_idx = 0;
506 
507  const AudioSampleEntry& entry = samp_descr.audio_entries[desc_idx];
508  const FourCC actual_format = entry.GetActualFormat();
509  Codec codec = FourCCToCodec(actual_format);
510  uint8_t num_channels = entry.channelcount;
511  uint32_t sampling_frequency = entry.samplerate;
512  uint64_t codec_delay_ns = 0;
513  uint8_t audio_object_type = 0;
514  uint32_t max_bitrate = 0;
515  uint32_t avg_bitrate = 0;
516  std::vector<uint8_t> codec_config;
517 
518  switch (actual_format) {
519  case FOURCC_mp4a: {
520  const DecoderConfigDescriptor& decoder_config =
521  entry.esds.es_descriptor.decoder_config_descriptor();
522  max_bitrate = decoder_config.max_bitrate();
523  avg_bitrate = decoder_config.avg_bitrate();
524 
525  codec = ObjectTypeToCodec(decoder_config.object_type());
526  if (codec == kCodecAAC) {
527  const AACAudioSpecificConfig& aac_audio_specific_config =
528  entry.esds.aac_audio_specific_config;
529  num_channels = aac_audio_specific_config.GetNumChannels();
530  sampling_frequency =
531  aac_audio_specific_config.GetSamplesPerSecond();
532  audio_object_type = aac_audio_specific_config.GetAudioObjectType();
533  codec_config =
534  decoder_config.decoder_specific_info_descriptor().data();
535  } else if (codec == kUnknownCodec) {
536  // Intentionally not to fail in the parser as there may be multiple
537  // streams in the source content, which allows the supported stream
538  // to be packaged. An error will be returned if the unsupported
539  // stream is passed to the muxer.
540  LOG(WARNING) << "Unsupported audio object type "
541  << static_cast<int>(decoder_config.object_type())
542  << " in stsd.es_desriptor.";
543  }
544  break;
545  }
546  case FOURCC_dtsc:
547  FALLTHROUGH_INTENDED;
548  case FOURCC_dtse:
549  FALLTHROUGH_INTENDED;
550  case FOURCC_dtsh:
551  FALLTHROUGH_INTENDED;
552  case FOURCC_dtsl:
553  FALLTHROUGH_INTENDED;
554  case FOURCC_dtsm:
555  codec_config = entry.ddts.extra_data;
556  max_bitrate = entry.ddts.max_bitrate;
557  avg_bitrate = entry.ddts.avg_bitrate;
558  break;
559  case FOURCC_dtsx:
560  codec_config = entry.udts.data;
561  break;
562  case FOURCC_ac_3:
563  codec_config = entry.dac3.data;
564  num_channels = static_cast<uint8_t>(GetAc3NumChannels(codec_config));
565  break;
566  case FOURCC_ec_3:
567  codec_config = entry.dec3.data;
568  num_channels = static_cast<uint8_t>(GetEc3NumChannels(codec_config));
569  break;
570  case FOURCC_ac_4:
571  codec_config = entry.dac4.data;
572  // Stop the process if have errors when parsing AC-4 dac4 box,
573  // bitstream version 0 (has beed deprecated) and contains multiple
574  // presentations in single AC-4 stream (only used for broadcast).
575  if (!GetAc4CodecInfo(codec_config, &audio_object_type)) {
576  LOG(ERROR) << "Failed to parse dac4.";
577  return false;
578  }
579  break;
580  case FOURCC_alac:
581  codec_config = entry.alac.data;
582  break;
583  case FOURCC_fLaC:
584  codec_config = entry.dfla.data;
585  break;
586  case FOURCC_Opus:
587  codec_config = entry.dops.opus_identification_header;
588  codec_delay_ns =
589  entry.dops.preskip * kNanosecondsPerSecond / sampling_frequency;
590  break;
591  case FOURCC_iamf:
592  codec_config = entry.iacb.data;
593  if (!GetIamfCodecStringInfo(codec_config, audio_object_type)) {
594  LOG(ERROR) << "Failed to parse iamf.";
595  return false;
596  }
597  break;
598  case FOURCC_mha1:
599  case FOURCC_mhm1:
600  codec_config = entry.mhac.data;
601  audio_object_type = entry.mhac.mpeg_h_3da_profile_level_indication;
602  break;
603  default:
604  // Intentionally not to fail in the parser as there may be multiple
605  // streams in the source content, which allows the supported stream to
606  // be packaged.
607  // An error will be returned if the unsupported stream is passed to
608  // the muxer.
609  LOG(WARNING) << "Unsupported audio format '"
610  << FourCCToString(actual_format) << "' in stsd box.";
611  break;
612  }
613 
614  // Extract possible seek preroll.
615  uint64_t seek_preroll_ns = 0;
616  for (const auto& sample_group_description :
617  track->media.information.sample_table.sample_group_descriptions) {
618  if (sample_group_description.grouping_type != FOURCC_roll)
619  continue;
620  const auto& audio_roll_recovery_entries =
621  sample_group_description.audio_roll_recovery_entries;
622  if (audio_roll_recovery_entries.size() != 1) {
623  LOG(WARNING) << "Unexpected number of entries in "
624  "SampleGroupDescription table with grouping type "
625  "'roll'.";
626  break;
627  }
628  const int16_t roll_distance_in_samples =
629  audio_roll_recovery_entries[0].roll_distance;
630  if (roll_distance_in_samples < 0) {
631  // IAMF requires the `samplerate` field to be set to 0.
632  // (https://aomediacodec.github.io/iamf/#iasampleentry-section)
633  if (actual_format == FOURCC_iamf)
634  continue;
635 
636  RCHECK((sampling_frequency != 0));
637  seek_preroll_ns = kNanosecondsPerSecond *
638  (-roll_distance_in_samples) / sampling_frequency;
639  } else {
640  LOG(WARNING)
641  << "Roll distance is supposed to be negative, but seeing "
642  << roll_distance_in_samples;
643  }
644  break;
645  }
646 
647  // The stream will be decrypted if a |decryptor_source_| is available.
648  const bool is_encrypted =
649  decryptor_source_
650  ? false
651  : entry.sinf.info.track_encryption.default_is_protected == 1;
652  DVLOG(1) << "is_audio_track_encrypted_: " << is_encrypted;
653  streams.emplace_back(new AudioStreamInfo(
654  track->header.track_id, timescale, duration, codec,
655  AudioStreamInfo::GetCodecString(codec, audio_object_type),
656  codec_config.data(), codec_config.size(), entry.samplesize,
657  num_channels, sampling_frequency, seek_preroll_ns, codec_delay_ns,
658  max_bitrate, avg_bitrate, track->media.header.language.code,
659  is_encrypted));
660  }
661 
662  if (samp_descr.type == kVideo) {
663  RCHECK(!samp_descr.video_entries.empty());
664  if (desc_idx >= samp_descr.video_entries.size())
665  desc_idx = 0;
666  const VideoSampleEntry& entry = samp_descr.video_entries[desc_idx];
667  std::vector<uint8_t> codec_configuration_data =
668  entry.codec_configuration.data;
669 
670  uint32_t coded_width = entry.width;
671  uint32_t coded_height = entry.height;
672  uint32_t pixel_width = entry.pixel_aspect.h_spacing;
673  uint32_t pixel_height = entry.pixel_aspect.v_spacing;
674  if (pixel_width == 0 && pixel_height == 0) {
675  DerivePixelWidthHeight(coded_width, coded_height, track->header.width,
676  track->header.height, &pixel_width,
677  &pixel_height);
678  }
679  std::string codec_string;
680  std::string dovi_supplemental_codec_string("");
681  FourCC dovi_compatible_brand = FOURCC_NULL;
682  uint8_t nalu_length_size = 0;
683  uint8_t transfer_characteristics = 0;
684  uint8_t color_primaries = 0;
685  uint8_t matrix_coefficients = 0;
686 
687  const FourCC actual_format = entry.GetActualFormat();
688  const Codec video_codec = FourCCToCodec(actual_format);
689  switch (actual_format) {
690  case FOURCC_av01: {
691  AV1CodecConfigurationRecord av1_config;
692  if (!av1_config.Parse(codec_configuration_data)) {
693  LOG(ERROR) << "Failed to parse av1c.";
694  return false;
695  }
696  // Generate the full codec string if the colr atom is present.
697  if (entry.colr.color_parameter_type != FOURCC_NULL) {
698  transfer_characteristics = entry.colr.transfer_characteristics;
699  color_primaries = entry.colr.color_primaries;
700  matrix_coefficients = entry.colr.matrix_coefficients;
701  codec_string = av1_config.GetCodecString(
702  color_primaries, transfer_characteristics, matrix_coefficients,
703  entry.colr.video_full_range_flag);
704  } else {
705  codec_string = av1_config.GetCodecString();
706  }
707 
708  if (!entry.extra_codec_configs.empty()) {
709  // |extra_codec_configs| is present only for Dolby Vision.
710  if (use_dovi_supplemental) {
711  if (!UpdateDolbyVisionInfo(
712  actual_format, entry.extra_codec_configs,
713  transfer_characteristics, &codec_string,
714  &dovi_supplemental_codec_string,
715  &dovi_compatible_brand)) {
716  return false;
717  }
718  } else {
719  if (!UpdateCodecStringForDolbyVision(actual_format,
720  entry.extra_codec_configs,
721  &codec_string)) {
722  return false;
723  }
724  }
725  }
726  break;
727  }
728  case FOURCC_avc1:
729  case FOURCC_avc3: {
730  AVCDecoderConfigurationRecord avc_config;
731  if (!avc_config.Parse(codec_configuration_data)) {
732  LOG(ERROR) << "Failed to parse avcc.";
733  return false;
734  }
735  codec_string = avc_config.GetCodecString(actual_format);
736  nalu_length_size = avc_config.nalu_length_size();
737  transfer_characteristics = avc_config.transfer_characteristics();
738  color_primaries = avc_config.color_primaries();
739  matrix_coefficients = avc_config.matrix_coefficients();
740 
741  // Use configurations from |avc_config| if it is valid.
742  if (avc_config.coded_width() != 0) {
743  DCHECK_NE(avc_config.coded_height(), 0u);
744  if (coded_width != avc_config.coded_width() ||
745  coded_height != avc_config.coded_height()) {
746  LOG(WARNING) << "Resolution in VisualSampleEntry (" << coded_width
747  << "," << coded_height
748  << ") does not match with resolution in "
749  "AVCDecoderConfigurationRecord ("
750  << avc_config.coded_width() << ","
751  << avc_config.coded_height()
752  << "). Use AVCDecoderConfigurationRecord.";
753  coded_width = avc_config.coded_width();
754  coded_height = avc_config.coded_height();
755  }
756 
757  DCHECK_NE(avc_config.pixel_width(), 0u);
758  DCHECK_NE(avc_config.pixel_height(), 0u);
759  if (pixel_width != avc_config.pixel_width() ||
760  pixel_height != avc_config.pixel_height()) {
761  LOG_IF(WARNING, pixel_width != 1 || pixel_height != 1)
762  << "Pixel aspect ratio in PASP box (" << pixel_width << ","
763  << pixel_height
764  << ") does not match with SAR in "
765  "AVCDecoderConfigurationRecord "
766  "("
767  << avc_config.pixel_width() << ","
768  << avc_config.pixel_height()
769  << "). Use AVCDecoderConfigurationRecord.";
770  pixel_width = avc_config.pixel_width();
771  pixel_height = avc_config.pixel_height();
772  }
773  }
774  break;
775  }
776  case FOURCC_dvh1:
777  case FOURCC_dvhe:
778  case FOURCC_hev1:
779  case FOURCC_hvc1: {
780  HEVCDecoderConfigurationRecord hevc_config;
781  if (!hevc_config.Parse(codec_configuration_data)) {
782  LOG(ERROR) << "Failed to parse hevc.";
783  return false;
784  }
785  codec_string = hevc_config.GetCodecString(actual_format);
786  nalu_length_size = hevc_config.nalu_length_size();
787  transfer_characteristics = hevc_config.transfer_characteristics();
788  color_primaries = hevc_config.color_primaries();
789  matrix_coefficients = hevc_config.matrix_coefficients();
790 
791  if (!entry.extra_codec_configs.empty()) {
792  // |extra_codec_configs| is present only for Dolby Vision.
793  if (use_dovi_supplemental) {
794  if (!UpdateDolbyVisionInfo(
795  actual_format, entry.extra_codec_configs,
796  transfer_characteristics, &codec_string,
797  &dovi_supplemental_codec_string,
798  &dovi_compatible_brand)) {
799  return false;
800  }
801  } else {
802  if (!UpdateCodecStringForDolbyVision(actual_format,
803  entry.extra_codec_configs,
804  &codec_string)) {
805  return false;
806  }
807  }
808  }
809  break;
810  }
811  case FOURCC_vp08:
812  case FOURCC_vp09: {
813  VPCodecConfigurationRecord vp_config;
814  if (!vp_config.ParseMP4(codec_configuration_data)) {
815  LOG(ERROR) << "Failed to parse vpcc.";
816  return false;
817  }
818  if (actual_format == FOURCC_vp09 &&
819  (!vp_config.is_level_set() || vp_config.level() == 0)) {
820  const double kUnknownSampleDuration = 0.0;
821  vp_config.SetVP9Level(coded_width, coded_height,
822  kUnknownSampleDuration);
823  vp_config.WriteMP4(&codec_configuration_data);
824  }
825  codec_string = vp_config.GetCodecString(video_codec);
826  break;
827  }
828  default:
829  // Intentionally not to fail in the parser as there may be multiple
830  // streams in the source content, which allows the supported stream to
831  // be packaged.
832  // An error will be returned if the unsupported stream is passed to
833  // the muxer.
834  LOG(WARNING) << "Unsupported video format '"
835  << FourCCToString(actual_format) << "' in stsd box.";
836  break;
837  }
838 
839  // The stream will be decrypted if a |decryptor_source_| is available.
840  const bool is_encrypted =
841  decryptor_source_
842  ? false
843  : entry.sinf.info.track_encryption.default_is_protected == 1;
844  DVLOG(1) << "is_video_track_encrypted_: " << is_encrypted;
845  std::shared_ptr<VideoStreamInfo> video_stream_info(new VideoStreamInfo(
846  track->header.track_id, timescale, duration, video_codec,
847  GetH26xStreamFormat(actual_format), codec_string,
848  codec_configuration_data.data(), codec_configuration_data.size(),
849  coded_width, coded_height, pixel_width, pixel_height, color_primaries,
850  matrix_coefficients, transfer_characteristics,
851  0, // trick_play_factor
852  nalu_length_size, track->media.header.language.code, is_encrypted));
853 
854  if (use_dovi_supplemental) {
855  video_stream_info->set_supplemental_codec(
856  dovi_supplemental_codec_string);
857  video_stream_info->set_compatible_brand(dovi_compatible_brand);
858  }
859  video_stream_info->set_extra_config(entry.ExtraCodecConfigsAsVector());
860  video_stream_info->set_colr_data((entry.colr.raw_box).data(),
861  (entry.colr.raw_box).size());
862 
863  // Set pssh raw data if it has.
864  if (moov_->pssh.size() > 0) {
865  std::vector<uint8_t> pssh_raw_data;
866  for (const auto& pssh : moov_->pssh) {
867  pssh_raw_data.insert(pssh_raw_data.end(), pssh.raw_box.begin(),
868  pssh.raw_box.end());
869  }
870  video_stream_info->set_eme_init_data(pssh_raw_data.data(),
871  pssh_raw_data.size());
872  }
873 
874  streams.push_back(video_stream_info);
875  }
876  }
877 
878  init_cb_(streams);
879  if (!FetchKeysIfNecessary(moov_->pssh))
880  return false;
881  runs_.reset(new TrackRunIterator(moov_.get()));
882  RCHECK(runs_->Init());
883  ChangeState(kEmittingSamples);
884  return true;
885 }
886 
887 bool MP4MediaParser::ParseMoof(BoxReader* reader) {
888  // Must already have initialization segment.
889  RCHECK(moov_.get());
890  MovieFragment moof;
891  RCHECK(moof.Parse(reader));
892  if (!runs_)
893  runs_.reset(new TrackRunIterator(moov_.get()));
894  RCHECK(runs_->Init(moof));
895  if (!FetchKeysIfNecessary(moof.pssh))
896  return false;
897  ChangeState(kEmittingSamples);
898  return true;
899 }
900 
901 bool MP4MediaParser::FetchKeysIfNecessary(
902  const std::vector<ProtectionSystemSpecificHeader>& headers) {
903  if (headers.empty())
904  return true;
905 
906  // An error will be returned later if the samples need to be decrypted.
907  if (!decryption_key_source_)
908  return true;
909 
910  std::vector<uint8_t> pssh_raw_data;
911  for (const auto& header : headers) {
912  pssh_raw_data.insert(pssh_raw_data.end(), header.raw_box.begin(),
913  header.raw_box.end());
914  }
915  Status status =
916  decryption_key_source_->FetchKeys(EmeInitDataType::CENC, pssh_raw_data);
917  if (!status.ok()) {
918  LOG(ERROR) << "Error fetching decryption keys: " << status;
919  return false;
920  }
921  return true;
922 }
923 
924 bool MP4MediaParser::EnqueueSample(bool* err) {
925  if (!runs_->IsRunValid()) {
926  // Remain in kEnqueueingSamples state, discarding data, until the end of
927  // the current 'mdat' box has been appended to the queue.
928  if (!queue_.Trim(mdat_tail_))
929  return false;
930 
931  ChangeState(kParsingBoxes);
932  return true;
933  }
934 
935  if (!runs_->IsSampleValid()) {
936  runs_->AdvanceRun();
937  return true;
938  }
939 
940  DCHECK(!(*err));
941 
942  const uint8_t* buf;
943  int buf_size;
944  queue_.Peek(&buf, &buf_size);
945  if (!buf_size)
946  return false;
947 
948  // Skip this entire track if it is not audio nor video.
949  if (!runs_->is_audio() && !runs_->is_video())
950  runs_->AdvanceRun();
951 
952  // Attempt to cache the auxiliary information first. Aux info is usually
953  // placed in a contiguous block before the sample data, rather than being
954  // interleaved. If we didn't cache it, this would require that we retain the
955  // start of the segment buffer while reading samples. Aux info is typically
956  // quite small compared to sample data, so this pattern is useful on
957  // memory-constrained devices where the source buffer consumes a substantial
958  // portion of the total system memory.
959  if (runs_->AuxInfoNeedsToBeCached()) {
960  queue_.PeekAt(runs_->aux_info_offset() + moof_head_, &buf, &buf_size);
961  if (buf_size < runs_->aux_info_size())
962  return false;
963  *err = !runs_->CacheAuxInfo(buf, buf_size);
964  return !*err;
965  }
966 
967  int64_t sample_offset = runs_->sample_offset() + moof_head_;
968  queue_.PeekAt(sample_offset, &buf, &buf_size);
969  if (buf_size < runs_->sample_size()) {
970  if (sample_offset < queue_.head()) {
971  LOG(ERROR) << "Incorrect sample offset " << sample_offset
972  << " < " << queue_.head();
973  *err = true;
974  }
975  return false;
976  }
977 
978  const uint8_t* media_data = buf;
979  const size_t media_data_size = runs_->sample_size();
980  // Use a dummy data size of 0 to avoid copying overhead.
981  // Actual media data is set later.
982  const size_t kDummyDataSize = 0;
983  std::shared_ptr<MediaSample> stream_sample(
984  MediaSample::CopyFrom(media_data, kDummyDataSize, runs_->is_keyframe()));
985 
986  if (runs_->is_encrypted()) {
987  std::shared_ptr<uint8_t> decrypted_media_data(
988  new uint8_t[media_data_size], std::default_delete<uint8_t[]>());
989  std::unique_ptr<DecryptConfig> decrypt_config = runs_->GetDecryptConfig();
990  if (!decrypt_config) {
991  *err = true;
992  LOG(ERROR) << "Missing decrypt config.";
993  return false;
994  }
995 
996  if (!decryptor_source_) {
997  stream_sample->SetData(media_data, media_data_size);
998  // If the demuxer does not have the decryptor_source_, store
999  // decrypt_config so that the demuxed sample can be decrypted later.
1000  stream_sample->set_decrypt_config(std::move(decrypt_config));
1001  stream_sample->set_is_encrypted(true);
1002  } else {
1003  if (!decryptor_source_->DecryptSampleBuffer(decrypt_config.get(),
1004  media_data, media_data_size,
1005  decrypted_media_data.get())) {
1006  *err = true;
1007  LOG(ERROR) << "Cannot decrypt samples.";
1008  return false;
1009  }
1010  stream_sample->TransferData(std::move(decrypted_media_data),
1011  media_data_size);
1012  }
1013  } else {
1014  stream_sample->SetData(media_data, media_data_size);
1015  }
1016 
1017  stream_sample->set_dts(runs_->dts());
1018  stream_sample->set_pts(runs_->cts());
1019  stream_sample->set_duration(runs_->duration());
1020 
1021  DVLOG(3) << "Pushing frame: "
1022  << ", key=" << runs_->is_keyframe()
1023  << ", dur=" << runs_->duration()
1024  << ", dts=" << runs_->dts()
1025  << ", cts=" << runs_->cts()
1026  << ", size=" << runs_->sample_size();
1027 
1028  if (!new_sample_cb_(runs_->track_id(), stream_sample)) {
1029  *err = true;
1030  LOG(ERROR) << "Failed to process the sample.";
1031  return false;
1032  }
1033 
1034  runs_->AdvanceSample();
1035  return true;
1036 }
1037 
1038 bool MP4MediaParser::ReadAndDiscardMDATsUntil(const int64_t offset) {
1039  bool err = false;
1040  while (mdat_tail_ < offset) {
1041  const uint8_t* buf;
1042  int size;
1043  queue_.PeekAt(mdat_tail_, &buf, &size);
1044 
1045  FourCC type;
1046  uint64_t box_sz;
1047  if (!BoxReader::StartBox(buf, size, &type, &box_sz, &err))
1048  break;
1049 
1050  mdat_tail_ += box_sz;
1051  }
1052  queue_.Trim(std::min(mdat_tail_, offset));
1053  return !err;
1054 }
1055 
1056 void MP4MediaParser::ChangeState(State new_state) {
1057  DVLOG(2) << "Changing state: " << new_state;
1058  state_ = new_state;
1059 }
1060 
1061 } // namespace mp4
1062 } // namespace media
1063 } // namespace shaka
DecryptorSource wraps KeySource and is responsible for decryptor management.
KeySource is responsible for encryption key acquisition.
Definition: key_source.h:52
std::function< bool(uint32_t track_id, std::shared_ptr< MediaSample > media_sample)> NewMediaSampleCB
Definition: media_parser.h:45
std::function< bool(uint32_t track_id, std::shared_ptr< TextSample > text_sample)> NewTextSampleCB
Definition: media_parser.h:54
std::function< void(const std::vector< std::shared_ptr< StreamInfo > > &stream_info)> InitCB
Definition: media_parser.h:36
All the methods that are virtual are virtual for mocking.
Definition: crypto_flags.cc:66