5 #include <packager/media/formats/mp4/track_run_iterator.h>
10 #include <absl/flags/flag.h>
11 #include <absl/log/check.h>
13 #include <packager/macros/logging.h>
14 #include <packager/media/base/buffer_reader.h>
15 #include <packager/media/base/fourccs.h>
16 #include <packager/media/base/rcheck.h>
17 #include <packager/media/formats/mp4/chunk_info_iterator.h>
18 #include <packager/media/formats/mp4/composition_offset_iterator.h>
19 #include <packager/media/formats/mp4/decoding_time_iterator.h>
20 #include <packager/media/formats/mp4/sync_sample_iterator.h>
23 mp4_reset_initial_composition_offset_to_zero,
25 "MP4 only. If it is true, reset the initial composition offset to "
26 "zero, i.e. by assuming that there is a missing EditList.");
// Sentinel for "no offset". It is the largest int64_t value, so taking
// std::min() of it and any real offset always yields the real offset.
constexpr int64_t kInvalidOffset = std::numeric_limits<int64_t>::max();
/// Converts a timestamp from one timescale to another.
///
/// Computes `time_in_old_scale * new_scale / old_scale`, truncated toward
/// zero. The work is done in integer arithmetic so that the result is exact:
/// dividing first in floating point can land just below an integer and then
/// truncate to the wrong value (e.g. 58/100.0*100 == 57.99999999999999), and a
/// double cannot represent every timestamp above 2^53.
///
/// @param time_in_old_scale Timestamp expressed in units of 1/old_scale.
/// @param old_scale Timescale of the input. Expected to be positive; a value
///        of 0 is invalid and yields 0 rather than dividing by zero.
/// @param new_scale Timescale of the result.
/// @return The timestamp expressed in units of 1/new_scale. The mathematical
///         result must be representable as int64_t.
int64_t Rescale(int64_t time_in_old_scale,
                int64_t old_scale,
                int64_t new_scale) {
  // Converting the resulting inf/NaN to an integer would be undefined.
  if (old_scale == 0)
    return 0;

  // time * new / old == whole * new + remainder * new / old, which keeps the
  // intermediate products far smaller than time * new.
  const int64_t whole = time_in_old_scale / old_scale;
  const int64_t remainder = time_in_old_scale % old_scale;

  // True when a * b is guaranteed to fit in int64_t.
  const auto product_fits = [](int64_t a, int64_t b) {
    constexpr int64_t kMax = std::numeric_limits<int64_t>::max();
    if (a == 0 || b == 0)
      return true;
    if (a < -kMax || b < -kMax)  // INT64_MIN has no positive counterpart.
      return false;
    const int64_t abs_a = a < 0 ? -a : a;
    const int64_t abs_b = b < 0 ? -b : b;
    return abs_a <= kMax / abs_b;
  };

  if (product_fits(whole, new_scale) && product_fits(remainder, new_scale))
    return whole * new_scale + remainder * new_scale / old_scale;

  // Extremely large operands: fall back to the approximate floating-point
  // computation instead of overflowing the integer products.
  return static_cast<int64_t>(
      (static_cast<double>(time_in_old_scale) / old_scale) * new_scale);
}
// NOTE(review): the struct header and some members are not visible in this
// excerpt; only the visible members are documented.
// Per-sample records of the run; resized to the run's sample count when the
// run is built.
52 std::vector<SampleInfo> samples;
// File offset of the first sample of the run; the constructor sets it to -1.
55 int64_t sample_start_offset;
// Sample entry describing the run; the constructor sets both pointers to NULL.
58 const AudioSampleEntry* audio_description;
59 const VideoSampleEntry* video_description;
// Sample encryption entries, looked up by the sample's position in the run.
63 std::vector<SampleEncryptionEntry> sample_encryption_entries;
// Auxiliary information bookkeeping. The start offset is only consulted by
// the visible code when aux_info_total_size is non-zero.
67 int64_t aux_info_start_offset;
68 int aux_info_default_size;
// Per-sample auxiliary info sizes, filled when aux_info_default_size is 0.
69 std::vector<uint8_t> aux_info_sizes;
70 int aux_info_total_size;
// Default-constructs a run in an "unset" state: offsets are -1, the sample
// entry pointers are null and the auxiliary info sizes are 0.
// NOTE(review): some member initializers are not visible in this excerpt.
76 TrackRunInfo::TrackRunInfo()
80 sample_start_offset(-1),
82 audio_description(NULL),
83 video_description(NULL),
84 aux_info_start_offset(-1),
85 aux_info_default_size(0),
86 aux_info_total_size(0) {}
87 TrackRunInfo::~TrackRunInfo() {}
// Binds the iterator to |moov| and starts with a zero sample DTS and a zero
// sample offset.
// NOTE(review): the remainder of the constructor body is not visible here.
89 TrackRunIterator::TrackRunIterator(
const Movie* moov)
90 : moov_(moov), sample_dts_(0), sample_offset_(0) {
94 TrackRunIterator::~TrackRunIterator() {}
// Fills |sample_info| for the sample at index |i| of |trun|.
// Size, duration and flags prefer the per-sample value stored in |trun| and
// otherwise fall back to the fragment header (|tfhd|) default, then to the
// track-extends (|trex|) default. The composition offset falls back to 0.
// NOTE(review): the |i| parameter declaration and the plain `else` lines are
// not visible in this excerpt.
96 static void PopulateSampleInfo(
const TrackExtends& trex,
97 const TrackFragmentHeader& tfhd,
98 const TrackFragmentRun& trun,
100 SampleInfo* sample_info) {
// Size: per-sample entry, else a positive tfhd default, else the trex default.
101 if (i < trun.sample_sizes.size()) {
102 sample_info->size = trun.sample_sizes[i];
103 }
else if (tfhd.default_sample_size > 0) {
104 sample_info->size = tfhd.default_sample_size;
106 sample_info->size = trex.default_sample_size;
// Duration: same fallback order as the size.
109 if (i < trun.sample_durations.size()) {
110 sample_info->duration = trun.sample_durations[i];
111 }
else if (tfhd.default_sample_duration > 0) {
112 sample_info->duration = tfhd.default_sample_duration;
114 sample_info->duration = trex.default_sample_duration;
// Composition time offset: per-sample entry, else 0.
117 if (i < trun.sample_composition_time_offsets.size()) {
118 sample_info->cts_offset = trun.sample_composition_time_offsets[i];
120 sample_info->cts_offset = 0;
// Flags: per-sample entry, else the tfhd default when its presence bit is set
// in tfhd.flags, else the trex default.
124 if (i < trun.sample_flags.size()) {
125 flags = trun.sample_flags[i];
126 }
else if (tfhd.flags & TrackFragmentHeader::kDefaultSampleFlagsPresentMask) {
127 flags = tfhd.default_sample_flags;
129 flags = trex.default_sample_flags;
// A sample is a keyframe when the non-key-sample bit is clear.
131 sample_info->is_keyframe = !(flags & TrackFragmentHeader::kNonKeySampleMask);
// Ordering predicate passed to std::sort over the runs. Runs are ordered by
// the smaller of their two data offsets (auxiliary info start, sample start);
// ties are broken by the larger of the two.
145 class CompareMinTrackRunDataOffset {
147 bool operator()(
const TrackRunInfo& a,
const TrackRunInfo& b) {
// A run without auxiliary info (total size 0) uses kInvalidOffset, so its
// sample offset is always the smaller value.
148 int64_t a_aux = a.aux_info_total_size ? a.aux_info_start_offset : kInvalidOffset;
149 int64_t b_aux = b.aux_info_total_size ? b.aux_info_start_offset : kInvalidOffset;
151 int64_t a_lesser = std::min(a_aux, a.sample_start_offset);
152 int64_t a_greater = std::max(a_aux, a.sample_start_offset);
153 int64_t b_lesser = std::min(b_aux, b.sample_start_offset);
154 int64_t b_greater = std::max(b_aux, b.sample_start_offset);
156 if (a_lesser == b_lesser)
157 return a_greater < b_greater;
158 return a_lesser < b_lesser;
// Builds runs from the sample tables of every track in |moov_| (the visible
// code pushes one TrackRunInfo per chunk), sorts them by data offset and
// points |run_itr_| at the first run.
// NOTE(review): the function header and a number of statements are not
// visible in this excerpt; the comments describe the visible lines only.
165 for (std::vector<Track>::const_iterator trak = moov_->tracks.begin();
166 trak != moov_->tracks.end(); ++trak) {
168 trak->media.information.sample_table.description;
// Only audio and video tracks are handled.
169 if (stsd.type != kAudio && stsd.type != kVideo) {
170 DVLOG(1) <<
"Skipping unhandled track type";
175 trak->media.information.sample_table.decoding_time_to_sample);
177 trak->media.information.sample_table.composition_time_to_sample);
178 bool has_composition_offset = composition_offset.
IsValid();
180 trak->media.information.sample_table.sample_to_chunk);
182 trak->media.information.sample_table.sync_sample);
187 trak->media.information.sample_table.sample_size;
188 const std::vector<uint64_t>& chunk_offset_vector =
189 trak->media.information.sample_table.chunk_large_offset.offsets;
// The first run starts at the track's timestamp adjustment; no fragment is
// involved here, hence the null third argument.
193 int64_t run_start_dts = GetTimestampAdjustment(*moov_, *trak,
nullptr);
195 uint32_t num_samples = sample_size.sample_count;
196 uint32_t num_chunks =
static_cast<uint32_t
>(chunk_offset_vector.size());
// Debug-only consistency checks: the tables must agree on the sample count.
199 DCHECK_EQ(num_samples, decoding_time.
NumSamples());
200 if (has_composition_offset) {
201 DCHECK_EQ(num_samples, composition_offset.
NumSamples());
203 if (num_chunks > 0) {
204 DCHECK_EQ(num_samples, chunk_info.
NumSamples(1, num_chunks));
208 if (num_samples > 0) {
210 RCHECK(decoding_time.
IsValid());
// |sample_index| counts samples across all chunks of the track.
214 uint32_t sample_index = 0;
215 for (uint32_t chunk_index = 0; chunk_index < num_chunks; ++chunk_index) {
219 tri.track_id = trak->header.track_id;
220 tri.timescale = trak->media.header.timescale;
221 tri.start_dts = run_start_dts;
222 tri.sample_start_offset = chunk_offset_vector[chunk_index];
225 RCHECK(desc_idx > 0);
228 tri.track_type = stsd.type;
229 if (tri.track_type == kAudio) {
230 RCHECK(!stsd.audio_entries.empty());
// NOTE(review): `>` still admits desc_idx == audio_entries.size(), which is
// one past the last valid index for the subscript below; `>=` looks intended.
// Confirm against the (not visible) adjustment of desc_idx.
231 if (desc_idx > stsd.audio_entries.size())
233 tri.audio_description = &stsd.audio_entries[desc_idx];
// This path only accepts unprotected tracks.
235 RCHECK(tri.audio_description->sinf.info.track_encryption
236 .default_is_protected == 0);
237 }
else if (tri.track_type == kVideo) {
238 RCHECK(!stsd.video_entries.empty());
// NOTE(review): same bound concern as for the audio entries above.
239 if (desc_idx > stsd.video_entries.size())
241 tri.video_description = &stsd.video_entries[desc_idx];
243 RCHECK(tri.video_description->sinf.info.track_encryption
244 .default_is_protected == 0);
248 tri.samples.resize(samples_per_chunk);
249 for (uint32_t k = 0; k < samples_per_chunk; ++k) {
250 SampleInfo& sample = tri.samples[k];
// A non-zero sample_size.sample_size is a constant size for every sample;
// otherwise the per-sample table is used.
251 sample.size = sample_size.sample_size != 0
252 ? sample_size.sample_size
253 : sample_size.sizes[sample_index];
256 has_composition_offset ? composition_offset.
sample_offset() : 0;
// The next sample (and the next run) starts where this sample ends.
259 run_start_dts += sample.duration;
264 if (sample_index == num_samples) {
268 if (has_composition_offset)
272 if (has_composition_offset)
277 runs_.push_back(tri);
// Order the runs by where their data sits in the file.
281 std::sort(runs_.begin(), runs_.end(), CompareMinTrackRunDataOffset());
282 run_itr_ = runs_.begin();
// Builds one TrackRunInfo per track fragment run found in |moof|, sorts the
// runs by data offset and points |run_itr_| at the first run.
// NOTE(review): the function header and a number of statements are not
// visible in this excerpt; the comments describe the visible lines only.
290 const auto track_count = std::max(moof.tracks.size(), moov_->tracks.size());
291 next_fragment_start_dts_.resize(track_count, 0);
292 for (
size_t i = 0; i < moof.tracks.size(); i++) {
// NOTE(review): |track_index| later indexes next_fragment_start_dts_, which
// holds |track_count| entries. A track_id of 0, or one larger than
// |track_count|, would index out of range; confirm it is validated elsewhere.
294 const auto track_index = traf.header.track_id - 1;
// Find the track and the track-extends entry matching the fragment's track id.
295 const Track* trak = NULL;
296 for (
size_t t = 0; t < moov_->tracks.size(); t++) {
297 if (moov_->tracks[t].header.track_id == traf.header.track_id)
298 trak = &moov_->tracks[t];
303 for (
size_t t = 0; t < moov_->extends.tracks.size(); t++) {
304 if (moov_->extends.tracks[t].track_id == traf.header.track_id)
305 trex = &moov_->extends.tracks[t];
310 trak->media.information.sample_table.description;
// Only audio and video tracks are handled.
311 if (stsd.type != kAudio && stsd.type != kVideo) {
312 DVLOG(1) <<
"Skipping unhandled track type";
// Sample description index from the fragment header, with the trex default
// as the alternative (the selecting condition is not visible here).
315 size_t desc_idx = traf.header.sample_description_index;
317 desc_idx = trex->default_sample_description_index;
318 RCHECK(desc_idx > 0);
325 RCHECK(!stsd.audio_entries.empty());
// NOTE(review): `>` still admits desc_idx == audio_entries.size(), which is
// one past the last valid index for the subscript below; `>=` looks intended.
326 if (desc_idx > stsd.audio_entries.size())
328 audio_sample_entry = &stsd.audio_entries[desc_idx];
331 RCHECK(!stsd.video_entries.empty());
// NOTE(review): same bound concern as for the audio entries above.
332 if (desc_idx > stsd.video_entries.size())
334 video_sample_entry = &stsd.video_entries[desc_idx];
343 DCHECK(traf.sample_encryption.sample_encryption_entries.empty());
344 std::vector<SampleEncryptionEntry> sample_encryption_entries;
346 RCHECK(audio_sample_entry || video_sample_entry);
// The per-sample IV size comes from whichever sample entry was selected.
347 const uint8_t default_per_sample_iv_size =
349 ? audio_sample_entry->sinf.info.track_encryption
350 .default_per_sample_iv_size
351 : video_sample_entry->sinf.info.track_encryption
352 .default_per_sample_iv_size;
354 default_per_sample_iv_size, &sample_encryption_entries));
// When the fragment carries no decode time, continue from where the previous
// fragment of this track ended.
357 int64_t run_start_dts = traf.decode_time_absent
358 ? next_fragment_start_dts_[track_index]
359 : traf.decode_time.decode_time;
363 run_start_dts += GetTimestampAdjustment(*moov_, *trak, &traf);
// Number of samples consumed by the earlier runs of this fragment.
365 int sample_count_sum = 0;
367 for (
size_t j = 0; j < traf.runs.size(); j++) {
370 tri.track_id = traf.header.track_id;
371 tri.timescale = trak->media.header.timescale;
372 tri.start_dts = run_start_dts;
373 tri.sample_start_offset = trun.data_offset;
375 tri.track_type = stsd.type;
376 tri.audio_description = audio_sample_entry;
377 tri.video_description = video_sample_entry;
379 tri.aux_info_start_offset = -1;
380 tri.aux_info_total_size = 0;
// Prefer already parsed sample encryption entries; otherwise record where the
// auxiliary information for this run lives.
384 if (!sample_encryption_entries.empty()) {
// NOTE(review): the right-hand side mixes the int |sample_count_sum| with
// trun.sample_count, compared against a size_t; check for sign/width issues.
385 RCHECK(sample_encryption_entries.size() >=
386 sample_count_sum + trun.sample_count);
387 for (
size_t k = 0; k < trun.sample_count; ++k) {
388 tri.sample_encryption_entries.push_back(
389 sample_encryption_entries[sample_count_sum + k]);
391 }
else if (traf.auxiliary_offset.offsets.size() > j) {
395 tri.aux_info_start_offset = traf.auxiliary_offset.offsets[j];
398 RCHECK(traf.auxiliary_size.sample_count >=
399 sample_count_sum + trun.sample_count);
400 tri.aux_info_default_size =
401 traf.auxiliary_size.default_sample_info_size;
// A default size of 0 means the sizes are listed per sample; copy this run's
// slice of them.
402 if (tri.aux_info_default_size == 0) {
403 const std::vector<uint8_t>& sizes =
404 traf.auxiliary_size.sample_info_sizes;
405 tri.aux_info_sizes.insert(
406 tri.aux_info_sizes.begin(),
407 sizes.begin() + sample_count_sum,
408 sizes.begin() + sample_count_sum + trun.sample_count);
// Total auxiliary info size: default size times sample count, or the sum of
// the per-sample sizes.
// NOTE(review): the total is accumulated in an int; confirm it cannot
// overflow for large sample counts.
414 if (tri.aux_info_default_size) {
415 tri.aux_info_total_size =
416 tri.aux_info_default_size * trun.sample_count;
418 tri.aux_info_total_size = 0;
419 for (
size_t k = 0; k < trun.sample_count; k++) {
420 tri.aux_info_total_size += tri.aux_info_sizes[k];
425 tri.samples.resize(trun.sample_count);
426 for (
size_t k = 0; k < trun.sample_count; k++) {
427 PopulateSampleInfo(*trex, traf.header, trun, k, &tri.samples[k]);
// The next sample (and the next run) starts where this sample ends.
428 run_start_dts += tri.samples[k].duration;
430 runs_.push_back(tri);
431 sample_count_sum += trun.sample_count;
// Remember where this fragment ended for a later fragment without decode time.
433 next_fragment_start_dts_[track_index] = run_start_dts;
// Order the runs by where their data sits in the file.
436 std::sort(runs_.begin(), runs_.end(), CompareMinTrackRunDataOffset());
437 run_itr_ = runs_.begin();
// Rewinds the per-sample state to the first sample of the current run: DTS,
// file offset and sample iterator are taken from |run_itr_|.
// NOTE(review): part of the body is not visible in this excerpt.
447 void TrackRunIterator::ResetRun() {
450 sample_dts_ = run_itr_->start_dts;
451 sample_offset_ = run_itr_->sample_start_offset;
452 sample_itr_ = run_itr_->samples.begin();
// Moves the running DTS and file offset past the current sample.
// NOTE(review): the enclosing function header is not visible in this excerpt.
457 sample_dts_ += sample_itr_->duration;
458 sample_offset_ += sample_itr_->size;
// True when the run is encrypted, has auxiliary info, and no sample
// encryption entries have been stored for it yet.
// NOTE(review): the enclosing function header is not visible in this excerpt.
466 return is_encrypted() && aux_info_size() > 0 &&
467 run_itr_->sample_encryption_entries.size() == 0;
// Parses one sample encryption entry per sample of the current run into the
// run's |sample_encryption_entries|.
// NOTE(review): the enclosing function header and parts of the body are not
// visible in this excerpt.
// |run_itr_| is used to compute an index into |runs_| to obtain a mutable
// reference to the current run.
474 std::vector<SampleEncryptionEntry>& sample_encryption_entries =
475 runs_[run_itr_ - runs_.begin()].sample_encryption_entries;
476 sample_encryption_entries.resize(run_itr_->samples.size());
478 for (
size_t i = 0; i < run_itr_->samples.size(); i++) {
// Start from the default size; the per-sample size may replace it (the
// selecting condition is not visible here).
479 int info_size = run_itr_->aux_info_default_size;
481 info_size = run_itr_->aux_info_sizes[i];
// Anything beyond the IV is taken to mean subsample data is present.
484 const bool has_subsamples =
485 info_size > track_encryption().default_per_sample_iv_size;
486 RCHECK(sample_encryption_entries[i].ParseFromBuffer(
487 track_encryption().default_per_sample_iv_size, has_subsamples,
// True when the current run is valid and the sample iterator has not reached
// the end of that run.
// NOTE(review): the enclosing function header is not visible in this excerpt.
498 return IsRunValid() && (sample_itr_ != run_itr_->samples.end());
// Computes the smallest file offset still of interest: the minimum over the
// current sample offset, the current auxiliary info offset and the next run's
// offsets.
// NOTE(review): the enclosing function header, the conditions guarding the
// first two std::min calls and the final return are not visible here.
508 int64_t offset = kInvalidOffset;
511 offset = std::min(offset, sample_offset_);
513 offset = std::min(offset, aux_info_offset());
515 if (run_itr_ != runs_.end()) {
516 std::vector<TrackRunInfo>::const_iterator next_run = run_itr_ + 1;
517 if (next_run != runs_.end()) {
518 offset = std::min(offset, next_run->sample_start_offset);
// The next run's auxiliary offset only counts when it has auxiliary info.
519 if (next_run->aux_info_total_size)
520 offset = std::min(offset, next_run->aux_info_start_offset);
// Nothing contributed an offset: use the first run's sample offset, or 0 when
// there are no runs.
523 if (offset == kInvalidOffset)
524 return runs_.empty() ? 0 : runs_[0].sample_start_offset;
// Returns the track id recorded for the current run.
528 uint32_t TrackRunIterator::track_id()
const {
530 return run_itr_->track_id;
// True when the track encryption parameters have default_is_protected == 1.
533 bool TrackRunIterator::is_encrypted()
const {
535 return track_encryption().default_is_protected == 1;
// Returns the auxiliary info start offset of the current run.
538 int64_t TrackRunIterator::aux_info_offset()
const {
539 return run_itr_->aux_info_start_offset;
// Returns the total auxiliary info size of the current run.
542 int TrackRunIterator::aux_info_size()
const {
543 return run_itr_->aux_info_total_size;
// True when the current run belongs to an audio track.
546 bool TrackRunIterator::is_audio()
const {
548 return run_itr_->track_type == kAudio;
// True when the current run belongs to a video track.
551 bool TrackRunIterator::is_video()
const {
553 return run_itr_->track_type == kVideo;
// Returns the audio sample entry of the current run; the pointer is only
// checked in debug builds.
// NOTE(review): the enclosing function header is not visible in this excerpt.
558 DCHECK(run_itr_->audio_description);
559 return *run_itr_->audio_description;
// Returns the video sample entry of the current run; the pointer is only
// checked in debug builds.
// NOTE(review): the enclosing function header is not visible in this excerpt.
564 DCHECK(run_itr_->video_description);
565 return *run_itr_->video_description;
// Returns the running file offset of the current sample.
568 int64_t TrackRunIterator::sample_offset()
const {
570 return sample_offset_;
// Returns the size recorded for the current sample.
573 int TrackRunIterator::sample_size()
const {
575 return sample_itr_->size;
// Accessor for the decoding timestamp of the current sample.
// NOTE(review): the body is not visible in this excerpt.
578 int64_t TrackRunIterator::dts()
const {
// Returns the composition timestamp of the current sample: the running DTS
// plus the sample's composition offset.
583 int64_t TrackRunIterator::cts()
const {
585 return sample_dts_ + sample_itr_->cts_offset;
// Returns the duration recorded for the current sample.
588 int64_t TrackRunIterator::duration()
const {
590 return sample_itr_->duration;
// Returns the keyframe flag recorded for the current sample.
593 bool TrackRunIterator::is_keyframe()
const {
595 return sample_itr_->is_keyframe;
// Accessor for the track encryption parameters that apply to the current run.
// NOTE(review): the body is not visible in this excerpt.
598 const TrackEncryption& TrackRunIterator::track_encryption()
const {
// Assembles the decryption parameters (key id, IV, subsamples, protection
// scheme and pattern) for the current sample. The visible error paths return
// an empty std::unique_ptr<DecryptConfig>.
// NOTE(review): the enclosing function header and parts of the body are not
// visible in this excerpt.
606 std::vector<uint8_t> iv;
607 std::vector<SubsampleEntry> subsamples;
// Position of the current sample within its run.
609 size_t sample_idx = sample_itr_ - run_itr_->samples.begin();
// Use the stored sample encryption entry when one exists for this sample.
610 if (sample_idx < run_itr_->sample_encryption_entries.size()) {
612 run_itr_->sample_encryption_entries[sample_idx];
613 DCHECK(is_encrypted());
// A non-zero subsample total must match the sample size exactly.
616 const size_t total_size_of_subsamples =
618 if (total_size_of_subsamples != 0 &&
619 total_size_of_subsamples !=
static_cast<size_t>(sample_size())) {
620 LOG(ERROR) <<
"Incorrect CENC subsample size.";
621 return std::unique_ptr<DecryptConfig>();
624 iv = sample_encryption_entry.initialization_vector;
625 subsamples = sample_encryption_entry.subsamples;
// The track-level constant IV is used as the IV below; a scheme other than
// 'cbcs' triggers the message that follows.
631 if (protection_scheme != FOURCC_cbcs) {
633 <<
"Constant IV should only be used with 'cbcs' protection scheme.";
635 iv = track_encryption().default_constant_iv;
637 LOG(ERROR) <<
"IV cannot be empty.";
638 return std::unique_ptr<DecryptConfig>();
642 track_encryption().default_kid, iv, subsamples, protection_scheme,
643 track_encryption().default_crypt_byte_block,
644 track_encryption().default_skip_byte_block));
// Returns the timestamp adjustment for |track|, derived from its edit list
// and, when that yields 0, optionally from the initial composition offset.
// The result is stored in |timestamp_adjustment_map_| keyed by track id, and
// the map is consulted first on each call.
// |traf| may be null (the moov-based initialization passes nullptr).
// NOTE(review): some parameters and statements are not visible in this
// excerpt.
647 int64_t TrackRunIterator::GetTimestampAdjustment(
const Movie& movie,
650 const uint32_t track_id = track.header.track_id;
651 const auto iter = timestamp_adjustment_map_.find(track_id);
652 if (iter != timestamp_adjustment_map_.end())
655 int64_t timestamp_adjustment = 0;
656 const std::vector<EditListEntry>& edits = track.edit.list.edits;
657 if (!edits.empty()) {
// Entries whose media rate is not 1 are ignored, with a log message.
660 if (edit.media_rate_integer != 1) {
661 LOG(INFO) <<
"dwell EditListEntry is ignored.";
// A negative media_time adds the segment duration, converted from the movie
// timescale to the track's media timescale.
665 if (edit.media_time < 0) {
668 const int64_t scaled_time =
669 Rescale(edit.segment_duration, movie.header.timescale,
670 track.media.header.timescale);
671 timestamp_adjustment += scaled_time;
// Otherwise the media time is subtracted.
673 timestamp_adjustment -= edit.media_time;
// No adjustment from the edit list: look at the first composition offset,
// taken from the fragment's first run or from the track's
// composition_time_to_sample table.
678 if (timestamp_adjustment == 0) {
679 int64_t composition_offset = 0;
680 if (traf && !traf->runs.empty()) {
681 const auto& cts_offsets =
682 traf->runs.front().sample_composition_time_offsets;
683 if (!cts_offsets.empty())
684 composition_offset = cts_offsets.front();
686 CompositionOffsetIterator composition_offset_iter(
687 track.media.information.sample_table.composition_time_to_sample);
688 if (!composition_offset_iter.IsValid()) {
696 composition_offset = composition_offset_iter.sample_offset();
699 int64_t decode_time = 0;
701 decode_time = traf->decode_time.decode_time;
// A non-zero first composition offset at decode time 0 is reported as a
// probably missing EditList; the flag opts in to compensating for it.
702 if (composition_offset != 0 && decode_time == 0) {
703 LOG(WARNING) <<
"Seeing non-zero composition offset "
704 << composition_offset
705 <<
". An EditList is probably missing.";
706 if (absl::GetFlag(FLAGS_mp4_reset_initial_composition_offset_to_zero)) {
708 <<
"Adjusting timestamps by " << -composition_offset
709 <<
". Please file a bug to "
710 "https://github.com/shaka-project/shaka-packager/issues if you "
711 "do not think it is right or if you are seeing any problems.";
712 timestamp_adjustment = -composition_offset;
// Store the result for later calls on the same track.
717 timestamp_adjustment_map_.insert(
718 std::make_pair(track_id, timestamp_adjustment));
719 return timestamp_adjustment;
All the methods that are virtual are virtual for mocking.
uint32_t GetTotalSizeOfSubsamples() const