5#include <packager/media/formats/mp4/track_run_iterator.h>
10#include <absl/flags/flag.h>
11#include <absl/log/check.h>
13#include <packager/macros/logging.h>
14#include <packager/media/base/buffer_reader.h>
15#include <packager/media/base/fourccs.h>
16#include <packager/media/base/rcheck.h>
17#include <packager/media/formats/mp4/chunk_info_iterator.h>
18#include <packager/media/formats/mp4/composition_offset_iterator.h>
19#include <packager/media/formats/mp4/decoding_time_iterator.h>
20#include <packager/media/formats/mp4/sync_sample_iterator.h>
23 mp4_reset_initial_composition_offset_to_zero,
25 "MP4 only. If it is true, reset the initial composition offset to "
26 "zero, i.e. by assuming that there is a missing EditList.");
// Sentinel offset equal to the largest int64_t value. Elsewhere in this file
// it stands in for an aux-info offset when a run's aux_info_total_size is zero
// and serves as the starting value when taking a minimum of offsets.
29const int64_t kInvalidOffset = std::numeric_limits<int64_t>::max();
// Converts |time_in_old_scale| from |old_scale| units to |new_scale| units.
// The division is done in double precision and the product is converted back
// to int64_t by the return type.
// NOTE(review): the declarations of |old_scale| and |new_scale| are not
// visible in this excerpt -- confirm their types and that |old_scale| is
// never zero at the call sites.
31int64_t Rescale(int64_t time_in_old_scale,
34 return (
static_cast<double>(time_in_old_scale) / old_scale) * new_scale;
52 std::vector<SampleInfo> samples;
55 int64_t sample_start_offset;
58 const AudioSampleEntry* audio_description;
59 const VideoSampleEntry* video_description;
63 std::vector<SampleEncryptionEntry> sample_encryption_entries;
67 int64_t aux_info_start_offset;
68 int aux_info_default_size;
69 std::vector<uint8_t> aux_info_sizes;
70 int aux_info_total_size;
// Default constructor. Of the initializers visible here: both start offsets
// are set to -1, both sample-description pointers to NULL, and the aux-info
// default/total sizes to 0.
76TrackRunInfo::TrackRunInfo()
80 sample_start_offset(-1),
82 audio_description(NULL),
83 video_description(NULL),
84 aux_info_start_offset(-1),
85 aux_info_default_size(0),
86 aux_info_total_size(0) {}
// Destructor with an empty body; no explicit cleanup is performed here.
87TrackRunInfo::~TrackRunInfo() {}
// Constructs an iterator over the track runs described by |moov|. The pointer
// is stored in moov_, and the running sample DTS and sample offset start at 0.
// NOTE(review): the constructor body is not visible in this excerpt.
89TrackRunIterator::TrackRunIterator(
const Movie* moov)
90 : moov_(moov), sample_dts_(0), sample_offset_(0) {
// Destructor with an empty body; no explicit cleanup is performed here.
94TrackRunIterator::~TrackRunIterator() {}
// Fills |sample_info| for sample index |i| of |trun|. For size, duration and
// flags the per-sample value in |trun| is used when one exists at index |i|;
// otherwise the track fragment header (|tfhd|) default is tried, and finally
// the track extends (|trex|) default.
// NOTE(review): the declarations of |i| and |flags| and the plain `else`
// lines are not visible in this excerpt.
96static void PopulateSampleInfo(
const TrackExtends& trex,
97 const TrackFragmentHeader& tfhd,
98 const TrackFragmentRun& trun,
100 SampleInfo* sample_info) {
// Size: per-sample entry, else a positive tfhd default, else the trex default.
101 if (i < trun.sample_sizes.size()) {
102 sample_info->size = trun.sample_sizes[i];
103 }
else if (tfhd.default_sample_size > 0) {
104 sample_info->size = tfhd.default_sample_size;
106 sample_info->size = trex.default_sample_size;
// Duration: same three-level fallback as for size.
109 if (i < trun.sample_durations.size()) {
110 sample_info->duration = trun.sample_durations[i];
111 }
else if (tfhd.default_sample_duration > 0) {
112 sample_info->duration = tfhd.default_sample_duration;
114 sample_info->duration = trex.default_sample_duration;
// Composition offset: only |trun| can supply one; otherwise it is zero.
117 if (i < trun.sample_composition_time_offsets.size()) {
118 sample_info->cts_offset = trun.sample_composition_time_offsets[i];
120 sample_info->cts_offset = 0;
// Flags: per-sample entry, else the tfhd default when its "default sample
// flags present" bit is set, else the trex default.
124 if (i < trun.sample_flags.size()) {
125 flags = trun.sample_flags[i];
126 }
else if (tfhd.flags & TrackFragmentHeader::kDefaultSampleFlagsPresentMask) {
127 flags = tfhd.default_sample_flags;
129 flags = trex.default_sample_flags;
// A sample is a key frame exactly when the non-key-sample bit is clear.
131 sample_info->is_keyframe = !(flags & TrackFragmentHeader::kNonKeySampleMask);
// Ordering functor passed to std::sort over the track runs. Runs are ordered
// by the smaller of their (aux-info offset, sample data offset) pair; ties are
// broken by the larger of the two.
// NOTE(review): the declarations of a_aux / b_aux are not visible in this
// excerpt; only their initializer expressions are.
145class CompareMinTrackRunDataOffset {
147 bool operator()(
const TrackRunInfo& a,
const TrackRunInfo& b) {
// A run whose aux info has zero total size contributes kInvalidOffset, so its
// aux offset never wins the std::min below.
149 a.aux_info_total_size ? a.aux_info_start_offset : kInvalidOffset;
151 b.aux_info_total_size ? b.aux_info_start_offset : kInvalidOffset;
153 int64_t a_lesser = std::min(a_aux, a.sample_start_offset);
154 int64_t a_greater = std::max(a_aux, a.sample_start_offset);
155 int64_t b_lesser = std::min(b_aux, b.sample_start_offset);
156 int64_t b_greater = std::max(b_aux, b.sample_start_offset);
// Primary key: the smaller offset. Secondary key: the larger offset.
158 if (a_lesser == b_lesser)
159 return a_greater < b_greater;
160 return a_lesser < b_lesser;
167 for (std::vector<Track>::const_iterator trak = moov_->tracks.begin();
168 trak != moov_->tracks.end(); ++trak) {
170 trak->media.information.sample_table.description;
171 if (stsd.type != kAudio && stsd.type != kVideo) {
172 DVLOG(1) <<
"Skipping unhandled track type";
177 trak->media.information.sample_table.decoding_time_to_sample);
179 trak->media.information.sample_table.composition_time_to_sample);
180 bool has_composition_offset = composition_offset.
IsValid();
182 trak->media.information.sample_table.sample_to_chunk);
184 trak->media.information.sample_table.sync_sample);
189 trak->media.information.sample_table.sample_size;
190 const std::vector<uint64_t>& chunk_offset_vector =
191 trak->media.information.sample_table.chunk_large_offset.offsets;
195 int64_t run_start_dts = GetTimestampAdjustment(*moov_, *trak,
nullptr);
197 uint32_t num_samples = sample_size.sample_count;
198 uint32_t num_chunks =
static_cast<uint32_t
>(chunk_offset_vector.size());
201 DCHECK_EQ(num_samples, decoding_time.
NumSamples());
202 if (has_composition_offset) {
203 DCHECK_EQ(num_samples, composition_offset.
NumSamples());
205 if (num_chunks > 0) {
206 DCHECK_EQ(num_samples, chunk_info.
NumSamples(1, num_chunks));
210 if (num_samples > 0) {
212 RCHECK(decoding_time.
IsValid());
216 uint32_t sample_index = 0;
217 for (uint32_t chunk_index = 0; chunk_index < num_chunks; ++chunk_index) {
221 tri.track_id = trak->header.track_id;
222 tri.timescale = trak->media.header.timescale;
223 tri.start_dts = run_start_dts;
224 tri.sample_start_offset = chunk_offset_vector[chunk_index];
227 RCHECK(desc_idx > 0);
230 tri.track_type = stsd.type;
231 if (tri.track_type == kAudio) {
232 RCHECK(!stsd.audio_entries.empty());
233 if (desc_idx > stsd.audio_entries.size())
235 tri.audio_description = &stsd.audio_entries[desc_idx];
237 RCHECK(tri.audio_description->sinf.info.track_encryption
238 .default_is_protected == 0);
239 }
else if (tri.track_type == kVideo) {
240 RCHECK(!stsd.video_entries.empty());
241 if (desc_idx > stsd.video_entries.size())
243 tri.video_description = &stsd.video_entries[desc_idx];
245 RCHECK(tri.video_description->sinf.info.track_encryption
246 .default_is_protected == 0);
250 tri.samples.resize(samples_per_chunk);
251 for (uint32_t k = 0; k < samples_per_chunk; ++k) {
252 SampleInfo& sample = tri.samples[k];
253 sample.size = sample_size.sample_size != 0
254 ? sample_size.sample_size
255 : sample_size.sizes[sample_index];
258 has_composition_offset ? composition_offset.
sample_offset() : 0;
261 run_start_dts += sample.duration;
266 if (sample_index == num_samples) {
270 if (has_composition_offset)
274 if (has_composition_offset)
279 runs_.push_back(tri);
283 std::sort(runs_.begin(), runs_.end(), CompareMinTrackRunDataOffset());
284 run_itr_ = runs_.begin();
292 const auto track_count = std::max(moof.tracks.size(), moov_->tracks.size());
293 next_fragment_start_dts_.resize(track_count, 0);
294 for (
size_t i = 0; i < moof.tracks.size(); i++) {
296 const auto track_index = traf.header.track_id - 1;
297 const Track* trak = NULL;
298 for (
size_t t = 0; t < moov_->tracks.size(); t++) {
299 if (moov_->tracks[t].header.track_id == traf.header.track_id)
300 trak = &moov_->tracks[t];
305 for (
size_t t = 0; t < moov_->extends.tracks.size(); t++) {
306 if (moov_->extends.tracks[t].track_id == traf.header.track_id)
307 trex = &moov_->extends.tracks[t];
312 trak->media.information.sample_table.description;
313 if (stsd.type != kAudio && stsd.type != kVideo) {
314 DVLOG(1) <<
"Skipping unhandled track type";
317 size_t desc_idx = traf.header.sample_description_index;
319 desc_idx = trex->default_sample_description_index;
320 RCHECK(desc_idx > 0);
327 RCHECK(!stsd.audio_entries.empty());
328 if (desc_idx > stsd.audio_entries.size())
330 audio_sample_entry = &stsd.audio_entries[desc_idx];
333 RCHECK(!stsd.video_entries.empty());
334 if (desc_idx > stsd.video_entries.size())
336 video_sample_entry = &stsd.video_entries[desc_idx];
345 DCHECK(traf.sample_encryption.sample_encryption_entries.empty());
346 std::vector<SampleEncryptionEntry> sample_encryption_entries;
348 RCHECK(audio_sample_entry || video_sample_entry);
349 const uint8_t default_per_sample_iv_size =
350 audio_sample_entry ? audio_sample_entry->sinf.info.track_encryption
351 .default_per_sample_iv_size
352 : video_sample_entry->sinf.info.track_encryption
353 .default_per_sample_iv_size;
355 default_per_sample_iv_size, &sample_encryption_entries));
358 int64_t run_start_dts = traf.decode_time_absent
359 ? next_fragment_start_dts_[track_index]
360 : traf.decode_time.decode_time;
364 run_start_dts += GetTimestampAdjustment(*moov_, *trak, &traf);
366 int sample_count_sum = 0;
368 for (
size_t j = 0; j < traf.runs.size(); j++) {
371 tri.track_id = traf.header.track_id;
372 tri.timescale = trak->media.header.timescale;
373 tri.start_dts = run_start_dts;
374 tri.sample_start_offset = trun.data_offset;
376 tri.track_type = stsd.type;
377 tri.audio_description = audio_sample_entry;
378 tri.video_description = video_sample_entry;
380 tri.aux_info_start_offset = -1;
381 tri.aux_info_total_size = 0;
385 if (!sample_encryption_entries.empty()) {
386 RCHECK(sample_encryption_entries.size() >=
387 sample_count_sum + trun.sample_count);
388 for (
size_t k = 0; k < trun.sample_count; ++k) {
389 tri.sample_encryption_entries.push_back(
390 sample_encryption_entries[sample_count_sum + k]);
392 }
else if (traf.auxiliary_offset.offsets.size() > j) {
396 tri.aux_info_start_offset = traf.auxiliary_offset.offsets[j];
399 RCHECK(traf.auxiliary_size.sample_count >=
400 sample_count_sum + trun.sample_count);
401 tri.aux_info_default_size =
402 traf.auxiliary_size.default_sample_info_size;
403 if (tri.aux_info_default_size == 0) {
404 const std::vector<uint8_t>& sizes =
405 traf.auxiliary_size.sample_info_sizes;
406 tri.aux_info_sizes.insert(
407 tri.aux_info_sizes.begin(), sizes.begin() + sample_count_sum,
408 sizes.begin() + sample_count_sum + trun.sample_count);
414 if (tri.aux_info_default_size) {
415 tri.aux_info_total_size =
416 tri.aux_info_default_size * trun.sample_count;
418 tri.aux_info_total_size = 0;
419 for (
size_t k = 0; k < trun.sample_count; k++) {
420 tri.aux_info_total_size += tri.aux_info_sizes[k];
425 tri.samples.resize(trun.sample_count);
426 for (
size_t k = 0; k < trun.sample_count; k++) {
427 PopulateSampleInfo(*trex, traf.header, trun, k, &tri.samples[k]);
428 run_start_dts += tri.samples[k].duration;
430 runs_.push_back(tri);
431 sample_count_sum += trun.sample_count;
433 next_fragment_start_dts_[track_index] = run_start_dts;
436 std::sort(runs_.begin(), runs_.end(), CompareMinTrackRunDataOffset());
437 run_itr_ = runs_.begin();
// Re-positions the per-sample state at the beginning of the run that run_itr_
// points to: DTS and byte offset are taken from the run's start values and the
// sample iterator is set to the run's first sample.
// NOTE(review): the lines between the signature and the assignments are not
// visible in this excerpt (possibly a validity guard) -- confirm.
447void TrackRunIterator::ResetRun() {
450 sample_dts_ = run_itr_->start_dts;
451 sample_offset_ = run_itr_->sample_start_offset;
452 sample_itr_ = run_itr_->samples.begin();
457 sample_dts_ += sample_itr_->duration;
458 sample_offset_ += sample_itr_->size;
466 return is_encrypted() && aux_info_size() > 0 &&
467 run_itr_->sample_encryption_entries.size() == 0;
474 std::vector<SampleEncryptionEntry>& sample_encryption_entries =
475 runs_[run_itr_ - runs_.begin()].sample_encryption_entries;
476 sample_encryption_entries.resize(run_itr_->samples.size());
478 for (
size_t i = 0; i < run_itr_->samples.size(); i++) {
479 int info_size = run_itr_->aux_info_default_size;
481 info_size = run_itr_->aux_info_sizes[i];
484 const bool has_subsamples =
485 info_size > track_encryption().default_per_sample_iv_size;
486 RCHECK(sample_encryption_entries[i].ParseFromBuffer(
487 track_encryption().default_per_sample_iv_size, has_subsamples,
496 return run_itr_ != runs_.end();
500 return IsRunValid() && (sample_itr_ != run_itr_->samples.end());
510 int64_t offset = kInvalidOffset;
513 offset = std::min(offset, sample_offset_);
515 offset = std::min(offset, aux_info_offset());
517 if (run_itr_ != runs_.end()) {
518 std::vector<TrackRunInfo>::const_iterator next_run = run_itr_ + 1;
519 if (next_run != runs_.end()) {
520 offset = std::min(offset, next_run->sample_start_offset);
521 if (next_run->aux_info_total_size)
522 offset = std::min(offset, next_run->aux_info_start_offset);
525 if (offset == kInvalidOffset)
526 return runs_.empty() ? 0 : runs_[0].sample_start_offset;
// Returns the track id stored on the run that run_itr_ points to.
530uint32_t TrackRunIterator::track_id()
const {
532 return run_itr_->track_id;
// Returns true when track_encryption() reports default_is_protected == 1.
535bool TrackRunIterator::is_encrypted()
const {
537 return track_encryption().default_is_protected == 1;
// Returns the aux-info start offset stored on the current run.
540int64_t TrackRunIterator::aux_info_offset()
const {
541 return run_itr_->aux_info_start_offset;
// Returns the total aux-info size stored on the current run.
544int TrackRunIterator::aux_info_size()
const {
545 return run_itr_->aux_info_total_size;
// Returns true when the current run's track type is kAudio.
548bool TrackRunIterator::is_audio()
const {
550 return run_itr_->track_type == kAudio;
// Returns true when the current run's track type is kVideo.
553bool TrackRunIterator::is_video()
const {
555 return run_itr_->track_type == kVideo;
560 DCHECK(run_itr_->audio_description);
561 return *run_itr_->audio_description;
566 DCHECK(run_itr_->video_description);
567 return *run_itr_->video_description;
// Returns the running offset (sample_offset_) of the current sample.
570int64_t TrackRunIterator::sample_offset()
const {
572 return sample_offset_;
// Returns the size stored on the sample that sample_itr_ points to.
575int TrackRunIterator::sample_size()
const {
577 return sample_itr_->size;
580int64_t TrackRunIterator::dts()
const {
// Returns the composition timestamp of the current sample: the running DTS
// plus the sample's composition time offset.
585int64_t TrackRunIterator::cts()
const {
587 return sample_dts_ + sample_itr_->cts_offset;
// Returns the duration stored on the sample that sample_itr_ points to.
590int64_t TrackRunIterator::duration()
const {
592 return sample_itr_->duration;
// Returns the key-frame flag stored on the sample that sample_itr_ points to.
595bool TrackRunIterator::is_keyframe()
const {
597 return sample_itr_->is_keyframe;
600const TrackEncryption& TrackRunIterator::track_encryption()
const {
608 std::vector<uint8_t> iv;
609 std::vector<SubsampleEntry> subsamples;
611 size_t sample_idx = sample_itr_ - run_itr_->samples.begin();
612 if (sample_idx < run_itr_->sample_encryption_entries.size()) {
614 run_itr_->sample_encryption_entries[sample_idx];
615 DCHECK(is_encrypted());
618 const size_t total_size_of_subsamples =
620 if (total_size_of_subsamples != 0 &&
621 total_size_of_subsamples !=
static_cast<size_t>(sample_size())) {
622 LOG(ERROR) <<
"Incorrect CENC subsample size.";
623 return std::unique_ptr<DecryptConfig>();
626 iv = sample_encryption_entry.initialization_vector;
627 subsamples = sample_encryption_entry.subsamples;
633 if (protection_scheme != FOURCC_cbcs) {
635 <<
"Constant IV should only be used with 'cbcs' protection scheme.";
637 iv = track_encryption().default_constant_iv;
639 LOG(ERROR) <<
"IV cannot be empty.";
640 return std::unique_ptr<DecryptConfig>();
644 track_encryption().default_kid, iv, subsamples, protection_scheme,
645 track_encryption().default_crypt_byte_block,
646 track_encryption().default_skip_byte_block));
// Computes the timestamp adjustment for a track and records it in
// timestamp_adjustment_map_, keyed by track id. The map is consulted first.
// The adjustment is derived from the track's edit list when one is present.
// If the result is still zero, the initial composition offset is examined and
// may be negated into the adjustment when the
// mp4_reset_initial_composition_offset_to_zero flag is set.
// NOTE(review): the |track| and |traf| parameter lines, the loop over |edits|,
// and several else/return lines are not visible in this excerpt.
649int64_t TrackRunIterator::GetTimestampAdjustment(
const Movie& movie,
// Look for an adjustment already recorded for this track id.
652 const uint32_t track_id = track.header.track_id;
653 const auto iter = timestamp_adjustment_map_.find(track_id);
654 if (iter != timestamp_adjustment_map_.end())
657 int64_t timestamp_adjustment = 0;
658 const std::vector<EditListEntry>& edits = track.edit.list.edits;
659 if (!edits.empty()) {
// Entries whose media_rate_integer is not 1 are only logged.
662 if (edit.media_rate_integer != 1) {
663 LOG(INFO) <<
"dwell EditListEntry is ignored.";
// Negative media_time: the segment duration, rescaled from the movie timescale
// to the media timescale, is added. Otherwise media_time is subtracted.
667 if (edit.media_time < 0) {
670 const int64_t scaled_time =
671 Rescale(edit.segment_duration, movie.header.timescale,
672 track.media.header.timescale);
673 timestamp_adjustment += scaled_time;
675 timestamp_adjustment -= edit.media_time;
// No adjustment so far: inspect the first composition offset, taken from the
// first run of |traf| when available, or from the sample table's composition
// offset iterator.
680 if (timestamp_adjustment == 0) {
681 int64_t composition_offset = 0;
682 if (traf && !traf->runs.empty()) {
683 const auto& cts_offsets =
684 traf->runs.front().sample_composition_time_offsets;
685 if (!cts_offsets.empty())
686 composition_offset = cts_offsets.front();
688 CompositionOffsetIterator composition_offset_iter(
689 track.media.information.sample_table.composition_time_to_sample);
690 if (!composition_offset_iter.IsValid()) {
698 composition_offset = composition_offset_iter.sample_offset();
701 int64_t decode_time = 0;
703 decode_time = traf->decode_time.decode_time;
// A non-zero composition offset at decode time zero is warned about, and is
// only compensated for when the command-line flag is enabled.
704 if (composition_offset != 0 && decode_time == 0) {
705 LOG(WARNING) <<
"Seeing non-zero composition offset "
706 << composition_offset
707 <<
". An EditList is probably missing.";
708 if (absl::GetFlag(FLAGS_mp4_reset_initial_composition_offset_to_zero)) {
710 <<
"Adjusting timestamps by " << -composition_offset
711 <<
". Please file a bug to "
712 "https://github.com/shaka-project/shaka-packager/issues if you "
713 "do not think it is right or if you are seeing any problems.";
714 timestamp_adjustment = -composition_offset;
// Record the result under this track id before returning it.
719 timestamp_adjustment_map_.insert(
720 std::make_pair(track_id, timestamp_adjustment));
721 return timestamp_adjustment;
All the methods that are virtual are virtual for mocking.
uint32_t GetTotalSizeOfSubsamples() const