Shaka Packager SDK
Loading...
Searching...
No Matches
mp4_muxer.cc
1// Copyright 2014 Google LLC. All rights reserved.
2//
3// Use of this source code is governed by a BSD-style
4// license that can be found in the LICENSE file or at
5// https://developers.google.com/open-source/licenses/bsd
6
7#include <packager/media/formats/mp4/mp4_muxer.h>
8
9#include <algorithm>
10#include <chrono>
11
12#include <absl/log/check.h>
13#include <absl/strings/escaping.h>
14#include <absl/strings/numbers.h>
15
16#include <packager/file.h>
17#include <packager/macros/logging.h>
18#include <packager/macros/status.h>
19#include <packager/media/base/aes_encryptor.h>
20#include <packager/media/base/audio_stream_info.h>
21#include <packager/media/base/fourccs.h>
22#include <packager/media/base/key_source.h>
23#include <packager/media/base/media_sample.h>
24#include <packager/media/base/text_stream_info.h>
25#include <packager/media/base/video_stream_info.h>
26#include <packager/media/codecs/es_descriptor.h>
27#include <packager/media/event/muxer_listener.h>
28#include <packager/media/formats/mp4/box_definitions.h>
29#include <packager/media/formats/mp4/low_latency_segment_segmenter.h>
30#include <packager/media/formats/mp4/multi_segment_segmenter.h>
31#include <packager/media/formats/mp4/single_segment_segmenter.h>
32#include <packager/media/formats/ttml/ttml_generator.h>
33
34namespace shaka {
35namespace media {
36namespace mp4 {
37
38namespace {
39
40// Sets the range start and end value from offset and size.
41// |start| and |end| are for byte-range-spec specified in RFC2616.
42void SetStartAndEndFromOffsetAndSize(size_t offset,
43 size_t size,
44 Range* range) {
45 DCHECK(range);
46 range->start = static_cast<uint32_t>(offset);
47 // Note that ranges are inclusive. So we need - 1.
48 range->end = range->start + static_cast<uint32_t>(size) - 1;
49}
50
51FourCC CodecToFourCC(Codec codec, H26xStreamFormat h26x_stream_format) {
52 switch (codec) {
53 case kCodecAV1:
54 return FOURCC_av01;
55 case kCodecH264:
56 return h26x_stream_format ==
57 H26xStreamFormat::kNalUnitStreamWithParameterSetNalus
58 ? FOURCC_avc3
59 : FOURCC_avc1;
60 case kCodecH265:
61 return h26x_stream_format ==
62 H26xStreamFormat::kNalUnitStreamWithParameterSetNalus
63 ? FOURCC_hev1
64 : FOURCC_hvc1;
65 case kCodecH265DolbyVision:
66 return h26x_stream_format ==
67 H26xStreamFormat::kNalUnitStreamWithParameterSetNalus
68 ? FOURCC_dvhe
69 : FOURCC_dvh1;
70 case kCodecVP8:
71 return FOURCC_vp08;
72 case kCodecVP9:
73 return FOURCC_vp09;
74 case kCodecAAC:
75 case kCodecMP3:
76 return FOURCC_mp4a;
77 case kCodecAC3:
78 return FOURCC_ac_3;
79 case kCodecALAC:
80 return FOURCC_alac;
81 case kCodecDTSC:
82 return FOURCC_dtsc;
83 case kCodecDTSH:
84 return FOURCC_dtsh;
85 case kCodecDTSL:
86 return FOURCC_dtsl;
87 case kCodecDTSE:
88 return FOURCC_dtse;
89 case kCodecDTSM:
90 return FOURCC_dtsm;
91 case kCodecDTSX:
92 return FOURCC_dtsx;
93 case kCodecEAC3:
94 return FOURCC_ec_3;
95 case kCodecAC4:
96 return FOURCC_ac_4;
97 case kCodecFlac:
98 return FOURCC_fLaC;
99 case kCodecOpus:
100 return FOURCC_Opus;
101 case kCodecIAMF:
102 return FOURCC_iamf;
103 case kCodecMha1:
104 return FOURCC_mha1;
105 case kCodecMhm1:
106 return FOURCC_mhm1;
107 default:
108 return FOURCC_NULL;
109 }
110}
111
112void GenerateSinf(FourCC old_type,
113 const EncryptionConfig& encryption_config,
114 ProtectionSchemeInfo* sinf) {
115 sinf->format.format = old_type;
116
117 DCHECK_NE(encryption_config.protection_scheme, FOURCC_NULL);
118 sinf->type.type = encryption_config.protection_scheme;
119
120 // The version of cenc implemented here. CENC 4.
121 const int kCencSchemeVersion = 0x00010000;
122 sinf->type.version = kCencSchemeVersion;
123
124 auto& track_encryption = sinf->info.track_encryption;
125 track_encryption.default_is_protected = 1;
126
127 track_encryption.default_crypt_byte_block =
128 encryption_config.crypt_byte_block;
129 track_encryption.default_skip_byte_block = encryption_config.skip_byte_block;
130 switch (encryption_config.protection_scheme) {
131 case FOURCC_cenc:
132 case FOURCC_cbc1:
133 DCHECK_EQ(track_encryption.default_crypt_byte_block, 0u);
134 DCHECK_EQ(track_encryption.default_skip_byte_block, 0u);
135 // CENCv3 10.1 ‘cenc’ AES-CTR scheme and 10.2 ‘cbc1’ AES-CBC scheme:
136 // The version of the Track Encryption Box (‘tenc’) SHALL be 0.
137 track_encryption.version = 0;
138 break;
139 case FOURCC_cbcs:
140 case FOURCC_cens:
141 // CENCv3 10.3 ‘cens’ AES-CTR subsample pattern encryption scheme and
142 // 10.4 ‘cbcs’ AES-CBC subsample pattern encryption scheme:
143 // The version of the Track Encryption Box (‘tenc’) SHALL be 1.
144 track_encryption.version = 1;
145 break;
146 default:
147 NOTIMPLEMENTED() << "Unexpected protection scheme "
148 << encryption_config.protection_scheme;
149 }
150
151 track_encryption.default_per_sample_iv_size =
152 encryption_config.per_sample_iv_size;
153 track_encryption.default_constant_iv = encryption_config.constant_iv;
154 track_encryption.default_kid = encryption_config.key_id;
155}
156
// Converts a seek preroll given in nanoseconds into a roll distance.
// The roll distance is expressed in sample units and always takes negative
// (or zero) values.
//
// |seek_preroll_ns| is the preroll duration in nanoseconds and
// |sampling_frequency| the number of samples per second. The preroll is
// rounded to the closest whole sample. Prerolls longer than what a 16-bit
// signed value can express saturate at -32768 instead of going through an
// out-of-range floating-point to integer conversion, which is undefined.
int16_t GetRollDistance(uint64_t seek_preroll_ns, uint32_t sampling_frequency) {
  const double kNanosecondsPerSecond = 1000000000;
  // Largest magnitude a negative int16_t can hold.
  const double kMaxRollMagnitude = 32768;

  const double preroll_in_samples =
      seek_preroll_ns / kNanosecondsPerSecond * sampling_frequency;
  // Round to closest integer, then clamp so that the conversions below are
  // always in range.
  const double rounded_magnitude =
      std::min(preroll_in_samples + 0.5, kMaxRollMagnitude);
  // Negate in 32 bits first: +32768 does not fit in int16_t but -32768 does.
  return static_cast<int16_t>(-static_cast<int32_t>(rounded_magnitude));
}
166
167} // namespace
168
169MP4Muxer::MP4Muxer(const MuxerOptions& options) : Muxer(options) {}
170MP4Muxer::~MP4Muxer() {}
171
172Status MP4Muxer::InitializeMuxer() {
173 // Muxer will be delay-initialized after seeing the first sample.
174 to_be_initialized_ = true;
175 return Status::OK;
176}
177
178Status MP4Muxer::Finalize() {
179 // This happens on streams that are not initialized, i.e. not going through
180 // DelayInitializeMuxer, which can only happen if there are no samples from
181 // the stream.
182 if (!segmenter_) {
183 DCHECK(to_be_initialized_);
184 LOG(INFO) << "Skip stream '" << options().output_file_name
185 << "' which does not contain any sample.";
186 return Status::OK;
187 }
188
189 Status segmenter_finalized = segmenter_->Finalize();
190
191 if (!segmenter_finalized.ok())
192 return segmenter_finalized;
193
194 FireOnMediaEndEvent();
195 LOG(INFO) << "MP4 file '" << options().output_file_name << "' finalized.";
196 return Status::OK;
197}
198
199Status MP4Muxer::AddMediaSample(size_t stream_id, const MediaSample& sample) {
200 if (to_be_initialized_) {
201 RETURN_IF_ERROR(UpdateEditListOffsetFromSample(sample));
202 RETURN_IF_ERROR(DelayInitializeMuxer());
203 to_be_initialized_ = false;
204 }
205 DCHECK(segmenter_);
206 return segmenter_->AddSample(stream_id, sample);
207}
208
209Status MP4Muxer::FinalizeSegment(size_t stream_id,
210 const SegmentInfo& segment_info) {
211 DCHECK(segmenter_);
212 VLOG(3) << "Finalizing " << (segment_info.is_subsegment ? "sub" : "")
213 << "segment " << segment_info.start_timestamp << " duration "
214 << segment_info.duration << " segment number "
215 << segment_info.segment_number;
216 return segmenter_->FinalizeSegment(stream_id, segment_info);
217}
218
219Status MP4Muxer::DelayInitializeMuxer() {
220 DCHECK(!streams().empty());
221
222 std::unique_ptr<FileType> ftyp(new FileType);
223 std::unique_ptr<Movie> moov(new Movie);
224
225 ftyp->major_brand = FOURCC_mp41;
226 ftyp->compatible_brands.push_back(FOURCC_iso8);
227 ftyp->compatible_brands.push_back(FOURCC_isom);
228 ftyp->compatible_brands.push_back(FOURCC_mp41);
229 ftyp->compatible_brands.push_back(FOURCC_dash);
230
231 if (streams().size() == 1) {
232 FourCC codec_fourcc = FOURCC_NULL;
233 if (streams()[0]->stream_type() == kStreamVideo) {
234 codec_fourcc =
235 CodecToFourCC(streams()[0]->codec(),
236 static_cast<const VideoStreamInfo*>(streams()[0].get())
237 ->h26x_stream_format());
238 if (codec_fourcc != FOURCC_NULL)
239 ftyp->compatible_brands.push_back(codec_fourcc);
240
241 // https://professional.dolby.com/siteassets/content-creation/dolby-vision-for-content-creators/dolby_vision_bitstreams_within_the_iso_base_media_file_format_dec2017.pdf
242 std::string codec_string =
243 static_cast<const VideoStreamInfo*>(streams()[0].get())
244 ->codec_string();
245 std::string supplemental_codec_string =
246 static_cast<const VideoStreamInfo*>(streams()[0].get())
247 ->supplemental_codec();
248 if (codec_string.find("dvh") != std::string::npos ||
249 supplemental_codec_string.find("dvh") != std::string::npos ||
250 codec_string.find("dav1") != std::string::npos ||
251 supplemental_codec_string.find("dav1") != std::string::npos)
252 ftyp->compatible_brands.push_back(FOURCC_dby1);
253 FourCC extra_brand =
254 static_cast<const VideoStreamInfo*>(streams()[0].get())
255 ->compatible_brand();
256 if (extra_brand != FOURCC_NULL)
257 ftyp->compatible_brands.push_back(extra_brand);
258 }
259
260 // CMAF allows only one track/stream per file.
261 // CMAF requires single initialization switching for AVC3/HEV1, which is not
262 // supported yet.
263 if (codec_fourcc != FOURCC_avc3 && codec_fourcc != FOURCC_hev1)
264 ftyp->compatible_brands.push_back(FOURCC_cmfc);
265
266 if (streams()[0]->stream_type() == kStreamAudio) {
267 codec_fourcc =
268 CodecToFourCC(streams()[0]->codec(), H26xStreamFormat::kUnSpecified);
269 if (codec_fourcc == FOURCC_iamf)
270 ftyp->compatible_brands.push_back(FOURCC_iamf);
271 }
272 }
273
274 moov->header.creation_time = IsoTimeNow();
275 moov->header.modification_time = IsoTimeNow();
276 moov->header.next_track_id = static_cast<uint32_t>(streams().size()) + 1;
277
278 moov->tracks.resize(streams().size());
279 moov->extends.tracks.resize(streams().size());
280
281 // Initialize tracks.
282 for (uint32_t i = 0; i < streams().size(); ++i) {
283 const StreamInfo* stream = streams()[i].get();
284 Track& trak = moov->tracks[i];
285 trak.header.track_id = i + 1;
286
287 TrackExtends& trex = moov->extends.tracks[i];
288 trex.track_id = trak.header.track_id;
289 trex.default_sample_description_index = 1;
290
291 bool generate_trak_result = false;
292 switch (stream->stream_type()) {
293 case kStreamVideo:
294 generate_trak_result = GenerateVideoTrak(
295 static_cast<const VideoStreamInfo*>(stream), &trak);
296 break;
297 case kStreamAudio:
298 generate_trak_result = GenerateAudioTrak(
299 static_cast<const AudioStreamInfo*>(stream), &trak);
300 break;
301 case kStreamText:
302 generate_trak_result = GenerateTextTrak(
303 static_cast<const TextStreamInfo*>(stream), &trak);
304 break;
305 default:
306 NOTIMPLEMENTED() << "Not implemented for stream type: "
307 << stream->stream_type();
308 }
309 if (!generate_trak_result)
310 return Status(error::MUXER_FAILURE, "Failed to generate trak.");
311
312 // Generate EditList if needed. See UpdateEditListOffsetFromSample() for
313 // more information.
314 if (edit_list_offset_.value() > 0) {
315 EditListEntry entry;
316 entry.media_time = edit_list_offset_.value();
317 entry.media_rate_integer = 1;
318 trak.edit.list.edits.push_back(entry);
319 }
320
321 if (stream->is_encrypted() && options().mp4_params.include_pssh_in_stream) {
322 moov->pssh.clear();
323 const auto& key_system_info = stream->encryption_config().key_system_info;
324 for (const ProtectionSystemSpecificInfo& system : key_system_info) {
325 if (system.psshs.empty())
326 continue;
327 ProtectionSystemSpecificHeader pssh;
328 pssh.raw_box = system.psshs;
329 moov->pssh.push_back(pssh);
330 }
331 }
332 }
333
334 if (options().segment_template.empty()) {
335 segmenter_.reset(new SingleSegmentSegmenter(options(), std::move(ftyp),
336 std::move(moov)));
337 } else if (options().mp4_params.low_latency_dash_mode) {
338 segmenter_.reset(new LowLatencySegmentSegmenter(options(), std::move(ftyp),
339 std::move(moov)));
340 } else {
341 segmenter_.reset(
342 new MultiSegmentSegmenter(options(), std::move(ftyp), std::move(moov)));
343 }
344
345 const Status segmenter_initialized =
346 segmenter_->Initialize(streams(), muxer_listener(), progress_listener());
347 if (!segmenter_initialized.ok())
348 return segmenter_initialized;
349
350 FireOnMediaStartEvent();
351 return Status::OK;
352}
353
354Status MP4Muxer::UpdateEditListOffsetFromSample(const MediaSample& sample) {
355 if (edit_list_offset_)
356 return Status::OK;
357
358 const int64_t pts = sample.pts();
359 const int64_t dts = sample.dts();
360 // An EditList entry is inserted if one of the below conditions occur [4]:
361 // (1) pts > dts for the first sample. Due to Chrome's dts bug [1], dts is
362 // used in buffered range API, while pts is used elsewhere (players,
363 // manifests, and Chrome's own appendWindow check etc.), this
364 // inconsistency creates various problems, including possible stalls
365 // during playback. Since Chrome adjusts pts only when seeing EditList
366 // [2], we can insert an EditList with the time equal to difference of pts
367 // and dts to make aligned buffered ranges using pts and dts. This
368 // effectively workarounds the dts bug. It is also recommended by ISO-BMFF
369 // specification [3].
370 // (2) pts == dts and with pts < 0. This happens for some audio codecs where a
371 // negative presentation timestamp signals that the sample is not supposed
372 // to be shown, i.e. for audio priming. EditList is needed to encode
373 // negative timestamps.
374 // [1] https://crbug.com/718641, fixed but behind MseBufferByPts, still not
375 // enabled as of M67.
376 // [2] This is actually a bug, see https://crbug.com/354518. It looks like
377 // Chrome is planning to enable the fix for [1] before addressing this
378 // bug, so we are safe.
379 // [3] ISO 14496-12:2015 8.6.6.1
380 // It is recommended that such an edit be used to establish a presentation
381 // time of 0 for the first presented sample, when composition offsets are
382 // used.
383 // [4] ISO 23009-19:2018 7.5.13
384 // In two cases, an EditBox containing a single EditListBox with the
385 // following constraints may be present in the CMAF header of a CMAF track
386 // to adjust the presentation time of all media samples in the CMAF track.
387 // a) The first case is a video CMAF track file using v0 TrackRunBoxes
388 // with positive composition offsets to reorder video media samples.
389 // b) The second case is an audio CMAF track where each media sample's
390 // presentation time does not equal its composition time.
391 const int64_t pts_dts_offset = pts - dts;
392 if (pts_dts_offset > 0) {
393 if (pts < 0) {
394 LOG(ERROR) << "Negative presentation timestamp (" << pts
395 << ") is not supported when there is an offset between "
396 "presentation timestamp and decoding timestamp ("
397 << dts << ").";
398 return Status(error::MUXER_FAILURE,
399 "Unsupported negative pts when there is an offset between "
400 "pts and dts.");
401 }
402 edit_list_offset_ = pts_dts_offset;
403 return Status::OK;
404 }
405 if (pts_dts_offset < 0) {
406 LOG(ERROR) << "presentation timestamp (" << pts
407 << ") is not supposed to be greater than decoding timestamp ("
408 << dts << ").";
409 return Status(error::MUXER_FAILURE, "Not expecting pts < dts.");
410 }
411 edit_list_offset_ = std::max(-sample.pts(), static_cast<int64_t>(0));
412 return Status::OK;
413}
414
415void MP4Muxer::InitializeTrak(const StreamInfo* info, Track* trak) {
416 int64_t now = IsoTimeNow();
417 trak->header.creation_time = now;
418 trak->header.modification_time = now;
419 trak->header.duration = 0;
420 trak->media.header.creation_time = now;
421 trak->media.header.modification_time = now;
422 trak->media.header.timescale = info->time_scale();
423 trak->media.header.duration = 0;
424 if (!info->language().empty()) {
425 // Strip off the subtag, if any.
426 std::string main_language = info->language();
427 size_t dash = main_language.find('-');
428 if (dash != std::string::npos) {
429 main_language.erase(dash);
430 }
431
432 // ISO-639-2/T main language code should be 3 characters.
433 if (main_language.size() != 3) {
434 LOG(WARNING) << "'" << main_language << "' is not a valid ISO-639-2 "
435 << "language code, ignoring.";
436 } else {
437 trak->media.header.language.code = main_language;
438 }
439 }
440}
441
442bool MP4Muxer::GenerateVideoTrak(const VideoStreamInfo* video_info,
443 Track* trak) {
444 InitializeTrak(video_info, trak);
445
446 // width and height specify the track's visual presentation size as
447 // fixed-point 16.16 values.
448 uint32_t pixel_width = video_info->pixel_width();
449 uint32_t pixel_height = video_info->pixel_height();
450 if (pixel_width == 0 || pixel_height == 0) {
451 LOG(WARNING) << "pixel width/height are not set. Assuming 1:1.";
452 pixel_width = 1;
453 pixel_height = 1;
454 }
455 const double sample_aspect_ratio =
456 static_cast<double>(pixel_width) / pixel_height;
457 trak->header.width = video_info->width() * sample_aspect_ratio * 0x10000;
458 trak->header.height = video_info->height() * 0x10000;
459
460 VideoSampleEntry video;
461 video.format =
462 CodecToFourCC(video_info->codec(), video_info->h26x_stream_format());
463 video.width = video_info->width();
464 video.height = video_info->height();
465 video.colr.raw_box = video_info->colr_data();
466 video.codec_configuration.data = video_info->codec_config();
467 if (!video.ParseExtraCodecConfigsVector(video_info->extra_config())) {
468 LOG(ERROR) << "Malformed extra codec configs: "
469 << absl::BytesToHexString(
470 absl::string_view(reinterpret_cast<const char*>(
471 video_info->extra_config().data()),
472 video_info->extra_config().size()));
473 return false;
474 }
475 if (pixel_width != 1 || pixel_height != 1) {
476 video.pixel_aspect.h_spacing = pixel_width;
477 video.pixel_aspect.v_spacing = pixel_height;
478 }
479
480 SampleDescription& sample_description =
481 trak->media.information.sample_table.description;
482 sample_description.type = kVideo;
483 sample_description.video_entries.push_back(video);
484
485 if (video_info->is_encrypted()) {
486 if (video_info->has_clear_lead()) {
487 // Add a second entry for clear content.
488 sample_description.video_entries.push_back(video);
489 }
490 // Convert the first entry to an encrypted entry.
491 VideoSampleEntry& entry = sample_description.video_entries[0];
492 GenerateSinf(entry.format, video_info->encryption_config(), &entry.sinf);
493 entry.format = FOURCC_encv;
494 }
495 return true;
496}
497
498bool MP4Muxer::GenerateAudioTrak(const AudioStreamInfo* audio_info,
499 Track* trak) {
500 InitializeTrak(audio_info, trak);
501
502 trak->header.volume = 0x100;
503
504 AudioSampleEntry audio;
505 audio.format =
506 CodecToFourCC(audio_info->codec(), H26xStreamFormat::kUnSpecified);
507 switch(audio_info->codec()){
508 case kCodecAAC: {
509 DecoderConfigDescriptor* decoder_config =
510 audio.esds.es_descriptor.mutable_decoder_config_descriptor();
511 decoder_config->set_object_type(ObjectType::kISO_14496_3); // MPEG4 AAC.
512 decoder_config->set_max_bitrate(audio_info->max_bitrate());
513 decoder_config->set_avg_bitrate(audio_info->avg_bitrate());
514 decoder_config->mutable_decoder_specific_info_descriptor()->set_data(
515 audio_info->codec_config());
516 break;
517 }
518 case kCodecDTSC:
519 case kCodecDTSH:
520 case kCodecDTSL:
521 case kCodecDTSE:
522 case kCodecDTSM:
523 audio.ddts.extra_data = audio_info->codec_config();
524 audio.ddts.max_bitrate = audio_info->max_bitrate();
525 audio.ddts.avg_bitrate = audio_info->avg_bitrate();
526 audio.ddts.sampling_frequency = audio_info->sampling_frequency();
527 audio.ddts.pcm_sample_depth = audio_info->sample_bits();
528 break;
529 case kCodecDTSX:
530 audio.udts.data = audio_info->codec_config();
531 break;
532 case kCodecAC3:
533 audio.dac3.data = audio_info->codec_config();
534 break;
535 case kCodecEAC3:
536 audio.dec3.data = audio_info->codec_config();
537 break;
538 case kCodecAC4:
539 audio.dac4.data = audio_info->codec_config();
540 break;
541 case kCodecALAC:
542 audio.alac.data = audio_info->codec_config();
543 break;
544 case kCodecFlac:
545 audio.dfla.data = audio_info->codec_config();
546 break;
547 case kCodecMP3: {
548 DecoderConfigDescriptor* decoder_config =
549 audio.esds.es_descriptor.mutable_decoder_config_descriptor();
550 uint32_t samplerate = audio_info->sampling_frequency();
551 if (samplerate < 32000)
552 decoder_config->set_object_type(ObjectType::kISO_13818_3_MPEG1);
553 else
554 decoder_config->set_object_type(ObjectType::kISO_11172_3_MPEG1);
555 decoder_config->set_max_bitrate(audio_info->max_bitrate());
556 decoder_config->set_avg_bitrate(audio_info->avg_bitrate());
557
558 // For values of DecoderConfigDescriptor.objectTypeIndication
559 // that refer to streams complying with ISO/IEC 11172-3 or
560 // ISO/IEC 13818-3 the decoder specific information is empty
561 // since all necessary data is contained in the bitstream frames
562 // itself.
563 break;
564 }
565 case kCodecOpus:
566 audio.dops.opus_identification_header = audio_info->codec_config();
567 break;
568 case kCodecIAMF:
569 audio.iacb.data = audio_info->codec_config();
570 break;
571 case kCodecMha1:
572 case kCodecMhm1:
573 audio.mhac.data = audio_info->codec_config();
574 break;
575 default:
576 NOTIMPLEMENTED() << " Unsupported audio codec " << audio_info->codec();
577 return false;
578 }
579
580 if (audio_info->codec() == kCodecAC3 || audio_info->codec() == kCodecEAC3) {
581 // AC3 and EC3 does not fill in actual channel count and sample size in
582 // sample description entry. Instead, two constants are used.
583 audio.channelcount = 2;
584 audio.samplesize = 16;
585 } else if (audio_info->codec() == kCodecAC4) {
586 //ETSI TS 103 190-2, E.4.5 channelcount should be set to the total number of
587 //audio outputchannels of the default audio presentation of that track
588 audio.channelcount = audio_info->num_channels();
589 //ETSI TS 103 190-2, E.4.6 samplesize shall be set to 16.
590 audio.samplesize = 16;
591 } else if (audio_info->codec() == kCodecIAMF) {
592 // IAMF sets channelcount to 0
593 // https://aomediacodec.github.io/iamf/#iasampleentry-section
594 audio.channelcount = 0;
595 } else {
596 audio.channelcount = audio_info->num_channels();
597 audio.samplesize = audio_info->sample_bits();
598 }
599
600 // IAMF sets samplerate to 0
601 // https://aomediacodec.github.io/iamf/#iasampleentry-section
602 audio.samplerate =
603 audio_info->codec() == kCodecIAMF ? 0 : audio_info->sampling_frequency();
604
605 SampleTable& sample_table = trak->media.information.sample_table;
606 SampleDescription& sample_description = sample_table.description;
607 sample_description.type = kAudio;
608 sample_description.audio_entries.push_back(audio);
609
610 if (audio_info->is_encrypted()) {
611 if (audio_info->has_clear_lead()) {
612 // Add a second entry for clear content.
613 sample_description.audio_entries.push_back(audio);
614 }
615 // Convert the first entry to an encrypted entry.
616 AudioSampleEntry& entry = sample_description.audio_entries[0];
617 GenerateSinf(entry.format, audio_info->encryption_config(), &entry.sinf);
618 entry.format = FOURCC_enca;
619 }
620
621 if (audio_info->seek_preroll_ns() > 0) {
622 sample_table.sample_group_descriptions.resize(1);
623 SampleGroupDescription& sample_group_description =
624 sample_table.sample_group_descriptions.back();
625 sample_group_description.grouping_type = FOURCC_roll;
626 sample_group_description.audio_roll_recovery_entries.resize(1);
627 sample_group_description.audio_roll_recovery_entries[0].roll_distance =
628 GetRollDistance(audio_info->seek_preroll_ns(), audio.samplerate);
629 // sample to group box is not allowed in the init segment per CMAF
630 // specification. It is put in the fragment instead.
631 }
632 return true;
633}
634
635bool MP4Muxer::GenerateTextTrak(const TextStreamInfo* text_info,
636 Track* trak) {
637 InitializeTrak(text_info, trak);
638
639 if (text_info->codec_string() == "wvtt") {
640 // Handle WebVTT.
641 TextSampleEntry webvtt;
642 webvtt.format = FOURCC_wvtt;
643
644 // 14496-30:2014 7.5 Web Video Text Tracks Sample entry format.
645 // In the sample entry, a WebVTT configuration box must occur, carrying
646 // exactly the lines of the WebVTT file header, i.e. all text lines up to
647 // but excluding the 'two or more line terminators' that end the header.
648 webvtt.config.config = "WEBVTT";
649 // The spec does not define a way to carry STYLE and REGION information in
650 // the mp4 container.
651 if (!text_info->regions().empty() || !text_info->css_styles().empty()) {
652 LOG(INFO) << "Skipping possible style / region configuration as the spec "
653 "does not define a way to carry them inside ISO-BMFF files.";
654 }
655
656 // TODO(rkuroiwa): This should be the source file URI(s). Putting bogus
657 // string for now so that the box will be there for samples with overlapping
658 // cues.
659 webvtt.label.source_label = "source_label";
660 SampleDescription& sample_description =
661 trak->media.information.sample_table.description;
662 sample_description.type = kText;
663 sample_description.text_entries.push_back(webvtt);
664 return true;
665 } else if (text_info->codec_string() == "ttml") {
666 // Handle TTML.
667 TextSampleEntry ttml;
668 ttml.format = FOURCC_stpp;
669 ttml.namespace_ = ttml::TtmlGenerator::kTtNamespace;
670
671 SampleDescription& sample_description =
672 trak->media.information.sample_table.description;
673 sample_description.type = kSubtitle;
674 sample_description.text_entries.push_back(ttml);
675 return true;
676 }
677 NOTIMPLEMENTED() << text_info->codec_string()
678 << " handling not implemented yet.";
679 return false;
680}
681
682std::optional<Range> MP4Muxer::GetInitRangeStartAndEnd() {
683 size_t range_offset = 0;
684 size_t range_size = 0;
685 const bool has_range = segmenter_->GetInitRange(&range_offset, &range_size);
686
687 if (!has_range)
688 return std::nullopt;
689
690 Range range;
691 SetStartAndEndFromOffsetAndSize(range_offset, range_size, &range);
692 return range;
693}
694
695std::optional<Range> MP4Muxer::GetIndexRangeStartAndEnd() {
696 size_t range_offset = 0;
697 size_t range_size = 0;
698 const bool has_range = segmenter_->GetIndexRange(&range_offset, &range_size);
699
700 if (!has_range)
701 return std::nullopt;
702
703 Range range;
704 SetStartAndEndFromOffsetAndSize(range_offset, range_size, &range);
705 return range;
706}
707
708void MP4Muxer::FireOnMediaStartEvent() {
709 if (!muxer_listener())
710 return;
711
712 if (streams().size() > 1) {
713 LOG(ERROR) << "MuxerListener cannot take more than 1 stream.";
714 return;
715 }
716 DCHECK(!streams().empty()) << "Media started without a stream.";
717
718 const int32_t timescale = segmenter_->GetReferenceTimeScale();
719 muxer_listener()->OnMediaStart(options(), *streams().front(), timescale,
720 MuxerListener::kContainerMp4);
721}
722
723void MP4Muxer::FireOnMediaEndEvent() {
724 if (!muxer_listener())
725 return;
726
727 MuxerListener::MediaRanges media_range;
728 media_range.init_range = GetInitRangeStartAndEnd();
729 media_range.index_range = GetIndexRangeStartAndEnd();
730 media_range.subsegment_ranges = segmenter_->GetSegmentRanges();
731
732 const float duration_seconds = static_cast<float>(segmenter_->GetDuration());
733 muxer_listener()->OnMediaEnd(media_range, duration_seconds);
734}
735
736uint64_t MP4Muxer::IsoTimeNow() {
737 // Time in seconds from Jan. 1, 1904 to epoch time, i.e. Jan. 1, 1970.
738 const uint64_t kIsomTimeOffset = 2082844800l;
739
740 // Get the current system time since January 1, 1970, in seconds.
741 std::int64_t secondsSince1970 = Now();
742
743 // Add the offset of seconds between January 1, 1970, and January 1, 1904.
744 return secondsSince1970 + kIsomTimeOffset;
745}
746
747} // namespace mp4
748} // namespace media
749} // namespace shaka
virtual void OnMediaEnd(const MediaRanges &media_ranges, float duration_seconds)=0
virtual void OnMediaStart(const MuxerOptions &muxer_options, const StreamInfo &stream_info, int32_t time_scale, ContainerType container_type)=0
MP4Muxer(const MuxerOptions &options)
Create an MP4Muxer object from MuxerOptions.
Definition mp4_muxer.cc:169
All the methods that are virtual are virtual for mocking.
This structure contains the list of configuration options for Muxer.