5 #include <packager/media/formats/webm/webm_cluster_parser.h>
10 #include <absl/base/internal/endian.h>
11 #include <absl/log/check.h>
12 #include <absl/log/log.h>
14 #include <packager/macros/logging.h>
15 #include <packager/media/base/timestamp.h>
16 #include <packager/media/codecs/vp8_parser.h>
17 #include <packager/media/codecs/vp9_parser.h>
18 #include <packager/media/codecs/webvtt_util.h>
19 #include <packager/media/formats/webm/webm_constants.h>
20 #include <packager/media/formats/webm/webm_crypto_helpers.h>
21 #include <packager/media/formats/webm/webm_webvtt_parser.h>
// Conversion factor between milliseconds and microseconds. In this file it
// divides the timecode scale when computing timecode_multiplier_ and scales
// the hardcoded *InMs default buffer durations.
27 const int64_t kMicrosecondsPerMillisecond = 1000;
32 int64_t timecode_scale,
33 std::shared_ptr<AudioStreamInfo> audio_stream_info,
34 std::shared_ptr<VideoStreamInfo> video_stream_info,
36 int64_t audio_default_duration,
37 int64_t video_default_duration,
38 const WebMTracksParser::TextTracks& text_tracks,
39 const std::set<int64_t>& ignored_tracks,
40 const std::string& audio_encryption_key_id,
41 const std::string& video_encryption_key_id,
45 : timecode_multiplier_(timecode_scale /
46 static_cast<double>(kMicrosecondsPerMillisecond)),
47 audio_stream_info_(audio_stream_info),
48 video_stream_info_(video_stream_info),
49 vp_config_(vp_config),
50 ignored_tracks_(ignored_tracks),
51 audio_encryption_key_id_(audio_encryption_key_id),
52 video_encryption_key_id_(video_encryption_key_id),
53 parser_(kWebMIdCluster, this),
56 cluster_start_time_(kNoTimestamp),
57 audio_(audio_stream_info ? audio_stream_info->track_id() : -1,
59 audio_default_duration,
61 video_(video_stream_info ? video_stream_info->track_id() : -1,
63 video_default_duration,
65 if (decryption_key_source) {
67 if (audio_stream_info_)
68 audio_stream_info_->set_is_encrypted(
false);
69 if (video_stream_info_)
70 video_stream_info_->set_is_encrypted(
false);
72 for (WebMTracksParser::TextTracks::const_iterator it = text_tracks.begin();
73 it != text_tracks.end();
75 text_track_map_.insert(std::make_pair(
76 it->first, Track(it->first,
false, kNoTimestamp, new_sample_cb)));
// Destructor. The body is empty; members are cleaned up by their own
// destructors.
80 WebMClusterParser::~WebMClusterParser() {}
83 last_block_timecode_ = -1;
84 cluster_timecode_ = -1;
85 cluster_start_time_ = kNoTimestamp;
86 cluster_ended_ =
false;
95 bool audio_result = audio_.ApplyDurationEstimateIfNeeded();
96 bool video_result = video_.ApplyDurationEstimateIfNeeded();
98 return audio_result && video_result;
102 int result = parser_.
Parse(buf, size);
105 cluster_ended_ =
false;
110 if (cluster_ended_) {
113 if (cluster_start_time_ == kNoTimestamp) {
116 if (cluster_timecode_ < 0)
119 cluster_start_time_ = cluster_timecode_ * timecode_multiplier_;
127 last_block_timecode_ = -1;
128 cluster_timecode_ = -1;
135 if (
id == kWebMIdCluster) {
136 cluster_timecode_ = -1;
137 cluster_start_time_ = kNoTimestamp;
138 }
else if (
id == kWebMIdBlockGroup) {
140 block_data_size_ = -1;
141 block_duration_ = -1;
142 discard_padding_ = -1;
143 discard_padding_set_ =
false;
144 reference_block_set_ =
false;
145 }
else if (
id == kWebMIdBlockAdditions) {
147 block_additional_data_.reset();
148 block_additional_data_size_ = 0;
// Handles the end of a list element identified by |id|.
// The visible logic concerns kWebMIdBlockGroup: the Block buffered for the
// group is parsed, then the per-BlockGroup state is cleared.
// NOTE(review): the statement guarded by the id check, the early-exit after
// the error log and the final return are not visible in this excerpt.
154 bool WebMClusterParser::OnListEnd(
int id) {
155 if (
id != kWebMIdBlockGroup)
// block_data_size_ == -1 is reported as a BlockGroup without a Block.
159 if (block_data_size_ == -1) {
160 LOG(ERROR) <<
"Block missing from BlockGroup.";
// Parse the buffered data as a non-simple block (is_simple_block == false).
// The discard padding passed on is 0 unless a DiscardPadding element was seen.
164 bool result = ParseBlock(
165 false, block_data_.get(), block_data_size_, block_additional_data_.get(),
166 block_additional_data_size_, block_duration_,
167 discard_padding_set_ ? discard_padding_ : 0, reference_block_set_);
// Clear the per-BlockGroup state (-1 / false / empty mean "not set").
169 block_data_size_ = -1;
170 block_duration_ = -1;
172 block_additional_data_.reset();
173 block_additional_data_size_ = 0;
174 discard_padding_ = -1;
175 discard_padding_set_ =
false;
176 reference_block_set_ =
false;
// Handles an unsigned-integer element |id| carrying |val|.
// The visible cases pick the member that corresponds to the element:
//   kWebMIdTimecode      -> cluster_timecode_
//   kWebMIdBlockDuration -> block_duration_
//   kWebMIdBlockAddID    -> block_add_id_
// NOTE(review): how |dst| is subsequently used (and the default case) is not
// visible in this excerpt.
180 bool WebMClusterParser::OnUInt(
int id, int64_t val) {
183 case kWebMIdTimecode:
184 dst = &cluster_timecode_;
186 case kWebMIdBlockDuration:
187 dst = &block_duration_;
189 case kWebMIdBlockAddID:
190 dst = &block_add_id_;
// Decodes the 4-byte header of a (Simple)Block held in |buf| and forwards the
// remaining bytes to OnBlock() as the frame payload.
// Header layout as read by the code below:
//   buf[0]    track number (high bit 0x80 must be set, low 7 bits are the value)
//   buf[1..2] 16-bit timecode, most significant byte first
//   buf[3]    flags (bits 1-2: lacing; bit 0x80: used for simple blocks)
// NOTE(review): several parameters and some statements (size checks, early
// returns) are not visible in this excerpt.
201 bool WebMClusterParser::ParseBlock(
bool is_simple_block,
204 const uint8_t* additional,
207 int64_t discard_padding,
208 bool reference_block_set) {
// A first byte without the 0x80 bit is rejected with the error below.
214 if (!(buf[0] & 0x80)) {
215 LOG(ERROR) <<
"TrackNumber over 127 not supported";
219 int track_num = buf[0] & 0x7f;
220 int timecode = buf[1] << 8 | buf[2];
221 int flags = buf[3] & 0xff;
222 int lacing = (flags >> 1) & 0x3;
225 LOG(ERROR) <<
"Lacing " << lacing <<
" is not supported yet.";
// 0x8000 is the top bit of the 16-bit timecode. The guarded statement is not
// visible here (presumably sign extension of a negative offset - confirm).
230 if (timecode & 0x8000)
// Tail of an expression whose left-hand side is not visible: for a simple
// block the 0x80 flag bit is used, otherwise the absence of a ReferenceBlock.
238 is_simple_block ? (flags & 0x80) != 0 : !reference_block_set;
// The payload starts right after the 4 header bytes.
240 const uint8_t* frame_data = buf + 4;
241 int frame_size = size - (frame_data - buf);
242 return OnBlock(is_simple_block, track_num, timecode, duration, frame_data,
243 frame_size, additional, additional_size, discard_padding,
// Handles a binary element |id| whose payload is |data| / |size|.
// Visible cases: SimpleBlock, (BlockGroup) Block, BlockAdditional,
// DiscardPadding and ReferenceBlock.
// NOTE(review): some case labels, break/return statements and the default
// case are not visible in this excerpt.
247 bool WebMClusterParser::OnBinary(
int id,
const uint8_t* data,
int size) {
// A SimpleBlock is parsed immediately: no additional data, duration -1,
// discard padding 0, and no reference block.
249 case kWebMIdSimpleBlock:
250 return ParseBlock(
true, data, size, NULL, 0, -1, 0,
false);
// Block inside a BlockGroup: a repeated Block is reported as an error (the
// triggering condition is not visible); otherwise the payload is copied into
// block_data_.
254 LOG(ERROR) <<
"More than 1 Block in a BlockGroup is not "
258 block_data_.reset(
new uint8_t[size]);
259 memcpy(block_data_.get(), data, size);
260 block_data_size_ = size;
263 case kWebMIdBlockAdditional: {
// block_add_id_ is converted with absl::big_endian::FromHost64 so that the
// bytes copied below are in big-endian order.
264 uint64_t block_add_id = absl::big_endian::FromHost64(block_add_id_);
265 if (block_additional_data_) {
270 LOG(ERROR) <<
"More than 1 BlockAdditional in a "
271 "BlockGroup is not supported.";
// Stored layout: the 8-byte block_add_id followed by the raw payload.
277 block_additional_data_size_ = size +
sizeof(block_add_id);
278 block_additional_data_.reset(
new uint8_t[block_additional_data_size_]);
279 memcpy(block_additional_data_.get(), &block_add_id,
280 sizeof(block_add_id));
// Offset 8 equals sizeof(block_add_id) for the uint64_t declared above.
281 memcpy(block_additional_data_.get() + 8, data, size);
284 case kWebMIdDiscardPadding: {
// Checks for a repeated DiscardPadding or a payload outside 1..8 bytes; the
// guarded statement is not visible in this excerpt.
285 if (discard_padding_set_ || size <= 0 || size > 8)
287 discard_padding_set_ =
true;
// Read the payload as a signed integer, most significant byte first: the
// first byte is sign-extended through int8_t, the rest are shifted in.
290 discard_padding_ =
static_cast<int8_t
>(data[0]);
291 for (
int i = 1; i < size; ++i)
292 discard_padding_ = (discard_padding_ << 8) | data[i];
// Only the presence of a ReferenceBlock is recorded.
296 case kWebMIdReferenceBlock:
300 reference_block_set_ =
true;
// Turns one decoded block into a MediaSample and hands it to the Track that
// matches |track_num|.
// Visible steps: validate the timecode, pick the stream type and encryption
// key ID from the track number, build the sample (optionally creating a
// decrypt config and decrypting), set dts/pts/duration, run the one-time
// stream initialization for the first sample, and emit the sample.
// NOTE(review): most parameters, many early returns and several else-branches
// are not visible in this excerpt; comments below describe visible lines only.
307 bool WebMClusterParser::OnBlock(
bool is_simple_block,
313 const uint8_t* additional,
// A block is only accepted after the cluster timecode has been set.
318 if (cluster_timecode_ == -1) {
319 LOG(ERROR) <<
"Got a block before cluster timecode.";
326 LOG(ERROR) <<
"Got a block with negative timecode offset " << timecode;
// Block timecodes must not go backwards.
330 if (last_block_timecode_ != -1 && timecode < last_block_timecode_) {
331 LOG(ERROR) <<
"Got a block with a timecode before the previous block.";
// Map the track number to a stream type; audio and video also select the
// encryption key ID to use.
336 StreamType stream_type = kStreamUnknown;
337 std::string encryption_key_id;
338 if (track_num == audio_.track_num()) {
340 encryption_key_id = audio_encryption_key_id_;
341 stream_type = kStreamAudio;
342 }
else if (track_num == video_.track_num()) {
344 encryption_key_id = video_encryption_key_id_;
345 stream_type = kStreamVideo;
346 }
else if (ignored_tracks_.find(track_num) != ignored_tracks_.end()) {
348 }
else if (Track*
const text_track = FindTextTrack(track_num)) {
351 if (block_duration < 0)
354 stream_type = kStreamText;
356 LOG(ERROR) <<
"Unexpected track number " << track_num;
359 DCHECK_NE(stream_type, kStreamUnknown);
361 last_block_timecode_ = timecode;
// Absolute timestamp: cluster timecode plus block offset, scaled by
// timecode_multiplier_.
363 int64_t timestamp = (cluster_timecode_ + timecode) * timecode_multiplier_;
365 std::shared_ptr<MediaSample> buffer;
366 if (stream_type != kStreamText) {
370 std::unique_ptr<DecryptConfig> decrypt_config;
// With a non-empty key ID, WebMCreateDecryptConfig() fills decrypt_config and
// data_offset; its failure enters the (not visible) error branch.
372 if (!encryption_key_id.empty() &&
373 !WebMCreateDecryptConfig(
375 reinterpret_cast<const uint8_t*
>(encryption_key_id.data()),
376 encryption_key_id.size(),
377 &decrypt_config, &data_offset)) {
// The media payload starts data_offset bytes into the block data.
381 const uint8_t* media_data = data + data_offset;
382 const size_t media_data_size = size - data_offset;
385 const size_t kDummyDataSize = 0;
387 additional_size, is_key_frame);
389 if (decrypt_config) {
// No decryptor: the sample keeps the original payload, carries the decrypt
// config and is flagged as encrypted.
390 if (!decryptor_source_) {
391 buffer->SetData(media_data, media_data_size);
394 buffer->set_decrypt_config(std::move(decrypt_config));
395 buffer->set_is_encrypted(
true);
// Decryptor available (presumably the else-branch; that line is not visible):
// decrypt into a newly allocated array and transfer it to the sample.
397 std::shared_ptr<uint8_t> decrypted_media_data(
398 new uint8_t[media_data_size], std::default_delete<uint8_t[]>());
399 if (!decryptor_source_->DecryptSampleBuffer(
400 decrypt_config.get(), media_data, media_data_size,
401 decrypted_media_data.get())) {
402 LOG(ERROR) <<
"Cannot decrypt samples";
405 buffer->TransferData(std::move(decrypted_media_data), media_data_size);
// Presumably the branch without a decrypt config (branch line not visible):
// the payload is stored as-is.
408 buffer->SetData(media_data, media_data_size);
// Text path: id / settings / content are packed into side_data via
// MakeSideData() (how they are populated is not visible here).
411 std::string id, settings, content;
414 std::vector<uint8_t> side_data;
415 MakeSideData(
id.begin(),
id.end(),
416 settings.begin(), settings.end(),
420 reinterpret_cast<const uint8_t*
>(content.data()), content.length(),
421 &side_data[0], side_data.size(),
true);
// dts and pts are both set to the computed timestamp.
424 buffer->set_dts(timestamp);
425 buffer->set_pts(timestamp);
// The first timestamp seen becomes the cluster start time if none is set.
426 if (cluster_start_time_ == kNoTimestamp)
427 cluster_start_time_ = timestamp;
// A positive block duration is scaled by timecode_multiplier_; the
// alternative value is not visible in this excerpt.
428 buffer->set_duration(block_duration > 0
429 ? (block_duration * timecode_multiplier_)
// One-time initialization, guarded by init_cb_ and initialized_: collect the
// stream infos to report.
432 if (init_cb_ && !initialized_) {
433 std::vector<std::shared_ptr<StreamInfo>> streams;
434 if (audio_stream_info_)
435 streams.push_back(audio_stream_info_);
436 if (video_stream_info_) {
437 if (stream_type == kStreamVideo) {
// For codecs other than AV1, a VP8/VP9 parser inspects the sample so the
// codec configuration can be merged into vp_config_.
441 if (video_stream_info_->codec() != kCodecAV1) {
442 std::unique_ptr<VPxParser> vpx_parser;
443 switch (video_stream_info_->codec()) {
445 vpx_parser.reset(
new VP8Parser);
448 vpx_parser.reset(
new VP9Parser);
452 <<
"Unsupported codec " << video_stream_info_->codec();
455 std::vector<VPxFrameInfo> vpx_frames;
456 if (!vpx_parser->Parse(buffer->data(), buffer->data_size(),
458 LOG(ERROR) <<
"Failed to parse vpx frame.";
// Exactly one parsed frame is expected, and it must be a key frame.
461 if (vpx_frames.size() != 1u || !vpx_frames[0].is_keyframe) {
462 LOG(ERROR) <<
"The first frame should be a key frame.";
// Merge the parsed codec config, then publish the codec string and the
// serialized (WriteMP4) configuration on the video stream info.
466 vp_config_.
MergeFrom(vpx_parser->codec_config());
467 video_stream_info_->set_codec_string(
469 std::vector<uint8_t> config_serialized;
470 vp_config_.
WriteMP4(&config_serialized);
471 video_stream_info_->set_codec_config(config_serialized);
474 streams.push_back(video_stream_info_);
// Hand the finished sample to the selected track.
484 return track->EmitBuffer(buffer);
// Constructs a Track: stores the track number, the default duration and the
// new-sample callback, and starts with no frame-duration estimate
// (estimated_next_frame_duration_ == kNoTimestamp).
// NOTE(review): some parameters and initializers are not visible in this
// excerpt.
487 WebMClusterParser::Track::Track(
490 int64_t default_duration,
492 : track_num_(track_num),
494 default_duration_(default_duration),
495 estimated_next_frame_duration_(kNoTimestamp),
496 new_sample_cb_(new_sample_cb) {
// The default duration must be either "unset" (kNoTimestamp) or positive.
497 DCHECK(default_duration_ == kNoTimestamp || default_duration_ > 0);
// Destructor. The body is empty.
500 WebMClusterParser::Track::~Track() {}
// Emits |buffer| for this track, holding back at most one buffer whose
// duration is unknown.
// If a previous buffer is held back, its duration is derived from the pts
// difference to |buffer| and it is emitted first. If |buffer| itself has no
// duration (kNoTimestamp) it becomes the new held-back buffer; otherwise it is
// emitted through EmitBufferHelp().
// NOTE(review): the statements following the failed EmitBufferHelp() check
// and the hold-back log are not visible in this excerpt.
502 bool WebMClusterParser::Track::EmitBuffer(
503 const std::shared_ptr<MediaSample>& buffer) {
504 DVLOG(2) <<
"EmitBuffer() : " << track_num_
505 <<
" ts " << buffer->pts()
506 <<
" dur " << buffer->duration()
507 <<
" kf " << buffer->is_key_frame()
508 <<
" size " << buffer->data_size();
// A buffer is waiting for its duration: derive it from the two pts values.
510 if (last_added_buffer_missing_duration_.get()) {
511 int64_t derived_duration =
512 buffer->pts() - last_added_buffer_missing_duration_->pts();
513 last_added_buffer_missing_duration_->set_duration(derived_duration);
515 DVLOG(2) <<
"EmitBuffer() : applied derived duration to held-back buffer : "
517 << last_added_buffer_missing_duration_->pts()
519 << last_added_buffer_missing_duration_->duration()
520 <<
" kf " << last_added_buffer_missing_duration_->is_key_frame()
521 <<
" size " << last_added_buffer_missing_duration_->data_size();
// Clear the member before emitting; EmitBufferHelp() DCHECKs that no buffer
// is held back.
522 std::shared_ptr<MediaSample> updated_buffer =
523 last_added_buffer_missing_duration_;
524 last_added_buffer_missing_duration_ = NULL;
525 if (!EmitBufferHelp(updated_buffer))
// A buffer without a duration is kept until a duration can be determined.
529 if (buffer->duration() == kNoTimestamp) {
530 last_added_buffer_missing_duration_ = buffer;
531 DVLOG(2) <<
"EmitBuffer() : holding back buffer that is missing duration";
535 return EmitBufferHelp(buffer);
// If a buffer is held back because its duration is unknown, assigns it the
// value from GetDurationEstimate(), passes it to new_sample_cb_ and clears the
// held-back slot.
// NOTE(review): the statements taken when nothing is held back or when the
// callback fails, and the final return, are not visible in this excerpt.
538 bool WebMClusterParser::Track::ApplyDurationEstimateIfNeeded() {
539 if (!last_added_buffer_missing_duration_.get())
542 int64_t estimated_duration = GetDurationEstimate();
543 last_added_buffer_missing_duration_->set_duration(estimated_duration);
// The estimate is divided by 1000 for the "ms" figure in the log message.
545 VLOG(1) <<
"Track " << track_num_ <<
": Estimating WebM block duration to be "
546 << estimated_duration / 1000
547 <<
"ms for the last (Simple)Block in the Cluster for this Track. Use "
548 "BlockGroups with BlockDurations at the end of each Track in a "
549 "Cluster to avoid estimation.";
551 DVLOG(2) <<
" new dur : ts " << last_added_buffer_missing_duration_->pts()
552 <<
" dur " << last_added_buffer_missing_duration_->duration()
553 <<
" kf " << last_added_buffer_missing_duration_->is_key_frame()
554 <<
" size " << last_added_buffer_missing_duration_->data_size();
// The callback is invoked directly here rather than through EmitBufferHelp().
558 if (!new_sample_cb_(track_num_, last_added_buffer_missing_duration_))
560 last_added_buffer_missing_duration_ = NULL;
// Drops any held-back buffer that is still waiting for a duration; the
// buffer is not emitted by this function.
564 void WebMClusterParser::Track::Reset() {
565 last_added_buffer_missing_duration_ = NULL;
// Validates the duration of |buffer|, updates the running duration estimate
// and forwards the buffer to new_sample_cb_, returning the callback's result.
// Precondition (DCHECK): no buffer is currently held back.
// NOTE(review): the statement after the invalid-duration log and the line
// between the two estimate assignments are not visible in this excerpt.
568 bool WebMClusterParser::Track::EmitBufferHelp(
569 const std::shared_ptr<MediaSample>& buffer) {
570 DCHECK(!last_added_buffer_missing_duration_.get());
572 int64_t duration = buffer->duration();
// Negative or unset durations are reported as invalid.
573 if (duration < 0 || duration == kNoTimestamp) {
574 LOG(ERROR) <<
"Invalid buffer duration: " << duration;
// Update the estimate: the first duration seen initializes it; the other
// visible assignment keeps the maximum of the current estimate and |duration|.
581 int64_t orig_duration_estimate = estimated_next_frame_duration_;
582 if (estimated_next_frame_duration_ == kNoTimestamp) {
583 estimated_next_frame_duration_ = duration;
585 estimated_next_frame_duration_ =
586 std::max(duration, estimated_next_frame_duration_);
// Log only when the estimate actually changed.
589 if (orig_duration_estimate != estimated_next_frame_duration_) {
590 DVLOG(3) <<
"Updated duration estimate:"
591 << orig_duration_estimate
593 << estimated_next_frame_duration_
599 return new_sample_cb_(track_num_, buffer);
// Chooses a duration for a buffer whose duration is unknown.
// Order of preference in the visible code:
//   1. default_duration_, if set;
//   2. estimated_next_frame_duration_, if set;
//   3. a hardcoded video or audio default (the *InMs constants multiplied by
//      kMicrosecondsPerMillisecond).
// The result is DCHECKed to be positive and not kNoTimestamp.
// NOTE(review): the condition choosing between the video and audio default
// and the return statement are not visible in this excerpt.
602 int64_t WebMClusterParser::Track::GetDurationEstimate() {
603 int64_t duration = kNoTimestamp;
604 if (default_duration_ != kNoTimestamp) {
605 duration = default_duration_;
606 DVLOG(3) << __FUNCTION__ <<
" : using track default duration " << duration;
607 }
else if (estimated_next_frame_duration_ != kNoTimestamp) {
608 duration = estimated_next_frame_duration_;
609 DVLOG(3) << __FUNCTION__ <<
" : using estimated duration " << duration;
612 duration = kDefaultVideoBufferDurationInMs * kMicrosecondsPerMillisecond;
614 duration = kDefaultAudioBufferDurationInMs * kMicrosecondsPerMillisecond;
616 DVLOG(3) << __FUNCTION__ <<
" : using hardcoded default duration "
620 DCHECK_GT(duration, 0);
621 DCHECK_NE(duration, kNoTimestamp);
// Iterates over every entry of text_track_map_.
// NOTE(review): the loop increment and body are not visible in this excerpt;
// presumably each text Track is reset - confirm against the full file.
625 void WebMClusterParser::ResetTextTracks() {
626 for (TextTrackMap::iterator it = text_track_map_.begin();
627 it != text_track_map_.end();
// Looks up |track_num| in text_track_map_ and returns a Track pointer.
// NOTE(review): both return statements are not visible in this excerpt;
// presumably a null pointer is returned when the track is not found and a
// pointer to the mapped Track otherwise - confirm against the full file.
633 WebMClusterParser::Track*
634 WebMClusterParser::FindTextTrack(
int track_num) {
635 const TextTrackMap::iterator it = text_track_map_.find(track_num);
637 if (it == text_track_map_.end())
Every method that is declared virtual is virtual only so that it can be mocked.