Shaka Packager SDK
Loading...
Searching...
No Matches
wvm_media_parser.cc
1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <packager/media/formats/wvm/wvm_media_parser.h>
6
7#include <map>
8#include <sstream>
9#include <vector>
10
11#include <absl/base/internal/endian.h>
12#include <absl/log/check.h>
13#include <absl/strings/str_format.h>
14
15#include <packager/media/base/aes_decryptor.h>
16#include <packager/media/base/audio_stream_info.h>
17#include <packager/media/base/key_source.h>
18#include <packager/media/base/media_sample.h>
19#include <packager/media/codecs/aac_audio_specific_config.h>
20#include <packager/media/codecs/avc_decoder_configuration_record.h>
21#include <packager/media/codecs/es_descriptor.h>
22#include <packager/media/formats/mp2t/adts_header.h>
23
// Evaluates to true when PES stream id |x| is none of the ids listed below.
// Parse() uses this to decide whether the bytes following the PES packet
// length are the PES extension (flags / header data length) or the payload.
// Every use of the argument is parenthesized so that a compound expression
// such as `a | b` is compared as a whole instead of being split by operator
// precedence.
// Note: |x| is evaluated several times; do not pass an expression with side
// effects.
#define HAS_HEADER_EXTENSION(x)                                   \
  (((x) != 0xBC) && ((x) != 0xBE) && ((x) != 0xBF) &&            \
   ((x) != 0xF0) && ((x) != 0xF2) && ((x) != 0xF8) &&            \
   ((x) != 0xFF))
27
namespace {

// Time scale used for every stream created from the index.
constexpr uint32_t kMpeg2ClockRate = 90000;

// Bits tested in the PES flag bytes.
constexpr uint32_t kPesOptPts = 0x80;
constexpr uint32_t kPesOptDts = 0x40;
constexpr uint32_t kPesOptAlign = 0x04;
constexpr uint32_t kScramblingBitsMask = 0x30;

// PES stream ids that receive dedicated handling.
constexpr uint32_t kPsmStreamId = 0xBC;
constexpr uint32_t kPaddingStreamId = 0xBE;
constexpr uint32_t kIndexStreamId = 0xBF;  // private_stream_2
constexpr uint32_t kEcmStreamId = 0xF0;
constexpr uint32_t kV2MetadataStreamId = 0xF1;  // EMM_stream

// Index payload layout.
constexpr uint32_t kIndexMagic = 0x49444d69;
constexpr uint32_t kIndexVersion4HeaderSize = 12;
constexpr uint32_t kVersion4 = 4;

// Start code bytes.
constexpr uint32_t kStartCode1 = 0x00;
constexpr uint32_t kStartCode2 = 0x00;
constexpr uint32_t kStartCode3 = 0x01;
constexpr uint32_t kStartCode4Pack = 0xBA;
constexpr uint32_t kStartCode4System = 0xBB;
constexpr uint32_t kStartCode4ProgramEnd = 0xB9;

// Masks / values used to classify a PES stream id as video or audio.
constexpr uint32_t kPesStreamIdVideoMask = 0xF0;
constexpr uint32_t kPesStreamIdVideo = 0xE0;
constexpr uint32_t kPesStreamIdAudioMask = 0xE0;
constexpr uint32_t kPesStreamIdAudio = 0xC0;

constexpr uint8_t kAacSampleSizeBits = 16;
// Applies to all video streams.
constexpr uint8_t kNaluLengthSize = 4;  // unit is bytes.
// Placeholder sampling frequency for all audio streams, which
// will be overwritten after filter parsing.
constexpr uint32_t kDefaultSamplingFrequency = 100;

constexpr uint16_t kEcmSizeBytes = 80;
constexpr uint32_t kInitializationVectorSizeBytes = 16;
// ECM fields for processing.
constexpr uint32_t kEcmContentKeySizeBytes = 16;
constexpr uint32_t kEcmDCPFlagsSizeBytes = 3;
constexpr uint32_t kEcmCCIFlagsSizeBytes = 1;
constexpr uint32_t kEcmFlagsSizeBytes =
    kEcmCCIFlagsSizeBytes + kEcmDCPFlagsSizeBytes;
constexpr uint32_t kEcmPaddingSizeBytes = 12;
constexpr uint32_t kAssetKeySizeBytes = 16;

// Default audio and video PES stream IDs.
constexpr uint8_t kDefaultAudioStreamId = kPesStreamIdAudio;
constexpr uint8_t kDefaultVideoStreamId = kPesStreamIdVideo;

// Value type codes carried by each index metadata entry.
enum Type {
  Type_void = 0,
  Type_uint8 = 1,
  Type_int8 = 2,
  Type_uint16 = 3,
  Type_int16 = 4,
  Type_uint32 = 5,
  Type_int32 = 6,
  Type_uint64 = 7,
  Type_int64 = 8,
  Type_string = 9,
  Type_BinaryData = 10
};

}  // namespace
86
87namespace shaka {
88namespace media {
89namespace wvm {
90
91WvmMediaParser::WvmMediaParser()
92 : is_initialized_(false),
93 parse_state_(StartCode1),
94 skip_bytes_(0),
95 metadata_is_complete_(false),
96 current_program_id_(0),
97 pes_stream_id_(0),
98 prev_pes_stream_id_(0),
99 pes_packet_bytes_(0),
100 pes_flags_1_(0),
101 pes_flags_2_(0),
102 prev_pes_flags_1_(0),
103 pes_header_data_bytes_(0),
104 timestamp_(0),
105 pts_(0),
106 dts_(0),
107 index_program_id_(0),
108 media_sample_(NULL),
109 crypto_unit_start_pos_(0),
110 stream_id_count_(0),
111 decryption_key_source_(NULL) {}
112
113WvmMediaParser::~WvmMediaParser() {}
114
115void WvmMediaParser::Init(const InitCB& init_cb,
116 const NewMediaSampleCB& new_media_sample_cb,
117 const NewTextSampleCB& new_text_sample_cb,
118 KeySource* decryption_key_source) {
119 DCHECK(!is_initialized_);
120 DCHECK(init_cb != nullptr);
121 DCHECK(new_media_sample_cb != nullptr);
122 decryption_key_source_ = decryption_key_source;
123 init_cb_ = init_cb;
124 new_sample_cb_ = new_media_sample_cb;
125}
126
127bool WvmMediaParser::Parse(const uint8_t* buf, int size) {
128 size_t num_bytes = 0;
129 size_t prev_size = 0;
130 const uint8_t* read_ptr = buf;
131 const uint8_t* end = read_ptr + size;
132
133 while (read_ptr < end) {
134 switch (parse_state_) {
135 case StartCode1:
136 if (*read_ptr == kStartCode1) {
137 parse_state_ = StartCode2;
138 }
139 break;
140 case StartCode2:
141 if (*read_ptr == kStartCode2) {
142 parse_state_ = StartCode3;
143 } else {
144 parse_state_ = StartCode1;
145 }
146 break;
147 case StartCode3:
148 if (*read_ptr == kStartCode3) {
149 parse_state_ = StartCode4;
150 } else {
151 parse_state_ = StartCode1;
152 }
153 break;
154 case StartCode4:
155 switch (*read_ptr) {
156 case kStartCode4Pack:
157 parse_state_ = PackHeader1;
158 break;
159 case kStartCode4System:
160 parse_state_ = SystemHeader1;
161 break;
162 case kStartCode4ProgramEnd:
163 parse_state_ = ProgramEnd;
164 continue;
165 default:
166 parse_state_ = PesStreamId;
167 continue;
168 }
169 break;
170 case PackHeader1:
171 parse_state_ = PackHeader2;
172 break;
173 case PackHeader2:
174 parse_state_ = PackHeader3;
175 break;
176 case PackHeader3:
177 parse_state_ = PackHeader4;
178 break;
179 case PackHeader4:
180 parse_state_ = PackHeader5;
181 break;
182 case PackHeader5:
183 parse_state_ = PackHeader6;
184 break;
185 case PackHeader6:
186 parse_state_ = PackHeader7;
187 break;
188 case PackHeader7:
189 parse_state_ = PackHeader8;
190 break;
191 case PackHeader8:
192 parse_state_ = PackHeader9;
193 break;
194 case PackHeader9:
195 parse_state_ = PackHeader10;
196 break;
197 case PackHeader10:
198 skip_bytes_ = *read_ptr & 0x07;
199 parse_state_ = PackHeaderStuffingSkip;
200 break;
201 case SystemHeader1:
202 skip_bytes_ = *read_ptr;
203 skip_bytes_ <<= 8;
204 parse_state_ = SystemHeader2;
205 break;
206 case SystemHeader2:
207 skip_bytes_ |= *read_ptr;
208 parse_state_ = SystemHeaderSkip;
209 break;
210 case PackHeaderStuffingSkip:
211 if (end >= skip_bytes_ + read_ptr) {
212 read_ptr += skip_bytes_;
213 skip_bytes_ = 0;
214 parse_state_ = StartCode1;
215 } else {
216 skip_bytes_ -= (end - read_ptr);
217 read_ptr = end;
218 }
219 continue;
220 case SystemHeaderSkip:
221 if (end >= skip_bytes_ + read_ptr) {
222 read_ptr += skip_bytes_;
223 skip_bytes_ = 0;
224 parse_state_ = StartCode1;
225 } else {
226 uint32_t remaining_size = end - read_ptr;
227 skip_bytes_ -= remaining_size;
228 read_ptr = end;
229 }
230 continue;
231 case PesStreamId:
232 pes_stream_id_ = *read_ptr;
233 if (!metadata_is_complete_ &&
234 (pes_stream_id_ != kPsmStreamId) &&
235 (pes_stream_id_ != kIndexStreamId) &&
236 (pes_stream_id_ != kEcmStreamId) &&
237 (pes_stream_id_ != kV2MetadataStreamId) &&
238 (pes_stream_id_ != kPaddingStreamId)) {
239 metadata_is_complete_ = true;
240 }
241 parse_state_ = PesPacketLength1;
242 break;
243 case PesPacketLength1:
244 pes_packet_bytes_ = *read_ptr;
245 pes_packet_bytes_ <<= 8;
246 parse_state_ = PesPacketLength2;
247 break;
248 case PesPacketLength2:
249 pes_packet_bytes_ |= *read_ptr;
250 if (HAS_HEADER_EXTENSION(pes_stream_id_)) {
251 parse_state_ = PesExtension1;
252 } else {
253 prev_pes_flags_1_ = pes_flags_1_;
254 pes_flags_1_ = pes_flags_2_ = 0;
255 pes_header_data_bytes_ = 0;
256 parse_state_ = PesPayload;
257 }
258 break;
259 case PesExtension1:
260 prev_pes_flags_1_ = pes_flags_1_;
261 pes_flags_1_ = *read_ptr;
262 --pes_packet_bytes_;
263 parse_state_ = PesExtension2;
264 break;
265 case PesExtension2:
266 pes_flags_2_ = *read_ptr;
267 --pes_packet_bytes_;
268 parse_state_ = PesExtension3;
269 break;
270 case PesExtension3:
271 pes_header_data_bytes_ = *read_ptr;
272 --pes_packet_bytes_;
273 if (pes_flags_2_ & kPesOptPts) {
274 parse_state_ = Pts1;
275 } else {
276 parse_state_ = PesHeaderData;
277 }
278 break;
279 case Pts1:
280 timestamp_ = (*read_ptr & 0x0E);
281 --pes_header_data_bytes_;
282 --pes_packet_bytes_;
283 parse_state_ = Pts2;
284 break;
285 case Pts2:
286 timestamp_ <<= 7;
287 timestamp_ |= *read_ptr;
288 --pes_header_data_bytes_;
289 --pes_packet_bytes_;
290 parse_state_ = Pts3;
291 break;
292 case Pts3:
293 timestamp_ <<= 7;
294 timestamp_ |= *read_ptr >> 1;
295 --pes_header_data_bytes_;
296 --pes_packet_bytes_;
297 parse_state_ = Pts4;
298 break;
299 case Pts4:
300 timestamp_ <<= 8;
301 timestamp_ |= *read_ptr;
302 --pes_header_data_bytes_;
303 --pes_packet_bytes_;
304 parse_state_ = Pts5;
305 break;
306 case Pts5:
307 timestamp_ <<= 7;
308 timestamp_ |= *read_ptr >> 1;
309 pts_ = timestamp_;
310 --pes_header_data_bytes_;
311 --pes_packet_bytes_;
312 if (pes_flags_2_ & kPesOptDts) {
313 parse_state_ = Dts1;
314 } else {
315 dts_ = pts_;
316 parse_state_ = PesHeaderData;
317 }
318 break;
319 case Dts1:
320 timestamp_ = (*read_ptr & 0x0E);
321 --pes_header_data_bytes_;
322 --pes_packet_bytes_;
323 parse_state_ = Dts2;
324 break;
325 case Dts2:
326 timestamp_ <<= 7;
327 timestamp_ |= *read_ptr;
328 --pes_header_data_bytes_;
329 --pes_packet_bytes_;
330 parse_state_ = Dts3;
331 break;
332 case Dts3:
333 timestamp_ <<= 7;
334 timestamp_ |= *read_ptr >> 1;
335 --pes_header_data_bytes_;
336 --pes_packet_bytes_;
337 parse_state_ = Dts4;
338 break;
339 case Dts4:
340 timestamp_ <<= 8;
341 timestamp_ |= *read_ptr;
342 --pes_header_data_bytes_;
343 --pes_packet_bytes_;
344 parse_state_ = Dts5;
345 break;
346 case Dts5:
347 timestamp_ <<= 7;
348 timestamp_ |= *read_ptr >> 1;
349 dts_ = timestamp_;
350 --pes_header_data_bytes_;
351 --pes_packet_bytes_;
352 parse_state_ = PesHeaderData;
353 break;
354 case PesHeaderData:
355 num_bytes = end - read_ptr;
356 if (num_bytes >= pes_header_data_bytes_) {
357 num_bytes = pes_header_data_bytes_;
358 parse_state_ = PesPayload;
359 }
360 pes_header_data_bytes_ -= num_bytes;
361 pes_packet_bytes_ -= num_bytes;
362 read_ptr += num_bytes;
363 continue;
364 case PesPayload:
365 switch (pes_stream_id_) {
366 case kPsmStreamId:
367 psm_data_.clear();
368 parse_state_ = PsmPayload;
369 continue;
370 case kPaddingStreamId:
371 parse_state_ = Padding;
372 continue;
373 case kEcmStreamId:
374 ecm_.clear();
375 parse_state_ = EcmPayload;
376 continue;
377 case kIndexStreamId:
378 parse_state_ = IndexPayload;
379 continue;
380 default:
381 if (!DemuxNextPes(false)) {
382 return false;
383 }
384 parse_state_ = EsPayload;
385 }
386 continue;
387 case PsmPayload:
388 num_bytes = end - read_ptr;
389 if (num_bytes >= pes_packet_bytes_) {
390 num_bytes = pes_packet_bytes_;
391 parse_state_ = StartCode1;
392 }
393 if (num_bytes > 0) {
394 pes_packet_bytes_ -= num_bytes;
395 prev_size = psm_data_.size();
396 psm_data_.resize(prev_size + num_bytes);
397 memcpy(&psm_data_[prev_size], read_ptr, num_bytes);
398 }
399 read_ptr += num_bytes;
400 continue;
401 case EcmPayload:
402 num_bytes = end - read_ptr;
403 if (num_bytes >= pes_packet_bytes_) {
404 num_bytes = pes_packet_bytes_;
405 parse_state_ = StartCode1;
406 }
407 if (num_bytes > 0) {
408 pes_packet_bytes_ -= num_bytes;
409 prev_size = ecm_.size();
410 ecm_.resize(prev_size + num_bytes);
411 memcpy(&ecm_[prev_size], read_ptr, num_bytes);
412 }
413 if ((pes_packet_bytes_ == 0) && !ecm_.empty()) {
414 if (!ProcessEcm()) {
415 return(false);
416 }
417 }
418 read_ptr += num_bytes;
419 continue;
420 case IndexPayload:
421 num_bytes = end - read_ptr;
422 if (num_bytes >= pes_packet_bytes_) {
423 num_bytes = pes_packet_bytes_;
424 parse_state_ = StartCode1;
425 }
426 if (num_bytes > 0) {
427 pes_packet_bytes_ -= num_bytes;
428 prev_size = index_data_.size();
429 index_data_.resize(prev_size + num_bytes);
430 memcpy(&index_data_[prev_size], read_ptr, num_bytes);
431 }
432 if (pes_packet_bytes_ == 0 && !index_data_.empty()) {
433 if (!metadata_is_complete_) {
434 if (!ParseIndexEntry()) {
435 return false;
436 }
437 }
438 }
439 read_ptr += num_bytes;
440 continue;
441 case EsPayload:
442 num_bytes = end - read_ptr;
443 if (num_bytes >= pes_packet_bytes_) {
444 num_bytes = pes_packet_bytes_;
445 parse_state_ = StartCode1;
446 }
447 pes_packet_bytes_ -= num_bytes;
448 if (pes_stream_id_ != kV2MetadataStreamId) {
449 sample_data_.resize(sample_data_.size() + num_bytes);
450 memcpy(&sample_data_[sample_data_.size() - num_bytes], read_ptr,
451 num_bytes);
452 }
453 prev_pes_stream_id_ = pes_stream_id_;
454 read_ptr += num_bytes;
455 continue;
456 case Padding:
457 num_bytes = end - read_ptr;
458 if (num_bytes >= pes_packet_bytes_) {
459 num_bytes = pes_packet_bytes_;
460 parse_state_ = StartCode1;
461 }
462 pes_packet_bytes_ -= num_bytes;
463 read_ptr += num_bytes;
464 continue;
465 case ProgramEnd:
466 parse_state_ = StartCode1;
467 metadata_is_complete_ = true;
468 if (!DemuxNextPes(true)) {
469 return false;
470 }
471 if (!Flush()) {
472 return false;
473 }
474 // Reset.
475 dts_ = pts_ = 0;
476 parse_state_ = StartCode1;
477 prev_media_sample_data_.Reset();
478 current_program_id_++;
479 ecm_.clear();
480 index_data_.clear();
481 psm_data_.clear();
482 break;
483 default:
484 break;
485 }
486 ++read_ptr;
487 }
488 return true;
489}
490
491bool WvmMediaParser::EmitLastSample(
492 uint32_t stream_id,
493 const std::shared_ptr<MediaSample>& new_sample) {
494 std::string key = absl::StrFormat("%u:%u", current_program_id_, stream_id);
495 std::map<std::string, uint32_t>::iterator it =
496 program_demux_stream_map_.find(key);
497 if (it == program_demux_stream_map_.end())
498 return false;
499 return EmitSample(stream_id, (*it).second, new_sample, true);
500}
501
502bool WvmMediaParser::EmitPendingSamples() {
503 // Emit queued samples which were built when not initialized.
504 while (!media_sample_queue_.empty()) {
505 DemuxStreamIdMediaSample& demux_stream_media_sample =
506 media_sample_queue_.front();
507 if (!EmitSample(demux_stream_media_sample.parsed_audio_or_video_stream_id,
508 demux_stream_media_sample.demux_stream_id,
509 demux_stream_media_sample.media_sample,
510 false)) {
511 return false;
512 }
513 media_sample_queue_.pop_front();
514 }
515 return true;
516}
517
518bool WvmMediaParser::Flush() {
519 // Flush the last audio and video sample for current program.
520 // Reset the streamID when successfully emitted.
521 if (prev_media_sample_data_.audio_sample != NULL) {
522 if (!EmitLastSample(prev_pes_stream_id_,
523 prev_media_sample_data_.audio_sample)) {
524 LOG(ERROR) << "Did not emit last sample for audio stream with ID = "
525 << prev_pes_stream_id_;
526 return false;
527 }
528 }
529 if (prev_media_sample_data_.video_sample != NULL) {
530 if (!EmitLastSample(prev_pes_stream_id_,
531 prev_media_sample_data_.video_sample)) {
532 LOG(ERROR) << "Did not emit last sample for video stream with ID = "
533 << prev_pes_stream_id_;
534 return false;
535 }
536 }
537 return true;
538}
539
540bool WvmMediaParser::ParseIndexEntry() {
541 // Do not parse index entry at the beginning of any track *after* the first
542 // track.
543 if (current_program_id_ > 0) {
544 return true;
545 }
546 uint32_t index_size = 0;
547 if (index_data_.size() < kIndexVersion4HeaderSize) {
548 return false;
549 }
550
551 const uint8_t* read_ptr = index_data_.data();
552 if (absl::big_endian::Load32(read_ptr) != kIndexMagic) {
553 index_data_.clear();
554 return false;
555 }
556 read_ptr += 4;
557
558 uint32_t version = absl::big_endian::Load32(read_ptr);
559 read_ptr += 4;
560 if (version == kVersion4) {
561 index_size = kIndexVersion4HeaderSize + absl::big_endian::Load32(read_ptr);
562 if (index_data_.size() < index_size) {
563 // We do not yet have the full index. Keep accumulating index data.
564 return true;
565 }
566 read_ptr += sizeof(uint32_t);
567
568 // Index metadata
569 uint32_t index_metadata_max_size = index_size - kIndexVersion4HeaderSize;
570 if (index_metadata_max_size < sizeof(uint8_t)) {
571 index_data_.clear();
572 return false;
573 }
574
575 int64_t track_duration = 0;
576 uint32_t trick_play_factor = 0;
577 uint32_t sampling_frequency = kDefaultSamplingFrequency;
578 int32_t time_scale = kMpeg2ClockRate;
579 uint16_t video_width = 0;
580 uint16_t video_height = 0;
581 uint32_t pixel_width = 0;
582 uint32_t pixel_height = 0;
583 uint8_t nalu_length_size = kNaluLengthSize;
584 uint8_t num_channels = 0;
585 int audio_pes_stream_id = 0;
586 int video_pes_stream_id = 0;
587 bool has_video = false;
588 bool has_audio = false;
589 std::vector<uint8_t> audio_codec_config;
590 std::vector<uint8_t> video_codec_config;
591 uint8_t num_index_entries = *read_ptr;
592 ++read_ptr;
593 --index_metadata_max_size;
594
595 for (uint8_t idx = 0; idx < num_index_entries; ++idx) {
596 if (index_metadata_max_size < (2 * sizeof(uint8_t)) + sizeof(uint32_t)) {
597 return false;
598 }
599 uint8_t tag = *read_ptr;
600 ++read_ptr;
601 uint8_t type = *read_ptr;
602 ++read_ptr;
603 uint32_t length = absl::big_endian::Load32(read_ptr);
604 read_ptr += sizeof(uint32_t);
605 index_metadata_max_size -= (2 * sizeof(uint8_t)) + sizeof(uint32_t);
606 if (index_metadata_max_size < length) {
607 return false;
608 }
609 int64_t value = 0;
610 Tag tagtype = Unset;
611 std::vector<uint8_t> binary_data;
612 switch (Type(type)) {
613 case Type_uint8:
614 if (length == sizeof(uint8_t)) {
615 tagtype = GetTag(tag, length, read_ptr, &value);
616 } else {
617 return false;
618 }
619 break;
620 case Type_int8:
621 if (length == sizeof(int8_t)) {
622 tagtype = GetTag(tag, length, read_ptr, &value);
623 } else {
624 return false;
625 }
626 break;
627 case Type_uint16:
628 if (length == sizeof(uint16_t)) {
629 tagtype = GetTag(tag, length, read_ptr, &value);
630 } else {
631 return false;
632 }
633 break;
634 case Type_int16:
635 if (length == sizeof(int16_t)) {
636 tagtype = GetTag(tag, length, read_ptr, &value);
637 } else {
638 return false;
639 }
640 break;
641 case Type_uint32:
642 if (length == sizeof(uint32_t)) {
643 tagtype = GetTag(tag, length, read_ptr, &value);
644 } else {
645 return false;
646 }
647 break;
648 case Type_int32:
649 if (length == sizeof(int32_t)) {
650 tagtype = GetTag(tag, length, read_ptr, &value);
651 } else {
652 return false;
653 }
654 break;
655 case Type_uint64:
656 if (length == sizeof(uint64_t)) {
657 tagtype = GetTag(tag, length, read_ptr, &value);
658 } else {
659 return false;
660 }
661 break;
662 case Type_int64:
663 if (length == sizeof(int64_t)) {
664 tagtype = GetTag(tag, length, read_ptr, &value);
665 } else {
666 return false;
667 }
668 break;
669 case Type_string:
670 case Type_BinaryData:
671 binary_data.assign(read_ptr, read_ptr + length);
672 tagtype = Tag(tag);
673 break;
674 default:
675 break;
676 }
677
678 switch (tagtype) {
679 case TrackDuration:
680 track_duration = value;
681 break;
682 case TrackTrickPlayFactor:
683 trick_play_factor = value;
684 break;
685 case VideoStreamId:
686 video_pes_stream_id = value;
687 break;
688 case AudioStreamId:
689 audio_pes_stream_id = value;
690 break;
691 case VideoWidth:
692 video_width = (uint16_t)value;
693 break;
694 case VideoHeight:
695 video_height = (uint16_t)value;
696 break;
697 case AudioNumChannels:
698 num_channels = (uint8_t)value;
699 break;
700 case VideoType:
701 has_video = true;
702 break;
703 case AudioType:
704 has_audio = true;
705 break;
706 case VideoPixelWidth:
707 pixel_width = static_cast<uint32_t>(value);
708 break;
709 case VideoPixelHeight:
710 pixel_height = static_cast<uint32_t>(value);
711 break;
712 case Audio_EsDescriptor: {
713 ESDescriptor descriptor;
714 if (!descriptor.Parse(binary_data)) {
715 LOG(ERROR) <<
716 "Could not extract AudioSpecificConfig from ES_Descriptor";
717 return false;
718 }
719 audio_codec_config = descriptor.decoder_config_descriptor()
720 .decoder_specific_info_descriptor()
721 .data();
722 break;
723 }
724 case Audio_EC3SpecificData:
725 case Audio_DtsSpecificData:
726 case Audio_AC3SpecificData:
727 LOG(ERROR) << "Audio type not supported.";
728 return false;
729 case Video_AVCDecoderConfigurationRecord:
730 video_codec_config = binary_data;
731 break;
732 default:
733 break;
734 }
735
736 read_ptr += length;
737 index_metadata_max_size -= length;
738 }
739 // End Index metadata
740 index_size = read_ptr - index_data_.data();
741
742 if (has_video) {
743 stream_infos_.emplace_back(new VideoStreamInfo(
744 stream_id_count_, time_scale, track_duration, kCodecH264,
745 byte_to_unit_stream_converter_.stream_format(), std::string(),
746 video_codec_config.data(), video_codec_config.size(), video_width,
747 video_height, pixel_width, pixel_height, 0 /* color_primaries */,
748 0 /*matrix_coefficients */, 0 /* transfer_characteristics */,
749 trick_play_factor, nalu_length_size, std::string(),
750 decryption_key_source_ ? false : true));
751 program_demux_stream_map_[absl::StrFormat(
752 "%u:%u", index_program_id_,
753 video_pes_stream_id ? video_pes_stream_id : kDefaultVideoStreamId)] =
754 stream_id_count_++;
755 }
756 if (has_audio) {
757 const Codec audio_codec = kCodecAAC;
758 // TODO(beil): Pass in max and average bitrate in wvm container.
759 stream_infos_.emplace_back(new AudioStreamInfo(
760 stream_id_count_, time_scale, track_duration, audio_codec,
761 std::string(), audio_codec_config.data(), audio_codec_config.size(),
762 kAacSampleSizeBits, num_channels, sampling_frequency,
763 0 /* seek preroll */, 0 /* codec delay */, 0 /* max bitrate */,
764 0 /* avg bitrate */, std::string(),
765 decryption_key_source_ ? false : true));
766 program_demux_stream_map_[absl::StrFormat(
767 "%u:%u", index_program_id_,
768 audio_pes_stream_id ? audio_pes_stream_id : kDefaultAudioStreamId)] =
769 stream_id_count_++;
770 }
771 }
772
773 index_program_id_++;
774 index_data_.clear();
775 return true;
776}
777
778bool WvmMediaParser::DemuxNextPes(bool is_program_end) {
779 bool output_encrypted_sample = false;
780 if (!sample_data_.empty() && (prev_pes_flags_1_ & kScramblingBitsMask)) {
781 // Decrypt crypto unit.
782 if (!content_decryptor_) {
783 output_encrypted_sample = true;
784 } else {
785 size_t output_size = content_decryptor_->RequiredOutputSize(
786 sample_data_.size() - crypto_unit_start_pos_);
787 content_decryptor_->Crypt(&sample_data_[crypto_unit_start_pos_],
788 sample_data_.size() - crypto_unit_start_pos_,
789 &sample_data_[crypto_unit_start_pos_],
790 &output_size);
791 }
792 }
793 // Demux media sample if we are at program end or if we are not at a
794 // continuation PES.
795 if ((pes_flags_2_ & kPesOptPts) || is_program_end) {
796 if (!sample_data_.empty()) {
797 if (!Output(output_encrypted_sample)) {
798 return false;
799 }
800 }
801 StartMediaSampleDemux();
802 }
803
804 crypto_unit_start_pos_ = sample_data_.size();
805 return true;
806}
807
808void WvmMediaParser::StartMediaSampleDemux() {
809 bool is_key_frame = ((pes_flags_1_ & kPesOptAlign) != 0);
810 media_sample_ = MediaSample::CreateEmptyMediaSample();
811 media_sample_->set_dts(dts_);
812 media_sample_->set_pts(pts_);
813 media_sample_->set_is_key_frame(is_key_frame);
814
815 sample_data_.clear();
816}
817
818bool WvmMediaParser::Output(bool output_encrypted_sample) {
819 if (output_encrypted_sample) {
820 media_sample_->SetData(sample_data_.data(), sample_data_.size());
821 media_sample_->set_is_encrypted(true);
822 } else {
823 if ((prev_pes_stream_id_ & kPesStreamIdVideoMask) == kPesStreamIdVideo) {
824 // Convert video stream to unit stream and get config.
825 std::vector<uint8_t> nal_unit_stream;
826 if (!byte_to_unit_stream_converter_.ConvertByteStreamToNalUnitStream(
827 sample_data_.data(), sample_data_.size(), &nal_unit_stream)) {
828 LOG(ERROR) << "Could not convert h.264 byte stream sample";
829 return false;
830 }
831 media_sample_->SetData(nal_unit_stream.data(), nal_unit_stream.size());
832 if (!is_initialized_) {
833 // Set extra data for video stream from AVC Decoder Config Record.
834 // Also, set codec string from the AVC Decoder Config Record.
835 std::vector<uint8_t> decoder_config_record;
836 byte_to_unit_stream_converter_.GetDecoderConfigurationRecord(
837 &decoder_config_record);
838 for (uint32_t i = 0; i < stream_infos_.size(); i++) {
839 if (stream_infos_[i]->stream_type() == kStreamVideo &&
840 stream_infos_[i]->codec_string().empty()) {
841 const std::vector<uint8_t>* stream_config;
842 if (stream_infos_[i]->codec_config().empty()) {
843 // Decoder config record not available for stream. Use the one
844 // computed from the first video stream.
845 stream_infos_[i]->set_codec_config(decoder_config_record);
846 stream_config = &decoder_config_record;
847 } else {
848 // Use stream-specific config record.
849 stream_config = &stream_infos_[i]->codec_config();
850 }
851 DCHECK(stream_config);
852
853 VideoStreamInfo* video_stream_info =
854 reinterpret_cast<VideoStreamInfo*>(stream_infos_[i].get());
855 AVCDecoderConfigurationRecord avc_config;
856 if (!avc_config.Parse(*stream_config)) {
857 LOG(WARNING) << "Failed to parse AVCDecoderConfigurationRecord. "
858 "Using computed configuration record instead.";
859 video_stream_info->set_codec_config(decoder_config_record);
860 if (!avc_config.Parse(decoder_config_record)) {
861 LOG(ERROR) << "Failed to parse AVCDecoderConfigurationRecord.";
862 return false;
863 }
864 }
865 const FourCC codec_fourcc =
866 byte_to_unit_stream_converter_.stream_format() ==
867 H26xStreamFormat::kNalUnitStreamWithParameterSetNalus
868 ? FOURCC_avc3
869 : FOURCC_avc1;
870 video_stream_info->set_codec_string(
871 avc_config.GetCodecString(codec_fourcc));
872
873 if (avc_config.pixel_width() != video_stream_info->pixel_width() ||
874 avc_config.pixel_height() !=
875 video_stream_info->pixel_height()) {
876 LOG_IF(WARNING, video_stream_info->pixel_width() != 0 ||
877 video_stream_info->pixel_height() != 0)
878 << "Pixel aspect ratio in WVM metadata ("
879 << video_stream_info->pixel_width() << ","
880 << video_stream_info->pixel_height()
881 << ") does not match with SAR in "
882 "AVCDecoderConfigurationRecord ("
883 << avc_config.pixel_width() << ","
884 << avc_config.pixel_height()
885 << "). Use AVCDecoderConfigurationRecord.";
886 video_stream_info->set_pixel_width(avc_config.pixel_width());
887 video_stream_info->set_pixel_height(avc_config.pixel_height());
888 }
889 if (avc_config.coded_width() != video_stream_info->width() ||
890 avc_config.coded_height() != video_stream_info->height()) {
891 LOG(WARNING) << "Resolution in WVM metadata ("
892 << video_stream_info->width() << ","
893 << video_stream_info->height()
894 << ") does not match with resolution in "
895 "AVCDecoderConfigurationRecord ("
896 << avc_config.coded_width() << ","
897 << avc_config.coded_height()
898 << "). Use AVCDecoderConfigurationRecord.";
899 video_stream_info->set_width(avc_config.coded_width());
900 video_stream_info->set_height(avc_config.coded_height());
901 }
902 }
903 }
904 }
905 } else if ((prev_pes_stream_id_ & kPesStreamIdAudioMask) ==
906 kPesStreamIdAudio) {
907 // Set data on the audio stream.
908 mp2t::AdtsHeader adts_header;
909 const uint8_t* frame_ptr = sample_data_.data();
910 if (!adts_header.Parse(frame_ptr, sample_data_.size())) {
911 LOG(ERROR) << "Could not parse ADTS header";
912 return false;
913 }
914 media_sample_->SetData(
915 frame_ptr + adts_header.GetHeaderSize(),
916 adts_header.GetFrameSize() - adts_header.GetHeaderSize());
917 if (!is_initialized_) {
918 for (uint32_t i = 0; i < stream_infos_.size(); i++) {
919 if (stream_infos_[i]->stream_type() == kStreamAudio &&
920 stream_infos_[i]->codec_string().empty()) {
921 AudioStreamInfo* audio_stream_info =
922 reinterpret_cast<AudioStreamInfo*>(stream_infos_[i].get());
923 if (audio_stream_info->codec_config().empty()) {
924 // Set AudioStreamInfo fields using information from the ADTS
925 // header.
926 audio_stream_info->set_sampling_frequency(
927 adts_header.GetSamplingFrequency());
928 std::vector<uint8_t> audio_specific_config;
929 adts_header.GetAudioSpecificConfig(&audio_specific_config);
930 audio_stream_info->set_codec_config(audio_specific_config);
931 audio_stream_info->set_codec_string(
932 AudioStreamInfo::GetCodecString(
933 kCodecAAC, adts_header.GetObjectType()));
934 } else {
935 // Set AudioStreamInfo fields using information from the
936 // AACAudioSpecificConfig record.
937 AACAudioSpecificConfig aac_config;
938 if (!aac_config.Parse(stream_infos_[i]->codec_config())) {
939 LOG(ERROR) << "Could not parse AACAudioSpecificconfig";
940 return false;
941 }
942 audio_stream_info->set_sampling_frequency(
943 aac_config.GetSamplesPerSecond());
944 audio_stream_info->set_codec_string(
945 AudioStreamInfo::GetCodecString(
946 kCodecAAC, aac_config.GetAudioObjectType()));
947 }
948 }
949 }
950 }
951 }
952 }
953
954 if (!is_initialized_) {
955 bool all_streams_have_config = true;
956 // Check if all collected stream infos have codec_config set.
957 for (uint32_t i = 0; i < stream_infos_.size(); i++) {
958 if (stream_infos_[i]->codec_string().empty()) {
959 all_streams_have_config = false;
960 break;
961 }
962 }
963 if (all_streams_have_config) {
964 init_cb_(stream_infos_);
965 is_initialized_ = true;
966 }
967 }
968
969 DCHECK_GT(media_sample_->data_size(), 0UL);
970 std::string key =
971 absl::StrFormat("%u:%u", current_program_id_, prev_pes_stream_id_);
972 std::map<std::string, uint32_t>::iterator it =
973 program_demux_stream_map_.find(key);
974 if (it == program_demux_stream_map_.end()) {
975 // TODO(ramjic): Log error message here and in other error cases through
976 // this method.
977 return false;
978 }
979 DemuxStreamIdMediaSample demux_stream_media_sample;
980 demux_stream_media_sample.parsed_audio_or_video_stream_id =
981 prev_pes_stream_id_;
982 demux_stream_media_sample.demux_stream_id = (*it).second;
983 demux_stream_media_sample.media_sample = media_sample_;
984 // Check if sample can be emitted.
985 if (!is_initialized_) {
986 media_sample_queue_.push_back(demux_stream_media_sample);
987 } else {
988 // flush the sample queue and emit all queued samples.
989 while (!media_sample_queue_.empty()) {
990 if (!EmitPendingSamples())
991 return false;
992 }
993 // Emit current sample.
994 if (!EmitSample(prev_pes_stream_id_, (*it).second, media_sample_, false))
995 return false;
996 }
997 return true;
998}
999
1000bool WvmMediaParser::EmitSample(uint32_t parsed_audio_or_video_stream_id,
1001 uint32_t stream_id,
1002 const std::shared_ptr<MediaSample>& new_sample,
1003 bool isLastSample) {
1004 DCHECK(new_sample);
1005 if (isLastSample) {
1006 if ((parsed_audio_or_video_stream_id & kPesStreamIdVideoMask) ==
1007 kPesStreamIdVideo) {
1008 new_sample->set_duration(prev_media_sample_data_.video_sample_duration);
1009 } else if ((parsed_audio_or_video_stream_id & kPesStreamIdAudioMask) ==
1010 kPesStreamIdAudio) {
1011 new_sample->set_duration(prev_media_sample_data_.audio_sample_duration);
1012 }
1013 if (!new_sample_cb_(stream_id, new_sample)) {
1014 LOG(ERROR) << "Failed to process the last sample.";
1015 return false;
1016 }
1017 return true;
1018 }
1019
1020 // Cannot emit current sample. Compute duration first and then,
1021 // emit previous sample.
1022 if ((parsed_audio_or_video_stream_id & kPesStreamIdVideoMask) ==
1023 kPesStreamIdVideo) {
1024 if (prev_media_sample_data_.video_sample == NULL) {
1025 prev_media_sample_data_.video_sample = new_sample;
1026 prev_media_sample_data_.video_stream_id = stream_id;
1027 return true;
1028 }
1029 prev_media_sample_data_.video_sample->set_duration(
1030 new_sample->dts() - prev_media_sample_data_.video_sample->dts());
1031 prev_media_sample_data_.video_sample_duration =
1032 prev_media_sample_data_.video_sample->duration();
1033 if (!new_sample_cb_(prev_media_sample_data_.video_stream_id,
1034 prev_media_sample_data_.video_sample)) {
1035 LOG(ERROR) << "Failed to process the video sample.";
1036 return false;
1037 }
1038 prev_media_sample_data_.video_sample = new_sample;
1039 prev_media_sample_data_.video_stream_id = stream_id;
1040 } else if ((parsed_audio_or_video_stream_id & kPesStreamIdAudioMask) ==
1041 kPesStreamIdAudio) {
1042 if (prev_media_sample_data_.audio_sample == NULL) {
1043 prev_media_sample_data_.audio_sample = new_sample;
1044 prev_media_sample_data_.audio_stream_id = stream_id;
1045 return true;
1046 }
1047 prev_media_sample_data_.audio_sample->set_duration(
1048 new_sample->dts() - prev_media_sample_data_.audio_sample->dts());
1049 prev_media_sample_data_.audio_sample_duration =
1050 prev_media_sample_data_.audio_sample->duration();
1051 if (!new_sample_cb_(prev_media_sample_data_.audio_stream_id,
1052 prev_media_sample_data_.audio_sample)) {
1053 LOG(ERROR) << "Failed to process the audio sample.";
1054 return false;
1055 }
1056 prev_media_sample_data_.audio_sample = new_sample;
1057 prev_media_sample_data_.audio_stream_id = stream_id;
1058 }
1059 return true;
1060}
1061
1062bool WvmMediaParser::GetAssetKey(const uint8_t* asset_id,
1063 EncryptionKey* encryption_key) {
1064 DCHECK(decryption_key_source_);
1065 Status status = decryption_key_source_->FetchKeys(
1066 EmeInitDataType::WIDEVINE_CLASSIC,
1067 std::vector<uint8_t>(asset_id, asset_id + sizeof(uint32_t)));
1068 if (!status.ok()) {
1069 LOG(ERROR) << "Fetch Key(s) failed for AssetID = "
1070 << absl::big_endian::Load32(asset_id) << ", error = " << status;
1071 return false;
1072 }
1073
1074 const char kHdStreamLabel[] = "HD";
1075 status = decryption_key_source_->GetKey(kHdStreamLabel, encryption_key);
1076 if (!status.ok()) {
1077 LOG(ERROR) << "Fetch Key(s) failed for AssetID = "
1078 << absl::big_endian::Load32(asset_id) << ", error = " << status;
1079 return false;
1080 }
1081
1082 return true;
1083}
1084
1085bool WvmMediaParser::ProcessEcm() {
1086 // An error will be returned later if the samples need to be decrypted.
1087 if (!decryption_key_source_)
1088 return true;
1089
1090 if (current_program_id_ > 0) {
1091 return true;
1092 }
1093 if (ecm_.size() != kEcmSizeBytes) {
1094 LOG(ERROR) << "Unexpected ECM size = " << ecm_.size()
1095 << ", expected size = " << kEcmSizeBytes;
1096 return false;
1097 }
1098 const uint8_t* ecm_data = ecm_.data();
1099 DCHECK(ecm_data);
1100 ecm_data += sizeof(uint32_t); // old version field - skip.
1101 ecm_data += sizeof(uint32_t); // clear lead - skip.
1102 ecm_data += sizeof(uint32_t); // system id(includes ECM version) - skip.
1103 EncryptionKey encryption_key;
1104 if (!GetAssetKey(ecm_data, &encryption_key)) {
1105 return false;
1106 }
1107 if (encryption_key.key.size() < kAssetKeySizeBytes) {
1108 LOG(ERROR) << "Asset Key size of " << encryption_key.key.size()
1109 << " for AssetID = " << absl::big_endian::Load32(ecm_data)
1110 << " is less than minimum asset key size.";
1111 return false;
1112 }
1113 ecm_data += sizeof(uint32_t); // asset_id.
1114 // Legacy WVM content may have asset keys > 16 bytes.
1115 // Use only the first 16 bytes of the asset key to get
1116 // the content key.
1117 std::vector<uint8_t> asset_key(
1118 encryption_key.key.begin(),
1119 encryption_key.key.begin() + kAssetKeySizeBytes);
1120 // WVM format always uses all zero IV.
1121 std::vector<uint8_t> zero_iv(kInitializationVectorSizeBytes, 0);
1122 AesCbcDecryptor asset_decryptor(kCtsPadding, AesCryptor::kUseConstantIv);
1123 if (!asset_decryptor.InitializeWithIv(asset_key, zero_iv)) {
1124 LOG(ERROR) << "Failed to initialize asset_decryptor.";
1125 return false;
1126 }
1127
1128 const size_t content_key_buffer_size =
1129 kEcmFlagsSizeBytes + kEcmContentKeySizeBytes +
1130 kEcmPaddingSizeBytes; // flags + contentKey + padding.
1131
1132 size_t output_size =
1133 asset_decryptor.RequiredOutputSize(content_key_buffer_size);
1134 std::vector<uint8_t> content_key_buffer(output_size);
1135 CHECK(asset_decryptor.Crypt(ecm_data, content_key_buffer_size,
1136 content_key_buffer.data(), &output_size));
1137
1138 std::vector<uint8_t> decrypted_content_key_vec(
1139 content_key_buffer.begin() + 4,
1140 content_key_buffer.begin() + 20);
1141 std::unique_ptr<AesCbcDecryptor> content_decryptor(
1142 new AesCbcDecryptor(kCtsPadding, AesCryptor::kUseConstantIv));
1143 if (!content_decryptor->InitializeWithIv(decrypted_content_key_vec,
1144 zero_iv)) {
1145 LOG(ERROR) << "Failed to initialize content decryptor.";
1146 return false;
1147 }
1148
1149 content_decryptor_ = std::move(content_decryptor);
1150 return true;
1151}
1152
1153DemuxStreamIdMediaSample::DemuxStreamIdMediaSample() :
1154 demux_stream_id(0),
1155 parsed_audio_or_video_stream_id(0) {}
1156
1157DemuxStreamIdMediaSample::~DemuxStreamIdMediaSample() {}
1158
1159PrevSampleData::PrevSampleData() {
1160 Reset();
1161}
1162
1163PrevSampleData::~PrevSampleData() {}
1164
1165void PrevSampleData::Reset() {
1166 audio_sample = NULL;
1167 video_sample = NULL;
1168 audio_stream_id = 0;
1169 video_stream_id = 0;
1170 audio_sample_duration = 0;
1171 video_sample_duration = 0;
1172}
1173
1174} // namespace wvm
1175} // namespace media
1176} // namespace shaka
KeySource is responsible for encryption key acquisition.
Definition key_source.h:52
std::function< bool(uint32_t track_id, std::shared_ptr< MediaSample > media_sample)> NewMediaSampleCB
std::function< bool(uint32_t track_id, std::shared_ptr< TextSample > text_sample)> NewTextSampleCB
std::function< void(const std::vector< std::shared_ptr< StreamInfo > > &stream_info)> InitCB
All the methods that are virtual are virtual for mocking.