Shaka Packager SDK
Loading...
Searching...
No Matches
wvm_media_parser.cc
1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <packager/media/formats/wvm/wvm_media_parser.h>
6
7#include <map>
8#include <sstream>
9#include <vector>
10
11#include <absl/base/internal/endian.h>
12#include <absl/log/check.h>
13#include <absl/strings/str_format.h>
14
15#include <packager/media/base/aes_decryptor.h>
16#include <packager/media/base/audio_stream_info.h>
17#include <packager/media/base/key_source.h>
18#include <packager/media/base/media_sample.h>
19#include <packager/media/codecs/aac_audio_specific_config.h>
20#include <packager/media/codecs/avc_decoder_configuration_record.h>
21#include <packager/media/codecs/es_descriptor.h>
22#include <packager/media/formats/mp2t/adts_header.h>
23
// Evaluates to true when a PES packet with stream id |x| is followed by the
// optional PES header (flag bytes, header-data length, PTS/DTS). The stream
// ids listed below (among them the PSM, padding, index and ECM streams used
// by this parser) carry their payload directly after the packet length.
//
// The argument is fully parenthesized so that compound expressions such as
// `a | b` expand with the intended precedence. Note that |x| is still
// evaluated several times; do not pass expressions with side effects.
#define HAS_HEADER_EXTENSION(x)                                        \
  (((x) != 0xBC) && ((x) != 0xBE) && ((x) != 0xBF) && ((x) != 0xF0) && \
   ((x) != 0xF2) && ((x) != 0xF8) && ((x) != 0xFF))
27
namespace {

// Time scale assigned to every stream described by the index.
constexpr uint32_t kMpeg2ClockRate = 90000;

// Bits tested in the PES flag bytes.
constexpr uint32_t kPesOptPts = 0x80;    // second flag byte: PTS present.
constexpr uint32_t kPesOptDts = 0x40;    // second flag byte: DTS present.
constexpr uint32_t kPesOptAlign = 0x04;  // first flag byte: marks key frames.

// Stream ids that receive dedicated handling in the parser.
constexpr uint32_t kPsmStreamId = 0xBC;
constexpr uint32_t kPaddingStreamId = 0xBE;
constexpr uint32_t kIndexMagic = 0x49444d69;
constexpr uint32_t kIndexStreamId = 0xBF;  // private_stream_2
constexpr uint32_t kIndexVersion4HeaderSize = 12;
constexpr uint32_t kEcmStreamId = 0xF0;
constexpr uint32_t kV2MetadataStreamId = 0xF1;  // EMM_stream

// Bits of the first PES flag byte that mark a scrambled payload.
constexpr uint32_t kScramblingBitsMask = 0x30;

// Start code prefix bytes, followed by the values of the fourth byte that
// select pack header, system header and program end.
constexpr uint32_t kStartCode1 = 0x00;
constexpr uint32_t kStartCode2 = 0x00;
constexpr uint32_t kStartCode3 = 0x01;
constexpr uint32_t kStartCode4Pack = 0xBA;
constexpr uint32_t kStartCode4System = 0xBB;
constexpr uint32_t kStartCode4ProgramEnd = 0xB9;

// Mask/value pairs used to classify a PES stream id as video or audio.
constexpr uint32_t kPesStreamIdVideoMask = 0xF0;
constexpr uint32_t kPesStreamIdVideo = 0xE0;
constexpr uint32_t kPesStreamIdAudioMask = 0xE0;
constexpr uint32_t kPesStreamIdAudio = 0xC0;

// The only index version whose metadata is parsed.
constexpr uint32_t kVersion4 = 4;
constexpr uint8_t kAacSampleSizeBits = 16;
// Applies to all video streams.
constexpr uint8_t kNaluLengthSize = 4;  // unit is bytes.
// Placeholder sampling frequency for all audio streams, which
// will be overwritten after filter parsing.
constexpr uint32_t kDefaultSamplingFrequency = 100;
constexpr uint16_t kEcmSizeBytes = 80;
constexpr uint32_t kInitializationVectorSizeBytes = 16;
// ECM fields for processing.
constexpr uint32_t kEcmContentKeySizeBytes = 16;
constexpr uint32_t kEcmDCPFlagsSizeBytes = 3;
constexpr uint32_t kEcmCCIFlagsSizeBytes = 1;
constexpr uint32_t kEcmFlagsSizeBytes =
    kEcmCCIFlagsSizeBytes + kEcmDCPFlagsSizeBytes;
constexpr uint32_t kEcmPaddingSizeBytes = 12;
constexpr uint32_t kAssetKeySizeBytes = 16;
// Default audio and video PES stream IDs, used when the index does not name
// one explicitly.
constexpr uint8_t kDefaultAudioStreamId = kPesStreamIdAudio;
constexpr uint8_t kDefaultVideoStreamId = kPesStreamIdVideo;

// Value type codes carried by each index metadata entry.
enum Type {
  Type_void = 0,
  Type_uint8 = 1,
  Type_int8 = 2,
  Type_uint16 = 3,
  Type_int16 = 4,
  Type_uint32 = 5,
  Type_int32 = 6,
  Type_uint64 = 7,
  Type_int64 = 8,
  Type_string = 9,
  Type_BinaryData = 10
};

}  // namespace
86
87namespace shaka {
88namespace media {
89namespace wvm {
90
91WvmMediaParser::WvmMediaParser()
92 : is_initialized_(false),
93 parse_state_(StartCode1),
94 skip_bytes_(0),
95 metadata_is_complete_(false),
96 current_program_id_(0),
97 pes_stream_id_(0),
98 prev_pes_stream_id_(0),
99 pes_packet_bytes_(0),
100 pes_flags_1_(0),
101 pes_flags_2_(0),
102 prev_pes_flags_1_(0),
103 pes_header_data_bytes_(0),
104 timestamp_(0),
105 pts_(0),
106 dts_(0),
107 index_program_id_(0),
108 media_sample_(NULL),
109 crypto_unit_start_pos_(0),
110 stream_id_count_(0),
111 decryption_key_source_(NULL) {}
112
113WvmMediaParser::~WvmMediaParser() {}
114
115void WvmMediaParser::Init(const InitCB& init_cb,
116 const NewMediaSampleCB& new_media_sample_cb,
117 const NewTextSampleCB& new_text_sample_cb,
118 KeySource* decryption_key_source) {
119 DCHECK(!is_initialized_);
120 DCHECK(init_cb != nullptr);
121 DCHECK(new_media_sample_cb != nullptr);
122 decryption_key_source_ = decryption_key_source;
123 init_cb_ = init_cb;
124 new_sample_cb_ = new_media_sample_cb;
125}
126
127bool WvmMediaParser::Parse(const uint8_t* buf, int size) {
128 size_t num_bytes = 0;
129 size_t prev_size = 0;
130 const uint8_t* read_ptr = buf;
131 const uint8_t* end = read_ptr + size;
132
133 while (read_ptr < end) {
134 switch (parse_state_) {
135 case StartCode1:
136 if (*read_ptr == kStartCode1) {
137 parse_state_ = StartCode2;
138 }
139 break;
140 case StartCode2:
141 if (*read_ptr == kStartCode2) {
142 parse_state_ = StartCode3;
143 } else {
144 parse_state_ = StartCode1;
145 }
146 break;
147 case StartCode3:
148 if (*read_ptr == kStartCode3) {
149 parse_state_ = StartCode4;
150 } else {
151 parse_state_ = StartCode1;
152 }
153 break;
154 case StartCode4:
155 switch (*read_ptr) {
156 case kStartCode4Pack:
157 parse_state_ = PackHeader1;
158 break;
159 case kStartCode4System:
160 parse_state_ = SystemHeader1;
161 break;
162 case kStartCode4ProgramEnd:
163 parse_state_ = ProgramEnd;
164 continue;
165 default:
166 parse_state_ = PesStreamId;
167 continue;
168 }
169 break;
170 case PackHeader1:
171 parse_state_ = PackHeader2;
172 break;
173 case PackHeader2:
174 parse_state_ = PackHeader3;
175 break;
176 case PackHeader3:
177 parse_state_ = PackHeader4;
178 break;
179 case PackHeader4:
180 parse_state_ = PackHeader5;
181 break;
182 case PackHeader5:
183 parse_state_ = PackHeader6;
184 break;
185 case PackHeader6:
186 parse_state_ = PackHeader7;
187 break;
188 case PackHeader7:
189 parse_state_ = PackHeader8;
190 break;
191 case PackHeader8:
192 parse_state_ = PackHeader9;
193 break;
194 case PackHeader9:
195 parse_state_ = PackHeader10;
196 break;
197 case PackHeader10:
198 skip_bytes_ = *read_ptr & 0x07;
199 parse_state_ = PackHeaderStuffingSkip;
200 break;
201 case SystemHeader1:
202 skip_bytes_ = *read_ptr;
203 skip_bytes_ <<= 8;
204 parse_state_ = SystemHeader2;
205 break;
206 case SystemHeader2:
207 skip_bytes_ |= *read_ptr;
208 parse_state_ = SystemHeaderSkip;
209 break;
210 case PackHeaderStuffingSkip:
211 if (end >= skip_bytes_ + read_ptr) {
212 read_ptr += skip_bytes_;
213 skip_bytes_ = 0;
214 parse_state_ = StartCode1;
215 } else {
216 skip_bytes_ -= (end - read_ptr);
217 read_ptr = end;
218 }
219 continue;
220 case SystemHeaderSkip:
221 if (end >= skip_bytes_ + read_ptr) {
222 read_ptr += skip_bytes_;
223 skip_bytes_ = 0;
224 parse_state_ = StartCode1;
225 } else {
226 uint32_t remaining_size = end - read_ptr;
227 skip_bytes_ -= remaining_size;
228 read_ptr = end;
229 }
230 continue;
231 case PesStreamId:
232 pes_stream_id_ = *read_ptr;
233 if (!metadata_is_complete_ && (pes_stream_id_ != kPsmStreamId) &&
234 (pes_stream_id_ != kIndexStreamId) &&
235 (pes_stream_id_ != kEcmStreamId) &&
236 (pes_stream_id_ != kV2MetadataStreamId) &&
237 (pes_stream_id_ != kPaddingStreamId)) {
238 metadata_is_complete_ = true;
239 }
240 parse_state_ = PesPacketLength1;
241 break;
242 case PesPacketLength1:
243 pes_packet_bytes_ = *read_ptr;
244 pes_packet_bytes_ <<= 8;
245 parse_state_ = PesPacketLength2;
246 break;
247 case PesPacketLength2:
248 pes_packet_bytes_ |= *read_ptr;
249 if (HAS_HEADER_EXTENSION(pes_stream_id_)) {
250 parse_state_ = PesExtension1;
251 } else {
252 prev_pes_flags_1_ = pes_flags_1_;
253 pes_flags_1_ = pes_flags_2_ = 0;
254 pes_header_data_bytes_ = 0;
255 parse_state_ = PesPayload;
256 }
257 break;
258 case PesExtension1:
259 prev_pes_flags_1_ = pes_flags_1_;
260 pes_flags_1_ = *read_ptr;
261 --pes_packet_bytes_;
262 parse_state_ = PesExtension2;
263 break;
264 case PesExtension2:
265 pes_flags_2_ = *read_ptr;
266 --pes_packet_bytes_;
267 parse_state_ = PesExtension3;
268 break;
269 case PesExtension3:
270 pes_header_data_bytes_ = *read_ptr;
271 --pes_packet_bytes_;
272 if (pes_flags_2_ & kPesOptPts) {
273 parse_state_ = Pts1;
274 } else {
275 parse_state_ = PesHeaderData;
276 }
277 break;
278 case Pts1:
279 timestamp_ = (*read_ptr & 0x0E);
280 --pes_header_data_bytes_;
281 --pes_packet_bytes_;
282 parse_state_ = Pts2;
283 break;
284 case Pts2:
285 timestamp_ <<= 7;
286 timestamp_ |= *read_ptr;
287 --pes_header_data_bytes_;
288 --pes_packet_bytes_;
289 parse_state_ = Pts3;
290 break;
291 case Pts3:
292 timestamp_ <<= 7;
293 timestamp_ |= *read_ptr >> 1;
294 --pes_header_data_bytes_;
295 --pes_packet_bytes_;
296 parse_state_ = Pts4;
297 break;
298 case Pts4:
299 timestamp_ <<= 8;
300 timestamp_ |= *read_ptr;
301 --pes_header_data_bytes_;
302 --pes_packet_bytes_;
303 parse_state_ = Pts5;
304 break;
305 case Pts5:
306 timestamp_ <<= 7;
307 timestamp_ |= *read_ptr >> 1;
308 pts_ = timestamp_;
309 --pes_header_data_bytes_;
310 --pes_packet_bytes_;
311 if (pes_flags_2_ & kPesOptDts) {
312 parse_state_ = Dts1;
313 } else {
314 dts_ = pts_;
315 parse_state_ = PesHeaderData;
316 }
317 break;
318 case Dts1:
319 timestamp_ = (*read_ptr & 0x0E);
320 --pes_header_data_bytes_;
321 --pes_packet_bytes_;
322 parse_state_ = Dts2;
323 break;
324 case Dts2:
325 timestamp_ <<= 7;
326 timestamp_ |= *read_ptr;
327 --pes_header_data_bytes_;
328 --pes_packet_bytes_;
329 parse_state_ = Dts3;
330 break;
331 case Dts3:
332 timestamp_ <<= 7;
333 timestamp_ |= *read_ptr >> 1;
334 --pes_header_data_bytes_;
335 --pes_packet_bytes_;
336 parse_state_ = Dts4;
337 break;
338 case Dts4:
339 timestamp_ <<= 8;
340 timestamp_ |= *read_ptr;
341 --pes_header_data_bytes_;
342 --pes_packet_bytes_;
343 parse_state_ = Dts5;
344 break;
345 case Dts5:
346 timestamp_ <<= 7;
347 timestamp_ |= *read_ptr >> 1;
348 dts_ = timestamp_;
349 --pes_header_data_bytes_;
350 --pes_packet_bytes_;
351 parse_state_ = PesHeaderData;
352 break;
353 case PesHeaderData:
354 num_bytes = end - read_ptr;
355 if (num_bytes >= pes_header_data_bytes_) {
356 num_bytes = pes_header_data_bytes_;
357 parse_state_ = PesPayload;
358 }
359 pes_header_data_bytes_ -= num_bytes;
360 pes_packet_bytes_ -= num_bytes;
361 read_ptr += num_bytes;
362 continue;
363 case PesPayload:
364 switch (pes_stream_id_) {
365 case kPsmStreamId:
366 psm_data_.clear();
367 parse_state_ = PsmPayload;
368 continue;
369 case kPaddingStreamId:
370 parse_state_ = Padding;
371 continue;
372 case kEcmStreamId:
373 ecm_.clear();
374 parse_state_ = EcmPayload;
375 continue;
376 case kIndexStreamId:
377 parse_state_ = IndexPayload;
378 continue;
379 default:
380 if (!DemuxNextPes(false)) {
381 return false;
382 }
383 parse_state_ = EsPayload;
384 }
385 continue;
386 case PsmPayload:
387 num_bytes = end - read_ptr;
388 if (num_bytes >= pes_packet_bytes_) {
389 num_bytes = pes_packet_bytes_;
390 parse_state_ = StartCode1;
391 }
392 if (num_bytes > 0) {
393 pes_packet_bytes_ -= num_bytes;
394 prev_size = psm_data_.size();
395 psm_data_.resize(prev_size + num_bytes);
396 memcpy(&psm_data_[prev_size], read_ptr, num_bytes);
397 }
398 read_ptr += num_bytes;
399 continue;
400 case EcmPayload:
401 num_bytes = end - read_ptr;
402 if (num_bytes >= pes_packet_bytes_) {
403 num_bytes = pes_packet_bytes_;
404 parse_state_ = StartCode1;
405 }
406 if (num_bytes > 0) {
407 pes_packet_bytes_ -= num_bytes;
408 prev_size = ecm_.size();
409 ecm_.resize(prev_size + num_bytes);
410 memcpy(&ecm_[prev_size], read_ptr, num_bytes);
411 }
412 if ((pes_packet_bytes_ == 0) && !ecm_.empty()) {
413 if (!ProcessEcm()) {
414 return (false);
415 }
416 }
417 read_ptr += num_bytes;
418 continue;
419 case IndexPayload:
420 num_bytes = end - read_ptr;
421 if (num_bytes >= pes_packet_bytes_) {
422 num_bytes = pes_packet_bytes_;
423 parse_state_ = StartCode1;
424 }
425 if (num_bytes > 0) {
426 pes_packet_bytes_ -= num_bytes;
427 prev_size = index_data_.size();
428 index_data_.resize(prev_size + num_bytes);
429 memcpy(&index_data_[prev_size], read_ptr, num_bytes);
430 }
431 if (pes_packet_bytes_ == 0 && !index_data_.empty()) {
432 if (!metadata_is_complete_) {
433 if (!ParseIndexEntry()) {
434 return false;
435 }
436 }
437 }
438 read_ptr += num_bytes;
439 continue;
440 case EsPayload:
441 num_bytes = end - read_ptr;
442 if (num_bytes >= pes_packet_bytes_) {
443 num_bytes = pes_packet_bytes_;
444 parse_state_ = StartCode1;
445 }
446 pes_packet_bytes_ -= num_bytes;
447 if (pes_stream_id_ != kV2MetadataStreamId) {
448 sample_data_.resize(sample_data_.size() + num_bytes);
449 memcpy(&sample_data_[sample_data_.size() - num_bytes], read_ptr,
450 num_bytes);
451 }
452 prev_pes_stream_id_ = pes_stream_id_;
453 read_ptr += num_bytes;
454 continue;
455 case Padding:
456 num_bytes = end - read_ptr;
457 if (num_bytes >= pes_packet_bytes_) {
458 num_bytes = pes_packet_bytes_;
459 parse_state_ = StartCode1;
460 }
461 pes_packet_bytes_ -= num_bytes;
462 read_ptr += num_bytes;
463 continue;
464 case ProgramEnd:
465 parse_state_ = StartCode1;
466 metadata_is_complete_ = true;
467 if (!DemuxNextPes(true)) {
468 return false;
469 }
470 if (!Flush()) {
471 return false;
472 }
473 // Reset.
474 dts_ = pts_ = 0;
475 parse_state_ = StartCode1;
476 prev_media_sample_data_.Reset();
477 current_program_id_++;
478 ecm_.clear();
479 index_data_.clear();
480 psm_data_.clear();
481 break;
482 default:
483 break;
484 }
485 ++read_ptr;
486 }
487 return true;
488}
489
490bool WvmMediaParser::EmitLastSample(
491 uint32_t stream_id,
492 const std::shared_ptr<MediaSample>& new_sample) {
493 std::string key = absl::StrFormat("%u:%u", current_program_id_, stream_id);
494 std::map<std::string, uint32_t>::iterator it =
495 program_demux_stream_map_.find(key);
496 if (it == program_demux_stream_map_.end())
497 return false;
498 return EmitSample(stream_id, (*it).second, new_sample, true);
499}
500
501bool WvmMediaParser::EmitPendingSamples() {
502 // Emit queued samples which were built when not initialized.
503 while (!media_sample_queue_.empty()) {
504 DemuxStreamIdMediaSample& demux_stream_media_sample =
505 media_sample_queue_.front();
506 if (!EmitSample(demux_stream_media_sample.parsed_audio_or_video_stream_id,
507 demux_stream_media_sample.demux_stream_id,
508 demux_stream_media_sample.media_sample, false)) {
509 return false;
510 }
511 media_sample_queue_.pop_front();
512 }
513 return true;
514}
515
516bool WvmMediaParser::Flush() {
517 // Flush the last audio and video sample for current program.
518 // Reset the streamID when successfully emitted.
519 if (prev_media_sample_data_.audio_sample != NULL) {
520 if (!EmitLastSample(prev_pes_stream_id_,
521 prev_media_sample_data_.audio_sample)) {
522 LOG(ERROR) << "Did not emit last sample for audio stream with ID = "
523 << prev_pes_stream_id_;
524 return false;
525 }
526 }
527 if (prev_media_sample_data_.video_sample != NULL) {
528 if (!EmitLastSample(prev_pes_stream_id_,
529 prev_media_sample_data_.video_sample)) {
530 LOG(ERROR) << "Did not emit last sample for video stream with ID = "
531 << prev_pes_stream_id_;
532 return false;
533 }
534 }
535 return true;
536}
537
538bool WvmMediaParser::ParseIndexEntry() {
539 // Do not parse index entry at the beginning of any track *after* the first
540 // track.
541 if (current_program_id_ > 0) {
542 return true;
543 }
544 uint32_t index_size = 0;
545 if (index_data_.size() < kIndexVersion4HeaderSize) {
546 return false;
547 }
548
549 const uint8_t* read_ptr = index_data_.data();
550 if (absl::big_endian::Load32(read_ptr) != kIndexMagic) {
551 index_data_.clear();
552 return false;
553 }
554 read_ptr += 4;
555
556 uint32_t version = absl::big_endian::Load32(read_ptr);
557 read_ptr += 4;
558 if (version == kVersion4) {
559 index_size = kIndexVersion4HeaderSize + absl::big_endian::Load32(read_ptr);
560 if (index_data_.size() < index_size) {
561 // We do not yet have the full index. Keep accumulating index data.
562 return true;
563 }
564 read_ptr += sizeof(uint32_t);
565
566 // Index metadata
567 uint32_t index_metadata_max_size = index_size - kIndexVersion4HeaderSize;
568 if (index_metadata_max_size < sizeof(uint8_t)) {
569 index_data_.clear();
570 return false;
571 }
572
573 int64_t track_duration = 0;
574 uint32_t trick_play_factor = 0;
575 uint32_t sampling_frequency = kDefaultSamplingFrequency;
576 int32_t time_scale = kMpeg2ClockRate;
577 uint16_t video_width = 0;
578 uint16_t video_height = 0;
579 uint32_t pixel_width = 0;
580 uint32_t pixel_height = 0;
581 uint8_t nalu_length_size = kNaluLengthSize;
582 uint8_t num_channels = 0;
583 int audio_pes_stream_id = 0;
584 int video_pes_stream_id = 0;
585 bool has_video = false;
586 bool has_audio = false;
587 std::vector<uint8_t> audio_codec_config;
588 std::vector<uint8_t> video_codec_config;
589 uint8_t num_index_entries = *read_ptr;
590 ++read_ptr;
591 --index_metadata_max_size;
592
593 for (uint8_t idx = 0; idx < num_index_entries; ++idx) {
594 if (index_metadata_max_size < (2 * sizeof(uint8_t)) + sizeof(uint32_t)) {
595 return false;
596 }
597 uint8_t tag = *read_ptr;
598 ++read_ptr;
599 uint8_t type = *read_ptr;
600 ++read_ptr;
601 uint32_t length = absl::big_endian::Load32(read_ptr);
602 read_ptr += sizeof(uint32_t);
603 index_metadata_max_size -= (2 * sizeof(uint8_t)) + sizeof(uint32_t);
604 if (index_metadata_max_size < length) {
605 return false;
606 }
607 int64_t value = 0;
608 Tag tagtype = Unset;
609 std::vector<uint8_t> binary_data;
610 switch (Type(type)) {
611 case Type_uint8:
612 if (length == sizeof(uint8_t)) {
613 tagtype = GetTag(tag, length, read_ptr, &value);
614 } else {
615 return false;
616 }
617 break;
618 case Type_int8:
619 if (length == sizeof(int8_t)) {
620 tagtype = GetTag(tag, length, read_ptr, &value);
621 } else {
622 return false;
623 }
624 break;
625 case Type_uint16:
626 if (length == sizeof(uint16_t)) {
627 tagtype = GetTag(tag, length, read_ptr, &value);
628 } else {
629 return false;
630 }
631 break;
632 case Type_int16:
633 if (length == sizeof(int16_t)) {
634 tagtype = GetTag(tag, length, read_ptr, &value);
635 } else {
636 return false;
637 }
638 break;
639 case Type_uint32:
640 if (length == sizeof(uint32_t)) {
641 tagtype = GetTag(tag, length, read_ptr, &value);
642 } else {
643 return false;
644 }
645 break;
646 case Type_int32:
647 if (length == sizeof(int32_t)) {
648 tagtype = GetTag(tag, length, read_ptr, &value);
649 } else {
650 return false;
651 }
652 break;
653 case Type_uint64:
654 if (length == sizeof(uint64_t)) {
655 tagtype = GetTag(tag, length, read_ptr, &value);
656 } else {
657 return false;
658 }
659 break;
660 case Type_int64:
661 if (length == sizeof(int64_t)) {
662 tagtype = GetTag(tag, length, read_ptr, &value);
663 } else {
664 return false;
665 }
666 break;
667 case Type_string:
668 case Type_BinaryData:
669 binary_data.assign(read_ptr, read_ptr + length);
670 tagtype = Tag(tag);
671 break;
672 default:
673 break;
674 }
675
676 switch (tagtype) {
677 case TrackDuration:
678 track_duration = value;
679 break;
680 case TrackTrickPlayFactor:
681 trick_play_factor = value;
682 break;
683 case VideoStreamId:
684 video_pes_stream_id = value;
685 break;
686 case AudioStreamId:
687 audio_pes_stream_id = value;
688 break;
689 case VideoWidth:
690 video_width = (uint16_t)value;
691 break;
692 case VideoHeight:
693 video_height = (uint16_t)value;
694 break;
695 case AudioNumChannels:
696 num_channels = (uint8_t)value;
697 break;
698 case VideoType:
699 has_video = true;
700 break;
701 case AudioType:
702 has_audio = true;
703 break;
704 case VideoPixelWidth:
705 pixel_width = static_cast<uint32_t>(value);
706 break;
707 case VideoPixelHeight:
708 pixel_height = static_cast<uint32_t>(value);
709 break;
710 case Audio_EsDescriptor: {
711 ESDescriptor descriptor;
712 if (!descriptor.Parse(binary_data)) {
713 LOG(ERROR)
714 << "Could not extract AudioSpecificConfig from ES_Descriptor";
715 return false;
716 }
717 audio_codec_config = descriptor.decoder_config_descriptor()
718 .decoder_specific_info_descriptor()
719 .data();
720 break;
721 }
722 case Audio_EC3SpecificData:
723 case Audio_DtsSpecificData:
724 case Audio_AC3SpecificData:
725 LOG(ERROR) << "Audio type not supported.";
726 return false;
727 case Video_AVCDecoderConfigurationRecord:
728 video_codec_config = binary_data;
729 break;
730 default:
731 break;
732 }
733
734 read_ptr += length;
735 index_metadata_max_size -= length;
736 }
737 // End Index metadata
738 index_size = read_ptr - index_data_.data();
739
740 if (has_video) {
741 stream_infos_.emplace_back(new VideoStreamInfo(
742 stream_id_count_, time_scale, track_duration, kCodecH264,
743 byte_to_unit_stream_converter_.stream_format(), std::string(),
744 video_codec_config.data(), video_codec_config.size(), video_width,
745 video_height, pixel_width, pixel_height, 0 /* color_primaries */,
746 0 /*matrix_coefficients */, 0 /* transfer_characteristics */,
747 trick_play_factor, nalu_length_size, std::string(),
748 decryption_key_source_ ? false : true));
749 program_demux_stream_map_[absl::StrFormat(
750 "%u:%u", index_program_id_,
751 video_pes_stream_id ? video_pes_stream_id : kDefaultVideoStreamId)] =
752 stream_id_count_++;
753 }
754 if (has_audio) {
755 const Codec audio_codec = kCodecAAC;
756 // TODO(beil): Pass in max and average bitrate in wvm container.
757 stream_infos_.emplace_back(new AudioStreamInfo(
758 stream_id_count_, time_scale, track_duration, audio_codec,
759 std::string(), audio_codec_config.data(), audio_codec_config.size(),
760 kAacSampleSizeBits, num_channels, sampling_frequency,
761 0 /* seek preroll */, 0 /* codec delay */, 0 /* max bitrate */,
762 0 /* avg bitrate */, std::string(),
763 decryption_key_source_ ? false : true));
764 program_demux_stream_map_[absl::StrFormat(
765 "%u:%u", index_program_id_,
766 audio_pes_stream_id ? audio_pes_stream_id : kDefaultAudioStreamId)] =
767 stream_id_count_++;
768 }
769 }
770
771 index_program_id_++;
772 index_data_.clear();
773 return true;
774}
775
776bool WvmMediaParser::DemuxNextPes(bool is_program_end) {
777 bool output_encrypted_sample = false;
778 if (!sample_data_.empty() && (prev_pes_flags_1_ & kScramblingBitsMask)) {
779 // Decrypt crypto unit.
780 if (!content_decryptor_) {
781 output_encrypted_sample = true;
782 } else {
783 size_t output_size = content_decryptor_->RequiredOutputSize(
784 sample_data_.size() - crypto_unit_start_pos_);
785 content_decryptor_->Crypt(&sample_data_[crypto_unit_start_pos_],
786 sample_data_.size() - crypto_unit_start_pos_,
787 &sample_data_[crypto_unit_start_pos_],
788 &output_size);
789 }
790 }
791 // Demux media sample if we are at program end or if we are not at a
792 // continuation PES.
793 if ((pes_flags_2_ & kPesOptPts) || is_program_end) {
794 if (!sample_data_.empty()) {
795 if (!Output(output_encrypted_sample)) {
796 return false;
797 }
798 }
799 StartMediaSampleDemux();
800 }
801
802 crypto_unit_start_pos_ = sample_data_.size();
803 return true;
804}
805
806void WvmMediaParser::StartMediaSampleDemux() {
807 bool is_key_frame = ((pes_flags_1_ & kPesOptAlign) != 0);
808 media_sample_ = MediaSample::CreateEmptyMediaSample();
809 media_sample_->set_dts(dts_);
810 media_sample_->set_pts(pts_);
811 media_sample_->set_is_key_frame(is_key_frame);
812
813 sample_data_.clear();
814}
815
816bool WvmMediaParser::Output(bool output_encrypted_sample) {
817 if (output_encrypted_sample) {
818 media_sample_->SetData(sample_data_.data(), sample_data_.size());
819 media_sample_->set_is_encrypted(true);
820 } else {
821 if ((prev_pes_stream_id_ & kPesStreamIdVideoMask) == kPesStreamIdVideo) {
822 // Convert video stream to unit stream and get config.
823 std::vector<uint8_t> nal_unit_stream;
824 if (!byte_to_unit_stream_converter_.ConvertByteStreamToNalUnitStream(
825 sample_data_.data(), sample_data_.size(), &nal_unit_stream)) {
826 LOG(ERROR) << "Could not convert h.264 byte stream sample";
827 return false;
828 }
829 media_sample_->SetData(nal_unit_stream.data(), nal_unit_stream.size());
830 if (!is_initialized_) {
831 // Set extra data for video stream from AVC Decoder Config Record.
832 // Also, set codec string from the AVC Decoder Config Record.
833 std::vector<uint8_t> decoder_config_record;
834 byte_to_unit_stream_converter_.GetDecoderConfigurationRecord(
835 &decoder_config_record);
836 for (uint32_t i = 0; i < stream_infos_.size(); i++) {
837 if (stream_infos_[i]->stream_type() == kStreamVideo &&
838 stream_infos_[i]->codec_string().empty()) {
839 const std::vector<uint8_t>* stream_config;
840 if (stream_infos_[i]->codec_config().empty()) {
841 // Decoder config record not available for stream. Use the one
842 // computed from the first video stream.
843 stream_infos_[i]->set_codec_config(decoder_config_record);
844 stream_config = &decoder_config_record;
845 } else {
846 // Use stream-specific config record.
847 stream_config = &stream_infos_[i]->codec_config();
848 }
849 DCHECK(stream_config);
850
851 VideoStreamInfo* video_stream_info =
852 reinterpret_cast<VideoStreamInfo*>(stream_infos_[i].get());
853 AVCDecoderConfigurationRecord avc_config;
854 if (!avc_config.Parse(*stream_config)) {
855 LOG(WARNING) << "Failed to parse AVCDecoderConfigurationRecord. "
856 "Using computed configuration record instead.";
857 video_stream_info->set_codec_config(decoder_config_record);
858 if (!avc_config.Parse(decoder_config_record)) {
859 LOG(ERROR) << "Failed to parse AVCDecoderConfigurationRecord.";
860 return false;
861 }
862 }
863 const FourCC codec_fourcc =
864 byte_to_unit_stream_converter_.stream_format() ==
865 H26xStreamFormat::kNalUnitStreamWithParameterSetNalus
866 ? FOURCC_avc3
867 : FOURCC_avc1;
868 video_stream_info->set_codec_string(
869 avc_config.GetCodecString(codec_fourcc));
870
871 if (avc_config.pixel_width() != video_stream_info->pixel_width() ||
872 avc_config.pixel_height() !=
873 video_stream_info->pixel_height()) {
874 LOG_IF(WARNING, video_stream_info->pixel_width() != 0 ||
875 video_stream_info->pixel_height() != 0)
876 << "Pixel aspect ratio in WVM metadata ("
877 << video_stream_info->pixel_width() << ","
878 << video_stream_info->pixel_height()
879 << ") does not match with SAR in "
880 "AVCDecoderConfigurationRecord ("
881 << avc_config.pixel_width() << ","
882 << avc_config.pixel_height()
883 << "). Use AVCDecoderConfigurationRecord.";
884 video_stream_info->set_pixel_width(avc_config.pixel_width());
885 video_stream_info->set_pixel_height(avc_config.pixel_height());
886 }
887 if (avc_config.coded_width() != video_stream_info->width() ||
888 avc_config.coded_height() != video_stream_info->height()) {
889 LOG(WARNING) << "Resolution in WVM metadata ("
890 << video_stream_info->width() << ","
891 << video_stream_info->height()
892 << ") does not match with resolution in "
893 "AVCDecoderConfigurationRecord ("
894 << avc_config.coded_width() << ","
895 << avc_config.coded_height()
896 << "). Use AVCDecoderConfigurationRecord.";
897 video_stream_info->set_width(avc_config.coded_width());
898 video_stream_info->set_height(avc_config.coded_height());
899 }
900 }
901 }
902 }
903 } else if ((prev_pes_stream_id_ & kPesStreamIdAudioMask) ==
904 kPesStreamIdAudio) {
905 // Set data on the audio stream.
906 mp2t::AdtsHeader adts_header;
907 const uint8_t* frame_ptr = sample_data_.data();
908 if (!adts_header.Parse(frame_ptr, sample_data_.size())) {
909 LOG(ERROR) << "Could not parse ADTS header";
910 return false;
911 }
912 media_sample_->SetData(
913 frame_ptr + adts_header.GetHeaderSize(),
914 adts_header.GetFrameSize() - adts_header.GetHeaderSize());
915 if (!is_initialized_) {
916 for (uint32_t i = 0; i < stream_infos_.size(); i++) {
917 if (stream_infos_[i]->stream_type() == kStreamAudio &&
918 stream_infos_[i]->codec_string().empty()) {
919 AudioStreamInfo* audio_stream_info =
920 reinterpret_cast<AudioStreamInfo*>(stream_infos_[i].get());
921 if (audio_stream_info->codec_config().empty()) {
922 // Set AudioStreamInfo fields using information from the ADTS
923 // header.
924 audio_stream_info->set_sampling_frequency(
925 adts_header.GetSamplingFrequency());
926 std::vector<uint8_t> audio_specific_config;
927 adts_header.GetAudioSpecificConfig(&audio_specific_config);
928 audio_stream_info->set_codec_config(audio_specific_config);
929 audio_stream_info->set_codec_string(
930 AudioStreamInfo::GetCodecString(kCodecAAC,
931 adts_header.GetObjectType()));
932 } else {
933 // Set AudioStreamInfo fields using information from the
934 // AACAudioSpecificConfig record.
935 AACAudioSpecificConfig aac_config;
936 if (!aac_config.Parse(stream_infos_[i]->codec_config())) {
937 LOG(ERROR) << "Could not parse AACAudioSpecificconfig";
938 return false;
939 }
940 audio_stream_info->set_sampling_frequency(
941 aac_config.GetSamplesPerSecond());
942 audio_stream_info->set_codec_string(
943 AudioStreamInfo::GetCodecString(
944 kCodecAAC, aac_config.GetAudioObjectType()));
945 }
946 }
947 }
948 }
949 }
950 }
951
952 if (!is_initialized_) {
953 bool all_streams_have_config = true;
954 // Check if all collected stream infos have codec_config set.
955 for (uint32_t i = 0; i < stream_infos_.size(); i++) {
956 if (stream_infos_[i]->codec_string().empty()) {
957 all_streams_have_config = false;
958 break;
959 }
960 }
961 if (all_streams_have_config) {
962 init_cb_(stream_infos_);
963 is_initialized_ = true;
964 }
965 }
966
967 DCHECK_GT(media_sample_->data_size(), 0UL);
968 std::string key =
969 absl::StrFormat("%u:%u", current_program_id_, prev_pes_stream_id_);
970 std::map<std::string, uint32_t>::iterator it =
971 program_demux_stream_map_.find(key);
972 if (it == program_demux_stream_map_.end()) {
973 // TODO(ramjic): Log error message here and in other error cases through
974 // this method.
975 return false;
976 }
977 DemuxStreamIdMediaSample demux_stream_media_sample;
978 demux_stream_media_sample.parsed_audio_or_video_stream_id =
979 prev_pes_stream_id_;
980 demux_stream_media_sample.demux_stream_id = (*it).second;
981 demux_stream_media_sample.media_sample = media_sample_;
982 // Check if sample can be emitted.
983 if (!is_initialized_) {
984 media_sample_queue_.push_back(demux_stream_media_sample);
985 } else {
986 // flush the sample queue and emit all queued samples.
987 while (!media_sample_queue_.empty()) {
988 if (!EmitPendingSamples())
989 return false;
990 }
991 // Emit current sample.
992 if (!EmitSample(prev_pes_stream_id_, (*it).second, media_sample_, false))
993 return false;
994 }
995 return true;
996}
997
998bool WvmMediaParser::EmitSample(uint32_t parsed_audio_or_video_stream_id,
999 uint32_t stream_id,
1000 const std::shared_ptr<MediaSample>& new_sample,
1001 bool isLastSample) {
1002 DCHECK(new_sample);
1003 if (isLastSample) {
1004 if ((parsed_audio_or_video_stream_id & kPesStreamIdVideoMask) ==
1005 kPesStreamIdVideo) {
1006 new_sample->set_duration(prev_media_sample_data_.video_sample_duration);
1007 } else if ((parsed_audio_or_video_stream_id & kPesStreamIdAudioMask) ==
1008 kPesStreamIdAudio) {
1009 new_sample->set_duration(prev_media_sample_data_.audio_sample_duration);
1010 }
1011 if (!new_sample_cb_(stream_id, new_sample)) {
1012 LOG(ERROR) << "Failed to process the last sample.";
1013 return false;
1014 }
1015 return true;
1016 }
1017
1018 // Cannot emit current sample. Compute duration first and then,
1019 // emit previous sample.
1020 if ((parsed_audio_or_video_stream_id & kPesStreamIdVideoMask) ==
1021 kPesStreamIdVideo) {
1022 if (prev_media_sample_data_.video_sample == NULL) {
1023 prev_media_sample_data_.video_sample = new_sample;
1024 prev_media_sample_data_.video_stream_id = stream_id;
1025 return true;
1026 }
1027 prev_media_sample_data_.video_sample->set_duration(
1028 new_sample->dts() - prev_media_sample_data_.video_sample->dts());
1029 prev_media_sample_data_.video_sample_duration =
1030 prev_media_sample_data_.video_sample->duration();
1031 if (!new_sample_cb_(prev_media_sample_data_.video_stream_id,
1032 prev_media_sample_data_.video_sample)) {
1033 LOG(ERROR) << "Failed to process the video sample.";
1034 return false;
1035 }
1036 prev_media_sample_data_.video_sample = new_sample;
1037 prev_media_sample_data_.video_stream_id = stream_id;
1038 } else if ((parsed_audio_or_video_stream_id & kPesStreamIdAudioMask) ==
1039 kPesStreamIdAudio) {
1040 if (prev_media_sample_data_.audio_sample == NULL) {
1041 prev_media_sample_data_.audio_sample = new_sample;
1042 prev_media_sample_data_.audio_stream_id = stream_id;
1043 return true;
1044 }
1045 prev_media_sample_data_.audio_sample->set_duration(
1046 new_sample->dts() - prev_media_sample_data_.audio_sample->dts());
1047 prev_media_sample_data_.audio_sample_duration =
1048 prev_media_sample_data_.audio_sample->duration();
1049 if (!new_sample_cb_(prev_media_sample_data_.audio_stream_id,
1050 prev_media_sample_data_.audio_sample)) {
1051 LOG(ERROR) << "Failed to process the audio sample.";
1052 return false;
1053 }
1054 prev_media_sample_data_.audio_sample = new_sample;
1055 prev_media_sample_data_.audio_stream_id = stream_id;
1056 }
1057 return true;
1058}
1059
1060bool WvmMediaParser::GetAssetKey(const uint8_t* asset_id,
1061 EncryptionKey* encryption_key) {
1062 DCHECK(decryption_key_source_);
1063 Status status = decryption_key_source_->FetchKeys(
1064 EmeInitDataType::WIDEVINE_CLASSIC,
1065 std::vector<uint8_t>(asset_id, asset_id + sizeof(uint32_t)));
1066 if (!status.ok()) {
1067 LOG(ERROR) << "Fetch Key(s) failed for AssetID = "
1068 << absl::big_endian::Load32(asset_id) << ", error = " << status;
1069 return false;
1070 }
1071
1072 const char kHdStreamLabel[] = "HD";
1073 status = decryption_key_source_->GetKey(kHdStreamLabel, encryption_key);
1074 if (!status.ok()) {
1075 LOG(ERROR) << "Fetch Key(s) failed for AssetID = "
1076 << absl::big_endian::Load32(asset_id) << ", error = " << status;
1077 return false;
1078 }
1079
1080 return true;
1081}
1082
1083bool WvmMediaParser::ProcessEcm() {
1084 // An error will be returned later if the samples need to be decrypted.
1085 if (!decryption_key_source_)
1086 return true;
1087
1088 if (current_program_id_ > 0) {
1089 return true;
1090 }
1091 if (ecm_.size() != kEcmSizeBytes) {
1092 LOG(ERROR) << "Unexpected ECM size = " << ecm_.size()
1093 << ", expected size = " << kEcmSizeBytes;
1094 return false;
1095 }
1096 const uint8_t* ecm_data = ecm_.data();
1097 DCHECK(ecm_data);
1098 ecm_data += sizeof(uint32_t); // old version field - skip.
1099 ecm_data += sizeof(uint32_t); // clear lead - skip.
1100 ecm_data += sizeof(uint32_t); // system id(includes ECM version) - skip.
1101 EncryptionKey encryption_key;
1102 if (!GetAssetKey(ecm_data, &encryption_key)) {
1103 return false;
1104 }
1105 if (encryption_key.key.size() < kAssetKeySizeBytes) {
1106 LOG(ERROR) << "Asset Key size of " << encryption_key.key.size()
1107 << " for AssetID = " << absl::big_endian::Load32(ecm_data)
1108 << " is less than minimum asset key size.";
1109 return false;
1110 }
1111 ecm_data += sizeof(uint32_t); // asset_id.
1112 // Legacy WVM content may have asset keys > 16 bytes.
1113 // Use only the first 16 bytes of the asset key to get
1114 // the content key.
1115 std::vector<uint8_t> asset_key(
1116 encryption_key.key.begin(),
1117 encryption_key.key.begin() + kAssetKeySizeBytes);
1118 // WVM format always uses all zero IV.
1119 std::vector<uint8_t> zero_iv(kInitializationVectorSizeBytes, 0);
1120 AesCbcDecryptor asset_decryptor(kCtsPadding, AesCryptor::kUseConstantIv);
1121 if (!asset_decryptor.InitializeWithIv(asset_key, zero_iv)) {
1122 LOG(ERROR) << "Failed to initialize asset_decryptor.";
1123 return false;
1124 }
1125
1126 const size_t content_key_buffer_size =
1127 kEcmFlagsSizeBytes + kEcmContentKeySizeBytes +
1128 kEcmPaddingSizeBytes; // flags + contentKey + padding.
1129
1130 size_t output_size =
1131 asset_decryptor.RequiredOutputSize(content_key_buffer_size);
1132 std::vector<uint8_t> content_key_buffer(output_size);
1133 CHECK(asset_decryptor.Crypt(ecm_data, content_key_buffer_size,
1134 content_key_buffer.data(), &output_size));
1135
1136 std::vector<uint8_t> decrypted_content_key_vec(
1137 content_key_buffer.begin() + 4, content_key_buffer.begin() + 20);
1138 std::unique_ptr<AesCbcDecryptor> content_decryptor(
1139 new AesCbcDecryptor(kCtsPadding, AesCryptor::kUseConstantIv));
1140 if (!content_decryptor->InitializeWithIv(decrypted_content_key_vec,
1141 zero_iv)) {
1142 LOG(ERROR) << "Failed to initialize content decryptor.";
1143 return false;
1144 }
1145
1146 content_decryptor_ = std::move(content_decryptor);
1147 return true;
1148}
1149
1150DemuxStreamIdMediaSample::DemuxStreamIdMediaSample()
1151 : demux_stream_id(0), parsed_audio_or_video_stream_id(0) {}
1152
1153DemuxStreamIdMediaSample::~DemuxStreamIdMediaSample() {}
1154
1155PrevSampleData::PrevSampleData() {
1156 Reset();
1157}
1158
1159PrevSampleData::~PrevSampleData() {}
1160
1161void PrevSampleData::Reset() {
1162 audio_sample = NULL;
1163 video_sample = NULL;
1164 audio_stream_id = 0;
1165 video_stream_id = 0;
1166 audio_sample_duration = 0;
1167 video_sample_duration = 0;
1168}
1169
1170} // namespace wvm
1171} // namespace media
1172} // namespace shaka
KeySource is responsible for encryption key acquisition.
Definition key_source.h:53
std::function< bool(uint32_t track_id, std::shared_ptr< MediaSample > media_sample)> NewMediaSampleCB
std::function< bool(uint32_t track_id, std::shared_ptr< TextSample > text_sample)> NewTextSampleCB
std::function< void(const std::vector< std::shared_ptr< StreamInfo > > &stream_info)> InitCB
All the methods that are virtual are virtual for mocking.