// File preamble followed by three file-level constants.
// NOTE(review): this text appears to be a line-numbered listing with gaps; the
// leading numbers are listing residue, not program tokens.
// The preamble supplies fallback CFSTR definitions for two VideoToolbox
// hardware-acceleration keys when they are not already defined, and
// DEFAULT(val, def), which yields `val` unless it compares equal to 0 (then
// `def`). `val` is expanded twice, so avoid arguments with side effects.
18 #include <unordered_map> 25 #ifndef kVTVideoDecoderSpecification_EnableHardwareAcceleratedVideoDecoder 26 # define kVTVideoDecoderSpecification_EnableHardwareAcceleratedVideoDecoder \ 27 CFSTR("EnableHardwareAcceleratedVideoDecoder") 29 #ifndef kVTDecompressionPropertyKey_UsingHardwareAcceleratedVideoDecoder 30 # define kVTDecompressionPropertyKey_UsingHardwareAcceleratedVideoDecoder \ 31 CFSTR("UsingHardwareAcceleratedVideoDecoder") 34 #define DEFAULT(val, def) ((val) != 0 ? (val) : (def)) 43 constexpr
// Output packet count: sizes DecodeAudio's scratch buffer and is the number of
// packets it requests per AudioConverterFillComplexBuffer call.
const size_t kAudioSampleCount = 256;
// Bytes per decoded audio sample per channel; the PCM output description uses
// kAudioSampleSize * 8 bits per channel.
52 constexpr
const size_t kAudioSampleSize = 2;
// Status returned by AudioInputCallback when there is no pending input;
// DecodeAudio does not report it as an error.
55 constexpr
const OSStatus kNoMoreDataError = -12345;
// Returns a new mutable CFDictionary created with the default allocator, the
// given `capacity`, and the standard kCFType key and value callbacks.
58 util::CFRef<CFMutableDictionaryRef> MakeDict(
size_t capacity) {
59 return CFDictionaryCreateMutable(kCFAllocatorDefault, capacity,
60 &kCFTypeDictionaryKeyCallBacks,
61 &kCFTypeDictionaryValueCallBacks);
// Returns a CFData object created by CFDataCreate from the bytes of `buffer`
// (buffer.data(), buffer.size()), using the default allocator.
65 util::CFRef<CFDataRef> CreateBuffer(
const std::vector<uint8_t>& buffer) {
66 return util::CFRef<CFDataRef>(
67 CFDataCreate(kCFAllocatorDefault, buffer.data(), buffer.size()));
// Builds the decoder-configuration dictionary for a video codec.
//   codec:      codec string. The visible branches test `raw_codec`, which is
//               computed on lines not shown here (presumably from `codec`).
//   extra_data: codec configuration bytes, stored as the "avcC" atom for h264
//               or the "hvcC" atom for hevc.
// Returns the outer dictionary `ret`.
70 util::CFRef<CFMutableDictionaryRef> CreateVideoDecoderConfig(
71 const std::string& codec,
const std::vector<uint8_t>& extra_data) {
72 util::CFRef<CFMutableDictionaryRef> ret(MakeDict(2));
// `info` holds the single sample-description atom for the codec.
78 util::CFRef<CFMutableDictionaryRef> info(MakeDict(1));
79 if (raw_codec ==
"h264") {
80 CFDictionarySetValue(info, CFSTR(
"avcC"), CreateBuffer(extra_data));
81 }
else if (raw_codec ==
"hevc") {
82 CFDictionarySetValue(info, CFSTR(
"hvcC"), CreateBuffer(extra_data));
// The atom dictionary is associated with `ret` under the
// SampleDescriptionExtensionAtoms key (the call's first line is not shown;
// presumably CFDictionarySetValue).
87 ret, kCMFormatDescriptionExtension_SampleDescriptionExtensionAtoms, info);
// Creates a CMVideoFormatDescription for the given codec and frame size.
//   codec:          codec string; the branches test `raw_codec`, computed on
//                   lines not shown here.
//   width, height:  passed unchanged to CMVideoFormatDescriptionCreate.
//   decoder_config: extensions dictionary passed to the same call.
// An error is logged with the returned status when creation fails (the
// surrounding condition and the return statements are not shown).
92 util::CFRef<CMVideoFormatDescriptionRef> CreateFormatDescription(
93 const std::string& codec, uint32_t
width, uint32_t
height,
94 util::CFRef<CFDictionaryRef> decoder_config) {
// Map the codec name onto the CoreMedia codec type.
96 CMVideoCodecType codec_type;
97 if (raw_codec ==
"h264") {
98 codec_type = kCMVideoCodecType_H264;
99 }
else if (raw_codec ==
"hevc") {
100 codec_type = kCMVideoCodecType_HEVC;
105 CMVideoFormatDescriptionRef ret;
106 const auto status = CMVideoFormatDescriptionCreate(
107 kCFAllocatorDefault, codec_type, width, height, decoder_config, &ret);
109 LOG(ERROR) <<
"Error creating video format description: " << status;
// Wraps an encoded frame in a CMSampleBuffer for the given format description.
// The remaining parameters (including `size`) and most of the body are on
// lines not shown here.
116 util::CFRef<CMSampleBufferRef> CreateSampleBuffer(
117 util::CFRef<CMVideoFormatDescriptionRef> format_desc,
const uint8_t* data,
119 CMBlockBufferRef block =
nullptr;
120 CMSampleBufferRef ret =
nullptr;
// The block buffer is created over `data` with kCFAllocatorNull as the block
// allocator; const is cast away because the API takes a mutable pointer.
// NOTE(review): presumably the block buffer references the caller's memory
// without copying or freeing it, so `data` must outlive the sample — confirm.
121 const auto status = CMBlockBufferCreateWithMemoryBlock(
122 kCFAllocatorDefault, const_cast<uint8_t*>(data), size, kCFAllocatorNull,
123 nullptr, 0, size, 0, &block);
125 CMSampleBufferCreate(kCFAllocatorDefault,
// Builds the pixel-buffer attribute dictionary for decoded video frames:
// width, height, pixel format, IOSurface properties and CGImage compatibility.
// The second parameter and the values of the last two entries are on lines
// not shown here.
144 util::CFRef<CFMutableDictionaryRef> CreateBufferAttributes(int32_t width,
146 util::CFRef<CFMutableDictionaryRef> ret(MakeDict(5));
147 util::CFRef<CFMutableDictionaryRef> surface_props(MakeDict(0));
// Box the integer values as CFNumbers so they can be stored in the dictionary.
149 util::CFRef<CFNumberRef> w(
150 CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &width));
151 util::CFRef<CFNumberRef> h(
152 CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &height));
153 int32_t pix_fmt_raw = kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange;
154 util::CFRef<CFNumberRef> pix_fmt(
155 CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &pix_fmt_raw));
157 CFDictionarySetValue(ret, kCVPixelBufferWidthKey, w);
158 CFDictionarySetValue(ret, kCVPixelBufferHeightKey, h);
159 CFDictionarySetValue(ret, kCVPixelBufferPixelFormatTypeKey, pix_fmt);
160 CFDictionarySetValue(ret, kCVPixelBufferIOSurfacePropertiesKey,
162 CFDictionarySetValue(ret, kCVPixelBufferCGImageCompatibilityKey,
// Builds H.264 extra data from a codec string. In the visible code it is
// called by the capability probe, which passes the result to
// CreateVideoDecoderConfig.
//   codec: codec string; the text from the first '.' onward is parsed as a
//          hexadecimal value into `profile`.
// Returns a byte vector copied from a local table in which three bytes come
// from `profile` (most significant first) and the rest are fixed values.
// Parts of the table are on lines not shown here.
168 std::vector<uint8_t> MakeH264ExtraData(
const std::string& codec) {
170 auto pos = codec.find(
'.');
// NOTE(review): substr(pos) keeps the leading '.', on which strtol performs no
// conversion and returns 0; `pos + 1` looks intended — confirm. The handling
// of a zero `profile` is on lines not shown here.
171 if (pos != std::string::npos)
172 profile = strtol(codec.substr(pos).c_str(),
nullptr, 16);
179 uint8_t extra_data[] = {
184 (profile >> 16) & 0xff,
185 (profile >> 8) & 0xff,
186 (profile >> 0) & 0xff,
191 0x67, 0x42, 0xc8, 0x1e, 0xd9, 0x01, 0x03, 0xfe, 0xbf, 0xf0,
192 0x06, 0xe0, 0x06, 0xd1, 0x00, 0x00, 0x03, 0x00, 0x01, 0x00,
193 0x00, 0x03, 0x00, 0x30, 0x0f, 0x16, 0x2e, 0x48,
197 0x68, 0xcb, 0x8c, 0xb2,
200 return {extra_data, extra_data +
sizeof(extra_data)};
// Builds HEVC extra data from a codec string that has been split into `parts`
// (the split itself is on lines not shown here).
//   parts[0]:    must be "hvc1" or "hev1" (DCHECK).
//   parts[1]:    optional leading 'A'/'B'/'C' selects profile space 1..3; the
//                decimal profile idc is parsed starting at `offset`.
//   parts[2]:    hexadecimal profile compatibility flags.
//   parts[3]:    tier flag (1 unless the first character is 'L') followed by
//                the decimal level idc.
//   parts[4..9]: up to six hexadecimal constraint indicator bytes.
// Returns a byte vector copied from a local table combining these fields with
// fixed bytes; parts of the table are on lines not shown here.
203 std::vector<uint8_t> MakeHevcExtraData(
const std::string& codec) {
208 DCHECK(parts[0] ==
"hvc1" || parts[0] ==
"hev1");
// Defaults used when the corresponding part is absent.
210 uint8_t general_profile_space = 0;
211 uint8_t general_profile_idc = 1;
212 if (parts.size() > 1) {
214 if (parts[1].size() > 0 &&
215 (parts[1][0] ==
'A' || parts[1][0] ==
'B' || parts[1][0] ==
'C')) {
216 general_profile_space = 1 + (parts[1][0] -
'A');
219 general_profile_idc =
220 static_cast<uint8_t
>(strtol(parts[1].c_str() + offset,
nullptr, 10));
223 uint32_t general_profile_compatibility_flags = 0;
224 if (parts.size() > 2)
225 general_profile_compatibility_flags = strtol(parts[2].c_str(),
nullptr, 16);
227 uint8_t general_tier_flag = 0;
228 uint8_t general_level_idc = 0;
229 if (parts.size() > 3) {
230 general_tier_flag = parts[3].size() > 0 && parts[3][0] !=
'L' ? 1 : 0;
// NOTE(review): the tier check above allows for an empty parts[3], but the
// substr(1) below is not visibly guarded and would throw std::out_of_range on
// an empty string — confirm.
232 static_cast<uint8_t
>(strtol(parts[3].substr(1).c_str(),
nullptr, 10));
// Constraint bytes default to zero; only the parts that are present are parsed.
235 uint8_t general_constraint_indicator_flags[6] = {0};
236 for (
size_t i = 0; i < 6 && i + 4 < parts.size(); i++) {
237 general_constraint_indicator_flags[i] =
238 static_cast<uint8_t
>(strtol(parts[i + 4].c_str(),
nullptr, 16));
241 const uint8_t data[] = {
// Profile space in the top two bits, profile idc in the low bits.
// NOTE(review): general_tier_flag is not part of this byte in the visible
// code; the hvcC layout places it at bit 5 — confirm whether it is applied on
// lines not shown here.
244 static_cast<uint8_t
>((general_profile_space << 6) | general_profile_idc),
// Compatibility flags, most significant byte first.
245 (general_profile_compatibility_flags >> 24) & 0xff,
246 (general_profile_compatibility_flags >> 16) & 0xff,
247 (general_profile_compatibility_flags >> 8) & 0xff,
248 (general_profile_compatibility_flags >> 0) & 0xff,
249 general_constraint_indicator_flags[0],
250 general_constraint_indicator_flags[1],
251 general_constraint_indicator_flags[2],
252 general_constraint_indicator_flags[3],
253 general_constraint_indicator_flags[4],
254 general_constraint_indicator_flags[5],
269 0x42, 0x01, 0x01, 0x01, 0x60, 0x00, 0x00, 0x03,
270 0x00, 0x90, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03,
271 0x00, 0x5d, 0xa0, 0x02, 0x80, 0x80, 0x24, 0x1f,
272 0x26, 0x59, 0x99, 0xa4, 0x93, 0x2b, 0xff, 0xc0,
273 0xd5, 0xc0, 0xd6, 0x40, 0x40, 0x00, 0x00, 0x03,
274 0x00, 0x40, 0x00, 0x00, 0x06, 0x02,
280 0x44, 0x01, 0xc1, 0x72, 0xb4, 0x62, 0x40,
286 0x40, 0x01, 0x0c, 0x01, 0xff, 0xff, 0x01, 0x60,
287 0x00, 0x00, 0x03, 0x00, 0x90, 0x00, 0x00, 0x03,
288 0x00, 0x00, 0x03, 0x00, 0x5d, 0x99, 0x98, 0x09,
291 return {data, data +
sizeof(data)};
// Wraps raw AAC codec data in a fixed-size descriptor header.
//   codec_data: codec-specific configuration bytes, appended after the header.
// Returns a vector of kDescPrefixSize + kConfigPrefixSize header bytes
// followed by a copy of `codec_data`.
// Each visible length field is written as 7-bit groups, most significant
// first, with bit 0x80 set on every group except the last. Several table
// entries and the return statement are on lines not shown here.
294 std::vector<uint8_t> MakeAacExtraData(
const std::vector<uint8_t>& codec_data) {
296 constexpr
const size_t kDescPrefixSize = 8;
297 constexpr
const size_t kConfigPrefixSize = 23;
298 const size_t config_size = codec_data.size() + kConfigPrefixSize;
299 const size_t total_size = config_size + kDescPrefixSize;
// Four 7-bit groups can represent at most 28 bits of length.
300 CHECK_LT(total_size, 1 << (7 * 4));
301 const uint8_t fixed[] = {
304 0x80 | ((total_size >> (7 * 3)) & 0x7f),
305 0x80 | ((total_size >> (7 * 2)) & 0x7f),
306 0x80 | ((total_size >> (7 * 1)) & 0x7f),
313 0x80 | ((config_size >> (7 * 3)) & 0x7f),
314 0x80 | ((config_size >> (7 * 2)) & 0x7f),
315 0x80 | ((config_size >> (7 * 1)) & 0x7f),
316 (config_size & 0x7f),
325 0x80 | ((codec_data.size() >> (7 * 3)) & 0x7f),
326 0x80 | ((codec_data.size() >> (7 * 2)) & 0x7f),
327 0x80 | ((codec_data.size() >> (7 * 1)) & 0x7f),
328 (codec_data.size() & 0x7f),
// The declared prefix sizes must match the table, or the sizes above are wrong.
332 static_assert(kDescPrefixSize + kConfigPrefixSize ==
sizeof(fixed),
333 "Inconsistent buffer sizes");
334 DCHECK_EQ(total_size,
sizeof(fixed) + codec_data.size());
// Header first, then the caller's codec data.
336 std::vector<uint8_t> ret(total_size);
337 std::memcpy(ret.data(), fixed,
sizeof(fixed));
338 std::memcpy(ret.data() +
sizeof(fixed), codec_data.data(), codec_data.size());
// Creates an AudioConverter that decodes MPEG-4 AAC into signed-integer PCM.
//   sample_rate, channel_count: used for the output format, and for the input
//                               format when `extra_data` is empty.
//   extra_data: codec data; when present it is wrapped by MakeAacExtraData and
//               handed to AudioFormatGetProperty to fill in the input format.
//   session:    receives the new converter.
// Returns the status from AudioConverterNew. How a failing
// AudioFormatGetProperty is handled is on lines not shown here.
342 OSStatus CreateAudioConverter(uint32_t sample_rate, uint32_t channel_count,
343 const std::vector<uint8_t>& extra_data,
344 AudioConverterRef* session) {
347 AudioStreamBasicDescription input = {0};
348 input.mFormatID = kAudioFormatMPEG4AAC;
349 if (extra_data.empty()) {
// No codec data: describe the input from the caller's values.
351 input.mSampleRate = sample_rate;
352 input.mChannelsPerFrame = channel_count;
353 input.mBytesPerPacket = 0;
354 input.mFramesPerPacket = 1024;
// Codec data is available (the branch line is not shown): derive the input
// description from it.
357 std::vector<uint8_t> cookie = MakeAacExtraData(extra_data);
358 UInt32 size =
sizeof(input);
360 AudioFormatGetProperty(kAudioFormatProperty_FormatInfo, cookie.size(),
361 cookie.data(), &size, &input);
// Output: one frame per packet, kAudioSampleSize bytes per channel.
366 AudioStreamBasicDescription output = {0};
367 output.mFormatID = kAudioFormatLinearPCM;
368 output.mFormatFlags = kLinearPCMFormatFlagIsSignedInteger;
369 output.mSampleRate = sample_rate;
370 output.mChannelsPerFrame = channel_count;
371 output.mFramesPerPacket = 1;
372 output.mBitsPerChannel = kAudioSampleSize * 8;
373 output.mBytesPerFrame = output.mBitsPerChannel * output.mChannelsPerFrame / 8;
374 output.mBytesPerPacket = output.mBytesPerFrame * output.mFramesPerPacket;
376 return AudioConverterNew(&input, &output, session);
// Constructor initializer list (the constructor header is not shown): names
// the mutex "AppleDecoder" and starts with no audio converter, registering
// AudioConverterDispose as the deleter for at_session_.
382 : mutex_(
"AppleDecoder"), at_session_(nullptr, &AudioConverterDispose) {}
// Body of a capability probe; the function header and several statements are
// on lines not shown here. For video it creates a throwaway VideoToolbox
// session to see whether the configuration is accepted and hardware backed.
// For AAC it creates and disposes a throwaway AudioConverter.
395 std::unordered_map<std::string, std::string> args;
408 if (norm_codec ==
"h264" || norm_codec ==
"hevc") {
// Extra data is built from the codec string alone.
411 const std::vector<uint8_t> extra_data = norm_codec ==
"h264" 412 ? MakeH264ExtraData(codec)
413 : MakeHevcExtraData(codec);
// Both dimensions are compared against the larger of the two resolution values.
416 const size_t max_size = std::max(resolution.first, resolution.second);
417 if (width > max_size || height > max_size) {
// The callback context is null here, unlike in InitVideoDecoder.
424 VTDecompressionOutputCallbackRecord cb = {
425 .decompressionOutputCallback = &AppleDecoder::OnNewVideoFrame,
426 .decompressionOutputRefCon =
nullptr,
428 util::CFRef<CFDictionaryRef> decoder_config =
429 CreateVideoDecoderConfig(codec, extra_data);
431 CreateFormatDescription(codec, width, height, decoder_config);
432 util::CFRef<CFDictionaryRef> buffer_attr =
433 CreateBufferAttributes(width, height);
435 VTDecompressionSessionRef session;
436 auto status = VTDecompressionSessionCreate(kCFAllocatorDefault, format_desc,
437 decoder_config, buffer_attr, &cb,
// `using_hardware` is read only when the property copy succeeded (status == 0).
// NOTE(review): VTSessionCopyProperty hands back a copied value; confirm it is
// released on lines not shown here.
443 CFBooleanRef using_hardware;
444 status = VTSessionCopyProperty(
447 kCFAllocatorDefault, &using_hardware);
449 status == 0 && CFBooleanGetValue(using_hardware);
451 VTDecompressionSessionInvalidate(session);
454 }
else if (norm_codec ==
"aac") {
455 AudioConverterRef audio_session =
nullptr;
// Empty extra data: the converter is described from sample rate and channels.
459 CreateAudioConverter(sample_rate, channel_count, {}, &audio_session);
462 AudioConverterDispose(audio_session);
// Takes mutex_ for the rest of the enclosing scope (the enclosing function is
// on lines not shown here).
473 std::unique_lock<Mutex> lock(mutex_);
// Frame decode entry point (the start of the signature is not shown; the name
// Decode is presumed). Holds mutex_ for the whole call.
//   frames:     output list for decoded frames.
//   extra_info: receives an error description on failure.
// The first branch decodes with a null buffer, which looks like the flush path
// (its condition is not shown). Otherwise the decoder is (re)initialised if
// needed, the frame is decrypted if it carries encryption info, and the
// payload is handed to DecodeVideo or DecodeAudio.
479 std::vector<std::shared_ptr<DecodedFrame>>*
frames,
480 std::string* extra_info) {
481 std::unique_lock<Mutex> lock(mutex_);
488 ret = DecodeVideo(
nullptr, 0, extra_info);
489 else if (at_session_)
490 ret = DecodeAudio(
nullptr, 0, extra_info);
// Pick the init/decode member functions and the session matching the stream.
499 const bool is_video = input->stream_info->is_video;
500 auto init = is_video ? &AppleDecoder::InitVideoDecoder
501 : &AppleDecoder::InitAudioDecoder;
503 is_video ? &AppleDecoder::DecodeVideo : &AppleDecoder::DecodeAudio;
504 auto has_session = is_video ? !!vt_session_ : !!at_session_;
// (Re)initialise when there is no session yet or the stream info changed.
506 if (!has_session || input->stream_info != decoder_stream_info_) {
508 if (!(this->*init)(input->stream_info, extra_info))
510 decoder_stream_info_ = input->stream_info;
514 const size_t size = input->data_size;
// Encrypted frames are decrypted into a scratch buffer of the same size.
515 std::vector<uint8_t> decrypted_data;
516 if (input->encryption_info) {
517 decrypted_data.resize(input->data_size);
518 const auto status = input->Decrypt(eme, decrypted_data.data());
520 *extra_info =
"Error decrypting frame";
523 data = decrypted_data.data();
// Publish the current frame to the decoder callbacks for the duration of the
// decode call, then clear the fields again.
530 input_ = input.get();
532 input_data_size_ = size;
534 const bool ret = (this->*decode)(data, size, extra_info);
536 input_data_ =
nullptr;
537 input_data_size_ = 0;
// VideoToolbox output callback, registered through
// VTDecompressionOutputCallbackRecord. Converts the decoded image buffer into
// an output frame using the stream info of the current decoder.
//   user:   callback context (`decoder` is derived on lines not shown here,
//           presumably from this pointer).
//   buffer: decoded image.
//   pts:    presentation time; used only when flagged valid.
// The `duration` parameter and the error handling for `status` are on lines
// not shown here.
542 void AppleDecoder::OnNewVideoFrame(
void* user,
void* frameUser, OSStatus status,
544 CVImageBufferRef buffer, CMTime pts,
547 auto*
frame = decoder->input_;
// Output frames need a destination list.
551 CHECK(decoder->output_);
// Decoder-reported times are used only when marked valid (the initial values
// of `time` and `durationSec` are on lines not shown here).
555 if (pts.flags & kCMTimeFlags_Valid)
556 time = CMTimeGetSeconds(pts);
560 if (duration.flags & kCMTimeFlags_Valid)
561 durationSec = CMTimeGetSeconds(duration);
566 decoder->decoder_stream_info_, time, durationSec, buffer));
// Input callback for AudioConverterFillComplexBuffer: supplies the pending
// encoded frame to the converter as a single buffer.
//   data: buffer list to fill; must contain exactly one buffer.
//   desc: receives a pointer to the decoder's packet description.
// Returns kNoMoreDataError when no input is pending. The remaining parameters
// and the success return are on lines not shown here.
569 OSStatus AppleDecoder::AudioInputCallback(AudioConverterRef ,
571 AudioBufferList* data,
572 AudioStreamPacketDescription** desc,
575 if (!decoder->input_) {
577 return kNoMoreDataError;
// Describe the whole input as one packet starting at offset 0.
581 decoder->audio_desc_.mStartOffset = 0;
582 decoder->audio_desc_.mVariableFramesInPacket = 0;
583 decoder->audio_desc_.mDataByteSize = decoder->input_data_size_;
584 *desc = &decoder->audio_desc_;
587 CHECK_EQ(data->mNumberBuffers, 1);
589 data->mBuffers[0].mNumberChannels =
590 decoder->decoder_stream_info_->channel_count;
591 data->mBuffers[0].mDataByteSize = decoder->input_data_size_;
// The buffer points at the decoder's input bytes; const is cast away because
// mData is a mutable pointer.
592 data->mBuffers[0].mData =
const_cast<uint8_t*
>(decoder->input_data_);
// Mark the input as consumed so the next invocation reports no more data.
594 decoder->input_ =
nullptr;
// Tears down decoder state. The visible statements invalidate the VideoToolbox
// session and clear vt_session_; any guard around them and any further cleanup
// are on lines not shown here.
598 void AppleDecoder::ResetInternal() {
600 VTDecompressionSessionInvalidate(vt_session_);
601 vt_session_ =
nullptr;
// Feeds one encoded video frame to the VideoToolbox session, or drains it.
//   data, data_size: encoded frame bytes; the caller passes (nullptr, 0) on
//                    what looks like the flush path.
//   extra_info:      receives an error description on failure.
// The branch conditions between the calls below and the return statements are
// on lines not shown here.
606 bool AppleDecoder::DecodeVideo(
const uint8_t* data,
size_t data_size,
607 std::string* extra_info) {
610 util::CFRef<CMSampleBufferRef> sample =
611 CreateSampleBuffer(format_desc_, data, data_size);
613 *extra_info =
"Error creating sample buffer";
// Submit the frame with temporal processing enabled, then wait for pending
// asynchronous frames.
617 status = VTDecompressionSessionDecodeFrame(
618 vt_session_, sample, kVTDecodeFrame_EnableTemporalProcessing,
nullptr,
621 status = VTDecompressionSessionWaitForAsynchronousFrames(vt_session_);
// Ask the session to emit delayed frames and wait for them
// (presumably the no-data path — confirm against the missing condition).
623 status = VTDecompressionSessionFinishDelayedFrames(vt_session_);
625 status = VTDecompressionSessionWaitForAsynchronousFrames(vt_session_);
629 LOG(ERROR) << (*extra_info =
// Decodes the pending audio frame through the AudioConverter. It pulls output
// in chunks of up to kAudioSampleCount packets until the converter reports a
// non-zero status, then emits one decoded frame holding everything collected.
//   data, data_size: encoded bytes (the converter reads them through
//                    AudioInputCallback, not these parameters directly, in
//                    the visible code).
//   extra_info:      receives an error description on failure.
// The declarations of `status` and `out_buffer` and the return statements are
// on lines not shown here.
636 bool AppleDecoder::DecodeAudio(
const uint8_t* data,
size_t data_size,
637 std::string* extra_info) {
641 const size_t channel_count = decoder_stream_info_->channel_count;
// Scratch buffer large enough for one chunk of converter output.
643 std::vector<uint8_t> temp_buffer;
644 temp_buffer.resize(kAudioSampleCount * kAudioSampleSize * channel_count);
// Keep the frame pointer: AudioInputCallback clears input_ once it is consumed.
645 auto* input = input_;
648 while (status == 0) {
649 AudioBufferList output{};
650 output.mNumberBuffers = 1;
651 output.mBuffers[0].mNumberChannels = channel_count;
652 output.mBuffers[0].mDataByteSize = temp_buffer.size();
653 output.mBuffers[0].mData = temp_buffer.
data();
// In: packets requested. The converter takes this by pointer, and the value
// is used below to size the bytes produced.
654 UInt32 output_size = kAudioSampleCount;
656 status = AudioConverterFillComplexBuffer(
657 at_session_.get(), &AppleDecoder::AudioInputCallback,
this,
658 &output_size, &output,
nullptr);
// kNoMoreDataError only means the input callback ran out of data.
659 if (status != 0 && status != kNoMoreDataError) {
660 LOG(DFATAL) << (*extra_info =
666 output_size * kAudioSampleSize * channel_count);
// Reuse temp_buffer to hold everything accumulated in out_buffer.
669 temp_buffer.resize(out_buffer.
Size());
670 out_buffer.
CopyDataTo(temp_buffer.data(), temp_buffer.size());
671 const uint32_t sample_count =
672 temp_buffer.size() / kAudioSampleSize / channel_count;
// NOTE(review): `input` may be null when this is reached via
// DecodeAudio(nullptr, 0, ...); confirm the lines not shown guard this.
674 decoder_stream_info_, input->pts, input->duration, kAudioSampleFormat,
675 sample_count, std::move(temp_buffer)));
// Creates the VideoToolbox decompression session for a video stream.
//   info:       stream description supplying codec, extra_data, width, height.
//   extra_info: receives an error description when session creation fails.
// On the visible success path it stores the session in vt_session_ and
// remembers `info` as decoder_stream_info_. Return statements and any cleanup
// of a previous session are on lines not shown here.
680 bool AppleDecoder::InitVideoDecoder(std::shared_ptr<const StreamInfo> info,
681 std::string* extra_info) {
// Decoded frames are delivered to OnNewVideoFrame with `this` as context.
682 VTDecompressionOutputCallbackRecord cb = {
683 .decompressionOutputCallback = &AppleDecoder::OnNewVideoFrame,
684 .decompressionOutputRefCon =
this,
686 util::CFRef<CFDictionaryRef> decoder_config =
687 CreateVideoDecoderConfig(info->codec, info->extra_data);
// format_desc_ is kept as a member; DecodeVideo passes it to CreateSampleBuffer.
688 format_desc_ = CreateFormatDescription(info->codec, info->width, info->height,
690 util::CFRef<CFDictionaryRef> buffer_attr =
691 CreateBufferAttributes(info->width, info->height);
693 VTDecompressionSessionRef session;
695 VTDecompressionSessionCreate(kCFAllocatorDefault, format_desc_,
696 decoder_config, buffer_attr, &cb, &session);
698 LOG(ERROR) << (*extra_info =
"Error creating VideoToolbox session: " +
703 vt_session_ = session;
704 decoder_stream_info_ = info;
// Creates the AudioConverter for an audio stream and installs it.
//   info:       stream description supplying sample_rate, channel_count and
//               extra_data.
//   extra_info: receives an error description on failure.
// After the converter is created, the wrapped extra data is also set as the
// converter's decompression magic cookie. On the visible failure path the
// converter is disposed. On the visible success path it replaces at_session_.
// Branch conditions and return statements are on lines not shown here.
708 bool AppleDecoder::InitAudioDecoder(std::shared_ptr<const StreamInfo> info,
709 std::string* extra_info) {
710 AudioConverterRef session =
nullptr;
711 auto status = CreateAudioConverter(info->sample_rate, info->channel_count,
712 info->extra_data, &session);
// Same wrapping as CreateAudioConverter uses to derive the input format.
714 const std::vector<uint8_t> extra_data = MakeAacExtraData(info->extra_data);
715 status = AudioConverterSetProperty(session,
716 kAudioConverterDecompressionMagicCookie,
717 extra_data.size(), extra_data.data());
720 LOG(DFATAL) << (*extra_info =
"Error creating audio converter: " +
// Do not leak the converter when setup fails.
723 AudioConverterDispose(session);
726 at_session_.reset(session);
void CopyDataTo(uint8_t *dest, size_t size) const
std::shared_ptr< shaka::media::DecodedFrame > frame
#define kVTDecompressionPropertyKey_UsingHardwareAcceleratedVideoDecoder
void AppendCopy(const void *buffer, size_t size)
#define DEFAULT(val, def)
std::vector< std::string > StringSplit(const std::string &source, char split_on)
std::list< std::shared_ptr< BaseFrame > > frames
#define kVTVideoDecoderSpecification_EnableHardwareAcceleratedVideoDecoder