Shaka Player Embedded
apple_decoder.cc
Go to the documentation of this file.
1 // Copyright 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
16 
17 #include <algorithm>
18 #include <unordered_map>
19 #include <utility>
20 
21 #include "src/media/media_utils.h"
23 #include "src/util/utils.h"
24 
25 #ifndef kVTVideoDecoderSpecification_EnableHardwareAcceleratedVideoDecoder
26 # define kVTVideoDecoderSpecification_EnableHardwareAcceleratedVideoDecoder \
27  CFSTR("EnableHardwareAcceleratedVideoDecoder")
28 #endif
29 #ifndef kVTDecompressionPropertyKey_UsingHardwareAcceleratedVideoDecoder
30 # define kVTDecompressionPropertyKey_UsingHardwareAcceleratedVideoDecoder \
31  CFSTR("UsingHardwareAcceleratedVideoDecoder")
32 #endif
33 
// Evaluates to |val| unless it compares equal to 0, in which case |def| is
// used instead. |val| appears twice in the expansion, so it may be evaluated
// twice; only pass side-effect-free expressions.
34 #define DEFAULT(val, def) ((val) != 0 ? (val) : (def))
35 
36 namespace shaka {
37 namespace media {
38 namespace apple {
39 
40 namespace {
41 
// Number of output packets requested from the audio converter per call in
// DecodeAudio; also used there to size the temporary output buffer.
43 constexpr const size_t kAudioSampleCount = 256;
44 
// Sample format reported on decoded audio frames (see DecodeAudio).
49 constexpr const SampleFormat kAudioSampleFormat = SampleFormat::PackedS16;
50 
// Size, in bytes, of one sample of one channel; CreateAudioConverter
// multiplies it by 8 to get the output bits per channel.
52 constexpr const size_t kAudioSampleSize = 2;
53 
// Status returned by AudioInputCallback when there is no pending input frame;
// DecodeAudio treats it as "stop converting" rather than as a failure.
55 constexpr const OSStatus kNoMoreDataError = -12345;
56 
57 
58 util::CFRef<CFMutableDictionaryRef> MakeDict(size_t capacity) {
59  return CFDictionaryCreateMutable(kCFAllocatorDefault, capacity,
60  &kCFTypeDictionaryKeyCallBacks,
61  &kCFTypeDictionaryValueCallBacks);
62 }
63 
65 util::CFRef<CFDataRef> CreateBuffer(const std::vector<uint8_t>& buffer) {
66  return util::CFRef<CFDataRef>(
67  CFDataCreate(kCFAllocatorDefault, buffer.data(), buffer.size()));
68 }
69 
// Builds the dictionary that is handed to VideoToolbox both as the format
// description extensions and as the decoder specification (see the callers).
// It holds a boolean entry plus a nested "sample description extension atoms"
// dictionary carrying |extra_data| under "avcC" (h264) or "hvcC" (hevc).
// Returns nullptr when NormalizeCodec(codec) is neither "h264" nor "hevc".
70 util::CFRef<CFMutableDictionaryRef> CreateVideoDecoderConfig(
71  const std::string& codec, const std::vector<uint8_t>& extra_data) {
72  util::CFRef<CFMutableDictionaryRef> ret(MakeDict(2));
// NOTE(review): listing line 74 is missing here, so this call shows only its
// value argument. Presumably the dropped line passed |ret| and the
// kVTVideoDecoderSpecification_EnableHardwareAcceleratedVideoDecoder key --
// confirm against the real source file.
73  CFDictionarySetValue(
75  kCFBooleanTrue);
76 
77  const std::string raw_codec = NormalizeCodec(codec);
78  util::CFRef<CFMutableDictionaryRef> info(MakeDict(1));
// The codec configuration record is stored under the atom name that matches
// the codec.
79  if (raw_codec == "h264") {
80  CFDictionarySetValue(info, CFSTR("avcC"), CreateBuffer(extra_data));
81  } else if (raw_codec == "hevc") {
82  CFDictionarySetValue(info, CFSTR("hvcC"), CreateBuffer(extra_data));
83  } else {
// Unsupported codec: nothing to configure.
84  return nullptr;
85  }
86  CFDictionarySetValue(
87  ret, kCMFormatDescriptionExtension_SampleDescriptionExtensionAtoms, info);
88 
89  return ret;
90 }
91 
92 util::CFRef<CMVideoFormatDescriptionRef> CreateFormatDescription(
93  const std::string& codec, uint32_t width, uint32_t height,
94  util::CFRef<CFDictionaryRef> decoder_config) {
95  const std::string raw_codec = NormalizeCodec(codec);
96  CMVideoCodecType codec_type;
97  if (raw_codec == "h264") {
98  codec_type = kCMVideoCodecType_H264;
99  } else if (raw_codec == "hevc") {
100  codec_type = kCMVideoCodecType_HEVC;
101  } else {
102  return nullptr;
103  }
104 
105  CMVideoFormatDescriptionRef ret;
106  const auto status = CMVideoFormatDescriptionCreate(
107  kCFAllocatorDefault, codec_type, width, height, decoder_config, &ret);
108  if (status != 0) {
109  LOG(ERROR) << "Error creating video format description: " << status;
110  return nullptr;
111  }
112 
113  return ret;
114 }
115 
116 util::CFRef<CMSampleBufferRef> CreateSampleBuffer(
117  util::CFRef<CMVideoFormatDescriptionRef> format_desc, const uint8_t* data,
118  size_t size) {
119  CMBlockBufferRef block = nullptr;
120  CMSampleBufferRef ret = nullptr;
121  const auto status = CMBlockBufferCreateWithMemoryBlock(
122  kCFAllocatorDefault, const_cast<uint8_t*>(data), size, kCFAllocatorNull,
123  nullptr, 0, size, 0, &block);
124  if (status == 0) {
125  CMSampleBufferCreate(kCFAllocatorDefault, // allocator
126  block, // dataBuffer
127  TRUE, // dataReady
128  nullptr, // makeDataReadyCallback
129  nullptr, // makeDataReadyRefcon
130  format_desc, // formatDescription
131  1, // numSamples
132  0, // numSampleTimingEntries
133  nullptr, // sampleTimingArray
134  0, // numSampleSizeEntries
135  nullptr, // sampleSizeArray
136  &ret); // sampleBufferOut
137  }
138 
139  if (block)
140  CFRelease(block);
141  return ret;
142 }
143 
144 util::CFRef<CFMutableDictionaryRef> CreateBufferAttributes(int32_t width,
145  int32_t height) {
146  util::CFRef<CFMutableDictionaryRef> ret(MakeDict(5));
147  util::CFRef<CFMutableDictionaryRef> surface_props(MakeDict(0));
148 
149  util::CFRef<CFNumberRef> w(
150  CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &width));
151  util::CFRef<CFNumberRef> h(
152  CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &height));
153  int32_t pix_fmt_raw = kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange;
154  util::CFRef<CFNumberRef> pix_fmt(
155  CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &pix_fmt_raw));
156 
157  CFDictionarySetValue(ret, kCVPixelBufferWidthKey, w);
158  CFDictionarySetValue(ret, kCVPixelBufferHeightKey, h);
159  CFDictionarySetValue(ret, kCVPixelBufferPixelFormatTypeKey, pix_fmt);
160  CFDictionarySetValue(ret, kCVPixelBufferIOSurfacePropertiesKey,
161  surface_props);
162  CFDictionarySetValue(ret, kCVPixelBufferCGImageCompatibilityKey,
163  kCFBooleanTrue);
164 
165  return ret;
166 }
167 
/// Builds a synthetic "avcC" configuration record for the given codec string.
///
/// The hex digits following the first '.' in |codec| (e.g. "avc1.64001f") are
/// parsed as a 24-bit value whose bytes become the record's profile, profile
/// compatibility and level. If there is no '.' or the value parses as 0, the
/// default 0x42001e is used. The SPS and PPS are fixed, generic values.
///
/// @param codec The codec string from the MIME type.
/// @return The bytes of the configuration record.
std::vector<uint8_t> MakeH264ExtraData(const std::string& codec) {
  long profile = 0;  // NOLINT
  const auto pos = codec.find('.');
  if (pos != std::string::npos) {
    // Start parsing AFTER the dot: strtol stops at the first character that
    // is not a hex digit, so parsing from the '.' itself always yields 0.
    profile = strtol(codec.c_str() + pos + 1, nullptr, 16);
  }
  if (profile == 0)
    profile = 0x42001e;

  // This is just a common SPS and PPS that doesn't use any "unusual"
  // features; this is believed to be commonly supported.  We can't just
  // pass 0 SPS or PPS, the decoder requires at least one of each.
  const uint8_t extra_data[] = {
      // Disable formatting so clang-format doesn't put each element on its
      // own line.
      // clang-format off
      0x01,                                          // version
      static_cast<uint8_t>((profile >> 16) & 0xff),  // profile
      static_cast<uint8_t>((profile >> 8) & 0xff),   // profile compat
      static_cast<uint8_t>((profile >> 0) & 0xff),   // level
      0xff,  // 6 reserved bits + 2 bits nalu size length - 1

      0xe1,  // 3 reserved bits + 5 bits SPS count
      0x00, 0x1c,  // SPS size
      0x67, 0x42, 0xc8, 0x1e, 0xd9, 0x01, 0x03, 0xfe, 0xbf, 0xf0,
      0x06, 0xe0, 0x06, 0xd1, 0x00, 0x00, 0x03, 0x00, 0x01, 0x00,
      0x00, 0x03, 0x00, 0x30, 0x0f, 0x16, 0x2e, 0x48,

      0x01,  // PPS count
      0x00, 0x04,  // PPS size
      0x68, 0xcb, 0x8c, 0xb2,
      // clang-format on
  };
  return {extra_data, extra_data + sizeof(extra_data)};
}
202 
203 std::vector<uint8_t> MakeHevcExtraData(const std::string& codec) {
204  // See ISO IEC 14496-15 2012+ Annex E.3
205  // e.g. "hvc1.1.C0000006.L0.0.90.0.0.3"
206  std::vector<std::string> parts = util::StringSplit(codec, '.');
207 
208  DCHECK(parts[0] == "hvc1" || parts[0] == "hev1");
209 
210  uint8_t general_profile_space = 0;
211  uint8_t general_profile_idc = 1;
212  if (parts.size() > 1) {
213  int offset = 0;
214  if (parts[1].size() > 0 &&
215  (parts[1][0] == 'A' || parts[1][0] == 'B' || parts[1][0] == 'C')) {
216  general_profile_space = 1 + (parts[1][0] - 'A');
217  offset = 1;
218  }
219  general_profile_idc =
220  static_cast<uint8_t>(strtol(parts[1].c_str() + offset, nullptr, 10));
221  }
222 
223  uint32_t general_profile_compatibility_flags = 0;
224  if (parts.size() > 2)
225  general_profile_compatibility_flags = strtol(parts[2].c_str(), nullptr, 16);
226 
227  uint8_t general_tier_flag = 0;
228  uint8_t general_level_idc = 0;
229  if (parts.size() > 3) {
230  general_tier_flag = parts[3].size() > 0 && parts[3][0] != 'L' ? 1 : 0;
231  general_level_idc =
232  static_cast<uint8_t>(strtol(parts[3].substr(1).c_str(), nullptr, 10));
233  }
234 
235  uint8_t general_constraint_indicator_flags[6] = {0};
236  for (size_t i = 0; i < 6 && i + 4 < parts.size(); i++) {
237  general_constraint_indicator_flags[i] =
238  static_cast<uint8_t>(strtol(parts[i + 4].c_str(), nullptr, 16));
239  }
240 
241  const uint8_t data[] = {
242  // clang-format off
243  0x01, // configurationVersion
244  static_cast<uint8_t>((general_profile_space << 6) | general_profile_idc),
245  (general_profile_compatibility_flags >> 24) & 0xff,
246  (general_profile_compatibility_flags >> 16) & 0xff,
247  (general_profile_compatibility_flags >> 8) & 0xff,
248  (general_profile_compatibility_flags >> 0) & 0xff,
249  general_constraint_indicator_flags[0],
250  general_constraint_indicator_flags[1],
251  general_constraint_indicator_flags[2],
252  general_constraint_indicator_flags[3],
253  general_constraint_indicator_flags[4],
254  general_constraint_indicator_flags[5],
255  general_level_idc,
256  0xf0, 0x00, // reserved(0b1111) | min_spatial_segmentation_idc
257  0xfc, // reserved(0b111111) | parallelismType
258  0xfc, // reserved(0b111111) | chroma_format_idc
259  0xf8, // reserved(0b11111) | bit_depth_luma_minus8
260  0xf8, // reserved(0b11111) | bit_depth_chroma_minus8
261  0x00, 0x00, // avgFrameRate
262  0x00,
263 
264  0x03, // numOfArrays
265  // array[0]
266  0xa1, // NAL_unit_type
267  0x00, 0x01, // numNalus
268  0x00, 0x2e, // nalUnitLength
269  0x42, 0x01, 0x01, 0x01, 0x60, 0x00, 0x00, 0x03,
270  0x00, 0x90, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03,
271  0x00, 0x5d, 0xa0, 0x02, 0x80, 0x80, 0x24, 0x1f,
272  0x26, 0x59, 0x99, 0xa4, 0x93, 0x2b, 0xff, 0xc0,
273  0xd5, 0xc0, 0xd6, 0x40, 0x40, 0x00, 0x00, 0x03,
274  0x00, 0x40, 0x00, 0x00, 0x06, 0x02,
275 
276  // array[1]
277  0xa2, // NAL_unit_type
278  0x00, 0x01, // numNalus
279  0x00, 0x07, // nalUnitLength
280  0x44, 0x01, 0xc1, 0x72, 0xb4, 0x62, 0x40,
281 
282  // array[2]
283  0xa0, // NAL_unit_type
284  0x00, 0x01, // numNalus
285  0x00, 0x18, // nalUnitLength
286  0x40, 0x01, 0x0c, 0x01, 0xff, 0xff, 0x01, 0x60,
287  0x00, 0x00, 0x03, 0x00, 0x90, 0x00, 0x00, 0x03,
288  0x00, 0x00, 0x03, 0x00, 0x5d, 0x99, 0x98, 0x09,
289  // clang-format on
290  };
291  return {data, data + sizeof(data)};
292 }
293 
294 std::vector<uint8_t> MakeAacExtraData(const std::vector<uint8_t>& codec_data) {
295  // This is an ES_Descriptor box from ISO/IEC 14496-1 Section 7.2.6.5.
296  constexpr const size_t kDescPrefixSize = 8;
297  constexpr const size_t kConfigPrefixSize = 23;
298  const size_t config_size = codec_data.size() + kConfigPrefixSize;
299  const size_t total_size = config_size + kDescPrefixSize;
300  CHECK_LT(total_size, 1 << (7 * 4));
301  const uint8_t fixed[] = {
302  // clang-format off
303  0x3, // tag=ES_DescTag
304  0x80 | ((total_size >> (7 * 3)) & 0x7f), // Data size.
305  0x80 | ((total_size >> (7 * 2)) & 0x7f),
306  0x80 | ((total_size >> (7 * 1)) & 0x7f),
307  (total_size & 0x7f),
308  0x0, 0x0, // ES_ID
309  0x0, // Flags
310 
311  // DecoderConfigDescriptor
312  0x4, // tag=DecoderConfigDescrTag
313  0x80 | ((config_size >> (7 * 3)) & 0x7f), // Data size.
314  0x80 | ((config_size >> (7 * 2)) & 0x7f),
315  0x80 | ((config_size >> (7 * 1)) & 0x7f),
316  (config_size & 0x7f),
317  0x40, // objectTypeIndication
318  0x15, // flags=(AudioStream)
319  0x0, 0x0, 0x0, // bufferSizeDB
320  0x0, 0x0, 0x0, 0x0, // maxBitrate
321  0x0, 0x0, 0x0, 0x0, // avgBitrate
322 
323  // DecoderSpecificInfo
324  0x5, // tag=DecSpecificInfoTag
325  0x80 | ((codec_data.size() >> (7 * 3)) & 0x7f), // Data size.
326  0x80 | ((codec_data.size() >> (7 * 2)) & 0x7f),
327  0x80 | ((codec_data.size() >> (7 * 1)) & 0x7f),
328  (codec_data.size() & 0x7f),
329  // codec_data goes here.
330  // clang-format on
331  };
332  static_assert(kDescPrefixSize + kConfigPrefixSize == sizeof(fixed),
333  "Inconsistent buffer sizes");
334  DCHECK_EQ(total_size, sizeof(fixed) + codec_data.size());
335 
336  std::vector<uint8_t> ret(total_size);
337  std::memcpy(ret.data(), fixed, sizeof(fixed));
338  std::memcpy(ret.data() + sizeof(fixed), codec_data.data(), codec_data.size());
339  return ret;
340 }
341 
342 OSStatus CreateAudioConverter(uint32_t sample_rate, uint32_t channel_count,
343  const std::vector<uint8_t>& extra_data,
344  AudioConverterRef* session) {
345  // See this for some of the magic numbers below:
346  // https://developer.apple.com/documentation/coreaudiotypes/audiostreambasicdescription
347  AudioStreamBasicDescription input = {0};
348  input.mFormatID = kAudioFormatMPEG4AAC;
349  if (extra_data.empty()) {
350  // Fill in some defaults if we don't have extra data.
351  input.mSampleRate = sample_rate;
352  input.mChannelsPerFrame = channel_count;
353  input.mBytesPerPacket = 0; // Variable sized
354  input.mFramesPerPacket = 1024;
355  } else {
356  // Parse the extra data to fill in "input".
357  std::vector<uint8_t> cookie = MakeAacExtraData(extra_data);
358  UInt32 size = sizeof(input);
359  const auto status =
360  AudioFormatGetProperty(kAudioFormatProperty_FormatInfo, cookie.size(),
361  cookie.data(), &size, &input);
362  if (status != 0)
363  return status;
364  }
365 
366  AudioStreamBasicDescription output = {0};
367  output.mFormatID = kAudioFormatLinearPCM;
368  output.mFormatFlags = kLinearPCMFormatFlagIsSignedInteger;
369  output.mSampleRate = sample_rate;
370  output.mChannelsPerFrame = channel_count;
371  output.mFramesPerPacket = 1;
372  output.mBitsPerChannel = kAudioSampleSize * 8;
373  output.mBytesPerFrame = output.mBitsPerChannel * output.mChannelsPerFrame / 8;
374  output.mBytesPerPacket = output.mBytesPerFrame * output.mFramesPerPacket;
375 
376  return AudioConverterNew(&input, &output, session);
377 }
378 
379 } // namespace
380 
382  : mutex_("AppleDecoder"), at_session_(nullptr, &AudioConverterDispose) {}
384  ResetDecoder();
385 }
386 
388  const MediaDecodingConfiguration& config) const {
389  if (config.video.content_type.empty() == config.audio.content_type.empty())
390  return MediaCapabilitiesInfo();
391 
392  const std::string& content_type = !config.video.content_type.empty()
393  ? config.video.content_type
394  : config.audio.content_type;
395  std::unordered_map<std::string, std::string> args;
396  if (!ParseMimeType(content_type, nullptr, nullptr, &args))
397  return MediaCapabilitiesInfo();
398 
399  const std::string codec = args[kCodecMimeParam];
401  if (codec.empty()) {
402  // No codec, assume we can play it.
403  ret.supported = true;
404  return ret;
405  }
406 
407  const std::string norm_codec = NormalizeCodec(codec);
408  if (norm_codec == "h264" || norm_codec == "hevc") {
409  const uint32_t width = DEFAULT(config.video.width, 640);
410  const uint32_t height = DEFAULT(config.video.height, 480);
411  const std::vector<uint8_t> extra_data = norm_codec == "h264"
412  ? MakeH264ExtraData(codec)
413  : MakeHevcExtraData(codec);
414 
415  auto resolution = GetScreenResolution();
416  const size_t max_size = std::max(resolution.first, resolution.second);
417  if (width > max_size || height > max_size) {
418  // Don't play content that is larger than the screen. This is inefficient
419  // and VideoToolbox doesn't handle out of memory correctly and has a
420  // tendency to just crash with a memory error if we run out of memory.
421  return ret;
422  }
423 
424  VTDecompressionOutputCallbackRecord cb = {
425  .decompressionOutputCallback = &AppleDecoder::OnNewVideoFrame,
426  .decompressionOutputRefCon = nullptr,
427  };
428  util::CFRef<CFDictionaryRef> decoder_config =
429  CreateVideoDecoderConfig(codec, extra_data);
430  auto format_desc =
431  CreateFormatDescription(codec, width, height, decoder_config);
432  util::CFRef<CFDictionaryRef> buffer_attr =
433  CreateBufferAttributes(width, height);
434 
435  VTDecompressionSessionRef session;
436  auto status = VTDecompressionSessionCreate(kCFAllocatorDefault, format_desc,
437  decoder_config, buffer_attr, &cb,
438  &session);
439 
440  if (status == 0) {
441  ret.supported = true;
442 
443  CFBooleanRef using_hardware;
444  status = VTSessionCopyProperty(
445  session,
447  kCFAllocatorDefault, &using_hardware);
448  ret.smooth = ret.power_efficient =
449  status == 0 && CFBooleanGetValue(using_hardware);
450 
451  VTDecompressionSessionInvalidate(session);
452  CFRelease(session);
453  }
454  } else if (norm_codec == "aac") {
455  AudioConverterRef audio_session = nullptr;
456  const uint32_t sample_rate = DEFAULT(config.audio.samplerate, 44000);
457  const uint32_t channel_count = DEFAULT(config.audio.channels, 2);
458  const auto status =
459  CreateAudioConverter(sample_rate, channel_count, {}, &audio_session);
460 
461  if (status == 0) {
462  AudioConverterDispose(audio_session);
463  ret.supported = ret.smooth = ret.power_efficient = true;
464  }
465  } else {
466  // Fields are already false, unsupported.
467  }
468 
469  return ret;
470 }
471 
473  std::unique_lock<Mutex> lock(mutex_);
474  ResetInternal();
475 }
476 
478  std::shared_ptr<EncodedFrame> input, const eme::Implementation* eme,
479  std::vector<std::shared_ptr<DecodedFrame>>* frames,
480  std::string* extra_info) {
481  std::unique_lock<Mutex> lock(mutex_);
482 
483  if (!input) {
484  // Flush the decoder.
485  bool ret;
486  output_ = frames;
487  if (vt_session_)
488  ret = DecodeVideo(nullptr, 0, extra_info);
489  else if (at_session_)
490  ret = DecodeAudio(nullptr, 0, extra_info);
491  else
492  ret = true;
493 
494  output_ = nullptr;
495  ResetInternal(); // Cannot re-use decoder after flush.
497  }
498 
499  const bool is_video = input->stream_info->is_video;
500  auto init = is_video ? &AppleDecoder::InitVideoDecoder
501  : &AppleDecoder::InitAudioDecoder;
502  auto decode =
503  is_video ? &AppleDecoder::DecodeVideo : &AppleDecoder::DecodeAudio;
504  auto has_session = is_video ? !!vt_session_ : !!at_session_;
505 
506  if (!has_session || input->stream_info != decoder_stream_info_) {
507  ResetInternal();
508  if (!(this->*init)(input->stream_info, extra_info))
510  decoder_stream_info_ = input->stream_info;
511  }
512 
513  const uint8_t* data;
514  const size_t size = input->data_size;
515  std::vector<uint8_t> decrypted_data;
516  if (input->encryption_info) {
517  decrypted_data.resize(input->data_size);
518  const auto status = input->Decrypt(eme, decrypted_data.data());
519  if (status != MediaStatus::Success) {
520  *extra_info = "Error decrypting frame";
521  return status;
522  }
523  data = decrypted_data.data();
524  } else {
525  data = input->data;
526  }
527 
528  // Store the important info in fields since we get callbacks and only get one
529  // pointer for user data (this).
530  input_ = input.get();
531  input_data_ = data;
532  input_data_size_ = size;
533  output_ = frames;
534  const bool ret = (this->*decode)(data, size, extra_info);
535  input_ = nullptr;
536  input_data_ = nullptr;
537  input_data_size_ = 0;
538  output_ = nullptr;
540 }
541 
542 void AppleDecoder::OnNewVideoFrame(void* user, void* frameUser, OSStatus status,
543  VTDecodeInfoFlags /* flags */,
544  CVImageBufferRef buffer, CMTime pts,
545  CMTime duration) {
546  auto* decoder = reinterpret_cast<AppleDecoder*>(user);
547  auto* frame = decoder->input_;
548  if (status != 0)
549  return;
550 
551  CHECK(decoder->output_);
552  CHECK(buffer);
553 
554  double time;
555  if (pts.flags & kCMTimeFlags_Valid)
556  time = CMTimeGetSeconds(pts);
557  else
558  time = frame ? frame->pts : 0;
559  double durationSec;
560  if (duration.flags & kCMTimeFlags_Valid)
561  durationSec = CMTimeGetSeconds(duration);
562  else
563  durationSec = frame ? frame->duration : 0;
564 
565  decoder->output_->emplace_back(new AppleDecodedFrame(
566  decoder->decoder_stream_info_, time, durationSec, buffer));
567 }
568 
569 OSStatus AppleDecoder::AudioInputCallback(AudioConverterRef /* conv */,
570  UInt32* num_packets,
571  AudioBufferList* data,
572  AudioStreamPacketDescription** desc,
573  void* user) {
574  auto* decoder = reinterpret_cast<AppleDecoder*>(user);
575  if (!decoder->input_) {
576  *num_packets = 0;
577  return kNoMoreDataError;
578  }
579 
580  if (desc) {
581  decoder->audio_desc_.mStartOffset = 0;
582  decoder->audio_desc_.mVariableFramesInPacket = 0;
583  decoder->audio_desc_.mDataByteSize = decoder->input_data_size_;
584  *desc = &decoder->audio_desc_;
585  }
586 
587  CHECK_EQ(data->mNumberBuffers, 1);
588  *num_packets = 1;
589  data->mBuffers[0].mNumberChannels =
590  decoder->decoder_stream_info_->channel_count;
591  data->mBuffers[0].mDataByteSize = decoder->input_data_size_;
592  data->mBuffers[0].mData = const_cast<uint8_t*>(decoder->input_data_);
593 
594  decoder->input_ = nullptr;
595  return 0;
596 }
597 
598 void AppleDecoder::ResetInternal() {
599  if (vt_session_) {
600  VTDecompressionSessionInvalidate(vt_session_);
601  vt_session_ = nullptr;
602  }
603  at_session_.reset();
604 }
605 
606 bool AppleDecoder::DecodeVideo(const uint8_t* data, size_t data_size,
607  std::string* extra_info) {
608  OSStatus status;
609  if (data) {
610  util::CFRef<CMSampleBufferRef> sample =
611  CreateSampleBuffer(format_desc_, data, data_size);
612  if (!sample) {
613  *extra_info = "Error creating sample buffer";
614  return false;
615  }
616 
617  status = VTDecompressionSessionDecodeFrame(
618  vt_session_, sample, kVTDecodeFrame_EnableTemporalProcessing, nullptr,
619  nullptr);
620  if (status == 0)
621  status = VTDecompressionSessionWaitForAsynchronousFrames(vt_session_);
622  } else {
623  status = VTDecompressionSessionFinishDelayedFrames(vt_session_);
624  if (status == 0)
625  status = VTDecompressionSessionWaitForAsynchronousFrames(vt_session_);
626  }
627 
628  if (status != 0) {
629  LOG(ERROR) << (*extra_info =
630  "Error decoding frames: " + std::to_string(status));
631  return false;
632  }
633  return true;
634 }
635 
636 bool AppleDecoder::DecodeAudio(const uint8_t* data, size_t data_size,
637  std::string* extra_info) {
638  if (!data)
639  return true;
640 
641  const size_t channel_count = decoder_stream_info_->channel_count;
642  util::DynamicBuffer out_buffer;
643  std::vector<uint8_t> temp_buffer;
644  temp_buffer.resize(kAudioSampleCount * kAudioSampleSize * channel_count);
645  auto* input = input_;
646 
647  OSStatus status = 0;
648  while (status == 0) {
649  AudioBufferList output{};
650  output.mNumberBuffers = 1;
651  output.mBuffers[0].mNumberChannels = channel_count;
652  output.mBuffers[0].mDataByteSize = temp_buffer.size();
653  output.mBuffers[0].mData = temp_buffer.data();
654  UInt32 output_size = kAudioSampleCount;
655 
656  status = AudioConverterFillComplexBuffer(
657  at_session_.get(), &AppleDecoder::AudioInputCallback, this,
658  &output_size, &output, nullptr);
659  if (status != 0 && status != kNoMoreDataError) {
660  LOG(DFATAL) << (*extra_info =
661  "Error converting audio: " + std::to_string(status));
662  return false;
663  }
664 
665  out_buffer.AppendCopy(temp_buffer.data(),
666  output_size * kAudioSampleSize * channel_count);
667  }
668 
669  temp_buffer.resize(out_buffer.Size());
670  out_buffer.CopyDataTo(temp_buffer.data(), temp_buffer.size());
671  const uint32_t sample_count =
672  temp_buffer.size() / kAudioSampleSize / channel_count;
673  output_->emplace_back(new AppleDecodedFrame(
674  decoder_stream_info_, input->pts, input->duration, kAudioSampleFormat,
675  sample_count, std::move(temp_buffer)));
676 
677  return true;
678 }
679 
680 bool AppleDecoder::InitVideoDecoder(std::shared_ptr<const StreamInfo> info,
681  std::string* extra_info) {
682  VTDecompressionOutputCallbackRecord cb = {
683  .decompressionOutputCallback = &AppleDecoder::OnNewVideoFrame,
684  .decompressionOutputRefCon = this,
685  };
686  util::CFRef<CFDictionaryRef> decoder_config =
687  CreateVideoDecoderConfig(info->codec, info->extra_data);
688  format_desc_ = CreateFormatDescription(info->codec, info->width, info->height,
689  decoder_config);
690  util::CFRef<CFDictionaryRef> buffer_attr =
691  CreateBufferAttributes(info->width, info->height);
692 
693  VTDecompressionSessionRef session;
694  const auto status =
695  VTDecompressionSessionCreate(kCFAllocatorDefault, format_desc_,
696  decoder_config, buffer_attr, &cb, &session);
697  if (status != 0) {
698  LOG(ERROR) << (*extra_info = "Error creating VideoToolbox session: " +
699  std::to_string(status));
700  return false;
701  }
702 
703  vt_session_ = session;
704  decoder_stream_info_ = info;
705  return true;
706 }
707 
708 bool AppleDecoder::InitAudioDecoder(std::shared_ptr<const StreamInfo> info,
709  std::string* extra_info) {
710  AudioConverterRef session = nullptr;
711  auto status = CreateAudioConverter(info->sample_rate, info->channel_count,
712  info->extra_data, &session);
713  if (status == 0) {
714  const std::vector<uint8_t> extra_data = MakeAacExtraData(info->extra_data);
715  status = AudioConverterSetProperty(session,
716  kAudioConverterDecompressionMagicCookie,
717  extra_data.size(), extra_data.data());
718  }
719  if (status != 0) {
720  LOG(DFATAL) << (*extra_info = "Error creating audio converter: " +
721  std::to_string(status));
722  if (session)
723  AudioConverterDispose(session);
724  return false;
725  }
726  at_session_.reset(session);
727  return true;
728 }
729 
730 } // namespace apple
731 } // namespace media
732 } // namespace shaka
bool ParseMimeType(const std::string &source, std::string *type, std::string *subtype, std::unordered_map< std::string, std::string > *params)
Definition: media_utils.cc:62
std::string NormalizeCodec(const std::string &codec)
Definition: media_utils.cc:135
void CopyDataTo(uint8_t *dest, size_t size) const
std::shared_ptr< shaka::media::DecodedFrame > frame
int width
std::string to_string(VideoReadyState state)
Definition: media_player.cc:32
int height
#define kVTDecompressionPropertyKey_UsingHardwareAcceleratedVideoDecoder
void AppendCopy(const void *buffer, size_t size)
const uint8_t *const data
Definition: frames.h:244
constexpr const char * kCodecMimeParam
Definition: media_utils.h:33
#define DEFAULT(val, def)
std::vector< std::string > StringSplit(const std::string &source, char split_on)
Definition: utils.cc:64
std::list< std::shared_ptr< BaseFrame > > frames
Definition: streams.cc:128
MediaStatus Decode(std::shared_ptr< EncodedFrame > input, const eme::Implementation *eme, std::vector< std::shared_ptr< DecodedFrame >> *frames, std::string *extra_info) override
std::pair< uint32_t, uint32_t > GetScreenResolution()
Definition: media_utils.cc:196
#define kVTVideoDecoderSpecification_EnableHardwareAcceleratedVideoDecoder
MediaCapabilitiesInfo DecodingInfo(const MediaDecodingConfiguration &config) const override