Shaka Player Embedded
ffmpeg_demuxer.cc
Go to the documentation of this file.
1 // Copyright 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
16 
17 extern "C" {
18 #include <libavcodec/avcodec.h>
19 #include <libavutil/encryption_info.h>
20 }
21 #include <glog/logging.h>
22 
23 #include <algorithm>
24 #include <unordered_map>
25 
27 #include "src/media/media_utils.h"
28 #include "src/util/buffer_reader.h"
29 #include "src/util/buffer_writer.h"
30 
31 // Special error code added by //third_party/ffmpeg/mov.patch
32 #define AVERROR_SHAKA_RESET_DEMUXER (-123456)
33 
34 namespace shaka {
35 namespace media {
36 namespace ffmpeg {
37 
38 namespace {
39 
// Size, in bytes, of the buffer first handed to the custom AVIO context.
constexpr size_t kInitialBufferSize = 2048;
41 
42 void LogError(int code) {
43  LOG(ERROR) << "Error from FFmpeg: " << av_err2str(code);
44 }
45 
46 std::string GetCodec(const std::string& mime, AVCodecID codec) {
47  std::unordered_map<std::string, std::string> params;
48  CHECK(ParseMimeType(mime, nullptr, nullptr, &params));
49  if (params.count(kCodecMimeParam) > 0)
50  return params.at(kCodecMimeParam);
51  else
52  return avcodec_get_name(codec);
53 }
54 
60 std::vector<uint8_t> CreatePssh(AVEncryptionInitInfo* info) {
61  // 4 box size
62  // 4 box type
63  // 1 version
64  // 3 flags
65  // 16 system_id
66  // if (version > 0)
67  // 4 key_id_count
68  // for (key_id_count)
69  // 16 key_id
70  // 4 data_size
71  // [data_size] data
72  DCHECK_EQ(info->system_id_size, 16u);
73  size_t pssh_size = info->data_size + 32;
74  if (info->num_key_ids) {
75  DCHECK_EQ(info->key_id_size, 16u);
76  pssh_size += 4 + info->num_key_ids * 16;
77  }
78 
79  std::vector<uint8_t> pssh(pssh_size, 0);
80  util::BufferWriter writer(pssh.data(), pssh_size);
81 
82  writer.Write<uint32_t>(pssh_size);
83  writer.WriteTag("pssh");
84  writer.Write<uint32_t>(info->num_key_ids ? 0x01000000 : 0);
85  writer.Write(info->system_id, 16);
86  if (info->num_key_ids) {
87  writer.Write<uint32_t>(info->num_key_ids);
88  for (uint32_t i = 0; i < info->num_key_ids; i++) {
89  writer.Write(info->key_ids[i], 16);
90  }
91  }
92 
93  writer.Write<uint32_t>(info->data_size);
94  writer.Write(info->data, info->data_size);
95  DCHECK(writer.empty());
96 
97  return pssh;
98 }
99 
100 bool ParseAndCheckSupport(const std::string& mime, std::string* container) {
101  std::string subtype;
102  if (!ParseMimeType(mime, nullptr, &subtype, nullptr))
103  return false;
104 
105  std::string normalized = NormalizeContainer(subtype);
106  if (!av_find_input_format(normalized.c_str()))
107  return false;
108 
109  *container = normalized;
110  return true;
111 }
112 
113 // The FFmpeg demuxer will use its decoders to fill in certain fields. If we
114 // aren't using the FFmpeg decoders, we need to parse these fields ourselves.
115 #ifndef HAS_FFMPEG_DECODER
116 
117 void RemoveEmulationPrevention(const uint8_t* data, size_t size,
118  std::vector<uint8_t>* output) {
119  DCHECK_EQ(output->size(), size);
120  // A byte sequence 0x0 0x0 0x1 is used to signal the start of a NALU. So for
121  // the body of the NALU, it needs to be escaped. So this reverses the
122  // escaping by changing 0x0 0x0 0x3 to 0x0 0x0.
123  DCHECK_EQ(output->size(), size);
124  size_t out_pos = 0;
125  for (size_t in_pos = 0; in_pos < size;) {
126  if (in_pos + 2 < size && data[in_pos] == 0 && data[in_pos + 1] == 0 &&
127  data[in_pos + 2] == 0x3) {
128  (*output)[out_pos++] = 0;
129  (*output)[out_pos++] = 0;
130  in_pos += 3;
131  } else {
132  (*output)[out_pos++] = data[in_pos++];
133  }
134  }
135  output->resize(out_pos);
136 }
137 
138 Rational<uint32_t> GetSarFromVuiParameters(util::BufferReader* reader) {
139  // See section E.1.1 of H.264/H.265.
140  // vui_parameters()
141  if (reader->ReadBits(1) == 0) // aspect_ratio_info_present_flag
142  return {0, 0}; // Values we want aren't there, return unknown.
143  const uint8_t aspect_ratio_idc = reader->ReadUint8();
144  // See Table E-1 in H.264.
145  switch (aspect_ratio_idc) {
146  case 1:
147  return {1, 1};
148  case 2:
149  return {12, 11};
150  case 3:
151  return {10, 11};
152  case 4:
153  return {16, 11};
154  case 5:
155  return {40, 33};
156  case 6:
157  return {24, 11};
158  case 7:
159  return {20, 11};
160  case 8:
161  return {32, 11};
162  case 9:
163  return {80, 33};
164  case 10:
165  return {18, 11};
166  case 11:
167  return {15, 11};
168  case 12:
169  return {64, 33};
170  case 13:
171  return {160, 99};
172  case 14:
173  return {4, 3};
174  case 15:
175  return {3, 2};
176  case 16:
177  return {2, 1};
178  case 255:
179  return {reader->ReadBits(16), reader->ReadBits(16)};
180 
181  default:
182  LOG(DFATAL) << "Unknown value of aspect_ratio_idc: "
183  << static_cast<int>(aspect_ratio_idc);
184  return {0, 0};
185  }
186 }
187 
188 Rational<uint32_t> GetSarFromH264(const std::vector<uint8_t>& extra_data) {
189  util::BufferReader reader(extra_data.data(), extra_data.size());
190  // The H.264 extra data is a AVCDecoderConfigurationRecord from
191  // Section 5.3.3.1.2 in ISO/IEC 14496-15
192  reader.Skip(5);
193  const size_t sps_count = reader.ReadUint8() & 0x1f;
194  if (sps_count == 0)
195  return {0, 0};
196 
197  // There should only be one SPS, or they should be compatible since there
198  // should only be one video stream. There may be two SPS for encrypted
199  // content with a clear lead.
200  const size_t sps_size = reader.ReadBits(16);
201  if (sps_size >= reader.BytesRemaining()) {
202  LOG(DFATAL) << "Invalid avcC configuration";
203  return {0, 0};
204  }
205 
206  // This is an SPS NALU; remove the emulation prevention bytes.
207  // See ISO/IE 14496-10 Sec. 7.3.1/7.3.2 and H.264 Sec. 7.3.2.1.1.
208  std::vector<uint8_t> temp(sps_size);
209  RemoveEmulationPrevention(reader.data(), sps_size, &temp);
210  util::BufferReader sps_reader(temp.data(), temp.size());
211  if (sps_reader.ReadUint8() != 0x67) {
212  LOG(DFATAL) << "Non-SPS found in avcC configuration";
213  return {0, 0};
214  }
215 
216  // seq_parameter_set_rbsp()
217  const uint8_t profile_idc = sps_reader.ReadUint8();
218  sps_reader.Skip(2);
219  sps_reader.ReadExpGolomb(); // seq_parameter_set_id
220  // Values here copied from the H.264 spec.
221  if (profile_idc == 100 || profile_idc == 110 || profile_idc == 122 ||
222  profile_idc == 244 || profile_idc == 44 || profile_idc == 83 ||
223  profile_idc == 86 || profile_idc == 118 || profile_idc == 128 ||
224  profile_idc == 138 || profile_idc == 139 || profile_idc == 134) {
225  const uint64_t chroma_format_idc = sps_reader.ReadExpGolomb();
226  if (chroma_format_idc == 3)
227  sps_reader.ReadBits(1); // separate_colour_plane_flag
228  sps_reader.ReadExpGolomb(); // bit_depth_luma_minus8
229  sps_reader.ReadExpGolomb(); // bit_depth_chroma_minus8
230  sps_reader.SkipBits(1); // qpprime_y_zero_transform_bypass_flag
231  if (sps_reader.ReadBits(1) == 1) { // seq_scaling_matrix_present_flag
232  LOG(WARNING) << "Scaling matrix is unsupported";
233  return {0, 0};
234  }
235  }
236  sps_reader.ReadExpGolomb(); // log2_max_frame_num_minus4
237  const uint64_t pic_order_cnt_type = sps_reader.ReadExpGolomb();
238  if (pic_order_cnt_type == 0) {
239  sps_reader.ReadExpGolomb(); // log2_max_pic_order_cnt_lsb_minus4
240  } else if (pic_order_cnt_type == 1) {
241  sps_reader.ReadBits(1); // delta_pic_order_always_zero_flag
242  sps_reader.ReadExpGolomb(); // offset_for_non_ref_pic
243  sps_reader.ReadExpGolomb(); // offset_for_top_to_bottom_field
244  const uint64_t count = sps_reader.ReadExpGolomb();
245  for (uint64_t i = 0; i < count; i++)
246  sps_reader.ReadExpGolomb(); // offset_for_ref_frame
247  }
248  sps_reader.ReadExpGolomb(); // max_num_ref_frames
249  sps_reader.ReadBits(1); // gaps_in_frame_num_value_allowed_flag
250  sps_reader.ReadExpGolomb(); // pic_width_in_mbs_minus1
251  sps_reader.ReadExpGolomb(); // pic_height_in_map_units_minus1
252  if (sps_reader.ReadBits(1) == 0) // frame_mbs_only_flag
253  sps_reader.ReadBits(1); // mb_adaptive_frame_field_flag
254  sps_reader.ReadBits(1); // direct_8x8_inference_flag
255  if (sps_reader.ReadBits(1) == 1) { // frame_cropping_flag
256  sps_reader.ReadExpGolomb(); // pframe_crop_left_offset
257  sps_reader.ReadExpGolomb(); // pframe_crop_right_offset
258  sps_reader.ReadExpGolomb(); // pframe_crop_top_offset
259  sps_reader.ReadExpGolomb(); // pframe_crop_bottom_offset
260  }
261  if (sps_reader.ReadBits(1) == 0) // vui_parameters_present_flag
262  return {0, 0}; // Values we want aren't there, return unknown.
263  // Finally, the thing we actually care about, display parameters.
264  return GetSarFromVuiParameters(&sps_reader);
265 }
266 
267 void SkipHevcProfileTierLevel(bool profile_present,
268  uint64_t max_sub_layers_minus1,
269  util::BufferReader* reader) {
270  if (profile_present) {
271  reader->Skip(11);
272  }
273  reader->Skip(1);
274  std::vector<bool> sub_layer_profile_present_flag(max_sub_layers_minus1);
275  std::vector<bool> sub_layer_level_present_flag(max_sub_layers_minus1);
276  for (uint64_t i = 0; i < max_sub_layers_minus1; i++) {
277  sub_layer_profile_present_flag[i] = reader->ReadBits(1);
278  sub_layer_level_present_flag[i] = reader->ReadBits(1);
279  }
280  if (max_sub_layers_minus1 > 0 && max_sub_layers_minus1 < 8)
281  reader->SkipBits(2 * (8 - max_sub_layers_minus1));
282  for (uint64_t i = 0; i < max_sub_layers_minus1; i++) {
283  if (sub_layer_profile_present_flag[i])
284  reader->Skip(11);
285  if (sub_layer_level_present_flag[i])
286  reader->Skip(1);
287  }
288 }
289 
290 Rational<uint32_t> GetSarFromHevc(const std::vector<uint8_t>& extra_data) {
291  util::BufferReader reader(extra_data.data(), extra_data.size());
292  // The H.265 extra data is a HEVCDecoderConfigurationRecord from
293  // Section 8.3.3.1.2 in ISO/IEC 14496-15
294  reader.Skip(22);
295  const uint8_t num_of_arrays = reader.ReadUint8();
296  uint64_t nalu_length = 0;
297  bool found = false;
298  for (uint8_t i = 0; i < num_of_arrays && !found; i++) {
299  const uint8_t nalu_type = reader.ReadUint8() & 0x3f;
300  const uint64_t num_nalus = reader.ReadBits(16);
301  for (uint64_t i = 0; i < num_nalus; i++) {
302  nalu_length = reader.ReadBits(16);
303  // Find the first SPS NALU. Since this stream should only have one video
304  // stream, all SPS should be compatible.
305  if (nalu_type == 33) {
306  found = true;
307  break;
308  }
309  reader.Skip(nalu_length);
310  }
311  }
312  if (!found)
313  return {0, 0}; // No SPS found, return unknown.
314 
315  // This is an SPS NALU; remove the emulation prevention bytes.
316  // See H.265 Sec. 7.3.1.2/7.3.2.2.1.
317  std::vector<uint8_t> temp(nalu_length);
318  RemoveEmulationPrevention(reader.data(), nalu_length, &temp);
319  util::BufferReader sps_reader(temp.data(), temp.size());
320  const uint64_t nalu_type = (sps_reader.ReadBits(16) >> 9) & 0x3f;
321  if (nalu_type != 33) {
322  LOG(DFATAL) << "Invalid NALU type found in extra data";
323  return {0, 0};
324  }
325 
326  sps_reader.SkipBits(4); // sps_video_parameter_set_id
327  const uint64_t max_sub_layers_minus1 = sps_reader.ReadBits(3);
328  sps_reader.SkipBits(1); // sps_temporal_id_nesting_flag
329  SkipHevcProfileTierLevel(/* profile_present= */ true, max_sub_layers_minus1,
330  &sps_reader);
331  sps_reader.ReadExpGolomb(); // sps_seq_parameter_set_id
332  if (sps_reader.ReadExpGolomb() == 3) // chroma_format_idc
333  sps_reader.SkipBits(1); // separate_colour_plane_flag
334  sps_reader.ReadExpGolomb(); // pic_width_in_luma_samples
335  sps_reader.ReadExpGolomb(); // pic_height_in_luma_samples
336  if (sps_reader.ReadBits(1) == 1) { // conformance_window_flag
337  sps_reader.ReadExpGolomb(); // conf_win_left_offset
338  sps_reader.ReadExpGolomb(); // conf_win_right_offset
339  sps_reader.ReadExpGolomb(); // conf_win_top_offset
340  sps_reader.ReadExpGolomb(); // conf_win_bottom_offset
341  }
342  sps_reader.ReadExpGolomb(); // bit_depth_luma_minus8
343  sps_reader.ReadExpGolomb(); // bit_depth_chroma_minus8
344  sps_reader.ReadExpGolomb(); // log2_max_pic_order_cnt_lsb_minus4
345  const uint64_t sub_layer_ordering_info_present = sps_reader.ReadBits(1);
346  for (uint64_t i =
347  (sub_layer_ordering_info_present ? 0 : max_sub_layers_minus1);
348  i <= max_sub_layers_minus1; i++) {
349  sps_reader.ReadExpGolomb(); // sps_max_dec_pic_buffering_minus1
350  sps_reader.ReadExpGolomb(); // sps_max_num_reorder_pics
351  sps_reader.ReadExpGolomb(); // ps_max_latency_increase_plus1
352  }
353  sps_reader.ReadExpGolomb(); // log2_min_luma_coding_block_size_minus3
354  sps_reader.ReadExpGolomb(); // log2_diff_max_min_luma_coding_block_size
355  sps_reader.ReadExpGolomb(); // log2_min_luma_transform_block_size_minus2
356  sps_reader.ReadExpGolomb(); // log2_diff_max_min_luma_transform_block_size
357  sps_reader.ReadExpGolomb(); // max_transform_hierarchy_depth_inter
358  sps_reader.ReadExpGolomb(); // max_transform_hierarchy_depth_intra
359  if (sps_reader.ReadBits(1) == 1) { // scaling_list_enabled_flag
360  LOG(WARNING) << "Scaling list isn't supported";
361  return {0, 0};
362  }
363  sps_reader.SkipBits(1); // amp_enabled_flag
364  sps_reader.SkipBits(1); // sample_adaptive_offset_enabled_flag
365  if (sps_reader.ReadBits(1) == 1) { // pcm_enabled_flag
366  sps_reader.ReadBits(4); // pcm_sample_bit_depth_luma_minus1
367  sps_reader.ReadBits(4); // pcm_sample_bit_depth_chroma_minus1
368  sps_reader.ReadExpGolomb(); // log2_min_pcm_luma_coding_block_size_minus3
369  sps_reader.ReadExpGolomb(); // log2_diff_max_min_pcm_luma_coding_block_size
370  }
371  const uint64_t num_short_term_ref_pic_sets = sps_reader.ReadExpGolomb();
372  if (num_short_term_ref_pic_sets != 0) {
373  LOG(WARNING) << "Short-term reference pictures not supported";
374  return {0, 0};
375  }
376  if (sps_reader.ReadBits(1) == 1) { // long_term_ref_pics_present_flag
377  const uint64_t num_long_term_ref_pics_sps = sps_reader.ReadExpGolomb();
378  for (uint64_t i = 0; i < num_long_term_ref_pics_sps; i++) {
379  sps_reader.ReadExpGolomb(); // lt_ref_pic_poc_lsb_sps
380  sps_reader.ReadBits(1); // used_by_curr_pic_lt_sps_flag
381  }
382  }
383  sps_reader.ReadBits(1); // sps_temporal_mvp_enabled_flag
384  sps_reader.ReadBits(1); // strong_intra_smoothing_enabled_flag
385  if (sps_reader.ReadBits(1) != 1) // vui_parameters_present_flag
386  return {0, 0}; // The info we want isn't there, return unknown.
387  return GetSarFromVuiParameters(&sps_reader);
388 }
389 
390 #endif
391 
392 } // namespace
393 
395  const std::string& mime_type,
396  const std::string& container)
397  : signal_("FFmpegDemuxer"),
398  mutex_("FFmpegDemuxer"),
399  mime_type_(mime_type),
400  container_(container),
401  io_(nullptr),
402  demuxer_ctx_(nullptr),
403  client_(client),
404  output_(nullptr),
405  timestamp_offset_(0),
406  input_(nullptr),
407  input_size_(0),
408  input_pos_(0),
409  state_(State::Waiting),
410  thread_("FFmepgDemuxer", std::bind(&FFmpegDemuxer::ThreadMain, this)) {}
411 
413  {
414  std::unique_lock<Mutex> lock(mutex_);
415  state_ = State::Stopping;
416  signal_.SignalAll();
417  }
418  thread_.join();
419 
420  if (io_) {
421  // If an IO buffer was allocated by libavformat, it must be freed by us.
422  if (io_->buffer) {
423  av_free(io_->buffer);
424  }
425  // The IO context itself must be freed by us as well. Closing ctx_ does
426  // not free the IO context attached to it.
427  av_free(io_);
428  }
429 }
430 
432  // TODO: Add Reset capability.
433 }
434 
435 bool FFmpegDemuxer::Demux(double timestamp_offset, const uint8_t* data,
436  size_t size,
437  std::vector<std::shared_ptr<EncodedFrame>>* frames) {
438  std::unique_lock<Mutex> lock(mutex_);
439  if (state_ != State::Waiting) {
440  DCHECK(state_ == State::Errored || state_ == State::Stopping);
441  return false;
442  }
443 
444  output_ = frames;
445  timestamp_offset_ = timestamp_offset;
446  input_ = data;
447  input_size_ = size;
448  input_pos_ = 0;
449 
450  state_ = State::Parsing;
451  while (state_ == State::Parsing) {
452  signal_.SignalAll();
453  signal_.ResetAndWaitWhileUnlocked(lock);
454  }
455 
456  output_ = nullptr;
457  input_ = nullptr;
458  input_size_ = 0;
459  return state_ == State::Waiting;
460 }
461 
462 int FFmpegDemuxer::OnRead(void* user, uint8_t* buffer, int size) {
463  auto* that = reinterpret_cast<FFmpegDemuxer*>(user);
464  std::unique_lock<Mutex> lock(that->mutex_);
465  while (that->input_pos_ >= that->input_size_ &&
466  (that->state_ == State::Parsing || that->state_ == State::Waiting)) {
467  that->state_ = State::Waiting;
468  that->signal_.SignalAll();
469  that->signal_.ResetAndWaitWhileUnlocked(lock);
470  }
471  if (that->state_ != State::Parsing) {
472  DCHECK(that->state_ == State::Errored || that->state_ == State::Stopping);
473  return AVERROR_EOF;
474  }
475 
476  DCHECK_LT(that->input_pos_, that->input_size_);
477  size_t to_read = std::min<size_t>(size, that->input_size_ - that->input_pos_);
478  memcpy(buffer, that->input_ + that->input_pos_, to_read);
479  that->input_pos_ += to_read;
480  return to_read;
481 }
482 
483 void FFmpegDemuxer::ThreadMain() {
484  // Allocate a context for custom IO.
485  // NOTE: The buffer may be reallocated/resized by libavformat later.
486  // It is always our responsibility to free it later with av_free.
487  io_ = avio_alloc_context(
488  reinterpret_cast<unsigned char*>(av_malloc(kInitialBufferSize)),
489  kInitialBufferSize,
490  0, // write_flag (read-only)
491  this, // opaque user data
492  &OnRead,
493  nullptr, // write callback (read-only)
494  nullptr); // seek callback (linear reads only)
495  if (!io_)
496  return OnError();
497  if (!ReinitDemuxer())
498  return OnError();
499 
500  // At this point, the demuxer has been created and initialized, which is only
501  // after we have parsed the init segment.
502  if (client_) {
503  if (demuxer_ctx_->duration == 0 ||
504  demuxer_ctx_->duration == AV_NOPTS_VALUE) {
505  client_->OnLoadedMetaData(HUGE_VAL);
506  } else {
507  client_->OnLoadedMetaData(static_cast<double>(demuxer_ctx_->duration) /
508  AV_TIME_BASE);
509  }
510  }
511 
512  std::unique_lock<Mutex> lock(mutex_);
513  while (true) {
514  while (state_ == State::Waiting) {
515  signal_.SignalAll();
516  signal_.ResetAndWaitWhileUnlocked(lock);
517  }
518  if (state_ != State::Parsing)
519  return;
520 
521  AVPacket pkt;
522  {
523  util::Unlocker<Mutex> unlock(&lock);
524  int ret = av_read_frame(demuxer_ctx_.get(), &pkt);
525  if (ret == AVERROR_SHAKA_RESET_DEMUXER) {
526  // Special case for Shaka where we need to reinit the demuxer.
527  VLOG(1) << "Reinitializing demuxer";
528  {
529  std::unique_lock<Mutex> lock(mutex_);
530  // Re-read the input data with the new demuxer.
531  input_pos_ = 0;
532  }
533 
534  if (!ReinitDemuxer())
535  return OnError();
536  ret = av_read_frame(demuxer_ctx_.get(), &pkt);
537  }
538  if (ret < 0) {
539  av_packet_unref(&pkt);
540  LogError(ret);
541  return OnError();
542  }
543 
544  UpdateEncryptionInfo();
545 
546  // Ignore discard flags. The demuxer will set this when we try to read
547  // content behind media we have already read.
548  pkt.flags &= ~AV_PKT_FLAG_DISCARD;
549 
550  VLOG(3) << "Read frame at dts=" << pkt.dts;
551  DCHECK_EQ(pkt.stream_index, 0);
552  DCHECK_EQ(demuxer_ctx_->nb_streams, 1u);
553  }
554 
555  auto* frame = ffmpeg::FFmpegEncodedFrame::MakeFrame(&pkt, cur_stream_info_,
556  timestamp_offset_);
557  if (frame) {
558  // No need to unref |pkt| since it was moved into the encoded frame.
559  DCHECK(output_);
560  output_->emplace_back(frame);
561  } else {
562  av_packet_unref(&pkt);
563  state_ = State::Errored;
564  signal_.SignalAll();
565  return;
566  }
567  }
568 }
569 
570 bool FFmpegDemuxer::ReinitDemuxer() {
571  demuxer_ctx_.reset();
572  avio_flush(io_);
573 
574  AVFormatContext* demuxer = avformat_alloc_context();
575  if (!demuxer)
576  return false;
577  demuxer->pb = io_;
578  // If we enable the probes, in encrypted content we'll get logs about being
579  // unable to parse the content; however, if we disable the probes, we won't
580  // get accurate frame durations, which can cause problems.
581  // TODO: Find a way to conditionally disable parsing or to suppress the
582  // logs for encrypted content (since the errors there aren't fatal).
583  // demuxer->probesize = 0;
584  // demuxer->max_analyze_duration = 0;
585 
586  // To enable extremely verbose logging:
587  // demuxer->debug = 1;
588 
589  // Parse encryption info for WebM; ignored for other demuxers.
590  AVDictionary* dict = nullptr;
591  CHECK_EQ(av_dict_set_int(&dict, "parse_encryption", 1, 0), 0);
592 
593  AVInputFormat* format = av_find_input_format(container_.c_str());
594  CHECK(format) << "Should have checked for support before creating.";
595  const int open_code = avformat_open_input(&demuxer, nullptr, format, &dict);
596  av_dict_free(&dict);
597  if (open_code < 0) {
598  LogError(open_code);
599  return false;
600  }
601 
602  demuxer_ctx_.reset(demuxer);
603  const int find_code = avformat_find_stream_info(demuxer, nullptr);
604  if (find_code < 0) {
605  LogError(find_code);
606  return false;
607  }
608 
609  if (demuxer_ctx_->nb_streams == 0) {
610  LOG(ERROR) << "FFmpeg was unable to find any streams";
611  return false;
612  }
613  if (demuxer_ctx_->nb_streams > 1) {
614  LOG(ERROR) << "Multiple streams in input not supported";
615  return false;
616  }
617 
618  AVStream* stream = demuxer_ctx_->streams[0];
619  AVCodecParameters* params = stream->codecpar;
620  const std::string expected_codec = GetCodec(mime_type_, params->codec_id);
621 
622  const char* actual_codec = avcodec_get_name(params->codec_id);
623  if (NormalizeCodec(expected_codec) != actual_codec) {
624  LOG(ERROR) << "Mismatch between codec string and media. Codec string: '"
625  << expected_codec << "', media codec: '" << actual_codec
626  << "' (0x" << std::hex << params->codec_id << ")";
627  return false;
628  }
629 
630  std::vector<uint8_t> extra_data{params->extradata,
631  params->extradata + params->extradata_size};
632  Rational<uint32_t> sar{params->sample_aspect_ratio.num,
633  params->sample_aspect_ratio.den};
634 #ifndef HAS_FFMPEG_DECODER
635  if (!sar) {
636  switch (params->codec_id) {
637  case AV_CODEC_ID_H264:
638  sar = GetSarFromH264(extra_data);
639  break;
640  case AV_CODEC_ID_H265:
641  sar = GetSarFromHevc(extra_data);
642  break;
643  default:
644  break;
645  }
646  }
647 #endif
648 
649  cur_stream_info_.reset(new StreamInfo(
650  mime_type_, expected_codec, params->codec_type == AVMEDIA_TYPE_VIDEO,
651  {stream->time_base.num, stream->time_base.den}, sar, extra_data,
652  params->width, params->height, params->channels, params->sample_rate));
653  return true;
654 }
655 
656 void FFmpegDemuxer::UpdateEncryptionInfo() {
657  if (!client_)
658  return;
659 
660  int side_data_size;
661  const uint8_t* side_data = av_stream_get_side_data(
662  demuxer_ctx_->streams[0], AV_PKT_DATA_ENCRYPTION_INIT_INFO,
663  &side_data_size);
664  if (side_data) {
665  AVEncryptionInitInfo* info =
666  av_encryption_init_info_get_side_data(side_data, side_data_size);
667  std::vector<uint8_t> pssh;
668  for (auto* cur_info = info; cur_info; cur_info = cur_info->next) {
669  if (cur_info->system_id_size) {
670  const std::vector<uint8_t> temp = CreatePssh(cur_info);
671  pssh.insert(pssh.end(), temp.begin(), temp.end());
672  } else {
673  for (size_t i = 0; i < cur_info->num_key_ids; i++) {
675  cur_info->key_ids[i], cur_info->key_id_size);
676  }
677  }
678  }
679  if (!pssh.empty()) {
680  client_->OnEncrypted(eme::MediaKeyInitDataType::Cenc, pssh.data(),
681  pssh.size());
682  }
683  av_encryption_init_info_free(info);
684 
685  av_stream_remove_side_data(demuxer_ctx_->streams[0],
686  AV_PKT_DATA_ENCRYPTION_INIT_INFO);
687  }
688 }
689 
690 void FFmpegDemuxer::OnError() {
691  std::unique_lock<Mutex> lock(mutex_);
692  if (state_ != State::Stopping)
693  state_ = State::Errored;
694  signal_.SignalAllIfNotSet();
695 }
696 
697 
698 bool FFmpegDemuxerFactory::IsTypeSupported(const std::string& mime_type) const {
699  std::string unused;
700  return ParseAndCheckSupport(mime_type, &unused);
701 }
702 
703 bool FFmpegDemuxerFactory::IsCodecVideo(const std::string& codec) const {
704  std::string norm = NormalizeCodec(codec);
705  auto* impl = avcodec_find_decoder_by_name(norm.c_str());
706  if (impl)
707  return impl->type == AVMEDIA_TYPE_VIDEO;
708  return norm == "h264" || norm == "hevc" || norm == "vp8" || norm == "vp9";
709 }
710 
711 std::unique_ptr<Demuxer> FFmpegDemuxerFactory::Create(
712  const std::string& mime_type, Demuxer::Client* client) const {
713  std::string container;
714  if (!ParseAndCheckSupport(mime_type, &container))
715  return nullptr;
716 
717  return std::unique_ptr<Demuxer>(
718  new (std::nothrow) FFmpegDemuxer(client, mime_type, container));
719 }
720 
721 } // namespace ffmpeg
722 } // namespace media
723 } // namespace shaka
bool ParseMimeType(const std::string &source, std::string *type, std::string *subtype, std::unordered_map< std::string, std::string > *params)
Definition: media_utils.cc:62
std::string NormalizeCodec(const std::string &codec)
Definition: media_utils.cc:135
bool IsCodecVideo(const std::string &codec) const override
virtual void OnLoadedMetaData(double duration)=0
std::shared_ptr< shaka::media::DecodedFrame > frame
void join()
Definition: thread.h:56
static FFmpegEncodedFrame * MakeFrame(AVPacket *pkt, std::shared_ptr< const StreamInfo > info, double timestamp_offset)
FFmpegDemuxer(Demuxer::Client *client, const std::string &mime_type, const std::string &container)
std::string NormalizeContainer(const std::string &container)
Definition: media_utils.cc:128
#define AVERROR_SHAKA_RESET_DEMUXER
T ResetAndWaitWhileUnlocked(std::unique_lock< _Mutex > &lock)
Definition: thread_event.h:106
bool IsTypeSupported(const std::string &mime_type) const override
constexpr const char * kCodecMimeParam
Definition: media_utils.h:33
std::list< std::shared_ptr< BaseFrame > > frames
Definition: streams.cc:128
#define LogError(code, extra_info)
virtual void OnEncrypted(eme::MediaKeyInitDataType type, const uint8_t *data, size_t size)=0
bool Demux(double timestamp_offset, const uint8_t *data, size_t size, std::vector< std::shared_ptr< EncodedFrame >> *frames) override
std::unique_ptr< Demuxer > Create(const std::string &mime_type, Demuxer::Client *client) const override