Shaka Packager SDK
nal_unit_to_byte_stream_converter.cc
1 // Copyright 2016 Google LLC. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file or at
5 // https://developers.google.com/open-source/licenses/bsd
6 
7 #include <packager/media/codecs/nal_unit_to_byte_stream_converter.h>
8 
9 #include <list>
10 
11 #include <absl/log/check.h>
12 #include <absl/log/log.h>
13 
14 #include <packager/macros/compiler.h>
15 #include <packager/media/base/bit_reader.h>
16 #include <packager/media/base/buffer_reader.h>
17 #include <packager/media/base/buffer_writer.h>
18 #include <packager/media/codecs/nalu_reader.h>
19 
20 namespace shaka {
21 namespace media {
22 
23 namespace {
24 
// Readability alias for the |escape_data| argument of AppendNalu(); callers
// pass !kEscapeData to request a verbatim copy.
constexpr bool kEscapeData = true;

// Four-byte start code written in front of every NAL unit in the byte stream.
constexpr uint8_t kNaluStartCode[] = {0x00, 0x00, 0x00, 0x01};

// Byte inserted by EscapeNalByteSequence() to break up byte patterns that
// would otherwise need escaping.
constexpr uint8_t kEmulationPreventionByte = 0x03;

// Payload byte of the access unit delimiter written by
// AddAccessUnitDelimiter(): primary_pic_type 7 ("anything") in the top three
// bits, followed by the RBSP trailing bits.
constexpr uint8_t kAccessUnitDelimiterRbspAnyPrimaryPicType = 0xF0;
31 
32 bool IsNaluEqual(const Nalu& left, const Nalu& right) {
33  if (left.type() != right.type())
34  return false;
35  const size_t left_size = left.header_size() + left.payload_size();
36  const size_t right_size = right.header_size() + right.payload_size();
37  if (left_size != right_size)
38  return false;
39  return memcmp(left.data(), right.data(), left_size) == 0;
40 }
41 
42 void AppendNalu(const Nalu& nalu,
43  int /*nalu_length_size*/,
44  bool escape_data,
45  BufferWriter* buffer_writer) {
46  if (escape_data) {
47  EscapeNalByteSequence(nalu.data(), nalu.header_size() + nalu.payload_size(),
48  buffer_writer);
49  } else {
50  buffer_writer->AppendArray(nalu.data(),
51  nalu.header_size() + nalu.payload_size());
52  }
53 }
54 
55 void AddAccessUnitDelimiter(BufferWriter* buffer_writer) {
56  buffer_writer->AppendInt(static_cast<uint8_t>(Nalu::H264_AUD));
57  // For now, primary_pic_type is 7 which is "anything".
58  buffer_writer->AppendInt(kAccessUnitDelimiterRbspAnyPrimaryPicType);
59 }
60 
61 } // namespace
62 
63 void EscapeNalByteSequence(const uint8_t* input,
64  size_t input_size,
65  BufferWriter* output_writer) {
66  // Keep track of consecutive zeros that it has seen (not including the current
67  // byte), so that the algorithm doesn't need to go back to check the same
68  // bytes.
69  int consecutive_zero_count = 0;
70  for (size_t i = 0; i < input_size; ++i) {
71  if (consecutive_zero_count <= 1) {
72  output_writer->AppendInt(input[i]);
73  } else if (consecutive_zero_count == 2) {
74  if (input[i] == 0 || input[i] == 1 || input[i] == 2 || input[i] == 3) {
75  // Must be escaped.
76  output_writer->AppendInt(kEmulationPreventionByte);
77  }
78 
79  output_writer->AppendInt(input[i]);
80  // Note that input[i] can be 0.
81  // 00 00 00 00 00 00 should become
82  // 00 00 03 00 00 03 00 00 03
83  // So consecutive_zero_count is reset here and incremented below if
84  // input[i] is 0.
85  consecutive_zero_count = 0;
86  }
87 
88  consecutive_zero_count = input[i] == 0 ? consecutive_zero_count + 1 : 0;
89  }
90 
91  // ISO 14496-10 Section 7.4.1.1 mentions that if the last byte is 0 (which
92  // only happens if RBSP has cabac_zero_word), 0x03 must be appended.
93  if (consecutive_zero_count > 0) {
94  DCHECK_GT(input_size, 0u);
95  DCHECK_EQ(input[input_size - 1], 0u);
96  output_writer->AppendInt(kEmulationPreventionByte);
97  }
98 }
99 
100 // This functions creates a new subsample entry (|clear_bytes|, |cipher_bytes|)
101 // and appends it to |subsamples|. It splits the oversized (64KB) clear_bytes
102 // into smaller ones.
103 void AppendSubsamples(uint32_t clear_bytes,
104  uint32_t cipher_bytes,
105  std::vector<SubsampleEntry>* subsamples) {
106  while (clear_bytes > UINT16_MAX) {
107  subsamples->emplace_back(UINT16_MAX, 0);
108  clear_bytes -= UINT16_MAX;
109  }
110  subsamples->emplace_back(clear_bytes, cipher_bytes);
111 }
112 
113 // TODO(hmchen): Wrap methods of processing subsamples into a separate class,
114 // e.g., SubsampleReader.
115 // This function finds the range of the subsamples corresponding a NAL unit
116 // size. If a subsample crosses the boundary of two NAL units, it is split into
117 // smaller subsamples. Each call processes one NAL unit and it assumes the input
118 // NAL unit is already aligned with subsamples->at(start_subsample_id).
119 //
120 // An example of calling multiple times on each NAL unit is as follow:
121 //
122 // Input:
123 //
124 // Nalu 0 Nalu 1 Nalu 2
125 // | | |
126 // v v v
127 // | clear | cipher | clear | clear | clear | cipher |
128 //
129 // | Subsample 0 | Subsample 1 |
130 //
131 // Output:
132 //
133 // | Subsample 0 | Subsample 1 | Subsample 2 | Subsample 3 |
134 //
135 // Nalu 0: start_subsample_id = 0, next_subsample_id = 2
136 // Nalu 1: start_subsample_id = 2, next_subsample_id = 3
137 // Nalu 2: start_subsample_id = 3, next_subsample_id = 4
138 bool AlignSubsamplesWithNalu(size_t nalu_size,
139  size_t start_subsample_id,
140  std::vector<SubsampleEntry>* subsamples,
141  size_t* next_subsample_id) {
142  DCHECK(subsamples && !subsamples->empty());
143  size_t subsample_id = start_subsample_id;
144  size_t nalu_size_remain = nalu_size;
145  size_t subsample_bytes = 0;
146  while (subsample_id < subsamples->size()) {
147  subsample_bytes = subsamples->at(subsample_id).clear_bytes +
148  subsamples->at(subsample_id).cipher_bytes;
149  if (nalu_size_remain <= subsample_bytes) {
150  break;
151  }
152  nalu_size_remain -= subsample_bytes;
153  subsample_id++;
154  }
155 
156  if (subsample_id == subsamples->size()) {
157  DCHECK_GT(nalu_size_remain, 0u);
158  LOG(ERROR)
159  << "Total size of NAL unit is larger than the size of subsamples.";
160  return false;
161  }
162 
163  if (nalu_size_remain == subsample_bytes) {
164  *next_subsample_id = subsample_id + 1;
165  return true;
166  }
167 
168  DCHECK_GT(subsample_bytes, nalu_size_remain);
169  size_t clear_bytes = subsamples->at(subsample_id).clear_bytes;
170  size_t new_clear_bytes = 0;
171  size_t new_cipher_bytes = 0;
172  if (nalu_size_remain < clear_bytes) {
173  new_clear_bytes = nalu_size_remain;
174  } else {
175  new_clear_bytes = clear_bytes;
176  new_cipher_bytes = nalu_size_remain - clear_bytes;
177  }
178  subsamples->insert(subsamples->begin() + subsample_id,
179  SubsampleEntry(static_cast<uint16_t>(new_clear_bytes),
180  static_cast<uint32_t>(new_cipher_bytes)));
181  subsample_id++;
182  subsamples->at(subsample_id).clear_bytes -=
183  static_cast<uint16_t>(new_clear_bytes);
184  subsamples->at(subsample_id).cipher_bytes -=
185  static_cast<uint32_t>(new_cipher_bytes);
186  *next_subsample_id = subsample_id;
187  return true;
188 }
189 
190 // This function tries to merge clear-only into clear+cipher subsamples. This
191 // merge makes sure the clear_bytes will not exceed the clear size limits
192 // (2^16 bytes).
193 std::vector<SubsampleEntry> MergeSubsamples(
194  const std::vector<SubsampleEntry>& subsamples) {
195  std::vector<SubsampleEntry> new_subsamples;
196  uint32_t clear_bytes = 0;
197  for (size_t i = 0; i < subsamples.size(); ++i) {
198  clear_bytes += subsamples[i].clear_bytes;
199  // Add new subsample(s).
200  if (subsamples[i].cipher_bytes > 0 || i == subsamples.size() - 1) {
201  AppendSubsamples(clear_bytes, subsamples[i].cipher_bytes,
202  &new_subsamples);
203  clear_bytes = 0;
204  }
205  }
206  return new_subsamples;
207 }
208 
209 NalUnitToByteStreamConverter::NalUnitToByteStreamConverter()
210  : nalu_length_size_(0) {}
211 NalUnitToByteStreamConverter::~NalUnitToByteStreamConverter() {}
212 
213 bool NalUnitToByteStreamConverter::Initialize(
214  const uint8_t* decoder_configuration_data,
215  size_t decoder_configuration_data_size) {
216  if (!decoder_configuration_data || decoder_configuration_data_size == 0) {
217  LOG(ERROR) << "Decoder conguration is empty.";
218  return false;
219  }
220 
221  if (!decoder_config_.Parse(std::vector<uint8_t>(
222  decoder_configuration_data,
223  decoder_configuration_data + decoder_configuration_data_size))) {
224  return false;
225  }
226 
227  if (decoder_config_.nalu_count() < 2) {
228  LOG(ERROR) << "Cannot find SPS or PPS.";
229  return false;
230  }
231 
232  nalu_length_size_ = decoder_config_.nalu_length_size();
233 
234  BufferWriter buffer_writer(decoder_configuration_data_size);
235  bool found_sps = false;
236  bool found_pps = false;
237  for (uint32_t i = 0; i < decoder_config_.nalu_count(); ++i) {
238  const Nalu& nalu = decoder_config_.nalu(i);
239  if (nalu.type() == Nalu::H264NaluType::H264_SPS) {
240  buffer_writer.AppendArray(kNaluStartCode, std::size(kNaluStartCode));
241  AppendNalu(nalu, nalu_length_size_, !kEscapeData, &buffer_writer);
242  found_sps = true;
243  } else if (nalu.type() == Nalu::H264NaluType::H264_PPS) {
244  buffer_writer.AppendArray(kNaluStartCode, std::size(kNaluStartCode));
245  AppendNalu(nalu, nalu_length_size_, !kEscapeData, &buffer_writer);
246  found_pps = true;
247  } else if (nalu.type() == Nalu::H264NaluType::H264_SPSExtension) {
248  buffer_writer.AppendArray(kNaluStartCode, std::size(kNaluStartCode));
249  AppendNalu(nalu, nalu_length_size_, !kEscapeData, &buffer_writer);
250  }
251  }
252  if (!found_sps || !found_pps) {
253  LOG(ERROR) << "Failed to find SPS or PPS.";
254  return false;
255  }
256 
257  buffer_writer.SwapBuffer(&decoder_configuration_in_byte_stream_);
258  return true;
259 }
260 
261 bool NalUnitToByteStreamConverter::ConvertUnitToByteStream(
262  const uint8_t* sample,
263  size_t sample_size,
264  bool is_key_frame,
265  std::vector<uint8_t>* output) {
266  return ConvertUnitToByteStreamWithSubsamples(
267  sample, sample_size, is_key_frame, false, output,
268  nullptr); // Skip subsample update.
269 }
270 
271 // This ignores all AUD, SPS, and PPS in the sample. Instead uses the data
272 // parsed in Initialize(). However, if the SPS and PPS are different to
273 // those parsed in Initialized(), they are kept.
274 bool NalUnitToByteStreamConverter::ConvertUnitToByteStreamWithSubsamples(
275  const uint8_t* sample,
276  size_t sample_size,
277  bool is_key_frame,
278  bool escape_encrypted_nalu,
279  std::vector<uint8_t>* output,
280  std::vector<SubsampleEntry>* subsamples) {
281  if (!sample || sample_size == 0) {
282  LOG(WARNING) << "Sample is empty.";
283  return true;
284  }
285 
286  std::vector<SubsampleEntry> temp_subsamples;
287 
288  BufferWriter buffer_writer(sample_size);
289  buffer_writer.AppendArray(kNaluStartCode, std::size(kNaluStartCode));
290  AddAccessUnitDelimiter(&buffer_writer);
291  if (is_key_frame)
292  buffer_writer.AppendVector(decoder_configuration_in_byte_stream_);
293 
294  if (subsamples && !subsamples->empty()) {
295  // The inserted part in buffer_writer is all clear. Add a corresponding
296  // all-clear subsample.
297  AppendSubsamples(static_cast<uint32_t>(buffer_writer.Size()), 0u,
298  &temp_subsamples);
299  }
300 
301  NaluReader nalu_reader(Nalu::kH264, nalu_length_size_, sample, sample_size);
302  Nalu nalu;
303  NaluReader::Result result = nalu_reader.Advance(&nalu);
304 
305  size_t start_subsample_id = 0;
306  size_t next_subsample_id = 0;
307  while (result == NaluReader::kOk) {
308  const size_t old_nalu_size =
309  nalu_length_size_ + nalu.header_size() + nalu.payload_size();
310  if (subsamples && !subsamples->empty()) {
311  if (!AlignSubsamplesWithNalu(old_nalu_size, start_subsample_id,
312  subsamples, &next_subsample_id)) {
313  return false;
314  }
315  }
316  switch (nalu.type()) {
317  case Nalu::H264_AUD:
318  break;
319  case Nalu::H264_SPS:
320  FALLTHROUGH_INTENDED;
321  case Nalu::H264_SPSExtension:
322  FALLTHROUGH_INTENDED;
323  case Nalu::H264_PPS: {
324  // Also write this SPS/PPS if it is not the same as SPS/PPS in decoder
325  // configuration, which is already written.
326  //
327  // For more information see:
328  // - github.com/shaka-project/shaka-packager/issues/327
329  // - ISO/IEC 14496-15 5.4.5 Sync Sample
330  //
331  // TODO(kqyang): Parse sample data to figure out which SPS/PPS the
332  // sample actually uses and include that only.
333  bool new_decoder_config = true;
334  for (size_t i = 0; i < decoder_config_.nalu_count(); ++i) {
335  if (IsNaluEqual(decoder_config_.nalu(i), nalu)) {
336  new_decoder_config = false;
337  break;
338  }
339  }
340  if (!new_decoder_config)
341  break;
342  FALLTHROUGH_INTENDED;
343  }
344  default:
345  bool escape_data = false;
346  if (subsamples && !subsamples->empty()) {
347  if (escape_encrypted_nalu) {
348  for (size_t i = start_subsample_id; i < next_subsample_id; ++i) {
349  if (subsamples->at(i).cipher_bytes != 0) {
350  escape_data = true;
351  break;
352  }
353  }
354  }
355  }
356  buffer_writer.AppendArray(kNaluStartCode, std::size(kNaluStartCode));
357  AppendNalu(nalu, nalu_length_size_, escape_data, &buffer_writer);
358 
359  if (subsamples && !subsamples->empty()) {
360  temp_subsamples.emplace_back(
361  static_cast<uint16_t>(std::size(kNaluStartCode)), 0u);
362  // Update the first subsample of each NAL unit, which replaces NAL
363  // unit length field with start code. Note that if the escape_data is
364  // true, the total data size and the cipher_bytes may be changed.
365  // However, since the escape_data for encrypted nalu is only used in
366  // Sample-AES, which means the subsample is not really used,
367  // inaccurate subsamples should not be a big deal.
368  if (subsamples->at(start_subsample_id).clear_bytes <
369  nalu_length_size_) {
370  LOG(ERROR) << "Clear bytes ("
371  << subsamples->at(start_subsample_id).clear_bytes
372  << ") in start subsample of NAL unit is less than NAL "
373  "unit length size ("
374  << nalu_length_size_
375  << "). The NAL unit length size is (partially) "
376  "encrypted. In that case, it cannot be "
377  "converted to byte stream.";
378  return false;
379  }
380  subsamples->at(start_subsample_id).clear_bytes -= nalu_length_size_;
381  temp_subsamples.insert(temp_subsamples.end(),
382  subsamples->begin() + start_subsample_id,
383  subsamples->begin() + next_subsample_id);
384  }
385  break;
386  }
387 
388  start_subsample_id = next_subsample_id;
389  result = nalu_reader.Advance(&nalu);
390  }
391 
392  DCHECK_NE(result, NaluReader::kOk);
393  if (result != NaluReader::kEOStream) {
394  LOG(ERROR) << "Stopped reading before end of stream.";
395  return false;
396  }
397 
398  buffer_writer.SwapBuffer(output);
399  if (subsamples && !subsamples->empty()) {
400  if (next_subsample_id < subsamples->size()) {
401  LOG(ERROR)
402  << "The total size of NAL unit is shorter than the subsample size.";
403  return false;
404  }
405  // This function may modify the new_subsamples. But since it creates a
406  // merged verion and assign to the output subsamples, the input one is no
407  // longer used.
408  *subsamples = MergeSubsamples(temp_subsamples);
409  }
410  return true;
411 }
412 
413 } // namespace media
414 } // namespace shaka
Result Advance(Nalu *nalu)
Definition: nalu_reader.cc:244
int type() const
Definition: nalu_reader.h:112
uint64_t header_size() const
The size of the header, e.g. 1 for H.264.
Definition: nalu_reader.h:99
uint64_t payload_size() const
Size of this Nalu minus header_size().
Definition: nalu_reader.h:101
All the methods that are virtual are virtual for mocking.
Definition: crypto_flags.cc:66