Shaka Packager SDK
Loading...
Searching...
No Matches
nal_unit_to_byte_stream_converter.cc
1// Copyright 2016 Google LLC. All rights reserved.
2//
3// Use of this source code is governed by a BSD-style
4// license that can be found in the LICENSE file or at
5// https://developers.google.com/open-source/licenses/bsd
6
7#include <packager/media/codecs/nal_unit_to_byte_stream_converter.h>
8
9#include <list>
10
11#include <absl/log/check.h>
12#include <absl/log/log.h>
13
14#include <packager/macros/compiler.h>
15#include <packager/media/base/bit_reader.h>
16#include <packager/media/base/buffer_reader.h>
17#include <packager/media/base/buffer_writer.h>
18#include <packager/media/codecs/nalu_reader.h>
19
20namespace shaka {
21namespace media {
22
23namespace {
24
// Readability alias for the |escape_data| argument of AppendNalu(); call
// sites pass either kEscapeData or !kEscapeData.
constexpr bool kEscapeData = true;

// Four-byte start code written in front of every NAL unit in the output
// byte stream.
constexpr uint8_t kNaluStartCode[] = {0x00, 0x00, 0x00, 0x01};

// Byte inserted by EscapeNalByteSequence() to break up byte patterns that
// must not appear inside a NAL unit.
constexpr uint8_t kEmulationPreventionByte = 0x03;

// Payload byte of the access unit delimiter written by
// AddAccessUnitDelimiter(). The top three bits are 0b111, i.e.
// primary_pic_type == 7 ("anything").
constexpr uint8_t kAccessUnitDelimiterRbspAnyPrimaryPicType = 0xF0;
31
32bool IsNaluEqual(const Nalu& left, const Nalu& right) {
33 if (left.type() != right.type())
34 return false;
35 const size_t left_size = left.header_size() + left.payload_size();
36 const size_t right_size = right.header_size() + right.payload_size();
37 if (left_size != right_size)
38 return false;
39 return memcmp(left.data(), right.data(), left_size) == 0;
40}
41
42void AppendNalu(const Nalu& nalu,
43 int /*nalu_length_size*/,
44 bool escape_data,
45 BufferWriter* buffer_writer) {
46 if (escape_data) {
47 EscapeNalByteSequence(nalu.data(), nalu.header_size() + nalu.payload_size(),
48 buffer_writer);
49 } else {
50 buffer_writer->AppendArray(nalu.data(),
51 nalu.header_size() + nalu.payload_size());
52 }
53}
54
55void AddAccessUnitDelimiter(BufferWriter* buffer_writer) {
56 buffer_writer->AppendInt(static_cast<uint8_t>(Nalu::H264_AUD));
57 // For now, primary_pic_type is 7 which is "anything".
58 buffer_writer->AppendInt(kAccessUnitDelimiterRbspAnyPrimaryPicType);
59}
60
61} // namespace
62
63void EscapeNalByteSequence(const uint8_t* input,
64 size_t input_size,
65 BufferWriter* output_writer) {
66 // Keep track of consecutive zeros that it has seen (not including the current
67 // byte), so that the algorithm doesn't need to go back to check the same
68 // bytes.
69 int consecutive_zero_count = 0;
70 for (size_t i = 0; i < input_size; ++i) {
71 if (consecutive_zero_count <= 1) {
72 output_writer->AppendInt(input[i]);
73 } else if (consecutive_zero_count == 2) {
74 if (input[i] == 0 || input[i] == 1 || input[i] == 2 || input[i] == 3) {
75 // Must be escaped.
76 output_writer->AppendInt(kEmulationPreventionByte);
77 }
78
79 output_writer->AppendInt(input[i]);
80 // Note that input[i] can be 0.
81 // 00 00 00 00 00 00 should become
82 // 00 00 03 00 00 03 00 00 03
83 // So consecutive_zero_count is reset here and incremented below if
84 // input[i] is 0.
85 consecutive_zero_count = 0;
86 }
87
88 consecutive_zero_count = input[i] == 0 ? consecutive_zero_count + 1 : 0;
89 }
90
91 // ISO 14496-10 Section 7.4.1.1 mentions that if the last byte is 0 (which
92 // only happens if RBSP has cabac_zero_word), 0x03 must be appended.
93 if (consecutive_zero_count > 0) {
94 DCHECK_GT(input_size, 0u);
95 DCHECK_EQ(input[input_size - 1], 0u);
96 output_writer->AppendInt(kEmulationPreventionByte);
97 }
98}
99
100// This functions creates a new subsample entry (|clear_bytes|, |cipher_bytes|)
101// and appends it to |subsamples|. It splits the oversized (64KB) clear_bytes
102// into smaller ones.
103void AppendSubsamples(uint32_t clear_bytes,
104 uint32_t cipher_bytes,
105 std::vector<SubsampleEntry>* subsamples) {
106 while (clear_bytes > UINT16_MAX) {
107 subsamples->emplace_back(UINT16_MAX, 0);
108 clear_bytes -= UINT16_MAX;
109 }
110 subsamples->emplace_back(clear_bytes, cipher_bytes);
111}
112
113// TODO(hmchen): Wrap methods of processing subsamples into a separate class,
114// e.g., SubsampleReader.
115// This function finds the range of the subsamples corresponding a NAL unit
116// size. If a subsample crosses the boundary of two NAL units, it is split into
117// smaller subsamples. Each call processes one NAL unit and it assumes the input
118// NAL unit is already aligned with subsamples->at(start_subsample_id).
119//
120// An example of calling multiple times on each NAL unit is as follow:
121//
122// Input:
123//
124// Nalu 0 Nalu 1 Nalu 2
125// | | |
126// v v v
127// | clear | cipher | clear | clear | clear | cipher |
128//
129// | Subsample 0 | Subsample 1 |
130//
131// Output:
132//
133// | Subsample 0 | Subsample 1 | Subsample 2 | Subsample 3 |
134//
135// Nalu 0: start_subsample_id = 0, next_subsample_id = 2
136// Nalu 1: start_subsample_id = 2, next_subsample_id = 3
137// Nalu 2: start_subsample_id = 3, next_subsample_id = 4
138bool AlignSubsamplesWithNalu(size_t nalu_size,
139 size_t start_subsample_id,
140 std::vector<SubsampleEntry>* subsamples,
141 size_t* next_subsample_id) {
142 DCHECK(subsamples && !subsamples->empty());
143 size_t subsample_id = start_subsample_id;
144 size_t nalu_size_remain = nalu_size;
145 size_t subsample_bytes = 0;
146 while (subsample_id < subsamples->size()) {
147 subsample_bytes = subsamples->at(subsample_id).clear_bytes +
148 subsamples->at(subsample_id).cipher_bytes;
149 if (nalu_size_remain <= subsample_bytes) {
150 break;
151 }
152 nalu_size_remain -= subsample_bytes;
153 subsample_id++;
154 }
155
156 if (subsample_id == subsamples->size()) {
157 DCHECK_GT(nalu_size_remain, 0u);
158 LOG(ERROR)
159 << "Total size of NAL unit is larger than the size of subsamples.";
160 return false;
161 }
162
163 if (nalu_size_remain == subsample_bytes) {
164 *next_subsample_id = subsample_id + 1;
165 return true;
166 }
167
168 DCHECK_GT(subsample_bytes, nalu_size_remain);
169 size_t clear_bytes = subsamples->at(subsample_id).clear_bytes;
170 size_t new_clear_bytes = 0;
171 size_t new_cipher_bytes = 0;
172 if (nalu_size_remain < clear_bytes) {
173 new_clear_bytes = nalu_size_remain;
174 } else {
175 new_clear_bytes = clear_bytes;
176 new_cipher_bytes = nalu_size_remain - clear_bytes;
177 }
178 subsamples->insert(subsamples->begin() + subsample_id,
179 SubsampleEntry(static_cast<uint16_t>(new_clear_bytes),
180 static_cast<uint32_t>(new_cipher_bytes)));
181 subsample_id++;
182 subsamples->at(subsample_id).clear_bytes -=
183 static_cast<uint16_t>(new_clear_bytes);
184 subsamples->at(subsample_id).cipher_bytes -=
185 static_cast<uint32_t>(new_cipher_bytes);
186 *next_subsample_id = subsample_id;
187 return true;
188}
189
190// This function tries to merge clear-only into clear+cipher subsamples. This
191// merge makes sure the clear_bytes will not exceed the clear size limits
192// (2^16 bytes).
193std::vector<SubsampleEntry> MergeSubsamples(
194 const std::vector<SubsampleEntry>& subsamples) {
195 std::vector<SubsampleEntry> new_subsamples;
196 uint32_t clear_bytes = 0;
197 for (size_t i = 0; i < subsamples.size(); ++i) {
198 clear_bytes += subsamples[i].clear_bytes;
199 // Add new subsample(s).
200 if (subsamples[i].cipher_bytes > 0 || i == subsamples.size() - 1) {
201 AppendSubsamples(clear_bytes, subsamples[i].cipher_bytes,
202 &new_subsamples);
203 clear_bytes = 0;
204 }
205 }
206 return new_subsamples;
207}
208
209NalUnitToByteStreamConverter::NalUnitToByteStreamConverter()
210 : nalu_length_size_(0) {}
211NalUnitToByteStreamConverter::~NalUnitToByteStreamConverter() {}
212
213bool NalUnitToByteStreamConverter::Initialize(
214 const uint8_t* decoder_configuration_data,
215 size_t decoder_configuration_data_size) {
216 if (!decoder_configuration_data || decoder_configuration_data_size == 0) {
217 LOG(ERROR) << "Decoder conguration is empty.";
218 return false;
219 }
220
221 if (!decoder_config_.Parse(std::vector<uint8_t>(
222 decoder_configuration_data,
223 decoder_configuration_data + decoder_configuration_data_size))) {
224 return false;
225 }
226
227 if (decoder_config_.nalu_count() < 2) {
228 LOG(ERROR) << "Cannot find SPS or PPS.";
229 return false;
230 }
231
232 nalu_length_size_ = decoder_config_.nalu_length_size();
233
234 BufferWriter buffer_writer(decoder_configuration_data_size);
235 bool found_sps = false;
236 bool found_pps = false;
237 for (uint32_t i = 0; i < decoder_config_.nalu_count(); ++i) {
238 const Nalu& nalu = decoder_config_.nalu(i);
239 if (nalu.type() == Nalu::H264NaluType::H264_SPS) {
240 buffer_writer.AppendArray(kNaluStartCode, std::size(kNaluStartCode));
241 AppendNalu(nalu, nalu_length_size_, !kEscapeData, &buffer_writer);
242 found_sps = true;
243 } else if (nalu.type() == Nalu::H264NaluType::H264_PPS) {
244 buffer_writer.AppendArray(kNaluStartCode, std::size(kNaluStartCode));
245 AppendNalu(nalu, nalu_length_size_, !kEscapeData, &buffer_writer);
246 found_pps = true;
247 } else if (nalu.type() == Nalu::H264NaluType::H264_SPSExtension) {
248 buffer_writer.AppendArray(kNaluStartCode, std::size(kNaluStartCode));
249 AppendNalu(nalu, nalu_length_size_, !kEscapeData, &buffer_writer);
250 }
251 }
252 if (!found_sps || !found_pps) {
253 LOG(ERROR) << "Failed to find SPS or PPS.";
254 return false;
255 }
256
257 buffer_writer.SwapBuffer(&decoder_configuration_in_byte_stream_);
258 return true;
259}
260
261bool NalUnitToByteStreamConverter::ConvertUnitToByteStream(
262 const uint8_t* sample,
263 size_t sample_size,
264 bool is_key_frame,
265 std::vector<uint8_t>* output) {
266 return ConvertUnitToByteStreamWithSubsamples(
267 sample, sample_size, is_key_frame, false, output,
268 nullptr); // Skip subsample update.
269}
270
271// This ignores all AUD, SPS, and PPS in the sample. Instead uses the data
272// parsed in Initialize(). However, if the SPS and PPS are different to
273// those parsed in Initialized(), they are kept.
274bool NalUnitToByteStreamConverter::ConvertUnitToByteStreamWithSubsamples(
275 const uint8_t* sample,
276 size_t sample_size,
277 bool is_key_frame,
278 bool escape_encrypted_nalu,
279 std::vector<uint8_t>* output,
280 std::vector<SubsampleEntry>* subsamples) {
281 if (!sample || sample_size == 0) {
282 LOG(WARNING) << "Sample is empty.";
283 return true;
284 }
285
286 std::vector<SubsampleEntry> temp_subsamples;
287
288 BufferWriter buffer_writer(sample_size);
289 buffer_writer.AppendArray(kNaluStartCode, std::size(kNaluStartCode));
290 AddAccessUnitDelimiter(&buffer_writer);
291 if (is_key_frame)
292 buffer_writer.AppendVector(decoder_configuration_in_byte_stream_);
293
294 if (subsamples && !subsamples->empty()) {
295 // The inserted part in buffer_writer is all clear. Add a corresponding
296 // all-clear subsample.
297 AppendSubsamples(static_cast<uint32_t>(buffer_writer.Size()), 0u,
298 &temp_subsamples);
299 }
300
301 NaluReader nalu_reader(Nalu::kH264, nalu_length_size_, sample, sample_size);
302 Nalu nalu;
303 NaluReader::Result result = nalu_reader.Advance(&nalu);
304
305 size_t start_subsample_id = 0;
306 size_t next_subsample_id = 0;
307 while (result == NaluReader::kOk) {
308 const size_t old_nalu_size =
309 nalu_length_size_ + nalu.header_size() + nalu.payload_size();
310 if (subsamples && !subsamples->empty()) {
311 if (!AlignSubsamplesWithNalu(old_nalu_size, start_subsample_id,
312 subsamples, &next_subsample_id)) {
313 return false;
314 }
315 }
316 switch (nalu.type()) {
317 case Nalu::H264_AUD:
318 break;
319 case Nalu::H264_SPS:
320 FALLTHROUGH_INTENDED;
321 case Nalu::H264_SPSExtension:
322 FALLTHROUGH_INTENDED;
323 case Nalu::H264_PPS: {
324 // Also write this SPS/PPS if it is not the same as SPS/PPS in decoder
325 // configuration, which is already written.
326 //
327 // For more information see:
328 // - github.com/shaka-project/shaka-packager/issues/327
329 // - ISO/IEC 14496-15 5.4.5 Sync Sample
330 //
331 // TODO(kqyang): Parse sample data to figure out which SPS/PPS the
332 // sample actually uses and include that only.
333 bool new_decoder_config = true;
334 for (size_t i = 0; i < decoder_config_.nalu_count(); ++i) {
335 if (IsNaluEqual(decoder_config_.nalu(i), nalu)) {
336 new_decoder_config = false;
337 break;
338 }
339 }
340 if (!new_decoder_config)
341 break;
342 FALLTHROUGH_INTENDED;
343 }
344 default:
345 bool escape_data = false;
346 if (subsamples && !subsamples->empty()) {
347 if (escape_encrypted_nalu) {
348 for (size_t i = start_subsample_id; i < next_subsample_id; ++i) {
349 if (subsamples->at(i).cipher_bytes != 0) {
350 escape_data = true;
351 break;
352 }
353 }
354 }
355 }
356 buffer_writer.AppendArray(kNaluStartCode, std::size(kNaluStartCode));
357 AppendNalu(nalu, nalu_length_size_, escape_data, &buffer_writer);
358
359 if (subsamples && !subsamples->empty()) {
360 temp_subsamples.emplace_back(
361 static_cast<uint16_t>(std::size(kNaluStartCode)), 0u);
362 // Update the first subsample of each NAL unit, which replaces NAL
363 // unit length field with start code. Note that if the escape_data is
364 // true, the total data size and the cipher_bytes may be changed.
365 // However, since the escape_data for encrypted nalu is only used in
366 // Sample-AES, which means the subsample is not really used,
367 // inaccurate subsamples should not be a big deal.
368 if (subsamples->at(start_subsample_id).clear_bytes <
369 nalu_length_size_) {
370 LOG(ERROR) << "Clear bytes ("
371 << subsamples->at(start_subsample_id).clear_bytes
372 << ") in start subsample of NAL unit is less than NAL "
373 "unit length size ("
374 << nalu_length_size_
375 << "). The NAL unit length size is (partially) "
376 "encrypted. In that case, it cannot be "
377 "converted to byte stream.";
378 return false;
379 }
380 subsamples->at(start_subsample_id).clear_bytes -= nalu_length_size_;
381 temp_subsamples.insert(temp_subsamples.end(),
382 subsamples->begin() + start_subsample_id,
383 subsamples->begin() + next_subsample_id);
384 }
385 break;
386 }
387
388 start_subsample_id = next_subsample_id;
389 result = nalu_reader.Advance(&nalu);
390 }
391
392 DCHECK_NE(result, NaluReader::kOk);
393 if (result != NaluReader::kEOStream) {
394 LOG(ERROR) << "Stopped reading before end of stream.";
395 return false;
396 }
397
398 buffer_writer.SwapBuffer(output);
399 if (subsamples && !subsamples->empty()) {
400 if (next_subsample_id < subsamples->size()) {
401 LOG(ERROR)
402 << "The total size of NAL unit is shorter than the subsample size.";
403 return false;
404 }
405 // This function may modify the new_subsamples. But since it creates a
406 // merged verion and assign to the output subsamples, the input one is no
407 // longer used.
408 *subsamples = MergeSubsamples(temp_subsamples);
409 }
410 return true;
411}
412
413} // namespace media
414} // namespace shaka
Result Advance(Nalu *nalu)
uint64_t header_size() const
The size of the header, e.g. 1 for H.264.
Definition nalu_reader.h:99
uint64_t payload_size() const
Size of this Nalu minus header_size().
All the methods that are virtual are virtual for mocking.