Shaka Packager SDK
Loading...
Searching...
No Matches
aac_audio_specific_config.cc
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <packager/media/codecs/aac_audio_specific_config.h>
6
7#include <algorithm>
8
9#include <absl/log/check.h>
10#include <absl/log/log.h>
11
12#include <packager/media/base/bit_reader.h>
13#include <packager/media/base/rcheck.h>
14
15namespace shaka {
16namespace media {
17namespace {
18
// Sampling Frequency Index table, from ISO 14496-3 Table 1.16.
// Maps a 4-bit sampling frequency index to its sample rate. Indices past the
// end of the table have no entry, so callers must bounds-check with
// std::size(kSampleRates) before indexing.
constexpr uint32_t kSampleRates[] = {96000, 88200, 64000, 48000, 44100,
                                     32000, 24000, 22050, 16000, 12000,
                                     11025, 8000,  7350};
23
// Channel Configuration table, from ISO 14496-3 Table 1.17.
// Maps a channel configuration index to the number of channels it denotes.
// Index 0 yields 0 channels; callers must bounds-check with
// std::size(kChannelConfigs) before indexing.
constexpr uint8_t kChannelConfigs[] = {0, 1, 2, 3, 4, 5, 6, 8};
26
27// ISO 14496-3 Table 4.2 – Syntax of program_config_element()
28// program_config_element()
29// ...
30// element_is_cpe[i]; 1 bslbf
31// element_tag_select[i]; 4 uimsbf
32bool CountChannels(uint8_t num_elements,
33 uint8_t* num_channels,
34 BitReader* bit_reader) {
35 for (uint8_t i = 0; i < num_elements; ++i) {
36 bool is_pair = false;
37 RCHECK(bit_reader->ReadBits(1, &is_pair));
38 *num_channels += is_pair ? 2 : 1;
39 RCHECK(bit_reader->SkipBits(4));
40 }
41 return true;
42}
43
44} // namespace
45
46AACAudioSpecificConfig::AACAudioSpecificConfig() {}
47
48AACAudioSpecificConfig::~AACAudioSpecificConfig() {}
49
50bool AACAudioSpecificConfig::Parse(const std::vector<uint8_t>& data) {
51 if (data.empty())
52 return false;
53
54 BitReader reader(&data[0], data.size());
55 uint8_t extension_type = AOT_NULL;
56 uint8_t extension_frequency_index = 0xff;
57
58 sbr_present_ = false;
59 ps_present_ = false;
60 frequency_ = 0;
61 extension_frequency_ = 0;
62
63 // The following code is written according to ISO 14496 Part 3 Table 1.13 -
64 // Syntax of AudioSpecificConfig.
65
66 // Read base configuration.
67 // Audio Object Types specified in "ISO/IEC 14496-3:2019, Table 1.19"
68 RCHECK(ParseAudioObjectType(&reader));
69
70 RCHECK(reader.ReadBits(4, &frequency_index_));
71 if (frequency_index_ == 0xf)
72 RCHECK(reader.ReadBits(24, &frequency_));
73 RCHECK(reader.ReadBits(4, &channel_config_));
74
75 RCHECK(channel_config_ < std::size(kChannelConfigs));
76 num_channels_ = kChannelConfigs[channel_config_];
77
78 // Read extension configuration.
79 if (audio_object_type_ == AOT_SBR || audio_object_type_ == AOT_PS) {
80 sbr_present_ = audio_object_type_ == AOT_SBR;
81 ps_present_ = audio_object_type_ == AOT_PS;
82 extension_type = AOT_SBR;
83 RCHECK(reader.ReadBits(4, &extension_frequency_index));
84 if (extension_frequency_index == 0xf)
85 RCHECK(reader.ReadBits(24, &extension_frequency_));
86 RCHECK(ParseAudioObjectType(&reader));
87 }
88
89 RCHECK(ParseDecoderGASpecificConfig(&reader));
90 RCHECK(SkipErrorSpecificConfig());
91
92 // Read extension configuration again
93 // Note: The check for 16 available bits comes from the AAC spec.
94 if (extension_type != AOT_SBR && reader.bits_available() >= 16) {
95 uint16_t sync_extension_type;
96 uint8_t sbr_present_flag;
97 uint8_t ps_present_flag;
98
99 if (reader.ReadBits(11, &sync_extension_type) &&
100 sync_extension_type == 0x2b7) {
101 if (reader.ReadBits(5, &extension_type) && extension_type == 5) {
102 RCHECK(reader.ReadBits(1, &sbr_present_flag));
103 sbr_present_ = sbr_present_flag != 0;
104
105 if (sbr_present_flag) {
106 RCHECK(reader.ReadBits(4, &extension_frequency_index));
107
108 if (extension_frequency_index == 0xf)
109 RCHECK(reader.ReadBits(24, &extension_frequency_));
110
111 // Note: The check for 12 available bits comes from the AAC spec.
112 if (reader.bits_available() >= 12) {
113 RCHECK(reader.ReadBits(11, &sync_extension_type));
114 if (sync_extension_type == 0x548) {
115 RCHECK(reader.ReadBits(1, &ps_present_flag));
116 ps_present_ = ps_present_flag != 0;
117 }
118 }
119 }
120 }
121 }
122 }
123
124 if (frequency_ == 0) {
125 RCHECK(frequency_index_ < std::size(kSampleRates));
126 frequency_ = kSampleRates[frequency_index_];
127 }
128
129 if (extension_frequency_ == 0 && extension_frequency_index != 0xff) {
130 RCHECK(extension_frequency_index < std::size(kSampleRates));
131 extension_frequency_ = kSampleRates[extension_frequency_index];
132 }
133
134 if (audio_object_type_ == AOT_USAC) {
135 return frequency_ != 0 && num_channels_ != 0 && channel_config_ <= 7;
136 } else {
137 return frequency_ != 0 && num_channels_ != 0 && audio_object_type_ >= 1 &&
138 audio_object_type_ <= 4 && frequency_index_ != 0xf &&
139 channel_config_ <= 7;
140 }
141}
142
144 const uint8_t* data,
145 size_t data_size,
146 std::vector<uint8_t>* audio_frame) const {
147 DCHECK(audio_object_type_ >= 1 && audio_object_type_ <= 4 &&
148 frequency_index_ != 0xf && channel_config_ <= 7);
149
150 size_t size = kADTSHeaderSize + data_size;
151
152 // ADTS header uses 13 bits for packet size.
153 if (size >= (1 << 13))
154 return false;
155
156 audio_frame->reserve(size);
157 audio_frame->resize(kADTSHeaderSize);
158
159 audio_frame->at(0) = 0xff;
160 audio_frame->at(1) = 0xf1;
161 audio_frame->at(2) = ((audio_object_type_ - 1) << 6) +
162 (frequency_index_ << 2) + (channel_config_ >> 2);
163 audio_frame->at(3) =
164 ((channel_config_ & 0x3) << 6) + static_cast<uint8_t>(size >> 11);
165 audio_frame->at(4) = static_cast<uint8_t>((size & 0x7ff) >> 3);
166 audio_frame->at(5) = static_cast<uint8_t>(((size & 7) << 5) + 0x1f);
167 audio_frame->at(6) = 0xfc;
168
169 audio_frame->insert(audio_frame->end(), data, data + data_size);
170
171 return true;
172}
173
// Reports the effective audio object type: AOT_PS when ps_present_ is set,
// otherwise AOT_SBR when sbr_present_ is set, otherwise the stored
// audio_object_type_.
// NOTE(review): listing line 175 -- presumably the qualified function name,
// its parameter list and the opening brace -- is absent from this extract;
// restore it from the class declaration before compiling.
174AACAudioSpecificConfig::AudioObjectType
176 if (ps_present_)
177 return AOT_PS;
178 if (sbr_present_)
179 return AOT_SBR;
180 return audio_object_type_;
181}
182
// Returns the output sample rate: extension_frequency_ when it is non-zero;
// otherwise frequency_ when sbr_present_ is false; otherwise twice
// frequency_, capped at 48000.
// NOTE(review): the signature line (listing line 183) is absent from this
// extract, so the function's name and return type are not visible here --
// restore it from the class declaration before compiling.
184 if (extension_frequency_ > 0)
185 return extension_frequency_;
186
187 if (!sbr_present_)
188 return frequency_;
189
190 // The following code is written according to ISO 14496 Part 3 Table 1.11 and
191 // Table 1.22. (Table 1.11 refers to the capping to 48000, Table 1.22 refers
192 // to SBR doubling the AAC sample rate.)
193 DCHECK_GT(frequency_, 0u);
194 return std::min(2 * frequency_, 48000u);
195}
196
// Returns the channel count to report: 2 when channel_config_ is 1 and either
// sbr_present_ or ps_present_ is set, otherwise num_channels_.
// NOTE(review): the signature line (listing line 197) is absent from this
// extract, so the function's name and return type are not visible here --
// restore it from the class declaration before compiling.
198 // Check for implicit signalling of HE-AAC and indicate stereo output
199 // if the mono channel configuration is signalled.
200 // See ISO-14496-3 Section 1.6.6.1.2 for details about this special casing.
201 if (sbr_present_ && channel_config_ == 1)
202 return 2; // CHANNEL_LAYOUT_STEREO
203
204 // When Parametric Stereo is on, mono will be played as stereo.
205 if (ps_present_ && channel_config_ == 1)
206 return 2; // CHANNEL_LAYOUT_STEREO
207
208 return num_channels_;
209}
210
211bool AACAudioSpecificConfig::ParseAudioObjectType(BitReader* bit_reader) {
212 RCHECK(bit_reader->ReadBits(5, &audio_object_type_));
213
214 if (audio_object_type_ == AOT_ESCAPE) {
215 uint8_t audioObjectTypeExt;
216 RCHECK(bit_reader->ReadBits(6, &audioObjectTypeExt));
217 audio_object_type_ = static_cast<AudioObjectType>(32 + audioObjectTypeExt);
218 }
219
220 return true;
221}
222
223// Currently this function only support GASpecificConfig defined in
224// ISO 14496 Part 3 Table 4.1 - Syntax of GASpecificConfig()
225bool AACAudioSpecificConfig::ParseDecoderGASpecificConfig(
226 BitReader* bit_reader) {
227 switch (audio_object_type_) {
228 case 1:
229 case 2:
230 case 3:
231 case 4:
232 case 6:
233 case 7:
234 case 17:
235 case 19:
236 case 20:
237 case 21:
238 case 22:
239 case 23:
240 return ParseGASpecificConfig(bit_reader);
241 case 42:
242 // Skip UsacConfig() parsing until required
243 RCHECK(bit_reader->SkipBits(bit_reader->bits_available()));
244 return true;
245 default:
246 break;
247 }
248
249 return false;
250}
251
252bool AACAudioSpecificConfig::SkipErrorSpecificConfig() const {
253 switch (audio_object_type_) {
254 case 17:
255 case 19:
256 case 20:
257 case 21:
258 case 22:
259 case 23:
260 case 24:
261 case 25:
262 case 26:
263 case 27:
264 return false;
265 default:
266 break;
267 }
268
269 return true;
270}
271
272// The following code is written according to ISO 14496 part 3 Table 4.1 -
273// GASpecificConfig.
274bool AACAudioSpecificConfig::ParseGASpecificConfig(BitReader* bit_reader) {
275 uint8_t extension_flag = 0;
276 uint8_t depends_on_core_coder;
277 uint16_t dummy;
278
279 RCHECK(bit_reader->ReadBits(1, &dummy)); // frameLengthFlag
280 RCHECK(bit_reader->ReadBits(1, &depends_on_core_coder));
281 if (depends_on_core_coder == 1)
282 RCHECK(bit_reader->ReadBits(14, &dummy)); // coreCoderDelay
283
284 RCHECK(bit_reader->ReadBits(1, &extension_flag));
285 if (channel_config_ == 0)
286 RCHECK(ParseProgramConfigElement(bit_reader));
287
288 if (audio_object_type_ == 6 || audio_object_type_ == 20)
289 RCHECK(bit_reader->ReadBits(3, &dummy)); // layerNr
290
291 if (extension_flag) {
292 if (audio_object_type_ == 22) {
293 RCHECK(bit_reader->ReadBits(5, &dummy)); // numOfSubFrame
294 RCHECK(bit_reader->ReadBits(11, &dummy)); // layer_length
295 }
296
297 if (audio_object_type_ == 17 || audio_object_type_ == 19 ||
298 audio_object_type_ == 20 || audio_object_type_ == 23) {
299 RCHECK(bit_reader->ReadBits(3, &dummy)); // resilience flags
300 }
301
302 RCHECK(bit_reader->ReadBits(1, &dummy)); // extensionFlag3
303 }
304
305 return true;
306}
307
308// ISO 14496-3 Table 4.2 – Syntax of program_config_element()
309// program_config_element()
310// {
311// element_instance_tag; 4 uimsbf
312// object_type; 2 uimsbf
313// sampling_frequency_index; 4 uimsbf
314// num_front_channel_elements; 4 uimsbf
315// num_side_channel_elements; 4 uimsbf
316// num_back_channel_elements; 4 uimsbf
317// num_lfe_channel_elements; 2 uimsbf
318// num_assoc_data_elements; 3 uimsbf
319// num_valid_cc_elements; 4 uimsbf
320// mono_mixdown_present; 1 uimsbf
321// if (mono_mixdown_present == 1)
322// mono_mixdown_element_number; 4 uimsbf
323// stereo_mixdown_present; 1 uimsbf
324// if (stereo_mixdown_present == 1)
325// stereo_mixdown_element_number; 4 uimsbf
326// matrix_mixdown_idx_present; 1 uimsbf
327// if (matrix_mixdown_idx_present == 1) {
328// matrix_mixdown_idx ; 2 uimsbf
329// pseudo_surround_enable; 1 uimsbf
330// }
331// for (i = 0; i < num_front_channel_elements; i++) {
332// front_element_is_cpe[i]; 1 bslbf
333// front_element_tag_select[i]; 4 uimsbf
334// }
335// for (i = 0; i < num_side_channel_elements; i++) {
336// side_element_is_cpe[i]; 1 bslbf
337// side_element_tag_select[i]; 4 uimsbf
338// }
339// for (i = 0; i < num_back_channel_elements; i++) {
340// back_element_is_cpe[i]; 1 bslbf
341// back_element_tag_select[i]; 4 uimsbf
342// }
343// for (i = 0; i < num_lfe_channel_elements; i++)
344// lfe_element_tag_select[i]; 4 uimsbf
345// for ( i = 0; i < num_assoc_data_elements; i++)
346// assoc_data_element_tag_select[i]; 4 uimsbf
347// for (i = 0; i < num_valid_cc_elements; i++) {
348// cc_element_is_ind_sw[i]; 1 uimsbf
349// valid_cc_element_tag_select[i]; 4 uimsbf
350// }
351// byte_alignment(); Note 1
352// comment_field_bytes; 8 uimsbf
353// for (i = 0; i < comment_field_bytes; i++)
354// comment_field_data[i]; 8 uimsbf
355// }
356// Note 1: If called from within an AudioSpecificConfig(), this
357// byte_alignment shall be relative to the start of the AudioSpecificConfig().
358bool AACAudioSpecificConfig::ParseProgramConfigElement(BitReader* bit_reader) {
359 // element_instance_tag (4), object_type (2), sampling_frequency_index (4).
360 RCHECK(bit_reader->SkipBits(4 + 2 + 4));
361
362 uint8_t num_front_channel_elements = 0;
363 uint8_t num_side_channel_elements = 0;
364 uint8_t num_back_channel_elements = 0;
365 uint8_t num_lfe_channel_elements = 0;
366 RCHECK(bit_reader->ReadBits(4, &num_front_channel_elements));
367 RCHECK(bit_reader->ReadBits(4, &num_side_channel_elements));
368 RCHECK(bit_reader->ReadBits(4, &num_back_channel_elements));
369 RCHECK(bit_reader->ReadBits(2, &num_lfe_channel_elements));
370
371 uint8_t num_assoc_data_elements = 0;
372 RCHECK(bit_reader->ReadBits(3, &num_assoc_data_elements));
373 uint8_t num_valid_cc_elements = 0;
374 RCHECK(bit_reader->ReadBits(4, &num_valid_cc_elements));
375
376 RCHECK(bit_reader->SkipBitsConditional(true, 4)); // mono_mixdown
377 RCHECK(bit_reader->SkipBitsConditional(true, 4)); // stereo_mixdown
378 RCHECK(bit_reader->SkipBitsConditional(true, 3)); // matrix_mixdown_idx
379
380 num_channels_ = 0;
381 RCHECK(CountChannels(num_front_channel_elements, &num_channels_, bit_reader));
382 RCHECK(CountChannels(num_side_channel_elements, &num_channels_, bit_reader));
383 RCHECK(CountChannels(num_back_channel_elements, &num_channels_, bit_reader));
384 num_channels_ += num_lfe_channel_elements;
385
386 RCHECK(bit_reader->SkipBits(4 * num_lfe_channel_elements));
387 RCHECK(bit_reader->SkipBits(4 * num_assoc_data_elements));
388 RCHECK(bit_reader->SkipBits(5 * num_valid_cc_elements));
389
390 bit_reader->SkipToNextByte();
391
392 uint8_t comment_field_bytes = 0;
393 RCHECK(bit_reader->ReadBits(8, &comment_field_bytes));
394 RCHECK(bit_reader->SkipBytes(comment_field_bytes));
395 return true;
396}
397
398} // namespace media
399} // namespace shaka
static const size_t kADTSHeaderSize
Size in bytes of the ADTS header added by ConvertToADTS().
virtual bool ConvertToADTS(const uint8_t *data, size_t data_size, std::vector< uint8_t > *audio_frame) const
virtual bool Parse(const std::vector< uint8_t > &data)
A class to read bit streams.
Definition bit_reader.h:20
size_t bits_available() const
Definition bit_reader.h:92
bool ReadBits(size_t num_bits, T *out)
Definition bit_reader.h:38
All the methods that are virtual are virtual for mocking.