Shaka Packager SDK
aac_audio_specific_config.cc
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <packager/media/codecs/aac_audio_specific_config.h>
6 
7 #include <algorithm>
8 
9 #include <absl/log/check.h>
10 #include <absl/log/log.h>
11 
12 #include <packager/media/base/bit_reader.h>
13 #include <packager/media/base/rcheck.h>
14 
15 namespace shaka {
16 namespace media {
17 namespace {
18 
// Sample rate in Hz for each sampling frequency index
// (ISO 14496-3 Table 1.16). The array position is the index value.
constexpr uint32_t kSampleRates[] = {
    96000, 88200, 64000, 48000, 44100, 32000, 24000,
    22050, 16000, 12000, 11025, 8000,  7350,
};

// Value stored as the channel count for each channel configuration
// (ISO 14496-3 Table 1.17). The array position is the configuration value.
constexpr uint8_t kChannelConfigs[] = {0, 1, 2, 3, 4, 5, 6, 8};
26 
27 // ISO 14496-3 Table 4.2 – Syntax of program_config_element()
28 // program_config_element()
29 // ...
30 // element_is_cpe[i]; 1 bslbf
31 // element_tag_select[i]; 4 uimsbf
32 bool CountChannels(uint8_t num_elements,
33  uint8_t* num_channels,
34  BitReader* bit_reader) {
35  for (uint8_t i = 0; i < num_elements; ++i) {
36  bool is_pair = false;
37  RCHECK(bit_reader->ReadBits(1, &is_pair));
38  *num_channels += is_pair ? 2 : 1;
39  RCHECK(bit_reader->SkipBits(4));
40  }
41  return true;
42 }
43 
44 } // namespace
45 
46 AACAudioSpecificConfig::AACAudioSpecificConfig() {}
47 
48 AACAudioSpecificConfig::~AACAudioSpecificConfig() {}
49 
50 bool AACAudioSpecificConfig::Parse(const std::vector<uint8_t>& data) {
51  if (data.empty())
52  return false;
53 
54  BitReader reader(&data[0], data.size());
55  uint8_t extension_type = AOT_NULL;
56  uint8_t extension_frequency_index = 0xff;
57 
58  sbr_present_ = false;
59  ps_present_ = false;
60  frequency_ = 0;
61  extension_frequency_ = 0;
62 
63  // The following code is written according to ISO 14496 Part 3 Table 1.13 -
64  // Syntax of AudioSpecificConfig.
65 
66  // Read base configuration.
67  // Audio Object Types specified in "ISO/IEC 14496-3:2019, Table 1.19"
68  RCHECK(ParseAudioObjectType(&reader));
69 
70  RCHECK(reader.ReadBits(4, &frequency_index_));
71  if (frequency_index_ == 0xf)
72  RCHECK(reader.ReadBits(24, &frequency_));
73  RCHECK(reader.ReadBits(4, &channel_config_));
74 
75  RCHECK(channel_config_ < std::size(kChannelConfigs));
76  num_channels_ = kChannelConfigs[channel_config_];
77 
78  // Read extension configuration.
79  if (audio_object_type_ == AOT_SBR || audio_object_type_ == AOT_PS) {
80  sbr_present_ = audio_object_type_ == AOT_SBR;
81  ps_present_ = audio_object_type_ == AOT_PS;
82  extension_type = AOT_SBR;
83  RCHECK(reader.ReadBits(4, &extension_frequency_index));
84  if (extension_frequency_index == 0xf)
85  RCHECK(reader.ReadBits(24, &extension_frequency_));
86  RCHECK(ParseAudioObjectType(&reader));
87  }
88 
89  RCHECK(ParseDecoderGASpecificConfig(&reader));
90  RCHECK(SkipErrorSpecificConfig());
91 
92  // Read extension configuration again
93  // Note: The check for 16 available bits comes from the AAC spec.
94  if (extension_type != AOT_SBR && reader.bits_available() >= 16) {
95  uint16_t sync_extension_type;
96  uint8_t sbr_present_flag;
97  uint8_t ps_present_flag;
98 
99  if (reader.ReadBits(11, &sync_extension_type) &&
100  sync_extension_type == 0x2b7) {
101  if (reader.ReadBits(5, &extension_type) && extension_type == 5) {
102  RCHECK(reader.ReadBits(1, &sbr_present_flag));
103  sbr_present_ = sbr_present_flag != 0;
104 
105  if (sbr_present_flag) {
106  RCHECK(reader.ReadBits(4, &extension_frequency_index));
107 
108  if (extension_frequency_index == 0xf)
109  RCHECK(reader.ReadBits(24, &extension_frequency_));
110 
111  // Note: The check for 12 available bits comes from the AAC spec.
112  if (reader.bits_available() >= 12) {
113  RCHECK(reader.ReadBits(11, &sync_extension_type));
114  if (sync_extension_type == 0x548) {
115  RCHECK(reader.ReadBits(1, &ps_present_flag));
116  ps_present_ = ps_present_flag != 0;
117  }
118  }
119  }
120  }
121  }
122  }
123 
124  if (frequency_ == 0) {
125  RCHECK(frequency_index_ < std::size(kSampleRates));
126  frequency_ = kSampleRates[frequency_index_];
127  }
128 
129  if (extension_frequency_ == 0 && extension_frequency_index != 0xff) {
130  RCHECK(extension_frequency_index < std::size(kSampleRates));
131  extension_frequency_ = kSampleRates[extension_frequency_index];
132  }
133 
134  if (audio_object_type_ == AOT_USAC) {
135  return frequency_ != 0 && num_channels_ != 0 && channel_config_ <= 7;
136  } else {
137  return frequency_ != 0 && num_channels_ != 0 && audio_object_type_ >= 1 &&
138  audio_object_type_ <= 4 && frequency_index_ != 0xf &&
139  channel_config_ <= 7;
140  }
141 }
142 
144  const uint8_t* data,
145  size_t data_size,
146  std::vector<uint8_t>* audio_frame) const {
147  DCHECK(audio_object_type_ >= 1 && audio_object_type_ <= 4 &&
148  frequency_index_ != 0xf && channel_config_ <= 7);
149 
150  size_t size = kADTSHeaderSize + data_size;
151 
152  // ADTS header uses 13 bits for packet size.
153  if (size >= (1 << 13))
154  return false;
155 
156  audio_frame->reserve(size);
157  audio_frame->resize(kADTSHeaderSize);
158 
159  audio_frame->at(0) = 0xff;
160  audio_frame->at(1) = 0xf1;
161  audio_frame->at(2) = ((audio_object_type_ - 1) << 6) +
162  (frequency_index_ << 2) + (channel_config_ >> 2);
163  audio_frame->at(3) =
164  ((channel_config_ & 0x3) << 6) + static_cast<uint8_t>(size >> 11);
165  audio_frame->at(4) = static_cast<uint8_t>((size & 0x7ff) >> 3);
166  audio_frame->at(5) = static_cast<uint8_t>(((size & 7) << 5) + 0x1f);
167  audio_frame->at(6) = 0xfc;
168 
169  audio_frame->insert(audio_frame->end(), data, data + data_size);
170 
171  return true;
172 }
173 
174 AACAudioSpecificConfig::AudioObjectType
176  if (ps_present_)
177  return AOT_PS;
178  if (sbr_present_)
179  return AOT_SBR;
180  return audio_object_type_;
181 }
182 
184  if (extension_frequency_ > 0)
185  return extension_frequency_;
186 
187  if (!sbr_present_)
188  return frequency_;
189 
190  // The following code is written according to ISO 14496 Part 3 Table 1.11 and
191  // Table 1.22. (Table 1.11 refers to the capping to 48000, Table 1.22 refers
192  // to SBR doubling the AAC sample rate.)
193  DCHECK_GT(frequency_, 0u);
194  return std::min(2 * frequency_, 48000u);
195 }
196 
198  // Check for implicit signalling of HE-AAC and indicate stereo output
199  // if the mono channel configuration is signalled.
200  // See ISO-14496-3 Section 1.6.6.1.2 for details about this special casing.
201  if (sbr_present_ && channel_config_ == 1)
202  return 2; // CHANNEL_LAYOUT_STEREO
203 
204  // When Parametric Stereo is on, mono will be played as stereo.
205  if (ps_present_ && channel_config_ == 1)
206  return 2; // CHANNEL_LAYOUT_STEREO
207 
208  return num_channels_;
209 }
210 
211 bool AACAudioSpecificConfig::ParseAudioObjectType(BitReader* bit_reader) {
212  RCHECK(bit_reader->ReadBits(5, &audio_object_type_));
213 
214  if (audio_object_type_ == AOT_ESCAPE) {
215  uint8_t audioObjectTypeExt;
216  RCHECK(bit_reader->ReadBits(6, &audioObjectTypeExt));
217  audio_object_type_ = static_cast<AudioObjectType>(32 + audioObjectTypeExt);
218  }
219 
220  return true;
221 }
222 
223 // Currently this function only support GASpecificConfig defined in
224 // ISO 14496 Part 3 Table 4.1 - Syntax of GASpecificConfig()
225 bool AACAudioSpecificConfig::ParseDecoderGASpecificConfig(
226  BitReader* bit_reader) {
227  switch (audio_object_type_) {
228  case 1:
229  case 2:
230  case 3:
231  case 4:
232  case 6:
233  case 7:
234  case 17:
235  case 19:
236  case 20:
237  case 21:
238  case 22:
239  case 23:
240  return ParseGASpecificConfig(bit_reader);
241  case 42:
242  // Skip UsacConfig() parsing until required
243  RCHECK(bit_reader->SkipBits(bit_reader->bits_available()));
244  return true;
245  default:
246  break;
247  }
248 
249  return false;
250 }
251 
252 bool AACAudioSpecificConfig::SkipErrorSpecificConfig() const {
253  switch (audio_object_type_) {
254  case 17:
255  case 19:
256  case 20:
257  case 21:
258  case 22:
259  case 23:
260  case 24:
261  case 25:
262  case 26:
263  case 27:
264  return false;
265  default:
266  break;
267  }
268 
269  return true;
270 }
271 
272 // The following code is written according to ISO 14496 part 3 Table 4.1 -
273 // GASpecificConfig.
274 bool AACAudioSpecificConfig::ParseGASpecificConfig(BitReader* bit_reader) {
275  uint8_t extension_flag = 0;
276  uint8_t depends_on_core_coder;
277  uint16_t dummy;
278 
279  RCHECK(bit_reader->ReadBits(1, &dummy)); // frameLengthFlag
280  RCHECK(bit_reader->ReadBits(1, &depends_on_core_coder));
281  if (depends_on_core_coder == 1)
282  RCHECK(bit_reader->ReadBits(14, &dummy)); // coreCoderDelay
283 
284  RCHECK(bit_reader->ReadBits(1, &extension_flag));
285  if (channel_config_ == 0)
286  RCHECK(ParseProgramConfigElement(bit_reader));
287 
288  if (audio_object_type_ == 6 || audio_object_type_ == 20)
289  RCHECK(bit_reader->ReadBits(3, &dummy)); // layerNr
290 
291  if (extension_flag) {
292  if (audio_object_type_ == 22) {
293  RCHECK(bit_reader->ReadBits(5, &dummy)); // numOfSubFrame
294  RCHECK(bit_reader->ReadBits(11, &dummy)); // layer_length
295  }
296 
297  if (audio_object_type_ == 17 || audio_object_type_ == 19 ||
298  audio_object_type_ == 20 || audio_object_type_ == 23) {
299  RCHECK(bit_reader->ReadBits(3, &dummy)); // resilience flags
300  }
301 
302  RCHECK(bit_reader->ReadBits(1, &dummy)); // extensionFlag3
303  }
304 
305  return true;
306 }
307 
308 // ISO 14496-3 Table 4.2 – Syntax of program_config_element()
309 // program_config_element()
310 // {
311 // element_instance_tag; 4 uimsbf
312 // object_type; 2 uimsbf
313 // sampling_frequency_index; 4 uimsbf
314 // num_front_channel_elements; 4 uimsbf
315 // num_side_channel_elements; 4 uimsbf
316 // num_back_channel_elements; 4 uimsbf
317 // num_lfe_channel_elements; 2 uimsbf
318 // num_assoc_data_elements; 3 uimsbf
319 // num_valid_cc_elements; 4 uimsbf
320 // mono_mixdown_present; 1 uimsbf
321 // if (mono_mixdown_present == 1)
322 // mono_mixdown_element_number; 4 uimsbf
323 // stereo_mixdown_present; 1 uimsbf
324 // if (stereo_mixdown_present == 1)
325 // stereo_mixdown_element_number; 4 uimsbf
326 // matrix_mixdown_idx_present; 1 uimsbf
327 // if (matrix_mixdown_idx_present == 1) {
328 // matrix_mixdown_idx ; 2 uimsbf
329 // pseudo_surround_enable; 1 uimsbf
330 // }
331 // for (i = 0; i < num_front_channel_elements; i++) {
332 // front_element_is_cpe[i]; 1 bslbf
333 // front_element_tag_select[i]; 4 uimsbf
334 // }
335 // for (i = 0; i < num_side_channel_elements; i++) {
336 // side_element_is_cpe[i]; 1 bslbf
337 // side_element_tag_select[i]; 4 uimsbf
338 // }
339 // for (i = 0; i < num_back_channel_elements; i++) {
340 // back_element_is_cpe[i]; 1 bslbf
341 // back_element_tag_select[i]; 4 uimsbf
342 // }
343 // for (i = 0; i < num_lfe_channel_elements; i++)
344 // lfe_element_tag_select[i]; 4 uimsbf
345 // for ( i = 0; i < num_assoc_data_elements; i++)
346 // assoc_data_element_tag_select[i]; 4 uimsbf
347 // for (i = 0; i < num_valid_cc_elements; i++) {
348 // cc_element_is_ind_sw[i]; 1 uimsbf
349 // valid_cc_element_tag_select[i]; 4 uimsbf
350 // }
351 // byte_alignment(); Note 1
352 // comment_field_bytes; 8 uimsbf
353 // for (i = 0; i < comment_field_bytes; i++)
354 // comment_field_data[i]; 8 uimsbf
355 // }
356 // Note 1: If called from within an AudioSpecificConfig(), this
357 // byte_alignment shall be relative to the start of the AudioSpecificConfig().
358 bool AACAudioSpecificConfig::ParseProgramConfigElement(BitReader* bit_reader) {
359  // element_instance_tag (4), object_type (2), sampling_frequency_index (4).
360  RCHECK(bit_reader->SkipBits(4 + 2 + 4));
361 
362  uint8_t num_front_channel_elements = 0;
363  uint8_t num_side_channel_elements = 0;
364  uint8_t num_back_channel_elements = 0;
365  uint8_t num_lfe_channel_elements = 0;
366  RCHECK(bit_reader->ReadBits(4, &num_front_channel_elements));
367  RCHECK(bit_reader->ReadBits(4, &num_side_channel_elements));
368  RCHECK(bit_reader->ReadBits(4, &num_back_channel_elements));
369  RCHECK(bit_reader->ReadBits(2, &num_lfe_channel_elements));
370 
371  uint8_t num_assoc_data_elements = 0;
372  RCHECK(bit_reader->ReadBits(3, &num_assoc_data_elements));
373  uint8_t num_valid_cc_elements = 0;
374  RCHECK(bit_reader->ReadBits(4, &num_valid_cc_elements));
375 
376  RCHECK(bit_reader->SkipBitsConditional(true, 4)); // mono_mixdown
377  RCHECK(bit_reader->SkipBitsConditional(true, 4)); // stereo_mixdown
378  RCHECK(bit_reader->SkipBitsConditional(true, 3)); // matrix_mixdown_idx
379 
380  num_channels_ = 0;
381  RCHECK(CountChannels(num_front_channel_elements, &num_channels_, bit_reader));
382  RCHECK(CountChannels(num_side_channel_elements, &num_channels_, bit_reader));
383  RCHECK(CountChannels(num_back_channel_elements, &num_channels_, bit_reader));
384  num_channels_ += num_lfe_channel_elements;
385 
386  RCHECK(bit_reader->SkipBits(4 * num_lfe_channel_elements));
387  RCHECK(bit_reader->SkipBits(4 * num_assoc_data_elements));
388  RCHECK(bit_reader->SkipBits(5 * num_valid_cc_elements));
389 
390  bit_reader->SkipToNextByte();
391 
392  uint8_t comment_field_bytes = 0;
393  RCHECK(bit_reader->ReadBits(8, &comment_field_bytes));
394  RCHECK(bit_reader->SkipBytes(comment_field_bytes));
395  return true;
396 }
397 
398 } // namespace media
399 } // namespace shaka
static const size_t kADTSHeaderSize
Size in bytes of the ADTS header added by ConvertEsdsToADTS().
virtual bool ConvertToADTS(const uint8_t *data, size_t data_size, std::vector< uint8_t > *audio_frame) const
virtual bool Parse(const std::vector< uint8_t > &data)
A class to read bit streams.
Definition: bit_reader.h:20
size_t bits_available() const
Definition: bit_reader.h:92
bool ReadBits(size_t num_bits, T *out)
Definition: bit_reader.h:38
All the methods that are virtual are virtual for mocking.
Definition: crypto_flags.cc:66