Shaka Packager SDK
Loading...
Searching...
No Matches
ac4_audio_util.cc
1// Copyright 2020 Google LLC. All rights reserved.
2//
3// Use of this source code is governed by a BSD-style
4// license that can be found in the LICENSE file or at
5// https://developers.google.com/open-source/licenses/bsd
6
7#include <packager/media/codecs/ac4_audio_util.h>
8
9#include <absl/strings/escaping.h>
10#include <absl/strings/str_format.h>
11
12#include <packager/media/base/bit_reader.h>
13#include <packager/media/base/rcheck.h>
14#include <packager/utils/bytes_to_string_view.h>
15
16namespace shaka {
17namespace media {
18
19namespace {
20
// Speaker groups that can be signalled in an AC-4 channel mask.
//
// Every enumerator is a single-bit flag. The shift amount is the speaker
// group index, where index 0 is the least significant bit of the mask.
enum kAC4AudioChannelGroupIndex {
  kLRPair = 1 << 0,             // Left/Right pair
  kCentre = 1 << 1,             // Centre
  kLsRsPair = 1 << 2,           // Left surround/Right surround pair
  kLbRbPair = 1 << 3,           // Left back/Right back pair
  kTflTfrPair = 1 << 4,         // Top front left/Top front right pair
  kTblTbrPair = 1 << 5,         // Top back left/Top back right pair
  kLFE = 1 << 6,                // LFE
  kTlTrPair = 1 << 7,           // Top left/Top right pair
  kTslTsrPair = 1 << 8,         // Top side left/Top side right pair
  kTopfrontCentre = 1 << 9,     // Top front centre
  kTopbackCentre = 1 << 10,     // Top back centre
  kTopCentre = 1 << 11,         // Top centre
  kLFE2 = 1 << 12,              // LFE2
  kBflBfrPair = 1 << 13,        // Bottom front left/Bottom front right pair
  kBottomFrontCentre = 1 << 14, // Bottom front centre
  kBackCentre = 1 << 15,        // Back centre
  kLscrRscrPair = 1 << 16,      // Left screen/Right screen pair
  kLwRw = 1 << 17,              // Left wide/Right wide pair
  kVhlVhrPair = 1 << 18,        // Vertical height left/Vertical height right pair
};
63
64// Mapping of channel configurations to the MPEG audio value based on ETSI TS
65// 103 192-2 V1.2.1 Digital Audio Compression (AC-4) Standard;
66// Part 2: Immersive and personalized Table G.1
67uint32_t AC4ChannelMasktoMPEGValue(uint32_t channel_mask) {
68 uint32_t ret = 0;
69
70 switch (channel_mask) {
71 case kCentre:
72 ret = 1;
73 break;
74 case kLRPair:
75 ret = 2;
76 break;
77 case kCentre | kLRPair:
78 ret = 3;
79 break;
80 case kCentre | kLRPair | kBackCentre:
81 ret = 4;
82 break;
83 case kCentre | kLRPair | kLsRsPair:
84 ret = 5;
85 break;
86 case kCentre | kLRPair | kLsRsPair | kLFE:
87 ret = 6;
88 break;
89 case kCentre | kLRPair | kLsRsPair | kLFE | kLwRw:
90 ret = 7;
91 break;
92 case kBackCentre | kLRPair:
93 ret = 9;
94 break;
95 case kLRPair | kLsRsPair:
96 ret = 10;
97 break;
98 case kCentre | kLRPair | kLsRsPair | kLFE | kBackCentre:
99 ret = 11;
100 break;
101 case kCentre | kLRPair | kLsRsPair | kLbRbPair | kLFE:
102 ret = 12;
103 break;
104 case kLwRw | kBackCentre | kBottomFrontCentre | kBflBfrPair | kLFE2 |
105 kTopCentre | kTopbackCentre | kTopfrontCentre | kTslTsrPair | kLFE |
106 kTblTbrPair | kTflTfrPair | kLbRbPair | kLsRsPair | kCentre | kLRPair:
107 case kVhlVhrPair | kLwRw | kBackCentre | kBottomFrontCentre | kBflBfrPair |
108 kLFE2 | kTopCentre | kTopbackCentre | kTopfrontCentre | kTslTsrPair |
109 kLFE | kTblTbrPair | kLbRbPair | kLsRsPair | kCentre | kLRPair:
110 ret = 13;
111 break;
112 case kLFE | kTflTfrPair | kLsRsPair | kCentre | kLRPair:
113 case kVhlVhrPair | kLFE | kCentre | kLRPair | kLsRsPair:
114 ret = 14;
115 break;
116 case kLFE2 | kTopbackCentre | kLFE | kTflTfrPair | kCentre | kLRPair |
117 kLsRsPair | kLbRbPair:
118 case kVhlVhrPair | kLFE2 | kTopbackCentre | kLFE | kCentre | kLRPair |
119 kLsRsPair | kLbRbPair:
120 ret = 15;
121 break;
122 case kLFE | kTblTbrPair | kTflTfrPair | kLsRsPair | kCentre | kLRPair:
123 case kVhlVhrPair | kLFE | kTblTbrPair | kLsRsPair | kCentre | kLRPair:
124 ret = 16;
125 break;
126 case kTopCentre | kTopfrontCentre | kLFE | kTblTbrPair | kTflTfrPair |
127 kLsRsPair | kCentre | kLRPair:
128 case kVhlVhrPair | kTopCentre | kTopfrontCentre | kLFE | kTblTbrPair |
129 kLsRsPair | kCentre | kLRPair:
130 ret = 17;
131 break;
132 case kTopCentre | kTopfrontCentre | kLFE | kTblTbrPair | kTflTfrPair |
133 kCentre | kLRPair | kLsRsPair | kLbRbPair:
134 case kVhlVhrPair | kTopCentre | kTopfrontCentre | kLFE | kTblTbrPair |
135 kCentre | kLRPair | kLsRsPair | kLbRbPair:
136 ret = 18;
137 break;
138 case kLFE | kTblTbrPair | kTflTfrPair | kCentre | kLRPair | kLsRsPair |
139 kLbRbPair:
140 case kVhlVhrPair | kLFE | kTblTbrPair | kCentre | kLRPair | kLsRsPair |
141 kLbRbPair:
142 ret = 19;
143 break;
144 case kLscrRscrPair | kLFE | kTblTbrPair | kTflTfrPair | kCentre | kLRPair |
145 kLsRsPair | kLbRbPair:
146 case kVhlVhrPair | kLscrRscrPair | kLFE | kTblTbrPair | kCentre | kLRPair |
147 kLsRsPair | kLbRbPair:
148 ret = 20;
149 break;
150 default:
151 ret = 0xFFFFFFFF;
152 }
153 return ret;
154}
155
156// Parse AC-4 substream group based on ETSI TS 103 192-2 V1.2.1 Digital Audio
157// Compression (AC-4) Standard; Part 2: Immersive and personalized E.11.
158bool ParseAC4SubStreamGroupDsi(BitReader& bit_reader) {
159 bool b_substream_present;
160 RCHECK(bit_reader.ReadBits(1, &b_substream_present));
161 bool b_hsf_ext;
162 RCHECK(bit_reader.ReadBits(1, &b_hsf_ext));
163 bool b_channel_coded;
164 RCHECK(bit_reader.ReadBits(1, &b_channel_coded));
165 uint8_t n_substreams;
166 RCHECK(bit_reader.ReadBits(8, &n_substreams));
167 for (uint8_t i = 0; i < n_substreams; i++) {
168 RCHECK(bit_reader.SkipBits(2));
169 bool b_substream_bitrate_indicator;
170 RCHECK(bit_reader.ReadBits(1, &b_substream_bitrate_indicator));
171 if (b_substream_bitrate_indicator) {
172 RCHECK(bit_reader.SkipBits(5));
173 }
174 if (b_channel_coded) {
175 RCHECK(bit_reader.SkipBits(24));
176 } else {
177 bool b_ajoc;
178 RCHECK(bit_reader.ReadBits(1, &b_ajoc));
179 if (b_ajoc) {
180 bool b_static_dmx;
181 RCHECK(bit_reader.ReadBits(1, &b_static_dmx));
182 if (!b_static_dmx) {
183 RCHECK(bit_reader.SkipBits(4));
184 }
185 RCHECK(bit_reader.SkipBits(6));
186 }
187 RCHECK(bit_reader.SkipBits(4));
188 }
189 }
190 bool b_content_type;
191 RCHECK(bit_reader.ReadBits(1, &b_content_type));
192 if (b_content_type) {
193 RCHECK(bit_reader.SkipBits(3));
194 bool b_language_indicator;
195 RCHECK(bit_reader.ReadBits(1, &b_language_indicator));
196 if (b_language_indicator) {
197 uint8_t n_language_tag_bytes;
198 RCHECK(bit_reader.ReadBits(6, &n_language_tag_bytes));
199 RCHECK(bit_reader.SkipBits(n_language_tag_bytes * 8));
200 }
201 }
202 return true;
203}
204
205// Parse AC-4 Presentation V1 based on ETSI TS 103 192-2 V1.2.1 Digital Audio
206// Compression (AC-4) Standard;Part 2: Immersive and personalized E.10.
207bool ParseAC4PresentationV1Dsi(BitReader& bit_reader,
208 uint32_t pres_bytes,
209 uint8_t* mdcompat,
210 uint32_t* presentation_channel_mask_v1,
211 bool* dolby_cbi_indicator,
212 uint8_t* dolby_atmos_indicator) {
213 bool ret = true;
214 // Record the initial offset.
215 const size_t presentation_start = bit_reader.bit_position();
216 uint8_t presentation_config_v1;
217 RCHECK(bit_reader.ReadBits(5, &presentation_config_v1));
218 uint8_t b_add_emdf_substreams;
219 // set default value (stereo content) for output parameters.
220 *mdcompat = 0;
221 *presentation_channel_mask_v1 = 2;
222 *dolby_cbi_indicator = false;
223 *dolby_atmos_indicator = 0;
224 if (presentation_config_v1 == 0x06) {
225 b_add_emdf_substreams = 1;
226 } else {
227 RCHECK(bit_reader.ReadBits(3, mdcompat));
228 bool b_presentation_id;
229 RCHECK(bit_reader.ReadBits(1, &b_presentation_id));
230 if (b_presentation_id) {
231 RCHECK(bit_reader.SkipBits(5));
232 }
233 RCHECK(bit_reader.SkipBits(19));
234 bool b_presentation_channel_coded;
235 RCHECK(bit_reader.ReadBits(1, &b_presentation_channel_coded));
236 *presentation_channel_mask_v1 = 0;
237 if (b_presentation_channel_coded) {
238 uint8_t dsi_presentation_ch_mode;
239 RCHECK(bit_reader.ReadBits(5, &dsi_presentation_ch_mode));
240 if (dsi_presentation_ch_mode >= 11 && dsi_presentation_ch_mode <= 14) {
241 RCHECK(bit_reader.SkipBits(1));
242 uint8_t pres_top_channel_pairs;
243 RCHECK(bit_reader.ReadBits(2, &pres_top_channel_pairs));
244 if (pres_top_channel_pairs) {
245 *dolby_cbi_indicator = true;
246 }
247 } else if (dsi_presentation_ch_mode == 15) {
248 *dolby_cbi_indicator = true;
249 }
250 RCHECK(bit_reader.ReadBits(24, presentation_channel_mask_v1));
251 }
252 bool b_presentation_core_differs;
253 RCHECK(bit_reader.ReadBits(1, &b_presentation_core_differs));
254 if (b_presentation_core_differs) {
255 bool b_presentation_core_channel_coded;
256 RCHECK(bit_reader.ReadBits(1, &b_presentation_core_channel_coded));
257 if (b_presentation_core_channel_coded) {
258 RCHECK(bit_reader.SkipBits(2));
259 }
260 }
261 bool b_presentation_filter;
262 RCHECK(bit_reader.ReadBits(1, &b_presentation_filter));
263 if (b_presentation_filter) {
264 RCHECK(bit_reader.SkipBits(1));
265 uint8_t n_filter_bytes;
266 RCHECK(bit_reader.ReadBits(8, &n_filter_bytes));
267 RCHECK(bit_reader.SkipBits(n_filter_bytes * 8));
268 }
269 if (presentation_config_v1 == 0x1f) {
270 ret &= ParseAC4SubStreamGroupDsi(bit_reader);
271 } else {
272 RCHECK(bit_reader.SkipBits(1));
273 if (presentation_config_v1 == 0 || presentation_config_v1 == 1 ||
274 presentation_config_v1 == 2) {
275 ret &= ParseAC4SubStreamGroupDsi(bit_reader);
276 ret &= ParseAC4SubStreamGroupDsi(bit_reader);
277 }
278 if (presentation_config_v1 == 3 || presentation_config_v1 == 4) {
279 ret &= ParseAC4SubStreamGroupDsi(bit_reader);
280 ret &= ParseAC4SubStreamGroupDsi(bit_reader);
281 ret &= ParseAC4SubStreamGroupDsi(bit_reader);
282 }
283 if (presentation_config_v1 == 5) {
284 uint8_t n_substream_groups_minus2;
285 RCHECK(bit_reader.ReadBits(3, &n_substream_groups_minus2));
286 for (uint8_t sg = 0; sg < n_substream_groups_minus2 + 2; sg++) {
287 ret &= ParseAC4SubStreamGroupDsi(bit_reader);
288 }
289 }
290 if (presentation_config_v1 > 5) {
291 uint8_t n_skip_bytes;
292 RCHECK(bit_reader.ReadBits(7, &n_skip_bytes));
293 RCHECK(bit_reader.SkipBits(n_skip_bytes * 8));
294 }
295 }
296 RCHECK(bit_reader.SkipBits(1));
297 RCHECK(bit_reader.ReadBits(1, &b_add_emdf_substreams));
298 }
299 if (b_add_emdf_substreams) {
300 uint8_t n_add_emdf_substreams;
301 RCHECK(bit_reader.ReadBits(7, &n_add_emdf_substreams));
302 RCHECK(bit_reader.SkipBits(n_add_emdf_substreams * 15));
303 }
304 bool b_presentation_bitrate_info;
305 RCHECK(bit_reader.ReadBits(1, &b_presentation_bitrate_info));
306 if (b_presentation_bitrate_info) {
307 // Skip bit rate information based on ETSI TS 103 190-2 v1.2.1 E.7.1
308 RCHECK(bit_reader.SkipBits(66));
309 }
310 bool b_alternative;
311 RCHECK(bit_reader.ReadBits(1, &b_alternative));
312 if (b_alternative) {
313 bit_reader.SkipToNextByte();
314 // Parse alternative information based on ETSI TS 103 190-2 v1.2.1 E.12
315 uint16_t name_len;
316 RCHECK(bit_reader.ReadBits(16, &name_len));
317 RCHECK(bit_reader.SkipBits(name_len * 8));
318 uint8_t n_targets;
319 RCHECK(bit_reader.ReadBits(5, &n_targets));
320 RCHECK(bit_reader.SkipBits(n_targets * 11));
321 }
322 bit_reader.SkipToNextByte();
323 if ((bit_reader.bit_position() - presentation_start) <=
324 (pres_bytes - 1) * 8) {
325 RCHECK(bit_reader.SkipBits(1));
326 RCHECK(bit_reader.ReadBits(1, dolby_atmos_indicator));
327 RCHECK(bit_reader.SkipBits(4));
328 bool b_extended_presentation_group_index;
329 RCHECK(bit_reader.ReadBits(1, &b_extended_presentation_group_index));
330 if (b_extended_presentation_group_index) {
331 RCHECK(bit_reader.SkipBits(9));
332 } else {
333 RCHECK(bit_reader.SkipBits(1));
334 }
335 }
336 return ret;
337}
338
339bool ExtractAc4Data(const std::vector<uint8_t>& ac4_data,
340 uint8_t* bitstream_version,
341 uint8_t* presentation_version,
342 uint8_t* mdcompat,
343 uint32_t* presentation_channel_mask_v1,
344 bool* dolby_ims_indicator,
345 bool* dolby_cbi_indicator) {
346 BitReader bit_reader(ac4_data.data(), ac4_data.size());
347
348 uint16_t n_presentation;
349 RCHECK(bit_reader.SkipBits(3) && bit_reader.ReadBits(7, bitstream_version));
350 RCHECK(bit_reader.SkipBits(5) && bit_reader.ReadBits(9, &n_presentation));
351
352 if (*bitstream_version == 2) {
353 uint8_t b_program_id = 0;
354 RCHECK(bit_reader.ReadBits(1, &b_program_id));
355 if (b_program_id) {
356 RCHECK(bit_reader.SkipBits(16));
357 uint8_t b_uuid = 0;
358 RCHECK(bit_reader.ReadBits(1, &b_uuid));
359 if (b_uuid) {
360 RCHECK(bit_reader.SkipBits(16 * 8));
361 }
362 }
363 } else if (*bitstream_version == 0 || *bitstream_version == 1) {
364 LOG(WARNING) << "Bitstream version 0 or 1 is not supported";
365 return false;
366 } else {
367 LOG(WARNING) << "Invalid Bitstream version";
368 return false;
369 }
370
371 RCHECK(bit_reader.SkipBits(66));
372 bit_reader.SkipToNextByte();
373
374 // AC4 stream containing the single presentation is valid for OTT only.
375 // IMS has two presentations, and the 2nd is legacy (duplicated) presentation.
376 // So it can be considered as AC4 stream with single presentation. And IMS
377 // presentation must be prior to legacy presentation.
378 // In other word, only the 1st presentation in AC4 stream need to be parsed.
379 const uint8_t ott_n_presentation = 1;
380 for (uint8_t i = 0; i < ott_n_presentation; i++) {
381 RCHECK(bit_reader.ReadBits(8, presentation_version));
382 // *presentation_version == 2 means IMS presentation.
383 if ((*presentation_version == 2 && n_presentation > 2) ||
384 (*presentation_version == 1 && n_presentation > 1)) {
385 LOG(WARNING) << "Seeing multiple presentations, only single presentation "
386 << "(including IMS presentation) is supported";
387 return false;
388 }
389 uint32_t pres_bytes;
390 RCHECK(bit_reader.ReadBits(8, &pres_bytes));
391 if (pres_bytes == 255) {
392 uint32_t add_pres_bytes;
393 RCHECK(bit_reader.ReadBits(16, &add_pres_bytes));
394 pres_bytes += add_pres_bytes;
395 }
396
397 size_t presentation_bits = 0;
398 *dolby_ims_indicator = false;
399 if (*presentation_version == 0) {
400 LOG(WARNING) << "Presentation version 0 is not supported";
401 return false;
402 } else {
403 if (*presentation_version == 1 || *presentation_version == 2) {
404 if (*presentation_version == 2) {
405 *dolby_ims_indicator = true;
406 }
407 const size_t presentation_start = bit_reader.bit_position();
408 // dolby_atmos_indicator is extended in Dolby internal specs.
409 // It indicates whether the source content before encoding is Atmos.
410 // No final decision about how to use it in OTT.
411 // Parse it for the future usage.
412 uint8_t dolby_atmos_indicator;
413 if (!ParseAC4PresentationV1Dsi(
414 bit_reader, pres_bytes, mdcompat, presentation_channel_mask_v1,
415 dolby_cbi_indicator, &dolby_atmos_indicator)) {
416 return false;
417 }
418 const size_t presentation_end = bit_reader.bit_position();
419 presentation_bits = presentation_end - presentation_start;
420 } else {
421 LOG(WARNING) << "Invalid Presentation version";
422 return false;
423 }
424 }
425 size_t skip_bits = pres_bytes * 8 - presentation_bits;
426 RCHECK(bit_reader.SkipBits(skip_bits));
427 }
428 return true;
429}
430} // namespace
431
432bool CalculateAC4ChannelMask(const std::vector<uint8_t>& ac4_data,
433 uint32_t* ac4_channel_mask) {
434 uint8_t bitstream_version;
435 uint8_t presentation_version;
436 uint8_t mdcompat;
437 uint32_t pre_channel_mask;
438 bool dolby_ims_indicator;
439 bool dolby_cbi_indicator;
440
441 if (!ExtractAc4Data(ac4_data, &bitstream_version, &presentation_version,
442 &mdcompat, &pre_channel_mask, &dolby_ims_indicator,
443 &dolby_cbi_indicator)) {
444 LOG(WARNING) << "Seeing invalid AC4 data: "
445 << absl::BytesToHexString(
447 return false;
448 }
449
450 if (pre_channel_mask) {
451 *ac4_channel_mask = pre_channel_mask;
452 } else {
453 *ac4_channel_mask = 0x800000;
454 }
455 return true;
456}
457
458bool CalculateAC4ChannelMPEGValue(const std::vector<uint8_t>& ac4_data,
459 uint32_t* ac4_channel_mpeg_value) {
460 uint8_t bitstream_version;
461 uint8_t presentation_version;
462 uint8_t mdcompat;
463 uint32_t pre_channel_mask;
464 bool dolby_ims_indicator;
465 bool dolby_cbi_indicator;
466
467 if (!ExtractAc4Data(ac4_data, &bitstream_version, &presentation_version,
468 &mdcompat, &pre_channel_mask, &dolby_ims_indicator,
469 &dolby_cbi_indicator)) {
470 LOG(WARNING) << "Seeing invalid AC4 data: "
471 << absl::BytesToHexString(
473 return false;
474 }
475
476 *ac4_channel_mpeg_value = AC4ChannelMasktoMPEGValue(pre_channel_mask);
477 return true;
478}
479
480bool GetAc4CodecInfo(const std::vector<uint8_t>& ac4_data,
481 uint8_t* ac4_codec_info) {
482 uint8_t bitstream_version;
483 uint8_t presentation_version;
484 uint8_t mdcompat;
485 uint32_t pre_channel_mask;
486 bool dolby_ims_indicator;
487 bool dolby_cbi_indicator;
488
489 if (!ExtractAc4Data(ac4_data, &bitstream_version, &presentation_version,
490 &mdcompat, &pre_channel_mask, &dolby_ims_indicator,
491 &dolby_cbi_indicator)) {
492 LOG(WARNING) << "Seeing invalid AC4 data: "
493 << absl::BytesToHexString(
495 return false;
496 }
497
498 // The valid value of bitstream_version (8 bits) is 2, the valid value of
499 // presentation_version (8 bits) is 1 or 2, and mdcompat is 3 bits.
500 // So uint8_t is fine now. If Dolby extends the value of bitstream_version and
501 // presentation_version in future, maybe need change the type from uint8_t to
502 // uint16_t or uint32_t to accommodate the valid values.
503 // If that, AudioStreamInfo::GetCodecString need to be changed accordingly.
504 // bitstream_version (3bits) + presentation_version (2bits) + mdcompat (3bits)
505 *ac4_codec_info = ((bitstream_version << 5) |
506 ((presentation_version << 3) & 0x1F) | (mdcompat & 0x7));
507 return true;
508}
509
510bool GetAc4ImmersiveInfo(const std::vector<uint8_t>& ac4_data,
511 bool* ac4_ims_flag,
512 bool* ac4_cbi_flag) {
513 uint8_t bitstream_version;
514 uint8_t presentation_version;
515 uint8_t mdcompat;
516 uint32_t pre_channel_mask;
517
518 if (!ExtractAc4Data(ac4_data, &bitstream_version, &presentation_version,
519 &mdcompat, &pre_channel_mask, ac4_ims_flag,
520 ac4_cbi_flag)) {
521 LOG(WARNING) << "Seeing invalid AC4 data: "
522 << absl::BytesToHexString(
524 return false;
525 }
526
527 return true;
528}
529
530} // namespace media
531} // namespace shaka
All the methods that are virtual are virtual for mocking.
std::string_view byte_vector_to_string_view(const std::vector< uint8_t > &bytes)
Convert byte vector to string_view.