Shaka Packager SDK
Loading...
Searching...
No Matches
ac4_audio_util.cc
// Copyright 2020 Google LLC. All rights reserved.
//
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd
6
7#include <packager/media/codecs/ac4_audio_util.h>
8
9#include <absl/strings/escaping.h>
10#include <absl/strings/str_format.h>
11
12#include <packager/media/base/bit_reader.h>
13#include <packager/media/base/rcheck.h>
14#include <packager/utils/bytes_to_string_view.h>
15
16namespace shaka {
17namespace media {
18
19namespace {
20
// Speaker group flags. Each enumerator is a single bit whose shift amount is
// the speaker group index:
//
//   Bit       Location
//   0 (LSB)   Left/Right pair
//   1         Centre
//   2         Left surround/Right surround pair
//   3         Left back/Right back pair
//   4         Top front left/Top front right pair
//   5         Top back left/Top back right pair
//   6         LFE
//   7         Top left/Top right pair
//   8         Top side left/Top side right pair
//   9         Top front centre
//   10        Top back centre
//   11        Top centre
//   12        LFE2
//   13        Bottom front left/Bottom front right pair
//   14        Bottom front centre
//   15        Back centre
//   16        Left screen/Right screen pair
//   17        Left wide/Right wide pair
//   18        Vertical height left/Vertical height right pair
enum kAC4AudioChannelGroupIndex {
  kLRPair = 1 << 0,
  kCentre = 1 << 1,
  kLsRsPair = 1 << 2,
  kLbRbPair = 1 << 3,
  kTflTfrPair = 1 << 4,
  kTblTbrPair = 1 << 5,
  kLFE = 1 << 6,
  kTlTrPair = 1 << 7,
  kTslTsrPair = 1 << 8,
  kTopfrontCentre = 1 << 9,
  kTopbackCentre = 1 << 10,
  kTopCentre = 1 << 11,
  kLFE2 = 1 << 12,
  kBflBfrPair = 1 << 13,
  kBottomFrontCentre = 1 << 14,
  kBackCentre = 1 << 15,
  kLscrRscrPair = 1 << 16,
  kLwRw = 1 << 17,
  kVhlVhrPair = 1 << 18,
};
63
64// Mapping of channel configurations to the MPEG audio value based on ETSI TS
65// 103 192-2 V1.2.1 Digital Audio Compression (AC-4) Standard;
66// Part 2: Immersive and personalized Table G.1
67uint32_t AC4ChannelMasktoMPEGValue(uint32_t channel_mask) {
68 uint32_t ret = 0;
69
70 switch (channel_mask) {
71 case kCentre:
72 ret = 1;
73 break;
74 case kLRPair:
75 ret = 2;
76 break;
77 case kCentre | kLRPair:
78 ret = 3;
79 break;
80 case kCentre | kLRPair | kBackCentre:
81 ret = 4;
82 break;
83 case kCentre | kLRPair | kLsRsPair:
84 ret = 5;
85 break;
86 case kCentre | kLRPair | kLsRsPair | kLFE:
87 ret = 6;
88 break;
89 case kCentre | kLRPair | kLsRsPair | kLFE | kLwRw:
90 ret = 7;
91 break;
92 case kBackCentre | kLRPair:
93 ret = 9;
94 break;
95 case kLRPair | kLsRsPair:
96 ret = 10;
97 break;
98 case kCentre | kLRPair | kLsRsPair | kLFE | kBackCentre:
99 ret = 11;
100 break;
101 case kCentre | kLRPair | kLsRsPair | kLbRbPair | kLFE:
102 ret = 12;
103 break;
104 case kLwRw | kBackCentre | kBottomFrontCentre | kBflBfrPair | kLFE2 |
105 kTopCentre | kTopbackCentre | kTopfrontCentre | kTslTsrPair | kLFE |
106 kTblTbrPair | kTflTfrPair | kLbRbPair | kLsRsPair | kCentre | kLRPair:
107 case kVhlVhrPair | kLwRw | kBackCentre | kBottomFrontCentre | kBflBfrPair|
108 kLFE2 | kTopCentre | kTopbackCentre | kTopfrontCentre | kTslTsrPair |
109 kLFE | kTblTbrPair | kLbRbPair | kLsRsPair | kCentre | kLRPair:
110 ret = 13;
111 break;
112 case kLFE | kTflTfrPair | kLsRsPair | kCentre | kLRPair:
113 case kVhlVhrPair | kLFE | kCentre | kLRPair | kLsRsPair:
114 ret = 14;
115 break;
116 case kLFE2 | kTopbackCentre | kLFE | kTflTfrPair | kCentre | kLRPair |
117 kLsRsPair | kLbRbPair:
118 case kVhlVhrPair | kLFE2 | kTopbackCentre | kLFE | kCentre | kLRPair |
119 kLsRsPair | kLbRbPair:
120 ret = 15;
121 break;
122 case kLFE | kTblTbrPair | kTflTfrPair | kLsRsPair | kCentre | kLRPair:
123 case kVhlVhrPair | kLFE | kTblTbrPair | kLsRsPair | kCentre | kLRPair:
124 ret = 16;
125 break;
126 case kTopCentre | kTopfrontCentre | kLFE | kTblTbrPair | kTflTfrPair |
127 kLsRsPair | kCentre | kLRPair:
128 case kVhlVhrPair | kTopCentre | kTopfrontCentre | kLFE | kTblTbrPair |
129 kLsRsPair | kCentre | kLRPair:
130 ret = 17;
131 break;
132 case kTopCentre | kTopfrontCentre | kLFE | kTblTbrPair | kTflTfrPair |
133 kCentre | kLRPair | kLsRsPair | kLbRbPair:
134 case kVhlVhrPair | kTopCentre | kTopfrontCentre | kLFE | kTblTbrPair |
135 kCentre | kLRPair | kLsRsPair | kLbRbPair:
136 ret = 18;
137 break;
138 case kLFE | kTblTbrPair | kTflTfrPair | kCentre | kLRPair | kLsRsPair |
139 kLbRbPair:
140 case kVhlVhrPair | kLFE | kTblTbrPair | kCentre | kLRPair | kLsRsPair |
141 kLbRbPair:
142 ret = 19;
143 break;
144 case kLscrRscrPair | kLFE | kTblTbrPair | kTflTfrPair | kCentre | kLRPair |
145 kLsRsPair | kLbRbPair:
146 case kVhlVhrPair | kLscrRscrPair | kLFE | kTblTbrPair | kCentre | kLRPair |
147 kLsRsPair | kLbRbPair:
148 ret = 20;
149 break;
150 default:
151 ret = 0xFFFFFFFF;
152 }
153 return ret;
154}
155
156// Parse AC-4 substream group based on ETSI TS 103 192-2 V1.2.1 Digital Audio
157// Compression (AC-4) Standard; Part 2: Immersive and personalized E.11.
158bool ParseAC4SubStreamGroupDsi(BitReader& bit_reader) {
159 bool b_substream_present;
160 RCHECK(bit_reader.ReadBits(1, &b_substream_present));
161 bool b_hsf_ext;
162 RCHECK(bit_reader.ReadBits(1, &b_hsf_ext));
163 bool b_channel_coded;
164 RCHECK(bit_reader.ReadBits(1, &b_channel_coded));
165 uint8_t n_substreams;
166 RCHECK(bit_reader.ReadBits(8, &n_substreams));
167 for (uint8_t i = 0; i < n_substreams; i++) {
168 RCHECK(bit_reader.SkipBits(2));
169 bool b_substream_bitrate_indicator;
170 RCHECK(bit_reader.ReadBits(1, &b_substream_bitrate_indicator));
171 if (b_substream_bitrate_indicator) {
172 RCHECK(bit_reader.SkipBits(5));
173 }
174 if (b_channel_coded) {
175 RCHECK(bit_reader.SkipBits(24));
176 } else {
177 bool b_ajoc;
178 RCHECK(bit_reader.ReadBits(1, &b_ajoc));
179 if (b_ajoc) {
180 bool b_static_dmx;
181 RCHECK(bit_reader.ReadBits(1, &b_static_dmx));
182 if (!b_static_dmx) {
183 RCHECK(bit_reader.SkipBits(4));
184 }
185 RCHECK(bit_reader.SkipBits(6));
186 }
187 RCHECK(bit_reader.SkipBits(4));
188 }
189 }
190 bool b_content_type;
191 RCHECK(bit_reader.ReadBits(1, &b_content_type));
192 if (b_content_type) {
193 RCHECK(bit_reader.SkipBits(3));
194 bool b_language_indicator;
195 RCHECK(bit_reader.ReadBits(1, &b_language_indicator));
196 if (b_language_indicator) {
197 uint8_t n_language_tag_bytes;
198 RCHECK(bit_reader.ReadBits(6, &n_language_tag_bytes));
199 RCHECK(bit_reader.SkipBits(n_language_tag_bytes * 8));
200 }
201 }
202 return true;
203}
204
205// Parse AC-4 Presentation V1 based on ETSI TS 103 192-2 V1.2.1 Digital Audio
206// Compression (AC-4) Standard;Part 2: Immersive and personalized E.10.
207bool ParseAC4PresentationV1Dsi(BitReader& bit_reader,
208 uint32_t pres_bytes,
209 uint8_t* mdcompat,
210 uint32_t* presentation_channel_mask_v1,
211 bool* dolby_cbi_indicator,
212 uint8_t* dolby_atmos_indicator) {
213 bool ret = true;
214 // Record the initial offset.
215 const size_t presentation_start = bit_reader.bit_position();
216 uint8_t presentation_config_v1;
217 RCHECK(bit_reader.ReadBits(5, &presentation_config_v1));
218 uint8_t b_add_emdf_substreams;
219 // set default value (stereo content) for output parameters.
220 *mdcompat = 0;
221 *presentation_channel_mask_v1 = 2;
222 *dolby_cbi_indicator = false;
223 *dolby_atmos_indicator = 0;
224 if (presentation_config_v1 == 0x06) {
225 b_add_emdf_substreams = 1;
226 } else {
227 RCHECK(bit_reader.ReadBits(3, mdcompat));
228 bool b_presentation_id;
229 RCHECK(bit_reader.ReadBits(1, &b_presentation_id));
230 if (b_presentation_id) {
231 RCHECK(bit_reader.SkipBits(5));
232 }
233 RCHECK(bit_reader.SkipBits(19));
234 bool b_presentation_channel_coded;
235 RCHECK(bit_reader.ReadBits(1, &b_presentation_channel_coded));
236 *presentation_channel_mask_v1 = 0;
237 if (b_presentation_channel_coded) {
238 uint8_t dsi_presentation_ch_mode;
239 RCHECK(bit_reader.ReadBits(5, &dsi_presentation_ch_mode));
240 if (dsi_presentation_ch_mode >= 11 && dsi_presentation_ch_mode <= 14) {
241 RCHECK(bit_reader.SkipBits(1));
242 uint8_t pres_top_channel_pairs;
243 RCHECK(bit_reader.ReadBits(2, &pres_top_channel_pairs));
244 if (pres_top_channel_pairs) {
245 *dolby_cbi_indicator = true;
246 }
247 } else if (dsi_presentation_ch_mode == 15) {
248 *dolby_cbi_indicator = true;
249 }
250 RCHECK(bit_reader.ReadBits(24, presentation_channel_mask_v1));
251 }
252 bool b_presentation_core_differs;
253 RCHECK(bit_reader.ReadBits(1, &b_presentation_core_differs));
254 if (b_presentation_core_differs) {
255 bool b_presentation_core_channel_coded;
256 RCHECK(bit_reader.ReadBits(1, &b_presentation_core_channel_coded));
257 if (b_presentation_core_channel_coded) {
258 RCHECK(bit_reader.SkipBits(2));
259 }
260 }
261 bool b_presentation_filter;
262 RCHECK(bit_reader.ReadBits(1, &b_presentation_filter));
263 if (b_presentation_filter) {
264 RCHECK(bit_reader.SkipBits(1));
265 uint8_t n_filter_bytes;
266 RCHECK(bit_reader.ReadBits(8, &n_filter_bytes));
267 RCHECK(bit_reader.SkipBits(n_filter_bytes * 8));
268 }
269 if (presentation_config_v1 == 0x1f) {
270 ret &= ParseAC4SubStreamGroupDsi(bit_reader);
271 } else {
272 RCHECK(bit_reader.SkipBits(1));
273 if (presentation_config_v1 == 0 ||
274 presentation_config_v1 == 1 ||
275 presentation_config_v1 == 2) {
276 ret &= ParseAC4SubStreamGroupDsi(bit_reader);
277 ret &= ParseAC4SubStreamGroupDsi(bit_reader);
278 }
279 if (presentation_config_v1 == 3 || presentation_config_v1 == 4) {
280 ret &= ParseAC4SubStreamGroupDsi(bit_reader);
281 ret &= ParseAC4SubStreamGroupDsi(bit_reader);
282 ret &= ParseAC4SubStreamGroupDsi(bit_reader);
283 }
284 if (presentation_config_v1 == 5) {
285 uint8_t n_substream_groups_minus2;
286 RCHECK(bit_reader.ReadBits(3, &n_substream_groups_minus2));
287 for (uint8_t sg = 0; sg < n_substream_groups_minus2 + 2; sg++) {
288 ret &= ParseAC4SubStreamGroupDsi(bit_reader);
289 }
290 }
291 if (presentation_config_v1 > 5) {
292 uint8_t n_skip_bytes;
293 RCHECK(bit_reader.ReadBits(7, &n_skip_bytes));
294 RCHECK(bit_reader.SkipBits(n_skip_bytes * 8));
295 }
296 }
297 RCHECK(bit_reader.SkipBits(1));
298 RCHECK(bit_reader.ReadBits(1, &b_add_emdf_substreams));
299 }
300 if (b_add_emdf_substreams) {
301 uint8_t n_add_emdf_substreams;
302 RCHECK(bit_reader.ReadBits(7, &n_add_emdf_substreams));
303 RCHECK(bit_reader.SkipBits(n_add_emdf_substreams * 15));
304 }
305 bool b_presentation_bitrate_info;
306 RCHECK(bit_reader.ReadBits(1, &b_presentation_bitrate_info));
307 if (b_presentation_bitrate_info) {
308 // Skip bit rate information based on ETSI TS 103 190-2 v1.2.1 E.7.1
309 RCHECK(bit_reader.SkipBits(66));
310 }
311 bool b_alternative;
312 RCHECK(bit_reader.ReadBits(1, &b_alternative));
313 if (b_alternative) {
314 bit_reader.SkipToNextByte();
315 // Parse alternative information based on ETSI TS 103 190-2 v1.2.1 E.12
316 uint16_t name_len;
317 RCHECK(bit_reader.ReadBits(16, &name_len));
318 RCHECK(bit_reader.SkipBits(name_len * 8));
319 uint8_t n_targets;
320 RCHECK(bit_reader.ReadBits(5, &n_targets));
321 RCHECK(bit_reader.SkipBits(n_targets * 11));
322 }
323 bit_reader.SkipToNextByte();
324 if ((bit_reader.bit_position() - presentation_start) <=
325 (pres_bytes - 1) * 8) {
326 RCHECK(bit_reader.SkipBits(1));
327 RCHECK(bit_reader.ReadBits(1, dolby_atmos_indicator));
328 RCHECK(bit_reader.SkipBits(4));
329 bool b_extended_presentation_group_index;
330 RCHECK(bit_reader.ReadBits(1, &b_extended_presentation_group_index));
331 if (b_extended_presentation_group_index) {
332 RCHECK(bit_reader.SkipBits(9));
333 } else {
334 RCHECK(bit_reader.SkipBits(1));
335 }
336 }
337 return ret;
338}
339
340bool ExtractAc4Data(const std::vector<uint8_t>& ac4_data,
341 uint8_t* bitstream_version,
342 uint8_t* presentation_version,
343 uint8_t* mdcompat,
344 uint32_t* presentation_channel_mask_v1,
345 bool* dolby_ims_indicator,
346 bool* dolby_cbi_indicator) {
347 BitReader bit_reader(ac4_data.data(), ac4_data.size());
348
349 uint16_t n_presentation;
350 RCHECK(bit_reader.SkipBits(3) && bit_reader.ReadBits(7, bitstream_version));
351 RCHECK(bit_reader.SkipBits(5) && bit_reader.ReadBits(9, &n_presentation));
352
353 if (*bitstream_version == 2) {
354 uint8_t b_program_id = 0;
355 RCHECK(bit_reader.ReadBits(1, &b_program_id));
356 if (b_program_id) {
357 RCHECK(bit_reader.SkipBits(16));
358 uint8_t b_uuid = 0;
359 RCHECK(bit_reader.ReadBits(1, &b_uuid));
360 if (b_uuid) {
361 RCHECK(bit_reader.SkipBits(16 * 8));
362 }
363 }
364 } else if (*bitstream_version == 0 || *bitstream_version == 1) {
365 LOG(WARNING) << "Bitstream version 0 or 1 is not supported";
366 return false;
367 } else {
368 LOG(WARNING) << "Invalid Bitstream version";
369 return false;
370 }
371
372 RCHECK(bit_reader.SkipBits(66));
373 bit_reader.SkipToNextByte();
374
375 // AC4 stream containing the single presentation is valid for OTT only.
376 // IMS has two presentations, and the 2nd is legacy (duplicated) presentation.
377 // So it can be considered as AC4 stream with single presentation. And IMS
378 // presentation must be prior to legacy presentation.
379 // In other word, only the 1st presentation in AC4 stream need to be parsed.
380 const uint8_t ott_n_presentation = 1;
381 for (uint8_t i = 0; i < ott_n_presentation; i++) {
382 RCHECK(bit_reader.ReadBits(8, presentation_version));
383 // *presentation_version == 2 means IMS presentation.
384 if ((*presentation_version == 2 && n_presentation > 2) ||
385 (*presentation_version == 1 && n_presentation > 1) ) {
386 LOG(WARNING) << "Seeing multiple presentations, only single presentation "
387 << "(including IMS presentation) is supported";
388 return false;
389 }
390 uint32_t pres_bytes;
391 RCHECK(bit_reader.ReadBits(8, &pres_bytes));
392 if (pres_bytes == 255) {
393 uint32_t add_pres_bytes;
394 RCHECK(bit_reader.ReadBits(16, &add_pres_bytes));
395 pres_bytes += add_pres_bytes;
396 }
397
398 size_t presentation_bits = 0;
399 *dolby_ims_indicator = false;
400 if (*presentation_version == 0) {
401 LOG(WARNING) << "Presentation version 0 is not supported";
402 return false;
403 } else {
404 if (*presentation_version == 1 || *presentation_version == 2) {
405 if (*presentation_version == 2) {
406 *dolby_ims_indicator = true;
407 }
408 const size_t presentation_start = bit_reader.bit_position();
409 // dolby_atmos_indicator is extended in Dolby internal specs.
410 // It indicates whether the source content before encoding is Atmos.
411 // No final decision about how to use it in OTT.
412 // Parse it for the future usage.
413 uint8_t dolby_atmos_indicator;
414 if (!ParseAC4PresentationV1Dsi(bit_reader, pres_bytes, mdcompat,
415 presentation_channel_mask_v1,
416 dolby_cbi_indicator,
417 &dolby_atmos_indicator)) {
418 return false;
419 }
420 const size_t presentation_end = bit_reader.bit_position();
421 presentation_bits = presentation_end - presentation_start;
422 } else {
423 LOG(WARNING) << "Invalid Presentation version";
424 return false;
425 }
426 }
427 size_t skip_bits = pres_bytes * 8 - presentation_bits;
428 RCHECK(bit_reader.SkipBits(skip_bits));
429 }
430 return true;
431}
432} // namespace
433
434bool CalculateAC4ChannelMask(const std::vector<uint8_t>& ac4_data,
435 uint32_t* ac4_channel_mask) {
436 uint8_t bitstream_version;
437 uint8_t presentation_version;
438 uint8_t mdcompat;
439 uint32_t pre_channel_mask;
440 bool dolby_ims_indicator;
441 bool dolby_cbi_indicator;
442
443 if (!ExtractAc4Data(ac4_data, &bitstream_version, &presentation_version,
444 &mdcompat, &pre_channel_mask, &dolby_ims_indicator,
445 &dolby_cbi_indicator)) {
446 LOG(WARNING) << "Seeing invalid AC4 data: "
447 << absl::BytesToHexString(
449 return false;
450 }
451
452 if (pre_channel_mask) {
453 *ac4_channel_mask = pre_channel_mask;
454 } else {
455 *ac4_channel_mask = 0x800000;
456 }
457 return true;
458}
459
460bool CalculateAC4ChannelMPEGValue(const std::vector<uint8_t>& ac4_data,
461 uint32_t* ac4_channel_mpeg_value) {
462 uint8_t bitstream_version;
463 uint8_t presentation_version;
464 uint8_t mdcompat;
465 uint32_t pre_channel_mask;
466 bool dolby_ims_indicator;
467 bool dolby_cbi_indicator;
468
469 if (!ExtractAc4Data(ac4_data, &bitstream_version, &presentation_version,
470 &mdcompat, &pre_channel_mask, &dolby_ims_indicator,
471 &dolby_cbi_indicator)) {
472 LOG(WARNING) << "Seeing invalid AC4 data: "
473 << absl::BytesToHexString(
475 return false;
476 }
477
478 *ac4_channel_mpeg_value = AC4ChannelMasktoMPEGValue(pre_channel_mask);
479 return true;
480}
481
482bool GetAc4CodecInfo(const std::vector<uint8_t>& ac4_data,
483 uint8_t* ac4_codec_info) {
484 uint8_t bitstream_version;
485 uint8_t presentation_version;
486 uint8_t mdcompat;
487 uint32_t pre_channel_mask;
488 bool dolby_ims_indicator;
489 bool dolby_cbi_indicator;
490
491 if (!ExtractAc4Data(ac4_data, &bitstream_version, &presentation_version,
492 &mdcompat, &pre_channel_mask, &dolby_ims_indicator,
493 &dolby_cbi_indicator)) {
494 LOG(WARNING) << "Seeing invalid AC4 data: "
495 << absl::BytesToHexString(
497 return false;
498 }
499
500 // The valid value of bitstream_version (8 bits) is 2, the valid value of
501 // presentation_version (8 bits) is 1 or 2, and mdcompat is 3 bits.
502 // So uint8_t is fine now. If Dolby extends the value of bitstream_version and
503 // presentation_version in future, maybe need change the type from uint8_t to
504 // uint16_t or uint32_t to accommodate the valid values.
505 // If that, AudioStreamInfo::GetCodecString need to be changed accordingly.
506 // bitstream_version (3bits) + presentation_version (2bits) + mdcompat (3bits)
507 *ac4_codec_info = ((bitstream_version << 5) |
508 ((presentation_version << 3) & 0x1F) |
509 (mdcompat & 0x7));
510 return true;
511}
512
513bool GetAc4ImmersiveInfo(const std::vector<uint8_t>& ac4_data,
514 bool* ac4_ims_flag,
515 bool* ac4_cbi_flag) {
516 uint8_t bitstream_version;
517 uint8_t presentation_version;
518 uint8_t mdcompat;
519 uint32_t pre_channel_mask;
520
521 if (!ExtractAc4Data(ac4_data, &bitstream_version, &presentation_version,
522 &mdcompat, &pre_channel_mask, ac4_ims_flag,
523 ac4_cbi_flag)) {
524 LOG(WARNING) << "Seeing invalid AC4 data: "
525 << absl::BytesToHexString(
527 return false;
528 }
529
530 return true;
531}
532
533} // namespace media
534} // namespace shaka
All the methods that are virtual are virtual for mocking.
std::string_view byte_vector_to_string_view(const std::vector< uint8_t > &bytes)
Convert byte vector to string_view.