Shaka Packager SDK
ac4_audio_util.cc
1 // Copyright 2020 Google LLC. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file or at
5 // https://developers.google.com/open-source/licenses/bsd
6 
7 #include <packager/media/codecs/ac4_audio_util.h>
8 
9 #include <absl/strings/escaping.h>
10 #include <absl/strings/str_format.h>
11 
12 #include <packager/media/base/bit_reader.h>
13 #include <packager/media/base/rcheck.h>
14 #include <packager/utils/bytes_to_string_view.h>
15 
16 namespace shaka {
17 namespace media {
18 
19 namespace {
20 
// AC-4 speaker group flags. Each enumerator is a single bit of the channel
// mask; the shift amount is the speaker group index (bit 0 is the LSB).
enum kAC4AudioChannelGroupIndex {
  kLRPair = 1 << 0,             // Left/Right pair
  kCentre = 1 << 1,             // Centre
  kLsRsPair = 1 << 2,           // Left surround/Right surround pair
  kLbRbPair = 1 << 3,           // Left back/Right back pair
  kTflTfrPair = 1 << 4,         // Top front left/Top front right pair
  kTblTbrPair = 1 << 5,         // Top back left/Top back right pair
  kLFE = 1 << 6,                // LFE
  kTlTrPair = 1 << 7,           // Top left/Top right pair
  kTslTsrPair = 1 << 8,         // Top side left/Top side right pair
  kTopfrontCentre = 1 << 9,     // Top front centre
  kTopbackCentre = 1 << 10,     // Top back centre
  kTopCentre = 1 << 11,         // Top centre
  kLFE2 = 1 << 12,              // LFE2
  kBflBfrPair = 1 << 13,        // Bottom front left/Bottom front right pair
  kBottomFrontCentre = 1 << 14, // Bottom front centre
  kBackCentre = 1 << 15,        // Back centre
  kLscrRscrPair = 1 << 16,      // Left screen/Right screen pair
  kLwRw = 1 << 17,              // Left wide/Right wide pair
  kVhlVhrPair = 1 << 18,        // Vertical height left/Vertical height right pair
};
63 
64 // Mapping of channel configurations to the MPEG audio value based on ETSI TS
65 // 103 192-2 V1.2.1 Digital Audio Compression (AC-4) Standard;
66 // Part 2: Immersive and personalized Table G.1
67 uint32_t AC4ChannelMasktoMPEGValue(uint32_t channel_mask) {
68  uint32_t ret = 0;
69 
70  switch (channel_mask) {
71  case kCentre:
72  ret = 1;
73  break;
74  case kLRPair:
75  ret = 2;
76  break;
77  case kCentre | kLRPair:
78  ret = 3;
79  break;
80  case kCentre | kLRPair | kBackCentre:
81  ret = 4;
82  break;
83  case kCentre | kLRPair | kLsRsPair:
84  ret = 5;
85  break;
86  case kCentre | kLRPair | kLsRsPair | kLFE:
87  ret = 6;
88  break;
89  case kCentre | kLRPair | kLsRsPair | kLFE | kLwRw:
90  ret = 7;
91  break;
92  case kBackCentre | kLRPair:
93  ret = 9;
94  break;
95  case kLRPair | kLsRsPair:
96  ret = 10;
97  break;
98  case kCentre | kLRPair | kLsRsPair | kLFE | kBackCentre:
99  ret = 11;
100  break;
101  case kCentre | kLRPair | kLsRsPair | kLbRbPair | kLFE:
102  ret = 12;
103  break;
104  case kLwRw | kBackCentre | kBottomFrontCentre | kBflBfrPair | kLFE2 |
105  kTopCentre | kTopbackCentre | kTopfrontCentre | kTslTsrPair | kLFE |
106  kTblTbrPair | kTflTfrPair | kLbRbPair | kLsRsPair | kCentre | kLRPair:
107  case kVhlVhrPair | kLwRw | kBackCentre | kBottomFrontCentre | kBflBfrPair|
108  kLFE2 | kTopCentre | kTopbackCentre | kTopfrontCentre | kTslTsrPair |
109  kLFE | kTblTbrPair | kLbRbPair | kLsRsPair | kCentre | kLRPair:
110  ret = 13;
111  break;
112  case kLFE | kTflTfrPair | kLsRsPair | kCentre | kLRPair:
113  case kVhlVhrPair | kLFE | kCentre | kLRPair | kLsRsPair:
114  ret = 14;
115  break;
116  case kLFE2 | kTopbackCentre | kLFE | kTflTfrPair | kCentre | kLRPair |
117  kLsRsPair | kLbRbPair:
118  case kVhlVhrPair | kLFE2 | kTopbackCentre | kLFE | kCentre | kLRPair |
119  kLsRsPair | kLbRbPair:
120  ret = 15;
121  break;
122  case kLFE | kTblTbrPair | kTflTfrPair | kLsRsPair | kCentre | kLRPair:
123  case kVhlVhrPair | kLFE | kTblTbrPair | kLsRsPair | kCentre | kLRPair:
124  ret = 16;
125  break;
126  case kTopCentre | kTopfrontCentre | kLFE | kTblTbrPair | kTflTfrPair |
127  kLsRsPair | kCentre | kLRPair:
128  case kVhlVhrPair | kTopCentre | kTopfrontCentre | kLFE | kTblTbrPair |
129  kLsRsPair | kCentre | kLRPair:
130  ret = 17;
131  break;
132  case kTopCentre | kTopfrontCentre | kLFE | kTblTbrPair | kTflTfrPair |
133  kCentre | kLRPair | kLsRsPair | kLbRbPair:
134  case kVhlVhrPair | kTopCentre | kTopfrontCentre | kLFE | kTblTbrPair |
135  kCentre | kLRPair | kLsRsPair | kLbRbPair:
136  ret = 18;
137  break;
138  case kLFE | kTblTbrPair | kTflTfrPair | kCentre | kLRPair | kLsRsPair |
139  kLbRbPair:
140  case kVhlVhrPair | kLFE | kTblTbrPair | kCentre | kLRPair | kLsRsPair |
141  kLbRbPair:
142  ret = 19;
143  break;
144  case kLscrRscrPair | kLFE | kTblTbrPair | kTflTfrPair | kCentre | kLRPair |
145  kLsRsPair | kLbRbPair:
146  case kVhlVhrPair | kLscrRscrPair | kLFE | kTblTbrPair | kCentre | kLRPair |
147  kLsRsPair | kLbRbPair:
148  ret = 20;
149  break;
150  default:
151  ret = 0xFFFFFFFF;
152  }
153  return ret;
154 }
155 
156 // Parse AC-4 substream group based on ETSI TS 103 192-2 V1.2.1 Digital Audio
157 // Compression (AC-4) Standard; Part 2: Immersive and personalized E.11.
158 bool ParseAC4SubStreamGroupDsi(BitReader& bit_reader) {
159  bool b_substream_present;
160  RCHECK(bit_reader.ReadBits(1, &b_substream_present));
161  bool b_hsf_ext;
162  RCHECK(bit_reader.ReadBits(1, &b_hsf_ext));
163  bool b_channel_coded;
164  RCHECK(bit_reader.ReadBits(1, &b_channel_coded));
165  uint8_t n_substreams;
166  RCHECK(bit_reader.ReadBits(8, &n_substreams));
167  for (uint8_t i = 0; i < n_substreams; i++) {
168  RCHECK(bit_reader.SkipBits(2));
169  bool b_substream_bitrate_indicator;
170  RCHECK(bit_reader.ReadBits(1, &b_substream_bitrate_indicator));
171  if (b_substream_bitrate_indicator) {
172  RCHECK(bit_reader.SkipBits(5));
173  }
174  if (b_channel_coded) {
175  RCHECK(bit_reader.SkipBits(24));
176  } else {
177  bool b_ajoc;
178  RCHECK(bit_reader.ReadBits(1, &b_ajoc));
179  if (b_ajoc) {
180  bool b_static_dmx;
181  RCHECK(bit_reader.ReadBits(1, &b_static_dmx));
182  if (!b_static_dmx) {
183  RCHECK(bit_reader.SkipBits(4));
184  }
185  RCHECK(bit_reader.SkipBits(6));
186  }
187  RCHECK(bit_reader.SkipBits(4));
188  }
189  }
190  bool b_content_type;
191  RCHECK(bit_reader.ReadBits(1, &b_content_type));
192  if (b_content_type) {
193  RCHECK(bit_reader.SkipBits(3));
194  bool b_language_indicator;
195  RCHECK(bit_reader.ReadBits(1, &b_language_indicator));
196  if (b_language_indicator) {
197  uint8_t n_language_tag_bytes;
198  RCHECK(bit_reader.ReadBits(6, &n_language_tag_bytes));
199  RCHECK(bit_reader.SkipBits(n_language_tag_bytes * 8));
200  }
201  }
202  return true;
203 }
204 
205 // Parse AC-4 Presentation V1 based on ETSI TS 103 192-2 V1.2.1 Digital Audio
206 // Compression (AC-4) Standard;Part 2: Immersive and personalized E.10.
207 bool ParseAC4PresentationV1Dsi(BitReader& bit_reader,
208  uint32_t pres_bytes,
209  uint8_t* mdcompat,
210  uint32_t* presentation_channel_mask_v1,
211  bool* dolby_cbi_indicator,
212  uint8_t* dolby_atmos_indicator) {
213  bool ret = true;
214  // Record the initial offset.
215  const size_t presentation_start = bit_reader.bit_position();
216  uint8_t presentation_config_v1;
217  RCHECK(bit_reader.ReadBits(5, &presentation_config_v1));
218  uint8_t b_add_emdf_substreams;
219  // set default value (stereo content) for output parameters.
220  *mdcompat = 0;
221  *presentation_channel_mask_v1 = 2;
222  *dolby_cbi_indicator = false;
223  *dolby_atmos_indicator = 0;
224  if (presentation_config_v1 == 0x06) {
225  b_add_emdf_substreams = 1;
226  } else {
227  RCHECK(bit_reader.ReadBits(3, mdcompat));
228  bool b_presentation_id;
229  RCHECK(bit_reader.ReadBits(1, &b_presentation_id));
230  if (b_presentation_id) {
231  RCHECK(bit_reader.SkipBits(5));
232  }
233  RCHECK(bit_reader.SkipBits(19));
234  bool b_presentation_channel_coded;
235  RCHECK(bit_reader.ReadBits(1, &b_presentation_channel_coded));
236  *presentation_channel_mask_v1 = 0;
237  if (b_presentation_channel_coded) {
238  uint8_t dsi_presentation_ch_mode;
239  RCHECK(bit_reader.ReadBits(5, &dsi_presentation_ch_mode));
240  if (dsi_presentation_ch_mode >= 11 && dsi_presentation_ch_mode <= 14) {
241  RCHECK(bit_reader.SkipBits(1));
242  uint8_t pres_top_channel_pairs;
243  RCHECK(bit_reader.ReadBits(2, &pres_top_channel_pairs));
244  if (pres_top_channel_pairs) {
245  *dolby_cbi_indicator = true;
246  }
247  } else if (dsi_presentation_ch_mode == 15) {
248  *dolby_cbi_indicator = true;
249  }
250  RCHECK(bit_reader.ReadBits(24, presentation_channel_mask_v1));
251  }
252  bool b_presentation_core_differs;
253  RCHECK(bit_reader.ReadBits(1, &b_presentation_core_differs));
254  if (b_presentation_core_differs) {
255  bool b_presentation_core_channel_coded;
256  RCHECK(bit_reader.ReadBits(1, &b_presentation_core_channel_coded));
257  if (b_presentation_core_channel_coded) {
258  RCHECK(bit_reader.SkipBits(2));
259  }
260  }
261  bool b_presentation_filter;
262  RCHECK(bit_reader.ReadBits(1, &b_presentation_filter));
263  if (b_presentation_filter) {
264  RCHECK(bit_reader.SkipBits(1));
265  uint8_t n_filter_bytes;
266  RCHECK(bit_reader.ReadBits(8, &n_filter_bytes));
267  RCHECK(bit_reader.SkipBits(n_filter_bytes * 8));
268  }
269  if (presentation_config_v1 == 0x1f) {
270  ret &= ParseAC4SubStreamGroupDsi(bit_reader);
271  } else {
272  RCHECK(bit_reader.SkipBits(1));
273  if (presentation_config_v1 == 0 ||
274  presentation_config_v1 == 1 ||
275  presentation_config_v1 == 2) {
276  ret &= ParseAC4SubStreamGroupDsi(bit_reader);
277  ret &= ParseAC4SubStreamGroupDsi(bit_reader);
278  }
279  if (presentation_config_v1 == 3 || presentation_config_v1 == 4) {
280  ret &= ParseAC4SubStreamGroupDsi(bit_reader);
281  ret &= ParseAC4SubStreamGroupDsi(bit_reader);
282  ret &= ParseAC4SubStreamGroupDsi(bit_reader);
283  }
284  if (presentation_config_v1 == 5) {
285  uint8_t n_substream_groups_minus2;
286  RCHECK(bit_reader.ReadBits(3, &n_substream_groups_minus2));
287  for (uint8_t sg = 0; sg < n_substream_groups_minus2 + 2; sg++) {
288  ret &= ParseAC4SubStreamGroupDsi(bit_reader);
289  }
290  }
291  if (presentation_config_v1 > 5) {
292  uint8_t n_skip_bytes;
293  RCHECK(bit_reader.ReadBits(7, &n_skip_bytes));
294  RCHECK(bit_reader.SkipBits(n_skip_bytes * 8));
295  }
296  }
297  RCHECK(bit_reader.SkipBits(1));
298  RCHECK(bit_reader.ReadBits(1, &b_add_emdf_substreams));
299  }
300  if (b_add_emdf_substreams) {
301  uint8_t n_add_emdf_substreams;
302  RCHECK(bit_reader.ReadBits(7, &n_add_emdf_substreams));
303  RCHECK(bit_reader.SkipBits(n_add_emdf_substreams * 15));
304  }
305  bool b_presentation_bitrate_info;
306  RCHECK(bit_reader.ReadBits(1, &b_presentation_bitrate_info));
307  if (b_presentation_bitrate_info) {
308  // Skip bit rate information based on ETSI TS 103 190-2 v1.2.1 E.7.1
309  RCHECK(bit_reader.SkipBits(66));
310  }
311  bool b_alternative;
312  RCHECK(bit_reader.ReadBits(1, &b_alternative));
313  if (b_alternative) {
314  bit_reader.SkipToNextByte();
315  // Parse alternative information based on ETSI TS 103 190-2 v1.2.1 E.12
316  uint16_t name_len;
317  RCHECK(bit_reader.ReadBits(16, &name_len));
318  RCHECK(bit_reader.SkipBits(name_len * 8));
319  uint8_t n_targets;
320  RCHECK(bit_reader.ReadBits(5, &n_targets));
321  RCHECK(bit_reader.SkipBits(n_targets * 11));
322  }
323  bit_reader.SkipToNextByte();
324  if ((bit_reader.bit_position() - presentation_start) <=
325  (pres_bytes - 1) * 8) {
326  RCHECK(bit_reader.SkipBits(1));
327  RCHECK(bit_reader.ReadBits(1, dolby_atmos_indicator));
328  RCHECK(bit_reader.SkipBits(4));
329  bool b_extended_presentation_group_index;
330  RCHECK(bit_reader.ReadBits(1, &b_extended_presentation_group_index));
331  if (b_extended_presentation_group_index) {
332  RCHECK(bit_reader.SkipBits(9));
333  } else {
334  RCHECK(bit_reader.SkipBits(1));
335  }
336  }
337  return ret;
338 }
339 
340 bool ExtractAc4Data(const std::vector<uint8_t>& ac4_data,
341  uint8_t* bitstream_version,
342  uint8_t* presentation_version,
343  uint8_t* mdcompat,
344  uint32_t* presentation_channel_mask_v1,
345  bool* dolby_ims_indicator,
346  bool* dolby_cbi_indicator) {
347  BitReader bit_reader(ac4_data.data(), ac4_data.size());
348 
349  uint16_t n_presentation;
350  RCHECK(bit_reader.SkipBits(3) && bit_reader.ReadBits(7, bitstream_version));
351  RCHECK(bit_reader.SkipBits(5) && bit_reader.ReadBits(9, &n_presentation));
352 
353  if (*bitstream_version == 2) {
354  uint8_t b_program_id = 0;
355  RCHECK(bit_reader.ReadBits(1, &b_program_id));
356  if (b_program_id) {
357  RCHECK(bit_reader.SkipBits(16));
358  uint8_t b_uuid = 0;
359  RCHECK(bit_reader.ReadBits(1, &b_uuid));
360  if (b_uuid) {
361  RCHECK(bit_reader.SkipBits(16 * 8));
362  }
363  }
364  } else if (*bitstream_version == 0 || *bitstream_version == 1) {
365  LOG(WARNING) << "Bitstream version 0 or 1 is not supported";
366  return false;
367  } else {
368  LOG(WARNING) << "Invalid Bitstream version";
369  return false;
370  }
371 
372  RCHECK(bit_reader.SkipBits(66));
373  bit_reader.SkipToNextByte();
374 
375  // AC4 stream containing the single presentation is valid for OTT only.
376  // IMS has two presentations, and the 2nd is legacy (duplicated) presentation.
377  // So it can be considered as AC4 stream with single presentation. And IMS
378  // presentation must be prior to legacy presentation.
379  // In other word, only the 1st presentation in AC4 stream need to be parsed.
380  const uint8_t ott_n_presentation = 1;
381  for (uint8_t i = 0; i < ott_n_presentation; i++) {
382  RCHECK(bit_reader.ReadBits(8, presentation_version));
383  // *presentation_version == 2 means IMS presentation.
384  if ((*presentation_version == 2 && n_presentation > 2) ||
385  (*presentation_version == 1 && n_presentation > 1) ) {
386  LOG(WARNING) << "Seeing multiple presentations, only single presentation "
387  << "(including IMS presentation) is supported";
388  return false;
389  }
390  uint32_t pres_bytes;
391  RCHECK(bit_reader.ReadBits(8, &pres_bytes));
392  if (pres_bytes == 255) {
393  uint32_t add_pres_bytes;
394  RCHECK(bit_reader.ReadBits(16, &add_pres_bytes));
395  pres_bytes += add_pres_bytes;
396  }
397 
398  size_t presentation_bits = 0;
399  *dolby_ims_indicator = false;
400  if (*presentation_version == 0) {
401  LOG(WARNING) << "Presentation version 0 is not supported";
402  return false;
403  } else {
404  if (*presentation_version == 1 || *presentation_version == 2) {
405  if (*presentation_version == 2) {
406  *dolby_ims_indicator = true;
407  }
408  const size_t presentation_start = bit_reader.bit_position();
409  // dolby_atmos_indicator is extended in Dolby internal specs.
410  // It indicates whether the source content before encoding is Atmos.
411  // No final decision about how to use it in OTT.
412  // Parse it for the future usage.
413  uint8_t dolby_atmos_indicator;
414  if (!ParseAC4PresentationV1Dsi(bit_reader, pres_bytes, mdcompat,
415  presentation_channel_mask_v1,
416  dolby_cbi_indicator,
417  &dolby_atmos_indicator)) {
418  return false;
419  }
420  const size_t presentation_end = bit_reader.bit_position();
421  presentation_bits = presentation_end - presentation_start;
422  } else {
423  LOG(WARNING) << "Invalid Presentation version";
424  return false;
425  }
426  }
427  size_t skip_bits = pres_bytes * 8 - presentation_bits;
428  RCHECK(bit_reader.SkipBits(skip_bits));
429  }
430  return true;
431 }
432 } // namespace
433 
434 bool CalculateAC4ChannelMask(const std::vector<uint8_t>& ac4_data,
435  uint32_t* ac4_channel_mask) {
436  uint8_t bitstream_version;
437  uint8_t presentation_version;
438  uint8_t mdcompat;
439  uint32_t pre_channel_mask;
440  bool dolby_ims_indicator;
441  bool dolby_cbi_indicator;
442 
443  if (!ExtractAc4Data(ac4_data, &bitstream_version, &presentation_version,
444  &mdcompat, &pre_channel_mask, &dolby_ims_indicator,
445  &dolby_cbi_indicator)) {
446  LOG(WARNING) << "Seeing invalid AC4 data: "
447  << absl::BytesToHexString(
448  byte_vector_to_string_view(ac4_data));
449  return false;
450  }
451 
452  if (pre_channel_mask) {
453  *ac4_channel_mask = pre_channel_mask;
454  } else {
455  *ac4_channel_mask = 0x800000;
456  }
457  return true;
458 }
459 
460 bool CalculateAC4ChannelMPEGValue(const std::vector<uint8_t>& ac4_data,
461  uint32_t* ac4_channel_mpeg_value) {
462  uint8_t bitstream_version;
463  uint8_t presentation_version;
464  uint8_t mdcompat;
465  uint32_t pre_channel_mask;
466  bool dolby_ims_indicator;
467  bool dolby_cbi_indicator;
468 
469  if (!ExtractAc4Data(ac4_data, &bitstream_version, &presentation_version,
470  &mdcompat, &pre_channel_mask, &dolby_ims_indicator,
471  &dolby_cbi_indicator)) {
472  LOG(WARNING) << "Seeing invalid AC4 data: "
473  << absl::BytesToHexString(
474  byte_vector_to_string_view(ac4_data));
475  return false;
476  }
477 
478  *ac4_channel_mpeg_value = AC4ChannelMasktoMPEGValue(pre_channel_mask);
479  return true;
480 }
481 
482 bool GetAc4CodecInfo(const std::vector<uint8_t>& ac4_data,
483  uint8_t* ac4_codec_info) {
484  uint8_t bitstream_version;
485  uint8_t presentation_version;
486  uint8_t mdcompat;
487  uint32_t pre_channel_mask;
488  bool dolby_ims_indicator;
489  bool dolby_cbi_indicator;
490 
491  if (!ExtractAc4Data(ac4_data, &bitstream_version, &presentation_version,
492  &mdcompat, &pre_channel_mask, &dolby_ims_indicator,
493  &dolby_cbi_indicator)) {
494  LOG(WARNING) << "Seeing invalid AC4 data: "
495  << absl::BytesToHexString(
496  byte_vector_to_string_view(ac4_data));
497  return false;
498  }
499 
500  // The valid value of bitstream_version (8 bits) is 2, the valid value of
501  // presentation_version (8 bits) is 1 or 2, and mdcompat is 3 bits.
502  // So uint8_t is fine now. If Dolby extends the value of bitstream_version and
503  // presentation_version in future, maybe need change the type from uint8_t to
504  // uint16_t or uint32_t to accommodate the valid values.
505  // If that, AudioStreamInfo::GetCodecString need to be changed accordingly.
506  // bitstream_version (3bits) + presentation_version (2bits) + mdcompat (3bits)
507  *ac4_codec_info = ((bitstream_version << 5) |
508  ((presentation_version << 3) & 0x1F) |
509  (mdcompat & 0x7));
510  return true;
511 }
512 
513 bool GetAc4ImmersiveInfo(const std::vector<uint8_t>& ac4_data,
514  bool* ac4_ims_flag,
515  bool* ac4_cbi_flag) {
516  uint8_t bitstream_version;
517  uint8_t presentation_version;
518  uint8_t mdcompat;
519  uint32_t pre_channel_mask;
520 
521  if (!ExtractAc4Data(ac4_data, &bitstream_version, &presentation_version,
522  &mdcompat, &pre_channel_mask, ac4_ims_flag,
523  ac4_cbi_flag)) {
524  LOG(WARNING) << "Seeing invalid AC4 data: "
525  << absl::BytesToHexString(
526  byte_vector_to_string_view(ac4_data));
527  return false;
528  }
529 
530  return true;
531 }
532 
533 } // namespace media
534 } // namespace shaka
All the methods that are virtual are virtual for mocking.
Definition: crypto_flags.cc:66
std::string_view byte_vector_to_string_view(const std::vector< uint8_t > &bytes)
Convert byte vector to string_view.