Shaka Packager SDK
es_parser_teletext.cc
1 // Copyright 2020 Google Inc. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file or at
5 // https://developers.google.com/open-source/licenses/bsd
6 
7 #include <packager/media/formats/mp2t/es_parser_teletext.h>
8 
9 #include <packager/media/base/bit_reader.h>
10 #include <packager/media/base/timestamp.h>
11 #include <packager/media/formats/mp2t/es_parser_teletext_tables.h>
12 #include <packager/media/formats/mp2t/mp2t_common.h>
13 #include <iostream>
14 
15 namespace shaka {
16 namespace media {
17 namespace mp2t {
18 
19 namespace {
20 
// Prefix of the region name assigned to emitted text samples; the integer
// line number of the row is appended to it.
constexpr const char* kRegionTeletextPrefix = "ttx_";

// data_unit_id value of the data units this parser decodes; units with any
// other id are skipped.
constexpr uint8_t EBU_TELETEXT_WITH_SUBTITLING = 0x03;
// Number of bytes walked per packet when building a row or reading triplets.
constexpr int kPayloadSize = 40;
// Capacity reserved for the triplets decoded from a single packet 26.
constexpr int kNumTriplets = 13;
26 
// Extracts the single bit at position |bit_pos| (0 = least significant) of
// |value|. The result is 1 when the bit is set and 0 otherwise, expressed in
// the same type as |value|.
template <typename T>
constexpr T bit(T value, const size_t bit_pos) {
  return static_cast<T>((value >> bit_pos) & 0x1);
}
31 
32 uint8_t ReadHamming(BitReader& reader) {
33  uint8_t bits;
34  RCHECK(reader.ReadBits(8, &bits));
35  return TELETEXT_HAMMING_8_4[bits];
36 }
37 
// Decodes a 24-bit Hamming protected word into its 18 payload bits.
//
// A check value is folded together from every set bit of |value|: bit i
// (i < 23) contributes i + 0x21 and bit 23 contributes 0x20. If the low five
// bits of the check value are all ones the word is used as is. Otherwise the
// word is rejected when bit 5 of the check value is set, and has one bit
// flipped (selected by the check value) when it is clear.
//
// Returns true and stores the packed payload in |out_result| on success.
// Returns false and leaves |out_result| untouched when the word is rejected.
bool Hamming_24_18(const uint32_t value, uint32_t& out_result) {
  uint8_t syndrome = 0;
  for (uint8_t position = 0; position < 23; ++position) {
    if ((value >> position) & 0x01) {
      syndrome ^= static_cast<uint8_t>(position + 0x21);
    }
  }
  if ((value >> 0x17) & 0x01) {
    syndrome ^= 0x20;
  }

  uint32_t corrected = value;
  const bool all_checks_pass = (syndrome & 0x1f) == 0x1f;
  if (!all_checks_pass) {
    const bool uncorrectable = (syndrome & 0x20) != 0;
    if (uncorrectable) {
      return false;
    }
    // Here syndrome is in [0, 0x1e], so the shift amount is in [0, 30].
    corrected ^= static_cast<uint32_t>(1) << (0x1e - syndrome);
  }

  // Gather the payload bits from between the check-bit positions.
  const uint32_t payload_bit_0 = (corrected & 0x000004) >> 2;
  const uint32_t payload_bits_1_3 = (corrected & 0x000070) >> 3;
  const uint32_t payload_bits_4_10 = (corrected & 0x007f00) >> 4;
  const uint32_t payload_bits_11_17 = (corrected & 0x7f0000) >> 5;
  out_result = payload_bit_0 | payload_bits_1_3 | payload_bits_4_10 |
               payload_bits_11_17;
  return true;
}
58 
59 bool ParseSubtitlingDescriptor(
60  const uint8_t* descriptor,
61  const size_t size,
62  std::unordered_map<uint16_t, std::string>& result) {
63  BitReader reader(descriptor, size);
64  RCHECK(reader.SkipBits(8));
65 
66  size_t data_size;
67  RCHECK(reader.ReadBits(8, &data_size));
68  RCHECK(data_size + 2 <= size);
69 
70  for (size_t i = 0; i < data_size; i += 8) {
71  uint32_t lang_code;
72  RCHECK(reader.ReadBits(24, &lang_code));
73  uint8_t ignored_teletext_type;
74  RCHECK(reader.ReadBits(5, &ignored_teletext_type));
75  uint8_t magazine_number;
76  RCHECK(reader.ReadBits(3, &magazine_number));
77  if (magazine_number == 0) {
78  magazine_number = 8;
79  }
80 
81  uint8_t page_number_tens;
82  RCHECK(reader.ReadBits(4, &page_number_tens));
83  uint8_t page_number_units;
84  RCHECK(reader.ReadBits(4, &page_number_units));
85  const uint8_t page_number = page_number_tens * 10 + page_number_units;
86 
87  std::string lang(3, '\0');
88  lang[0] = static_cast<char>((lang_code >> 16) & 0xff);
89  lang[1] = static_cast<char>((lang_code >> 8) & 0xff);
90  lang[2] = static_cast<char>((lang_code >> 0) & 0xff);
91 
92  const uint16_t index = magazine_number * 100 + page_number;
93  result.emplace(index, std::move(lang));
94  }
95 
96  return true;
97 }
98 
99 } // namespace
100 
101 EsParserTeletext::EsParserTeletext(const uint32_t pid,
102  const NewStreamInfoCB& new_stream_info_cb,
103  const EmitTextSampleCB& emit_sample_cb,
104  const uint8_t* descriptor,
105  const size_t descriptor_length)
106  : EsParser(pid),
107  new_stream_info_cb_(new_stream_info_cb),
108  emit_sample_cb_(emit_sample_cb),
109  magazine_(0),
110  page_number_(0),
111  charset_code_(0),
112  current_charset_{},
113  last_pts_(0) {
114  if (!ParseSubtitlingDescriptor(descriptor, descriptor_length, languages_)) {
115  LOG(ERROR) << "Unable to parse teletext_descriptor";
116  }
117  UpdateCharset();
118 }
119 
120 bool EsParserTeletext::Parse(const uint8_t* buf,
121  int size,
122  int64_t pts,
123  int64_t dts) {
124  if (!sent_info_) {
125  sent_info_ = true;
126  auto info = std::make_shared<TextStreamInfo>(pid(), kMpeg2Timescale,
127  kInfiniteDuration, kCodecText,
128  "", "", 0, 0, "");
129  for (const auto& pair : languages_) {
130  info->AddSubStream(pair.first, {pair.second});
131  }
132 
133  new_stream_info_cb_(info);
134  }
135 
136  return ParseInternal(buf, size, pts);
137 }
138 
139 bool EsParserTeletext::Flush() {
140  std::vector<uint16_t> keys;
141  for (const auto& entry : page_state_) {
142  keys.push_back(entry.first);
143  }
144 
145  for (const auto key : keys) {
146  SendPending(key, last_pts_);
147  }
148 
149  return true;
150 }
151 
152 void EsParserTeletext::Reset() {
153  page_state_.clear();
154  magazine_ = 0;
155  page_number_ = 0;
156  sent_info_ = false;
157  charset_code_ = 0;
158  UpdateCharset();
159 }
160 
161 bool EsParserTeletext::ParseInternal(const uint8_t* data,
162  const size_t size,
163  const int64_t pts) {
164  BitReader reader(data, size);
165  RCHECK(reader.SkipBits(8));
166  std::vector<TextRow> rows;
167 
168  while (reader.bits_available()) {
169  uint8_t data_unit_id;
170  RCHECK(reader.ReadBits(8, &data_unit_id));
171 
172  uint8_t data_unit_length;
173  RCHECK(reader.ReadBits(8, &data_unit_length));
174 
175  if (data_unit_id != EBU_TELETEXT_WITH_SUBTITLING) {
176  RCHECK(reader.SkipBytes(data_unit_length));
177  continue;
178  }
179 
180  if (data_unit_length != 44) {
181  // Teletext data unit length is always 44 bytes
182  LOG(ERROR) << "Bad Teletext data length";
183  break;
184  }
185 
186 
187  RCHECK(reader.SkipBits(16));
188 
189  uint16_t address_bits;
190  RCHECK(reader.ReadBits(16, &address_bits));
191 
192  uint8_t magazine = bit(address_bits, 14) + 2 * bit(address_bits, 12) +
193  4 * bit(address_bits, 10);
194 
195  if (magazine == 0) {
196  magazine = 8;
197  }
198 
199  const uint8_t packet_nr =
200  (bit(address_bits, 8) + 2 * bit(address_bits, 6) +
201  4 * bit(address_bits, 4) + 8 * bit(address_bits, 2) +
202  16 * bit(address_bits, 0));
203  const uint8_t* data_block = reader.current_byte_ptr();
204  RCHECK(reader.SkipBytes(40));
205 
206  TextRow row;
207  if (ParseDataBlock(pts, data_block, packet_nr, magazine, row)) {
208  rows.emplace_back(std::move(row));
209  }
210  }
211 
212  if (rows.empty()) {
213  return true;
214  }
215  const uint16_t index = magazine_ * 100 + page_number_;
216  auto page_state_itr = page_state_.find(index);
217  if (page_state_itr == page_state_.end()) {
218  page_state_.emplace(index, TextBlock{std::move(rows), {}, last_pts_});
219 
220  } else {
221  for (auto& row : rows) {
222  auto& page_state_lines = page_state_itr->second.rows;
223  page_state_lines.emplace_back(std::move(row));
224  }
225  rows.clear();
226  }
227 
228  return true;
229 }
230 
231 bool EsParserTeletext::ParseDataBlock(const int64_t pts,
232  const uint8_t* data_block,
233  const uint8_t packet_nr,
234  const uint8_t magazine,
235  TextRow& row) {
236  if (packet_nr == 0) {
237  last_pts_ = pts;
238  BitReader reader(data_block, 32);
239 
240  const uint8_t page_number_units = ReadHamming(reader);
241  const uint8_t page_number_tens = ReadHamming(reader);
242  if (page_number_units == 0xf || page_number_tens == 0xf) {
243  RCHECK(reader.SkipBits(40));
244  return false;
245  }
246  const uint8_t page_number = 10 * page_number_tens + page_number_units;
247 
248  const uint16_t index = magazine * 100 + page_number;
249  SendPending(index, pts);
250 
251  page_number_ = page_number;
252  magazine_ = magazine;
253 
254  RCHECK(reader.SkipBits(40));
255  const uint8_t subcode_c11_c14 = ReadHamming(reader);
256  const uint8_t charset_code = subcode_c11_c14 >> 1;
257  if (charset_code != charset_code_) {
258  charset_code_ = charset_code;
259  UpdateCharset();
260  }
261 
262  return false;
263 
264  } else if (packet_nr == 26) {
265  ParsePacket26(data_block);
266  return false;
267 
268  } else if (packet_nr > 26) {
269  return false;
270  }
271 
272  row = BuildRow(data_block, packet_nr);
273  return true;
274 }
275 
276 void EsParserTeletext::UpdateCharset() {
277  memcpy(current_charset_, TELETEXT_CHARSET_G0_LATIN, sizeof(TELETEXT_CHARSET_G0_LATIN));
278  if (charset_code_ > 7) {
279  return;
280  }
281  const auto teletext_national_subset =
282  static_cast<TELETEXT_NATIONAL_SUBSET>(charset_code_);
283 
284  switch (teletext_national_subset) {
285  case TELETEXT_NATIONAL_SUBSET::ENGLISH:
286  UpdateNationalSubset(TELETEXT_NATIONAL_SUBSET_ENGLISH);
287  break;
288  case TELETEXT_NATIONAL_SUBSET::FRENCH:
289  UpdateNationalSubset(TELETEXT_NATIONAL_SUBSET_FRENCH);
290  break;
291  case TELETEXT_NATIONAL_SUBSET::SWEDISH_FINNISH_HUNGARIAN:
292  UpdateNationalSubset(TELETEXT_NATIONAL_SUBSET_SWEDISH_FINNISH_HUNGARIAN);
293  break;
294  case TELETEXT_NATIONAL_SUBSET::CZECH_SLOVAK:
295  UpdateNationalSubset(TELETEXT_NATIONAL_SUBSET_CZECH_SLOVAK);
296  break;
297  case TELETEXT_NATIONAL_SUBSET::GERMAN:
298  UpdateNationalSubset(TELETEXT_NATIONAL_SUBSET_GERMAN);
299  break;
300  case TELETEXT_NATIONAL_SUBSET::PORTUGUESE_SPANISH:
301  UpdateNationalSubset(TELETEXT_NATIONAL_SUBSET_PORTUGUESE_SPANISH);
302  break;
303  case TELETEXT_NATIONAL_SUBSET::ITALIAN:
304  UpdateNationalSubset(TELETEXT_NATIONAL_SUBSET_ITALIAN);
305  break;
306  case TELETEXT_NATIONAL_SUBSET::NONE:
307  default:
308  break;
309  }
310 }
311 
312 void EsParserTeletext::SendPending(const uint16_t index, const int64_t pts) {
313  auto page_state_itr = page_state_.find(index);
314 
315  if (page_state_itr == page_state_.end() ||
316  page_state_itr->second.rows.empty()) {
317  return;
318  }
319 
320  const auto& pending_rows = page_state_itr->second.rows;
321  const auto pending_pts = page_state_itr->second.pts;
322 
323  TextSettings text_settings;
324  std::shared_ptr<TextSample> text_sample;
325  std::vector<TextFragment> sub_fragments;
326 
327  if (pending_rows.size() == 1) {
328  // This is a single line of formatted text.
329  // Propagate row number/2 and alignment
330  const float line_nr = float(pending_rows[0].row_number) / 2.0;
331  text_settings.line = TextNumber(line_nr, TextUnitType::kLines);
332  text_settings.region = kRegionTeletextPrefix + std::to_string(int(line_nr));
333  text_settings.text_alignment = pending_rows[0].alignment;
334  text_sample = std::make_shared<TextSample>(
335  "", pending_pts, pts, text_settings, pending_rows[0].fragment);
336  text_sample->set_sub_stream_index(index);
337  emit_sample_cb_(text_sample);
338  page_state_.erase(index);
339  return;
340  } else {
341  int32_t latest_row_nr = -1;
342  bool last_double_height = false;
343  bool new_sample = true;
344  for (const auto& row : pending_rows) {
345  int row_nr = row.row_number;
346  bool double_height = row.double_height;
347  int row_step = last_double_height ? 2 : 1;
348  if (latest_row_nr != -1) { // Not the first row
349  if (row_nr != latest_row_nr + row_step) {
350  // Send what has been collected since not adjacent
351  text_sample =
352  std::make_shared<TextSample>("", pending_pts, pts, text_settings,
353  TextFragment({}, sub_fragments));
354  text_sample->set_sub_stream_index(index);
355  emit_sample_cb_(text_sample);
356  new_sample = true;
357  } else {
358  // Add a newline and the next row to the current sample
359  sub_fragments.push_back(TextFragment({}, true));
360  sub_fragments.push_back(row.fragment);
361  new_sample = false;
362  }
363  }
364  if (new_sample) {
365  const float line_nr = float(row.row_number) / 2.0;
366  text_settings.line = TextNumber(line_nr, TextUnitType::kLines);
367  text_settings.region =
368  kRegionTeletextPrefix + std::to_string(int(line_nr));
369  text_settings.text_alignment = row.alignment;
370  sub_fragments.clear();
371  sub_fragments.push_back(row.fragment);
372  }
373  last_double_height = double_height;
374  latest_row_nr = row_nr;
375  }
376  }
377 
378  text_sample = std::make_shared<TextSample>(
379  "", pending_pts, pts, text_settings, TextFragment({}, sub_fragments));
380  text_sample->set_sub_stream_index(index);
381  emit_sample_cb_(text_sample);
382 
383  page_state_.erase(index);
384 }
385 
386 // BuildRow builds a row with alignment information.
387 EsParserTeletext::TextRow EsParserTeletext::BuildRow(const uint8_t* data_block,
388  const uint8_t row) const {
389  std::string next_string;
390  next_string.reserve(kPayloadSize * 2);
391 
392  const uint16_t index = magazine_ * 100 + page_number_;
393  const auto page_state_itr = page_state_.find(index);
394 
395  const std::unordered_map<uint8_t, std::string>* column_replacement_map =
396  nullptr;
397  if (page_state_itr != page_state_.cend()) {
398  const auto row_itr =
399  page_state_itr->second.packet_26_replacements.find(row);
400  if (row_itr != page_state_itr->second.packet_26_replacements.cend()) {
401  column_replacement_map = &(row_itr->second);
402  }
403  }
404 
405  int32_t start_pos = 0;
406  int32_t end_pos = 0;
407  bool double_height = false;
408  TextFragmentStyle text_style = TextFragmentStyle();
409  text_style.color = "white";
410  text_style.backgroundColor = "black";
411  // A typical 40 character line looks like:
412  // doubleHeight, [color] spaces, Start, Start, text, End End, spaces
413  for (size_t i = 0; i < kPayloadSize; ++i) {
414  if (column_replacement_map) {
415  const auto column_itr = column_replacement_map->find(i);
416  if (column_itr != column_replacement_map->cend()) {
417  next_string.append(column_itr->second);
418  continue;
419  }
420  }
421 
422  char next_char =
423  static_cast<char>(TELETEXT_BITREVERSE_8[data_block[i]] & 0x7f);
424 
425  if (next_char < 0x20) {
426  // Here are control characters, which are not printable.
427  // These include colors, double-height, flashing, etc.
428  // We only handle one-foreground color and double-height.
429  switch (next_char) {
430  case 0x0: // Alpha Black (not included in Level 1.5)
431  // color = ColorBlack
432  break;
433  case 0x1:
434  text_style.color = "red";
435  break;
436  case 0x2:
437  text_style.color = "green";
438  break;
439  case 0x3:
440  text_style.color = "yellow";
441  break;
442  case 0x4:
443  text_style.color = "blue";
444  break;
445  case 0x5:
446  text_style.color = "magenta";
447  break;
448  case 0x6:
449  text_style.color = "cyan";
450  break;
451  case 0x7:
452  text_style.color = "white";
453  break;
454  case 0x08: // Flash (not handled)
455  break;
456  case 0x09: // Steady (not handled)
457  break;
458  case 0xa: // End Box
459  end_pos = i - 1;
460  break;
461  case 0xb: // Start Box, typically twice due to double height
462  start_pos = i + 1;
463  continue; // Do not propagate as a space
464  break;
465  case 0xc: // Normal size
466  break;
467  case 0xd: // Double height, typically always used
468  double_height = true;
469  break;
470  case 0x1c: // Black background (not handled)
471  break;
472  case 0x1d: // Set background color from text color.
473  text_style.backgroundColor = text_style.color;
474  text_style.color = "black"; // Avoid having same as background
475  break;
476  default:
477  // Rest of codes below 0x20 are not part of Level 1.5 or related to
478  // mosaic graphics (non-text)
479  break;
480  }
481  next_char =
482  0x20; // These characters result in a space if between start and end
483  }
484  if (start_pos == 0 || end_pos != 0) { // Not between start and end
485  continue;
486  }
487  switch (next_char) {
488  case '&':
489  next_string.append("&amp;");
490  break;
491  case '<':
492  next_string.append("&lt;");
493  break;
494  default: {
495  const std::string replacement(current_charset_[next_char - 0x20]);
496  next_string.append(replacement);
497  } break;
498  }
499  }
500  if (end_pos == 0) {
501  end_pos = kPayloadSize - 1;
502  }
503 
504  // Using start_pos and end_pos we approximated alignment of text
505  // depending on the number of spaces to the left and right of the text.
506  auto left_right_diff = start_pos - (kPayloadSize - 1 - end_pos);
507  TextAlignment alignment;
508  if (left_right_diff > 4) {
509  alignment = TextAlignment::kRight;
510  } else if (left_right_diff < -4) {
511  alignment = TextAlignment::kLeft;
512  } else {
513  alignment = TextAlignment::kCenter;
514  }
515  const auto text_row = TextRow(
516  {alignment, row, double_height, {TextFragment(text_style, next_string)}});
517 
518  return text_row;
519 }
520 
521 void EsParserTeletext::ParsePacket26(const uint8_t* data_block) {
522  const uint16_t index = magazine_ * 100 + page_number_;
523  auto page_state_itr = page_state_.find(index);
524  if (page_state_itr == page_state_.end()) {
525  page_state_.emplace(index, TextBlock{{}, {}, last_pts_});
526  }
527  auto& replacement_map = page_state_[index].packet_26_replacements;
528 
529  uint8_t row = 0;
530 
531  std::vector<uint32_t> x26_triplets;
532  x26_triplets.reserve(kNumTriplets);
533  for (uint8_t i = 1; i < kPayloadSize; i += 3) {
534  const uint32_t bytes = (TELETEXT_BITREVERSE_8[data_block[i + 2]] << 16) |
535  (TELETEXT_BITREVERSE_8[data_block[i + 1]] << 8) |
536  TELETEXT_BITREVERSE_8[data_block[i]];
537  uint32_t triplet;
538  if (Hamming_24_18(bytes, triplet)) {
539  x26_triplets.emplace_back(triplet);
540  }
541  }
542 
543  for (const auto triplet : x26_triplets) {
544  const uint8_t mode = (triplet & 0x7c0) >> 6;
545  const uint8_t address = triplet & 0x3f;
546  const uint8_t row_address_group = (address >= 0x28) && (address <= 0x3f);
547 
548  if ((mode == 0x4) && (row_address_group == 0x1)) {
549  row = address - 0x28;
550  if (row == 0x0) {
551  row = 0x18;
552  }
553  }
554 
555  if (mode >= 0x11 && mode <= 0x1f && row_address_group == 0x1) {
556  break;
557  }
558 
559  const uint8_t data = (triplet & 0x3f800) >> 11;
560 
561  if (mode == 0x0f && row_address_group == 0x0 && data > 0x1f) {
562  SetPacket26ReplacementString(replacement_map, row, address,
563  reinterpret_cast<const char*>(
564  TELETEXT_CHARSET_G2_LATIN[data - 0x20]));
565  }
566 
567  if (mode == 0x10 && row_address_group == 0x0 && data == 0x40) {
568  SetPacket26ReplacementString(replacement_map, row, address, "@");
569  }
570 
571  if (mode < 0x11 || mode > 0x1f || row_address_group != 0x0) {
572  continue;
573  }
574 
575  if (data >= 0x41 && data <= 0x5a) {
576  SetPacket26ReplacementString(
577  replacement_map, row, address,
578  reinterpret_cast<const char*>(
579  TELETEXT_G2_LATIN_ACCENTS[mode - 0x11][data - 0x41]));
580 
581  } else if (data >= 0x61 && data <= 0x7a) {
582  SetPacket26ReplacementString(
583  replacement_map, row, address,
584  reinterpret_cast<const char*>(
585  TELETEXT_G2_LATIN_ACCENTS[mode - 0x11][data - 0x47]));
586 
587  } else if ((data & 0x7f) >= 0x20) {
588  SetPacket26ReplacementString(
589  replacement_map, row, address,
590  reinterpret_cast<const char*>(
591  TELETEXT_CHARSET_G0_LATIN[(data & 0x7f) - 0x20]));
592  }
593  }
594 }
595 
596 void EsParserTeletext::UpdateNationalSubset(
597  const uint8_t national_subset[13][3]) {
598  for (size_t i = 0; i < 13; ++i) {
599  const size_t position = TELETEXT_NATIONAL_CHAR_INDEX_G0[i];
600  memcpy(current_charset_[position], national_subset[i], 3);
601  }
602 }
603 
604 void EsParserTeletext::SetPacket26ReplacementString(
605  RowColReplacementMap& replacement_map,
606  const uint8_t row,
607  const uint8_t column,
608  std::string&& replacement_string) {
609  auto replacement_map_itr = replacement_map.find(row);
610  if (replacement_map_itr == replacement_map.cend()) {
611  replacement_map.emplace(row, std::unordered_map<uint8_t, std::string>{});
612  }
613  auto& column_map = replacement_map[row];
614  column_map.emplace(column, std::move(replacement_string));
615 }
616 
617 } // namespace mp2t
618 } // namespace media
619 } // namespace shaka
All the methods that are virtual are virtual for mocking.
Definition: crypto_flags.cc:66