Shaka Packager SDK
Loading...
Searching...
No Matches
es_parser_teletext.cc
1// Copyright 2020 Google Inc. All rights reserved.
2//
3// Use of this source code is governed by a BSD-style
4// license that can be found in the LICENSE file or at
5// https://developers.google.com/open-source/licenses/bsd
6
7#include <packager/media/formats/mp2t/es_parser_teletext.h>
8
9#include <packager/media/base/bit_reader.h>
10#include <packager/media/base/timestamp.h>
11#include <packager/media/formats/mp2t/es_parser_teletext_tables.h>
12#include <packager/media/formats/mp2t/mp2t_common.h>
13#include <iostream>
14
15namespace shaka {
16namespace media {
17namespace mp2t {
18
19namespace {
20
// Prefix of the region name attached to emitted text samples; the integer
// line number is appended to it.
constexpr const char* kRegionTeletextPrefix = "ttx_";

// data_unit_id of the data units that are decoded; all other data units are
// skipped.
constexpr uint8_t EBU_TELETEXT_WITH_SUBTITLING = 0x03;
// Number of bytes in the data block of a single teletext packet.
constexpr int kPayloadSize = 40;
// Number of 3-byte triplets read from a packet 26 data block.
constexpr int kNumTriplets = 13;
26
// Returns the bit of |value| at position |bit_pos| as 0 or 1. Position 0 is
// the least significant bit.
template <typename T>
constexpr T bit(T value, const size_t bit_pos) {
  return static_cast<T>((value >> bit_pos) & 0x1);
}
31
32uint8_t ReadHamming(BitReader& reader) {
33 uint8_t bits;
34 RCHECK(reader.ReadBits(8, &bits));
35 return TELETEXT_HAMMING_8_4[bits];
36}
37
// Decodes a Hamming 24/18 protected triplet.
//
// |value| carries the 24 received bits in its low bits. A check value is
// formed by XOR-ing one pattern per set bit: bits 0..22 contribute
// (bit position + 0x21) and bit 23 contributes 0x20.
//  - If the low five bits of the check value are all set, |value| is used
//    as is.
//  - Otherwise, if bit 5 of the check value is set, decoding fails: false is
//    returned and |out_result| is left untouched.
//  - Otherwise bit (0x1e - check value) of |value| is flipped before the
//    data bits are extracted.
// On success the 18 data bits (bit 2, bits 4-6, bits 8-14 and bits 16-22 of
// the possibly corrected value) are packed into the low bits of
// |out_result| and true is returned.
bool Hamming_24_18(const uint32_t value, uint32_t& out_result) {
  uint8_t syndrome = 0;
  for (uint8_t pos = 0; pos < 23; ++pos) {
    if ((value >> pos) & 0x01) {
      syndrome ^= static_cast<uint8_t>(pos + 0x21);
    }
  }
  if ((value >> 0x17) & 0x01) {
    syndrome ^= 0x20;
  }

  uint32_t corrected = value;
  const bool all_low_checks_set = (syndrome & 0x1f) == 0x1f;
  if (!all_low_checks_set) {
    const bool unrecoverable = (syndrome & 0x20) == 0x20;
    if (unrecoverable) {
      return false;
    }
    // Bit 5 is clear here, so |syndrome| is at most 0x1e and the shift
    // amount stays within 0..30.
    corrected ^= 1 << (0x1e - syndrome);
  }

  const uint32_t data_bit_0 = (corrected & 0x000004) >> 2;
  const uint32_t data_bits_1_3 = (corrected & 0x000070) >> 3;
  const uint32_t data_bits_4_10 = (corrected & 0x007f00) >> 4;
  const uint32_t data_bits_11_17 = (corrected & 0x7f0000) >> 5;
  out_result =
      data_bit_0 | data_bits_1_3 | data_bits_4_10 | data_bits_11_17;
  return true;
}
58
59bool ParseSubtitlingDescriptor(
60 const uint8_t* descriptor,
61 const size_t size,
62 std::unordered_map<uint16_t, std::string>& result) {
63 BitReader reader(descriptor, size);
64 RCHECK(reader.SkipBits(8));
65
66 size_t data_size;
67 RCHECK(reader.ReadBits(8, &data_size));
68 RCHECK(data_size + 2 <= size);
69
70 for (size_t i = 0; i < data_size; i += 8) {
71 uint32_t lang_code;
72 RCHECK(reader.ReadBits(24, &lang_code));
73 uint8_t ignored_teletext_type;
74 RCHECK(reader.ReadBits(5, &ignored_teletext_type));
75 uint8_t magazine_number;
76 RCHECK(reader.ReadBits(3, &magazine_number));
77 if (magazine_number == 0) {
78 magazine_number = 8;
79 }
80
81 uint8_t page_number_tens;
82 RCHECK(reader.ReadBits(4, &page_number_tens));
83 uint8_t page_number_units;
84 RCHECK(reader.ReadBits(4, &page_number_units));
85 const uint8_t page_number = page_number_tens * 10 + page_number_units;
86
87 std::string lang(3, '\0');
88 lang[0] = static_cast<char>((lang_code >> 16) & 0xff);
89 lang[1] = static_cast<char>((lang_code >> 8) & 0xff);
90 lang[2] = static_cast<char>((lang_code >> 0) & 0xff);
91
92 const uint16_t index = magazine_number * 100 + page_number;
93 result.emplace(index, std::move(lang));
94 }
95
96 return true;
97}
98
99} // namespace
100
101EsParserTeletext::EsParserTeletext(const uint32_t pid,
102 const NewStreamInfoCB& new_stream_info_cb,
103 const EmitTextSampleCB& emit_sample_cb,
104 const uint8_t* descriptor,
105 const size_t descriptor_length)
106 : EsParser(pid),
107 new_stream_info_cb_(new_stream_info_cb),
108 emit_sample_cb_(emit_sample_cb),
109 magazine_(0),
110 page_number_(0),
111 charset_code_(0),
112 current_charset_{},
113 last_pts_(0) {
114 if (!ParseSubtitlingDescriptor(descriptor, descriptor_length, languages_)) {
115 LOG(ERROR) << "Unable to parse teletext_descriptor";
116 }
117 UpdateCharset();
118}
119
120bool EsParserTeletext::Parse(const uint8_t* buf,
121 int size,
122 int64_t pts,
123 int64_t dts) {
124 if (!sent_info_) {
125 sent_info_ = true;
126 auto info = std::make_shared<TextStreamInfo>(pid(), kMpeg2Timescale,
127 kInfiniteDuration, kCodecText,
128 "", "", 0, 0, "");
129 for (const auto& pair : languages_) {
130 info->AddSubStream(pair.first, {pair.second});
131 }
132
133 new_stream_info_cb_(info);
134 }
135
136 return ParseInternal(buf, size, pts);
137}
138
139bool EsParserTeletext::Flush() {
140 std::vector<uint16_t> keys;
141 for (const auto& entry : page_state_) {
142 keys.push_back(entry.first);
143 }
144
145 for (const auto key : keys) {
146 SendPending(key, last_pts_);
147 }
148
149 return true;
150}
151
152void EsParserTeletext::Reset() {
153 page_state_.clear();
154 magazine_ = 0;
155 page_number_ = 0;
156 sent_info_ = false;
157 charset_code_ = 0;
158 UpdateCharset();
159}
160
161bool EsParserTeletext::ParseInternal(const uint8_t* data,
162 const size_t size,
163 const int64_t pts) {
164 BitReader reader(data, size);
165 RCHECK(reader.SkipBits(8));
166 std::vector<TextRow> rows;
167
168 while (reader.bits_available()) {
169 uint8_t data_unit_id;
170 RCHECK(reader.ReadBits(8, &data_unit_id));
171
172 uint8_t data_unit_length;
173 RCHECK(reader.ReadBits(8, &data_unit_length));
174
175 if (data_unit_id != EBU_TELETEXT_WITH_SUBTITLING) {
176 RCHECK(reader.SkipBytes(data_unit_length));
177 continue;
178 }
179
180 if (data_unit_length != 44) {
181 // Teletext data unit length is always 44 bytes
182 LOG(ERROR) << "Bad Teletext data length";
183 break;
184 }
185
186
187 RCHECK(reader.SkipBits(16));
188
189 uint16_t address_bits;
190 RCHECK(reader.ReadBits(16, &address_bits));
191
192 uint8_t magazine = bit(address_bits, 14) + 2 * bit(address_bits, 12) +
193 4 * bit(address_bits, 10);
194
195 if (magazine == 0) {
196 magazine = 8;
197 }
198
199 const uint8_t packet_nr =
200 (bit(address_bits, 8) + 2 * bit(address_bits, 6) +
201 4 * bit(address_bits, 4) + 8 * bit(address_bits, 2) +
202 16 * bit(address_bits, 0));
203 const uint8_t* data_block = reader.current_byte_ptr();
204 RCHECK(reader.SkipBytes(40));
205
206 TextRow row;
207 if (ParseDataBlock(pts, data_block, packet_nr, magazine, row)) {
208 rows.emplace_back(std::move(row));
209 }
210 }
211
212 if (rows.empty()) {
213 return true;
214 }
215 const uint16_t index = magazine_ * 100 + page_number_;
216 auto page_state_itr = page_state_.find(index);
217 if (page_state_itr == page_state_.end()) {
218 page_state_.emplace(index, TextBlock{std::move(rows), {}, last_pts_});
219
220 } else {
221 for (auto& row : rows) {
222 auto& page_state_lines = page_state_itr->second.rows;
223 page_state_lines.emplace_back(std::move(row));
224 }
225 rows.clear();
226 }
227
228 return true;
229}
230
231bool EsParserTeletext::ParseDataBlock(const int64_t pts,
232 const uint8_t* data_block,
233 const uint8_t packet_nr,
234 const uint8_t magazine,
235 TextRow& row) {
236 if (packet_nr == 0) {
237 last_pts_ = pts;
238 BitReader reader(data_block, 32);
239
240 const uint8_t page_number_units = ReadHamming(reader);
241 const uint8_t page_number_tens = ReadHamming(reader);
242 if (page_number_units == 0xf || page_number_tens == 0xf) {
243 RCHECK(reader.SkipBits(40));
244 return false;
245 }
246 const uint8_t page_number = 10 * page_number_tens + page_number_units;
247
248 const uint16_t index = magazine * 100 + page_number;
249 SendPending(index, pts);
250
251 page_number_ = page_number;
252 magazine_ = magazine;
253
254 RCHECK(reader.SkipBits(40));
255 const uint8_t subcode_c11_c14 = ReadHamming(reader);
256 const uint8_t charset_code = subcode_c11_c14 >> 1;
257 if (charset_code != charset_code_) {
258 charset_code_ = charset_code;
259 UpdateCharset();
260 }
261
262 return false;
263
264 } else if (packet_nr == 26) {
265 ParsePacket26(data_block);
266 return false;
267
268 } else if (packet_nr > 26) {
269 return false;
270 }
271
272 row = BuildRow(data_block, packet_nr);
273 return true;
274}
275
276void EsParserTeletext::UpdateCharset() {
277 memcpy(current_charset_, TELETEXT_CHARSET_G0_LATIN, sizeof(TELETEXT_CHARSET_G0_LATIN));
278 if (charset_code_ > 7) {
279 return;
280 }
281 const auto teletext_national_subset =
282 static_cast<TELETEXT_NATIONAL_SUBSET>(charset_code_);
283
284 switch (teletext_national_subset) {
285 case TELETEXT_NATIONAL_SUBSET::ENGLISH:
286 UpdateNationalSubset(TELETEXT_NATIONAL_SUBSET_ENGLISH);
287 break;
288 case TELETEXT_NATIONAL_SUBSET::FRENCH:
289 UpdateNationalSubset(TELETEXT_NATIONAL_SUBSET_FRENCH);
290 break;
291 case TELETEXT_NATIONAL_SUBSET::SWEDISH_FINNISH_HUNGARIAN:
292 UpdateNationalSubset(TELETEXT_NATIONAL_SUBSET_SWEDISH_FINNISH_HUNGARIAN);
293 break;
294 case TELETEXT_NATIONAL_SUBSET::CZECH_SLOVAK:
295 UpdateNationalSubset(TELETEXT_NATIONAL_SUBSET_CZECH_SLOVAK);
296 break;
297 case TELETEXT_NATIONAL_SUBSET::GERMAN:
298 UpdateNationalSubset(TELETEXT_NATIONAL_SUBSET_GERMAN);
299 break;
300 case TELETEXT_NATIONAL_SUBSET::PORTUGUESE_SPANISH:
301 UpdateNationalSubset(TELETEXT_NATIONAL_SUBSET_PORTUGUESE_SPANISH);
302 break;
303 case TELETEXT_NATIONAL_SUBSET::ITALIAN:
304 UpdateNationalSubset(TELETEXT_NATIONAL_SUBSET_ITALIAN);
305 break;
306 case TELETEXT_NATIONAL_SUBSET::NONE:
307 default:
308 break;
309 }
310}
311
312void EsParserTeletext::SendPending(const uint16_t index, const int64_t pts) {
313 auto page_state_itr = page_state_.find(index);
314
315 if (page_state_itr == page_state_.end() ||
316 page_state_itr->second.rows.empty()) {
317 return;
318 }
319
320 const auto& pending_rows = page_state_itr->second.rows;
321 const auto pending_pts = page_state_itr->second.pts;
322
323 TextSettings text_settings;
324 std::shared_ptr<TextSample> text_sample;
325 std::vector<TextFragment> sub_fragments;
326
327 if (pending_rows.size() == 1) {
328 // This is a single line of formatted text.
329 // Propagate row number/2 and alignment
330 const float line_nr = float(pending_rows[0].row_number) / 2.0;
331 text_settings.line = TextNumber(line_nr, TextUnitType::kLines);
332 text_settings.region = kRegionTeletextPrefix + std::to_string(int(line_nr));
333 text_settings.text_alignment = pending_rows[0].alignment;
334 text_sample = std::make_shared<TextSample>(
335 "", pending_pts, pts, text_settings, pending_rows[0].fragment);
336 text_sample->set_sub_stream_index(index);
337 emit_sample_cb_(text_sample);
338 page_state_.erase(index);
339 return;
340 } else {
341 int32_t latest_row_nr = -1;
342 bool last_double_height = false;
343 bool new_sample = true;
344 for (const auto& row : pending_rows) {
345 int row_nr = row.row_number;
346 bool double_height = row.double_height;
347 int row_step = last_double_height ? 2 : 1;
348 if (latest_row_nr != -1) { // Not the first row
349 if (row_nr != latest_row_nr + row_step) {
350 // Send what has been collected since not adjacent
351 text_sample =
352 std::make_shared<TextSample>("", pending_pts, pts, text_settings,
353 TextFragment({}, sub_fragments));
354 text_sample->set_sub_stream_index(index);
355 emit_sample_cb_(text_sample);
356 new_sample = true;
357 } else {
358 // Add a newline and the next row to the current sample
359 sub_fragments.push_back(TextFragment({}, true));
360 sub_fragments.push_back(row.fragment);
361 new_sample = false;
362 }
363 }
364 if (new_sample) {
365 const float line_nr = float(row.row_number) / 2.0;
366 text_settings.line = TextNumber(line_nr, TextUnitType::kLines);
367 text_settings.region =
368 kRegionTeletextPrefix + std::to_string(int(line_nr));
369 text_settings.text_alignment = row.alignment;
370 sub_fragments.clear();
371 sub_fragments.push_back(row.fragment);
372 }
373 last_double_height = double_height;
374 latest_row_nr = row_nr;
375 }
376 }
377
378 text_sample = std::make_shared<TextSample>(
379 "", pending_pts, pts, text_settings, TextFragment({}, sub_fragments));
380 text_sample->set_sub_stream_index(index);
381 emit_sample_cb_(text_sample);
382
383 page_state_.erase(index);
384}
385
386// BuildRow builds a row with alignment information.
387EsParserTeletext::TextRow EsParserTeletext::BuildRow(const uint8_t* data_block,
388 const uint8_t row) const {
389 std::string next_string;
390 next_string.reserve(kPayloadSize * 2);
391
392 const uint16_t index = magazine_ * 100 + page_number_;
393 const auto page_state_itr = page_state_.find(index);
394
395 const std::unordered_map<uint8_t, std::string>* column_replacement_map =
396 nullptr;
397 if (page_state_itr != page_state_.cend()) {
398 const auto row_itr =
399 page_state_itr->second.packet_26_replacements.find(row);
400 if (row_itr != page_state_itr->second.packet_26_replacements.cend()) {
401 column_replacement_map = &(row_itr->second);
402 }
403 }
404
405 int32_t start_pos = 0;
406 int32_t end_pos = 0;
407 bool double_height = false;
408 TextFragmentStyle text_style = TextFragmentStyle();
409 text_style.color = "white";
410 text_style.backgroundColor = "black";
411 // A typical 40 character line looks like:
412 // doubleHeight, [color] spaces, Start, Start, text, End End, spaces
413 for (size_t i = 0; i < kPayloadSize; ++i) {
414 if (column_replacement_map) {
415 const auto column_itr = column_replacement_map->find(i);
416 if (column_itr != column_replacement_map->cend()) {
417 next_string.append(column_itr->second);
418 continue;
419 }
420 }
421
422 char next_char =
423 static_cast<char>(TELETEXT_BITREVERSE_8[data_block[i]] & 0x7f);
424
425 if (next_char < 0x20) {
426 // Here are control characters, which are not printable.
427 // These include colors, double-height, flashing, etc.
428 // We only handle one-foreground color and double-height.
429 switch (next_char) {
430 case 0x0: // Alpha Black (not included in Level 1.5)
431 // color = ColorBlack
432 break;
433 case 0x1:
434 text_style.color = "red";
435 break;
436 case 0x2:
437 text_style.color = "green";
438 break;
439 case 0x3:
440 text_style.color = "yellow";
441 break;
442 case 0x4:
443 text_style.color = "blue";
444 break;
445 case 0x5:
446 text_style.color = "magenta";
447 break;
448 case 0x6:
449 text_style.color = "cyan";
450 break;
451 case 0x7:
452 text_style.color = "white";
453 break;
454 case 0x08: // Flash (not handled)
455 break;
456 case 0x09: // Steady (not handled)
457 break;
458 case 0xa: // End Box
459 end_pos = i - 1;
460 break;
461 case 0xb: // Start Box, typically twice due to double height
462 start_pos = i + 1;
463 continue; // Do not propagate as a space
464 break;
465 case 0xc: // Normal size
466 break;
467 case 0xd: // Double height, typically always used
468 double_height = true;
469 break;
470 case 0x1c: // Black background (not handled)
471 break;
472 case 0x1d: // Set background color from text color.
473 text_style.backgroundColor = text_style.color;
474 text_style.color = "black"; // Avoid having same as background
475 break;
476 default:
477 // Rest of codes below 0x20 are not part of Level 1.5 or related to
478 // mosaic graphics (non-text)
479 break;
480 }
481 next_char =
482 0x20; // These characters result in a space if between start and end
483 }
484 if (start_pos == 0 || end_pos != 0) { // Not between start and end
485 continue;
486 }
487 switch (next_char) {
488 case '&':
489 next_string.append("&amp;");
490 break;
491 case '<':
492 next_string.append("&lt;");
493 break;
494 default: {
495 const std::string replacement(current_charset_[next_char - 0x20]);
496 next_string.append(replacement);
497 } break;
498 }
499 }
500 if (end_pos == 0) {
501 end_pos = kPayloadSize - 1;
502 }
503
504 // Using start_pos and end_pos we approximated alignment of text
505 // depending on the number of spaces to the left and right of the text.
506 auto left_right_diff = start_pos - (kPayloadSize - 1 - end_pos);
507 TextAlignment alignment;
508 if (left_right_diff > 4) {
509 alignment = TextAlignment::kRight;
510 } else if (left_right_diff < -4) {
511 alignment = TextAlignment::kLeft;
512 } else {
513 alignment = TextAlignment::kCenter;
514 }
515 const auto text_row = TextRow(
516 {alignment, row, double_height, {TextFragment(text_style, next_string)}});
517
518 return text_row;
519}
520
521void EsParserTeletext::ParsePacket26(const uint8_t* data_block) {
522 const uint16_t index = magazine_ * 100 + page_number_;
523 auto page_state_itr = page_state_.find(index);
524 if (page_state_itr == page_state_.end()) {
525 page_state_.emplace(index, TextBlock{{}, {}, last_pts_});
526 }
527 auto& replacement_map = page_state_[index].packet_26_replacements;
528
529 uint8_t row = 0;
530
531 std::vector<uint32_t> x26_triplets;
532 x26_triplets.reserve(kNumTriplets);
533 for (uint8_t i = 1; i < kPayloadSize; i += 3) {
534 const uint32_t bytes = (TELETEXT_BITREVERSE_8[data_block[i + 2]] << 16) |
535 (TELETEXT_BITREVERSE_8[data_block[i + 1]] << 8) |
536 TELETEXT_BITREVERSE_8[data_block[i]];
537 uint32_t triplet;
538 if (Hamming_24_18(bytes, triplet)) {
539 x26_triplets.emplace_back(triplet);
540 }
541 }
542
543 for (const auto triplet : x26_triplets) {
544 const uint8_t mode = (triplet & 0x7c0) >> 6;
545 const uint8_t address = triplet & 0x3f;
546 const uint8_t row_address_group = (address >= 0x28) && (address <= 0x3f);
547
548 if ((mode == 0x4) && (row_address_group == 0x1)) {
549 row = address - 0x28;
550 if (row == 0x0) {
551 row = 0x18;
552 }
553 }
554
555 if (mode >= 0x11 && mode <= 0x1f && row_address_group == 0x1) {
556 break;
557 }
558
559 const uint8_t data = (triplet & 0x3f800) >> 11;
560
561 if (mode == 0x0f && row_address_group == 0x0 && data > 0x1f) {
562 SetPacket26ReplacementString(replacement_map, row, address,
563 reinterpret_cast<const char*>(
564 TELETEXT_CHARSET_G2_LATIN[data - 0x20]));
565 }
566
567 if (mode == 0x10 && row_address_group == 0x0 && data == 0x40) {
568 SetPacket26ReplacementString(replacement_map, row, address, "@");
569 }
570
571 if (mode < 0x11 || mode > 0x1f || row_address_group != 0x0) {
572 continue;
573 }
574
575 if (data >= 0x41 && data <= 0x5a) {
576 SetPacket26ReplacementString(
577 replacement_map, row, address,
578 reinterpret_cast<const char*>(
579 TELETEXT_G2_LATIN_ACCENTS[mode - 0x11][data - 0x41]));
580
581 } else if (data >= 0x61 && data <= 0x7a) {
582 SetPacket26ReplacementString(
583 replacement_map, row, address,
584 reinterpret_cast<const char*>(
585 TELETEXT_G2_LATIN_ACCENTS[mode - 0x11][data - 0x47]));
586
587 } else if ((data & 0x7f) >= 0x20) {
588 SetPacket26ReplacementString(
589 replacement_map, row, address,
590 reinterpret_cast<const char*>(
591 TELETEXT_CHARSET_G0_LATIN[(data & 0x7f) - 0x20]));
592 }
593 }
594}
595
596void EsParserTeletext::UpdateNationalSubset(
597 const uint8_t national_subset[13][3]) {
598 for (size_t i = 0; i < 13; ++i) {
599 const size_t position = TELETEXT_NATIONAL_CHAR_INDEX_G0[i];
600 memcpy(current_charset_[position], national_subset[i], 3);
601 }
602}
603
604void EsParserTeletext::SetPacket26ReplacementString(
605 RowColReplacementMap& replacement_map,
606 const uint8_t row,
607 const uint8_t column,
608 std::string&& replacement_string) {
609 auto replacement_map_itr = replacement_map.find(row);
610 if (replacement_map_itr == replacement_map.cend()) {
611 replacement_map.emplace(row, std::unordered_map<uint8_t, std::string>{});
612 }
613 auto& column_map = replacement_map[row];
614 column_map.emplace(column, std::move(replacement_string));
615}
616
617} // namespace mp2t
618} // namespace media
619} // namespace shaka
All the methods that are virtual are virtual for mocking.