7 #include <packager/media/formats/webvtt/webvtt_parser.h>
9 #include <absl/log/check.h>
10 #include <absl/log/log.h>
11 #include <absl/strings/numbers.h>
12 #include <absl/strings/str_format.h>
13 #include <absl/strings/str_split.h>
15 #include <packager/kv_pairs/kv_pairs.h>
16 #include <packager/media/base/text_stream_info.h>
17 #include <packager/media/formats/webvtt/webvtt_utils.h>
18 #include <packager/utils/string_trim_split.h>
// Stream index handed to the new-text-sample callback for every cue this
// parser emits (see the `new_text_sample_cb_` call in ParseCue).
24 const uint64_t kStreamIndex = 0;
// Builds a printable dump of the first `size` lines of `block`, framed by
// " --- BLOCK START ---" and " --- BLOCK END ---" markers. The result is
// streamed into log messages when a block cannot be handled.
// NOTE(review): the loop body and the return statement are not present in
// this view of the file. Presumably each block[i] is appended to `out` and
// `out` is returned -- confirm against the complete source.
26 std::string BlockToString(
const std::string* block,
size_t size) {
27 std::string out =
" --- BLOCK START ---\n";
// Walks every line of the block (body not visible here).
29 for (
size_t i = 0; i < size; i++) {
35 out.append(
" --- BLOCK END ---");
44 bool IsLikelyNote(
const std::string& line) {
45 return line ==
"NOTE" || absl::StartsWith(line,
"NOTE ") ||
46 absl::StartsWith(line,
"NOTE\t");
// Returns true when `line` contains the "-->" arrow anywhere in it, which is
// the marker used to recognise a cue timing line. This is only a cheap
// classification; the timestamps themselves are not validated here.
bool IsLikelyCueTiming(const std::string& line) {
  return line.find("-->") != std::string::npos;
}
// Returns true when `line` could be a cue identifier, i.e. it does not
// contain the "-->" arrow. Any other content (including an empty string) is
// accepted, so this is the exact negation of the cue-timing check.
bool MaybeCueId(const std::string& line) {
  return line.find("-->") == std::string::npos;
}
69 bool IsLikelyStyle(
const std::string& line) {
70 return absl::StripTrailingAsciiWhitespace(line) ==
"STYLE";
77 bool IsLikelyRegion(
const std::string& line) {
78 return absl::StripTrailingAsciiWhitespace(line) ==
"REGION";
81 bool ParsePercent(
const std::string& str,
float* value) {
84 if (str[str.size() - 1] !=
'%') {
89 if (!absl::SimpleAtod(str.substr(0, str.size() - 1), &temp) || temp > 100) {
// Parses a comma-separated pair of percentages (e.g. "10%,90%") from `str`.
// The string is split on ',' with empty pieces dropped; exactly two pieces
// are required and each is parsed with ParsePercent into a temporary.
// NOTE(review): the failure returns and the code that stores the results in
// `*a` / `*b` are not present in this view. Presumably the outputs are only
// written when both halves parse -- confirm against the complete source.
96 bool ParseDoublePercent(
const std::string& str,
float* a,
float* b) {
97 std::vector<std::string> percents = SplitAndTrimSkipEmpty(str,
',');
// Anything other than exactly two components is rejected.
99 if (percents.size() != 2) {
// Parse into temporaries first so a failure on the second component is
// detected before anything is stored.
102 float temp_a, temp_b;
103 if (!ParsePercent(percents[0], &temp_a) ||
104 !ParsePercent(percents[1], &temp_b)) {
// Applies one WebVTT cue setting, already split into `id` and `value`, to
// `settings`. The ids handled in the visible code are "region", "vertical",
// "line", "position", "size" and "align"; anything else is reported as an
// unknown setting. The function returns void: invalid input is reported with
// LOG(WARNING) only.
// NOTE(review): a number of lines of this function are missing from this
// view (the first `value` comparison in the "vertical" branch, the
// declarations of `temp`, several `else` keywords and most closing braces).
// The comments below describe only what is visible.
112 void ParseSettings(
const std::string&
id,
113 const std::string& value,
114 TextSettings* settings) {
// "region": the value is stored verbatim as the region name.
116 if (
id ==
"region") {
117 settings->region = value;
118 }
// "vertical": selects the writing direction. "lr" maps to
// kVerticalGrowingRight; the condition guarding kVerticalGrowingLeft is not
// visible here. Unrecognised values produce a warning.
else if (
id ==
"vertical") {
120 settings->writing_direction = WritingDirection::kVerticalGrowingLeft;
121 }
else if (value ==
"lr") {
122 settings->writing_direction = WritingDirection::kVerticalGrowingRight;
124 LOG(WARNING) <<
"Invalid WebVTT vertical setting: " << value;
126 }
// "line": the value is split at the first ','. The part after the comma is
// the line alignment, which is not supported and only triggers a warning.
// NOTE(review): `align` is computed but not otherwise used in the visible
// lines.
else if (
id ==
"line") {
127 const auto pos = value.find(
',');
128 const std::string line = value.substr(0, pos);
129 const std::string align =
130 pos != std::string::npos ? value.substr(pos + 1) :
"";
131 if (pos != std::string::npos) {
132 LOG(WARNING) <<
"WebVTT line alignment isn't supported";
// A trailing '%' means a percentage; the emptiness check keeps the index
// expression in range.
135 if (!line.empty() && line[line.size() - 1] ==
'%') {
137 if (!ParsePercent(line, &temp)) {
138 LOG(WARNING) <<
"Invalid WebVTT line: " << value;
141 settings->line.emplace(temp, TextUnitType::kPercent);
// Otherwise the value is parsed as a plain number and stored as a line count.
144 if (!absl::SimpleAtod(line, &temp)) {
145 LOG(WARNING) <<
"Invalid WebVTT line: " << value;
148 settings->line.emplace(temp, TextUnitType::kLines);
150 }
// "position": same comma handling as "line"; the part before the comma must
// be a percentage.
else if (
id ==
"position") {
151 const auto pos = value.find(
',');
152 const std::string position = value.substr(0, pos);
153 const std::string align =
154 pos != std::string::npos ? value.substr(pos + 1) :
"";
155 if (pos != std::string::npos) {
156 LOG(WARNING) <<
"WebVTT position alignment isn't supported";
160 if (ParsePercent(position, &temp)) {
161 settings->position.emplace(temp, TextUnitType::kPercent);
163 LOG(WARNING) <<
"Invalid WebVTT position: " << value;
165 }
// "size": a percentage stored as the cue width.
else if (
id ==
"size") {
167 if (ParsePercent(value, &temp)) {
168 settings->width.emplace(temp, TextUnitType::kPercent);
170 LOG(WARNING) <<
"Invalid WebVTT size: " << value;
172 }
// "align": maps the keyword to a TextAlignment; "center" and "middle" are
// treated as the same value.
else if (
id ==
"align") {
173 if (value ==
"start") {
174 settings->text_alignment = TextAlignment::kStart;
175 }
else if (value ==
"center" || value ==
"middle") {
176 settings->text_alignment = TextAlignment::kCenter;
177 }
else if (value ==
"end") {
178 settings->text_alignment = TextAlignment::kEnd;
179 }
else if (value ==
"left") {
180 settings->text_alignment = TextAlignment::kLeft;
181 }
else if (value ==
"right") {
182 settings->text_alignment = TextAlignment::kRight;
184 LOG(WARNING) <<
"Invalid WebVTT align: " << value;
// Fallback for ids that matched none of the branches above.
187 LOG(WARNING) <<
"Unknown WebVTT setting: " << id;
// Default constructor. The body is empty, so no work is done here beyond
// whatever default initialization the members get.
193 WebVttParser::WebVttParser() {}
// NOTE(review): the signature line of this method is missing from this view;
// presumably this is the body of the parser's initialization method that
// receives `init_cb`, `new_text_sample_cb` and `decryption_key_source` --
// confirm against the complete source.
// Debug-only precondition: no init callback has been stored yet.
199 DCHECK(init_cb_ ==
nullptr);
// Debug-only precondition: both callbacks supplied by the caller are set.
200 DCHECK(init_cb !=
nullptr);
201 DCHECK(new_text_sample_cb !=
nullptr);
// Debug-only precondition: no decryption key source may be supplied.
202 DCHECK(!decryption_key_source) <<
"Encrypted WebVTT not supported";
// Keep the sample callback; ParseCue invokes it for each parsed cue.
205 new_text_sample_cb_ = new_text_sample_cb;
// Overload of Parse that accepts `size` bytes of input starting at `buf`.
// NOTE(review): the body is not present in this view. Presumably the bytes
// are handed to `reader_` and the no-argument Parse() is then run -- confirm
// against the complete source.
213 bool WebVttParser::Parse(
const uint8_t* buf,
int size) {
// Consumes the blocks currently available from `reader_`. The visible code
// has two phases: a header check on a single block, then a loop that passes
// every further block to ParseBlock.
// NOTE(review): the condition that selects the header phase, the return
// statements and the closing braces are missing from this view. The two
// `block` declarations presumably live in different scopes.
218 bool WebVttParser::Parse() {
220 std::vector<std::string> block;
221 if (!reader_.
Next(&block)) {
// The header block must consist of exactly one line.
227 if (block.size() != 1) {
228 LOG(WARNING) <<
"Failed to read WEBVTT header - "
229 <<
"block size should be 1 but was " << block.size() <<
".";
// Accepts "WEBVTT" with or without a leading UTF-8 byte order mark.
// NOTE(review): this is an exact match, so a header line that carries extra
// text after "WEBVTT" also ends up on this warning path -- confirm that is
// intended.
231 if (block[0] !=
"WEBVTT" && block[0] !=
"\xEF\xBB\xBFWEBVTT") {
232 LOG(WARNING) <<
"Failed to read WEBVTT header - should be WEBVTT but was "
// Main loop: hand each remaining block to ParseBlock. What happens when
// ParseBlock fails is not visible here.
238 std::vector<std::string> block;
239 while (reader_.
Next(&block)) {
240 if (!ParseBlock(block))
// Classifies one block by its first line(s) and dispatches it. The checks
// are made in this order: NOTE, STYLE, REGION, cue with an id line, cue
// without an id line. A block matching none of them is logged as an error
// together with its contents.
// NOTE(review): `block[0]` is read without a visible emptiness check --
// assumes the reader never yields an empty block; confirm. Return statements
// and several conditions are missing from this view.
246 bool WebVttParser::ParseBlock(
const std::vector<std::string>& block) {
// Comment block; its handling is not visible here.
248 if (IsLikelyNote(block[0])) {
// Style block: every line after the "STYLE" line is appended to
// `css_styles_`. A message is logged when the style block follows a cue (the
// guarding condition is not visible here).
254 if (IsLikelyStyle(block[0])) {
257 <<
"Found style block after seeing cue. Ignoring style block";
259 for (
size_t i = 1; i < block.size(); i++) {
// Something is inserted between lines once `css_styles_` is non-empty; the
// statement itself is not visible -- presumably a line separator.
260 if (!css_styles_.empty())
262 css_styles_ += block[i];
// Region block: delegated to ParseRegion, with the same "after a cue"
// message as for style blocks.
269 if (IsLikelyRegion(block[0])) {
272 <<
"Found region block after seeing cue. Ignoring region block";
275 return ParseRegion(block);
// Cue whose first line is an identifier and whose second line is the timing.
280 if (block.size() >= 2 && MaybeCueId(block[0]) &&
281 IsLikelyCueTiming(block[1]) && ParseCueWithId(block)) {
// Cue whose first line is already the timing line.
287 if (IsLikelyCueTiming(block[0]) && ParseCueWithNoId(block)) {
// Nothing matched, or the matching cue parser failed.
292 LOG(ERROR) <<
"Failed to determine block classification:\n"
293 << BlockToString(block.data(), block.size());
// Parses a REGION definition block into a region object and registers it in
// `regions_` under its id. Every line of the block is split into key:value
// pairs (':' between key and value, ' ' between pairs); the keys handled are
// "id", "width", "lines", "regionanchor", "viewportanchor" and "scroll".
// Problems are reported with LOG(ERROR).
//
// Repairs made in this block:
//  * the address-of expressions had been mangled into a registered-sign
//    character followed by "ion"; they are `&region...` again;
//  * the "viewportanchor" error message named a non-existent "windowanchor"
//    setting; it now names the setting the user actually wrote.
// NOTE(review): several lines are still missing from this view (the
// declaration of `region`, the handling of the first "REGION" line, the
// declaration of `temp`, the "scroll" value test, the return statements and
// most closing braces); they were not reconstructed.
297 bool WebVttParser::ParseRegion(
const std::vector<std::string>& block) {
299 std::string region_id;
// Defaults used when the block does not override a setting: full width,
// three lines high, both anchors at x = 0%, y = 100%.
302 region.width.value = 100;
303 region.width.type = TextUnitType::kPercent;
304 region.height.value = 3;
305 region.height.type = TextUnitType::kLines;
306 region.window_anchor_x.value = 0;
307 region.window_anchor_x.type = TextUnitType::kPercent;
308 region.window_anchor_y.value = 100;
309 region.window_anchor_y.type = TextUnitType::kPercent;
310 region.region_anchor_x.value = 0;
311 region.region_anchor_x.type = TextUnitType::kPercent;
312 region.region_anchor_y.value = 100;
313 region.region_anchor_y.type = TextUnitType::kPercent;
316 for (
const auto& line : block) {
323 std::vector<KVPair> kv_pairs = SplitStringIntoKeyValuePairs(line,
':',
' ');
325 for (
const auto& pair : kv_pairs) {
326 const std::string& value = pair.second;
// "id": must not contain "-->" and must not already be registered.
327 if (pair.first ==
"id") {
328 if (value.find(
"-->") != std::string::npos) {
329 LOG(ERROR) <<
"Invalid WebVTT REGION ID: " << value;
332 if (regions_.find(value) != regions_.end()) {
333 LOG(ERROR) <<
"Duplicate WebVTT REGION: " << value;
337 }
// "width": a percentage.
else if (pair.first ==
"width") {
338 if (!ParsePercent(value, &region.width.value)) {
339 LOG(ERROR) <<
"Invalid WebVTT REGION width: " << value;
342 }
// "lines": an integer stored as the region height.
else if (pair.first ==
"lines") {
344 if (!absl::SimpleAtoi(value, &temp)) {
345 LOG(ERROR) <<
"Invalid WebVTT REGION lines: " << value;
348 region.height.value = temp;
349 }
// "regionanchor": a pair of percentages "x%,y%".
else if (pair.first ==
"regionanchor") {
350 if (!ParseDoublePercent(value, &region.region_anchor_x.value,
351 &region.region_anchor_y.value)) {
352 LOG(ERROR) <<
"Invalid WebVTT REGION regionanchor: " << value;
355 }
// "viewportanchor": a pair of percentages stored in the window anchor.
else if (pair.first ==
"viewportanchor") {
356 if (!ParseDoublePercent(value, &region.window_anchor_x.value,
357 &region.window_anchor_y.value)) {
358 LOG(ERROR) <<
"Invalid WebVTT REGION viewportanchor: " << value;
361 }
// "scroll": enables scrolling; the test on the value is not visible here.
else if (pair.first ==
"scroll") {
363 LOG(ERROR) <<
"Invalid WebVTT REGION scroll: " << value;
366 region.scroll =
true;
368 LOG(ERROR) <<
"Unknown WebVTT REGION setting: " << pair.first;
// A region without an id cannot be referenced by a cue, so it is rejected.
373 if (region_id.empty()) {
374 LOG(ERROR) <<
"WebVTT REGION id is required";
377 regions_.insert(std::make_pair(region_id, std::move(region)));
381 bool WebVttParser::ParseCueWithNoId(
const std::vector<std::string>& block) {
382 return ParseCue(
"", block.data(), block.size());
385 bool WebVttParser::ParseCueWithId(
const std::vector<std::string>& block) {
386 return ParseCue(block[0], block.data() + 1, block.size() - 1);
// Parses one cue. `block[0]` is the timing line ("start --> end" optionally
// followed by settings) and the following lines are the cue text. On success
// a TextSample is built and handed to `new_text_sample_cb_`, whose result is
// returned.
// NOTE(review): parts of this function are missing from this view (the
// `block_size` parameter line, the test on `parsed_time`, the declaration of
// `body`, the return statements on the error paths and the closing braces).
389 bool WebVttParser::ParseCue(
const std::string&
id,
390 const std::string* block,
// Tokenise the timing line on spaces, dropping empty tokens.
392 std::vector<std::string> time_and_style =
393 SplitAndTrimSkipEmpty(block[0],
' ');
395 int64_t start_time = 0;
396 int64_t end_time = 0;
// The first three tokens must be <start> "-->" <end>, and both timestamps
// must convert via WebVttTimestampToMs.
398 const bool parsed_time =
399 time_and_style.size() >= 3 && time_and_style[1] ==
"-->" &&
400 WebVttTimestampToMs(time_and_style[0], &start_time) &&
401 WebVttTimestampToMs(time_and_style[2], &end_time);
404 LOG(ERROR) <<
"Could not parse start time, -->, and end time from "
// The stream info is dispatched once, when the first cue is reached, so that
// styles and regions collected before it are included.
409 if (!stream_info_dispatched_)
410 DispatchTextStreamInfo();
// A cue whose end is not after its start is logged as non-compliant; the
// message says the cue is skipped (the return statement is not visible).
423 if (end_time <= start_time) {
424 LOG(WARNING) <<
"WebVTT input is not spec compliant. Start time ("
425 << start_time <<
") should be less than end time (" << end_time
426 <<
"). Skipping webvtt cue:"
427 << BlockToString(block, block_size);
// Tokens after the end time are "key:value" settings. How a token without
// ':' is handled is not visible here.
431 TextSettings settings;
432 for (
size_t i = 3; i < time_and_style.size(); i++) {
433 const auto pos = time_and_style[i].find(
':');
434 if (pos == std::string::npos) {
438 const std::string key = time_and_style[i].substr(0, pos);
439 const std::string value = time_and_style[i].substr(pos + 1);
440 ParseSettings(key, value, &settings);
// Build the cue body: one sub-fragment per text line, with an extra fragment
// constructed from `true` before every line after the first (presumably a
// line break -- confirm against TextFragment).
// NOTE(review): `i != block_size` is always true inside this loop because
// the loop condition is `i < block_size`.
446 TextFragmentStyle no_styles;
447 for (
size_t i = 1; i < block_size; i++) {
448 if (i > 1 && i != block_size) {
449 body.sub_fragments.emplace_back(no_styles,
true);
451 body.sub_fragments.emplace_back(no_styles, block[i]);
455 std::make_shared<TextSample>(
id, start_time, end_time, settings, body);
456 return new_text_sample_cb_(kStreamIndex, sample);
// Builds the TextStreamInfo describing this WebVTT stream, attaches the CSS
// styles and regions collected so far, and wraps it in a one-element stream
// list. Also marks the stream info as dispatched so ParseCue does not call
// this again.
// NOTE(review): the statement that consumes `streams` is not present in this
// view -- presumably it is passed to the stored init callback; confirm.
459 void WebVttParser::DispatchTextStreamInfo() {
460 stream_info_dispatched_ =
true;
462 const int kTrackId = 0;
// NOTE(review): 1000 presumably means timestamps are in milliseconds,
// matching WebVttTimestampToMs -- confirm.
464 const int kTimescale = 1000;
468 const int kDuration = 0;
469 const char kWebVttCodecString[] =
"wvtt";
470 const int64_t kNoWidth = 0;
471 const int64_t kNoHeight = 0;
473 const char kNoLanguage[] =
"";
475 const auto stream = std::make_shared<TextStreamInfo>(
476 kTrackId, kTimescale, kDuration, kCodecWebVtt, kWebVttCodecString,
"",
477 kNoWidth, kNoHeight, kNoLanguage);
// Attach everything gathered from STYLE and REGION blocks so far.
478 stream->set_css_styles(css_styles_);
479 for (
const auto& pair : regions_)
480 stream->AddRegion(pair.first, pair.second);
482 std::vector<std::shared_ptr<StreamInfo>> streams{stream};
// All methods that are declared virtual are virtual so that they can be mocked.