Shaka Packager SDK
Loading...
Searching...
No Matches
webvtt_utils.cc
1// Copyright 2017 Google LLC. All rights reserved.
2//
3// Use of this source code is governed by a BSD-style
4// license that can be found in the LICENSE file or at
5// https://developers.google.com/open-source/licenses/bsd
6
7#include <packager/media/formats/webvtt/webvtt_utils.h>
8
9#include <algorithm>
10#include <cctype>
11#include <cinttypes>
12#include <cmath>
13#include <unordered_set>
14
15#include <absl/log/check.h>
16#include <absl/log/log.h>
17#include <absl/strings/numbers.h>
18#include <absl/strings/str_format.h>
19
20#include <packager/macros/logging.h>
21
22namespace shaka {
23namespace media {
24
25namespace {
26
// Region ids that start with this prefix are left out of the cue settings
// string produced by WebVttSettingsToString().
constexpr const char* kRegionTeletextPrefix = "ttx_";
28
29bool GetTotalMilliseconds(uint64_t hours,
30 uint64_t minutes,
31 uint64_t seconds,
32 uint64_t ms,
33 int64_t* out) {
34 DCHECK(out);
35 if (minutes > 59 || seconds > 59 || ms > 999) {
36 VLOG(1) << "Hours:" << hours << " Minutes:" << minutes
37 << " Seconds:" << seconds << " MS:" << ms
38 << " shoud have never made it to GetTotalMilliseconds";
39 return false;
40 }
41 *out = 60 * 60 * 1000 * hours + 60 * 1000 * minutes + 1000 * seconds + ms;
42 return true;
43}
44
// The inline text styles that WriteFragment can express as WebVTT markup.
enum class StyleTagKind {
  kUnderline = 0,  // Written as <u>...</u>.
  kBold = 1,       // Written as <b>...</b>.
  kItalic = 2,     // Written as <i>...</i>.
};
50
51std::string GetOpenTag(StyleTagKind tag) {
52 switch (tag) {
53 case StyleTagKind::kUnderline:
54 return "<u>";
55 case StyleTagKind::kBold:
56 return "<b>";
57 case StyleTagKind::kItalic:
58 return "<i>";
59 }
60 return ""; // Not reached, but Windows doesn't like NOTIMPLEMENTED.
61}
62
63std::string GetCloseTag(StyleTagKind tag) {
64 switch (tag) {
65 case StyleTagKind::kUnderline:
66 return "</u>";
67 case StyleTagKind::kBold:
68 return "</b>";
69 case StyleTagKind::kItalic:
70 return "</i>";
71 }
72 return ""; // Not reached, but Windows doesn't like NOTIMPLEMENTED.
73}
74
// Returns true when |c| is a tab, carriage return, line feed or space.
bool IsWhitespace(char c) {
  switch (c) {
    case '\t':
    case '\r':
    case '\n':
    case ' ':
      return true;
    default:
      return false;
  }
}
78
79// Replace consecutive whitespaces with a single whitespace.
80std::string CollapseWhitespace(const std::string& data) {
81 std::string output;
82 output.resize(data.size());
83 size_t chars_written = 0;
84 bool in_whitespace = false;
85 for (char c : data) {
86 if (IsWhitespace(c)) {
87 if (!in_whitespace) {
88 in_whitespace = true;
89 output[chars_written++] = ' ';
90 }
91 } else {
92 in_whitespace = false;
93 output[chars_written++] = c;
94 }
95 }
96 output.resize(chars_written);
97 return output;
98}
99
100std::string WriteFragment(const TextFragment& fragment,
101 std::list<StyleTagKind>* tags) {
102 std::string ret;
103 size_t local_tag_count = 0;
104 auto has = [tags](StyleTagKind tag) {
105 return std::find(tags->begin(), tags->end(), tag) != tags->end();
106 };
107 auto push_tag = [tags, &local_tag_count, &has](StyleTagKind tag) {
108 if (has(tag)) {
109 return std::string();
110 }
111 tags->push_back(tag);
112 local_tag_count++;
113 return GetOpenTag(tag);
114 };
115
116 if ((fragment.style.underline == false && has(StyleTagKind::kUnderline)) ||
117 (fragment.style.bold == false && has(StyleTagKind::kBold)) ||
118 (fragment.style.italic == false && has(StyleTagKind::kItalic))) {
119 LOG(WARNING) << "WebVTT output doesn't support disabling "
120 "underline/bold/italic within a cue";
121 }
122
123 if (fragment.newline) {
124 // Newlines represent separate WebVTT cues. So close the existing tags to
125 // be nice and re-open them on the new line.
126 for (auto it = tags->rbegin(); it != tags->rend(); it++) {
127 ret += GetCloseTag(*it);
128 }
129 ret += "\n";
130 for (const auto tag : *tags) {
131 ret += GetOpenTag(tag);
132 }
133 } else {
134 if (fragment.style.underline == true) {
135 ret += push_tag(StyleTagKind::kUnderline);
136 }
137 if (fragment.style.bold == true) {
138 ret += push_tag(StyleTagKind::kBold);
139 }
140 if (fragment.style.italic == true) {
141 ret += push_tag(StyleTagKind::kItalic);
142 }
143
144 if (!fragment.body.empty()) {
145 // Replace newlines and consecutive whitespace with a single space. If
146 // the user wanted an explicit newline, they should use the "newline"
147 // field.
148 ret += CollapseWhitespace(fragment.body);
149 } else {
150 for (const auto& frag : fragment.sub_fragments) {
151 ret += WriteFragment(frag, tags);
152 }
153 }
154
155 // Pop all the local tags we pushed.
156 while (local_tag_count > 0) {
157 ret += GetCloseTag(tags->back());
158 tags->pop_back();
159 local_tag_count--;
160 }
161 }
162 return ret;
163}
164
165} // namespace
166
167bool WebVttTimestampToMs(const std::string_view& source, int64_t* out) {
168 DCHECK(out);
169
170 if (source.length() < 9) {
171 LOG(WARNING) << "Timestamp '" << source << "' is mal-formed";
172 return false;
173 }
174
175 const size_t minutes_begin = source.length() - 9;
176 const size_t seconds_begin = source.length() - 6;
177 const size_t milliseconds_begin = source.length() - 3;
178
179 uint64_t hours = 0;
180 uint64_t minutes = 0;
181 uint64_t seconds = 0;
182 uint64_t ms = 0;
183
184 const bool has_hours =
185 minutes_begin >= 3 && source[minutes_begin - 1] == ':' &&
186 absl::SimpleAtoi(source.substr(0, minutes_begin - 1), &hours);
187
188 if ((minutes_begin == 0 || has_hours) && source[seconds_begin - 1] == ':' &&
189 source[milliseconds_begin - 1] == '.' &&
190 absl::SimpleAtoi(source.substr(minutes_begin, 2), &minutes) &&
191 absl::SimpleAtoi(source.substr(seconds_begin, 2), &seconds) &&
192 absl::SimpleAtoi(source.substr(milliseconds_begin, 3), &ms)) {
193 return GetTotalMilliseconds(hours, minutes, seconds, ms, out);
194 }
195
196 LOG(WARNING) << "Timestamp '" << source << "' is mal-formed";
197 return false;
198}
199
200std::string MsToWebVttTimestamp(uint64_t ms) {
201 uint64_t remaining = ms;
202
203 uint64_t only_ms = remaining % 1000;
204 remaining /= 1000;
205 uint64_t only_seconds = remaining % 60;
206 remaining /= 60;
207 uint64_t only_minutes = remaining % 60;
208 remaining /= 60;
209 uint64_t only_hours = remaining;
210
211 return absl::StrFormat("%02" PRIu64 ":%02" PRIu64 ":%02" PRIu64 ".%03" PRIu64,
212 only_hours, only_minutes, only_seconds, only_ms);
213}
214
215std::string FloatToString(double number) {
216 // Keep up to microsecond accuracy but trim trailing 0s
217 std::string formatted = absl::StrFormat("%.6g", number);
218 size_t decimalPos = formatted.find('.');
219 if (decimalPos != std::string::npos) {
220 size_t lastNonZeroPos = formatted.find_last_not_of('0');
221 if (lastNonZeroPos >= decimalPos) {
222 formatted.erase(lastNonZeroPos + 1);
223 }
224 if (formatted.back() == '.') {
225 formatted.pop_back();
226 }
227 }
228
229 return formatted;
230}
231
232std::string WebVttSettingsToString(const TextSettings& settings) {
233 std::string ret;
234 if (!settings.region.empty() &&
235 settings.region.find(kRegionTeletextPrefix) != 0) {
236 // Don't add teletext ttx_ regions, since accompanied by global line numbers
237 ret += " region:";
238 ret += settings.region;
239 }
240 if (settings.line) {
241 switch (settings.line->type) {
242 case TextUnitType::kPercent:
243 ret += " line:";
244 ret += FloatToString(settings.line->value);
245 ret += "%";
246 break;
247 case TextUnitType::kLines:
248 ret += " line:";
249 // The line number should be an integer
250 ret += FloatToString(std::round(settings.line->value));
251 break;
252 case TextUnitType::kPixels:
253 LOG(WARNING) << "WebVTT doesn't support pixel line settings";
254 break;
255 }
256 }
257 if (settings.position) {
258 if (settings.position->type == TextUnitType::kPercent) {
259 ret += " position:";
260 ret += FloatToString(settings.position->value);
261 ret += "%";
262 } else {
263 LOG(WARNING) << "WebVTT only supports percent position settings";
264 }
265 }
266 if (settings.width) {
267 if (settings.width->type == TextUnitType::kPercent) {
268 ret += " size:";
269 ret += FloatToString(settings.width->value);
270 ret += "%";
271 } else {
272 LOG(WARNING) << "WebVTT only supports percent width settings";
273 }
274 }
275 if (settings.height) {
276 LOG(WARNING) << "WebVTT doesn't support cue heights";
277 }
278 if (settings.writing_direction != WritingDirection::kHorizontal) {
279 ret += " direction:";
280 if (settings.writing_direction == WritingDirection::kVerticalGrowingLeft) {
281 ret += "rl";
282 } else {
283 ret += "lr";
284 }
285 }
286 switch (settings.text_alignment) {
287 case TextAlignment::kStart:
288 ret += " align:start";
289 break;
290 case TextAlignment::kEnd:
291 ret += " align:end";
292 break;
293 case TextAlignment::kLeft:
294 ret += " align:left";
295 break;
296 case TextAlignment::kRight:
297 ret += " align:right";
298 break;
299 case TextAlignment::kCenter:
300 ret += " align:center";
301 break;
302 }
303
304 if (!ret.empty()) {
305 DCHECK_EQ(ret[0], ' ');
306 ret.erase(0, 1);
307 }
308 return ret;
309}
310
311std::string WebVttFragmentToString(const TextFragment& fragment) {
312 std::list<StyleTagKind> tags;
313 return WriteFragment(fragment, &tags);
314}
315
316std::string WebVttGetPreamble(const TextStreamInfo& stream_info) {
317 std::string ret;
318 for (const auto& pair : stream_info.regions()) {
319 if (!ret.empty()) {
320 ret += "\n\n";
321 }
322
323 if (pair.second.width.type != TextUnitType::kPercent ||
324 pair.second.height.type != TextUnitType::kLines ||
325 pair.second.window_anchor_x.type != TextUnitType::kPercent ||
326 pair.second.window_anchor_y.type != TextUnitType::kPercent ||
327 pair.second.region_anchor_x.type != TextUnitType::kPercent ||
328 pair.second.region_anchor_y.type != TextUnitType::kPercent) {
329 LOG(WARNING) << "Unsupported unit type in WebVTT region";
330 continue;
331 }
332
333 absl::StrAppendFormat(
334 &ret,
335 "REGION\n"
336 "id:%s\n"
337 "width:%f%%\n"
338 "lines:%d\n"
339 "viewportanchor:%f%%,%f%%\n"
340 "regionanchor:%f%%,%f%%",
341 pair.first.c_str(), pair.second.width.value,
342 static_cast<int>(pair.second.height.value),
343 pair.second.window_anchor_x.value, pair.second.window_anchor_y.value,
344 pair.second.region_anchor_x.value, pair.second.region_anchor_y.value);
345 if (pair.second.scroll) {
346 ret += "\nscroll:up";
347 }
348 }
349
350 if (!stream_info.css_styles().empty()) {
351 if (!ret.empty()) {
352 ret += "\n\n";
353 }
354 ret += "STYLE\n" + stream_info.css_styles();
355 }
356
357 return ret;
358}
359
360} // namespace media
361} // namespace shaka
All the methods that are virtual are virtual for mocking.