Shaka Packager SDK
Loading...
Searching...
No Matches
webvtt_to_mp4_handler.cc
1// Copyright 2017 Google LLC. All rights reserved.
2//
3// Use of this source code is governed by a BSD-style
4// license that can be found in the LICENSE file or at
5// https://developers.google.com/open-source/licenses/bsd
6
7#include <packager/media/formats/webvtt/webvtt_to_mp4_handler.h>
8
9#include <algorithm>
10#include <map>
11
12#include <absl/log/check.h>
13
14#include <packager/macros/logging.h>
15#include <packager/macros/status.h>
16#include <packager/media/base/buffer_writer.h>
17#include <packager/media/base/timestamp_util.h>
18#include <packager/media/formats/mp4/box_buffer.h>
19#include <packager/media/formats/mp4/box_definitions.h>
20#include <packager/media/formats/webvtt/webvtt_utils.h>
21
22namespace shaka {
23namespace media {
24namespace {
// Stream index passed to DispatchMediaSample() for every media sample this
// handler emits. Declared constexpr so the value is a true compile-time
// constant and cannot be modified at runtime.
constexpr size_t kTrackId = 0;
26
// The kinds of change that can be applied to the set of cues that are
// currently "on screen".
enum class DisplayActionType {
  ADD,     // The cue is appended to the active set.
  REMOVE,  // The cue is taken out of the active set.
};
28
29struct DisplayAction {
30 DisplayActionType type;
31 const TextSample* sample;
32};
33
34std::multimap<int64_t, DisplayAction> CreateActionList(
35 int64_t segment_start,
36 int64_t segment_end,
37 const std::list<std::shared_ptr<const TextSample>>& samples) {
38 std::multimap<int64_t, DisplayAction> actions;
39
40 for (const auto& sample : samples) {
41 DCHECK(sample);
42
43 // The add action should occur either in this segment or in a previous
44 // segment. Use wrap-safe comparison since sample PTS may be wrapped
45 // but segment_end is unwrapped.
46 DCHECK(PtsIsBefore(sample->start_time(), segment_end))
47 << "Sample start " << sample->start_time()
48 << " should be before segment end " << segment_end;
49 actions.insert(
50 {sample->start_time(), {DisplayActionType::ADD, sample.get()}});
51
52 // If the remove happens in a later segment, then we don't want to include
53 // that action. Use wrap-safe comparison.
54 if (PtsIsBefore(sample->EndTime(), segment_end)) {
55 actions.insert(
56 {sample->EndTime(), {DisplayActionType::REMOVE, sample.get()}});
57 }
58 }
59
60 return actions;
61}
62
63void WriteSample(const TextSample& sample, BufferWriter* out) {
64 mp4::VTTCueBox box;
65
66 if (sample.id().length()) {
67 box.cue_id.cue_id = sample.id();
68 }
69 box.cue_settings.settings = WebVttSettingsToString(sample.settings());
70 box.cue_payload.cue_text = WebVttFragmentToString(sample.body());
71
72 // If there is internal timing, i.e. WebVTT cue timestamp, then
73 // cue_current_time should be populated
74 // "which gives the VTT timestamp associated with the start time of sample."
75 // TODO(rkuroiwa): Reuse TimestampToMilliseconds() to check if there is an
76 // internal timestamp in the payload to set CueTimeBox.cue_current_time.
77 box.Write(out);
78}
79
80void WriteSamples(const std::list<const TextSample*>& samples,
81 BufferWriter* writer) {
82 DCHECK_GE(samples.size(), 0u);
83
84 for (const auto& sample : samples) {
85 WriteSample(*sample, writer);
86 }
87}
88
89void WriteEmptySample(BufferWriter* writer) {
90 mp4::VTTEmptyCueBox box;
91 box.Write(writer);
92}
93
94std::shared_ptr<MediaSample> CreateMediaSample(const BufferWriter& buffer,
95 int64_t start_time,
96 int64_t end_time) {
97 DCHECK_GE(start_time, 0);
98 DCHECK_GT(end_time, start_time);
99
100 const bool kIsKeyFrame = true;
101
102 std::shared_ptr<MediaSample> sample =
103 MediaSample::CopyFrom(buffer.Buffer(), buffer.Size(), kIsKeyFrame);
104 sample->set_pts(start_time);
105 sample->set_dts(start_time);
106 sample->set_duration(end_time - start_time);
107
108 return sample;
109}
110} // namespace
111
112Status WebVttToMp4Handler::InitializeInternal() {
113 return Status::OK;
114}
115
116Status WebVttToMp4Handler::Process(std::unique_ptr<StreamData> stream_data) {
117 switch (stream_data->stream_data_type) {
118 case StreamDataType::kStreamInfo:
119 return OnStreamInfo(std::move(stream_data));
120 case StreamDataType::kCueEvent:
121 return OnCueEvent(std::move(stream_data));
122 case StreamDataType::kSegmentInfo:
123 return OnSegmentInfo(std::move(stream_data));
124 case StreamDataType::kTextSample:
125 return OnTextSample(std::move(stream_data));
126 default:
127 return Status(error::INTERNAL_ERROR,
128 "Invalid stream data type (" +
129 StreamDataTypeToString(stream_data->stream_data_type) +
130 ") for this WebVttToMp4 handler");
131 }
132}
133
134Status WebVttToMp4Handler::OnStreamInfo(
135 std::unique_ptr<StreamData> stream_data) {
136 DCHECK(stream_data);
137 DCHECK(stream_data->stream_info);
138
139 auto clone = stream_data->stream_info->Clone();
140 clone->set_codec(kCodecWebVtt);
141 clone->set_codec_string("wvtt");
142
143 if (clone->stream_type() != kStreamText) {
144 return Status(error::MUXER_FAILURE, "Incorrect stream type");
145 }
146
147 return Dispatch(
148 StreamData::FromStreamInfo(stream_data->stream_index, std::move(clone)));
149}
150
151Status WebVttToMp4Handler::OnCueEvent(std::unique_ptr<StreamData> stream_data) {
152 DCHECK(stream_data);
153 DCHECK(stream_data->cue_event);
154
155 if (current_segment_.size()) {
156 return Status(error::INTERNAL_ERROR,
157 "Cue Events should come right after segment info.");
158 }
159
160 return Dispatch(std::move(stream_data));
161}
162
163Status WebVttToMp4Handler::OnSegmentInfo(
164 std::unique_ptr<StreamData> stream_data) {
165 DCHECK(stream_data);
166 DCHECK(stream_data->segment_info);
167
168 const auto& segment = stream_data->segment_info;
169
170 int64_t segment_start = segment->start_timestamp;
171 int64_t segment_duration = segment->duration;
172 int64_t segment_end = segment_start + segment_duration;
173
174 RETURN_IF_ERROR(DispatchCurrentSegment(segment_start, segment_end));
175 current_segment_.clear();
176
177 return Dispatch(std::move(stream_data));
178}
179
180Status WebVttToMp4Handler::OnTextSample(
181 std::unique_ptr<StreamData> stream_data) {
182 DCHECK(stream_data);
183 DCHECK(stream_data->text_sample);
184
185 auto& sample = stream_data->text_sample;
186
187 // Ignore empty samples. This will create gaps, but we will handle that
188 // later.
189 if (sample->body().is_empty()) {
190 return Status::OK;
191 }
192
193 // Add the new text sample to the cache of samples that belong in the
194 // current segment.
195 current_segment_.push_back(std::move(stream_data->text_sample));
196 return Status::OK;
197}
198
199Status WebVttToMp4Handler::DispatchCurrentSegment(int64_t segment_start,
200 int64_t segment_end) {
201 // Active will hold all the samples that are "on screen" for the current
202 // section of time.
203 std::list<const TextSample*> active;
204
205 // Move through the segment, jumping between each change to the current state.
206 // A change is defined as a group of one or more DisplayActions.
207 int64_t section_start = segment_start;
208
209 // |actions| is a map of [time] -> [action].
210 auto actions = CreateActionList(segment_start, segment_end, current_segment_);
211 auto front = actions.begin();
212
213 // As it is possible to have a segment with no samples, we can't base this
214 // loop on the number of actions. So we need to keep iterating until we
215 // have written enough sections to get to the end of the segment.
216 while (section_start < segment_end) {
217 // Apply all actions that occur at the start of this part of the segment.
218 // Normally we would only want "== section_start" but as it is possible for
219 // samples to span multiple segments, their start time will be before the
220 // segment's start time. So we want to apply them too if they come before
221 // the segment. Thus why we use "<=".
222 while (front != actions.end() && front->first <= section_start) {
223 auto& action = front->second;
224
225 switch (action.type) {
226 case DisplayActionType::ADD: {
227 active.push_back(action.sample);
228 break;
229 }
230 case DisplayActionType::REMOVE: {
231 auto found = std::find(active.begin(), active.end(), action.sample);
232 DCHECK(found != active.end());
233 active.erase(found);
234 break;
235 }
236 default: {
237 NOTIMPLEMENTED() << "Unsupported DisplayActionType "
238 << static_cast<int>(action.type);
239 break;
240 }
241 }
242
243 // We have "consumed" the action at the front. We can move on.
244 front++;
245 }
246
247 // The end of the section will either be the start of the next section or
248 // the end of the segment.
249 int64_t section_end = front == actions.end() ? segment_end : front->first;
250 DCHECK_GT(section_end, section_start);
251 DCHECK_LE(section_end, segment_end);
252 RETURN_IF_ERROR(MergeDispatchSamples(section_start, section_end, active));
253
254 section_start = section_end;
255 }
256
257 DCHECK(front == actions.end()) << "We should have processed all actions.";
258
259 return Status::OK;
260}
261
262Status WebVttToMp4Handler::MergeDispatchSamples(
263 int64_t start_time,
264 int64_t end_time,
265 const std::list<const TextSample*>& state) {
266 DCHECK_GT(end_time, start_time);
267
268 box_writer_.Clear();
269
270 if (state.size()) {
271 WriteSamples(state, &box_writer_);
272 } else {
273 WriteEmptySample(&box_writer_);
274 }
275
276 return DispatchMediaSample(
277 kTrackId, CreateMediaSample(box_writer_, start_time, end_time));
278}
279} // namespace media
280} // namespace shaka
Status DispatchMediaSample(size_t stream_index, std::shared_ptr< const MediaSample > media_sample) const
Dispatch the media sample to downstream handlers.
Status Dispatch(std::unique_ptr< StreamData > stream_data) const
static std::shared_ptr< MediaSample > CopyFrom(const uint8_t *data, size_t size, bool is_key_frame)
All the methods that are virtual are virtual for mocking.