Shaka Packager SDK
Loading...
Searching...
No Matches
webvtt_to_mp4_handler.cc
1// Copyright 2017 Google LLC. All rights reserved.
2//
3// Use of this source code is governed by a BSD-style
4// license that can be found in the LICENSE file or at
5// https://developers.google.com/open-source/licenses/bsd
6
7#include <packager/media/formats/webvtt/webvtt_to_mp4_handler.h>
8
9#include <algorithm>
10#include <map>
11
12#include <absl/log/check.h>
13
14#include <packager/macros/logging.h>
15#include <packager/macros/status.h>
16#include <packager/media/base/buffer_writer.h>
17#include <packager/media/formats/mp4/box_buffer.h>
18#include <packager/media/formats/mp4/box_definitions.h>
19#include <packager/media/formats/webvtt/webvtt_utils.h>
20
21namespace shaka {
22namespace media {
23namespace {
// Stream index handed to DispatchMediaSample() for the media samples built in
// this file. Declared constexpr so this file-local value is a true
// compile-time constant and cannot be modified at runtime.
constexpr size_t kTrackId = 0;
25
// The kind of change a DisplayAction applies to the set of active cues.
enum class DisplayActionType {
  ADD,     // The referenced cue joins the active set.
  REMOVE,  // The referenced cue leaves the active set.
};
27
28struct DisplayAction {
29 DisplayActionType type;
30 const TextSample* sample;
31};
32
33std::multimap<int64_t, DisplayAction> CreateActionList(
34 int64_t segment_start,
35 int64_t segment_end,
36 const std::list<std::shared_ptr<const TextSample>>& samples) {
37 std::multimap<int64_t, DisplayAction> actions;
38
39 for (const auto& sample : samples) {
40 DCHECK(sample);
41
42 // The add action should occur either in this segment or in a previous
43 // segment.
44 DCHECK_LT(sample->start_time(), segment_end);
45 actions.insert(
46 {sample->start_time(), {DisplayActionType::ADD, sample.get()}});
47
48 // If the remove happens in a later segment, then we don't want to include
49 // that action.
50 if (sample->EndTime() < segment_end) {
51 actions.insert(
52 {sample->EndTime(), {DisplayActionType::REMOVE, sample.get()}});
53 }
54 }
55
56 return actions;
57}
58
59void WriteSample(const TextSample& sample, BufferWriter* out) {
60 mp4::VTTCueBox box;
61
62 if (sample.id().length()) {
63 box.cue_id.cue_id = sample.id();
64 }
65 box.cue_settings.settings = WebVttSettingsToString(sample.settings());
66 box.cue_payload.cue_text = WebVttFragmentToString(sample.body());
67
68 // If there is internal timing, i.e. WebVTT cue timestamp, then
69 // cue_current_time should be populated
70 // "which gives the VTT timestamp associated with the start time of sample."
71 // TODO(rkuroiwa): Reuse TimestampToMilliseconds() to check if there is an
72 // internal timestamp in the payload to set CueTimeBox.cue_current_time.
73 box.Write(out);
74}
75
76void WriteSamples(const std::list<const TextSample*>& samples,
77 BufferWriter* writer) {
78 DCHECK_GE(samples.size(), 0u);
79
80 for (const auto& sample : samples) {
81 WriteSample(*sample, writer);
82 }
83}
84
85void WriteEmptySample(BufferWriter* writer) {
86 mp4::VTTEmptyCueBox box;
87 box.Write(writer);
88}
89
90std::shared_ptr<MediaSample> CreateMediaSample(const BufferWriter& buffer,
91 int64_t start_time,
92 int64_t end_time) {
93 DCHECK_GE(start_time, 0);
94 DCHECK_GT(end_time, start_time);
95
96 const bool kIsKeyFrame = true;
97
98 std::shared_ptr<MediaSample> sample =
99 MediaSample::CopyFrom(buffer.Buffer(), buffer.Size(), kIsKeyFrame);
100 sample->set_pts(start_time);
101 sample->set_dts(start_time);
102 sample->set_duration(end_time - start_time);
103
104 return sample;
105}
106} // namespace
107
108Status WebVttToMp4Handler::InitializeInternal() {
109 return Status::OK;
110}
111
112Status WebVttToMp4Handler::Process(std::unique_ptr<StreamData> stream_data) {
113 switch (stream_data->stream_data_type) {
114 case StreamDataType::kStreamInfo:
115 return OnStreamInfo(std::move(stream_data));
116 case StreamDataType::kCueEvent:
117 return OnCueEvent(std::move(stream_data));
118 case StreamDataType::kSegmentInfo:
119 return OnSegmentInfo(std::move(stream_data));
120 case StreamDataType::kTextSample:
121 return OnTextSample(std::move(stream_data));
122 default:
123 return Status(error::INTERNAL_ERROR,
124 "Invalid stream data type (" +
125 StreamDataTypeToString(stream_data->stream_data_type) +
126 ") for this WebVttToMp4 handler");
127 }
128}
129
130Status WebVttToMp4Handler::OnStreamInfo(
131 std::unique_ptr<StreamData> stream_data) {
132 DCHECK(stream_data);
133 DCHECK(stream_data->stream_info);
134
135 auto clone = stream_data->stream_info->Clone();
136 clone->set_codec(kCodecWebVtt);
137 clone->set_codec_string("wvtt");
138
139 if (clone->stream_type() != kStreamText) {
140 return Status(error::MUXER_FAILURE, "Incorrect stream type");
141 }
142
143 return Dispatch(
144 StreamData::FromStreamInfo(stream_data->stream_index, std::move(clone)));
145}
146
147Status WebVttToMp4Handler::OnCueEvent(std::unique_ptr<StreamData> stream_data) {
148 DCHECK(stream_data);
149 DCHECK(stream_data->cue_event);
150
151 if (current_segment_.size()) {
152 return Status(error::INTERNAL_ERROR,
153 "Cue Events should come right after segment info.");
154 }
155
156 return Dispatch(std::move(stream_data));
157}
158
159Status WebVttToMp4Handler::OnSegmentInfo(
160 std::unique_ptr<StreamData> stream_data) {
161 DCHECK(stream_data);
162 DCHECK(stream_data->segment_info);
163
164 const auto& segment = stream_data->segment_info;
165
166 int64_t segment_start = segment->start_timestamp;
167 int64_t segment_duration = segment->duration;
168 int64_t segment_end = segment_start + segment_duration;
169
170 RETURN_IF_ERROR(DispatchCurrentSegment(segment_start, segment_end));
171 current_segment_.clear();
172
173 return Dispatch(std::move(stream_data));
174}
175
176Status WebVttToMp4Handler::OnTextSample(
177 std::unique_ptr<StreamData> stream_data) {
178 DCHECK(stream_data);
179 DCHECK(stream_data->text_sample);
180
181 auto& sample = stream_data->text_sample;
182
183 // Ignore empty samples. This will create gaps, but we will handle that
184 // later.
185 if (sample->body().is_empty()) {
186 return Status::OK;
187 }
188
189 // Add the new text sample to the cache of samples that belong in the
190 // current segment.
191 current_segment_.push_back(std::move(stream_data->text_sample));
192 return Status::OK;
193}
194
195Status WebVttToMp4Handler::DispatchCurrentSegment(int64_t segment_start,
196 int64_t segment_end) {
197 // Active will hold all the samples that are "on screen" for the current
198 // section of time.
199 std::list<const TextSample*> active;
200
201 // Move through the segment, jumping between each change to the current state.
202 // A change is defined as a group of one or more DisplayActions.
203 int section_start = segment_start;
204
205 // |actions| is a map of [time] -> [action].
206 auto actions = CreateActionList(segment_start, segment_end, current_segment_);
207 auto front = actions.begin();
208
209 // As it is possible to have a segment with no samples, we can't base this
210 // loop on the number of actions. So we need to keep iterating until we
211 // have written enough sections to get to the end of the segment.
212 while (section_start < segment_end) {
213 // Apply all actions that occur at the start of this part of the segment.
214 // Normally we would only want "== section_start" but as it is possible for
215 // samples to span multiple segments, their start time will be before the
216 // segment's start time. So we want to apply them too if they come before
217 // the segment. Thus why we use "<=".
218 while (front != actions.end() && front->first <= section_start) {
219 auto& action = front->second;
220
221 switch (action.type) {
222 case DisplayActionType::ADD: {
223 active.push_back(action.sample);
224 break;
225 }
226 case DisplayActionType::REMOVE: {
227 auto found = std::find(active.begin(), active.end(), action.sample);
228 DCHECK(found != active.end());
229 active.erase(found);
230 break;
231 }
232 default: {
233 NOTIMPLEMENTED() << "Unsupported DisplayActionType "
234 << static_cast<int>(action.type);
235 break;
236 }
237 }
238
239 // We have "consumed" the action at the front. We can move on.
240 front++;
241 }
242
243 // The end of the section will either be the start of the next section or
244 // the end of the segment.
245 int64_t section_end = front == actions.end() ? segment_end : front->first;
246 DCHECK_GT(section_end, section_start);
247 DCHECK_LE(section_end, segment_end);
248 RETURN_IF_ERROR(MergeDispatchSamples(section_start, section_end, active));
249
250 section_start = section_end;
251 }
252
253 DCHECK(front == actions.end()) << "We should have processed all actions.";
254
255 return Status::OK;
256}
257
258Status WebVttToMp4Handler::MergeDispatchSamples(
259 int64_t start_time,
260 int64_t end_time,
261 const std::list<const TextSample*>& state) {
262 DCHECK_GT(end_time, start_time);
263
264 box_writer_.Clear();
265
266 if (state.size()) {
267 WriteSamples(state, &box_writer_);
268 } else {
269 WriteEmptySample(&box_writer_);
270 }
271
272 return DispatchMediaSample(
273 kTrackId, CreateMediaSample(box_writer_, start_time, end_time));
274}
275} // namespace media
276} // namespace shaka
Status DispatchMediaSample(size_t stream_index, std::shared_ptr< const MediaSample > media_sample) const
Dispatch the media sample to downstream handlers.
Status Dispatch(std::unique_ptr< StreamData > stream_data) const
static std::shared_ptr< MediaSample > CopyFrom(const uint8_t *data, size_t size, bool is_key_frame)
All the methods that are virtual are virtual for mocking.