Shaka Packager SDK
webvtt_to_mp4_handler.cc
1 // Copyright 2017 Google LLC. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file or at
5 // https://developers.google.com/open-source/licenses/bsd
6 
7 #include <packager/media/formats/webvtt/webvtt_to_mp4_handler.h>
8 
9 #include <algorithm>
10 #include <map>
11 
12 #include <absl/log/check.h>
13 
14 #include <packager/macros/logging.h>
15 #include <packager/macros/status.h>
16 #include <packager/media/base/buffer_writer.h>
17 #include <packager/media/formats/mp4/box_buffer.h>
18 #include <packager/media/formats/mp4/box_definitions.h>
19 #include <packager/media/formats/webvtt/webvtt_utils.h>
20 
21 namespace shaka {
22 namespace media {
23 namespace {
// Stream index passed to DispatchMediaSample() for every sample produced by
// this handler. Declared constexpr so that it cannot be modified at runtime.
constexpr size_t kTrackId = 0;
25 
// The two kinds of change that can be applied to the list of active samples
// while walking through a segment.
enum class DisplayActionType {
  ADD,     // The sample is appended to the active list.
  REMOVE,  // The sample is erased from the active list.
};
27 
28 struct DisplayAction {
29  DisplayActionType type;
30  const TextSample* sample;
31 };
32 
33 std::multimap<int64_t, DisplayAction> CreateActionList(
34  int64_t segment_start,
35  int64_t segment_end,
36  const std::list<std::shared_ptr<const TextSample>>& samples) {
37  std::multimap<int64_t, DisplayAction> actions;
38 
39  for (const auto& sample : samples) {
40  DCHECK(sample);
41 
42  // The add action should occur either in this segment or in a previous
43  // segment.
44  DCHECK_LT(sample->start_time(), segment_end);
45  actions.insert(
46  {sample->start_time(), {DisplayActionType::ADD, sample.get()}});
47 
48  // If the remove happens in a later segment, then we don't want to include
49  // that action.
50  if (sample->EndTime() < segment_end) {
51  actions.insert(
52  {sample->EndTime(), {DisplayActionType::REMOVE, sample.get()}});
53  }
54  }
55 
56  return actions;
57 }
58 
59 void WriteSample(const TextSample& sample, BufferWriter* out) {
60  mp4::VTTCueBox box;
61 
62  if (sample.id().length()) {
63  box.cue_id.cue_id = sample.id();
64  }
65  box.cue_settings.settings = WebVttSettingsToString(sample.settings());
66  box.cue_payload.cue_text = WebVttFragmentToString(sample.body());
67 
68  // If there is internal timing, i.e. WebVTT cue timestamp, then
69  // cue_current_time should be populated
70  // "which gives the VTT timestamp associated with the start time of sample."
71  // TODO(rkuroiwa): Reuse TimestampToMilliseconds() to check if there is an
72  // internal timestamp in the payload to set CueTimeBox.cue_current_time.
73  box.Write(out);
74 }
75 
76 void WriteSamples(const std::list<const TextSample*>& samples,
77  BufferWriter* writer) {
78  DCHECK_GE(samples.size(), 0u);
79 
80  for (const auto& sample : samples) {
81  WriteSample(*sample, writer);
82  }
83 }
84 
85 void WriteEmptySample(BufferWriter* writer) {
86  mp4::VTTEmptyCueBox box;
87  box.Write(writer);
88 }
89 
90 std::shared_ptr<MediaSample> CreateMediaSample(const BufferWriter& buffer,
91  int64_t start_time,
92  int64_t end_time) {
93  DCHECK_GE(start_time, 0);
94  DCHECK_GT(end_time, start_time);
95 
96  const bool kIsKeyFrame = true;
97 
98  std::shared_ptr<MediaSample> sample =
99  MediaSample::CopyFrom(buffer.Buffer(), buffer.Size(), kIsKeyFrame);
100  sample->set_pts(start_time);
101  sample->set_dts(start_time);
102  sample->set_duration(end_time - start_time);
103 
104  return sample;
105 }
106 } // namespace
107 
108 Status WebVttToMp4Handler::InitializeInternal() {
109  return Status::OK;
110 }
111 
112 Status WebVttToMp4Handler::Process(std::unique_ptr<StreamData> stream_data) {
113  switch (stream_data->stream_data_type) {
114  case StreamDataType::kStreamInfo:
115  return OnStreamInfo(std::move(stream_data));
116  case StreamDataType::kCueEvent:
117  return OnCueEvent(std::move(stream_data));
118  case StreamDataType::kSegmentInfo:
119  return OnSegmentInfo(std::move(stream_data));
120  case StreamDataType::kTextSample:
121  return OnTextSample(std::move(stream_data));
122  default:
123  return Status(error::INTERNAL_ERROR,
124  "Invalid stream data type (" +
125  StreamDataTypeToString(stream_data->stream_data_type) +
126  ") for this WebVttToMp4 handler");
127  }
128 }
129 
130 Status WebVttToMp4Handler::OnStreamInfo(
131  std::unique_ptr<StreamData> stream_data) {
132  DCHECK(stream_data);
133  DCHECK(stream_data->stream_info);
134 
135  auto clone = stream_data->stream_info->Clone();
136  clone->set_codec(kCodecWebVtt);
137  clone->set_codec_string("wvtt");
138 
139  if (clone->stream_type() != kStreamText) {
140  return Status(error::MUXER_FAILURE, "Incorrect stream type");
141  }
142 
143  return Dispatch(
144  StreamData::FromStreamInfo(stream_data->stream_index, std::move(clone)));
145 }
146 
147 Status WebVttToMp4Handler::OnCueEvent(std::unique_ptr<StreamData> stream_data) {
148  DCHECK(stream_data);
149  DCHECK(stream_data->cue_event);
150 
151  if (current_segment_.size()) {
152  return Status(error::INTERNAL_ERROR,
153  "Cue Events should come right after segment info.");
154  }
155 
156  return Dispatch(std::move(stream_data));
157 }
158 
159 Status WebVttToMp4Handler::OnSegmentInfo(
160  std::unique_ptr<StreamData> stream_data) {
161  DCHECK(stream_data);
162  DCHECK(stream_data->segment_info);
163 
164  const auto& segment = stream_data->segment_info;
165 
166  int64_t segment_start = segment->start_timestamp;
167  int64_t segment_duration = segment->duration;
168  int64_t segment_end = segment_start + segment_duration;
169 
170  RETURN_IF_ERROR(DispatchCurrentSegment(segment_start, segment_end));
171  current_segment_.clear();
172 
173  return Dispatch(std::move(stream_data));
174 }
175 
176 Status WebVttToMp4Handler::OnTextSample(
177  std::unique_ptr<StreamData> stream_data) {
178  DCHECK(stream_data);
179  DCHECK(stream_data->text_sample);
180 
181  auto& sample = stream_data->text_sample;
182 
183  // Ignore empty samples. This will create gaps, but we will handle that
184  // later.
185  if (sample->body().is_empty()) {
186  return Status::OK;
187  }
188 
189  // Add the new text sample to the cache of samples that belong in the
190  // current segment.
191  current_segment_.push_back(std::move(stream_data->text_sample));
192  return Status::OK;
193 }
194 
195 Status WebVttToMp4Handler::DispatchCurrentSegment(int64_t segment_start,
196  int64_t segment_end) {
197  // Active will hold all the samples that are "on screen" for the current
198  // section of time.
199  std::list<const TextSample*> active;
200 
201  // Move through the segment, jumping between each change to the current state.
202  // A change is defined as a group of one or more DisplayActions.
203  int section_start = segment_start;
204 
205  // |actions| is a map of [time] -> [action].
206  auto actions = CreateActionList(segment_start, segment_end, current_segment_);
207  auto front = actions.begin();
208 
209  // As it is possible to have a segment with no samples, we can't base this
210  // loop on the number of actions. So we need to keep iterating until we
211  // have written enough sections to get to the end of the segment.
212  while (section_start < segment_end) {
213  // Apply all actions that occur at the start of this part of the segment.
214  // Normally we would only want "== section_start" but as it is possible for
215  // samples to span multiple segments, their start time will be before the
216  // segment's start time. So we want to apply them too if they come before
217  // the segment. Thus why we use "<=".
218  while (front != actions.end() && front->first <= section_start) {
219  auto& action = front->second;
220 
221  switch (action.type) {
222  case DisplayActionType::ADD: {
223  active.push_back(action.sample);
224  break;
225  }
226  case DisplayActionType::REMOVE: {
227  auto found = std::find(active.begin(), active.end(), action.sample);
228  DCHECK(found != active.end());
229  active.erase(found);
230  break;
231  }
232  default: {
233  NOTIMPLEMENTED() << "Unsupported DisplayActionType "
234  << static_cast<int>(action.type);
235  break;
236  }
237  }
238 
239  // We have "consumed" the action at the front. We can move on.
240  front++;
241  }
242 
243  // The end of the section will either be the start of the next section or
244  // the end of the segment.
245  int64_t section_end = front == actions.end() ? segment_end : front->first;
246  DCHECK_GT(section_end, section_start);
247  DCHECK_LE(section_end, segment_end);
248  RETURN_IF_ERROR(MergeDispatchSamples(section_start, section_end, active));
249 
250  section_start = section_end;
251  }
252 
253  DCHECK(front == actions.end()) << "We should have processed all actions.";
254 
255  return Status::OK;
256 }
257 
258 Status WebVttToMp4Handler::MergeDispatchSamples(
259  int64_t start_time,
260  int64_t end_time,
261  const std::list<const TextSample*>& state) {
262  DCHECK_GT(end_time, start_time);
263 
264  box_writer_.Clear();
265 
266  if (state.size()) {
267  WriteSamples(state, &box_writer_);
268  } else {
269  WriteEmptySample(&box_writer_);
270  }
271 
272  return DispatchMediaSample(
273  kTrackId, CreateMediaSample(box_writer_, start_time, end_time));
274 }
275 } // namespace media
276 } // namespace shaka
Status DispatchMediaSample(size_t stream_index, std::shared_ptr< const MediaSample > media_sample) const
Dispatch the media sample to downstream handlers.
Status Dispatch(std::unique_ptr< StreamData > stream_data) const
static std::shared_ptr< MediaSample > CopyFrom(const uint8_t *data, size_t size, bool is_key_frame)
Definition: media_sample.cc:43
All the methods that are virtual are virtual for mocking.
Definition: crypto_flags.cc:66