Shaka Packager SDK
segmenter.cc
1 // Copyright 2015 Google LLC. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file or at
5 // https://developers.google.com/open-source/licenses/bsd
6 
7 #include <packager/media/formats/webm/segmenter.h>
8 
9 #include <absl/log/check.h>
10 #include <mkvmuxer/mkvmuxerutil.h>
11 
12 #include <packager/macros/logging.h>
13 #include <packager/media/base/audio_stream_info.h>
14 #include <packager/media/base/media_handler.h>
15 #include <packager/media/base/muxer_options.h>
16 #include <packager/media/base/video_stream_info.h>
17 #include <packager/media/codecs/vp_codec_configuration_record.h>
18 #include <packager/media/event/muxer_listener.h>
19 #include <packager/media/event/progress_listener.h>
20 #include <packager/media/formats/webm/encryptor.h>
21 #include <packager/media/formats/webm/webm_constants.h>
22 #include <packager/version/version.h>
23 
24 using mkvmuxer::AudioTrack;
25 using mkvmuxer::VideoTrack;
26 
27 namespace shaka {
28 namespace media {
29 namespace webm {
30 namespace {
// Number of nanoseconds represented by one WebM timecode unit.
constexpr int64_t kTimecodeScale = 1000000;
// Number of nanoseconds in one second.
constexpr int64_t kSecondsToNs = 1000000000;
33 
// Rounds |value| to the closest integer; halfway cases are rounded away from
// zero.
//
// Negative inputs are rounded through a signed intermediate and returned as
// the two's-complement bit pattern of the result, so a caller that stores the
// return value in an int64_t gets the expected negative number. Converting a
// negative double directly to uint64_t is undefined behavior, and adding 0.5
// before truncating would round negative values in the wrong direction.
uint64_t Round(double value) {
  if (value < 0)
    return static_cast<uint64_t>(static_cast<int64_t>(value - 0.5));
  return static_cast<uint64_t>(value + 0.5);
}
38 
39 // There are three different kinds of timestamp here:
40 // (1) ISO-BMFF timestamp (seconds scaled by ISO-BMFF timescale)
41 // This is used in our MediaSample and StreamInfo structures.
42 // (2) WebM timecode (seconds scaled by kSecondsToNs / WebM timecode scale)
43 // This is used in most WebM structures.
44 // (3) Nanoseconds (seconds scaled by kSecondsToNs)
45 // This is used in some WebM structures, e.g. Frame.
46 // We use Nanoseconds as intermediate format here for conversion, in
47 // uint64_t/int64_t, which is sufficient to represent a time as large as 292
48 // years.
49 
50 int64_t BmffTimestampToNs(int64_t timestamp, int64_t time_scale) {
51  // Casting to double is needed otherwise kSecondsToNs * timestamp may overflow
52  // uint64_t/int64_t.
53  return Round(static_cast<double>(timestamp) / time_scale * kSecondsToNs);
54 }
55 
56 int64_t NsToBmffTimestamp(int64_t ns, int64_t time_scale) {
57  // Casting to double is needed otherwise ns * time_scale may overflow
58  // uint64_t/int64_t.
59  return Round(static_cast<double>(ns) / kSecondsToNs * time_scale);
60 }
61 
// Converts nanoseconds to WebM timecode units by dividing by
// |timecode_scale|. The integer division truncates toward zero.
int64_t NsToWebMTimecode(int64_t ns, int64_t timecode_scale) {
  const int64_t timecode = ns / timecode_scale;
  return timecode;
}
65 
// Converts WebM timecode units to nanoseconds by multiplying by
// |timecode_scale|.
int64_t WebMTimecodeToNs(int64_t timecode, int64_t timecode_scale) {
  const int64_t nanoseconds = timecode * timecode_scale;
  return nanoseconds;
}
69 
70 } // namespace
71 
72 Segmenter::Segmenter(const MuxerOptions& options) : options_(options) {}
73 
74 Segmenter::~Segmenter() {}
75 
76 Status Segmenter::Initialize(const StreamInfo& info,
77  ProgressListener* progress_listener,
78  MuxerListener* muxer_listener) {
79  is_encrypted_ = info.is_encrypted();
80  duration_ = info.duration();
81  time_scale_ = info.time_scale();
82 
83  muxer_listener_ = muxer_listener;
84 
85  // Use media duration as progress target.
86  progress_target_ = info.duration();
87  progress_listener_ = progress_listener;
88 
89  segment_info_.Init();
90  segment_info_.set_timecode_scale(kTimecodeScale);
91 
92  const std::string version = GetPackagerVersion();
93  if (!version.empty()) {
94  segment_info_.set_writing_app(
95  (GetPackagerProjectUrl() + " version " + version).c_str());
96  }
97 
98  if (options().segment_template.empty()) {
99  // Set an initial duration so the duration element is written; will be
100  // overwritten at the end. This works because this is a float and floats
101  // are always the same size.
102  segment_info_.set_duration(1);
103  }
104 
105  // Create the track info.
106  // The seed is only used to create a UID which we overwrite later.
107  unsigned int seed = 0;
108  std::unique_ptr<mkvmuxer::Track> track;
109  Status status;
110  switch (info.stream_type()) {
111  case kStreamVideo: {
112  std::unique_ptr<VideoTrack> video_track(new VideoTrack(&seed));
113  status = InitializeVideoTrack(static_cast<const VideoStreamInfo&>(info),
114  video_track.get());
115  track = std::move(video_track);
116  break;
117  }
118  case kStreamAudio: {
119  std::unique_ptr<AudioTrack> audio_track(new AudioTrack(&seed));
120  status = InitializeAudioTrack(static_cast<const AudioStreamInfo&>(info),
121  audio_track.get());
122  track = std::move(audio_track);
123  break;
124  }
125  default:
126  NOTIMPLEMENTED() << "Not implemented for stream type: "
127  << info.stream_type();
128  status = Status(error::UNIMPLEMENTED, "Not implemented for stream type");
129  }
130  if (!status.ok())
131  return status;
132 
133  if (info.is_encrypted()) {
134  if (info.encryption_config().per_sample_iv_size != kWebMIvSize)
135  return Status(error::MUXER_FAILURE, "Incorrect size WebM encryption IV.");
136  status = UpdateTrackForEncryption(info.encryption_config().key_id,
137  track.get());
138  if (!status.ok())
139  return status;
140  }
141 
142  tracks_.AddTrack(track.get(), info.track_id());
143  // number() is only available after the above instruction.
144  track_id_ = track->number();
145  // |tracks_| owns |track|.
146  track.release();
147  return DoInitialize();
148 }
149 
150 Status Segmenter::Finalize() {
151  if (prev_sample_ && !prev_sample_->end_of_stream()) {
152  int64_t duration =
153  prev_sample_->pts() - first_timestamp_ + prev_sample_->duration();
154  segment_info_.set_duration(FromBmffTimestamp(duration));
155  }
156  return DoFinalize();
157 }
158 
159 Status Segmenter::AddSample(const MediaSample& source_sample) {
160  std::shared_ptr<MediaSample> sample(source_sample.Clone());
161 
162  // The duration of the first sample may have been adjusted, so use
163  // the duration of the second sample instead.
164  if (num_samples_ < 2) {
165  sample_durations_[num_samples_] = sample->duration();
166  if (num_samples_ == 0)
167  first_timestamp_ = sample->pts();
168  else if (muxer_listener_)
169  muxer_listener_->OnSampleDurationReady(sample_durations_[num_samples_]);
170  num_samples_++;
171  }
172 
173  UpdateProgress(sample->duration());
174 
175  // This writes frames in a delay. Meaning that the previous frame is written
176  // on this call to AddSample. The current frame is stored until the next
177  // call. This is done to determine which frame is the last in a Cluster.
178  // This first block determines if this is a new Cluster and writes the
179  // previous frame first before creating the new Cluster.
180 
181  Status status;
182  if (new_segment_ || new_subsegment_) {
183  status = NewSegment(sample->pts(), new_subsegment_);
184  } else {
185  status = WriteFrame(false /* write_duration */);
186  }
187  if (!status.ok())
188  return status;
189 
190  if (is_encrypted_)
191  UpdateFrameForEncryption(sample.get());
192 
193  new_subsegment_ = false;
194  new_segment_ = false;
195  prev_sample_ = sample;
196  return Status::OK;
197 }
198 
199 Status Segmenter::FinalizeSegment(int64_t /*start_timestamp*/,
200  int64_t /*duration_timestamp*/,
201  bool is_subsegment,
202  int64_t segment_number) {
203  if (is_subsegment)
204  new_subsegment_ = true;
205  else
206  new_segment_ = true;
207  return WriteFrame(true /* write duration */);
208 }
209 
210 float Segmenter::GetDurationInSeconds() const {
211  return WebMTimecodeToNs(segment_info_.duration(),
212  segment_info_.timecode_scale()) /
213  static_cast<double>(kSecondsToNs);
214 }
215 
216 int64_t Segmenter::FromBmffTimestamp(int64_t bmff_timestamp) {
217  return NsToWebMTimecode(
218  BmffTimestampToNs(bmff_timestamp, time_scale_),
219  segment_info_.timecode_scale());
220 }
221 
222 int64_t Segmenter::FromWebMTimecode(int64_t webm_timecode) {
223  return NsToBmffTimestamp(
224  WebMTimecodeToNs(webm_timecode, segment_info_.timecode_scale()),
225  time_scale_);
226 }
227 
228 Status Segmenter::WriteSegmentHeader(uint64_t file_size, MkvWriter* writer) {
229  Status error_status(error::FILE_FAILURE, "Error writing segment header.");
230 
231  if (!WriteEbmlHeader(writer))
232  return error_status;
233 
234  if (WriteID(writer, libwebm::kMkvSegment) != 0)
235  return error_status;
236 
237  const uint64_t segment_size_size = 8;
238  segment_payload_pos_ = writer->Position() + segment_size_size;
239  if (file_size > 0) {
240  // We want the size of the segment element, so subtract the header.
241  if (WriteUIntSize(writer, file_size - segment_payload_pos_,
242  segment_size_size) != 0)
243  return error_status;
244  if (!seek_head_.Write(writer))
245  return error_status;
246  } else {
247  if (SerializeInt(writer, mkvmuxer::kEbmlUnknownValue, segment_size_size) !=
248  0)
249  return error_status;
250  // We don't know the header size, so write a placeholder.
251  if (!seek_head_.WriteVoid(writer))
252  return error_status;
253  }
254 
255  seek_head_.set_info_pos(writer->Position() - segment_payload_pos_);
256  if (!segment_info_.Write(writer))
257  return error_status;
258 
259  seek_head_.set_tracks_pos(writer->Position() - segment_payload_pos_);
260  if (!tracks_.Write(writer))
261  return error_status;
262 
263  return Status::OK;
264 }
265 
266 Status Segmenter::SetCluster(int64_t start_webm_timecode,
267  uint64_t position,
268  MkvWriter* writer) {
269  const int64_t scale = segment_info_.timecode_scale();
270  cluster_.reset(new mkvmuxer::Cluster(start_webm_timecode, position, scale));
271  cluster_->Init(writer);
272  return Status::OK;
273 }
274 
275 void Segmenter::UpdateProgress(uint64_t progress) {
276  accumulated_progress_ += progress;
277  if (!progress_listener_ || progress_target_ == 0)
278  return;
279  // It might happen that accumulated progress exceeds progress_target due to
280  // computation errors, e.g. rounding error. Cap it so it never reports > 100%
281  // progress.
282  if (accumulated_progress_ >= progress_target_) {
283  progress_listener_->OnProgress(1.0);
284  } else {
285  progress_listener_->OnProgress(static_cast<double>(accumulated_progress_) /
286  progress_target_);
287  }
288 }
289 
290 Status Segmenter::InitializeVideoTrack(const VideoStreamInfo& info,
291  VideoTrack* track) {
292  if (info.codec() == kCodecAV1) {
293  track->set_codec_id("V_AV1");
294  if (!track->SetCodecPrivate(info.codec_config().data(),
295  info.codec_config().size())) {
296  return Status(error::INTERNAL_ERROR,
297  "Private codec data required for AV1 streams");
298  }
299  } else if (info.codec() == kCodecVP8) {
300  track->set_codec_id("V_VP8");
301  } else if (info.codec() == kCodecVP9) {
302  track->set_codec_id("V_VP9");
303 
304  // The |StreamInfo::codec_config| field is stored using the MP4 format; we
305  // need to convert it to the WebM format.
306  VPCodecConfigurationRecord vp_config;
307  if (!vp_config.ParseMP4(info.codec_config())) {
308  return Status(error::INTERNAL_ERROR,
309  "Unable to parse VP9 codec configuration");
310  }
311 
312  mkvmuxer::Colour colour;
313  if (vp_config.matrix_coefficients() != AVCOL_SPC_UNSPECIFIED) {
314  colour.set_matrix_coefficients(vp_config.matrix_coefficients());
315  }
316  if (vp_config.transfer_characteristics() != AVCOL_TRC_UNSPECIFIED) {
317  colour.set_transfer_characteristics(vp_config.transfer_characteristics());
318  }
319  if (vp_config.color_primaries() != AVCOL_PRI_UNSPECIFIED) {
320  colour.set_primaries(vp_config.color_primaries());
321  }
322  if (!track->SetColour(colour)) {
323  return Status(error::INTERNAL_ERROR,
324  "Failed to setup color element for VPx streams");
325  }
326 
327  std::vector<uint8_t> codec_config;
328  vp_config.WriteWebM(&codec_config);
329  if (!track->SetCodecPrivate(codec_config.data(), codec_config.size())) {
330  return Status(error::INTERNAL_ERROR,
331  "Private codec data required for VPx streams");
332  }
333  } else {
334  LOG(ERROR) << "Only VP8, VP9 and AV1 video codecs are supported in WebM.";
335  return Status(error::UNIMPLEMENTED,
336  "Only VP8, VP9 and AV1 video codecs are supported in WebM.");
337  }
338 
339  track->set_uid(info.track_id());
340  if (!info.language().empty())
341  track->set_language(info.language().c_str());
342  track->set_type(mkvmuxer::Tracks::kVideo);
343  track->set_width(info.width());
344  track->set_height(info.height());
345  track->set_display_height(info.height());
346  track->set_display_width(info.width() * info.pixel_width() /
347  info.pixel_height());
348  return Status::OK;
349 }
350 
351 Status Segmenter::InitializeAudioTrack(const AudioStreamInfo& info,
352  AudioTrack* track) {
353  if (info.codec() == kCodecOpus) {
354  track->set_codec_id(mkvmuxer::Tracks::kOpusCodecId);
355  } else if (info.codec() == kCodecVorbis) {
356  track->set_codec_id(mkvmuxer::Tracks::kVorbisCodecId);
357  } else {
358  LOG(ERROR) << "Only Vorbis and Opus audio codec are supported in WebM.";
359  return Status(error::UNIMPLEMENTED,
360  "Only Vorbis and Opus audio codecs are supported in WebM.");
361  }
362  if (!track->SetCodecPrivate(info.codec_config().data(),
363  info.codec_config().size())) {
364  return Status(error::INTERNAL_ERROR,
365  "Private codec data required for audio streams");
366  }
367 
368  track->set_uid(info.track_id());
369  if (!info.language().empty())
370  track->set_language(info.language().c_str());
371  track->set_type(mkvmuxer::Tracks::kAudio);
372  track->set_sample_rate(info.sampling_frequency());
373  track->set_channels(info.num_channels());
374  track->set_seek_pre_roll(info.seek_preroll_ns());
375  track->set_codec_delay(info.codec_delay_ns());
376  return Status::OK;
377 }
378 
379 Status Segmenter::WriteFrame(bool write_duration) {
380  // Create a frame manually so we can create non-SimpleBlock frames. This
381  // is required to allow the frame duration to be added. If the duration
382  // is not set, then a SimpleBlock will still be written.
383  mkvmuxer::Frame frame;
384 
385  if (!frame.Init(prev_sample_->data(), prev_sample_->data_size())) {
386  return Status(error::MUXER_FAILURE,
387  "Error adding sample to segment: Frame::Init failed");
388  }
389 
390  if (write_duration) {
391  frame.set_duration(
392  BmffTimestampToNs(prev_sample_->duration(), time_scale_));
393  }
394  frame.set_is_key(prev_sample_->is_key_frame());
395  frame.set_timestamp(
396  BmffTimestampToNs(prev_sample_->pts(), time_scale_));
397  frame.set_track_number(track_id_);
398 
399  if (prev_sample_->side_data_size() > 0) {
400  uint64_t block_add_id;
401  // First 8 bytes of side_data is the BlockAddID element's value, which is
402  // done to mimic ffmpeg behavior. See webm_cluster_parser.cc for details.
403  CHECK_GT(prev_sample_->side_data_size(), sizeof(block_add_id));
404  memcpy(&block_add_id, prev_sample_->side_data(), sizeof(block_add_id));
405  if (!frame.AddAdditionalData(
406  prev_sample_->side_data() + sizeof(block_add_id),
407  prev_sample_->side_data_size() - sizeof(block_add_id),
408  block_add_id)) {
409  return Status(
410  error::MUXER_FAILURE,
411  "Error adding sample to segment: Frame::AddAditionalData Failed");
412  }
413  }
414 
415  if (!prev_sample_->is_key_frame() && !frame.CanBeSimpleBlock()) {
416  frame.set_reference_block_timestamp(
417  BmffTimestampToNs(reference_frame_timestamp_, time_scale_));
418  }
419 
420  // GetRelativeTimecode will return -1 if the relative timecode is too large
421  // to fit in the frame.
422  if (cluster_->GetRelativeTimecode(NsToWebMTimecode(
423  frame.timestamp(), cluster_->timecode_scale())) < 0) {
424  const double segment_duration =
425  static_cast<double>(frame.timestamp() -
426  WebMTimecodeToNs(cluster_->timecode(),
427  cluster_->timecode_scale())) /
428  kSecondsToNs;
429  LOG(ERROR) << "Error adding sample to segment: segment too large, "
430  << segment_duration
431  << " seconds. Please check your GOP size and segment duration.";
432  return Status(error::MUXER_FAILURE,
433  "Error adding sample to segment: segment too large");
434  }
435 
436  if (!cluster_->AddFrame(&frame)) {
437  return Status(error::MUXER_FAILURE,
438  "Error adding sample to segment: Cluster::AddFrame failed");
439  }
440 
441  // A reference frame is needed for non-keyframes. Having a reference to the
442  // previous block is good enough.
443  // See libwebm Segment::AddGenericFrame
444  reference_frame_timestamp_ = prev_sample_->pts();
445  return Status::OK;
446 }
447 
448 } // namespace webm
449 } // namespace media
450 } // namespace shaka
Holds audio stream information.
Class to hold a media sample.
Definition: media_sample.h:25
std::shared_ptr< MediaSample > Clone() const
Clone the object and return a new MediaSample.
Definition: media_sample.cc:82
An implementation of IMkvWriter using our File type.
Definition: mkv_writer.h:23
mkvmuxer::int64 Position() const override
Definition: mkv_writer.cc:76
This class listens to progress updates events.
Abstract class holds stream information.
Definition: stream_info.h:71
Holds video stream information.
All the methods that are virtual are virtual for mocking.
Definition: crypto_flags.cc:66