Shaka Packager SDK
vp_codec_configuration_record.cc
1 // Copyright 2015 Google LLC. All rights reserved.
2 //
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file or at
5 // https://developers.google.com/open-source/licenses/bsd
6 
7 #include <packager/media/codecs/vp_codec_configuration_record.h>
8 
9 #include <absl/strings/str_format.h>
10 #include <absl/strings/str_replace.h>
11 
12 #include <packager/macros/logging.h>
13 #include <packager/media/base/bit_reader.h>
14 #include <packager/media/base/buffer_reader.h>
15 #include <packager/media/base/buffer_writer.h>
16 #include <packager/media/base/rcheck.h>
17 
18 namespace shaka {
19 namespace media {
20 namespace {
// Feature IDs of the {ID, length, value} records that make up the WebM VP9
// codec private data. ParseWebM() dispatches on these IDs and WriteWebM()
// emits them; the numeric values are part of the serialized format.
enum VP9CodecFeatures {
  kFeatureProfile = 1,            // Record carrying the profile.
  kFeatureLevel = 2,              // Record carrying the level.
  kFeatureBitDepth = 3,           // Record carrying the bit depth.
  kFeatureChromaSubsampling = 4,  // Record carrying the chroma subsampling.
};
27 
28 std::string VPCodecAsString(Codec codec) {
29  switch (codec) {
30  case kCodecVP8:
31  return "vp08";
32  case kCodecVP9:
33  return "vp09";
34  default:
35  LOG(WARNING) << "Unknown VP codec: " << codec;
36  return std::string();
37  }
38 }
39 
40 template <typename T>
41 void MergeField(const std::string& name,
42  const std::optional<T>& source_value,
43  std::optional<T>* dest_value) {
44  if (*dest_value) {
45  if (source_value && *source_value != **dest_value) {
46  LOG(WARNING) << "VPx " << name << " is inconsistent, "
47  << static_cast<int>(**dest_value) << " vs "
48  << static_cast<int>(*source_value);
49  }
50  } else {
51  // Only set dest_value if it is not set.
52  *dest_value = source_value;
53  }
54 }
55 
// VP9 level identifiers. Each enumerator is the level number with the dot
// removed (e.g. level 4.1 is 41); LevelFromCharacteristics() returns these
// and SetVP9Level() stores them in the record.
enum VP9Level {
  LEVEL_UNKNOWN = 0,

  LEVEL_1 = 10,
  LEVEL_1_1 = 11,

  LEVEL_2 = 20,
  LEVEL_2_1 = 21,

  LEVEL_3 = 30,
  LEVEL_3_1 = 31,

  LEVEL_4 = 40,
  LEVEL_4_1 = 41,

  LEVEL_5 = 50,
  LEVEL_5_1 = 51,
  LEVEL_5_2 = 52,

  LEVEL_6 = 60,
  LEVEL_6_1 = 61,
  LEVEL_6_2 = 62,

  LEVEL_MAX = 255
};
74 
// Per-level limits, one row of the VP9 levels table. This is a plain
// aggregate: the member order must match the brace-initialized rows in
// LevelFromCharacteristics(). Only the first two members are consulted when
// picking a level; the rest are carried for completeness.
// NOTE(review): units are presumably those of the VP9 levels table
// (https://www.webmproject.org/vp9/levels/) -- confirm before relying on them.
struct VP9LevelCharacteristics {
  // Upper bound on the luma sample rate.
  uint64_t max_luma_sample_rate;
  // Upper bound on the luma picture size.
  uint32_t max_luma_picture_size;
  // Upper bound on the average bitrate.
  double max_avg_bitrate;
  // Upper bound on the CPB size.
  double max_cpb_size;
  // Lower bound on the compression ratio.
  double min_compression_ratio;
  // Upper bound on the number of column tiles.
  uint8_t max_num_column_tiles;
  // Lower bound on the alt-ref frame distance.
  uint32_t min_altref_distance;
  // Upper bound on the number of reference frame buffers.
  uint8_t max_ref_frame_buffers;
};
85 
86 struct VP9LevelDefinition {
87  VP9Level level;
88  VP9LevelCharacteristics characteristics;
89 };
90 
91 VP9Level LevelFromCharacteristics(uint64_t luma_sample_rate,
92  uint32_t luma_picture_size) {
93  // https://www.webmproject.org/vp9/levels/.
94  const VP9LevelDefinition vp9_level_definitions[] = {
95  {LEVEL_1, {829440, 36864, 200, 400, 2, 1, 4, 8}},
96  {LEVEL_1_1, {2764800, 73728, 800, 1000, 2, 1, 4, 8}},
97  {LEVEL_2, {4608000, 122880, 1800, 1500, 2, 1, 4, 8}},
98  {LEVEL_2_1, {9216000, 245760, 3600, 2800, 2, 2, 4, 8}},
99  {LEVEL_3, {20736000, 552960, 7200, 6000, 2, 4, 4, 8}},
100  {LEVEL_3_1, {36864000, 983040, 12000, 10000, 2, 4, 4, 8}},
101  {LEVEL_4, {83558400, 2228224, 18000, 16000, 4, 4, 4, 8}},
102  {LEVEL_4_1, {160432128, 2228224, 30000, 18000, 4, 4, 5, 6}},
103  {LEVEL_5, {311951360, 8912896, 60000, 36000, 6, 8, 6, 4}},
104  {LEVEL_5_1, {588251136, 8912896, 120000, 46000, 8, 8, 10, 4}},
105  {LEVEL_5_2, {1176502272, 8912896, 180000, 90000, 8, 8, 10, 4}},
106  {LEVEL_6, {1176502272, 35651584, 180000, 90000, 8, 16, 10, 4}},
107  {LEVEL_6_1, {2353004544u, 35651584, 240000, 180000, 8, 16, 10, 4}},
108  {LEVEL_6_2, {4706009088u, 35651584, 480000, 360000, 8, 16, 10, 4}},
109  };
110 
111  for (const VP9LevelDefinition& def : vp9_level_definitions) {
112  // All the characteristic fields except max_luma_sample_rate and
113  // max_luma_picture_size are ignored to avoid the extra complexities of
114  // computing those values. It may result in incorrect level being returned.
115  // If this is a problem, please file a bug to
116  // https://github.com/shaka-project/shaka-packager/issues.
117  if (luma_sample_rate <= def.characteristics.max_luma_sample_rate &&
118  luma_picture_size <= def.characteristics.max_luma_picture_size) {
119  return def.level;
120  }
121  }
122 
123  LOG(WARNING) << "Cannot determine VP9 level for luma_sample_rate ("
124  << luma_sample_rate << ") or luma_picture_size ("
125  << luma_picture_size << "). Returning LEVEL_1.";
126  return LEVEL_1;
127 }
128 
129 } // namespace
130 
131 VPCodecConfigurationRecord::VPCodecConfigurationRecord() {}
132 
133 VPCodecConfigurationRecord::VPCodecConfigurationRecord(
134  uint8_t profile,
135  uint8_t level,
136  uint8_t bit_depth,
137  uint8_t chroma_subsampling,
138  bool video_full_range_flag,
139  uint8_t color_primaries,
140  uint8_t transfer_characteristics,
141  uint8_t matrix_coefficients,
142  const std::vector<uint8_t>& codec_initialization_data)
143  : profile_(profile),
144  level_(level),
145  bit_depth_(bit_depth),
146  chroma_subsampling_(chroma_subsampling),
147  video_full_range_flag_(video_full_range_flag),
148  color_primaries_(color_primaries),
149  transfer_characteristics_(transfer_characteristics),
150  matrix_coefficients_(matrix_coefficients),
151  codec_initialization_data_(codec_initialization_data) {}
152 
153 VPCodecConfigurationRecord::~VPCodecConfigurationRecord(){};
154 
155 // https://www.webmproject.org/vp9/mp4/
156 bool VPCodecConfigurationRecord::ParseMP4(const std::vector<uint8_t>& data) {
157  BitReader reader(data.data(), data.size());
158  uint8_t value;
159  RCHECK(reader.ReadBits(8, &value));
160  profile_ = value;
161  RCHECK(reader.ReadBits(8, &value));
162  level_ = value;
163  RCHECK(reader.ReadBits(4, &value));
164  bit_depth_ = value;
165  RCHECK(reader.ReadBits(3, &value));
166  chroma_subsampling_ = value;
167  bool bool_value;
168  RCHECK(reader.ReadBits(1, &bool_value));
169  video_full_range_flag_ = bool_value;
170  RCHECK(reader.ReadBits(8, &value));
171  color_primaries_ = value;
172  RCHECK(reader.ReadBits(8, &value));
173  transfer_characteristics_ = value;
174  RCHECK(reader.ReadBits(8, &value));
175  matrix_coefficients_ = value;
176 
177  uint16_t codec_initialization_data_size = 0;
178  RCHECK(reader.ReadBits(16, &codec_initialization_data_size));
179  RCHECK(reader.bits_available() >= codec_initialization_data_size * 8u);
180  const size_t header_size = data.size() - reader.bits_available() / 8;
181  codec_initialization_data_.assign(
182  data.begin() + header_size,
183  data.begin() + header_size + codec_initialization_data_size);
184  return true;
185 }
186 
187 // http://wiki.webmproject.org/vp9-codecprivate
188 bool VPCodecConfigurationRecord::ParseWebM(const std::vector<uint8_t>& data) {
189  BufferReader reader(data.data(), data.size());
190 
191  while (reader.HasBytes(1)) {
192  uint8_t id;
193  uint8_t size;
194  RCHECK(reader.Read1(&id));
195  RCHECK(reader.Read1(&size));
196 
197  uint8_t value = 0;
198  switch (id) {
199  case kFeatureProfile:
200  RCHECK(size == 1);
201  RCHECK(reader.Read1(&value));
202  profile_ = value;
203  break;
204  case kFeatureLevel:
205  RCHECK(size == 1);
206  RCHECK(reader.Read1(&value));
207  level_ = value;
208  break;
209  case kFeatureBitDepth:
210  RCHECK(size == 1);
211  RCHECK(reader.Read1(&value));
212  bit_depth_ = value;
213  break;
214  case kFeatureChromaSubsampling:
215  RCHECK(size == 1);
216  RCHECK(reader.Read1(&value));
217  chroma_subsampling_ = value;
218  break;
219  default: {
220  LOG(WARNING) << "Skipping unknown VP9 codec feature " << id;
221  RCHECK(reader.SkipBytes(size));
222  }
223  }
224  }
225 
226  return true;
227 }
228 
229 void VPCodecConfigurationRecord::SetVP9Level(uint16_t width,
230  uint16_t height,
231  double sample_duration_seconds) {
232  // https://www.webmproject.org/vp9/levels/.
233 
234  const uint32_t luma_picture_size = width * height;
235  // Alt-Ref frames are not taken into consideration intentionally to avoid the
236  // extra complexities. It may result in smaller luma_sample_rate may than the
237  // actual luma_sample_rate, leading to incorrect level being returned.
238  // If this is a problem, please file a bug to
239  // https://github.com/shaka-project/shaka-packager/issues.
240  const double kUnknownSampleDuration = 0.0;
241  // The decision is based on luma_picture_size only if duration is unknown.
242  uint64_t luma_sample_rate = 0;
243  if (sample_duration_seconds != kUnknownSampleDuration)
244  luma_sample_rate = luma_picture_size / sample_duration_seconds;
245 
246  level_ = LevelFromCharacteristics(luma_sample_rate, luma_picture_size);
247 }
248 
249 void VPCodecConfigurationRecord::WriteMP4(std::vector<uint8_t>* data) const {
250  BufferWriter writer;
251  writer.AppendInt(profile());
252  writer.AppendInt(level());
253  uint8_t bit_depth_chroma = (bit_depth() << 4) | (chroma_subsampling() << 1) |
254  (video_full_range_flag() ? 1 : 0);
255  writer.AppendInt(bit_depth_chroma);
256  writer.AppendInt(color_primaries());
257  writer.AppendInt(transfer_characteristics());
258  writer.AppendInt(matrix_coefficients());
259  uint16_t codec_initialization_data_size =
260  static_cast<uint16_t>(codec_initialization_data_.size());
261  writer.AppendInt(codec_initialization_data_size);
262  writer.AppendVector(codec_initialization_data_);
263  writer.SwapBuffer(data);
264 }
265 
266 void VPCodecConfigurationRecord::WriteWebM(std::vector<uint8_t>* data) const {
267  BufferWriter writer;
268 
269  if (profile_) {
270  writer.AppendInt(static_cast<uint8_t>(kFeatureProfile)); // ID = 1
271  writer.AppendInt(static_cast<uint8_t>(1)); // Length = 1
272  writer.AppendInt(*profile_);
273  }
274 
275  if (level_) {
276  writer.AppendInt(static_cast<uint8_t>(kFeatureLevel)); // ID = 2
277  writer.AppendInt(static_cast<uint8_t>(1)); // Length = 1
278  writer.AppendInt(*level_);
279  }
280 
281  if (bit_depth_) {
282  writer.AppendInt(static_cast<uint8_t>(kFeatureBitDepth)); // ID = 3
283  writer.AppendInt(static_cast<uint8_t>(1)); // Length = 1
284  writer.AppendInt(*bit_depth_);
285  }
286 
287  if (chroma_subsampling_) {
288  // ID = 4, Length = 1
289  writer.AppendInt(static_cast<uint8_t>(kFeatureChromaSubsampling));
290  writer.AppendInt(static_cast<uint8_t>(1));
291  writer.AppendInt(*chroma_subsampling_);
292  }
293 
294  writer.SwapBuffer(data);
295 }
296 
297 std::string VPCodecConfigurationRecord::GetCodecString(Codec codec) const {
298  const std::string fields[] = {
299  absl::StrFormat("%d", profile()),
300  absl::StrFormat("%d", level()),
301  absl::StrFormat("%d", bit_depth()),
302  absl::StrFormat("%d", chroma_subsampling()),
303  absl::StrFormat("%d", color_primaries()),
304  absl::StrFormat("%d", transfer_characteristics()),
305  absl::StrFormat("%d", matrix_coefficients()),
306  (video_full_range_flag_ && *video_full_range_flag_) ? "01" : "00",
307  };
308 
309  std::string codec_string = VPCodecAsString(codec);
310  for (const std::string& field : fields) {
311  // Make sure every field is at least 2-chars wide. The space will be
312  // replaced with '0' afterwards.
313  absl::StrAppendFormat(&codec_string, ".%2s", field.c_str());
314  }
315  absl::StrReplaceAll({{" ", "0"}}, &codec_string);
316  return codec_string;
317 }
318 
319 void VPCodecConfigurationRecord::MergeFrom(
320  const VPCodecConfigurationRecord& other) {
321  MergeField("profile", other.profile_, &profile_);
322  MergeField("level", other.level_, &level_);
323  MergeField("bit depth", other.bit_depth_, &bit_depth_);
324  MergeField("chroma subsampling", other.chroma_subsampling_,
325  &chroma_subsampling_);
326  MergeField("video full range flag", other.video_full_range_flag_,
327  &video_full_range_flag_);
328  MergeField("color primaries", other.color_primaries_, &color_primaries_);
329  MergeField("transfer characteristics", other.transfer_characteristics_,
330  &transfer_characteristics_);
331  MergeField("matrix coefficients", other.matrix_coefficients_,
332  &matrix_coefficients_);
333 
334  if (codec_initialization_data_.empty() ||
335  !other.codec_initialization_data_.empty()) {
336  if (!codec_initialization_data_.empty() &&
337  codec_initialization_data_ != other.codec_initialization_data_) {
338  LOG(WARNING) << "VPx codec initialization data is inconsistent";
339  }
340  codec_initialization_data_ = other.codec_initialization_data_;
341  }
342 
343  MergeField("chroma location", other.chroma_location_, &chroma_location_);
344  UpdateChromaSubsamplingIfNeeded();
345 }
346 
347 void VPCodecConfigurationRecord::SetChromaSubsampling(uint8_t subsampling_x,
348  uint8_t subsampling_y) {
349  VLOG(3) << "Set Chroma subsampling " << static_cast<int>(subsampling_x) << " "
350  << static_cast<int>(subsampling_y);
351  if (subsampling_x == 0 && subsampling_y == 0) {
352  chroma_subsampling_ = CHROMA_444;
353  } else if (subsampling_x == 0 && subsampling_y == 1) {
354  chroma_subsampling_ = CHROMA_440;
355  } else if (subsampling_x == 1 && subsampling_y == 0) {
356  chroma_subsampling_ = CHROMA_422;
357  } else if (subsampling_x == 1 && subsampling_y == 1) {
358  // VP9 assumes that chrome samples are collocated with luma samples if
359  // there is no explicit signaling outside of VP9 bitstream.
360  chroma_subsampling_ = CHROMA_420_COLLOCATED_WITH_LUMA;
361  } else {
362  LOG(WARNING) << "Unexpected chroma subsampling values: "
363  << static_cast<int>(subsampling_x) << " "
364  << static_cast<int>(subsampling_y);
365  }
366  UpdateChromaSubsamplingIfNeeded();
367 }
368 
369 void VPCodecConfigurationRecord::SetChromaSubsampling(
370  ChromaSubsampling chroma_subsampling) {
371  chroma_subsampling_ = chroma_subsampling;
372  UpdateChromaSubsamplingIfNeeded();
373 }
374 
375 void VPCodecConfigurationRecord::SetChromaLocation(uint8_t chroma_siting_x,
376  uint8_t chroma_siting_y) {
377  VLOG(3) << "Set Chroma Location " << static_cast<int>(chroma_siting_x) << " "
378  << static_cast<int>(chroma_siting_y);
379  if (chroma_siting_x == kLeftCollocated && chroma_siting_y == kTopCollocated) {
380  chroma_location_ = AVCHROMA_LOC_TOPLEFT;
381  } else if (chroma_siting_x == kLeftCollocated && chroma_siting_y == kHalf) {
382  chroma_location_ = AVCHROMA_LOC_LEFT;
383  } else if (chroma_siting_x == kHalf && chroma_siting_y == kTopCollocated) {
384  chroma_location_ = AVCHROMA_LOC_TOP;
385  } else if (chroma_siting_x == kHalf && chroma_siting_y == kHalf) {
386  chroma_location_ = AVCHROMA_LOC_CENTER;
387  } else {
388  LOG(WARNING) << "Unexpected chroma siting values: "
389  << static_cast<int>(chroma_siting_x) << " "
390  << static_cast<int>(chroma_siting_y);
391  }
392  UpdateChromaSubsamplingIfNeeded();
393 }
394 
395 void VPCodecConfigurationRecord::UpdateChromaSubsamplingIfNeeded() {
396  // Use chroma location to fix the chroma subsampling format.
397  if (chroma_location_ && chroma_subsampling_ &&
398  (*chroma_subsampling_ == CHROMA_420_VERTICAL ||
399  *chroma_subsampling_ == CHROMA_420_COLLOCATED_WITH_LUMA)) {
400  if (*chroma_location_ == AVCHROMA_LOC_TOPLEFT)
401  chroma_subsampling_ = CHROMA_420_COLLOCATED_WITH_LUMA;
402  else if (*chroma_location_ == AVCHROMA_LOC_LEFT)
403  chroma_subsampling_ = CHROMA_420_VERTICAL;
404  VLOG(3) << "Chroma subsampling " << static_cast<int>(*chroma_subsampling_);
405  }
406 }
407 
408 } // namespace media
409 } // namespace shaka
A class to read bit streams.
Definition: bit_reader.h:20
size_t bits_available() const
Definition: bit_reader.h:92
bool ReadBits(size_t num_bits, T *out)
Definition: bit_reader.h:38
bool HasBytes(size_t count)
Definition: buffer_reader.h:30
bool SkipBytes(size_t num_bytes)
Class for parsing or writing VP codec configuration record.
All the methods that are virtual are virtual for mocking.
Definition: crypto_flags.cc:66