Shaka Packager SDK
Loading...
Searching...
No Matches
vp_codec_configuration_record.cc
1// Copyright 2015 Google LLC. All rights reserved.
2//
3// Use of this source code is governed by a BSD-style
4// license that can be found in the LICENSE file or at
5// https://developers.google.com/open-source/licenses/bsd
6
7#include <packager/media/codecs/vp_codec_configuration_record.h>
8
9#include <absl/strings/str_format.h>
10#include <absl/strings/str_replace.h>
11
12#include <packager/macros/logging.h>
13#include <packager/media/base/bit_reader.h>
14#include <packager/media/base/buffer_reader.h>
15#include <packager/media/base/buffer_writer.h>
16#include <packager/media/base/rcheck.h>
17
18namespace shaka {
19namespace media {
20namespace {
// Feature IDs used in the WebM VP9 CodecPrivate data. Every feature is stored
// as an (ID, length, value) triple; see ParseWebM() / WriteWebM().
enum VP9CodecFeatures {
  kFeatureProfile = 1,            // Profile.
  kFeatureLevel = 2,              // Level.
  kFeatureBitDepth = 3,           // Bit depth.
  kFeatureChromaSubsampling = 4,  // Chroma subsampling.
};
27
28std::string VPCodecAsString(Codec codec) {
29 switch (codec) {
30 case kCodecVP8:
31 return "vp08";
32 case kCodecVP9:
33 return "vp09";
34 default:
35 LOG(WARNING) << "Unknown VP codec: " << codec;
36 return std::string();
37 }
38}
39
40template <typename T>
41void MergeField(const std::string& name,
42 const std::optional<T>& source_value,
43 std::optional<T>* dest_value) {
44 if (*dest_value) {
45 if (source_value && *source_value != **dest_value) {
46 LOG(WARNING) << "VPx " << name << " is inconsistent, "
47 << static_cast<int>(**dest_value) << " vs "
48 << static_cast<int>(*source_value);
49 }
50 } else {
51 // Only set dest_value if it is not set.
52 *dest_value = source_value;
53 }
54}
55
// VP9 levels. Each enumerator's value is the level number multiplied by ten
// (e.g. level 5.1 is 51).
enum VP9Level {
  LEVEL_UNKNOWN = 0,

  LEVEL_1 = 10,
  LEVEL_1_1 = 11,

  LEVEL_2 = 20,
  LEVEL_2_1 = 21,

  LEVEL_3 = 30,
  LEVEL_3_1 = 31,

  LEVEL_4 = 40,
  LEVEL_4_1 = 41,

  LEVEL_5 = 50,
  LEVEL_5_1 = 51,
  LEVEL_5_2 = 52,

  LEVEL_6 = 60,
  LEVEL_6_1 = 61,
  LEVEL_6_2 = 62,

  LEVEL_MAX = 255
};
74
// Upper/lower bounds that define one VP9 level. The member order matters:
// instances are aggregate-initialized positionally.
struct VP9LevelCharacteristics {
  uint64_t max_luma_sample_rate;   // Maximum luma samples per second.
  uint32_t max_luma_picture_size;  // Maximum luma samples per picture.
  double max_avg_bitrate;          // Maximum average bitrate.
  double max_cpb_size;             // Maximum CPB size.
  double min_compression_ratio;    // Minimum compression ratio.
  uint8_t max_num_column_tiles;    // Maximum number of column tiles.
  uint32_t min_altref_distance;    // Minimum alt-ref distance.
  uint8_t max_ref_frame_buffers;   // Maximum number of reference frame buffers.
};
85
86struct VP9LevelDefinition {
87 VP9Level level;
88 VP9LevelCharacteristics characteristics;
89};
90
91VP9Level LevelFromCharacteristics(uint64_t luma_sample_rate,
92 uint32_t luma_picture_size) {
93 // https://www.webmproject.org/vp9/levels/.
94 const VP9LevelDefinition vp9_level_definitions[] = {
95 {LEVEL_1, {829440, 36864, 200, 400, 2, 1, 4, 8}},
96 {LEVEL_1_1, {2764800, 73728, 800, 1000, 2, 1, 4, 8}},
97 {LEVEL_2, {4608000, 122880, 1800, 1500, 2, 1, 4, 8}},
98 {LEVEL_2_1, {9216000, 245760, 3600, 2800, 2, 2, 4, 8}},
99 {LEVEL_3, {20736000, 552960, 7200, 6000, 2, 4, 4, 8}},
100 {LEVEL_3_1, {36864000, 983040, 12000, 10000, 2, 4, 4, 8}},
101 {LEVEL_4, {83558400, 2228224, 18000, 16000, 4, 4, 4, 8}},
102 {LEVEL_4_1, {160432128, 2228224, 30000, 18000, 4, 4, 5, 6}},
103 {LEVEL_5, {311951360, 8912896, 60000, 36000, 6, 8, 6, 4}},
104 {LEVEL_5_1, {588251136, 8912896, 120000, 46000, 8, 8, 10, 4}},
105 {LEVEL_5_2, {1176502272, 8912896, 180000, 90000, 8, 8, 10, 4}},
106 {LEVEL_6, {1176502272, 35651584, 180000, 90000, 8, 16, 10, 4}},
107 {LEVEL_6_1, {2353004544u, 35651584, 240000, 180000, 8, 16, 10, 4}},
108 {LEVEL_6_2, {4706009088u, 35651584, 480000, 360000, 8, 16, 10, 4}},
109 };
110
111 for (const VP9LevelDefinition& def : vp9_level_definitions) {
112 // All the characteristic fields except max_luma_sample_rate and
113 // max_luma_picture_size are ignored to avoid the extra complexities of
114 // computing those values. It may result in incorrect level being returned.
115 // If this is a problem, please file a bug to
116 // https://github.com/shaka-project/shaka-packager/issues.
117 if (luma_sample_rate <= def.characteristics.max_luma_sample_rate &&
118 luma_picture_size <= def.characteristics.max_luma_picture_size) {
119 return def.level;
120 }
121 }
122
123 LOG(WARNING) << "Cannot determine VP9 level for luma_sample_rate ("
124 << luma_sample_rate << ") or luma_picture_size ("
125 << luma_picture_size << "). Returning LEVEL_1.";
126 return LEVEL_1;
127}
128
129} // namespace
130
131VPCodecConfigurationRecord::VPCodecConfigurationRecord() {}
132
133VPCodecConfigurationRecord::VPCodecConfigurationRecord(
134 uint8_t profile,
135 uint8_t level,
136 uint8_t bit_depth,
137 uint8_t chroma_subsampling,
138 bool video_full_range_flag,
139 uint8_t color_primaries,
140 uint8_t transfer_characteristics,
141 uint8_t matrix_coefficients,
142 const std::vector<uint8_t>& codec_initialization_data)
143 : profile_(profile),
144 level_(level),
145 bit_depth_(bit_depth),
146 chroma_subsampling_(chroma_subsampling),
147 video_full_range_flag_(video_full_range_flag),
148 color_primaries_(color_primaries),
149 transfer_characteristics_(transfer_characteristics),
150 matrix_coefficients_(matrix_coefficients),
151 codec_initialization_data_(codec_initialization_data) {}
152
153VPCodecConfigurationRecord::~VPCodecConfigurationRecord(){};
154
155// https://www.webmproject.org/vp9/mp4/
// Parses an MP4 VP codec configuration payload from |data| into this record.
//
// Fields are read in this order and width: profile (8 bits), level (8),
// bit depth (4), chroma subsampling (3), video full range flag (1),
// color primaries (8), transfer characteristics (8), matrix coefficients (8),
// a 16-bit codec initialization data size, followed by that many bytes of
// codec initialization data.
//
// Returns true on success. RCHECK is defined in rcheck.h (not visible here);
// presumably it returns false when its condition fails. Fields assigned before
// a failing read keep their newly parsed values.
156bool VPCodecConfigurationRecord::ParseMP4(const std::vector<uint8_t>& data) {
157 BitReader reader(data.data(), data.size());
158 uint8_t value;
159 RCHECK(reader.ReadBits(8, &value));
160 profile_ = value;
161 RCHECK(reader.ReadBits(8, &value));
162 level_ = value;
// The next three fields share a single byte: 4 + 3 + 1 bits.
163 RCHECK(reader.ReadBits(4, &value));
164 bit_depth_ = value;
165 RCHECK(reader.ReadBits(3, &value));
166 chroma_subsampling_ = value;
167 bool bool_value;
168 RCHECK(reader.ReadBits(1, &bool_value));
169 video_full_range_flag_ = bool_value;
170 RCHECK(reader.ReadBits(8, &value));
171 color_primaries_ = value;
172 RCHECK(reader.ReadBits(8, &value));
173 transfer_characteristics_ = value;
174 RCHECK(reader.ReadBits(8, &value));
175 matrix_coefficients_ = value;
176
177 uint16_t codec_initialization_data_size = 0;
178 RCHECK(reader.ReadBits(16, &codec_initialization_data_size));
// Make sure the declared payload actually fits in the remaining input.
179 RCHECK(reader.bits_available() >= codec_initialization_data_size * 8u);
// Bytes consumed so far = total size minus the bytes still unread.
180 const size_t header_size = data.size() - reader.bits_available() / 8;
// Copy exactly the declared number of bytes; anything after that is ignored.
181 codec_initialization_data_.assign(
182 data.begin() + header_size,
183 data.begin() + header_size + codec_initialization_data_size);
184 return true;
185}
186
187// http://wiki.webmproject.org/vp9-codecprivate
188bool VPCodecConfigurationRecord::ParseWebM(const std::vector<uint8_t>& data) {
189 BufferReader reader(data.data(), data.size());
190
191 while (reader.HasBytes(1)) {
192 uint8_t id;
193 uint8_t size;
194 RCHECK(reader.Read1(&id));
195 RCHECK(reader.Read1(&size));
196
197 uint8_t value = 0;
198 switch (id) {
199 case kFeatureProfile:
200 RCHECK(size == 1);
201 RCHECK(reader.Read1(&value));
202 profile_ = value;
203 break;
204 case kFeatureLevel:
205 RCHECK(size == 1);
206 RCHECK(reader.Read1(&value));
207 level_ = value;
208 break;
209 case kFeatureBitDepth:
210 RCHECK(size == 1);
211 RCHECK(reader.Read1(&value));
212 bit_depth_ = value;
213 break;
214 case kFeatureChromaSubsampling:
215 RCHECK(size == 1);
216 RCHECK(reader.Read1(&value));
217 chroma_subsampling_ = value;
218 break;
219 default: {
220 LOG(WARNING) << "Skipping unknown VP9 codec feature " << id;
221 RCHECK(reader.SkipBytes(size));
222 }
223 }
224 }
225
226 return true;
227}
228
229void VPCodecConfigurationRecord::SetVP9Level(uint16_t width,
230 uint16_t height,
231 double sample_duration_seconds) {
232 // https://www.webmproject.org/vp9/levels/.
233
234 const uint32_t luma_picture_size = width * height;
235 // Alt-Ref frames are not taken into consideration intentionally to avoid the
236 // extra complexities. It may result in smaller luma_sample_rate may than the
237 // actual luma_sample_rate, leading to incorrect level being returned.
238 // If this is a problem, please file a bug to
239 // https://github.com/shaka-project/shaka-packager/issues.
240 const double kUnknownSampleDuration = 0.0;
241 // The decision is based on luma_picture_size only if duration is unknown.
242 uint64_t luma_sample_rate = 0;
243 if (sample_duration_seconds != kUnknownSampleDuration)
244 luma_sample_rate = luma_picture_size / sample_duration_seconds;
245
246 level_ = LevelFromCharacteristics(luma_sample_rate, luma_picture_size);
247}
248
249void VPCodecConfigurationRecord::WriteMP4(std::vector<uint8_t>* data) const {
250 BufferWriter writer;
251 writer.AppendInt(profile());
252 writer.AppendInt(level());
253 uint8_t bit_depth_chroma = (bit_depth() << 4) | (chroma_subsampling() << 1) |
254 (video_full_range_flag() ? 1 : 0);
255 writer.AppendInt(bit_depth_chroma);
256 writer.AppendInt(color_primaries());
257 writer.AppendInt(transfer_characteristics());
258 writer.AppendInt(matrix_coefficients());
259 uint16_t codec_initialization_data_size =
260 static_cast<uint16_t>(codec_initialization_data_.size());
261 writer.AppendInt(codec_initialization_data_size);
262 writer.AppendVector(codec_initialization_data_);
263 writer.SwapBuffer(data);
264}
265
266void VPCodecConfigurationRecord::WriteWebM(std::vector<uint8_t>* data) const {
267 BufferWriter writer;
268
269 if (profile_) {
270 writer.AppendInt(static_cast<uint8_t>(kFeatureProfile)); // ID = 1
271 writer.AppendInt(static_cast<uint8_t>(1)); // Length = 1
272 writer.AppendInt(*profile_);
273 }
274
275 if (level_) {
276 writer.AppendInt(static_cast<uint8_t>(kFeatureLevel)); // ID = 2
277 writer.AppendInt(static_cast<uint8_t>(1)); // Length = 1
278 writer.AppendInt(*level_);
279 }
280
281 if (bit_depth_) {
282 writer.AppendInt(static_cast<uint8_t>(kFeatureBitDepth)); // ID = 3
283 writer.AppendInt(static_cast<uint8_t>(1)); // Length = 1
284 writer.AppendInt(*bit_depth_);
285 }
286
287 if (chroma_subsampling_) {
288 // ID = 4, Length = 1
289 writer.AppendInt(static_cast<uint8_t>(kFeatureChromaSubsampling));
290 writer.AppendInt(static_cast<uint8_t>(1));
291 writer.AppendInt(*chroma_subsampling_);
292 }
293
294 writer.SwapBuffer(data);
295}
296
297std::string VPCodecConfigurationRecord::GetCodecString(Codec codec) const {
298 const std::string fields[] = {
299 absl::StrFormat("%d", profile()),
300 absl::StrFormat("%d", level()),
301 absl::StrFormat("%d", bit_depth()),
302 absl::StrFormat("%d", chroma_subsampling()),
303 absl::StrFormat("%d", color_primaries()),
304 absl::StrFormat("%d", transfer_characteristics()),
305 absl::StrFormat("%d", matrix_coefficients()),
306 (video_full_range_flag_ && *video_full_range_flag_) ? "01" : "00",
307 };
308
309 std::string codec_string = VPCodecAsString(codec);
310 for (const std::string& field : fields) {
311 // Make sure every field is at least 2-chars wide. The space will be
312 // replaced with '0' afterwards.
313 absl::StrAppendFormat(&codec_string, ".%2s", field.c_str());
314 }
315 absl::StrReplaceAll({{" ", "0"}}, &codec_string);
316 return codec_string;
317}
318
319void VPCodecConfigurationRecord::MergeFrom(
320 const VPCodecConfigurationRecord& other) {
321 MergeField("profile", other.profile_, &profile_);
322 MergeField("level", other.level_, &level_);
323 MergeField("bit depth", other.bit_depth_, &bit_depth_);
324 MergeField("chroma subsampling", other.chroma_subsampling_,
325 &chroma_subsampling_);
326 MergeField("video full range flag", other.video_full_range_flag_,
327 &video_full_range_flag_);
328 MergeField("color primaries", other.color_primaries_, &color_primaries_);
329 MergeField("transfer characteristics", other.transfer_characteristics_,
330 &transfer_characteristics_);
331 MergeField("matrix coefficients", other.matrix_coefficients_,
332 &matrix_coefficients_);
333
334 if (codec_initialization_data_.empty() ||
335 !other.codec_initialization_data_.empty()) {
336 if (!codec_initialization_data_.empty() &&
337 codec_initialization_data_ != other.codec_initialization_data_) {
338 LOG(WARNING) << "VPx codec initialization data is inconsistent";
339 }
340 codec_initialization_data_ = other.codec_initialization_data_;
341 }
342
343 MergeField("chroma location", other.chroma_location_, &chroma_location_);
344 UpdateChromaSubsamplingIfNeeded();
345}
346
347void VPCodecConfigurationRecord::SetChromaSubsampling(uint8_t subsampling_x,
348 uint8_t subsampling_y) {
349 VLOG(3) << "Set Chroma subsampling " << static_cast<int>(subsampling_x) << " "
350 << static_cast<int>(subsampling_y);
351 if (subsampling_x == 0 && subsampling_y == 0) {
352 chroma_subsampling_ = CHROMA_444;
353 } else if (subsampling_x == 0 && subsampling_y == 1) {
354 chroma_subsampling_ = CHROMA_440;
355 } else if (subsampling_x == 1 && subsampling_y == 0) {
356 chroma_subsampling_ = CHROMA_422;
357 } else if (subsampling_x == 1 && subsampling_y == 1) {
358 // VP9 assumes that chrome samples are collocated with luma samples if
359 // there is no explicit signaling outside of VP9 bitstream.
360 chroma_subsampling_ = CHROMA_420_COLLOCATED_WITH_LUMA;
361 } else {
362 LOG(WARNING) << "Unexpected chroma subsampling values: "
363 << static_cast<int>(subsampling_x) << " "
364 << static_cast<int>(subsampling_y);
365 }
366 UpdateChromaSubsamplingIfNeeded();
367}
368
369void VPCodecConfigurationRecord::SetChromaSubsampling(
370 ChromaSubsampling chroma_subsampling) {
371 chroma_subsampling_ = chroma_subsampling;
372 UpdateChromaSubsamplingIfNeeded();
373}
374
375void VPCodecConfigurationRecord::SetChromaLocation(uint8_t chroma_siting_x,
376 uint8_t chroma_siting_y) {
377 VLOG(3) << "Set Chroma Location " << static_cast<int>(chroma_siting_x) << " "
378 << static_cast<int>(chroma_siting_y);
379 if (chroma_siting_x == kLeftCollocated && chroma_siting_y == kTopCollocated) {
380 chroma_location_ = AVCHROMA_LOC_TOPLEFT;
381 } else if (chroma_siting_x == kLeftCollocated && chroma_siting_y == kHalf) {
382 chroma_location_ = AVCHROMA_LOC_LEFT;
383 } else if (chroma_siting_x == kHalf && chroma_siting_y == kTopCollocated) {
384 chroma_location_ = AVCHROMA_LOC_TOP;
385 } else if (chroma_siting_x == kHalf && chroma_siting_y == kHalf) {
386 chroma_location_ = AVCHROMA_LOC_CENTER;
387 } else {
388 LOG(WARNING) << "Unexpected chroma siting values: "
389 << static_cast<int>(chroma_siting_x) << " "
390 << static_cast<int>(chroma_siting_y);
391 }
392 UpdateChromaSubsamplingIfNeeded();
393}
394
395void VPCodecConfigurationRecord::UpdateChromaSubsamplingIfNeeded() {
396 // Use chroma location to fix the chroma subsampling format.
397 if (chroma_location_ && chroma_subsampling_ &&
398 (*chroma_subsampling_ == CHROMA_420_VERTICAL ||
399 *chroma_subsampling_ == CHROMA_420_COLLOCATED_WITH_LUMA)) {
400 if (*chroma_location_ == AVCHROMA_LOC_TOPLEFT)
401 chroma_subsampling_ = CHROMA_420_COLLOCATED_WITH_LUMA;
402 else if (*chroma_location_ == AVCHROMA_LOC_LEFT)
403 chroma_subsampling_ = CHROMA_420_VERTICAL;
404 VLOG(3) << "Chroma subsampling " << static_cast<int>(*chroma_subsampling_);
405 }
406}
407
408} // namespace media
409} // namespace shaka
A class to read bit streams.
Definition bit_reader.h:20
size_t bits_available() const
Definition bit_reader.h:92
bool ReadBits(size_t num_bits, T *out)
Definition bit_reader.h:38
bool HasBytes(size_t count)
bool SkipBytes(size_t num_bytes)
Class for parsing or writing VP codec configuration record.
All the methods that are virtual are virtual for mocking.