Shaka Packager SDK
Loading...
Searching...
No Matches
es_parser_h26x.cc
1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <packager/media/formats/mp2t/es_parser_h26x.h>
6
7#include <cstdint>
8
9#include <absl/log/check.h>
10#include <absl/log/log.h>
11
12#include <packager/macros/logging.h>
13#include <packager/media/base/media_sample.h>
14#include <packager/media/base/offset_byte_queue.h>
15#include <packager/media/base/timestamp.h>
16#include <packager/media/base/video_stream_info.h>
17#include <packager/media/codecs/h26x_byte_to_unit_stream_converter.h>
18#include <packager/media/formats/mp2t/mp2t_common.h>
19
20namespace shaka {
21namespace media {
22namespace mp2t {
23
namespace {

// Number of trailing bytes SearchForNalu() leaves unscanned when no start code
// was found, so that a start code split across two pushes is still detected.
constexpr int kStartCodeSize = 3;

// Minimum number of payload bytes that must follow a start code before
// SearchForNalu() attempts to read the NAL unit header of each codec.
constexpr int kH264NaluHeaderSize = 1;
constexpr int kH265NaluHeaderSize = 2;

}  // namespace
31
32EsParserH26x::EsParserH26x(
33 Nalu::CodecType type,
34 std::unique_ptr<H26xByteToUnitStreamConverter> stream_converter,
35 uint32_t pid,
36 const EmitSampleCB& emit_sample_cb)
37 : EsParser(pid),
38 emit_sample_cb_(emit_sample_cb),
39 type_(type),
40 es_queue_(new media::OffsetByteQueue()),
41 stream_converter_(std::move(stream_converter)) {}
42
43EsParserH26x::~EsParserH26x() {}
44
45bool EsParserH26x::Parse(const uint8_t* buf,
46 int size,
47 int64_t pts,
48 int64_t dts) {
49 // Note: Parse is invoked each time a PES packet has been reassembled.
50 // Unfortunately, a PES packet does not necessarily map
51 // to an h264/h265 access unit, although the HLS recommendation is to use one
52 // PES for each access unit (but this is just a recommendation and some
53 // streams do not comply with this recommendation).
54
55 // HLS recommendation: "In AVC video, you should have both a DTS and a
56 // PTS in each PES header".
57 // However, some streams do not comply with this recommendation.
58 if (pts == kNoTimestamp) {
59 DVLOG(1) << "Each video PES should have a PTS";
60 } else {
61 TimingDesc timing_desc;
62 timing_desc.pts = pts;
63 timing_desc.dts = (dts != kNoTimestamp) ? dts : pts;
64
65 // Link the end of the byte queue with the incoming timing descriptor.
66 timing_desc_list_.push_back(
67 std::pair<int64_t, TimingDesc>(es_queue_->tail(), timing_desc));
68
69 // Warns if there are a large number of cached timestamps, which should be 1
70 // or 2 if everythings works as expected.
71 const size_t kWarningSize =
72 24; // An arbitrary number (it is 1 second for a fps of 24).
73 LOG_IF(WARNING, timing_desc_list_.size() >= kWarningSize)
74 << "Unusually large number of cached timestamps ("
75 << timing_desc_list_.size() << ").";
76 }
77
78 // Add the incoming bytes to the ES queue.
79 es_queue_->Push(buf, size);
80 return ParseInternal();
81}
82
83bool EsParserH26x::Flush() {
84 DVLOG(1) << "EsParserH26x::Flush";
85
86 // Simulate two additional AUDs to force emitting the last access unit
87 // which is assumed to be complete at this point.
88 // Two AUDs are needed because the exact size of a NAL unit can only be
89 // determined after seeing the next NAL unit, so we need a second AUD to
90 // finish the parsing of the first AUD.
91 if (type_ == Nalu::kH264) {
92 const uint8_t aud[] = {0x00, 0x00, 0x01, 0x09, 0x00, 0x00, 0x01, 0x09};
93 es_queue_->Push(aud, sizeof(aud));
94 } else {
95 DCHECK_EQ(Nalu::kH265, type_);
96 const uint8_t aud[] = {0x00, 0x00, 0x01, 0x46, 0x01,
97 0x00, 0x00, 0x01, 0x46, 0x01};
98 es_queue_->Push(aud, sizeof(aud));
99 }
100
101 RCHECK(ParseInternal());
102
103 if (pending_sample_) {
104 // Flush pending sample.
105 if (!pending_sample_duration_) {
106 pending_sample_duration_ =
107 CalculateSampleDuration(pending_sample_pps_id_);
108 }
109 pending_sample_->set_duration(pending_sample_duration_);
110 emit_sample_cb_(std::move(pending_sample_));
111 }
112 return true;
113}
114
115void EsParserH26x::Reset() {
116 es_queue_.reset(new media::OffsetByteQueue());
117 current_search_position_ = 0;
118 current_access_unit_position_ = 0;
119 current_video_slice_info_.valid = false;
120 next_access_unit_position_set_ = false;
121 next_access_unit_position_ = 0;
122 current_nalu_info_.reset();
123 timing_desc_list_.clear();
124 pending_sample_ = std::shared_ptr<MediaSample>();
125 pending_sample_duration_ = 0;
126 waiting_for_key_frame_ = true;
127}
128
129bool EsParserH26x::SearchForNalu(uint64_t* position, Nalu* nalu) {
130 const uint8_t* es;
131 int es_size;
132 es_queue_->PeekAt(current_search_position_, &es, &es_size);
133
134 // Find a start code.
135 uint64_t start_code_offset;
136 uint8_t start_code_size;
137 const bool start_code_found = NaluReader::FindStartCode(
138 es, es_size, &start_code_offset, &start_code_size);
139
140 if (!start_code_found) {
141 // We didn't find a start code, so we don't have to search this data again.
142 if (es_size > kStartCodeSize)
143 current_search_position_ += es_size - kStartCodeSize;
144 return false;
145 }
146
147 // Ensure the next NAL unit is a real NAL unit.
148 const uint8_t* next_nalu_ptr = es + start_code_offset + start_code_size;
149 // This size is likely inaccurate, this is just to get the header info.
150 const int64_t next_nalu_size = es_size - start_code_offset - start_code_size;
151 if (next_nalu_size <
152 (type_ == Nalu::kH264 ? kH264NaluHeaderSize : kH265NaluHeaderSize)) {
153 // There was not enough data, wait for more.
154 return false;
155 }
156
157 // Update search position for next nalu.
158 current_search_position_ += start_code_offset + start_code_size;
159
160 // |next_nalu_info_| is made global intentionally to avoid repetitive memory
161 // allocation which could create memory fragments.
162 if (!next_nalu_info_)
163 next_nalu_info_.reset(new NaluInfo);
164 if (!next_nalu_info_->nalu.Initialize(type_, next_nalu_ptr, next_nalu_size)) {
165 // This NAL unit is invalid, skip it and search again.
166 return SearchForNalu(position, nalu);
167 }
168 next_nalu_info_->position = current_search_position_ - start_code_size;
169 next_nalu_info_->start_code_size = start_code_size;
170
171 const bool current_nalu_set = current_nalu_info_ ? true : false;
172 if (current_nalu_info_) {
173 // Starting position for the nalu including start code.
174 *position = current_nalu_info_->position;
175 // Update the NALU because the data pointer may have been invalidated.
176 const uint8_t* current_nalu_ptr =
177 next_nalu_ptr +
178 (current_nalu_info_->position + current_nalu_info_->start_code_size) -
179 current_search_position_;
180 const uint64_t current_nalu_size = next_nalu_info_->position -
181 current_nalu_info_->position -
182 current_nalu_info_->start_code_size;
183 CHECK(nalu->Initialize(type_, current_nalu_ptr, current_nalu_size));
184 }
185 current_nalu_info_.swap(next_nalu_info_);
186 return current_nalu_set ? true : SearchForNalu(position, nalu);
187}
188
189bool EsParserH26x::ParseInternal() {
190 uint64_t position;
191 Nalu nalu;
192 VideoSliceInfo video_slice_info;
193 while (SearchForNalu(&position, &nalu)) {
194 // ITU H.264 sec. 7.4.1.2.3
195 // H264: The first of the NAL units with |can_start_access_unit() == true|
196 // after the last VCL NAL unit of a primary coded picture specifies the
197 // start of a new access unit.
198 // ITU H.265 sec. 7.4.2.4.4
199 // H265: The first of the NAL units with |can_start_access_unit() == true|
200 // after the last VCL NAL unit preceding firstBlPicNalUnit (the first
201 // VCL NAL unit of a coded picture with nuh_layer_id equal to 0), if
202 // any, specifies the start of a new access unit.
203 if (nalu.can_start_access_unit()) {
204 if (!next_access_unit_position_set_) {
205 next_access_unit_position_set_ = true;
206 next_access_unit_position_ = position;
207 }
208 RCHECK(ProcessNalu(nalu, &video_slice_info));
209 if (nalu.is_vcl() && !video_slice_info.valid) {
210 // This could happen only if decoder config is not available yet. Drop
211 // this frame.
212 DCHECK(!current_video_slice_info_.valid);
213 next_access_unit_position_set_ = false;
214 continue;
215 }
216 } else if (nalu.is_vcl()) {
217 // This isn't the first VCL NAL unit. Next access unit should start after
218 // this NAL unit.
219 next_access_unit_position_set_ = false;
220 continue;
221 }
222
223 // AUD shall be the first NAL unit if present. There shall be at most one
224 // AUD in any access unit. We can emit the current access unit which shall
225 // not contain the AUD.
226 if (nalu.is_aud()) {
227 RCHECK(EmitCurrentAccessUnit());
228 continue;
229 }
230
231 // We can only determine if the current access unit ends after seeing
232 // another VCL NAL unit.
233 if (!video_slice_info.valid)
234 continue;
235
236 // Check if it is the first VCL NAL unit of a primary coded picture. It is
237 // always true for H265 as nuh_layer_id shall be == 0 at this point.
238 bool is_first_vcl_nalu = true;
239 if (type_ == Nalu::kH264) {
240 if (current_video_slice_info_.valid) {
241 // ITU H.264 sec. 7.4.1.2.4 Detection of the first VCL NAL unit of a
242 // primary coded picture. Only pps_id and frame_num are checked here.
243 is_first_vcl_nalu =
244 video_slice_info.frame_num != current_video_slice_info_.frame_num ||
245 video_slice_info.pps_id != current_video_slice_info_.pps_id;
246 }
247 }
248 if (!is_first_vcl_nalu) {
249 // This isn't the first VCL NAL unit. Next access unit should start after
250 // this NAL unit.
251 next_access_unit_position_set_ = false;
252 continue;
253 }
254
255 DCHECK(next_access_unit_position_set_);
256 RCHECK(EmitCurrentAccessUnit());
257
258 // Delete the data we have already processed.
259 es_queue_->Trim(next_access_unit_position_);
260
261 current_access_unit_position_ = next_access_unit_position_;
262 current_video_slice_info_ = video_slice_info;
263 next_access_unit_position_set_ = false;
264 }
265 return true;
266}
267
268bool EsParserH26x::EmitCurrentAccessUnit() {
269 if (current_video_slice_info_.valid) {
270 if (current_video_slice_info_.is_key_frame)
271 waiting_for_key_frame_ = false;
272 if (!waiting_for_key_frame_) {
273 RCHECK(
274 EmitFrame(current_access_unit_position_,
275 next_access_unit_position_ - current_access_unit_position_,
276 current_video_slice_info_.is_key_frame,
277 current_video_slice_info_.pps_id));
278 }
279 current_video_slice_info_.valid = false;
280 }
281 return true;
282}
283
284bool EsParserH26x::EmitFrame(int64_t access_unit_pos,
285 int access_unit_size,
286 bool is_key_frame,
287 int pps_id) {
288 // Get the access unit timing info.
289 TimingDesc current_timing_desc = {kNoTimestamp, kNoTimestamp};
290 while (!timing_desc_list_.empty() &&
291 timing_desc_list_.front().first <= access_unit_pos) {
292 current_timing_desc = timing_desc_list_.front().second;
293 timing_desc_list_.pop_front();
294 }
295 if (current_timing_desc.pts == kNoTimestamp)
296 return false;
297
298 // Emit a frame.
299 DVLOG(LOG_LEVEL_ES) << "Emit frame: stream_pos=" << access_unit_pos
300 << " size=" << access_unit_size << " pts "
301 << current_timing_desc.pts << " timing_desc_list size "
302 << timing_desc_list_.size();
303 int es_size;
304 const uint8_t* es;
305 es_queue_->PeekAt(access_unit_pos, &es, &es_size);
306
307 // Convert frame to unit stream format.
308 std::vector<uint8_t> converted_frame;
309 if (!stream_converter_->ConvertByteStreamToNalUnitStream(es, access_unit_size,
310 &converted_frame)) {
311 DLOG(ERROR) << "Failure to convert video frame to unit stream format.";
312 return false;
313 }
314
315 // Update the video decoder configuration if needed.
316 RCHECK(UpdateVideoDecoderConfig(pps_id));
317
318 // Create the media sample, emitting always the previous sample after
319 // calculating its duration.
320 std::shared_ptr<MediaSample> media_sample = MediaSample::CopyFrom(
321 converted_frame.data(), converted_frame.size(), is_key_frame);
322 media_sample->set_dts(current_timing_desc.dts);
323 media_sample->set_pts(current_timing_desc.pts);
324 if (pending_sample_) {
325 if (media_sample->dts() <= pending_sample_->dts()) {
326 LOG(WARNING) << "[MPEG-2 TS] PID " << pid() << " dts "
327 << media_sample->dts()
328 << " less than or equal to previous dts "
329 << pending_sample_->dts();
330 // Keep the sample but adjust the sample duration to a very small value,
331 // in case that the sample is still needed for the decoding afterwards.
332 const int64_t kArbitrarySmallDuration = 0.001 * kMpeg2Timescale; // 1ms.
333 pending_sample_->set_duration(kArbitrarySmallDuration);
334 } else {
335 int64_t sample_duration = media_sample->dts() - pending_sample_->dts();
336 pending_sample_->set_duration(sample_duration);
337
338 const int kArbitraryGapScale = 10;
339 if (pending_sample_duration_ &&
340 sample_duration > kArbitraryGapScale * pending_sample_duration_) {
341 LOG(WARNING) << "[MPEG-2 TS] PID " << pid() << " Possible GAP at dts "
342 << pending_sample_->dts() << " with next sample at dts "
343 << media_sample->dts() << " (difference "
344 << sample_duration << ")";
345 }
346
347 pending_sample_duration_ = sample_duration;
348 }
349 emit_sample_cb_(std::move(pending_sample_));
350 }
351 pending_sample_ = media_sample;
352 pending_sample_pps_id_ = pps_id;
353
354 return true;
355}
356
357} // namespace mp2t
358} // namespace media
359} // namespace shaka
All the methods that are virtual are virtual for mocking.