Shaka Packager SDK
Loading...
Searching...
No Matches
es_parser_h26x.cc
1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <packager/media/formats/mp2t/es_parser_h26x.h>
6
7#include <cstdint>
8
9#include <absl/log/check.h>
10#include <absl/log/log.h>
11
12#include <packager/macros/logging.h>
13#include <packager/media/base/media_sample.h>
14#include <packager/media/base/offset_byte_queue.h>
15#include <packager/media/base/timestamp.h>
16#include <packager/media/base/video_stream_info.h>
17#include <packager/media/codecs/h26x_byte_to_unit_stream_converter.h>
18#include <packager/media/formats/mp2t/mp2t_common.h>
19
20namespace shaka {
21namespace media {
22namespace mp2t {
23
namespace {

// Number of trailing bytes that are kept for re-scanning when no start code
// was found in the data searched so far (see SearchForNalu()).
constexpr int kStartCodeSize = 3;

// Minimum number of bytes that must follow a start code before the NAL unit
// header of the corresponding codec can be inspected.
constexpr int kH264NaluHeaderSize = 1;
constexpr int kH265NaluHeaderSize = 2;

}  // namespace
31
32EsParserH26x::EsParserH26x(
33 Nalu::CodecType type,
34 std::unique_ptr<H26xByteToUnitStreamConverter> stream_converter,
35 uint32_t pid,
36 const EmitSampleCB& emit_sample_cb)
37 : EsParser(pid),
38 emit_sample_cb_(emit_sample_cb),
39 type_(type),
40 es_queue_(new media::OffsetByteQueue()),
41 stream_converter_(std::move(stream_converter)) {}
42
43EsParserH26x::~EsParserH26x() {}
44
45bool EsParserH26x::Parse(const uint8_t* buf,
46 int size,
47 int64_t pts,
48 int64_t dts) {
49 // Note: Parse is invoked each time a PES packet has been reassembled.
50 // Unfortunately, a PES packet does not necessarily map
51 // to an h264/h265 access unit, although the HLS recommendation is to use one
52 // PES for each access unit (but this is just a recommendation and some
53 // streams do not comply with this recommendation).
54
55 // HLS recommendation: "In AVC video, you should have both a DTS and a
56 // PTS in each PES header".
57 // However, some streams do not comply with this recommendation.
58 if (pts == kNoTimestamp) {
59 DVLOG(1) << "Each video PES should have a PTS";
60 } else {
61 TimingDesc timing_desc;
62 timing_desc.pts = pts;
63 timing_desc.dts = (dts != kNoTimestamp) ? dts : pts;
64
65 // Link the end of the byte queue with the incoming timing descriptor.
66 timing_desc_list_.push_back(
67 std::pair<int64_t, TimingDesc>(es_queue_->tail(), timing_desc));
68
69 // Warns if there are a large number of cached timestamps, which should be 1
70 // or 2 if everythings works as expected.
71 const size_t kWarningSize =
72 24; // An arbitrary number (it is 1 second for a fps of 24).
73 LOG_IF(WARNING, timing_desc_list_.size() >= kWarningSize)
74 << "Unusually large number of cached timestamps ("
75 << timing_desc_list_.size() << ").";
76 }
77
78 // Add the incoming bytes to the ES queue.
79 es_queue_->Push(buf, size);
80 return ParseInternal();
81}
82
83bool EsParserH26x::Flush() {
84 DVLOG(1) << "EsParserH26x::Flush";
85
86 // Simulate two additional AUDs to force emitting the last access unit
87 // which is assumed to be complete at this point.
88 // Two AUDs are needed because the exact size of a NAL unit can only be
89 // determined after seeing the next NAL unit, so we need a second AUD to
90 // finish the parsing of the first AUD.
91 if (type_ == Nalu::kH264) {
92 const uint8_t aud[] = {0x00, 0x00, 0x01, 0x09, 0x00, 0x00, 0x01, 0x09};
93 es_queue_->Push(aud, sizeof(aud));
94 } else {
95 DCHECK_EQ(Nalu::kH265, type_);
96 const uint8_t aud[] = {0x00, 0x00, 0x01, 0x46, 0x01,
97 0x00, 0x00, 0x01, 0x46, 0x01};
98 es_queue_->Push(aud, sizeof(aud));
99 }
100
101 RCHECK(ParseInternal());
102
103 if (pending_sample_) {
104 // Flush pending sample.
105 if (!pending_sample_duration_) {
106 pending_sample_duration_ = CalculateSampleDuration(pending_sample_pps_id_);
107 }
108 pending_sample_->set_duration(pending_sample_duration_);
109 emit_sample_cb_(std::move(pending_sample_));
110 }
111 return true;
112}
113
114void EsParserH26x::Reset() {
115 es_queue_.reset(new media::OffsetByteQueue());
116 current_search_position_ = 0;
117 current_access_unit_position_ = 0;
118 current_video_slice_info_.valid = false;
119 next_access_unit_position_set_ = false;
120 next_access_unit_position_ = 0;
121 current_nalu_info_.reset();
122 timing_desc_list_.clear();
123 pending_sample_ = std::shared_ptr<MediaSample>();
124 pending_sample_duration_ = 0;
125 waiting_for_key_frame_ = true;
126}
127
128bool EsParserH26x::SearchForNalu(uint64_t* position, Nalu* nalu) {
129 const uint8_t* es;
130 int es_size;
131 es_queue_->PeekAt(current_search_position_, &es, &es_size);
132
133 // Find a start code.
134 uint64_t start_code_offset;
135 uint8_t start_code_size;
136 const bool start_code_found = NaluReader::FindStartCode(
137 es, es_size, &start_code_offset, &start_code_size);
138
139 if (!start_code_found) {
140 // We didn't find a start code, so we don't have to search this data again.
141 if (es_size > kStartCodeSize)
142 current_search_position_ += es_size - kStartCodeSize;
143 return false;
144 }
145
146 // Ensure the next NAL unit is a real NAL unit.
147 const uint8_t* next_nalu_ptr = es + start_code_offset + start_code_size;
148 // This size is likely inaccurate, this is just to get the header info.
149 const int64_t next_nalu_size = es_size - start_code_offset - start_code_size;
150 if (next_nalu_size <
151 (type_ == Nalu::kH264 ? kH264NaluHeaderSize : kH265NaluHeaderSize)) {
152 // There was not enough data, wait for more.
153 return false;
154 }
155
156 // Update search position for next nalu.
157 current_search_position_ += start_code_offset + start_code_size;
158
159 // |next_nalu_info_| is made global intentionally to avoid repetitive memory
160 // allocation which could create memory fragments.
161 if (!next_nalu_info_)
162 next_nalu_info_.reset(new NaluInfo);
163 if (!next_nalu_info_->nalu.Initialize(type_, next_nalu_ptr, next_nalu_size)) {
164 // This NAL unit is invalid, skip it and search again.
165 return SearchForNalu(position, nalu);
166 }
167 next_nalu_info_->position = current_search_position_ - start_code_size;
168 next_nalu_info_->start_code_size = start_code_size;
169
170 const bool current_nalu_set = current_nalu_info_ ? true : false;
171 if (current_nalu_info_) {
172 // Starting position for the nalu including start code.
173 *position = current_nalu_info_->position;
174 // Update the NALU because the data pointer may have been invalidated.
175 const uint8_t* current_nalu_ptr =
176 next_nalu_ptr +
177 (current_nalu_info_->position + current_nalu_info_->start_code_size) -
178 current_search_position_;
179 const uint64_t current_nalu_size = next_nalu_info_->position -
180 current_nalu_info_->position -
181 current_nalu_info_->start_code_size;
182 CHECK(nalu->Initialize(type_, current_nalu_ptr, current_nalu_size));
183 }
184 current_nalu_info_.swap(next_nalu_info_);
185 return current_nalu_set ? true : SearchForNalu(position, nalu);
186}
187
188bool EsParserH26x::ParseInternal() {
189 uint64_t position;
190 Nalu nalu;
191 VideoSliceInfo video_slice_info;
192 while (SearchForNalu(&position, &nalu)) {
193 // ITU H.264 sec. 7.4.1.2.3
194 // H264: The first of the NAL units with |can_start_access_unit() == true|
195 // after the last VCL NAL unit of a primary coded picture specifies the
196 // start of a new access unit.
197 // ITU H.265 sec. 7.4.2.4.4
198 // H265: The first of the NAL units with |can_start_access_unit() == true|
199 // after the last VCL NAL unit preceding firstBlPicNalUnit (the first
200 // VCL NAL unit of a coded picture with nuh_layer_id equal to 0), if
201 // any, specifies the start of a new access unit.
202 if (nalu.can_start_access_unit()) {
203 if (!next_access_unit_position_set_) {
204 next_access_unit_position_set_ = true;
205 next_access_unit_position_ = position;
206 }
207 RCHECK(ProcessNalu(nalu, &video_slice_info));
208 if (nalu.is_vcl() && !video_slice_info.valid) {
209 // This could happen only if decoder config is not available yet. Drop
210 // this frame.
211 DCHECK(!current_video_slice_info_.valid);
212 next_access_unit_position_set_ = false;
213 continue;
214 }
215 } else if (nalu.is_vcl()) {
216 // This isn't the first VCL NAL unit. Next access unit should start after
217 // this NAL unit.
218 next_access_unit_position_set_ = false;
219 continue;
220 }
221
222 // AUD shall be the first NAL unit if present. There shall be at most one
223 // AUD in any access unit. We can emit the current access unit which shall
224 // not contain the AUD.
225 if (nalu.is_aud()) {
226 RCHECK(EmitCurrentAccessUnit());
227 continue;
228 }
229
230 // We can only determine if the current access unit ends after seeing
231 // another VCL NAL unit.
232 if (!video_slice_info.valid)
233 continue;
234
235 // Check if it is the first VCL NAL unit of a primary coded picture. It is
236 // always true for H265 as nuh_layer_id shall be == 0 at this point.
237 bool is_first_vcl_nalu = true;
238 if (type_ == Nalu::kH264) {
239 if (current_video_slice_info_.valid) {
240 // ITU H.264 sec. 7.4.1.2.4 Detection of the first VCL NAL unit of a
241 // primary coded picture. Only pps_id and frame_num are checked here.
242 is_first_vcl_nalu =
243 video_slice_info.frame_num != current_video_slice_info_.frame_num ||
244 video_slice_info.pps_id != current_video_slice_info_.pps_id;
245 }
246 }
247 if (!is_first_vcl_nalu) {
248 // This isn't the first VCL NAL unit. Next access unit should start after
249 // this NAL unit.
250 next_access_unit_position_set_ = false;
251 continue;
252 }
253
254 DCHECK(next_access_unit_position_set_);
255 RCHECK(EmitCurrentAccessUnit());
256
257 // Delete the data we have already processed.
258 es_queue_->Trim(next_access_unit_position_);
259
260 current_access_unit_position_ = next_access_unit_position_;
261 current_video_slice_info_ = video_slice_info;
262 next_access_unit_position_set_ = false;
263 }
264 return true;
265}
266
267bool EsParserH26x::EmitCurrentAccessUnit() {
268 if (current_video_slice_info_.valid) {
269 if (current_video_slice_info_.is_key_frame)
270 waiting_for_key_frame_ = false;
271 if (!waiting_for_key_frame_) {
272 RCHECK(
273 EmitFrame(current_access_unit_position_,
274 next_access_unit_position_ - current_access_unit_position_,
275 current_video_slice_info_.is_key_frame,
276 current_video_slice_info_.pps_id));
277 }
278 current_video_slice_info_.valid = false;
279 }
280 return true;
281}
282
283bool EsParserH26x::EmitFrame(int64_t access_unit_pos,
284 int access_unit_size,
285 bool is_key_frame,
286 int pps_id) {
287 // Get the access unit timing info.
288 TimingDesc current_timing_desc = {kNoTimestamp, kNoTimestamp};
289 while (!timing_desc_list_.empty() &&
290 timing_desc_list_.front().first <= access_unit_pos) {
291 current_timing_desc = timing_desc_list_.front().second;
292 timing_desc_list_.pop_front();
293 }
294 if (current_timing_desc.pts == kNoTimestamp)
295 return false;
296
297 // Emit a frame.
298 DVLOG(LOG_LEVEL_ES) << "Emit frame: stream_pos=" << access_unit_pos
299 << " size=" << access_unit_size << " pts "
300 << current_timing_desc.pts << " timing_desc_list size "
301 << timing_desc_list_.size();
302 int es_size;
303 const uint8_t* es;
304 es_queue_->PeekAt(access_unit_pos, &es, &es_size);
305
306 // Convert frame to unit stream format.
307 std::vector<uint8_t> converted_frame;
308 if (!stream_converter_->ConvertByteStreamToNalUnitStream(
309 es, access_unit_size, &converted_frame)) {
310 DLOG(ERROR) << "Failure to convert video frame to unit stream format.";
311 return false;
312 }
313
314 // Update the video decoder configuration if needed.
315 RCHECK(UpdateVideoDecoderConfig(pps_id));
316
317 // Create the media sample, emitting always the previous sample after
318 // calculating its duration.
319 std::shared_ptr<MediaSample> media_sample = MediaSample::CopyFrom(
320 converted_frame.data(), converted_frame.size(), is_key_frame);
321 media_sample->set_dts(current_timing_desc.dts);
322 media_sample->set_pts(current_timing_desc.pts);
323 if (pending_sample_) {
324 if (media_sample->dts() <= pending_sample_->dts()) {
325 LOG(WARNING) << "[MPEG-2 TS] PID " << pid() << " dts "
326 << media_sample->dts()
327 << " less than or equal to previous dts "
328 << pending_sample_->dts();
329 // Keep the sample but adjust the sample duration to a very small value,
330 // in case that the sample is still needed for the decoding afterwards.
331 const int64_t kArbitrarySmallDuration = 0.001 * kMpeg2Timescale; // 1ms.
332 pending_sample_->set_duration(kArbitrarySmallDuration);
333 } else {
334 int64_t sample_duration = media_sample->dts() - pending_sample_->dts();
335 pending_sample_->set_duration(sample_duration);
336
337 const int kArbitraryGapScale = 10;
338 if (pending_sample_duration_ &&
339 sample_duration > kArbitraryGapScale * pending_sample_duration_) {
340 LOG(WARNING) << "[MPEG-2 TS] PID " << pid() << " Possible GAP at dts "
341 << pending_sample_->dts() << " with next sample at dts "
342 << media_sample->dts() << " (difference "
343 << sample_duration << ")";
344 }
345
346 pending_sample_duration_ = sample_duration;
347 }
348 emit_sample_cb_(std::move(pending_sample_));
349 }
350 pending_sample_ = media_sample;
351 pending_sample_pps_id_ = pps_id;
352
353 return true;
354}
355
356} // namespace mp2t
357} // namespace media
358} // namespace shaka
All the methods that are virtual are virtual for mocking.