Shaka Packager SDK
es_parser_h26x.cc
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <packager/media/formats/mp2t/es_parser_h26x.h>
6 
7 #include <cstdint>
8 
9 #include <absl/log/check.h>
10 #include <absl/log/log.h>
11 
12 #include <packager/macros/logging.h>
13 #include <packager/media/base/media_sample.h>
14 #include <packager/media/base/offset_byte_queue.h>
15 #include <packager/media/base/timestamp.h>
16 #include <packager/media/base/video_stream_info.h>
17 #include <packager/media/codecs/h26x_byte_to_unit_stream_converter.h>
18 #include <packager/media/formats/mp2t/mp2t_common.h>
19 
20 namespace shaka {
21 namespace media {
22 namespace mp2t {
23 
namespace {

// Number of trailing bytes SearchForNalu() leaves unskipped when no start
// code is found in the buffered data. It matches the length of the 3-byte
// 0x00 0x00 0x01 prefix used by the synthetic AUDs pushed in Flush().
constexpr int kStartCodeSize = 3;

// Minimum number of bytes that must follow a start code before
// SearchForNalu() attempts to initialize an H.264 NAL unit.
constexpr int kH264NaluHeaderSize = 1;

// Minimum number of bytes that must follow a start code before
// SearchForNalu() attempts to initialize an H.265 NAL unit.
constexpr int kH265NaluHeaderSize = 2;

}  // namespace
31 
32 EsParserH26x::EsParserH26x(
33  Nalu::CodecType type,
34  std::unique_ptr<H26xByteToUnitStreamConverter> stream_converter,
35  uint32_t pid,
36  const EmitSampleCB& emit_sample_cb)
37  : EsParser(pid),
38  emit_sample_cb_(emit_sample_cb),
39  type_(type),
40  es_queue_(new media::OffsetByteQueue()),
41  stream_converter_(std::move(stream_converter)) {}
42 
43 EsParserH26x::~EsParserH26x() {}
44 
45 bool EsParserH26x::Parse(const uint8_t* buf,
46  int size,
47  int64_t pts,
48  int64_t dts) {
49  // Note: Parse is invoked each time a PES packet has been reassembled.
50  // Unfortunately, a PES packet does not necessarily map
51  // to an h264/h265 access unit, although the HLS recommendation is to use one
52  // PES for each access unit (but this is just a recommendation and some
53  // streams do not comply with this recommendation).
54 
55  // HLS recommendation: "In AVC video, you should have both a DTS and a
56  // PTS in each PES header".
57  // However, some streams do not comply with this recommendation.
58  if (pts == kNoTimestamp) {
59  DVLOG(1) << "Each video PES should have a PTS";
60  } else {
61  TimingDesc timing_desc;
62  timing_desc.pts = pts;
63  timing_desc.dts = (dts != kNoTimestamp) ? dts : pts;
64 
65  // Link the end of the byte queue with the incoming timing descriptor.
66  timing_desc_list_.push_back(
67  std::pair<int64_t, TimingDesc>(es_queue_->tail(), timing_desc));
68 
69  // Warns if there are a large number of cached timestamps, which should be 1
70  // or 2 if everythings works as expected.
71  const size_t kWarningSize =
72  24; // An arbitrary number (it is 1 second for a fps of 24).
73  LOG_IF(WARNING, timing_desc_list_.size() >= kWarningSize)
74  << "Unusually large number of cached timestamps ("
75  << timing_desc_list_.size() << ").";
76  }
77 
78  // Add the incoming bytes to the ES queue.
79  es_queue_->Push(buf, size);
80  return ParseInternal();
81 }
82 
83 bool EsParserH26x::Flush() {
84  DVLOG(1) << "EsParserH26x::Flush";
85 
86  // Simulate two additional AUDs to force emitting the last access unit
87  // which is assumed to be complete at this point.
88  // Two AUDs are needed because the exact size of a NAL unit can only be
89  // determined after seeing the next NAL unit, so we need a second AUD to
90  // finish the parsing of the first AUD.
91  if (type_ == Nalu::kH264) {
92  const uint8_t aud[] = {0x00, 0x00, 0x01, 0x09, 0x00, 0x00, 0x01, 0x09};
93  es_queue_->Push(aud, sizeof(aud));
94  } else {
95  DCHECK_EQ(Nalu::kH265, type_);
96  const uint8_t aud[] = {0x00, 0x00, 0x01, 0x46, 0x01,
97  0x00, 0x00, 0x01, 0x46, 0x01};
98  es_queue_->Push(aud, sizeof(aud));
99  }
100 
101  RCHECK(ParseInternal());
102 
103  if (pending_sample_) {
104  // Flush pending sample.
105  if (!pending_sample_duration_) {
106  pending_sample_duration_ = CalculateSampleDuration(pending_sample_pps_id_);
107  }
108  pending_sample_->set_duration(pending_sample_duration_);
109  emit_sample_cb_(std::move(pending_sample_));
110  }
111  return true;
112 }
113 
114 void EsParserH26x::Reset() {
115  es_queue_.reset(new media::OffsetByteQueue());
116  current_search_position_ = 0;
117  current_access_unit_position_ = 0;
118  current_video_slice_info_.valid = false;
119  next_access_unit_position_set_ = false;
120  next_access_unit_position_ = 0;
121  current_nalu_info_.reset();
122  timing_desc_list_.clear();
123  pending_sample_ = std::shared_ptr<MediaSample>();
124  pending_sample_duration_ = 0;
125  waiting_for_key_frame_ = true;
126 }
127 
128 bool EsParserH26x::SearchForNalu(uint64_t* position, Nalu* nalu) {
129  const uint8_t* es;
130  int es_size;
131  es_queue_->PeekAt(current_search_position_, &es, &es_size);
132 
133  // Find a start code.
134  uint64_t start_code_offset;
135  uint8_t start_code_size;
136  const bool start_code_found = NaluReader::FindStartCode(
137  es, es_size, &start_code_offset, &start_code_size);
138 
139  if (!start_code_found) {
140  // We didn't find a start code, so we don't have to search this data again.
141  if (es_size > kStartCodeSize)
142  current_search_position_ += es_size - kStartCodeSize;
143  return false;
144  }
145 
146  // Ensure the next NAL unit is a real NAL unit.
147  const uint8_t* next_nalu_ptr = es + start_code_offset + start_code_size;
148  // This size is likely inaccurate, this is just to get the header info.
149  const int64_t next_nalu_size = es_size - start_code_offset - start_code_size;
150  if (next_nalu_size <
151  (type_ == Nalu::kH264 ? kH264NaluHeaderSize : kH265NaluHeaderSize)) {
152  // There was not enough data, wait for more.
153  return false;
154  }
155 
156  // Update search position for next nalu.
157  current_search_position_ += start_code_offset + start_code_size;
158 
159  // |next_nalu_info_| is made global intentionally to avoid repetitive memory
160  // allocation which could create memory fragments.
161  if (!next_nalu_info_)
162  next_nalu_info_.reset(new NaluInfo);
163  if (!next_nalu_info_->nalu.Initialize(type_, next_nalu_ptr, next_nalu_size)) {
164  // This NAL unit is invalid, skip it and search again.
165  return SearchForNalu(position, nalu);
166  }
167  next_nalu_info_->position = current_search_position_ - start_code_size;
168  next_nalu_info_->start_code_size = start_code_size;
169 
170  const bool current_nalu_set = current_nalu_info_ ? true : false;
171  if (current_nalu_info_) {
172  // Starting position for the nalu including start code.
173  *position = current_nalu_info_->position;
174  // Update the NALU because the data pointer may have been invalidated.
175  const uint8_t* current_nalu_ptr =
176  next_nalu_ptr +
177  (current_nalu_info_->position + current_nalu_info_->start_code_size) -
178  current_search_position_;
179  const uint64_t current_nalu_size = next_nalu_info_->position -
180  current_nalu_info_->position -
181  current_nalu_info_->start_code_size;
182  CHECK(nalu->Initialize(type_, current_nalu_ptr, current_nalu_size));
183  }
184  current_nalu_info_.swap(next_nalu_info_);
185  return current_nalu_set ? true : SearchForNalu(position, nalu);
186 }
187 
188 bool EsParserH26x::ParseInternal() {
189  uint64_t position;
190  Nalu nalu;
191  VideoSliceInfo video_slice_info;
192  while (SearchForNalu(&position, &nalu)) {
193  // ITU H.264 sec. 7.4.1.2.3
194  // H264: The first of the NAL units with |can_start_access_unit() == true|
195  // after the last VCL NAL unit of a primary coded picture specifies the
196  // start of a new access unit.
197  // ITU H.265 sec. 7.4.2.4.4
198  // H265: The first of the NAL units with |can_start_access_unit() == true|
199  // after the last VCL NAL unit preceding firstBlPicNalUnit (the first
200  // VCL NAL unit of a coded picture with nuh_layer_id equal to 0), if
201  // any, specifies the start of a new access unit.
202  if (nalu.can_start_access_unit()) {
203  if (!next_access_unit_position_set_) {
204  next_access_unit_position_set_ = true;
205  next_access_unit_position_ = position;
206  }
207  RCHECK(ProcessNalu(nalu, &video_slice_info));
208  if (nalu.is_vcl() && !video_slice_info.valid) {
209  // This could happen only if decoder config is not available yet. Drop
210  // this frame.
211  DCHECK(!current_video_slice_info_.valid);
212  next_access_unit_position_set_ = false;
213  continue;
214  }
215  } else if (nalu.is_vcl()) {
216  // This isn't the first VCL NAL unit. Next access unit should start after
217  // this NAL unit.
218  next_access_unit_position_set_ = false;
219  continue;
220  }
221 
222  // AUD shall be the first NAL unit if present. There shall be at most one
223  // AUD in any access unit. We can emit the current access unit which shall
224  // not contain the AUD.
225  if (nalu.is_aud()) {
226  RCHECK(EmitCurrentAccessUnit());
227  continue;
228  }
229 
230  // We can only determine if the current access unit ends after seeing
231  // another VCL NAL unit.
232  if (!video_slice_info.valid)
233  continue;
234 
235  // Check if it is the first VCL NAL unit of a primary coded picture. It is
236  // always true for H265 as nuh_layer_id shall be == 0 at this point.
237  bool is_first_vcl_nalu = true;
238  if (type_ == Nalu::kH264) {
239  if (current_video_slice_info_.valid) {
240  // ITU H.264 sec. 7.4.1.2.4 Detection of the first VCL NAL unit of a
241  // primary coded picture. Only pps_id and frame_num are checked here.
242  is_first_vcl_nalu =
243  video_slice_info.frame_num != current_video_slice_info_.frame_num ||
244  video_slice_info.pps_id != current_video_slice_info_.pps_id;
245  }
246  }
247  if (!is_first_vcl_nalu) {
248  // This isn't the first VCL NAL unit. Next access unit should start after
249  // this NAL unit.
250  next_access_unit_position_set_ = false;
251  continue;
252  }
253 
254  DCHECK(next_access_unit_position_set_);
255  RCHECK(EmitCurrentAccessUnit());
256 
257  // Delete the data we have already processed.
258  es_queue_->Trim(next_access_unit_position_);
259 
260  current_access_unit_position_ = next_access_unit_position_;
261  current_video_slice_info_ = video_slice_info;
262  next_access_unit_position_set_ = false;
263  }
264  return true;
265 }
266 
267 bool EsParserH26x::EmitCurrentAccessUnit() {
268  if (current_video_slice_info_.valid) {
269  if (current_video_slice_info_.is_key_frame)
270  waiting_for_key_frame_ = false;
271  if (!waiting_for_key_frame_) {
272  RCHECK(
273  EmitFrame(current_access_unit_position_,
274  next_access_unit_position_ - current_access_unit_position_,
275  current_video_slice_info_.is_key_frame,
276  current_video_slice_info_.pps_id));
277  }
278  current_video_slice_info_.valid = false;
279  }
280  return true;
281 }
282 
283 bool EsParserH26x::EmitFrame(int64_t access_unit_pos,
284  int access_unit_size,
285  bool is_key_frame,
286  int pps_id) {
287  // Get the access unit timing info.
288  TimingDesc current_timing_desc = {kNoTimestamp, kNoTimestamp};
289  while (!timing_desc_list_.empty() &&
290  timing_desc_list_.front().first <= access_unit_pos) {
291  current_timing_desc = timing_desc_list_.front().second;
292  timing_desc_list_.pop_front();
293  }
294  if (current_timing_desc.pts == kNoTimestamp)
295  return false;
296 
297  // Emit a frame.
298  DVLOG(LOG_LEVEL_ES) << "Emit frame: stream_pos=" << access_unit_pos
299  << " size=" << access_unit_size << " pts "
300  << current_timing_desc.pts << " timing_desc_list size "
301  << timing_desc_list_.size();
302  int es_size;
303  const uint8_t* es;
304  es_queue_->PeekAt(access_unit_pos, &es, &es_size);
305 
306  // Convert frame to unit stream format.
307  std::vector<uint8_t> converted_frame;
308  if (!stream_converter_->ConvertByteStreamToNalUnitStream(
309  es, access_unit_size, &converted_frame)) {
310  DLOG(ERROR) << "Failure to convert video frame to unit stream format.";
311  return false;
312  }
313 
314  // Update the video decoder configuration if needed.
315  RCHECK(UpdateVideoDecoderConfig(pps_id));
316 
317  // Create the media sample, emitting always the previous sample after
318  // calculating its duration.
319  std::shared_ptr<MediaSample> media_sample = MediaSample::CopyFrom(
320  converted_frame.data(), converted_frame.size(), is_key_frame);
321  media_sample->set_dts(current_timing_desc.dts);
322  media_sample->set_pts(current_timing_desc.pts);
323  if (pending_sample_) {
324  if (media_sample->dts() <= pending_sample_->dts()) {
325  LOG(WARNING) << "[MPEG-2 TS] PID " << pid() << " dts "
326  << media_sample->dts()
327  << " less than or equal to previous dts "
328  << pending_sample_->dts();
329  // Keep the sample but adjust the sample duration to a very small value,
330  // in case that the sample is still needed for the decoding afterwards.
331  const int64_t kArbitrarySmallDuration = 0.001 * kMpeg2Timescale; // 1ms.
332  pending_sample_->set_duration(kArbitrarySmallDuration);
333  } else {
334  int64_t sample_duration = media_sample->dts() - pending_sample_->dts();
335  pending_sample_->set_duration(sample_duration);
336 
337  const int kArbitraryGapScale = 10;
338  if (pending_sample_duration_ &&
339  sample_duration > kArbitraryGapScale * pending_sample_duration_) {
340  LOG(WARNING) << "[MPEG-2 TS] PID " << pid() << " Possible GAP at dts "
341  << pending_sample_->dts() << " with next sample at dts "
342  << media_sample->dts() << " (difference "
343  << sample_duration << ")";
344  }
345 
346  pending_sample_duration_ = sample_duration;
347  }
348  emit_sample_cb_(std::move(pending_sample_));
349  }
350  pending_sample_ = media_sample;
351  pending_sample_pps_id_ = pps_id;
352 
353  return true;
354 }
355 
356 } // namespace mp2t
357 } // namespace media
358 } // namespace shaka
All the methods that are virtual are virtual for mocking.
Definition: crypto_flags.cc:66