decoding_state.cc (12628B)
1 /* 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/video_coding/deprecated/decoding_state.h" 12 13 #include <cstdint> 14 #include <cstring> 15 #include <map> 16 #include <set> 17 #include <vector> 18 19 #include "api/video/video_codec_type.h" 20 #include "api/video/video_frame_type.h" 21 #include "common_video/h264/h264_common.h" 22 #include "modules/include/module_common_types_public.h" 23 #include "modules/video_coding/codecs/interface/common_constants.h" 24 #include "modules/video_coding/deprecated/frame_buffer.h" 25 #include "modules/video_coding/deprecated/packet.h" 26 #include "rtc_base/checks.h" 27 #include "rtc_base/logging.h" 28 29 namespace webrtc { 30 31 VCMDecodingState::VCMDecodingState() 32 : sequence_num_(0), 33 time_stamp_(0), 34 picture_id_(kNoPictureId), 35 temporal_id_(kNoTemporalIdx), 36 tl0_pic_id_(kNoTl0PicIdx), 37 full_sync_(true), 38 in_initial_state_(true) { 39 memset(frame_decoded_, 0, sizeof(frame_decoded_)); 40 } 41 42 VCMDecodingState::~VCMDecodingState() {} 43 44 void VCMDecodingState::Reset() { 45 // TODO(mikhal): Verify - not always would want to reset the sync 46 sequence_num_ = 0; 47 time_stamp_ = 0; 48 picture_id_ = kNoPictureId; 49 temporal_id_ = kNoTemporalIdx; 50 tl0_pic_id_ = kNoTl0PicIdx; 51 full_sync_ = true; 52 in_initial_state_ = true; 53 memset(frame_decoded_, 0, sizeof(frame_decoded_)); 54 received_sps_.clear(); 55 received_pps_.clear(); 56 } 57 58 uint32_t VCMDecodingState::time_stamp() const { 59 return time_stamp_; 60 } 61 62 uint16_t VCMDecodingState::sequence_num() const { 63 return sequence_num_; 64 } 65 66 bool VCMDecodingState::IsOldFrame(const VCMFrameBuffer* frame) const { 67 RTC_DCHECK(frame); 68 if (in_initial_state_) 69 return false; 70 return !IsNewerTimestamp(frame->RtpTimestamp(), time_stamp_); 71 } 72 73 bool VCMDecodingState::IsOldPacket(const VCMPacket* packet) const { 74 RTC_DCHECK(packet); 75 if (in_initial_state_) 76 return false; 77 return !IsNewerTimestamp(packet->timestamp, time_stamp_); 78 } 79 80 void VCMDecodingState::SetState(const VCMFrameBuffer* frame) { 81 RTC_DCHECK(frame); 82 RTC_CHECK_GE(frame->GetHighSeqNum(), 0); 83 if (!UsingFlexibleMode(frame)) 84 UpdateSyncState(frame); 85 sequence_num_ = static_cast<uint16_t>(frame->GetHighSeqNum()); 86 time_stamp_ = frame->RtpTimestamp(); 87 picture_id_ = frame->PictureId(); 88 temporal_id_ = frame->TemporalId(); 89 tl0_pic_id_ = frame->Tl0PicId(); 90 91 for (const NaluInfo& nalu : frame->GetNaluInfos()) { 92 if (nalu.type == H264::NaluType::kPps) { 93 if (nalu.pps_id < 0) { 94 RTC_LOG(LS_WARNING) << "Received pps without pps id."; 95 } else if (nalu.sps_id < 0) { 96 RTC_LOG(LS_WARNING) << "Received pps without sps id."; 97 } else { 98 received_pps_[nalu.pps_id] = nalu.sps_id; 99 } 100 } else if (nalu.type == H264::NaluType::kSps) { 101 if (nalu.sps_id < 0) { 102 RTC_LOG(LS_WARNING) << "Received sps without sps id."; 103 } else { 104 received_sps_.insert(nalu.sps_id); 105 } 106 } 107 } 108 109 if (UsingFlexibleMode(frame)) { 110 uint16_t frame_index = picture_id_ % kFrameDecodedLength; 111 if (in_initial_state_) { 112 frame_decoded_cleared_to_ = frame_index; 113 } else if (frame->FrameType() == VideoFrameType::kVideoFrameKey) { 114 memset(frame_decoded_, 0, sizeof(frame_decoded_)); 115 frame_decoded_cleared_to_ = frame_index; 116 } else { 117 if (AheadOfFramesDecodedClearedTo(frame_index)) { 118 while (frame_decoded_cleared_to_ != frame_index) { 119 frame_decoded_cleared_to_ = 120 (frame_decoded_cleared_to_ + 1) % kFrameDecodedLength; 121 frame_decoded_[frame_decoded_cleared_to_] = false; 122 } 123 } 124 } 125 frame_decoded_[frame_index] = true; 126 } 127 128 in_initial_state_ = false; 129 } 130 131 void VCMDecodingState::CopyFrom(const VCMDecodingState& state) { 132 sequence_num_ = state.sequence_num_; 133 time_stamp_ = state.time_stamp_; 134 picture_id_ = state.picture_id_; 135 temporal_id_ = state.temporal_id_; 136 tl0_pic_id_ = state.tl0_pic_id_; 137 full_sync_ = state.full_sync_; 138 in_initial_state_ = state.in_initial_state_; 139 frame_decoded_cleared_to_ = state.frame_decoded_cleared_to_; 140 memcpy(frame_decoded_, state.frame_decoded_, sizeof(frame_decoded_)); 141 received_sps_ = state.received_sps_; 142 received_pps_ = state.received_pps_; 143 } 144 145 bool VCMDecodingState::UpdateEmptyFrame(const VCMFrameBuffer* frame) { 146 bool empty_packet = frame->GetHighSeqNum() == frame->GetLowSeqNum(); 147 if (in_initial_state_ && empty_packet) { 148 // Drop empty packets as long as we are in the initial state. 149 return true; 150 } 151 if ((empty_packet && ContinuousSeqNum(frame->GetHighSeqNum())) || 152 ContinuousFrame(frame)) { 153 // Continuous empty packets or continuous frames can be dropped if we 154 // advance the sequence number. 155 sequence_num_ = frame->GetHighSeqNum(); 156 time_stamp_ = frame->RtpTimestamp(); 157 return true; 158 } 159 return false; 160 } 161 162 void VCMDecodingState::UpdateOldPacket(const VCMPacket* packet) { 163 RTC_DCHECK(packet); 164 if (packet->timestamp == time_stamp_) { 165 // Late packet belonging to the last decoded frame - make sure we update the 166 // last decoded sequence number. 167 sequence_num_ = LatestSequenceNumber(packet->seqNum, sequence_num_); 168 } 169 } 170 171 void VCMDecodingState::SetSeqNum(uint16_t new_seq_num) { 172 sequence_num_ = new_seq_num; 173 } 174 175 bool VCMDecodingState::in_initial_state() const { 176 return in_initial_state_; 177 } 178 179 bool VCMDecodingState::full_sync() const { 180 return full_sync_; 181 } 182 183 void VCMDecodingState::UpdateSyncState(const VCMFrameBuffer* frame) { 184 if (in_initial_state_) 185 return; 186 if (frame->TemporalId() == kNoTemporalIdx || 187 frame->Tl0PicId() == kNoTl0PicIdx) { 188 full_sync_ = true; 189 } else if (frame->FrameType() == VideoFrameType::kVideoFrameKey || 190 frame->LayerSync()) { 191 full_sync_ = true; 192 } else if (full_sync_) { 193 // Verify that we are still in sync. 194 // Sync will be broken if continuity is true for layers but not for the 195 // other methods (PictureId and SeqNum). 196 if (UsingPictureId(frame)) { 197 // First check for a valid tl0PicId. 198 if (frame->Tl0PicId() - tl0_pic_id_ > 1) { 199 full_sync_ = false; 200 } else { 201 full_sync_ = ContinuousPictureId(frame->PictureId()); 202 } 203 } else { 204 full_sync_ = 205 ContinuousSeqNum(static_cast<uint16_t>(frame->GetLowSeqNum())); 206 } 207 } 208 } 209 210 bool VCMDecodingState::ContinuousFrame(const VCMFrameBuffer* frame) const { 211 // Check continuity based on the following hierarchy: 212 // - Temporal layers (stop here if out of sync). 213 // - Picture Id when available. 214 // - Sequence numbers. 215 // Return true when in initial state. 216 // Note that when a method is not applicable it will return false. 217 RTC_DCHECK(frame); 218 // A key frame is always considered continuous as it doesn't refer to any 219 // frames and therefore won't introduce any errors even if prior frames are 220 // missing. 221 if (frame->FrameType() == VideoFrameType::kVideoFrameKey && 222 HaveSpsAndPps(frame->GetNaluInfos())) { 223 return true; 224 } 225 // When in the initial state we always require a key frame to start decoding. 226 if (in_initial_state_) 227 return false; 228 if (ContinuousLayer(frame->TemporalId(), frame->Tl0PicId())) 229 return true; 230 // tl0picId is either not used, or should remain unchanged. 231 if (frame->Tl0PicId() != tl0_pic_id_) 232 return false; 233 // Base layers are not continuous or temporal layers are inactive. 234 // In the presence of temporal layers, check for Picture ID/sequence number 235 // continuity if sync can be restored by this frame. 236 if (!full_sync_ && !frame->LayerSync()) 237 return false; 238 if (UsingPictureId(frame)) { 239 if (UsingFlexibleMode(frame)) { 240 return ContinuousFrameRefs(frame); 241 } else { 242 return ContinuousPictureId(frame->PictureId()); 243 } 244 } else { 245 return ContinuousSeqNum(static_cast<uint16_t>(frame->GetLowSeqNum())) && 246 HaveSpsAndPps(frame->GetNaluInfos()); 247 } 248 } 249 250 bool VCMDecodingState::ContinuousPictureId(int picture_id) const { 251 int next_picture_id = picture_id_ + 1; 252 if (picture_id < picture_id_) { 253 // Wrap 254 if (picture_id_ >= 0x80) { 255 // 15 bits used for picture id 256 return ((next_picture_id & 0x7FFF) == picture_id); 257 } else { 258 // 7 bits used for picture id 259 return ((next_picture_id & 0x7F) == picture_id); 260 } 261 } 262 // No wrap 263 return (next_picture_id == picture_id); 264 } 265 266 bool VCMDecodingState::ContinuousSeqNum(uint16_t seq_num) const { 267 return seq_num == static_cast<uint16_t>(sequence_num_ + 1); 268 } 269 270 bool VCMDecodingState::ContinuousLayer(int temporal_id, int tl0_pic_id) const { 271 // First, check if applicable. 272 if (temporal_id == kNoTemporalIdx || tl0_pic_id == kNoTl0PicIdx) 273 return false; 274 // If this is the first frame to use temporal layers, make sure we start 275 // from base. 276 else if (tl0_pic_id_ == kNoTl0PicIdx && temporal_id_ == kNoTemporalIdx && 277 temporal_id == 0) 278 return true; 279 280 // Current implementation: Look for base layer continuity. 281 if (temporal_id != 0) 282 return false; 283 return (static_cast<uint8_t>(tl0_pic_id_ + 1) == tl0_pic_id); 284 } 285 286 bool VCMDecodingState::ContinuousFrameRefs(const VCMFrameBuffer* frame) const { 287 uint8_t num_refs = frame->CodecSpecific()->codecSpecific.VP9.num_ref_pics; 288 for (uint8_t r = 0; r < num_refs; ++r) { 289 uint16_t frame_ref = frame->PictureId() - 290 frame->CodecSpecific()->codecSpecific.VP9.p_diff[r]; 291 uint16_t frame_index = frame_ref % kFrameDecodedLength; 292 if (AheadOfFramesDecodedClearedTo(frame_index) || 293 !frame_decoded_[frame_index]) { 294 return false; 295 } 296 } 297 return true; 298 } 299 300 bool VCMDecodingState::UsingPictureId(const VCMFrameBuffer* frame) const { 301 return (frame->PictureId() != kNoPictureId && picture_id_ != kNoPictureId); 302 } 303 304 bool VCMDecodingState::UsingFlexibleMode(const VCMFrameBuffer* frame) const { 305 bool is_flexible_mode = 306 frame->CodecSpecific()->codecType == kVideoCodecVP9 && 307 frame->CodecSpecific()->codecSpecific.VP9.flexible_mode; 308 if (is_flexible_mode && frame->PictureId() == kNoPictureId) { 309 RTC_LOG(LS_WARNING) << "Frame is marked as using flexible mode but no" 310 "picture id is set."; 311 return false; 312 } 313 return is_flexible_mode; 314 } 315 316 // TODO(philipel): change how check work, this check practially 317 // limits the max p_diff to 64. 318 bool VCMDecodingState::AheadOfFramesDecodedClearedTo(uint16_t index) const { 319 // No way of knowing for sure if we are actually ahead of 320 // frame_decoded_cleared_to_. We just make the assumption 321 // that we are not trying to reference back to a very old 322 // index, but instead are referencing a newer index. 323 uint16_t diff = 324 index > frame_decoded_cleared_to_ 325 ? kFrameDecodedLength - (index - frame_decoded_cleared_to_) 326 : frame_decoded_cleared_to_ - index; 327 return diff > kFrameDecodedLength / 2; 328 } 329 330 bool VCMDecodingState::HaveSpsAndPps(const std::vector<NaluInfo>& nalus) const { 331 std::set<int> new_sps; 332 std::map<int, int> new_pps; 333 for (const NaluInfo& nalu : nalus) { 334 // Check if this nalu actually contains sps/pps information or dependencies. 335 if (nalu.sps_id == -1 && nalu.pps_id == -1) 336 continue; 337 switch (nalu.type) { 338 case H264::NaluType::kPps: 339 if (nalu.pps_id < 0) { 340 RTC_LOG(LS_WARNING) << "Received pps without pps id."; 341 } else if (nalu.sps_id < 0) { 342 RTC_LOG(LS_WARNING) << "Received pps without sps id."; 343 } else { 344 new_pps[nalu.pps_id] = nalu.sps_id; 345 } 346 break; 347 case H264::NaluType::kSps: 348 if (nalu.sps_id < 0) { 349 RTC_LOG(LS_WARNING) << "Received sps without sps id."; 350 } else { 351 new_sps.insert(nalu.sps_id); 352 } 353 break; 354 default: { 355 int needed_sps = -1; 356 auto pps_it = new_pps.find(nalu.pps_id); 357 if (pps_it != new_pps.end()) { 358 needed_sps = pps_it->second; 359 } else { 360 auto pps_it2 = received_pps_.find(nalu.pps_id); 361 if (pps_it2 == received_pps_.end()) { 362 return false; 363 } 364 needed_sps = pps_it2->second; 365 } 366 if (new_sps.find(needed_sps) == new_sps.end() && 367 received_sps_.find(needed_sps) == received_sps_.end()) { 368 return false; 369 } 370 break; 371 } 372 } 373 } 374 return true; 375 } 376 377 } // namespace webrtc