sps_parser_unittest.cc (8935B)
1 /* 2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "common_video/h264/sps_parser.h" 12 13 #include <cstddef> 14 #include <cstdint> 15 #include <optional> 16 17 #include "api/array_view.h" 18 #include "common_video/h264/h264_common.h" 19 #include "rtc_base/bit_buffer.h" 20 #include "rtc_base/buffer.h" 21 #include "test/gtest.h" 22 23 namespace webrtc { 24 25 // Example SPS can be generated with ffmpeg. Here's an example set of commands, 26 // runnable on OS X: 27 // 1) Generate a video, from the camera: 28 // ffmpeg -f avfoundation -i "0" -video_size 640x360 camera.mov 29 // 30 // 2) Scale the video to the desired size: 31 // ffmpeg -i camera.mov -vf scale=640x360 scaled.mov 32 // 33 // 3) Get just the H.264 bitstream in AnnexB: 34 // ffmpeg -i scaled.mov -vcodec copy -vbsf h264_mp4toannexb -an out.h264 35 // 36 // 4) Open out.h264 and find the SPS, generally everything between the first 37 // two start codes (0 0 0 1 or 0 0 1). The first byte should be 0x67, 38 // which should be stripped out before being passed to the parser. 39 40 static const size_t kSpsBufferMaxSize = 256; 41 42 // Generates a fake SPS with basically everything empty but the width/height. 43 // Pass in a buffer of at least kSpsBufferMaxSize. 44 // The fake SPS that this generates also always has at least one emulation byte 45 // at offset 2, since the first two bytes are always 0, and has a 0x3 as the 46 // level_idc, to make sure the parser doesn't eat all 0x3 bytes. 47 void GenerateFakeSps(uint16_t width, 48 uint16_t height, 49 int id, 50 uint32_t log2_max_frame_num_minus4, 51 uint32_t log2_max_pic_order_cnt_lsb_minus4, 52 Buffer* out_buffer) { 53 uint8_t rbsp[kSpsBufferMaxSize] = {0}; 54 BitBufferWriter writer(rbsp, kSpsBufferMaxSize); 55 // Profile byte. 56 writer.WriteUInt8(0); 57 // Constraint sets and reserved zero bits. 58 writer.WriteUInt8(0); 59 // level_idc. 60 writer.WriteUInt8(0x3u); 61 // seq_paramter_set_id. 62 writer.WriteExponentialGolomb(id); 63 // Profile is not special, so we skip all the chroma format settings. 64 65 // Now some bit magic. 66 // log2_max_frame_num_minus4: ue(v). 67 writer.WriteExponentialGolomb(log2_max_frame_num_minus4); 68 // pic_order_cnt_type: ue(v). 0 is the type we want. 69 writer.WriteExponentialGolomb(0); 70 // log2_max_pic_order_cnt_lsb_minus4: ue(v). 0 is fine. 71 writer.WriteExponentialGolomb(log2_max_pic_order_cnt_lsb_minus4); 72 // max_num_ref_frames: ue(v). 0 is fine. 73 writer.WriteExponentialGolomb(0); 74 // gaps_in_frame_num_value_allowed_flag: u(1). 75 writer.WriteBits(0, 1); 76 // Next are width/height. First, calculate the mbs/map_units versions. 77 uint16_t width_in_mbs_minus1 = (width + 15) / 16 - 1; 78 79 // For the height, we're going to define frame_mbs_only_flag, so we need to 80 // divide by 2. See the parser for the full calculation. 81 uint16_t height_in_map_units_minus1 = ((height + 15) / 16 - 1) / 2; 82 // Write each as ue(v). 83 writer.WriteExponentialGolomb(width_in_mbs_minus1); 84 writer.WriteExponentialGolomb(height_in_map_units_minus1); 85 // frame_mbs_only_flag: u(1). Needs to be false. 86 writer.WriteBits(0, 1); 87 // mb_adaptive_frame_field_flag: u(1). 88 writer.WriteBits(0, 1); 89 // direct_8x8_inferene_flag: u(1). 90 writer.WriteBits(0, 1); 91 // frame_cropping_flag: u(1). 1, so we can supply crop. 92 writer.WriteBits(1, 1); 93 // Now we write the left/right/top/bottom crop. For simplicity, we'll put all 94 // the crop at the left/top. 95 // We picked a 4:2:0 format, so the crops are 1/2 the pixel crop values. 96 // Left/right. 97 writer.WriteExponentialGolomb(((16 - (width % 16)) % 16) / 2); 98 writer.WriteExponentialGolomb(0); 99 // Top/bottom. 100 writer.WriteExponentialGolomb(((16 - (height % 16)) % 16) / 2); 101 writer.WriteExponentialGolomb(0); 102 103 // vui_parameters_present_flag: u(1) 104 writer.WriteBits(0, 1); 105 106 // Get the number of bytes written (including the last partial byte). 107 size_t byte_count, bit_offset; 108 writer.GetCurrentOffset(&byte_count, &bit_offset); 109 if (bit_offset > 0) { 110 byte_count++; 111 } 112 113 out_buffer->Clear(); 114 H264::WriteRbsp(MakeArrayView(rbsp, byte_count), out_buffer); 115 } 116 117 TEST(H264SpsParserTest, TestSampleSPSHdLandscape) { 118 // SPS for a 1280x720 camera capture from ffmpeg on osx. Contains 119 // emulation bytes but no cropping. 120 const uint8_t buffer[] = {0x7A, 0x00, 0x1F, 0xBC, 0xD9, 0x40, 0x50, 0x05, 121 0xBA, 0x10, 0x00, 0x00, 0x03, 0x00, 0xC0, 0x00, 122 0x00, 0x2A, 0xE0, 0xF1, 0x83, 0x19, 0x60}; 123 std::optional<SpsParser::SpsState> sps = SpsParser::ParseSps(buffer); 124 ASSERT_TRUE(sps.has_value()); 125 EXPECT_EQ(1280u, sps->width); 126 EXPECT_EQ(720u, sps->height); 127 } 128 129 TEST(H264SpsParserTest, TestSampleSPSVgaLandscape) { 130 // SPS for a 640x360 camera capture from ffmpeg on osx. Contains emulation 131 // bytes and cropping (360 isn't divisible by 16). 132 const uint8_t buffer[] = {0x7A, 0x00, 0x1E, 0xBC, 0xD9, 0x40, 0xA0, 0x2F, 133 0xF8, 0x98, 0x40, 0x00, 0x00, 0x03, 0x01, 0x80, 134 0x00, 0x00, 0x56, 0x83, 0xC5, 0x8B, 0x65, 0x80}; 135 std::optional<SpsParser::SpsState> sps = SpsParser::ParseSps(buffer); 136 ASSERT_TRUE(sps.has_value()); 137 EXPECT_EQ(640u, sps->width); 138 EXPECT_EQ(360u, sps->height); 139 } 140 141 TEST(H264SpsParserTest, TestSampleSPSWeirdResolution) { 142 // SPS for a 200x400 camera capture from ffmpeg on osx. Horizontal and 143 // veritcal crop (neither dimension is divisible by 16). 144 const uint8_t buffer[] = {0x7A, 0x00, 0x0D, 0xBC, 0xD9, 0x43, 0x43, 0x3E, 145 0x5E, 0x10, 0x00, 0x00, 0x03, 0x00, 0x60, 0x00, 146 0x00, 0x15, 0xA0, 0xF1, 0x42, 0x99, 0x60}; 147 std::optional<SpsParser::SpsState> sps = SpsParser::ParseSps(buffer); 148 ASSERT_TRUE(sps.has_value()); 149 EXPECT_EQ(200u, sps->width); 150 EXPECT_EQ(400u, sps->height); 151 } 152 153 TEST(H264SpsParserTest, TestSyntheticSPSQvgaLandscape) { 154 Buffer buffer; 155 GenerateFakeSps(320u, 180u, 1, 0, 0, &buffer); 156 std::optional<SpsParser::SpsState> sps = SpsParser::ParseSps(buffer); 157 ASSERT_TRUE(sps.has_value()); 158 EXPECT_EQ(320u, sps->width); 159 EXPECT_EQ(180u, sps->height); 160 EXPECT_EQ(1u, sps->id); 161 } 162 163 TEST(H264SpsParserTest, TestSyntheticSPSWeirdResolution) { 164 Buffer buffer; 165 GenerateFakeSps(156u, 122u, 2, 0, 0, &buffer); 166 std::optional<SpsParser::SpsState> sps = SpsParser::ParseSps(buffer); 167 ASSERT_TRUE(sps.has_value()); 168 EXPECT_EQ(156u, sps->width); 169 EXPECT_EQ(122u, sps->height); 170 EXPECT_EQ(2u, sps->id); 171 } 172 173 TEST(H264SpsParserTest, TestSampleSPSWithScalingLists) { 174 // SPS from a 1920x1080 video. Contains scaling lists (and vertical cropping). 175 const uint8_t buffer[] = {0x64, 0x00, 0x2a, 0xad, 0x84, 0x01, 0x0c, 0x20, 176 0x08, 0x61, 0x00, 0x43, 0x08, 0x02, 0x18, 0x40, 177 0x10, 0xc2, 0x00, 0x84, 0x3b, 0x50, 0x3c, 0x01, 178 0x13, 0xf2, 0xcd, 0xc0, 0x40, 0x40, 0x50, 0x00, 179 0x00, 0x00, 0x10, 0x00, 0x00, 0x01, 0xe8, 0x40}; 180 std::optional<SpsParser::SpsState> sps = SpsParser::ParseSps(buffer); 181 ASSERT_TRUE(sps.has_value()); 182 EXPECT_EQ(1920u, sps->width); 183 EXPECT_EQ(1080u, sps->height); 184 } 185 186 TEST(H264SpsParserTest, TestLog2MaxFrameNumMinus4) { 187 Buffer buffer; 188 GenerateFakeSps(320u, 180u, 1, 0, 0, &buffer); 189 std::optional<SpsParser::SpsState> sps = SpsParser::ParseSps(buffer); 190 ASSERT_TRUE(sps.has_value()); 191 EXPECT_EQ(320u, sps->width); 192 EXPECT_EQ(180u, sps->height); 193 EXPECT_EQ(1u, sps->id); 194 EXPECT_EQ(4u, sps->log2_max_frame_num); 195 196 GenerateFakeSps(320u, 180u, 1, 12, 0, &buffer); 197 sps = SpsParser::ParseSps(buffer); 198 ASSERT_TRUE(sps.has_value()); 199 EXPECT_EQ(320u, sps->width); 200 EXPECT_EQ(180u, sps->height); 201 EXPECT_EQ(1u, sps->id); 202 EXPECT_EQ(16u, sps->log2_max_frame_num); 203 204 GenerateFakeSps(320u, 180u, 1, 13, 0, &buffer); 205 EXPECT_FALSE(SpsParser::ParseSps(buffer)); 206 } 207 208 TEST(H264SpsParserTest, TestLog2MaxPicOrderCntMinus4) { 209 Buffer buffer; 210 GenerateFakeSps(320u, 180u, 1, 0, 0, &buffer); 211 std::optional<SpsParser::SpsState> sps = SpsParser::ParseSps(buffer); 212 ASSERT_TRUE(sps.has_value()); 213 EXPECT_EQ(320u, sps->width); 214 EXPECT_EQ(180u, sps->height); 215 EXPECT_EQ(1u, sps->id); 216 EXPECT_EQ(4u, sps->log2_max_pic_order_cnt_lsb); 217 218 GenerateFakeSps(320u, 180u, 1, 0, 12, &buffer); 219 EXPECT_TRUE(static_cast<bool>(sps = SpsParser::ParseSps(buffer))); 220 EXPECT_EQ(320u, sps->width); 221 EXPECT_EQ(180u, sps->height); 222 EXPECT_EQ(1u, sps->id); 223 EXPECT_EQ(16u, sps->log2_max_pic_order_cnt_lsb); 224 225 GenerateFakeSps(320u, 180u, 1, 0, 13, &buffer); 226 EXPECT_FALSE(SpsParser::ParseSps(buffer)); 227 } 228 229 } // namespace webrtc