snappy_unittest.cc (36724B)
1 // Copyright 2005 and onwards Google Inc. 2 // 3 // Redistribution and use in source and binary forms, with or without 4 // modification, are permitted provided that the following conditions are 5 // met: 6 // 7 // * Redistributions of source code must retain the above copyright 8 // notice, this list of conditions and the following disclaimer. 9 // * Redistributions in binary form must reproduce the above 10 // copyright notice, this list of conditions and the following disclaimer 11 // in the documentation and/or other materials provided with the 12 // distribution. 13 // * Neither the name of Google Inc. nor the names of its 14 // contributors may be used to endorse or promote products derived from 15 // this software without specific prior written permission. 16 // 17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 29 #include <algorithm> 30 #include <cinttypes> 31 #include <cmath> 32 #include <cstdlib> 33 #include <random> 34 #include <string> 35 #include <utility> 36 #include <vector> 37 38 #include "snappy-test.h" 39 40 #include "gtest/gtest.h" 41 42 #include "snappy-internal.h" 43 #include "snappy-sinksource.h" 44 #include "snappy.h" 45 #include "snappy_test_data.h" 46 47 SNAPPY_FLAG(bool, snappy_dump_decompression_table, false, 48 "If true, we print the decompression table during tests."); 49 50 namespace snappy { 51 52 namespace { 53 54 #if HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF 55 56 // To test against code that reads beyond its input, this class copies a 57 // string to a newly allocated group of pages, the last of which 58 // is made unreadable via mprotect. Note that we need to allocate the 59 // memory with mmap(), as POSIX allows mprotect() only on memory allocated 60 // with mmap(), and some malloc/posix_memalign implementations expect to 61 // be able to read previously allocated memory while doing heap allocations. 62 class DataEndingAtUnreadablePage { 63 public: 64 explicit DataEndingAtUnreadablePage(const std::string& s) { 65 const size_t page_size = sysconf(_SC_PAGESIZE); 66 const size_t size = s.size(); 67 // Round up space for string to a multiple of page_size. 68 size_t space_for_string = (size + page_size - 1) & ~(page_size - 1); 69 alloc_size_ = space_for_string + page_size; 70 mem_ = mmap(NULL, alloc_size_, 71 PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); 72 CHECK_NE(MAP_FAILED, mem_); 73 protected_page_ = reinterpret_cast<char*>(mem_) + space_for_string; 74 char* dst = protected_page_ - size; 75 std::memcpy(dst, s.data(), size); 76 data_ = dst; 77 size_ = size; 78 // Make guard page unreadable. 79 CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_NONE)); 80 } 81 82 ~DataEndingAtUnreadablePage() { 83 const size_t page_size = sysconf(_SC_PAGESIZE); 84 // Undo the mprotect. 85 CHECK_EQ(0, mprotect(protected_page_, page_size, PROT_READ|PROT_WRITE)); 86 CHECK_EQ(0, munmap(mem_, alloc_size_)); 87 } 88 89 const char* data() const { return data_; } 90 size_t size() const { return size_; } 91 92 private: 93 size_t alloc_size_; 94 void* mem_; 95 char* protected_page_; 96 const char* data_; 97 size_t size_; 98 }; 99 100 #else // HAVE_FUNC_MMAP) && HAVE_FUNC_SYSCONF 101 102 // Fallback for systems without mmap. 103 using DataEndingAtUnreadablePage = std::string; 104 105 #endif 106 107 int VerifyString(const std::string& input) { 108 std::string compressed; 109 DataEndingAtUnreadablePage i(input); 110 const size_t written = snappy::Compress(i.data(), i.size(), &compressed); 111 CHECK_EQ(written, compressed.size()); 112 CHECK_LE(compressed.size(), 113 snappy::MaxCompressedLength(input.size())); 114 CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); 115 116 std::string uncompressed; 117 DataEndingAtUnreadablePage c(compressed); 118 CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed)); 119 CHECK_EQ(uncompressed, input); 120 return uncompressed.size(); 121 } 122 123 void VerifyStringSink(const std::string& input) { 124 std::string compressed; 125 DataEndingAtUnreadablePage i(input); 126 const size_t written = snappy::Compress(i.data(), i.size(), &compressed); 127 CHECK_EQ(written, compressed.size()); 128 CHECK_LE(compressed.size(), 129 snappy::MaxCompressedLength(input.size())); 130 CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); 131 132 std::string uncompressed; 133 uncompressed.resize(input.size()); 134 snappy::UncheckedByteArraySink sink(string_as_array(&uncompressed)); 135 DataEndingAtUnreadablePage c(compressed); 136 snappy::ByteArraySource source(c.data(), c.size()); 137 CHECK(snappy::Uncompress(&source, &sink)); 138 CHECK_EQ(uncompressed, input); 139 } 140 141 struct iovec* GetIOVec(const std::string& input, char*& buf, size_t& num) { 142 std::minstd_rand0 rng(input.size()); 143 std::uniform_int_distribution<size_t> uniform_1_to_10(1, 10); 144 num = uniform_1_to_10(rng); 145 if (input.size() < num) { 146 num = input.size(); 147 } 148 struct iovec* iov = new iovec[num]; 149 size_t used_so_far = 0; 150 std::bernoulli_distribution one_in_five(1.0 / 5); 151 for (size_t i = 0; i < num; ++i) { 152 assert(used_so_far < input.size()); 153 iov[i].iov_base = buf + used_so_far; 154 if (i == num - 1) { 155 iov[i].iov_len = input.size() - used_so_far; 156 } else { 157 // Randomly choose to insert a 0 byte entry. 158 if (one_in_five(rng)) { 159 iov[i].iov_len = 0; 160 } else { 161 std::uniform_int_distribution<size_t> uniform_not_used_so_far( 162 0, input.size() - used_so_far - 1); 163 iov[i].iov_len = uniform_not_used_so_far(rng); 164 } 165 } 166 used_so_far += iov[i].iov_len; 167 } 168 return iov; 169 } 170 171 int VerifyIOVecSource(const std::string& input) { 172 std::string compressed; 173 std::string copy = input; 174 char* buf = const_cast<char*>(copy.data()); 175 size_t num = 0; 176 struct iovec* iov = GetIOVec(input, buf, num); 177 const size_t written = snappy::CompressFromIOVec(iov, num, &compressed); 178 CHECK_EQ(written, compressed.size()); 179 CHECK_LE(compressed.size(), snappy::MaxCompressedLength(input.size())); 180 CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); 181 182 std::string uncompressed; 183 DataEndingAtUnreadablePage c(compressed); 184 CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed)); 185 CHECK_EQ(uncompressed, input); 186 delete[] iov; 187 return uncompressed.size(); 188 } 189 190 void VerifyIOVecSink(const std::string& input) { 191 std::string compressed; 192 DataEndingAtUnreadablePage i(input); 193 const size_t written = snappy::Compress(i.data(), i.size(), &compressed); 194 CHECK_EQ(written, compressed.size()); 195 CHECK_LE(compressed.size(), snappy::MaxCompressedLength(input.size())); 196 CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); 197 char* buf = new char[input.size()]; 198 size_t num = 0; 199 struct iovec* iov = GetIOVec(input, buf, num); 200 CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(), iov, 201 num)); 202 CHECK(!memcmp(buf, input.data(), input.size())); 203 delete[] iov; 204 delete[] buf; 205 } 206 207 // Test that data compressed by a compressor that does not 208 // obey block sizes is uncompressed properly. 209 void VerifyNonBlockedCompression(const std::string& input) { 210 if (input.length() > snappy::kBlockSize) { 211 // We cannot test larger blocks than the maximum block size, obviously. 212 return; 213 } 214 215 std::string prefix; 216 Varint::Append32(&prefix, input.size()); 217 218 // Setup compression table 219 snappy::internal::WorkingMemory wmem(input.size()); 220 int table_size; 221 uint16_t* table = wmem.GetHashTable(input.size(), &table_size); 222 223 // Compress entire input in one shot 224 std::string compressed; 225 compressed += prefix; 226 compressed.resize(prefix.size()+snappy::MaxCompressedLength(input.size())); 227 char* dest = string_as_array(&compressed) + prefix.size(); 228 char* end = snappy::internal::CompressFragment(input.data(), input.size(), 229 dest, table, table_size); 230 compressed.resize(end - compressed.data()); 231 232 // Uncompress into std::string 233 std::string uncomp_str; 234 CHECK(snappy::Uncompress(compressed.data(), compressed.size(), &uncomp_str)); 235 CHECK_EQ(uncomp_str, input); 236 237 // Uncompress using source/sink 238 std::string uncomp_str2; 239 uncomp_str2.resize(input.size()); 240 snappy::UncheckedByteArraySink sink(string_as_array(&uncomp_str2)); 241 snappy::ByteArraySource source(compressed.data(), compressed.size()); 242 CHECK(snappy::Uncompress(&source, &sink)); 243 CHECK_EQ(uncomp_str2, input); 244 245 // Uncompress into iovec 246 { 247 static const int kNumBlocks = 10; 248 struct iovec vec[kNumBlocks]; 249 const int block_size = 1 + input.size() / kNumBlocks; 250 std::string iovec_data(block_size * kNumBlocks, 'x'); 251 for (int i = 0; i < kNumBlocks; ++i) { 252 vec[i].iov_base = string_as_array(&iovec_data) + i * block_size; 253 vec[i].iov_len = block_size; 254 } 255 CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(), 256 vec, kNumBlocks)); 257 CHECK_EQ(std::string(iovec_data.data(), input.size()), input); 258 } 259 } 260 261 // Expand the input so that it is at least K times as big as block size 262 std::string Expand(const std::string& input) { 263 static const int K = 3; 264 std::string data = input; 265 while (data.size() < K * snappy::kBlockSize) { 266 data += input; 267 } 268 return data; 269 } 270 271 int Verify(const std::string& input) { 272 VLOG(1) << "Verifying input of size " << input.size(); 273 274 // Compress using string based routines 275 const int result = VerifyString(input); 276 277 // Compress using `iovec`-based routines. 278 CHECK_EQ(VerifyIOVecSource(input), result); 279 280 // Verify using sink based routines 281 VerifyStringSink(input); 282 283 VerifyNonBlockedCompression(input); 284 VerifyIOVecSink(input); 285 if (!input.empty()) { 286 const std::string expanded = Expand(input); 287 VerifyNonBlockedCompression(expanded); 288 VerifyIOVecSink(input); 289 } 290 291 return result; 292 } 293 294 bool IsValidCompressedBuffer(const std::string& c) { 295 return snappy::IsValidCompressedBuffer(c.data(), c.size()); 296 } 297 bool Uncompress(const std::string& c, std::string* u) { 298 return snappy::Uncompress(c.data(), c.size(), u); 299 } 300 301 // This test checks to ensure that snappy doesn't coredump if it gets 302 // corrupted data. 303 TEST(CorruptedTest, VerifyCorrupted) { 304 std::string source = "making sure we don't crash with corrupted input"; 305 VLOG(1) << source; 306 std::string dest; 307 std::string uncmp; 308 snappy::Compress(source.data(), source.size(), &dest); 309 310 // Mess around with the data. It's hard to simulate all possible 311 // corruptions; this is just one example ... 312 CHECK_GT(dest.size(), 3); 313 dest[1]--; 314 dest[3]++; 315 // this really ought to fail. 316 CHECK(!IsValidCompressedBuffer(dest)); 317 CHECK(!Uncompress(dest, &uncmp)); 318 319 // This is testing for a security bug - a buffer that decompresses to 100k 320 // but we lie in the snappy header and only reserve 0 bytes of memory :) 321 source.resize(100000); 322 for (char& source_char : source) { 323 source_char = 'A'; 324 } 325 snappy::Compress(source.data(), source.size(), &dest); 326 dest[0] = dest[1] = dest[2] = dest[3] = 0; 327 CHECK(!IsValidCompressedBuffer(dest)); 328 CHECK(!Uncompress(dest, &uncmp)); 329 330 if (sizeof(void *) == 4) { 331 // Another security check; check a crazy big length can't DoS us with an 332 // over-allocation. 333 // Currently this is done only for 32-bit builds. On 64-bit builds, 334 // where 3 GB might be an acceptable allocation size, Uncompress() 335 // attempts to decompress, and sometimes causes the test to run out of 336 // memory. 337 dest[0] = dest[1] = dest[2] = dest[3] = '\xff'; 338 // This decodes to a really large size, i.e., about 3 GB. 339 dest[4] = 'k'; 340 CHECK(!IsValidCompressedBuffer(dest)); 341 CHECK(!Uncompress(dest, &uncmp)); 342 } else { 343 LOG(WARNING) << "Crazy decompression lengths not checked on 64-bit build"; 344 } 345 346 // This decodes to about 2 MB; much smaller, but should still fail. 347 dest[0] = dest[1] = dest[2] = '\xff'; 348 dest[3] = 0x00; 349 CHECK(!IsValidCompressedBuffer(dest)); 350 CHECK(!Uncompress(dest, &uncmp)); 351 352 // try reading stuff in from a bad file. 353 for (int i = 1; i <= 3; ++i) { 354 std::string data = 355 ReadTestDataFile(StrFormat("baddata%d.snappy", i).c_str(), 0); 356 std::string uncmp; 357 // check that we don't return a crazy length 358 size_t ulen; 359 CHECK(!snappy::GetUncompressedLength(data.data(), data.size(), &ulen) 360 || (ulen < (1<<20))); 361 uint32_t ulen2; 362 snappy::ByteArraySource source(data.data(), data.size()); 363 CHECK(!snappy::GetUncompressedLength(&source, &ulen2) || 364 (ulen2 < (1<<20))); 365 CHECK(!IsValidCompressedBuffer(data)); 366 CHECK(!Uncompress(data, &uncmp)); 367 } 368 } 369 370 // Helper routines to construct arbitrary compressed strings. 371 // These mirror the compression code in snappy.cc, but are copied 372 // here so that we can bypass some limitations in the how snappy.cc 373 // invokes these routines. 374 void AppendLiteral(std::string* dst, const std::string& literal) { 375 if (literal.empty()) return; 376 int n = literal.size() - 1; 377 if (n < 60) { 378 // Fit length in tag byte 379 dst->push_back(0 | (n << 2)); 380 } else { 381 // Encode in upcoming bytes 382 char number[4]; 383 int count = 0; 384 while (n > 0) { 385 number[count++] = n & 0xff; 386 n >>= 8; 387 } 388 dst->push_back(0 | ((59+count) << 2)); 389 *dst += std::string(number, count); 390 } 391 *dst += literal; 392 } 393 394 void AppendCopy(std::string* dst, int offset, int length) { 395 while (length > 0) { 396 // Figure out how much to copy in one shot 397 int to_copy; 398 if (length >= 68) { 399 to_copy = 64; 400 } else if (length > 64) { 401 to_copy = 60; 402 } else { 403 to_copy = length; 404 } 405 length -= to_copy; 406 407 if ((to_copy >= 4) && (to_copy < 12) && (offset < 2048)) { 408 assert(to_copy-4 < 8); // Must fit in 3 bits 409 dst->push_back(1 | ((to_copy-4) << 2) | ((offset >> 8) << 5)); 410 dst->push_back(offset & 0xff); 411 } else if (offset < 65536) { 412 dst->push_back(2 | ((to_copy-1) << 2)); 413 dst->push_back(offset & 0xff); 414 dst->push_back(offset >> 8); 415 } else { 416 dst->push_back(3 | ((to_copy-1) << 2)); 417 dst->push_back(offset & 0xff); 418 dst->push_back((offset >> 8) & 0xff); 419 dst->push_back((offset >> 16) & 0xff); 420 dst->push_back((offset >> 24) & 0xff); 421 } 422 } 423 } 424 425 TEST(Snappy, SimpleTests) { 426 Verify(""); 427 Verify("a"); 428 Verify("ab"); 429 Verify("abc"); 430 431 Verify("aaaaaaa" + std::string(16, 'b') + std::string("aaaaa") + "abc"); 432 Verify("aaaaaaa" + std::string(256, 'b') + std::string("aaaaa") + "abc"); 433 Verify("aaaaaaa" + std::string(2047, 'b') + std::string("aaaaa") + "abc"); 434 Verify("aaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc"); 435 Verify("abcaaaaaaa" + std::string(65536, 'b') + std::string("aaaaa") + "abc"); 436 } 437 438 // Regression test for cr/345340892. 439 TEST(Snappy, AppendSelfPatternExtensionEdgeCases) { 440 Verify("abcabcabcabcabcabcab"); 441 Verify("abcabcabcabcabcabcab0123456789ABCDEF"); 442 443 Verify("abcabcabcabcabcabcabcabcabcabcabcabc"); 444 Verify("abcabcabcabcabcabcabcabcabcabcabcabc0123456789ABCDEF"); 445 } 446 447 // Regression test for cr/345340892. 448 TEST(Snappy, AppendSelfPatternExtensionEdgeCasesExhaustive) { 449 std::mt19937 rng; 450 std::uniform_int_distribution<int> uniform_byte(0, 255); 451 for (int pattern_size = 1; pattern_size <= 18; ++pattern_size) { 452 for (int length = 1; length <= 64; ++length) { 453 for (int extra_bytes_after_pattern : {0, 1, 15, 16, 128}) { 454 const int size = pattern_size + length + extra_bytes_after_pattern; 455 std::string input; 456 input.resize(size); 457 for (int i = 0; i < pattern_size; ++i) { 458 input[i] = 'a' + i; 459 } 460 for (int i = 0; i < length; ++i) { 461 input[pattern_size + i] = input[i]; 462 } 463 for (int i = 0; i < extra_bytes_after_pattern; ++i) { 464 input[pattern_size + length + i] = 465 static_cast<char>(uniform_byte(rng)); 466 } 467 Verify(input); 468 } 469 } 470 } 471 } 472 473 // Verify max blowup (lots of four-byte copies) 474 TEST(Snappy, MaxBlowup) { 475 std::mt19937 rng; 476 std::uniform_int_distribution<int> uniform_byte(0, 255); 477 std::string input; 478 for (int i = 0; i < 80000; ++i) 479 input.push_back(static_cast<char>(uniform_byte(rng))); 480 481 for (int i = 0; i < 80000; i += 4) { 482 std::string four_bytes(input.end() - i - 4, input.end() - i); 483 input.append(four_bytes); 484 } 485 Verify(input); 486 } 487 488 // Issue #201, when output is more than 4GB, we had a data corruption bug. 489 // We cannot run this test always because of CI constraints. 490 TEST(Snappy, DISABLED_MoreThan4GB) { 491 std::mt19937 rng; 492 std::uniform_int_distribution<int> uniform_byte(0, 255); 493 std::string input; 494 input.resize((1ull << 32) - 1); 495 for (uint64_t i = 0; i < ((1ull << 32) - 1); ++i) 496 input[i] = static_cast<char>(uniform_byte(rng)); 497 Verify(input); 498 } 499 500 TEST(Snappy, RandomData) { 501 std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed)); 502 std::uniform_int_distribution<int> uniform_0_to_3(0, 3); 503 std::uniform_int_distribution<int> uniform_0_to_8(0, 8); 504 std::uniform_int_distribution<int> uniform_byte(0, 255); 505 std::uniform_int_distribution<size_t> uniform_4k(0, 4095); 506 std::uniform_int_distribution<size_t> uniform_64k(0, 65535); 507 std::bernoulli_distribution one_in_ten(1.0 / 10); 508 509 constexpr int num_ops = 20000; 510 for (int i = 0; i < num_ops; ++i) { 511 if ((i % 1000) == 0) { 512 VLOG(0) << "Random op " << i << " of " << num_ops; 513 } 514 515 std::string x; 516 size_t len = uniform_4k(rng); 517 if (i < 100) { 518 len = 65536 + uniform_64k(rng); 519 } 520 while (x.size() < len) { 521 int run_len = 1; 522 if (one_in_ten(rng)) { 523 int skewed_bits = uniform_0_to_8(rng); 524 // int is guaranteed to hold at least 16 bits, this uses at most 8 bits. 525 std::uniform_int_distribution<int> skewed_low(0, 526 (1 << skewed_bits) - 1); 527 run_len = skewed_low(rng); 528 } 529 char c = static_cast<char>(uniform_byte(rng)); 530 if (i >= 100) { 531 int skewed_bits = uniform_0_to_3(rng); 532 // int is guaranteed to hold at least 16 bits, this uses at most 3 bits. 533 std::uniform_int_distribution<int> skewed_low(0, 534 (1 << skewed_bits) - 1); 535 c = static_cast<char>(skewed_low(rng)); 536 } 537 while (run_len-- > 0 && x.size() < len) { 538 x.push_back(c); 539 } 540 } 541 542 Verify(x); 543 } 544 } 545 546 TEST(Snappy, FourByteOffset) { 547 // The new compressor cannot generate four-byte offsets since 548 // it chops up the input into 32KB pieces. So we hand-emit the 549 // copy manually. 550 551 // The two fragments that make up the input string. 552 std::string fragment1 = "012345689abcdefghijklmnopqrstuvwxyz"; 553 std::string fragment2 = "some other string"; 554 555 // How many times each fragment is emitted. 556 const int n1 = 2; 557 const int n2 = 100000 / fragment2.size(); 558 const size_t length = n1 * fragment1.size() + n2 * fragment2.size(); 559 560 std::string compressed; 561 Varint::Append32(&compressed, length); 562 563 AppendLiteral(&compressed, fragment1); 564 std::string src = fragment1; 565 for (int i = 0; i < n2; ++i) { 566 AppendLiteral(&compressed, fragment2); 567 src += fragment2; 568 } 569 AppendCopy(&compressed, src.size(), fragment1.size()); 570 src += fragment1; 571 CHECK_EQ(length, src.size()); 572 573 std::string uncompressed; 574 CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); 575 CHECK(snappy::Uncompress(compressed.data(), compressed.size(), 576 &uncompressed)); 577 CHECK_EQ(uncompressed, src); 578 } 579 580 TEST(Snappy, IOVecSourceEdgeCases) { 581 // Validate that empty leading, trailing, and in-between iovecs are handled: 582 // [] [] ['a'] [] ['b'] []. 583 std::string data = "ab"; 584 char* buf = const_cast<char*>(data.data()); 585 size_t used_so_far = 0; 586 static const int kLengths[] = {0, 0, 1, 0, 1, 0}; 587 struct iovec iov[ARRAYSIZE(kLengths)]; 588 for (int i = 0; i < ARRAYSIZE(kLengths); ++i) { 589 iov[i].iov_base = buf + used_so_far; 590 iov[i].iov_len = kLengths[i]; 591 used_so_far += kLengths[i]; 592 } 593 std::string compressed; 594 snappy::CompressFromIOVec(iov, ARRAYSIZE(kLengths), &compressed); 595 std::string uncompressed; 596 snappy::Uncompress(compressed.data(), compressed.size(), &uncompressed); 597 CHECK_EQ(data, uncompressed); 598 } 599 600 TEST(Snappy, IOVecSinkEdgeCases) { 601 // Test some tricky edge cases in the iovec output that are not necessarily 602 // exercised by random tests. 603 604 // Our output blocks look like this initially (the last iovec is bigger 605 // than depicted): 606 // [ ] [ ] [ ] [ ] [ ] 607 static const int kLengths[] = { 2, 1, 4, 8, 128 }; 608 609 struct iovec iov[ARRAYSIZE(kLengths)]; 610 for (int i = 0; i < ARRAYSIZE(kLengths); ++i) { 611 iov[i].iov_base = new char[kLengths[i]]; 612 iov[i].iov_len = kLengths[i]; 613 } 614 615 std::string compressed; 616 Varint::Append32(&compressed, 22); 617 618 // A literal whose output crosses three blocks. 619 // [ab] [c] [123 ] [ ] [ ] 620 AppendLiteral(&compressed, "abc123"); 621 622 // A copy whose output crosses two blocks (source and destination 623 // segments marked). 624 // [ab] [c] [1231] [23 ] [ ] 625 // ^--^ -- 626 AppendCopy(&compressed, 3, 3); 627 628 // A copy where the input is, at first, in the block before the output: 629 // 630 // [ab] [c] [1231] [231231 ] [ ] 631 // ^--- ^--- 632 // Then during the copy, the pointers move such that the input and 633 // output pointers are in the same block: 634 // 635 // [ab] [c] [1231] [23123123] [ ] 636 // ^- ^- 637 // And then they move again, so that the output pointer is no longer 638 // in the same block as the input pointer: 639 // [ab] [c] [1231] [23123123] [123 ] 640 // ^-- ^-- 641 AppendCopy(&compressed, 6, 9); 642 643 // Finally, a copy where the input is from several blocks back, 644 // and it also crosses three blocks: 645 // 646 // [ab] [c] [1231] [23123123] [123b ] 647 // ^ ^ 648 // [ab] [c] [1231] [23123123] [123bc ] 649 // ^ ^ 650 // [ab] [c] [1231] [23123123] [123bc12 ] 651 // ^- ^- 652 AppendCopy(&compressed, 17, 4); 653 654 CHECK(snappy::RawUncompressToIOVec( 655 compressed.data(), compressed.size(), iov, ARRAYSIZE(iov))); 656 CHECK_EQ(0, memcmp(iov[0].iov_base, "ab", 2)); 657 CHECK_EQ(0, memcmp(iov[1].iov_base, "c", 1)); 658 CHECK_EQ(0, memcmp(iov[2].iov_base, "1231", 4)); 659 CHECK_EQ(0, memcmp(iov[3].iov_base, "23123123", 8)); 660 CHECK_EQ(0, memcmp(iov[4].iov_base, "123bc12", 7)); 661 662 for (int i = 0; i < ARRAYSIZE(kLengths); ++i) { 663 delete[] reinterpret_cast<char *>(iov[i].iov_base); 664 } 665 } 666 667 TEST(Snappy, IOVecLiteralOverflow) { 668 static const int kLengths[] = { 3, 4 }; 669 670 struct iovec iov[ARRAYSIZE(kLengths)]; 671 for (int i = 0; i < ARRAYSIZE(kLengths); ++i) { 672 iov[i].iov_base = new char[kLengths[i]]; 673 iov[i].iov_len = kLengths[i]; 674 } 675 676 std::string compressed; 677 Varint::Append32(&compressed, 8); 678 679 AppendLiteral(&compressed, "12345678"); 680 681 CHECK(!snappy::RawUncompressToIOVec( 682 compressed.data(), compressed.size(), iov, ARRAYSIZE(iov))); 683 684 for (int i = 0; i < ARRAYSIZE(kLengths); ++i) { 685 delete[] reinterpret_cast<char *>(iov[i].iov_base); 686 } 687 } 688 689 TEST(Snappy, IOVecCopyOverflow) { 690 static const int kLengths[] = { 3, 4 }; 691 692 struct iovec iov[ARRAYSIZE(kLengths)]; 693 for (int i = 0; i < ARRAYSIZE(kLengths); ++i) { 694 iov[i].iov_base = new char[kLengths[i]]; 695 iov[i].iov_len = kLengths[i]; 696 } 697 698 std::string compressed; 699 Varint::Append32(&compressed, 8); 700 701 AppendLiteral(&compressed, "123"); 702 AppendCopy(&compressed, 3, 5); 703 704 CHECK(!snappy::RawUncompressToIOVec( 705 compressed.data(), compressed.size(), iov, ARRAYSIZE(iov))); 706 707 for (int i = 0; i < ARRAYSIZE(kLengths); ++i) { 708 delete[] reinterpret_cast<char *>(iov[i].iov_base); 709 } 710 } 711 712 bool CheckUncompressedLength(const std::string& compressed, size_t* ulength) { 713 const bool result1 = snappy::GetUncompressedLength(compressed.data(), 714 compressed.size(), 715 ulength); 716 717 snappy::ByteArraySource source(compressed.data(), compressed.size()); 718 uint32_t length; 719 const bool result2 = snappy::GetUncompressedLength(&source, &length); 720 CHECK_EQ(result1, result2); 721 return result1; 722 } 723 724 TEST(SnappyCorruption, TruncatedVarint) { 725 std::string compressed, uncompressed; 726 size_t ulength; 727 compressed.push_back('\xf0'); 728 CHECK(!CheckUncompressedLength(compressed, &ulength)); 729 CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); 730 CHECK(!snappy::Uncompress(compressed.data(), compressed.size(), 731 &uncompressed)); 732 } 733 734 TEST(SnappyCorruption, UnterminatedVarint) { 735 std::string compressed, uncompressed; 736 size_t ulength; 737 compressed.push_back('\x80'); 738 compressed.push_back('\x80'); 739 compressed.push_back('\x80'); 740 compressed.push_back('\x80'); 741 compressed.push_back('\x80'); 742 compressed.push_back(10); 743 CHECK(!CheckUncompressedLength(compressed, &ulength)); 744 CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); 745 CHECK(!snappy::Uncompress(compressed.data(), compressed.size(), 746 &uncompressed)); 747 } 748 749 TEST(SnappyCorruption, OverflowingVarint) { 750 std::string compressed, uncompressed; 751 size_t ulength; 752 compressed.push_back('\xfb'); 753 compressed.push_back('\xff'); 754 compressed.push_back('\xff'); 755 compressed.push_back('\xff'); 756 compressed.push_back('\x7f'); 757 CHECK(!CheckUncompressedLength(compressed, &ulength)); 758 CHECK(!snappy::IsValidCompressedBuffer(compressed.data(), compressed.size())); 759 CHECK(!snappy::Uncompress(compressed.data(), compressed.size(), 760 &uncompressed)); 761 } 762 763 TEST(Snappy, ReadPastEndOfBuffer) { 764 // Check that we do not read past end of input 765 766 // Make a compressed string that ends with a single-byte literal 767 std::string compressed; 768 Varint::Append32(&compressed, 1); 769 AppendLiteral(&compressed, "x"); 770 771 std::string uncompressed; 772 DataEndingAtUnreadablePage c(compressed); 773 CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed)); 774 CHECK_EQ(uncompressed, std::string("x")); 775 } 776 777 // Check for an infinite loop caused by a copy with offset==0 778 TEST(Snappy, ZeroOffsetCopy) { 779 const char* compressed = "\x40\x12\x00\x00"; 780 // \x40 Length (must be > kMaxIncrementCopyOverflow) 781 // \x12\x00\x00 Copy with offset==0, length==5 782 char uncompressed[100]; 783 EXPECT_FALSE(snappy::RawUncompress(compressed, 4, uncompressed)); 784 } 785 786 TEST(Snappy, ZeroOffsetCopyValidation) { 787 const char* compressed = "\x05\x12\x00\x00"; 788 // \x05 Length 789 // \x12\x00\x00 Copy with offset==0, length==5 790 EXPECT_FALSE(snappy::IsValidCompressedBuffer(compressed, 4)); 791 } 792 793 int TestFindMatchLength(const char* s1, const char *s2, unsigned length) { 794 uint64_t data; 795 std::pair<size_t, bool> p = 796 snappy::internal::FindMatchLength(s1, s2, s2 + length, &data); 797 CHECK_EQ(p.first < 8, p.second); 798 return p.first; 799 } 800 801 TEST(Snappy, FindMatchLength) { 802 // Exercise all different code paths through the function. 803 // 64-bit version: 804 805 // Hit s1_limit in 64-bit loop, hit s1_limit in single-character loop. 806 EXPECT_EQ(6, TestFindMatchLength("012345", "012345", 6)); 807 EXPECT_EQ(11, TestFindMatchLength("01234567abc", "01234567abc", 11)); 808 809 // Hit s1_limit in 64-bit loop, find a non-match in single-character loop. 810 EXPECT_EQ(9, TestFindMatchLength("01234567abc", "01234567axc", 9)); 811 812 // Same, but edge cases. 813 EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc!", 11)); 814 EXPECT_EQ(11, TestFindMatchLength("01234567abc!", "01234567abc?", 11)); 815 816 // Find non-match at once in first loop. 817 EXPECT_EQ(0, TestFindMatchLength("01234567xxxxxxxx", "?1234567xxxxxxxx", 16)); 818 EXPECT_EQ(1, TestFindMatchLength("01234567xxxxxxxx", "0?234567xxxxxxxx", 16)); 819 EXPECT_EQ(4, TestFindMatchLength("01234567xxxxxxxx", "01237654xxxxxxxx", 16)); 820 EXPECT_EQ(7, TestFindMatchLength("01234567xxxxxxxx", "0123456?xxxxxxxx", 16)); 821 822 // Find non-match in first loop after one block. 823 EXPECT_EQ(8, TestFindMatchLength("abcdefgh01234567xxxxxxxx", 824 "abcdefgh?1234567xxxxxxxx", 24)); 825 EXPECT_EQ(9, TestFindMatchLength("abcdefgh01234567xxxxxxxx", 826 "abcdefgh0?234567xxxxxxxx", 24)); 827 EXPECT_EQ(12, TestFindMatchLength("abcdefgh01234567xxxxxxxx", 828 "abcdefgh01237654xxxxxxxx", 24)); 829 EXPECT_EQ(15, TestFindMatchLength("abcdefgh01234567xxxxxxxx", 830 "abcdefgh0123456?xxxxxxxx", 24)); 831 832 // 32-bit version: 833 834 // Short matches. 835 EXPECT_EQ(0, TestFindMatchLength("01234567", "?1234567", 8)); 836 EXPECT_EQ(1, TestFindMatchLength("01234567", "0?234567", 8)); 837 EXPECT_EQ(2, TestFindMatchLength("01234567", "01?34567", 8)); 838 EXPECT_EQ(3, TestFindMatchLength("01234567", "012?4567", 8)); 839 EXPECT_EQ(4, TestFindMatchLength("01234567", "0123?567", 8)); 840 EXPECT_EQ(5, TestFindMatchLength("01234567", "01234?67", 8)); 841 EXPECT_EQ(6, TestFindMatchLength("01234567", "012345?7", 8)); 842 EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 8)); 843 EXPECT_EQ(7, TestFindMatchLength("01234567", "0123456?", 7)); 844 EXPECT_EQ(7, TestFindMatchLength("01234567!", "0123456??", 7)); 845 846 // Hit s1_limit in 32-bit loop, hit s1_limit in single-character loop. 847 EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd", "xxxxxxabcd", 10)); 848 EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd?", "xxxxxxabcd?", 10)); 849 EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcdef", "xxxxxxabcdef", 13)); 850 851 // Same, but edge cases. 852 EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc!", 12)); 853 EXPECT_EQ(12, TestFindMatchLength("xxxxxx0123abc!", "xxxxxx0123abc?", 12)); 854 855 // Hit s1_limit in 32-bit loop, find a non-match in single-character loop. 856 EXPECT_EQ(11, TestFindMatchLength("xxxxxx0123abc", "xxxxxx0123axc", 13)); 857 858 // Find non-match at once in first loop. 859 EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123xxxxxxxx", 860 "xxxxxx?123xxxxxxxx", 18)); 861 EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123xxxxxxxx", 862 "xxxxxx0?23xxxxxxxx", 18)); 863 EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123xxxxxxxx", 864 "xxxxxx0132xxxxxxxx", 18)); 865 EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123xxxxxxxx", 866 "xxxxxx012?xxxxxxxx", 18)); 867 868 // Same, but edge cases. 869 EXPECT_EQ(6, TestFindMatchLength("xxxxxx0123", "xxxxxx?123", 10)); 870 EXPECT_EQ(7, TestFindMatchLength("xxxxxx0123", "xxxxxx0?23", 10)); 871 EXPECT_EQ(8, TestFindMatchLength("xxxxxx0123", "xxxxxx0132", 10)); 872 EXPECT_EQ(9, TestFindMatchLength("xxxxxx0123", "xxxxxx012?", 10)); 873 874 // Find non-match in first loop after one block. 875 EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123xx", 876 "xxxxxxabcd?123xx", 16)); 877 EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123xx", 878 "xxxxxxabcd0?23xx", 16)); 879 EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123xx", 880 "xxxxxxabcd0132xx", 16)); 881 EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123xx", 882 "xxxxxxabcd012?xx", 16)); 883 884 // Same, but edge cases. 885 EXPECT_EQ(10, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd?123", 14)); 886 EXPECT_EQ(11, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0?23", 14)); 887 EXPECT_EQ(12, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd0132", 14)); 888 EXPECT_EQ(13, TestFindMatchLength("xxxxxxabcd0123", "xxxxxxabcd012?", 14)); 889 } 890 891 TEST(Snappy, FindMatchLengthRandom) { 892 constexpr int kNumTrials = 10000; 893 constexpr int kTypicalLength = 10; 894 std::minstd_rand0 rng(snappy::GetFlag(FLAGS_test_random_seed)); 895 std::uniform_int_distribution<int> uniform_byte(0, 255); 896 std::bernoulli_distribution one_in_two(1.0 / 2); 897 std::bernoulli_distribution one_in_typical_length(1.0 / kTypicalLength); 898 899 for (int i = 0; i < kNumTrials; ++i) { 900 std::string s, t; 901 char a = static_cast<char>(uniform_byte(rng)); 902 char b = static_cast<char>(uniform_byte(rng)); 903 while (!one_in_typical_length(rng)) { 904 s.push_back(one_in_two(rng) ? a : b); 905 t.push_back(one_in_two(rng) ? a : b); 906 } 907 DataEndingAtUnreadablePage u(s); 908 DataEndingAtUnreadablePage v(t); 909 size_t matched = TestFindMatchLength(u.data(), v.data(), t.size()); 910 if (matched == t.size()) { 911 EXPECT_EQ(s, t); 912 } else { 913 EXPECT_NE(s[matched], t[matched]); 914 for (size_t j = 0; j < matched; ++j) { 915 EXPECT_EQ(s[j], t[j]); 916 } 917 } 918 } 919 } 920 921 uint16_t MakeEntry(unsigned int extra, unsigned int len, 922 unsigned int copy_offset) { 923 // Check that all of the fields fit within the allocated space 924 assert(extra == (extra & 0x7)); // At most 3 bits 925 assert(copy_offset == (copy_offset & 0x7)); // At most 3 bits 926 assert(len == (len & 0x7f)); // At most 7 bits 927 return len | (copy_offset << 8) | (extra << 11); 928 } 929 930 // Check that the decompression table is correct, and optionally print out 931 // the computed one. 932 TEST(Snappy, VerifyCharTable) { 933 using snappy::internal::LITERAL; 934 using snappy::internal::COPY_1_BYTE_OFFSET; 935 using snappy::internal::COPY_2_BYTE_OFFSET; 936 using snappy::internal::COPY_4_BYTE_OFFSET; 937 using snappy::internal::char_table; 938 939 uint16_t dst[256]; 940 941 // Place invalid entries in all places to detect missing initialization 942 int assigned = 0; 943 for (int i = 0; i < 256; ++i) { 944 dst[i] = 0xffff; 945 } 946 947 // Small LITERAL entries. We store (len-1) in the top 6 bits. 948 for (uint8_t len = 1; len <= 60; ++len) { 949 dst[LITERAL | ((len - 1) << 2)] = MakeEntry(0, len, 0); 950 assigned++; 951 } 952 953 // Large LITERAL entries. We use 60..63 in the high 6 bits to 954 // encode the number of bytes of length info that follow the opcode. 955 for (uint8_t extra_bytes = 1; extra_bytes <= 4; ++extra_bytes) { 956 // We set the length field in the lookup table to 1 because extra 957 // bytes encode len-1. 958 dst[LITERAL | ((extra_bytes + 59) << 2)] = MakeEntry(extra_bytes, 1, 0); 959 assigned++; 960 } 961 962 // COPY_1_BYTE_OFFSET. 963 // 964 // The tag byte in the compressed data stores len-4 in 3 bits, and 965 // offset/256 in 3 bits. offset%256 is stored in the next byte. 966 // 967 // This format is used for length in range [4..11] and offset in 968 // range [0..2047] 969 for (uint8_t len = 4; len < 12; ++len) { 970 for (uint16_t offset = 0; offset < 2048; offset += 256) { 971 uint8_t offset_high = static_cast<uint8_t>(offset >> 8); 972 dst[COPY_1_BYTE_OFFSET | ((len - 4) << 2) | (offset_high << 5)] = 973 MakeEntry(1, len, offset_high); 974 assigned++; 975 } 976 } 977 978 // COPY_2_BYTE_OFFSET. 979 // Tag contains len-1 in top 6 bits, and offset in next two bytes. 980 for (uint8_t len = 1; len <= 64; ++len) { 981 dst[COPY_2_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(2, len, 0); 982 assigned++; 983 } 984 985 // COPY_4_BYTE_OFFSET. 986 // Tag contents len-1 in top 6 bits, and offset in next four bytes. 987 for (uint8_t len = 1; len <= 64; ++len) { 988 dst[COPY_4_BYTE_OFFSET | ((len - 1) << 2)] = MakeEntry(4, len, 0); 989 assigned++; 990 } 991 992 // Check that each entry was initialized exactly once. 993 EXPECT_EQ(256, assigned) << "Assigned only " << assigned << " of 256"; 994 for (int i = 0; i < 256; ++i) { 995 EXPECT_NE(0xffff, dst[i]) << "Did not assign byte " << i; 996 } 997 998 if (snappy::GetFlag(FLAGS_snappy_dump_decompression_table)) { 999 std::printf("static const uint16_t char_table[256] = {\n "); 1000 for (int i = 0; i < 256; ++i) { 1001 std::printf("0x%04x%s", 1002 dst[i], 1003 ((i == 255) ? "\n" : (((i % 8) == 7) ? ",\n " : ", "))); 1004 } 1005 std::printf("};\n"); 1006 } 1007 1008 // Check that computed table matched recorded table. 1009 for (int i = 0; i < 256; ++i) { 1010 EXPECT_EQ(dst[i], char_table[i]) << "Mismatch in byte " << i; 1011 } 1012 } 1013 1014 TEST(Snappy, TestBenchmarkFiles) { 1015 for (int i = 0; i < ARRAYSIZE(kTestDataFiles); ++i) { 1016 Verify(ReadTestDataFile(kTestDataFiles[i].filename, 1017 kTestDataFiles[i].size_limit)); 1018 } 1019 } 1020 1021 } // namespace 1022 1023 } // namespace snappy