tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

lz4.c (118145B)


      1 /*
      2   LZ4 - Fast LZ compression algorithm
      3   Copyright (C) 2011-2023, Yann Collet.
      4 
      5   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
      6 
      7   Redistribution and use in source and binary forms, with or without
      8   modification, are permitted provided that the following conditions are
      9   met:
     10 
     11       * Redistributions of source code must retain the above copyright
     12   notice, this list of conditions and the following disclaimer.
     13       * Redistributions in binary form must reproduce the above
     14   copyright notice, this list of conditions and the following disclaimer
     15   in the documentation and/or other materials provided with the
     16   distribution.
     17 
     18   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 
     30   You can contact the author at :
     31    - LZ4 homepage : http://www.lz4.org
     32    - LZ4 source repository : https://github.com/lz4/lz4
     33 */
     34 
     35 /*-************************************
     36 *  Tuning parameters
     37 **************************************/
     38 /*
     39 * LZ4_HEAPMODE :
     40 * Select how stateless compression functions like `LZ4_compress_default()`
     41 * allocate memory for their hash table,
     42 * in memory stack (0:default, fastest), or in memory heap (1:requires malloc()).
     43 */
     44 #ifndef LZ4_HEAPMODE
     45 #  define LZ4_HEAPMODE 0
     46 #endif
     47 
     48 /*
     49 * LZ4_ACCELERATION_DEFAULT :
     50 * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0
     51 */
     52 #define LZ4_ACCELERATION_DEFAULT 1
     53 /*
     54 * LZ4_ACCELERATION_MAX :
     55 * Any "acceleration" value higher than this threshold
     56 * get treated as LZ4_ACCELERATION_MAX instead (fix #876)
     57 */
     58 #define LZ4_ACCELERATION_MAX 65537
     59 
     60 
     61 /*-************************************
     62 *  CPU Feature Detection
     63 **************************************/
     64 /* LZ4_FORCE_MEMORY_ACCESS
     65 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
     66 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
     67 * The below switch allow to select different access method for improved performance.
     68 * Method 0 (default) : use `memcpy()`. Safe and portable.
     69 * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
     70 *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
     71 * Method 2 : direct access. This method is portable but violate C standard.
     72 *            It can generate buggy code on targets which assembly generation depends on alignment.
     73 *            But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
     74 * See https://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
     75 * Prefer these methods in priority order (0 > 1 > 2)
     76 */
     77 #ifndef LZ4_FORCE_MEMORY_ACCESS   /* can be defined externally */
     78 #  if defined(__GNUC__) && \
     79  ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) \
     80  || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
     81 #    define LZ4_FORCE_MEMORY_ACCESS 2
     82 #  elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || defined(__GNUC__) || defined(_MSC_VER)
     83 #    define LZ4_FORCE_MEMORY_ACCESS 1
     84 #  endif
     85 #endif
     86 
     87 /*
     88 * LZ4_FORCE_SW_BITCOUNT
     89 * Define this parameter if your target system or compiler does not support hardware bit count
     90 */
     91 #if defined(_MSC_VER) && defined(_WIN32_WCE)   /* Visual Studio for WinCE doesn't support Hardware bit count */
     92 #  undef  LZ4_FORCE_SW_BITCOUNT  /* avoid double def */
     93 #  define LZ4_FORCE_SW_BITCOUNT
     94 #endif
     95 
     96 
     97 
     98 /*-************************************
     99 *  Dependency
    100 **************************************/
    101 /*
    102 * LZ4_SRC_INCLUDED:
    103 * Amalgamation flag, whether lz4.c is included
    104 */
    105 #ifndef LZ4_SRC_INCLUDED
    106 #  define LZ4_SRC_INCLUDED 1
    107 #endif
    108 
    109 #ifndef LZ4_DISABLE_DEPRECATE_WARNINGS
    110 #  define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */
    111 #endif
    112 
    113 #ifndef LZ4_STATIC_LINKING_ONLY
    114 #  define LZ4_STATIC_LINKING_ONLY
    115 #endif
    116 #include "lz4.h"
    117 /* see also "memory routines" below */
    118 
    119 
    120 /*-************************************
    121 *  Compiler Options
    122 **************************************/
    123 #if defined(_MSC_VER) && (_MSC_VER >= 1400)  /* Visual Studio 2005+ */
    124 #  include <intrin.h>               /* only present in VS2005+ */
    125 #  pragma warning(disable : 4127)   /* disable: C4127: conditional expression is constant */
    126 #  pragma warning(disable : 6237)   /* disable: C6237: conditional expression is always 0 */
    127 #  pragma warning(disable : 6239)   /* disable: C6239: (<non-zero constant> && <expression>) always evaluates to the result of <expression> */
    128 #  pragma warning(disable : 6240)   /* disable: C6240: (<expression> && <non-zero constant>) always evaluates to the result of <expression> */
    129 #  pragma warning(disable : 6326)   /* disable: C6326: Potential comparison of a constant with another constant */
    130 #endif  /* _MSC_VER */
    131 
    132 #ifndef LZ4_FORCE_INLINE
    133 #  if defined (_MSC_VER) && !defined (__clang__)    /* MSVC */
    134 #    define LZ4_FORCE_INLINE static __forceinline
    135 #  else
    136 #    if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* C99 */
    137 #      if defined (__GNUC__) || defined (__clang__)
    138 #        define LZ4_FORCE_INLINE static inline __attribute__((always_inline))
    139 #      else
    140 #        define LZ4_FORCE_INLINE static inline
    141 #      endif
    142 #    else
    143 #      define LZ4_FORCE_INLINE static
    144 #    endif /* __STDC_VERSION__ */
    145 #  endif  /* _MSC_VER */
    146 #endif /* LZ4_FORCE_INLINE */
    147 
    148 /* LZ4_FORCE_O2 and LZ4_FORCE_INLINE
    149 * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
    150 * together with a simple 8-byte copy loop as a fall-back path.
    151 * However, this optimization hurts the decompression speed by >30%,
    152 * because the execution does not go to the optimized loop
    153 * for typical compressible data, and all of the preamble checks
    154 * before going to the fall-back path become useless overhead.
    155 * This optimization happens only with the -O3 flag, and -O2 generates
    156 * a simple 8-byte copy loop.
    157 * With gcc on ppc64le, all of the LZ4_decompress_* and LZ4_wildCopy8
    158 * functions are annotated with __attribute__((optimize("O2"))),
    159 * and also LZ4_wildCopy8 is forcibly inlined, so that the O2 attribute
    160 * of LZ4_wildCopy8 does not affect the compression speed.
    161 */
    162 #if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
    163 #  define LZ4_FORCE_O2  __attribute__((optimize("O2")))
    164 #  undef LZ4_FORCE_INLINE
    165 #  define LZ4_FORCE_INLINE  static __inline __attribute__((optimize("O2"),always_inline))
    166 #else
    167 #  define LZ4_FORCE_O2
    168 #endif
    169 
    170 #if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
    171 #  define expect(expr,value)    (__builtin_expect ((expr),(value)) )
    172 #else
    173 #  define expect(expr,value)    (expr)
    174 #endif
    175 
    176 #ifndef likely
    177 #define likely(expr)     expect((expr) != 0, 1)
    178 #endif
    179 #ifndef unlikely
    180 #define unlikely(expr)   expect((expr) != 0, 0)
    181 #endif
    182 
    183 /* Should the alignment test prove unreliable, for some reason,
    184 * it can be disabled by setting LZ4_ALIGN_TEST to 0 */
    185 #ifndef LZ4_ALIGN_TEST  /* can be externally provided */
    186 # define LZ4_ALIGN_TEST 1
    187 #endif
    188 
    189 
    190 /*-************************************
    191 *  Memory routines
    192 **************************************/
    193 
    194 /*! LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION :
    195 *  Disable relatively high-level LZ4/HC functions that use dynamic memory
    196 *  allocation functions (malloc(), calloc(), free()).
    197 *
    198 *  Note that this is a compile-time switch. And since it disables
    199 *  public/stable LZ4 v1 API functions, we don't recommend using this
    200 *  symbol to generate a library for distribution.
    201 *
    202 *  The following public functions are removed when this symbol is defined.
    203 *  - lz4   : LZ4_createStream, LZ4_freeStream,
    204 *            LZ4_createStreamDecode, LZ4_freeStreamDecode, LZ4_create (deprecated)
    205 *  - lz4hc : LZ4_createStreamHC, LZ4_freeStreamHC,
    206 *            LZ4_createHC (deprecated), LZ4_freeHC  (deprecated)
    207 *  - lz4frame, lz4file : All LZ4F_* functions
    208 */
    209 #if defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
    210 #  define ALLOC(s)          lz4_error_memory_allocation_is_disabled
    211 #  define ALLOC_AND_ZERO(s) lz4_error_memory_allocation_is_disabled
    212 #  define FREEMEM(p)        lz4_error_memory_allocation_is_disabled
    213 #elif defined(LZ4_USER_MEMORY_FUNCTIONS)
    214 /* memory management functions can be customized by user project.
    215 * Below functions must exist somewhere in the Project
    216 * and be available at link time */
    217 void* LZ4_malloc(size_t s);
    218 void* LZ4_calloc(size_t n, size_t s);
    219 void  LZ4_free(void* p);
    220 # define ALLOC(s)          LZ4_malloc(s)
    221 # define ALLOC_AND_ZERO(s) LZ4_calloc(1,s)
    222 # define FREEMEM(p)        LZ4_free(p)
    223 #else
    224 # include <stdlib.h>   /* malloc, calloc, free */
    225 # define ALLOC(s)          malloc(s)
    226 # define ALLOC_AND_ZERO(s) calloc(1,s)
    227 # define FREEMEM(p)        free(p)
    228 #endif
    229 
    230 #if ! LZ4_FREESTANDING
    231 #  include <string.h>   /* memset, memcpy */
    232 #endif
    233 #if !defined(LZ4_memset)
    234 #  define LZ4_memset(p,v,s) memset((p),(v),(s))
    235 #endif
    236 #define MEM_INIT(p,v,s)   LZ4_memset((p),(v),(s))
    237 
    238 
    239 /*-************************************
    240 *  Common Constants
    241 **************************************/
    242 #define MINMATCH 4
    243 
    244 #define WILDCOPYLENGTH 8
    245 #define LASTLITERALS   5   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
    246 #define MFLIMIT       12   /* see ../doc/lz4_Block_format.md#parsing-restrictions */
    247 #define MATCH_SAFEGUARD_DISTANCE  ((2*WILDCOPYLENGTH) - MINMATCH)   /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
    248 #define FASTLOOP_SAFE_DISTANCE 64
    249 static const int LZ4_minLength = (MFLIMIT+1);
    250 
    251 #define KB *(1 <<10)
    252 #define MB *(1 <<20)
    253 #define GB *(1U<<30)
    254 
    255 #define LZ4_DISTANCE_ABSOLUTE_MAX 65535
    256 #if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX)   /* max supported by LZ4 format */
    257 #  error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
    258 #endif
    259 
    260 #define ML_BITS  4
    261 #define ML_MASK  ((1U<<ML_BITS)-1)
    262 #define RUN_BITS (8-ML_BITS)
    263 #define RUN_MASK ((1U<<RUN_BITS)-1)
    264 
    265 
    266 /*-************************************
    267 *  Error detection
    268 **************************************/
    269 #if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
    270 #  include <assert.h>
    271 #else
    272 #  ifndef assert
    273 #    define assert(condition) ((void)0)
    274 #  endif
    275 #endif
    276 
    277 #define LZ4_STATIC_ASSERT(c)   { enum { LZ4_static_assert = 1/(int)(!!(c)) }; }   /* use after variable declarations */
    278 
    279 #if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
    280 #  include <stdio.h>
    281   static int g_debuglog_enable = 1;
    282 #  define DEBUGLOG(l, ...) {                          \
    283        if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) {  \
    284            fprintf(stderr, __FILE__  " %i: ", __LINE__); \
    285            fprintf(stderr, __VA_ARGS__);             \
    286            fprintf(stderr, " \n");                   \
    287    }   }
    288 #else
    289 #  define DEBUGLOG(l, ...) {}    /* disabled */
    290 #endif
    291 
    292 static int LZ4_isAligned(const void* ptr, size_t alignment)
    293 {
    294    return ((size_t)ptr & (alignment -1)) == 0;
    295 }
    296 
    297 
    298 /*-************************************
    299 *  Types
    300 **************************************/
    301 #include <limits.h>
    302 #if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
    303 # include <stdint.h>
    304  typedef  uint8_t BYTE;
    305  typedef uint16_t U16;
    306  typedef uint32_t U32;
    307  typedef  int32_t S32;
    308  typedef uint64_t U64;
    309  typedef uintptr_t uptrval;
    310 #else
    311 # if UINT_MAX != 4294967295UL
    312 #   error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4"
    313 # endif
    314  typedef unsigned char       BYTE;
    315  typedef unsigned short      U16;
    316  typedef unsigned int        U32;
    317  typedef   signed int        S32;
    318  typedef unsigned long long  U64;
    319  typedef size_t              uptrval;   /* generally true, except OpenVMS-64 */
    320 #endif
    321 
    322 #if defined(__x86_64__)
    323  typedef U64    reg_t;   /* 64-bits in x32 mode */
    324 #else
    325  typedef size_t reg_t;   /* 32-bits in x32 mode */
    326 #endif
    327 
/* Output-bound behavior requested by the compression functions.
 * NOTE(review): meanings inferred from the names — notLimited: no capacity
 * check on dst; limitedOutput: fail when dst capacity is exceeded;
 * fillOutput: consume as much input as fits in dst. Confirm against the
 * compression loops later in this file. */
typedef enum {
    notLimited = 0,
    limitedOutput = 1,
    fillOutput = 2
} limitedOutput_directive;
    333 
    334 
    335 /*-************************************
    336 *  Reading and writing into memory
    337 **************************************/
    338 
    339 /**
    340 * LZ4 relies on memcpy with a constant size being inlined. In freestanding
    341 * environments, the compiler can't assume the implementation of memcpy() is
    342 * standard compliant, so it can't apply its specialized memcpy() inlining
    343 * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze
    344 * memcpy() as if it were standard compliant, so it can inline it in freestanding
    345 * environments. This is needed when decompressing the Linux Kernel, for example.
    346 */
    347 #if !defined(LZ4_memcpy)
    348 #  if defined(__GNUC__) && (__GNUC__ >= 4)
    349 #    define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
    350 #  else
    351 #    define LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
    352 #  endif
    353 #endif
    354 
    355 #if !defined(LZ4_memmove)
    356 #  if defined(__GNUC__) && (__GNUC__ >= 4)
    357 #    define LZ4_memmove __builtin_memmove
    358 #  else
    359 #    define LZ4_memmove memmove
    360 #  endif
    361 #endif
    362 
    363 static unsigned LZ4_isLittleEndian(void)
    364 {
    365    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : performance detrimental */
    366    return one.c[0];
    367 }
    368 
    369 #if defined(__GNUC__) || defined(__INTEL_COMPILER)
    370 #define LZ4_PACK( __Declaration__ ) __Declaration__ __attribute__((__packed__))
    371 #elif defined(_MSC_VER)
    372 #define LZ4_PACK( __Declaration__ ) __pragma( pack(push, 1) ) __Declaration__ __pragma( pack(pop))
    373 #endif
    374 
#if defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==2)
/* lie to the compiler about data alignment; use with caution */
/* Method 2 : direct unaligned dereference. Violates strict C (alignment),
 * but is the fastest known option on some targets (e.g. GCC + ARMv6). */

static U16 LZ4_read16(const void* memPtr) { return *(const U16*) memPtr; }
static U32 LZ4_read32(const void* memPtr) { return *(const U32*) memPtr; }
static reg_t LZ4_read_ARCH(const void* memPtr) { return *(const reg_t*) memPtr; }

static void LZ4_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
static void LZ4_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }

#elif defined(LZ4_FORCE_MEMORY_ACCESS) && (LZ4_FORCE_MEMORY_ACCESS==1)

/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
/* currently only defined for gcc and icc */
/* Method 1 : packed single-member structs tell the compiler the access may be
 * unaligned, letting it emit the best unaligned load/store for the target. */
LZ4_PACK(typedef struct { U16 u16; }) LZ4_unalign16;
LZ4_PACK(typedef struct { U32 u32; }) LZ4_unalign32;
LZ4_PACK(typedef struct { reg_t uArch; }) LZ4_unalignST;

static U16 LZ4_read16(const void* ptr) { return ((const LZ4_unalign16*)ptr)->u16; }
static U32 LZ4_read32(const void* ptr) { return ((const LZ4_unalign32*)ptr)->u32; }
static reg_t LZ4_read_ARCH(const void* ptr) { return ((const LZ4_unalignST*)ptr)->uArch; }

static void LZ4_write16(void* memPtr, U16 value) { ((LZ4_unalign16*)memPtr)->u16 = value; }
static void LZ4_write32(void* memPtr, U32 value) { ((LZ4_unalign32*)memPtr)->u32 = value; }

#else  /* safe and portable access using memcpy() */
/* Method 0 (default) : memcpy() with a constant size; compilers inline this
 * into a single load/store where the target allows it. */

static U16 LZ4_read16(const void* memPtr)
{
    U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
}

static U32 LZ4_read32(const void* memPtr)
{
    U32 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
}

static reg_t LZ4_read_ARCH(const void* memPtr)
{
    reg_t val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
}

static void LZ4_write16(void* memPtr, U16 value)
{
    LZ4_memcpy(memPtr, &value, sizeof(value));
}

static void LZ4_write32(void* memPtr, U32 value)
{
    LZ4_memcpy(memPtr, &value, sizeof(value));
}

#endif /* LZ4_FORCE_MEMORY_ACCESS */
    428 
    429 
    430 static U16 LZ4_readLE16(const void* memPtr)
    431 {
    432    if (LZ4_isLittleEndian()) {
    433        return LZ4_read16(memPtr);
    434    } else {
    435        const BYTE* p = (const BYTE*)memPtr;
    436        return (U16)((U16)p[0] | (p[1]<<8));
    437    }
    438 }
    439 
    440 #ifdef LZ4_STATIC_LINKING_ONLY_ENDIANNESS_INDEPENDENT_OUTPUT
    441 static U32 LZ4_readLE32(const void* memPtr)
    442 {
    443    if (LZ4_isLittleEndian()) {
    444        return LZ4_read32(memPtr);
    445    } else {
    446        const BYTE* p = (const BYTE*)memPtr;
    447        return (U32)p[0] | (p[1]<<8) | (p[2]<<16) | (p[3]<<24);
    448    }
    449 }
    450 #endif
    451 
    452 static void LZ4_writeLE16(void* memPtr, U16 value)
    453 {
    454    if (LZ4_isLittleEndian()) {
    455        LZ4_write16(memPtr, value);
    456    } else {
    457        BYTE* p = (BYTE*)memPtr;
    458        p[0] = (BYTE) value;
    459        p[1] = (BYTE)(value>>8);
    460    }
    461 }
    462 
    463 /* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
    464 LZ4_FORCE_INLINE
    465 void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
    466 {
    467    BYTE* d = (BYTE*)dstPtr;
    468    const BYTE* s = (const BYTE*)srcPtr;
    469    BYTE* const e = (BYTE*)dstEnd;
    470 
    471    do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d<e);
    472 }
    473 
/* Helper tables for overlapping-match copies (see LZ4_memcpy_using_offset_base):
 * after the first 4 bytes are copied one by one, srcPtr += inc32table[offset]
 * re-aims the source for the next 4-byte copy, and srcPtr -= dec64table[offset]
 * rebases it so that subsequent 8-byte wild copies repeat the pattern. */
static const unsigned inc32table[8] = {0, 1, 2,  1,  0,  4, 4, 4};
static const int      dec64table[8] = {0, 0, 0, -1, -4,  1, 2, 3};
    476 
    477 
    478 #ifndef LZ4_FAST_DEC_LOOP
    479 #  if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64
    480 #    define LZ4_FAST_DEC_LOOP 1
    481 #  elif defined(__aarch64__) && defined(__APPLE__)
    482 #    define LZ4_FAST_DEC_LOOP 1
    483 #  elif defined(__aarch64__) && !defined(__clang__)
    484     /* On non-Apple aarch64, we disable this optimization for clang because
    485      * on certain mobile chipsets, performance is reduced with clang. For
    486      * more information refer to https://github.com/lz4/lz4/pull/707 */
    487 #    define LZ4_FAST_DEC_LOOP 1
    488 #  else
    489 #    define LZ4_FAST_DEC_LOOP 0
    490 #  endif
    491 #endif
    492 
    493 #if LZ4_FAST_DEC_LOOP
    494 
LZ4_FORCE_INLINE void
LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
{
    /* Copy a back-reference whose source starts `offset` bytes before dstPtr
     * (asserted below), writing up to dstEnd. For overlapping copies
     * (offset < 8), the first 8 output bytes are produced explicitly and
     * srcPtr is re-positioned via inc32table/dec64table so the trailing
     * 8-byte wild copy keeps reproducing the repeating pattern.
     * May write up to 8 bytes past dstEnd (LZ4_wildCopy8 semantics). */
    assert(srcPtr + offset == dstPtr);
    if (offset < 8) {
        LZ4_write32(dstPtr, 0);   /* silence an msan warning when offset==0 */
        dstPtr[0] = srcPtr[0];
        dstPtr[1] = srcPtr[1];
        dstPtr[2] = srcPtr[2];
        dstPtr[3] = srcPtr[3];
        srcPtr += inc32table[offset];
        LZ4_memcpy(dstPtr+4, srcPtr, 4);
        srcPtr -= dec64table[offset];
        dstPtr += 8;
    } else {
        /* offset >= 8 : regions don't overlap within one word; plain copy */
        LZ4_memcpy(dstPtr, srcPtr, 8);
        dstPtr += 8;
        srcPtr += 8;
    }

    LZ4_wildCopy8(dstPtr, srcPtr, dstEnd);
}
    517 
    518 /* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
    519 * this version copies two times 16 bytes (instead of one time 32 bytes)
    520 * because it must be compatible with offsets >= 16. */
    521 LZ4_FORCE_INLINE void
    522 LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
    523 {
    524    BYTE* d = (BYTE*)dstPtr;
    525    const BYTE* s = (const BYTE*)srcPtr;
    526    BYTE* const e = (BYTE*)dstEnd;
    527 
    528    do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
    529 }
    530 
/* LZ4_memcpy_using_offset()  presumes :
 * - dstEnd >= dstPtr + MINMATCH
 * - there is at least 12 bytes available to write after dstEnd */
LZ4_FORCE_INLINE void
LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
{
    BYTE v[8];

    assert(dstEnd >= dstPtr + MINMATCH);

    /* For the tiny power-of-2 offsets (1, 2, 4), build one 8-byte repeating
     * pattern in v[] and stamp it over the destination; any other offset
     * falls through to the generic overlap-aware routine. */
    switch(offset) {
    case 1:
        /* pattern = single byte repeated 8 times */
        MEM_INIT(v, *srcPtr, 8);
        break;
    case 2:
        /* pattern = 2-byte pair repeated 4 times */
        LZ4_memcpy(v, srcPtr, 2);
        LZ4_memcpy(&v[2], srcPtr, 2);
#if defined(_MSC_VER) && (_MSC_VER <= 1937) /* MSVC 2022 ver 17.7 or earlier */
#  pragma warning(push)
#  pragma warning(disable : 6385) /* warning C6385: Reading invalid data from 'v'. */
#endif
        LZ4_memcpy(&v[4], v, 4);
#if defined(_MSC_VER) && (_MSC_VER <= 1937) /* MSVC 2022 ver 17.7 or earlier */
#  pragma warning(pop)
#endif
        break;
    case 4:
        /* pattern = 4-byte group repeated twice */
        LZ4_memcpy(v, srcPtr, 4);
        LZ4_memcpy(&v[4], srcPtr, 4);
        break;
    default:
        LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
        return;
    }

    /* Stamp the 8-byte pattern until dstEnd is reached; may spill past dstEnd,
     * which the 12-byte margin precondition above makes safe. */
    LZ4_memcpy(dstPtr, v, 8);
    dstPtr += 8;
    while (dstPtr < dstEnd) {
        LZ4_memcpy(dstPtr, v, 8);
        dstPtr += 8;
    }
}
    573 #endif
    574 
    575 
    576 /*-************************************
    577 *  Common functions
    578 **************************************/
/*! LZ4_NbCommonBytes() :
 *  Given a non-zero XOR of two machine words (val = a ^ b), return how many
 *  of their lowest-addressed bytes are identical : trailing zero bytes of val
 *  on little-endian hosts, leading zero bytes on big-endian ones.
 *  Uses hardware bit-scan intrinsics when available; otherwise falls back to
 *  portable multiply/lookup tricks. `val` must not be 0 (asserted). */
static unsigned LZ4_NbCommonBytes (reg_t val)
{
    assert(val != 0);
    if (LZ4_isLittleEndian()) {
        if (sizeof(val) == 8) {
#       if defined(_MSC_VER) && (_MSC_VER >= 1800) && (defined(_M_AMD64) && !defined(_M_ARM64EC)) && !defined(LZ4_FORCE_SW_BITCOUNT)
/*-*************************************************************************************************
* ARM64EC is a Microsoft-designed ARM64 ABI compatible with AMD64 applications on ARM64 Windows 11.
* The ARM64EC ABI does not support AVX/AVX2/AVX512 instructions, nor their relevant intrinsics
* including _tzcnt_u64. Therefore, we need to neuter the _tzcnt_u64 code path for ARM64EC.
****************************************************************************************************/
#         if defined(__clang__) && (__clang_major__ < 10)
            /* Avoid undefined clang-cl intrinsics issue.
             * See https://github.com/lz4/lz4/pull/1017 for details. */
            return (unsigned)__builtin_ia32_tzcnt_u64(val) >> 3;
#         else
            /* x64 CPUS without BMI support interpret `TZCNT` as `REP BSF` */
            return (unsigned)_tzcnt_u64(val) >> 3;
#         endif
#       elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
            unsigned long r = 0;
            _BitScanForward64(&r, (U64)val);
            return (unsigned)r >> 3;
#       elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
                                        !defined(LZ4_FORCE_SW_BITCOUNT)
            return (unsigned)__builtin_ctzll((U64)val) >> 3;
#       else
            /* Portable fallback : isolate bits up to the lowest set bit,
             * then sum the marker bytes with a multiply; >>56 yields the count. */
            const U64 m = 0x0101010101010101ULL;
            val ^= val - 1;
            return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56);
#       endif
        } else /* 32 bits */ {
#       if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT)
            unsigned long r;
            _BitScanForward(&r, (U32)val);
            return (unsigned)r >> 3;
#       elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
                        !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
            return (unsigned)__builtin_ctz((U32)val) >> 3;
#       else
            /* Portable fallback : same byte-sum multiply trick, 32-bit flavor. */
            const U32 m = 0x01010101;
            return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24;
#       endif
        }
    } else   /* Big Endian CPU */ {
        if (sizeof(val)==8) {
#       if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
                        !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
            return (unsigned)__builtin_clzll((U64)val) >> 3;
#       else
#if 1
            /* this method is probably faster,
             * but adds a 128 bytes lookup table */
            static const unsigned char ctz7_tab[128] = {
                7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
                4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
            };
            U64 const mask = 0x0101010101010101ULL;
            U64 const t = (((val >> 8) - mask) | val) & mask;
            return ctz7_tab[(t * 0x0080402010080402ULL) >> 57];
#else
            /* this method doesn't consume memory space like the previous one,
             * but it contains several branches,
             * that may end up slowing execution */
            static const U32 by32 = sizeof(val)*4;  /* 32 on 64 bits (goal), 16 on 32 bits.
            Just to avoid some static analyzer complaining about shift by 32 on 32-bits target.
            Note that this code path is never triggered in 32-bits mode. */
            unsigned r;
            if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; }
            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
            r += (!val);
            return r;
#endif
#       endif
        } else /* 32 bits */ {
#       if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
                            ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
                                        !defined(LZ4_FORCE_SW_BITCOUNT)
            return (unsigned)__builtin_clz((U32)val) >> 3;
#       else
            /* Branchless leading-zero-byte count via carry propagation. */
            val >>= 8;
            val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) |
              (val + 0x00FF0000)) >> 24;
            return (unsigned)val ^ 3;
#       endif
        }
    }
}
    676 
    677 
#define STEPSIZE sizeof(reg_t)

/*! LZ4_count() :
 *  Count the number of identical leading bytes between pIn and pMatch,
 *  reading no further than pInLimit (exclusive).
 *  Compares one register-width word (STEPSIZE bytes) at a time and locates
 *  the first difference with LZ4_NbCommonBytes(); the remaining tail
 *  (< STEPSIZE bytes) is finished with 4/2/1-byte compares.
 *  Returns the match length in bytes. */
LZ4_FORCE_INLINE
unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
{
    const BYTE* const pStart = pIn;

    /* First word handled outside the loop : when it differs, pIn == pStart,
     * so the byte count is just LZ4_NbCommonBytes(diff). */
    if (likely(pIn < pInLimit-(STEPSIZE-1))) {
        reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
        if (!diff) {
            pIn+=STEPSIZE; pMatch+=STEPSIZE;
        } else {
            return LZ4_NbCommonBytes(diff);
    }   }

    while (likely(pIn < pInLimit-(STEPSIZE-1))) {
        reg_t const diff = LZ4_read_ARCH(pMatch) ^ LZ4_read_ARCH(pIn);
        if (!diff) { pIn+=STEPSIZE; pMatch+=STEPSIZE; continue; }
        pIn += LZ4_NbCommonBytes(diff);
        return (unsigned)(pIn - pStart);
    }

    /* Tail : fewer than STEPSIZE bytes remain before pInLimit. */
    if ((STEPSIZE==8) && (pIn<(pInLimit-3)) && (LZ4_read32(pMatch) == LZ4_read32(pIn))) { pIn+=4; pMatch+=4; }
    if ((pIn<(pInLimit-1)) && (LZ4_read16(pMatch) == LZ4_read16(pIn))) { pIn+=2; pMatch+=2; }
    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
    return (unsigned)(pIn - pStart);
}
    704 
    705 
    706 #ifndef LZ4_COMMONDEFS_ONLY
    707 /*-************************************
    708 *  Local Constants
    709 **************************************/
static const int LZ4_64Klimit = ((64 KB) + (MFLIMIT-1));   /* inputs below this size may use byU16 tables (see assert in LZ4_compress_generic_validated) */
static const U32 LZ4_skipTrigger = 6;  /* Increase this value ==> compression run slower on incompressible data */
    712 
    713 
    714 /*-************************************
    715 *  Local Structures and types
    716 **************************************/
/* How match candidates are stored in the hash table:
 * - clearedTable : table is zero-filled and carries no entries yet
 * - byPtr  : direct pointers into the source (noDict mode only; see assert
 *            in LZ4_compress_generic_validated)
 * - byU32  : 32-bit indexes, relative to `base` (source - currentOffset)
 * - byU16  : 16-bit indexes; only used for inputs < LZ4_64Klimit */
typedef enum { clearedTable = 0, byPtr, byU32, byU16 } tableType_t;

/**
* This enum distinguishes several different modes of accessing previous
* content in the stream.
*
* - noDict        : There is no preceding content.
* - withPrefix64k : Table entries up to ctx->dictSize before the current blob
*                   blob being compressed are valid and refer to the preceding
*                   content (of length ctx->dictSize), which is available
*                   contiguously preceding in memory the content currently
*                   being compressed.
* - usingExtDict  : Like withPrefix64k, but the preceding content is somewhere
*                   else in memory, starting at ctx->dictionary with length
*                   ctx->dictSize.
* - usingDictCtx  : Everything concerning the preceding content is
*                   in a separate context, pointed to by ctx->dictCtx.
*                   ctx->dictionary, ctx->dictSize, and table entries
*                   in the current context that refer to positions
*                   preceding the beginning of the current compression are
*                   ignored. Instead, ctx->dictCtx->dictionary and ctx->dictCtx
*                   ->dictSize describe the location and size of the preceding
*                   content, and matches are found by looking in the ctx
*                   ->dictCtx->hashTable.
*/
typedef enum { noDict = 0, withPrefix64k, usingExtDict, usingDictCtx } dict_directive;
/* dictSmall : dictionary spans < 64 KB, so stale table entries may point
 * before it (filtered against prefixIdxLimit in the compression loop) */
typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
    744 
    745 
    746 /*-************************************
    747 *  Local Utils
    748 **************************************/
/* Runtime accessors for compile-time constants and macros. */
int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; }
const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; }
int LZ4_compressBound(int isize)  { return LZ4_COMPRESSBOUND(isize); }
    752 int LZ4_sizeofState(void) { return sizeof(LZ4_stream_t); }
    753 
    754 
/*-****************************************
*  Internal Definitions, used only in Tests
*******************************************/
#if defined (__cplusplus)
extern "C" {
#endif

/* Test hook : compress while forcing the external-dictionary code path. */
int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize);

/* Test hooks : decompress while forcing the external-dictionary code path. */
int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
                                    int compressedSize, int maxOutputSize,
                                    const void* dictStart, size_t dictSize);
int LZ4_decompress_safe_partial_forceExtDict(const char* source, char* dest,
                                    int compressedSize, int targetOutputSize, int dstCapacity,
                                    const void* dictStart, size_t dictSize);
#if defined (__cplusplus)
}
#endif
    773 
    774 /*-******************************
    775 *  Compression functions
    776 ********************************/
    777 LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
    778 {
    779    if (tableType == byU16)
    780        return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
    781    else
    782        return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
    783 }
    784 
    785 LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
    786 {
    787    const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG;
    788    if (LZ4_isLittleEndian()) {
    789        const U64 prime5bytes = 889523592379ULL;
    790        return (U32)(((sequence << 24) * prime5bytes) >> (64 - hashLog));
    791    } else {
    792        const U64 prime8bytes = 11400714785074694791ULL;
    793        return (U32)(((sequence >> 24) * prime8bytes) >> (64 - hashLog));
    794    }
    795 }
    796 
/* Hash the bytes at position p into a hash-table slot :
 * 5-byte hash on 64-bit targets (except byU16), 4-byte hash otherwise. */
LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tableType)
{
    if ((sizeof(reg_t)==8) && (tableType != byU16)) return LZ4_hash5(LZ4_read_ARCH(p), tableType);

#ifdef LZ4_STATIC_LINKING_ONLY_ENDIANNESS_INDEPENDENT_OUTPUT
    /* forced little-endian read : keeps compressed output identical across endiannesses */
    return LZ4_hash4(LZ4_readLE32(p), tableType);
#else
    return LZ4_hash4(LZ4_read32(p), tableType);
#endif
}
    807 
    808 LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType)
    809 {
    810    switch (tableType)
    811    {
    812    default: /* fallthrough */
    813    case clearedTable: { /* illegal! */ assert(0); return; }
    814    case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = NULL; return; }
    815    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = 0; return; }
    816    case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = 0; return; }
    817    }
    818 }
    819 
    820 LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
    821 {
    822    switch (tableType)
    823    {
    824    default: /* fallthrough */
    825    case clearedTable: /* fallthrough */
    826    case byPtr: { /* illegal! */ assert(0); return; }
    827    case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = idx; return; }
    828    case byU16: { U16* hashTable = (U16*) tableBase; assert(idx < 65536); hashTable[h] = (U16)idx; return; }
    829    }
    830 }
    831 
    832 /* LZ4_putPosition*() : only used in byPtr mode */
    833 LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h,
    834                                  void* tableBase, tableType_t const tableType)
    835 {
    836    const BYTE** const hashTable = (const BYTE**)tableBase;
    837    assert(tableType == byPtr); (void)tableType;
    838    hashTable[h] = p;
    839 }
    840 
    841 LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType)
    842 {
    843    U32 const h = LZ4_hashPosition(p, tableType);
    844    LZ4_putPositionOnHash(p, h, tableBase, tableType);
    845 }
    846 
    847 /* LZ4_getIndexOnHash() :
    848 * Index of match position registered in hash table.
    849 * hash position must be calculated by using base+index, or dictBase+index.
    850 * Assumption 1 : only valid if tableType == byU32 or byU16.
    851 * Assumption 2 : h is presumed valid (within limits of hash table)
    852 */
    853 LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType)
    854 {
    855    LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2);
    856    if (tableType == byU32) {
    857        const U32* const hashTable = (const U32*) tableBase;
    858        assert(h < (1U << (LZ4_MEMORY_USAGE-2)));
    859        return hashTable[h];
    860    }
    861    if (tableType == byU16) {
    862        const U16* const hashTable = (const U16*) tableBase;
    863        assert(h < (1U << (LZ4_MEMORY_USAGE-1)));
    864        return hashTable[h];
    865    }
    866    assert(0); return 0;  /* forbidden case */
    867 }
    868 
    869 static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType_t tableType)
    870 {
    871    assert(tableType == byPtr); (void)tableType;
    872    { const BYTE* const* hashTable = (const BYTE* const*) tableBase; return hashTable[h]; }
    873 }
    874 
    875 LZ4_FORCE_INLINE const BYTE*
    876 LZ4_getPosition(const BYTE* p,
    877                const void* tableBase, tableType_t tableType)
    878 {
    879    U32 const h = LZ4_hashPosition(p, tableType);
    880    return LZ4_getPositionOnHash(h, tableBase, tableType);
    881 }
    882 
/* LZ4_prepareTable() :
 * Make cctx's hash table usable for compressing `inputSize` bytes in mode
 * `tableType` : either keep its existing entries (when provably compatible)
 * or zero it. Also detaches any attached dictionary state. */
LZ4_FORCE_INLINE void
LZ4_prepareTable(LZ4_stream_t_internal* const cctx,
          const int inputSize,
          const tableType_t tableType) {
    /* If the table hasn't been used, it's guaranteed to be zeroed out, and is
     * therefore safe to use no matter what mode we're in. Otherwise, we figure
     * out if it's safe to leave as is or whether it needs to be reset.
     */
    if ((tableType_t)cctx->tableType != clearedTable) {
        assert(inputSize >= 0);
        if ((tableType_t)cctx->tableType != tableType
          || ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU)  /* byU16 indexes must stay < 64K */
          || ((tableType == byU32) && cctx->currentOffset > 1 GB)
          || tableType == byPtr
          || inputSize >= 4 KB)
        {
            DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx);
            MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE);
            cctx->currentOffset = 0;
            cctx->tableType = (U32)clearedTable;
        } else {
            DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)");
        }
    }

    /* Adding a gap, so all previous entries are > LZ4_DISTANCE_MAX back,
     * is faster than compressing without a gap.
     * However, compressing with currentOffset == 0 is faster still,
     * so we preserve that case.
     */
    if (cctx->currentOffset != 0 && tableType == byU32) {
        DEBUGLOG(5, "LZ4_prepareTable: adding 64KB to currentOffset");
        cctx->currentOffset += 64 KB;
    }

    /* Finally, clear history */
    cctx->dictCtx = NULL;
    cctx->dictionary = NULL;
    cctx->dictSize = 0;
}
    923 
/** LZ4_compress_generic_validated() :
 *  Core compression loop : searches matches through the hash table and emits
 *  LZ4 sequences (token, literals, little-endian offset, match length) into
 *  dest.
 *  inlined, to ensure branches are decided at compilation time : tableType,
 *  dictDirective, dictIssue and outputDirective are compile-time constants
 *  at every call site, so dead branches are eliminated.
 *  The following conditions are presumed already validated:
 *  - source != NULL
 *  - inputSize > 0
 *  @return : number of bytes written into dest,
 *            or 0 when outputDirective==limitedOutput and dest is too small.
 */
LZ4_FORCE_INLINE int LZ4_compress_generic_validated(
                 LZ4_stream_t_internal* const cctx,
                 const char* const source,
                 char* const dest,
                 const int inputSize,
                 int*  inputConsumed, /* only written when outputDirective == fillOutput */
                 const int maxOutputSize,
                 const limitedOutput_directive outputDirective,
                 const tableType_t tableType,
                 const dict_directive dictDirective,
                 const dictIssue_directive dictIssue,
                 const int acceleration)
{
    int result;
    const BYTE* ip = (const BYTE*)source;

    U32 const startIndex = cctx->currentOffset;
    const BYTE* base = (const BYTE*)source - startIndex;   /* index i <=> position base+i */
    const BYTE* lowLimit;

    const LZ4_stream_t_internal* dictCtx = (const LZ4_stream_t_internal*) cctx->dictCtx;
    const BYTE* const dictionary =
        dictDirective == usingDictCtx ? dictCtx->dictionary : cctx->dictionary;
    const U32 dictSize =
        dictDirective == usingDictCtx ? dictCtx->dictSize : cctx->dictSize;
    const U32 dictDelta =
        (dictDirective == usingDictCtx) ? startIndex - dictCtx->currentOffset : 0;   /* make indexes in dictCtx comparable with indexes in current context */

    int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx);
    U32 const prefixIdxLimit = startIndex - dictSize;   /* used when dictDirective == dictSmall */
    const BYTE* const dictEnd = dictionary ? dictionary + dictSize : dictionary;
    const BYTE* anchor = (const BYTE*) source;   /* start of not-yet-emitted literals */
    const BYTE* const iend = ip + inputSize;
    const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1;
    const BYTE* const matchlimit = iend - LASTLITERALS;

    /* the dictCtx currentOffset is indexed on the start of the dictionary,
     * while a dictionary in the current context precedes the currentOffset */
    const BYTE* dictBase = (dictionary == NULL) ? NULL :
                           (dictDirective == usingDictCtx) ?
                            dictionary + dictSize - dictCtx->currentOffset :
                            dictionary + dictSize - startIndex;

    BYTE* op = (BYTE*) dest;
    BYTE* const olimit = op + maxOutputSize;

    U32 offset = 0;
    U32 forwardH;

    DEBUGLOG(5, "LZ4_compress_generic_validated: srcSize=%i, tableType=%u", inputSize, tableType);
    assert(ip != NULL);
    if (tableType == byU16) assert(inputSize<LZ4_64Klimit);  /* Size too large (not within 64K limit) */
    if (tableType == byPtr) assert(dictDirective==noDict);   /* only supported use case with byPtr */
    /* If init conditions are not met, we don't have to mark stream
     * as having dirty context, since no action was taken yet */
    if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; } /* Impossible to store anything */
    assert(acceleration >= 1);

    lowLimit = (const BYTE*)source - (dictDirective == withPrefix64k ? dictSize : 0);

    /* Update context state */
    if (dictDirective == usingDictCtx) {
        /* Subsequent linked blocks can't use the dictionary. */
        /* Instead, they use the block we just compressed. */
        cctx->dictCtx = NULL;
        cctx->dictSize = (U32)inputSize;
    } else {
        cctx->dictSize += (U32)inputSize;
    }
    cctx->currentOffset += (U32)inputSize;
    cctx->tableType = (U32)tableType;

    if (inputSize<LZ4_minLength) goto _last_literals;        /* Input too small, no compression (all literals) */

    /* First Byte */
    {   U32 const h = LZ4_hashPosition(ip, tableType);
        if (tableType == byPtr) {
            LZ4_putPositionOnHash(ip, h, cctx->hashTable, byPtr);
        } else {
            LZ4_putIndexOnHash(startIndex, h, cctx->hashTable, tableType);
    }   }
    ip++; forwardH = LZ4_hashPosition(ip, tableType);

    /* Main Loop */
    for ( ; ; ) {
        const BYTE* match;
        BYTE* token;
        const BYTE* filledIp;

        /* Find a match : probe positions at increasing step sizes
         * (controlled by acceleration/LZ4_skipTrigger) until a candidate
         * within LZ4_DISTANCE_MAX matches 4 bytes, or input runs out */
        if (tableType == byPtr) {
            const BYTE* forwardIp = ip;
            int step = 1;
            int searchMatchNb = acceleration << LZ4_skipTrigger;
            do {
                U32 const h = forwardH;
                ip = forwardIp;
                forwardIp += step;
                step = (searchMatchNb++ >> LZ4_skipTrigger);

                if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
                assert(ip < mflimitPlusOne);

                match = LZ4_getPositionOnHash(h, cctx->hashTable, tableType);
                forwardH = LZ4_hashPosition(forwardIp, tableType);
                LZ4_putPositionOnHash(ip, h, cctx->hashTable, tableType);

            } while ( (match+LZ4_DISTANCE_MAX < ip)
                   || (LZ4_read32(match) != LZ4_read32(ip)) );

        } else {   /* byU32, byU16 */

            const BYTE* forwardIp = ip;
            int step = 1;
            int searchMatchNb = acceleration << LZ4_skipTrigger;
            do {
                U32 const h = forwardH;
                U32 const current = (U32)(forwardIp - base);
                U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
                assert(matchIndex <= current);
                assert(forwardIp - base < (ptrdiff_t)(2 GB - 1));
                ip = forwardIp;
                forwardIp += step;
                step = (searchMatchNb++ >> LZ4_skipTrigger);

                if (unlikely(forwardIp > mflimitPlusOne)) goto _last_literals;
                assert(ip < mflimitPlusOne);

                if (dictDirective == usingDictCtx) {
                    if (matchIndex < startIndex) {
                        /* there was no match, try the dictionary */
                        assert(tableType == byU32);
                        matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
                        match = dictBase + matchIndex;
                        matchIndex += dictDelta;   /* make dictCtx index comparable with current context */
                        lowLimit = dictionary;
                    } else {
                        match = base + matchIndex;
                        lowLimit = (const BYTE*)source;
                    }
                } else if (dictDirective == usingExtDict) {
                    if (matchIndex < startIndex) {
                        DEBUGLOG(7, "extDict candidate: matchIndex=%5u  <  startIndex=%5u", matchIndex, startIndex);
                        assert(startIndex - matchIndex >= MINMATCH);
                        assert(dictBase);
                        match = dictBase + matchIndex;
                        lowLimit = dictionary;
                    } else {
                        match = base + matchIndex;
                        lowLimit = (const BYTE*)source;
                    }
                } else {   /* single continuous memory segment */
                    match = base + matchIndex;
                }
                forwardH = LZ4_hashPosition(forwardIp, tableType);
                LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);

                DEBUGLOG(7, "candidate at pos=%u  (offset=%u \n", matchIndex, current - matchIndex);
                if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; }    /* match outside of valid area */
                assert(matchIndex < current);
                if ( ((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX))
                  && (matchIndex+LZ4_DISTANCE_MAX < current)) {
                    continue;
                } /* too far */
                assert((current - matchIndex) <= LZ4_DISTANCE_MAX);  /* match now expected within distance */

                if (LZ4_read32(match) == LZ4_read32(ip)) {
                    if (maybe_extMem) offset = current - matchIndex;
                    break;   /* match found */
                }

            } while(1);
        }

        /* Catch up : extend the match backwards over pending literals */
        filledIp = ip;
        assert(ip > anchor); /* this is always true as ip has been advanced before entering the main loop */
        if ((match > lowLimit) && unlikely(ip[-1] == match[-1])) {
            do { ip--; match--; } while (((ip > anchor) & (match > lowLimit)) && (unlikely(ip[-1] == match[-1])));
        }

        /* Encode Literals */
        {   unsigned const litLength = (unsigned)(ip - anchor);
            token = op++;
            if ((outputDirective == limitedOutput) &&  /* Check output buffer overflow */
                (unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) ) {
                return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
            }
            if ((outputDirective == fillOutput) &&
                (unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) {
                op--;
                goto _last_literals;
            }
            if (litLength >= RUN_MASK) {
                unsigned len = litLength - RUN_MASK;
                *token = (RUN_MASK<<ML_BITS);
                for(; len >= 255 ; len-=255) *op++ = 255;
                *op++ = (BYTE)len;
            }
            else *token = (BYTE)(litLength<<ML_BITS);

            /* Copy Literals */
            LZ4_wildCopy8(op, anchor, op+litLength);
            op+=litLength;
            DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
                        (int)(anchor-(const BYTE*)source), litLength, (int)(ip-(const BYTE*)source));
        }

_next_match:
        /* at this stage, the following variables must be correctly set :
         * - ip : at start of LZ operation
         * - match : at start of previous pattern occurrence; can be within current prefix, or within extDict
         * - offset : if maybe_ext_memSegment==1 (constant)
         * - lowLimit : must be == dictionary to mean "match is within extDict"; must be == source otherwise
         * - token and *token : position to write 4-bits for match length; higher 4-bits for literal length supposed already written
         */

        if ((outputDirective == fillOutput) &&
            (op + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit)) {
            /* the match was too close to the end, rewind and go to last literals */
            op = token;
            goto _last_literals;
        }

        /* Encode Offset */
        if (maybe_extMem) {   /* static test */
            DEBUGLOG(6, "             with offset=%u  (ext if > %i)", offset, (int)(ip - (const BYTE*)source));
            assert(offset <= LZ4_DISTANCE_MAX && offset > 0);
            LZ4_writeLE16(op, (U16)offset); op+=2;
        } else  {
            DEBUGLOG(6, "             with offset=%u  (same segment)", (U32)(ip - match));
            assert(ip-match <= LZ4_DISTANCE_MAX);
            LZ4_writeLE16(op, (U16)(ip - match)); op+=2;
        }

        /* Encode MatchLength */
        {   unsigned matchCode;

            if ( (dictDirective==usingExtDict || dictDirective==usingDictCtx)
              && (lowLimit==dictionary) /* match within extDict */ ) {
                const BYTE* limit = ip + (dictEnd-match);
                assert(dictEnd > match);
                if (limit > matchlimit) limit = matchlimit;
                matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, limit);
                ip += (size_t)matchCode + MINMATCH;
                if (ip==limit) {
                    /* match ran off the end of the dictionary :
                     * continue counting within the current segment */
                    unsigned const more = LZ4_count(limit, (const BYTE*)source, matchlimit);
                    matchCode += more;
                    ip += more;
                }
                DEBUGLOG(6, "             with matchLength=%u starting in extDict", matchCode+MINMATCH);
            } else {
                matchCode = LZ4_count(ip+MINMATCH, match+MINMATCH, matchlimit);
                ip += (size_t)matchCode + MINMATCH;
                DEBUGLOG(6, "             with matchLength=%u", matchCode+MINMATCH);
            }

            if ((outputDirective) &&    /* Check output buffer overflow */
                (unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) {
                if (outputDirective == fillOutput) {
                    /* Match description too long : reduce it */
                    U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255;
                    ip -= matchCode - newMatchCode;
                    assert(newMatchCode < matchCode);
                    matchCode = newMatchCode;
                    if (unlikely(ip <= filledIp)) {
                        /* We have already filled up to filledIp so if ip ends up less than filledIp
                         * we have positions in the hash table beyond the current position. This is
                         * a problem if we reuse the hash table. So we have to remove these positions
                         * from the hash table.
                         */
                        const BYTE* ptr;
                        DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip));
                        for (ptr = ip; ptr <= filledIp; ++ptr) {
                            U32 const h = LZ4_hashPosition(ptr, tableType);
                            LZ4_clearHash(h, cctx->hashTable, tableType);
                        }
                    }
                } else {
                    assert(outputDirective == limitedOutput);
                    return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
                }
            }
            if (matchCode >= ML_MASK) {
                *token += ML_MASK;
                matchCode -= ML_MASK;
                LZ4_write32(op, 0xFFFFFFFF);
                while (matchCode >= 4*255) {
                    op+=4;
                    LZ4_write32(op, 0xFFFFFFFF);
                    matchCode -= 4*255;
                }
                op += matchCode / 255;
                *op++ = (BYTE)(matchCode % 255);
            } else
                *token += (BYTE)(matchCode);
        }
        /* Ensure we have enough space for the last literals. */
        assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit));

        anchor = ip;

        /* Test end of chunk */
        if (ip >= mflimitPlusOne) break;

        /* Fill table */
        {   U32 const h = LZ4_hashPosition(ip-2, tableType);
            if (tableType == byPtr) {
                LZ4_putPositionOnHash(ip-2, h, cctx->hashTable, byPtr);
            } else {
                U32 const idx = (U32)((ip-2) - base);
                LZ4_putIndexOnHash(idx, h, cctx->hashTable, tableType);
        }   }

        /* Test next position : if it matches too, emit a zero-literal
         * sequence immediately without re-entering the search loop */
        if (tableType == byPtr) {

            match = LZ4_getPosition(ip, cctx->hashTable, tableType);
            LZ4_putPosition(ip, cctx->hashTable, tableType);
            if ( (match+LZ4_DISTANCE_MAX >= ip)
              && (LZ4_read32(match) == LZ4_read32(ip)) )
            { token=op++; *token=0; goto _next_match; }

        } else {   /* byU32, byU16 */

            U32 const h = LZ4_hashPosition(ip, tableType);
            U32 const current = (U32)(ip-base);
            U32 matchIndex = LZ4_getIndexOnHash(h, cctx->hashTable, tableType);
            assert(matchIndex < current);
            if (dictDirective == usingDictCtx) {
                if (matchIndex < startIndex) {
                    /* there was no match, try the dictionary */
                    assert(tableType == byU32);
                    matchIndex = LZ4_getIndexOnHash(h, dictCtx->hashTable, byU32);
                    match = dictBase + matchIndex;
                    lowLimit = dictionary;   /* required for match length counter */
                    matchIndex += dictDelta;
                } else {
                    match = base + matchIndex;
                    lowLimit = (const BYTE*)source;  /* required for match length counter */
                }
            } else if (dictDirective==usingExtDict) {
                if (matchIndex < startIndex) {
                    assert(dictBase);
                    match = dictBase + matchIndex;
                    lowLimit = dictionary;   /* required for match length counter */
                } else {
                    match = base + matchIndex;
                    lowLimit = (const BYTE*)source;   /* required for match length counter */
                }
            } else {   /* single memory segment */
                match = base + matchIndex;
            }
            LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
            assert(matchIndex < current);
            if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1)
              && (((tableType==byU16) && (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 1 : (matchIndex+LZ4_DISTANCE_MAX >= current))
              && (LZ4_read32(match) == LZ4_read32(ip)) ) {
                token=op++;
                *token=0;
                if (maybe_extMem) offset = current - matchIndex;
                DEBUGLOG(6, "seq.start:%i, literals=%u, match.start:%i",
                            (int)(anchor-(const BYTE*)source), 0, (int)(ip-(const BYTE*)source));
                goto _next_match;
            }
        }

        /* Prepare next loop */
        forwardH = LZ4_hashPosition(++ip, tableType);

    }

_last_literals:
    /* Encode Last Literals */
    {   size_t lastRun = (size_t)(iend - anchor);
        if ( (outputDirective) &&  /* Check output buffer overflow */
            (op + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > olimit)) {
            if (outputDirective == fillOutput) {
                /* adapt lastRun to fill 'dst' */
                assert(olimit >= op);
                lastRun  = (size_t)(olimit-op) - 1/*token*/;
                lastRun -= (lastRun + 256 - RUN_MASK) / 256;  /*additional length tokens*/
            } else {
                assert(outputDirective == limitedOutput);
                return 0;   /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
            }
        }
        DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun);
        if (lastRun >= RUN_MASK) {
            size_t accumulator = lastRun - RUN_MASK;
            *op++ = RUN_MASK << ML_BITS;
            for(; accumulator >= 255 ; accumulator-=255) *op++ = 255;
            *op++ = (BYTE) accumulator;
        } else {
            *op++ = (BYTE)(lastRun<<ML_BITS);
        }
        LZ4_memcpy(op, anchor, lastRun);
        ip = anchor + lastRun;
        op += lastRun;
    }

    if (outputDirective == fillOutput) {
        *inputConsumed = (int) (((const char*)ip)-source);
    }
    result = (int)(((char*)op) - dest);
    assert(result > 0);
    DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, result);
    return result;
}
   1339 
/** LZ4_compress_generic() :
 *  inlined, to ensure branches are decided at compilation time;
 *  takes care of src == (NULL, 0)
 *  and forward the rest to LZ4_compress_generic_validated */
LZ4_FORCE_INLINE int LZ4_compress_generic(
                 LZ4_stream_t_internal* const cctx,
                 const char* const src,
                 char* const dst,
                 const int srcSize,
                 int *inputConsumed, /* only written when outputDirective == fillOutput */
                 const int dstCapacity,
                 const limitedOutput_directive outputDirective,
                 const tableType_t tableType,
                 const dict_directive dictDirective,
                 const dictIssue_directive dictIssue,
                 const int acceleration)
{
    DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, dstCapacity=%i",
                srcSize, dstCapacity);

    /* the (U32) casts also reject negative srcSize values */
    if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; }  /* Unsupported srcSize, too large (or negative) */
    if (srcSize == 0) {   /* src == NULL supported if srcSize == 0 */
        if (outputDirective != notLimited && dstCapacity <= 0) return 0;  /* no output, can't write anything */
        DEBUGLOG(5, "Generating an empty block");
        assert(outputDirective == notLimited || dstCapacity >= 1);
        assert(dst != NULL);
        /* an empty block is a single token byte with literal-length 0 */
        dst[0] = 0;
        if (outputDirective == fillOutput) {
            assert (inputConsumed != NULL);
            *inputConsumed = 0;
        }
        return 1;
    }
    assert(src != NULL);

    return LZ4_compress_generic_validated(cctx, src, dst, srcSize,
                inputConsumed, /* only written into if outputDirective == fillOutput */
                dstCapacity, outputDirective,
                tableType, dictDirective, dictIssue, acceleration);
}
   1380 
   1381 
/*! LZ4_compress_fast_extState() :
 *  Compress `source` into `dest`, using an externally allocated `state`
 *  which is fully re-initialized before use.
 *  Selects the hash table layout based on input size and pointer width.
 * @return : compressed size, or 0 on failure */
int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
{
    /* note : LZ4_initStream() returns NULL on invalid state (bad size/alignment);
     * result is dereferenced immediately, the assert below only documents the expectation */
    LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse;
    assert(ctx != NULL);
    /* clamp acceleration into its supported range */
    if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
    if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
    if (maxOutputSize >= LZ4_compressBound(inputSize)) {
        /* enough room for worst case : no need to bound-check the output while compressing */
        if (inputSize < LZ4_64Klimit) {
            /* input < 64 KB : all positions fit within byU16 table entries */
            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
        } else {
            /* on 32-bit systems with high addresses, fall back to a table of pointers */
            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
        }
    } else {
        if (inputSize < LZ4_64Klimit) {
            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration);
        } else {
            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
            return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, noDict, noDictIssue, acceleration);
        }
    }
}
   1404 
   1405 /**
   1406 * LZ4_compress_fast_extState_fastReset() :
   1407 * A variant of LZ4_compress_fast_extState().
   1408 *
   1409 * Using this variant avoids an expensive initialization step. It is only safe
   1410 * to call if the state buffer is known to be correctly initialized already
   1411 * (see comment in lz4.h on LZ4_resetStream_fast() for a definition of
   1412 * "correctly initialized").
   1413 */
   1414 int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration)
   1415 {
   1416    LZ4_stream_t_internal* const ctx = &((LZ4_stream_t*)state)->internal_donotuse;
   1417    if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
   1418    if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
   1419    assert(ctx != NULL);
   1420 
   1421    if (dstCapacity >= LZ4_compressBound(srcSize)) {
   1422        if (srcSize < LZ4_64Klimit) {
   1423            const tableType_t tableType = byU16;
   1424            LZ4_prepareTable(ctx, srcSize, tableType);
   1425            if (ctx->currentOffset) {
   1426                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, dictSmall, acceleration);
   1427            } else {
   1428                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
   1429            }
   1430        } else {
   1431            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
   1432            LZ4_prepareTable(ctx, srcSize, tableType);
   1433            return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
   1434        }
   1435    } else {
   1436        if (srcSize < LZ4_64Klimit) {
   1437            const tableType_t tableType = byU16;
   1438            LZ4_prepareTable(ctx, srcSize, tableType);
   1439            if (ctx->currentOffset) {
   1440                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, dictSmall, acceleration);
   1441            } else {
   1442                return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
   1443            }
   1444        } else {
   1445            const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
   1446            LZ4_prepareTable(ctx, srcSize, tableType);
   1447            return LZ4_compress_generic(ctx, src, dst, srcSize, NULL, dstCapacity, limitedOutput, tableType, noDict, noDictIssue, acceleration);
   1448        }
   1449    }
   1450 }
   1451 
   1452 
   1453 int LZ4_compress_fast(const char* src, char* dest, int srcSize, int dstCapacity, int acceleration)
   1454 {
   1455    int result;
   1456 #if (LZ4_HEAPMODE)
   1457    LZ4_stream_t* const ctxPtr = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
   1458    if (ctxPtr == NULL) return 0;
   1459 #else
   1460    LZ4_stream_t ctx;
   1461    LZ4_stream_t* const ctxPtr = &ctx;
   1462 #endif
   1463    result = LZ4_compress_fast_extState(ctxPtr, src, dest, srcSize, dstCapacity, acceleration);
   1464 
   1465 #if (LZ4_HEAPMODE)
   1466    FREEMEM(ctxPtr);
   1467 #endif
   1468    return result;
   1469 }
   1470 
   1471 
   1472 int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity)
   1473 {
   1474    return LZ4_compress_fast(src, dst, srcSize, dstCapacity, 1);
   1475 }
   1476 
   1477 
/* Note!: This function leaves the stream in an unclean/broken state!
 * It is not safe to subsequently use the same state with a _fastReset() or
 * _continue() call without resetting it. */
static int LZ4_compress_destSize_extState_internal(LZ4_stream_t* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize, int acceleration)
{
    void* const s = LZ4_initStream(state, sizeof (*state));
    assert(s != NULL); (void)s;

    if (targetDstSize >= LZ4_compressBound(*srcSizePtr)) {  /* compression success is guaranteed */
        return LZ4_compress_fast_extState(state, src, dst, *srcSizePtr, targetDstSize, acceleration);
    } else {
        /* fillOutput mode : *srcSizePtr is both input (bytes available)
         * and output (bytes actually consumed) */
        if (*srcSizePtr < LZ4_64Klimit) {
            return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, byU16, noDict, noDictIssue, acceleration);
        } else {
            /* on 32-bit systems with high addresses, fall back to a table of pointers */
            tableType_t const addrMode = ((sizeof(void*)==4) && ((uptrval)src > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
            return LZ4_compress_generic(&state->internal_donotuse, src, dst, *srcSizePtr, srcSizePtr, targetDstSize, fillOutput, addrMode, noDict, noDictIssue, acceleration);
    }   }
}
   1496 
   1497 int LZ4_compress_destSize_extState(void* state, const char* src, char* dst, int* srcSizePtr, int targetDstSize, int acceleration)
   1498 {
   1499    int const r = LZ4_compress_destSize_extState_internal((LZ4_stream_t*)state, src, dst, srcSizePtr, targetDstSize, acceleration);
   1500    /* clean the state on exit */
   1501    LZ4_initStream(state, sizeof (LZ4_stream_t));
   1502    return r;
   1503 }
   1504 
   1505 
   1506 int LZ4_compress_destSize(const char* src, char* dst, int* srcSizePtr, int targetDstSize)
   1507 {
   1508 #if (LZ4_HEAPMODE)
   1509    LZ4_stream_t* const ctx = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));   /* malloc-calloc always properly aligned */
   1510    if (ctx == NULL) return 0;
   1511 #else
   1512    LZ4_stream_t ctxBody;
   1513    LZ4_stream_t* const ctx = &ctxBody;
   1514 #endif
   1515 
   1516    int result = LZ4_compress_destSize_extState_internal(ctx, src, dst, srcSizePtr, targetDstSize, 1);
   1517 
   1518 #if (LZ4_HEAPMODE)
   1519    FREEMEM(ctx);
   1520 #endif
   1521    return result;
   1522 }
   1523 
   1524 
   1525 
   1526 /*-******************************
   1527 *  Streaming functions
   1528 ********************************/
   1529 
   1530 #if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
   1531 LZ4_stream_t* LZ4_createStream(void)
   1532 {
   1533    LZ4_stream_t* const lz4s = (LZ4_stream_t*)ALLOC(sizeof(LZ4_stream_t));
   1534    LZ4_STATIC_ASSERT(sizeof(LZ4_stream_t) >= sizeof(LZ4_stream_t_internal));
   1535    DEBUGLOG(4, "LZ4_createStream %p", lz4s);
   1536    if (lz4s == NULL) return NULL;
   1537    LZ4_initStream(lz4s, sizeof(*lz4s));
   1538    return lz4s;
   1539 }
   1540 #endif
   1541 
   1542 static size_t LZ4_stream_t_alignment(void)
   1543 {
   1544 #if LZ4_ALIGN_TEST
   1545    typedef struct { char c; LZ4_stream_t t; } t_a;
   1546    return sizeof(t_a) - sizeof(LZ4_stream_t);
   1547 #else
   1548    return 1;  /* effectively disabled */
   1549 #endif
   1550 }
   1551 
   1552 LZ4_stream_t* LZ4_initStream (void* buffer, size_t size)
   1553 {
   1554    DEBUGLOG(5, "LZ4_initStream");
   1555    if (buffer == NULL) { return NULL; }
   1556    if (size < sizeof(LZ4_stream_t)) { return NULL; }
   1557    if (!LZ4_isAligned(buffer, LZ4_stream_t_alignment())) return NULL;
   1558    MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal));
   1559    return (LZ4_stream_t*)buffer;
   1560 }
   1561 
/* resetStream is now deprecated,
 * prefer initStream() which is more general */
void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
{
    DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream);
    /* note : unlike LZ4_initStream(), performs no validation (NULL, size, alignment) */
    MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t_internal));
}
   1569 
   1570 void LZ4_resetStream_fast(LZ4_stream_t* ctx) {
   1571    LZ4_prepareTable(&(ctx->internal_donotuse), 0, byU32);
   1572 }
   1573 
   1574 #if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
   1575 int LZ4_freeStream (LZ4_stream_t* LZ4_stream)
   1576 {
   1577    if (!LZ4_stream) return 0;   /* support free on NULL */
   1578    DEBUGLOG(5, "LZ4_freeStream %p", LZ4_stream);
   1579    FREEMEM(LZ4_stream);
   1580    return (0);
   1581 }
   1582 #endif
   1583 
   1584 
typedef enum { _ld_fast, _ld_slow } LoadDict_mode_e;
#define HASH_UNIT sizeof(reg_t)
/*! LZ4_loadDict_internal() :
 *  Fill @LZ4_dict's hash table with positions referencing @dictionary,
 *  keeping only its last 64 KB.
 * @_ld : _ld_fast indexes 1 position in 3;
 *        _ld_slow additionally indexes every remaining position,
 *        without overwriting already-present (more recent) entries.
 * @return : loaded dictionary size, in bytes (necessarily <= 64 KB) */
int LZ4_loadDict_internal(LZ4_stream_t* LZ4_dict,
                    const char* dictionary, int dictSize,
                    LoadDict_mode_e _ld)
{
    LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse;
    const tableType_t tableType = byU32;
    const BYTE* p = (const BYTE*)dictionary;
    const BYTE* const dictEnd = p + dictSize;
    U32 idx32;

    DEBUGLOG(4, "LZ4_loadDict (%i bytes from %p into %p)", dictSize, dictionary, LZ4_dict);

    /* It's necessary to reset the context,
     * and not just continue it with prepareTable()
     * to avoid any risk of generating overflowing matchIndex
     * when compressing using this dictionary */
    LZ4_resetStream(LZ4_dict);

    /* We always increment the offset by 64 KB, since, if the dict is longer,
     * we truncate it to the last 64k, and if it's shorter, we still want to
     * advance by a whole window length so we can provide the guarantee that
     * there are only valid offsets in the window, which allows an optimization
     * in LZ4_compress_fast_continue() where it uses noDictIssue even when the
     * dictionary isn't a full 64k. */
    dict->currentOffset += 64 KB;

    if (dictSize < (int)HASH_UNIT) {
        /* too small to index even a single position */
        return 0;
    }

    if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;
    dict->dictionary = p;
    dict->dictSize = (U32)(dictEnd - p);
    dict->tableType = (U32)tableType;
    /* index of the first retained dictionary byte */
    idx32 = dict->currentOffset - dict->dictSize;

    while (p <= dictEnd-HASH_UNIT) {
        U32 const h = LZ4_hashPosition(p, tableType);
        /* Note: overwriting => favors positions end of dictionary */
        LZ4_putIndexOnHash(idx32, h, dict->hashTable, tableType);
        p+=3; idx32+=3;
    }

    if (_ld == _ld_slow) {
        /* Fill hash table with additional references, to improve compression capability */
        p = dict->dictionary;
        idx32 = dict->currentOffset - dict->dictSize;
        while (p <= dictEnd-HASH_UNIT) {
            U32 const h = LZ4_hashPosition(p, tableType);
            U32 const limit = dict->currentOffset - 64 KB;
            if (LZ4_getIndexOnHash(h, dict->hashTable, tableType) <= limit) {
                /* Note: not overwriting => favors positions beginning of dictionary */
                LZ4_putIndexOnHash(idx32, h, dict->hashTable, tableType);
            }
            p++; idx32++;
        }
    }

    return (int)dict->dictSize;
}
   1647 
/*! LZ4_loadDict() :
 *  Fast dictionary load (indexes ~1 position in 3). */
int LZ4_loadDict(LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
{
    return LZ4_loadDict_internal(LZ4_dict, dictionary, dictSize, _ld_fast);
}
   1652 
/*! LZ4_loadDictSlow() :
 *  Thorough dictionary load (indexes additional positions) :
 *  slower to load, aimed at improving compression capability. */
int LZ4_loadDictSlow(LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
{
    return LZ4_loadDict_internal(LZ4_dict, dictionary, dictSize, _ld_slow);
}
   1657 
/*! LZ4_attach_dictionary() :
 *  Make @workingStream reference the content indexed in @dictionaryStream
 *  without copying its tables. Passing NULL (or an empty dictionary)
 *  detaches any previously attached dictionary. */
void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream)
{
    const LZ4_stream_t_internal* dictCtx = (dictionaryStream == NULL) ? NULL :
        &(dictionaryStream->internal_donotuse);

    DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)",
             workingStream, dictionaryStream,
             dictCtx != NULL ? dictCtx->dictSize : 0);

    if (dictCtx != NULL) {
        /* If the current offset is zero, we will never look in the
         * external dictionary context, since there is no value a table
         * entry can take that indicate a miss. In that case, we need
         * to bump the offset to something non-zero.
         */
        if (workingStream->internal_donotuse.currentOffset == 0) {
            workingStream->internal_donotuse.currentOffset = 64 KB;
        }

        /* Don't actually attach an empty dictionary.
         */
        if (dictCtx->dictSize == 0) {
            dictCtx = NULL;
        }
    }
    workingStream->internal_donotuse.dictCtx = dictCtx;
}
   1685 
   1686 
/*! LZ4_renormDictT() :
 *  When indexes get close to the 2 GB limit (risk of ptrdiff_t overflow in
 *  32-bit mode), rebase the hash table so currentOffset restarts at 64 KB. */
static void LZ4_renormDictT(LZ4_stream_t_internal* LZ4_dict, int nextSize)
{
    assert(nextSize >= 0);
    if (LZ4_dict->currentOffset + (unsigned)nextSize > 0x80000000) {   /* potential ptrdiff_t overflow (32-bits mode) */
        /* rescale hash table */
        U32 const delta = LZ4_dict->currentOffset - 64 KB;
        const BYTE* dictEnd = LZ4_dict->dictionary + LZ4_dict->dictSize;
        int i;
        DEBUGLOG(4, "LZ4_renormDictT");
        for (i=0; i<LZ4_HASH_SIZE_U32; i++) {
            /* entries older than the new base become invalid (0) */
            if (LZ4_dict->hashTable[i] < delta) LZ4_dict->hashTable[i]=0;
            else LZ4_dict->hashTable[i] -= delta;
        }
        LZ4_dict->currentOffset = 64 KB;
        if (LZ4_dict->dictSize > 64 KB) LZ4_dict->dictSize = 64 KB;
        LZ4_dict->dictionary = dictEnd - LZ4_dict->dictSize;
    }
}
   1705 
   1706 
/*! LZ4_compress_fast_continue() :
 *  Streaming compression : compress `source` using the history already
 *  registered in @LZ4_stream (prefix, attached dictCtx, or external dict),
 *  then register `source` as the dictionary for the next invocation.
 * @return : compressed size, or 0 on failure */
int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream,
                                const char* source, char* dest,
                                int inputSize, int maxOutputSize,
                                int acceleration)
{
    const tableType_t tableType = byU32;
    LZ4_stream_t_internal* const streamPtr = &LZ4_stream->internal_donotuse;
    const char* dictEnd = streamPtr->dictSize ? (const char*)streamPtr->dictionary + streamPtr->dictSize : NULL;

    DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i, dictSize=%u)", inputSize, streamPtr->dictSize);

    LZ4_renormDictT(streamPtr, inputSize);   /* fix index overflow */
    if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
    if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;

    /* invalidate tiny dictionaries */
    if ( (streamPtr->dictSize < 4)     /* tiny dictionary : not enough for a hash */
      && (dictEnd != source)           /* prefix mode */
      && (inputSize > 0)               /* tolerance : don't lose history, in case next invocation would use prefix mode */
      && (streamPtr->dictCtx == NULL)  /* usingDictCtx */
      ) {
        DEBUGLOG(5, "LZ4_compress_fast_continue: dictSize(%u) at addr:%p is too small", streamPtr->dictSize, streamPtr->dictionary);
        /* remove dictionary existence from history, to employ faster prefix mode */
        streamPtr->dictSize = 0;
        streamPtr->dictionary = (const BYTE*)source;
        dictEnd = source;
    }

    /* Check overlapping input/dictionary space */
    {   const char* const sourceEnd = source + inputSize;
        if ((sourceEnd > (const char*)streamPtr->dictionary) && (sourceEnd < dictEnd)) {
            /* input overwrites part of the dictionary : shrink the dict to its still-valid tail */
            streamPtr->dictSize = (U32)(dictEnd - sourceEnd);
            if (streamPtr->dictSize > 64 KB) streamPtr->dictSize = 64 KB;
            if (streamPtr->dictSize < 4) streamPtr->dictSize = 0;
            streamPtr->dictionary = (const BYTE*)dictEnd - streamPtr->dictSize;
        }
    }

    /* prefix mode : source data follows dictionary */
    if (dictEnd == source) {
        if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset))
            return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, dictSmall, acceleration);
        else
            return LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, withPrefix64k, noDictIssue, acceleration);
    }

    /* external dictionary mode */
    {   int result;
        if (streamPtr->dictCtx) {
            /* We depend here on the fact that dictCtx'es (produced by
             * LZ4_loadDict) guarantee that their tables contain no references
             * to offsets between dictCtx->currentOffset - 64 KB and
             * dictCtx->currentOffset - dictCtx->dictSize. This makes it safe
             * to use noDictIssue even when the dict isn't a full 64 KB.
             */
            if (inputSize > 4 KB) {
                /* For compressing large blobs, it is faster to pay the setup
                 * cost to copy the dictionary's tables into the active context,
                 * so that the compression loop is only looking into one table.
                 */
                LZ4_memcpy(streamPtr, streamPtr->dictCtx, sizeof(*streamPtr));
                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
            } else {
                /* small data <= 4 KB : reference the dictCtx tables directly */
                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration);
            }
        } else {  /* no dictCtx : use the stream's own external dictionary */
            if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) {
                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, dictSmall, acceleration);
            } else {
                result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
            }
        }
        /* the just-compressed input becomes the dictionary for the next call */
        streamPtr->dictionary = (const BYTE*)source;
        streamPtr->dictSize = (U32)inputSize;
        return result;
    }
}
   1784 
   1785 
/* Hidden debug function, to force-test external dictionary mode */
int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize)
{
    LZ4_stream_t_internal* const streamPtr = &LZ4_dict->internal_donotuse;
    int result;

    LZ4_renormDictT(streamPtr, srcSize);   /* fix index overflow */

    if ((streamPtr->dictSize < 64 KB) && (streamPtr->dictSize < streamPtr->currentOffset)) {
        result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, dictSmall, 1);
    } else {
        result = LZ4_compress_generic(streamPtr, source, dest, srcSize, NULL, 0, notLimited, byU32, usingExtDict, noDictIssue, 1);
    }

    /* the just-compressed input becomes the dictionary for the next call */
    streamPtr->dictionary = (const BYTE*)source;
    streamPtr->dictSize = (U32)srcSize;

    return result;
}
   1805 
   1806 
/*! LZ4_saveDict() :
 *  If previously compressed data block is not guaranteed to remain available at its memory location,
 *  save it into a safer place (char* safeBuffer).
 *  Note : no need to call LZ4_loadDict() afterwards, dictionary is immediately usable,
 *         one can therefore call LZ4_compress_fast_continue() right after.
 * @return : saved dictionary size in bytes (necessarily <= dictSize), or 0 if error.
 */
int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
{
    LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse;

    DEBUGLOG(5, "LZ4_saveDict : dictSize=%i, safeBuffer=%p", dictSize, safeBuffer);

    if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */
    if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; }

    if (safeBuffer == NULL) assert(dictSize == 0);
    if (dictSize > 0) {
        const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize;
        assert(dict->dictionary);
        /* memmove : source and safeBuffer regions may overlap */
        LZ4_memmove(safeBuffer, previousDictEnd - dictSize, (size_t)dictSize);
    }

    dict->dictionary = (const BYTE*)safeBuffer;
    dict->dictSize = (U32)dictSize;

    return dictSize;
}
   1835 
   1836 
   1837 
   1838 /*-*******************************
   1839 *  Decompression functions
   1840 ********************************/
   1841 
   1842 typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;
   1843 
   1844 #undef MIN
   1845 #define MIN(a,b)    ( (a) < (b) ? (a) : (b) )
   1846 
   1847 
   1848 /* variant for decompress_unsafe()
   1849 * does not know end of input
   1850 * presumes input is well formed
   1851 * note : will consume at least one byte */
   1852 static size_t read_long_length_no_check(const BYTE** pp)
   1853 {
   1854    size_t b, l = 0;
   1855    do { b = **pp; (*pp)++; l += b; } while (b==255);
   1856    DEBUGLOG(6, "read_long_length_no_check: +length=%zu using %zu input bytes", l, l/255 + 1)
   1857    return l;
   1858 }
   1859 
/* core decoder variant for LZ4_decompress_fast*()
 * for legacy support only : these entry points are deprecated.
 * - Presumes input is correctly formed (no defense vs malformed inputs)
 * - Does not know input size (presume input buffer is "large enough")
 * - Decompress a full block (only)
 * @return : nb of bytes read from input.
 * Note : this variant is not optimized for speed, just for maintenance.
 *        the goal is to remove support of decompress_fast*() variants by v2.0
 **/
LZ4_FORCE_INLINE int
LZ4_decompress_unsafe_generic(
                const BYTE* const istart,
                BYTE* const ostart,
                int decompressedSize,

                size_t prefixSize,
                const BYTE* const dictStart,  /* only if dict==usingExtDict */
                const size_t dictSize         /* note: =0 if dictStart==NULL */
                )
{
    const BYTE* ip = istart;
    BYTE* op = (BYTE*)ostart;
    BYTE* const oend = ostart + decompressedSize;
    const BYTE* const prefixStart = ostart - prefixSize;

    DEBUGLOG(5, "LZ4_decompress_unsafe_generic");
    if (dictStart == NULL) assert(dictSize == 0);

    while (1) {
        /* start new sequence */
        unsigned token = *ip++;

        /* literals */
        {   size_t ll = token >> ML_BITS;
            if (ll==15) {
                /* long literal length */
                ll += read_long_length_no_check(&ip);
            }
            if ((size_t)(oend-op) < ll) return -1; /* output buffer overflow */
            LZ4_memmove(op, ip, ll); /* support in-place decompression */
            op += ll;
            ip += ll;
            if ((size_t)(oend-op) < MFLIMIT) {
                if (op==oend) break;  /* end of block */
                DEBUGLOG(5, "invalid: literals end at distance %zi from end of block", oend-op);
                /* incorrect end of block :
                 * last match must start at least MFLIMIT==12 bytes before end of output block */
                return -1;
        }   }

        /* match */
        {   size_t ml = token & 15;
            size_t const offset = LZ4_readLE16(ip);
            ip+=2;

            if (ml==15) {
                /* long match length */
                ml += read_long_length_no_check(&ip);
            }
            ml += MINMATCH;

            if ((size_t)(oend-op) < ml) return -1; /* output buffer overflow */

            {   const BYTE* match = op - offset;

                /* out of range */
                if (offset > (size_t)(op - prefixStart) + dictSize) {
                    DEBUGLOG(6, "offset out of range");
                    return -1;
                }

                /* check special case : extDict */
                if (offset > (size_t)(op - prefixStart)) {
                    /* extDict scenario */
                    const BYTE* const dictEnd = dictStart + dictSize;
                    const BYTE* extMatch = dictEnd - (offset - (size_t)(op-prefixStart));
                    size_t const extml = (size_t)(dictEnd - extMatch);
                    if (extml > ml) {
                        /* match entirely within extDict */
                        LZ4_memmove(op, extMatch, ml);
                        op += ml;
                        ml = 0;
                    } else {
                        /* match split between extDict & prefix */
                        LZ4_memmove(op, extMatch, extml);
                        op += extml;
                        ml -= extml;
                    }
                    match = prefixStart;
                }

                /* match copy - slow variant, supporting overlap copy
                 * (byte-by-byte, so any offset >= 1 is handled correctly) */
                {   size_t u;
                    for (u=0; u<ml; u++) {
                        op[u] = match[u];
            }   }   }
            op += ml;
            if ((size_t)(oend-op) < LASTLITERALS) {
                DEBUGLOG(5, "invalid: match ends at distance %zi from end of block", oend-op);
                /* incorrect end of block :
                 * last match must stop at least LASTLITERALS==5 bytes before end of output block */
                return -1;
            }
        } /* match */
    } /* main loop */
    return (int)(ip - istart);
}
   1967 
   1968 
/* Read the variable-length literal or match length.
 *
 * @ip : input pointer, advanced past the consumed length bytes
 * @ilimit : position after which, if the length is not fully decoded, the input is necessarily corrupted.
 * @initial_check : if set, check *ip >= ilimit before reading the first byte.
 * @return : decoded length, or rvl_error on corrupted input
 *           (read limit reached, or accumulator overflow in 32-bit mode).
 **/
typedef size_t Rvl_t;
static const Rvl_t rvl_error = (Rvl_t)(-1);
LZ4_FORCE_INLINE Rvl_t
read_variable_length(const BYTE** ip, const BYTE* ilimit,
                     int initial_check)
{
    Rvl_t s, length = 0;
    assert(ip != NULL);
    assert(*ip !=  NULL);
    assert(ilimit != NULL);
    if (initial_check && unlikely((*ip) >= ilimit)) {    /* read limit reached */
        return rvl_error;
    }
    /* first byte handled outside the loop : common case (s != 255) exits early */
    s = **ip;
    (*ip)++;
    length += s;
    if (unlikely((*ip) > ilimit)) {    /* read limit reached */
        return rvl_error;
    }
    /* accumulator overflow detection (32-bit mode only) */
    if ((sizeof(length) < 8) && unlikely(length > ((Rvl_t)(-1)/2)) ) {
        return rvl_error;
    }
    if (likely(s != 255)) return length;
    /* a 255 byte means "more length bytes follow" : keep accumulating */
    do {
        s = **ip;
        (*ip)++;
        length += s;
        if (unlikely((*ip) > ilimit)) {    /* read limit reached */
            return rvl_error;
        }
        /* accumulator overflow detection (32-bit mode only) */
        if ((sizeof(length) < 8) && unlikely(length > ((Rvl_t)(-1)/2)) ) {
            return rvl_error;
        }
    } while (s == 255);

    return length;
}
   2015 
/*! LZ4_decompress_generic() :
 *  This generic decompression function covers all use cases.
 *  It shall be instantiated several times, using different sets of directives.
 *  Note that it is important for performance that this function really get inlined,
 *  in order to remove useless branches during compilation optimization.
 *
 * @return : number of bytes written into dst (<= outputSize) on success,
 *           or a negative value on error, whose magnitude encodes
 *           the input position (+1) at which decoding failed
 *           (see _output_error at the bottom of this function).
 */
LZ4_FORCE_INLINE int
LZ4_decompress_generic(
                const char* const src,
                char* const dst,
                int srcSize,
                int outputSize,         /* If endOnInput==endOnInputSize, this value is `dstCapacity` */

                earlyEnd_directive partialDecoding,  /* full, partial */
                dict_directive dict,                 /* noDict, withPrefix64k, usingExtDict */
                const BYTE* const lowPrefix,  /* always <= dst, == dst when no prefix */
                const BYTE* const dictStart,  /* only if dict==usingExtDict */
                const size_t dictSize         /* note : = 0 if noDict */
                )
{
   if ((src == NULL) || (outputSize < 0)) { return -1; }

   {   const BYTE* ip = (const BYTE*) src;
       const BYTE* const iend = ip + srcSize;

       BYTE* op = (BYTE*) dst;
       BYTE* const oend = op + outputSize;
       BYTE* cpy;

       const BYTE* const dictEnd = (dictStart == NULL) ? NULL : dictStart + dictSize;

       /* offsets are 16-bit (read via LZ4_readLE16 below), hence <= 65535 :
        * when dictSize >= 64 KB, a match can never point below the dictionary,
        * so the offset range check can be skipped entirely. */
       const int checkOffset = (dictSize < (int)(64 KB));


       /* Set up the "end" pointers for the shortcut. */
       const BYTE* const shortiend = iend - 14 /*maxLL*/ - 2 /*offset*/;
       const BYTE* const shortoend = oend - 14 /*maxLL*/ - 18 /*maxML*/;

       const BYTE* match;
       size_t offset;
       unsigned token;
       size_t length;


       DEBUGLOG(5, "LZ4_decompress_generic (srcSize:%i, dstSize:%i)", srcSize, outputSize);

       /* Special cases */
       assert(lowPrefix <= op);
       if (unlikely(outputSize==0)) {
           /* Empty output buffer */
           if (partialDecoding) return 0;
           /* a full decode of an empty block must be exactly one zero byte */
           return ((srcSize==1) && (*ip==0)) ? 0 : -1;
       }
       if (unlikely(srcSize==0)) { return -1; }

   /* LZ4_FAST_DEC_LOOP:
    * designed for modern OoO performance cpus,
    * where copying reliably 32-bytes is preferable to an unpredictable branch.
    * note : fast loop may show a regression for some client arm chips. */
#if LZ4_FAST_DEC_LOOP
       if ((oend - op) < FASTLOOP_SAFE_DISTANCE) {
           DEBUGLOG(6, "move to safe decode loop");
           goto safe_decode;
       }

       /* Fast loop : decode sequences as long as output < oend-FASTLOOP_SAFE_DISTANCE */
       DEBUGLOG(6, "using fast decode loop");
       while (1) {
           /* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
           assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
           assert(ip < iend);
           token = *ip++;
           length = token >> ML_BITS;  /* literal length */
           DEBUGLOG(7, "blockPos%6u: litLength token = %u", (unsigned)(op-(BYTE*)dst), (unsigned)length);

           /* decode literal length */
           if (length == RUN_MASK) {
               size_t const addl = read_variable_length(&ip, iend-RUN_MASK, 1);
               if (addl == rvl_error) {
                   DEBUGLOG(6, "error reading long literal length");
                   goto _output_error;
               }
               length += addl;
               if (unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
               if (unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */

               /* copy literals */
               LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
               if ((op+length>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; }
               LZ4_wildCopy32(op, ip, op+length);
               ip += length; op += length;
           } else if (ip <= iend-(16 + 1/*max lit + offset + nextToken*/)) {
               /* We don't need to check oend, since we check it once for each loop below */
               DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length);
               /* Literals can only be <= 14, but hope compilers optimize better when copy by a register size */
               LZ4_memcpy(op, ip, 16);
               ip += length; op += length;
           } else {
               goto safe_literal_copy;
           }

           /* get offset */
           offset = LZ4_readLE16(ip); ip+=2;
           DEBUGLOG(6, "blockPos%6u: offset = %u", (unsigned)(op-(BYTE*)dst), (unsigned)offset);
           match = op - offset;
           assert(match <= op);  /* overflow check */

           /* get matchlength */
           length = token & ML_MASK;
           DEBUGLOG(7, "  match length token = %u (len==%u)", (unsigned)length, (unsigned)length+MINMATCH);

           if (length == ML_MASK) {
               size_t const addl = read_variable_length(&ip, iend - LASTLITERALS + 1, 0);
               if (addl == rvl_error) {
                   DEBUGLOG(5, "error reading long match length");
                   goto _output_error;
               }
               length += addl;
               length += MINMATCH;
               DEBUGLOG(7, "  long match length == %u", (unsigned)length);
               if (unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */
               if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
                   goto safe_match_copy;
               }
           } else {
               length += MINMATCH;
               if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
                   DEBUGLOG(7, "moving to safe_match_copy (ml==%u)", (unsigned)length);
                   goto safe_match_copy;
               }

               /* Fastpath check: skip LZ4_wildCopy32 when true */
               if ((dict == withPrefix64k) || (match >= lowPrefix)) {
                   if (offset >= 8) {
                       assert(match >= lowPrefix);
                       assert(match <= op);
                       assert(op + 18 <= oend);

                       /* short non-overlapping match : copy 18 bytes unconditionally
                        * (maximum token-encoded match length), then advance by the real length */
                       LZ4_memcpy(op, match, 8);
                       LZ4_memcpy(op+8, match+8, 8);
                       LZ4_memcpy(op+16, match+16, 2);
                       op += length;
                       continue;
           }   }   }

           if ( checkOffset && (unlikely(match + dictSize < lowPrefix)) ) {
               DEBUGLOG(5, "Error : pos=%zi, offset=%zi => outside buffers", op-lowPrefix, op-match);
               goto _output_error;
           }
           /* match starting within external dictionary */
           if ((dict==usingExtDict) && (match < lowPrefix)) {
               assert(dictEnd != NULL);
               if (unlikely(op+length > oend-LASTLITERALS)) {
                   if (partialDecoding) {
                       DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
                       length = MIN(length, (size_t)(oend-op));
                   } else {
                       DEBUGLOG(6, "end-of-block condition violated")
                       goto _output_error;
               }   }

               if (length <= (size_t)(lowPrefix-match)) {
                   /* match fits entirely within external dictionary : just copy */
                   LZ4_memmove(op, dictEnd - (lowPrefix-match), length);
                   op += length;
               } else {
                   /* match stretches into both external dictionary and current block */
                   size_t const copySize = (size_t)(lowPrefix - match);
                   size_t const restSize = length - copySize;
                   LZ4_memcpy(op, dictEnd - copySize, copySize);
                   op += copySize;
                   if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
                       BYTE* const endOfMatch = op + restSize;
                       const BYTE* copyFrom = lowPrefix;
                       while (op < endOfMatch) { *op++ = *copyFrom++; }
                   } else {
                       LZ4_memcpy(op, lowPrefix, restSize);
                       op += restSize;
               }   }
               continue;
           }

           /* copy match within block */
           cpy = op + length;

           assert((op <= oend) && (oend-op >= 32));
           if (unlikely(offset<16)) {
               /* overlapping match : needs the offset-aware copy routine */
               LZ4_memcpy_using_offset(op, match, cpy, offset);
           } else {
               LZ4_wildCopy32(op, match, cpy);
           }

           op = cpy;   /* wildcopy correction */
       }
   safe_decode:   /* entered when remaining output is too small for the fast loop */
#endif

       /* Main Loop : decode remaining sequences where output < FASTLOOP_SAFE_DISTANCE */
       DEBUGLOG(6, "using safe decode loop");
       while (1) {
           assert(ip < iend);
           token = *ip++;
           length = token >> ML_BITS;  /* literal length */
           DEBUGLOG(7, "blockPos%6u: litLength token = %u", (unsigned)(op-(BYTE*)dst), (unsigned)length);

           /* A two-stage shortcut for the most common case:
            * 1) If the literal length is 0..14, and there is enough space,
            * enter the shortcut and copy 16 bytes on behalf of the literals
            * (in the fast mode, only 8 bytes can be safely copied this way).
            * 2) Further if the match length is 4..18, copy 18 bytes in a similar
            * manner; but we ensure that there's enough space in the output for
            * those 18 bytes earlier, upon entering the shortcut (in other words,
            * there is a combined check for both stages).
            */
           if ( (length != RUN_MASK)
               /* strictly "less than" on input, to re-enter the loop with at least one byte */
             && likely((ip < shortiend) & (op <= shortoend)) ) {
               /* Copy the literals */
               LZ4_memcpy(op, ip, 16);
               op += length; ip += length;

               /* The second stage: prepare for match copying, decode full info.
                * If it doesn't work out, the info won't be wasted. */
               length = token & ML_MASK; /* match length */
               DEBUGLOG(7, "blockPos%6u: matchLength token = %u (len=%u)", (unsigned)(op-(BYTE*)dst), (unsigned)length, (unsigned)length + 4);
               offset = LZ4_readLE16(ip); ip += 2;
               match = op - offset;
               assert(match <= op); /* check overflow */

               /* Do not deal with overlapping matches. */
               if ( (length != ML_MASK)
                 && (offset >= 8)
                 && (dict==withPrefix64k || match >= lowPrefix) ) {
                   /* Copy the match. */
                   LZ4_memcpy(op + 0, match + 0, 8);
                   LZ4_memcpy(op + 8, match + 8, 8);
                   LZ4_memcpy(op +16, match +16, 2);
                   op += length + MINMATCH;
                   /* Both stages worked, load the next token. */
                   continue;
               }

               /* The second stage didn't work out, but the info is ready.
                * Propel it right to the point of match copying. */
               goto _copy_match;
           }

           /* decode literal length */
           if (length == RUN_MASK) {
               size_t const addl = read_variable_length(&ip, iend-RUN_MASK, 1);
               if (addl == rvl_error) { goto _output_error; }
               length += addl;
               if (unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
               if (unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
           }

#if LZ4_FAST_DEC_LOOP
       safe_literal_copy:
#endif
           /* copy literals */
           cpy = op+length;

           LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
           if ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) {
               /* We've either hit the input parsing restriction or the output parsing restriction.
                * In the normal scenario, decoding a full block, it must be the last sequence,
                * otherwise it's an error (invalid input or dimensions).
                * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
                */
               if (partialDecoding) {
                   /* Since we are partial decoding we may be in this block because of the output parsing
                    * restriction, which is not valid since the output buffer is allowed to be undersized.
                    */
                   DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
                   DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
                   DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
                   DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
                   /* Finishing in the middle of a literals segment,
                    * due to lack of input.
                    */
                   if (ip+length > iend) {
                       length = (size_t)(iend-ip);
                       cpy = op + length;
                   }
                   /* Finishing in the middle of a literals segment,
                    * due to lack of output space.
                    */
                   if (cpy > oend) {
                       cpy = oend;
                       assert(op<=oend);
                       length = (size_t)(oend-op);
                   }
               } else {
                    /* We must be on the last sequence (or invalid) because of the parsing limitations
                     * so check that we exactly consume the input and don't overrun the output buffer.
                     */
                   if ((ip+length != iend) || (cpy > oend)) {
                       DEBUGLOG(5, "should have been last run of literals")
                       DEBUGLOG(5, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend);
                       DEBUGLOG(5, "or cpy(%p) > (oend-MFLIMIT)(%p)", cpy, oend-MFLIMIT);
                       DEBUGLOG(5, "after writing %u bytes / %i bytes available", (unsigned)(op-(BYTE*)dst), outputSize);
                       goto _output_error;
                   }
               }
               LZ4_memmove(op, ip, length);  /* supports overlapping memory regions, for in-place decompression scenarios */
               ip += length;
               op += length;
               /* Necessarily EOF when !partialDecoding.
                * When partialDecoding, it is EOF if we've either
                * filled the output buffer or
                * can't proceed with reading an offset for following match.
                */
               if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
                   break;
               }
           } else {
               LZ4_wildCopy8(op, ip, cpy);   /* can overwrite up to 8 bytes beyond cpy */
               ip += length; op = cpy;
           }

           /* get offset */
           offset = LZ4_readLE16(ip); ip+=2;
           match = op - offset;

           /* get matchlength */
           length = token & ML_MASK;
           DEBUGLOG(7, "blockPos%6u: matchLength token = %u", (unsigned)(op-(BYTE*)dst), (unsigned)length);

   _copy_match:
           if (length == ML_MASK) {
               size_t const addl = read_variable_length(&ip, iend - LASTLITERALS + 1, 0);
               if (addl == rvl_error) { goto _output_error; }
               length += addl;
               if (unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error;   /* overflow detection */
           }
           length += MINMATCH;

#if LZ4_FAST_DEC_LOOP
       safe_match_copy:
#endif
           if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error;   /* Error : offset outside buffers */
           /* match starting within external dictionary */
           if ((dict==usingExtDict) && (match < lowPrefix)) {
               assert(dictEnd != NULL);
               if (unlikely(op+length > oend-LASTLITERALS)) {
                   if (partialDecoding) length = MIN(length, (size_t)(oend-op));
                   else goto _output_error;   /* doesn't respect parsing restriction */
               }

               if (length <= (size_t)(lowPrefix-match)) {
                   /* match fits entirely within external dictionary : just copy */
                   LZ4_memmove(op, dictEnd - (lowPrefix-match), length);
                   op += length;
               } else {
                   /* match stretches into both external dictionary and current block */
                   size_t const copySize = (size_t)(lowPrefix - match);
                   size_t const restSize = length - copySize;
                   LZ4_memcpy(op, dictEnd - copySize, copySize);
                   op += copySize;
                   if (restSize > (size_t)(op - lowPrefix)) {  /* overlap copy */
                       BYTE* const endOfMatch = op + restSize;
                       const BYTE* copyFrom = lowPrefix;
                       while (op < endOfMatch) *op++ = *copyFrom++;
                   } else {
                       LZ4_memcpy(op, lowPrefix, restSize);
                       op += restSize;
               }   }
               continue;
           }
           assert(match >= lowPrefix);

           /* copy match within block */
           cpy = op + length;

           /* partialDecoding : may end anywhere within the block */
           assert(op<=oend);
           if (partialDecoding && (cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
               size_t const mlen = MIN(length, (size_t)(oend-op));
               const BYTE* const matchEnd = match + mlen;
               BYTE* const copyEnd = op + mlen;
               if (matchEnd > op) {   /* overlap copy */
                   while (op < copyEnd) { *op++ = *match++; }
               } else {
                   LZ4_memcpy(op, match, mlen);
               }
               op = copyEnd;
               if (op == oend) { break; }
               continue;
           }

           if (unlikely(offset<8)) {
               /* overlapping match : replicate first 4 bytes manually,
                * then use the inc32/dec64 tables to realign the source */
               LZ4_write32(op, 0);   /* silence msan warning when offset==0 */
               op[0] = match[0];
               op[1] = match[1];
               op[2] = match[2];
               op[3] = match[3];
               match += inc32table[offset];
               LZ4_memcpy(op+4, match, 4);
               match -= dec64table[offset];
           } else {
               LZ4_memcpy(op, match, 8);
               match += 8;
           }
           op += 8;

           if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
               BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
               if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
               if (op < oCopyLimit) {
                   LZ4_wildCopy8(op, match, oCopyLimit);
                   match += oCopyLimit - op;
                   op = oCopyLimit;
               }
               /* finish byte-by-byte once wildcopy can no longer over-write safely */
               while (op < cpy) { *op++ = *match++; }
           } else {
               LZ4_memcpy(op, match, 8);
               if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); }
           }
           op = cpy;   /* wildcopy correction */
       }

       /* end of decoding */
       DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst));
       return (int) (((char*)op)-dst);     /* Nb of output bytes decoded */

       /* Overflow error detected */
   _output_error:
       /* negative return : magnitude encodes the input position (+1) of the failure */
       return (int) (-(((const char*)ip)-src))-1;
   }
}
   2446 
   2447 
   2448 /*===== Instantiate the API decoding functions. =====*/
   2449 
   2450 LZ4_FORCE_O2
   2451 int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
   2452 {
   2453    return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
   2454                                  decode_full_block, noDict,
   2455                                  (BYTE*)dest, NULL, 0);
   2456 }
   2457 
   2458 LZ4_FORCE_O2
   2459 int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity)
   2460 {
   2461    dstCapacity = MIN(targetOutputSize, dstCapacity);
   2462    return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity,
   2463                                  partial_decode,
   2464                                  noDict, (BYTE*)dst, NULL, 0);
   2465 }
   2466 
   2467 LZ4_FORCE_O2
   2468 int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
   2469 {
   2470    DEBUGLOG(5, "LZ4_decompress_fast");
   2471    return LZ4_decompress_unsafe_generic(
   2472                (const BYTE*)source, (BYTE*)dest, originalSize,
   2473                0, NULL, 0);
   2474 }
   2475 
   2476 /*===== Instantiate a few more decoding cases, used more than once. =====*/
   2477 
   2478 LZ4_FORCE_O2 /* Exported, an obsolete API function. */
   2479 int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
   2480 {
   2481    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
   2482                                  decode_full_block, withPrefix64k,
   2483                                  (BYTE*)dest - 64 KB, NULL, 0);
   2484 }
   2485 
   2486 LZ4_FORCE_O2
   2487 static int LZ4_decompress_safe_partial_withPrefix64k(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity)
   2488 {
   2489    dstCapacity = MIN(targetOutputSize, dstCapacity);
   2490    return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity,
   2491                                  partial_decode, withPrefix64k,
   2492                                  (BYTE*)dest - 64 KB, NULL, 0);
   2493 }
   2494 
   2495 /* Another obsolete API function, paired with the previous one. */
   2496 int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int originalSize)
   2497 {
   2498    return LZ4_decompress_unsafe_generic(
   2499                (const BYTE*)source, (BYTE*)dest, originalSize,
   2500                64 KB, NULL, 0);
   2501 }
   2502 
   2503 LZ4_FORCE_O2
   2504 static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize,
   2505                                               size_t prefixSize)
   2506 {
   2507    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
   2508                                  decode_full_block, noDict,
   2509                                  (BYTE*)dest-prefixSize, NULL, 0);
   2510 }
   2511 
   2512 LZ4_FORCE_O2
   2513 static int LZ4_decompress_safe_partial_withSmallPrefix(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity,
   2514                                               size_t prefixSize)
   2515 {
   2516    dstCapacity = MIN(targetOutputSize, dstCapacity);
   2517    return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity,
   2518                                  partial_decode, noDict,
   2519                                  (BYTE*)dest-prefixSize, NULL, 0);
   2520 }
   2521 
   2522 LZ4_FORCE_O2
   2523 int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
   2524                                     int compressedSize, int maxOutputSize,
   2525                                     const void* dictStart, size_t dictSize)
   2526 {
   2527    DEBUGLOG(5, "LZ4_decompress_safe_forceExtDict");
   2528    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
   2529                                  decode_full_block, usingExtDict,
   2530                                  (BYTE*)dest, (const BYTE*)dictStart, dictSize);
   2531 }
   2532 
   2533 LZ4_FORCE_O2
   2534 int LZ4_decompress_safe_partial_forceExtDict(const char* source, char* dest,
   2535                                     int compressedSize, int targetOutputSize, int dstCapacity,
   2536                                     const void* dictStart, size_t dictSize)
   2537 {
   2538    dstCapacity = MIN(targetOutputSize, dstCapacity);
   2539    return LZ4_decompress_generic(source, dest, compressedSize, dstCapacity,
   2540                                  partial_decode, usingExtDict,
   2541                                  (BYTE*)dest, (const BYTE*)dictStart, dictSize);
   2542 }
   2543 
   2544 LZ4_FORCE_O2
   2545 static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize,
   2546                                       const void* dictStart, size_t dictSize)
   2547 {
   2548    return LZ4_decompress_unsafe_generic(
   2549                (const BYTE*)source, (BYTE*)dest, originalSize,
   2550                0, (const BYTE*)dictStart, dictSize);
   2551 }
   2552 
   2553 /* The "double dictionary" mode, for use with e.g. ring buffers: the first part
   2554 * of the dictionary is passed as prefix, and the second via dictStart + dictSize.
   2555 * These routines are used only once, in LZ4_decompress_*_continue().
   2556 */
   2557 LZ4_FORCE_INLINE
   2558 int LZ4_decompress_safe_doubleDict(const char* source, char* dest, int compressedSize, int maxOutputSize,
   2559                                   size_t prefixSize, const void* dictStart, size_t dictSize)
   2560 {
   2561    return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
   2562                                  decode_full_block, usingExtDict,
   2563                                  (BYTE*)dest-prefixSize, (const BYTE*)dictStart, dictSize);
   2564 }
   2565 
   2566 /*===== streaming decompression functions =====*/
   2567 
   2568 #if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
   2569 LZ4_streamDecode_t* LZ4_createStreamDecode(void)
   2570 {
   2571    LZ4_STATIC_ASSERT(sizeof(LZ4_streamDecode_t) >= sizeof(LZ4_streamDecode_t_internal));
   2572    return (LZ4_streamDecode_t*) ALLOC_AND_ZERO(sizeof(LZ4_streamDecode_t));
   2573 }
   2574 
   2575 int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream)
   2576 {
   2577    if (LZ4_stream == NULL) { return 0; }  /* support free on NULL */
   2578    FREEMEM(LZ4_stream);
   2579    return 0;
   2580 }
   2581 #endif
   2582 
   2583 /*! LZ4_setStreamDecode() :
   2584 *  Use this function to instruct where to find the dictionary.
   2585 *  This function is not necessary if previous data is still available where it was decoded.
   2586 *  Loading a size of 0 is allowed (same effect as no dictionary).
   2587 * @return : 1 if OK, 0 if error
   2588 */
   2589 int LZ4_setStreamDecode (LZ4_streamDecode_t* LZ4_streamDecode, const char* dictionary, int dictSize)
   2590 {
   2591    LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
   2592    lz4sd->prefixSize = (size_t)dictSize;
   2593    if (dictSize) {
   2594        assert(dictionary != NULL);
   2595        lz4sd->prefixEnd = (const BYTE*) dictionary + dictSize;
   2596    } else {
   2597        lz4sd->prefixEnd = (const BYTE*) dictionary;
   2598    }
   2599    lz4sd->externalDict = NULL;
   2600    lz4sd->extDictSize  = 0;
   2601    return 1;
   2602 }
   2603 
   2604 /*! LZ4_decoderRingBufferSize() :
   2605 *  when setting a ring buffer for streaming decompression (optional scenario),
   2606 *  provides the minimum size of this ring buffer
   2607 *  to be compatible with any source respecting maxBlockSize condition.
   2608 *  Note : in a ring buffer scenario,
   2609 *  blocks are presumed decompressed next to each other.
   2610 *  When not enough space remains for next block (remainingSize < maxBlockSize),
   2611 *  decoding resumes from beginning of ring buffer.
   2612 * @return : minimum ring buffer size,
   2613 *           or 0 if there is an error (invalid maxBlockSize).
   2614 */
   2615 int LZ4_decoderRingBufferSize(int maxBlockSize)
   2616 {
   2617    if (maxBlockSize < 0) return 0;
   2618    if (maxBlockSize > LZ4_MAX_INPUT_SIZE) return 0;
   2619    if (maxBlockSize < 16) maxBlockSize = 16;
   2620    return LZ4_DECODER_RING_BUFFER_SIZE(maxBlockSize);
   2621 }
   2622 
   2623 /*
   2624 *_continue() :
   2625    These decoding functions allow decompression of multiple blocks in "streaming" mode.
   2626    Previously decoded blocks must still be available at the memory position where they were decoded.
   2627    If it's not possible, save the relevant part of decoded data into a safe buffer,
   2628    and indicate where it stands using LZ4_setStreamDecode()
   2629 */
/*! LZ4_decompress_safe_continue() :
 *  Decompress one block in streaming mode. Previously decoded data must
 *  still be accessible (either contiguously before @dest, or registered
 *  via LZ4_setStreamDecode()) so that matches can reference it.
 *  Three history situations are distinguished below :
 *  1) no history yet, 2) decoding contiguously after the previous block,
 *  3) history located in a separate (external) buffer.
 * @return : nb of bytes decoded into dest (<= maxOutputSize),
 *           or an error code (<= 0) forwarded from the underlying decoder.
 */
LZ4_FORCE_O2
int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
{
    LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
    int result;

    if (lz4sd->prefixSize == 0) {
        /* The first call, no dictionary yet. */
        assert(lz4sd->extDictSize == 0);
        result = LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
        if (result <= 0) return result;
        /* decoded output becomes the prefix history for the next block */
        lz4sd->prefixSize = (size_t)result;
        lz4sd->prefixEnd = (BYTE*)dest + result;
    } else if (lz4sd->prefixEnd == (BYTE*)dest) {
        /* They're rolling the current segment. */
        if (lz4sd->prefixSize >= 64 KB - 1)
            /* >= 64KB of contiguous history : all matches resolve within the prefix */
            result = LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
        else if (lz4sd->extDictSize == 0)
            /* short contiguous prefix only, no older external dictionary */
            result = LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize,
                                                         lz4sd->prefixSize);
        else
            /* short prefix backed by an older external dictionary */
            result = LZ4_decompress_safe_doubleDict(source, dest, compressedSize, maxOutputSize,
                                                    lz4sd->prefixSize, lz4sd->externalDict, lz4sd->extDictSize);
        if (result <= 0) return result;
        /* extend the contiguous prefix over the newly decoded block */
        lz4sd->prefixSize += (size_t)result;
        lz4sd->prefixEnd  += result;
    } else {
        /* The buffer wraps around, or they're switching to another buffer. */
        /* previous prefix becomes the external dictionary for this block */
        lz4sd->extDictSize = lz4sd->prefixSize;
        lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
        result = LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize,
                                                  lz4sd->externalDict, lz4sd->extDictSize);
        if (result <= 0) return result;
        /* history restarts at dest : new prefix is just this block */
        lz4sd->prefixSize = (size_t)result;
        lz4sd->prefixEnd  = (BYTE*)dest + result;
    }

    return result;
}
   2669 
/*! LZ4_decompress_fast_continue() :
 *  Streaming variant of the deprecated "fast" decoder : @originalSize is the
 *  exact decompressed size, supplied by the caller rather than bounded.
 *  Same three history situations as LZ4_decompress_safe_continue().
 * @return : forwarded result from the underlying decoder
 *           (<= 0 indicates an error).
 */
LZ4_FORCE_O2 int
LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode,
                       const char* source, char* dest, int originalSize)
{
    LZ4_streamDecode_t_internal* const lz4sd =
        (assert(LZ4_streamDecode!=NULL), &LZ4_streamDecode->internal_donotuse);
    int result;

    DEBUGLOG(5, "LZ4_decompress_fast_continue (toDecodeSize=%i)", originalSize);
    assert(originalSize >= 0);

    if (lz4sd->prefixSize == 0) {
        DEBUGLOG(5, "first invocation : no prefix nor extDict");
        assert(lz4sd->extDictSize == 0);
        result = LZ4_decompress_fast(source, dest, originalSize);
        if (result <= 0) return result;
        /* decoded output becomes the prefix history for the next block */
        lz4sd->prefixSize = (size_t)originalSize;
        lz4sd->prefixEnd = (BYTE*)dest + originalSize;
    } else if (lz4sd->prefixEnd == (BYTE*)dest) {
        DEBUGLOG(5, "continue using existing prefix");
        /* generic path handles both the contiguous prefix and any older extDict */
        result = LZ4_decompress_unsafe_generic(
                        (const BYTE*)source, (BYTE*)dest, originalSize,
                        lz4sd->prefixSize,
                        lz4sd->externalDict, lz4sd->extDictSize);
        if (result <= 0) return result;
        /* note: advances by originalSize (the caller-declared decoded size) */
        lz4sd->prefixSize += (size_t)originalSize;
        lz4sd->prefixEnd  += originalSize;
    } else {
        DEBUGLOG(5, "prefix becomes extDict");
        /* dest moved elsewhere : previous prefix becomes the external dictionary */
        lz4sd->extDictSize = lz4sd->prefixSize;
        lz4sd->externalDict = lz4sd->prefixEnd - lz4sd->extDictSize;
        result = LZ4_decompress_fast_extDict(source, dest, originalSize,
                                             lz4sd->externalDict, lz4sd->extDictSize);
        if (result <= 0) return result;
        /* history restarts at dest : new prefix is just this block */
        lz4sd->prefixSize = (size_t)originalSize;
        lz4sd->prefixEnd  = (BYTE*)dest + originalSize;
    }

    return result;
}
   2710 
   2711 
   2712 /*
   2713 Advanced decoding functions :
   2714 *_usingDict() :
   2715    These decoding functions work the same as "_continue" ones,
   2716    the dictionary must be explicitly provided within parameters
   2717 */
   2718 
   2719 int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const char* dictStart, int dictSize)
   2720 {
   2721    if (dictSize==0)
   2722        return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
   2723    if (dictStart+dictSize == dest) {
   2724        if (dictSize >= 64 KB - 1) {
   2725            return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
   2726        }
   2727        assert(dictSize >= 0);
   2728        return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, (size_t)dictSize);
   2729    }
   2730    assert(dictSize >= 0);
   2731    return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, (size_t)dictSize);
   2732 }
   2733 
   2734 int LZ4_decompress_safe_partial_usingDict(const char* source, char* dest, int compressedSize, int targetOutputSize, int dstCapacity, const char* dictStart, int dictSize)
   2735 {
   2736    if (dictSize==0)
   2737        return LZ4_decompress_safe_partial(source, dest, compressedSize, targetOutputSize, dstCapacity);
   2738    if (dictStart+dictSize == dest) {
   2739        if (dictSize >= 64 KB - 1) {
   2740            return LZ4_decompress_safe_partial_withPrefix64k(source, dest, compressedSize, targetOutputSize, dstCapacity);
   2741        }
   2742        assert(dictSize >= 0);
   2743        return LZ4_decompress_safe_partial_withSmallPrefix(source, dest, compressedSize, targetOutputSize, dstCapacity, (size_t)dictSize);
   2744    }
   2745    assert(dictSize >= 0);
   2746    return LZ4_decompress_safe_partial_forceExtDict(source, dest, compressedSize, targetOutputSize, dstCapacity, dictStart, (size_t)dictSize);
   2747 }
   2748 
   2749 int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
   2750 {
   2751    if (dictSize==0 || dictStart+dictSize == dest)
   2752        return LZ4_decompress_unsafe_generic(
   2753                        (const BYTE*)source, (BYTE*)dest, originalSize,
   2754                        (size_t)dictSize, NULL, 0);
   2755    assert(dictSize >= 0);
   2756    return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, (size_t)dictSize);
   2757 }
   2758 
   2759 
   2760 /*=*************************************************
   2761 *  Obsolete Functions
   2762 ***************************************************/
   2763 /* obsolete compression functions */
   2764 int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize)
   2765 {
   2766    return LZ4_compress_default(source, dest, inputSize, maxOutputSize);
   2767 }
   2768 int LZ4_compress(const char* src, char* dest, int srcSize)
   2769 {
   2770    return LZ4_compress_default(src, dest, srcSize, LZ4_compressBound(srcSize));
   2771 }
   2772 int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize)
   2773 {
   2774    return LZ4_compress_fast_extState(state, src, dst, srcSize, dstSize, 1);
   2775 }
   2776 int LZ4_compress_withState (void* state, const char* src, char* dst, int srcSize)
   2777 {
   2778    return LZ4_compress_fast_extState(state, src, dst, srcSize, LZ4_compressBound(srcSize), 1);
   2779 }
   2780 int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_stream, const char* src, char* dst, int srcSize, int dstCapacity)
   2781 {
   2782    return LZ4_compress_fast_continue(LZ4_stream, src, dst, srcSize, dstCapacity, 1);
   2783 }
   2784 int LZ4_compress_continue (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize)
   2785 {
   2786    return LZ4_compress_fast_continue(LZ4_stream, source, dest, inputSize, LZ4_compressBound(inputSize), 1);
   2787 }
   2788 
   2789 /*
   2790 These decompression functions are deprecated and should no longer be used.
   2791 They are only provided here for compatibility with older user programs.
   2792 - LZ4_uncompress is totally equivalent to LZ4_decompress_fast
   2793 - LZ4_uncompress_unknownOutputSize is totally equivalent to LZ4_decompress_safe
   2794 */
   2795 int LZ4_uncompress (const char* source, char* dest, int outputSize)
   2796 {
   2797    return LZ4_decompress_fast(source, dest, outputSize);
   2798 }
   2799 int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize)
   2800 {
   2801    return LZ4_decompress_safe(source, dest, isize, maxOutputSize);
   2802 }
   2803 
   2804 /* Obsolete Streaming functions */
   2805 
   2806 int LZ4_sizeofStreamState(void) { return sizeof(LZ4_stream_t); }
   2807 
   2808 int LZ4_resetStreamState(void* state, char* inputBuffer)
   2809 {
   2810    (void)inputBuffer;
   2811    LZ4_resetStream((LZ4_stream_t*)state);
   2812    return 0;
   2813 }
   2814 
   2815 #if !defined(LZ4_STATIC_LINKING_ONLY_DISABLE_MEMORY_ALLOCATION)
   2816 void* LZ4_create (char* inputBuffer)
   2817 {
   2818    (void)inputBuffer;
   2819    return LZ4_createStream();
   2820 }
   2821 #endif
   2822 
   2823 char* LZ4_slideInputBuffer (void* state)
   2824 {
   2825    /* avoid const char * -> char * conversion warning */
   2826    return (char *)(uptrval)((LZ4_stream_t*)state)->internal_donotuse.dictionary;
   2827 }
   2828 
   2829 #endif   /* LZ4_COMMONDEFS_ONLY */