tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

trees.c (40937B)


      1 /* trees.c -- output deflated data using Huffman coding
      2 * Copyright (C) 1995-2024 Jean-loup Gailly
      3 * detect_data_type() function provided freely by Cosmin Truta, 2006
      4 * For conditions of distribution and use, see copyright notice in zlib.h
      5 */
      6 
      7 /*
      8 *  ALGORITHM
      9 *
     10 *      The "deflation" process uses several Huffman trees. The more
     11 *      common source values are represented by shorter bit sequences.
     12 *
     13 *      Each code tree is stored in a compressed form which is itself
     14 * a Huffman encoding of the lengths of all the code strings (in
     15 * ascending order by source values).  The actual code strings are
     16 * reconstructed from the lengths in the inflate process, as described
     17 * in the deflate specification.
     18 *
     19 *  REFERENCES
     20 *
     21 *      Deutsch, L.P.,"'Deflate' Compressed Data Format Specification".
     22 *      Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc
     23 *
     24 *      Storer, James A.
     25 *          Data Compression:  Methods and Theory, pp. 49-50.
     26 *          Computer Science Press, 1988.  ISBN 0-7167-8156-5.
     27 *
     28 *      Sedgewick, R.
     29 *          Algorithms, p290.
     30 *          Addison-Wesley, 1983. ISBN 0-201-06672-6.
     31 */
     32 
     33 /* @(#) $Id$ */
     34 
     35 /* #define GEN_TREES_H */
     36 
     37 #include "deflate.h"
     38 
     39 #ifdef ZLIB_DEBUG
     40 #  include <ctype.h>
     41 #endif
     42 
     43 /* ===========================================================================
     44 * Constants
     45 */
     46 
     47 #define MAX_BL_BITS 7
     48 /* Bit length codes must not exceed MAX_BL_BITS bits */
     49 
     50 #define END_BLOCK 256
     51 /* end of block literal code */
     52 
     53 #define REP_3_6      16
     54 /* repeat previous bit length 3-6 times (2 bits of repeat count) */
     55 
     56 #define REPZ_3_10    17
     57 /* repeat a zero length 3-10 times  (3 bits of repeat count) */
     58 
     59 #define REPZ_11_138  18
     60 /* repeat a zero length 11-138 times  (7 bits of repeat count) */
     61 
     62 local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */
     63   = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};
     64 
     65 local const int extra_dbits[D_CODES] /* extra bits for each distance code */
     66   = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
     67 
     68 local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */
     69   = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7};
     70 
     71 local const uch bl_order[BL_CODES]
     72   = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
     73 /* The lengths of the bit length codes are sent in order of decreasing
     74 * probability, to avoid transmitting the lengths for unused bit length codes.
     75 */
     76 
     77 /* ===========================================================================
     78 * Local data. These are initialized only once.
     79 */
     80 
     81 #define DIST_CODE_LEN  512 /* see definition of array dist_code below */
     82 
     83 #if defined(GEN_TREES_H) || !defined(STDC)
     84 /* non ANSI compilers may not accept trees.h */
     85 
     86 local ct_data static_ltree[L_CODES+2];
     87 /* The static literal tree. Since the bit lengths are imposed, there is no
     88 * need for the L_CODES extra codes used during heap construction. However
     89 * The codes 286 and 287 are needed to build a canonical tree (see _tr_init
     90 * below).
     91 */
     92 
     93 local ct_data static_dtree[D_CODES];
     94 /* The static distance tree. (Actually a trivial tree since all codes use
     95 * 5 bits.)
     96 */
     97 
     98 uch _dist_code[DIST_CODE_LEN];
     99 /* Distance codes. The first 256 values correspond to the distances
    100 * 3 .. 258, the last 256 values correspond to the top 8 bits of
    101 * the 15 bit distances.
    102 */
    103 
    104 uch _length_code[MAX_MATCH-MIN_MATCH+1];
    105 /* length code for each normalized match length (0 == MIN_MATCH) */
    106 
    107 local int base_length[LENGTH_CODES];
    108 /* First normalized length for each code (0 = MIN_MATCH) */
    109 
    110 local int base_dist[D_CODES];
    111 /* First normalized distance for each code (0 = distance of 1) */
    112 
    113 #else
    114 #  include "trees.h"
    115 #endif /* GEN_TREES_H */
    116 
    117 struct static_tree_desc_s {
    118    const ct_data *static_tree;  /* static tree or NULL */
    119    const intf *extra_bits;      /* extra bits for each code or NULL */
    120    int     extra_base;          /* base index for extra_bits */
    121    int     elems;               /* max number of elements in the tree */
    122    int     max_length;          /* max bit length for the codes */
    123 };
    124 
    125 #ifdef NO_INIT_GLOBAL_POINTERS
    126 #  define TCONST
    127 #else
    128 #  define TCONST const
    129 #endif
    130 
    131 local TCONST static_tree_desc static_l_desc =
    132 {static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS};
    133 
    134 local TCONST static_tree_desc static_d_desc =
    135 {static_dtree, extra_dbits, 0,          D_CODES, MAX_BITS};
    136 
    137 local TCONST static_tree_desc static_bl_desc =
    138 {(const ct_data *)0, extra_blbits, 0,   BL_CODES, MAX_BL_BITS};
    139 
    140 /* ===========================================================================
    141 * Output a short LSB first on the stream.
    142 * IN assertion: there is enough room in pendingBuf.
    143 */
    144 #define put_short(s, w) { \
    145    put_byte(s, (uch)((w) & 0xff)); \
    146    put_byte(s, (uch)((ush)(w) >> 8)); \
    147 }
    148 
    149 /* ===========================================================================
    150 * Reverse the first len bits of a code, using straightforward code (a faster
    151 * method would use a table)
    152 * IN assertion: 1 <= len <= 15
    153 */
    154 local unsigned bi_reverse(unsigned code, int len) {
    155    register unsigned res = 0;
    156    do {
    157        res |= code & 1;
    158        code >>= 1, res <<= 1;
    159    } while (--len > 0);
    160    return res >> 1;
    161 }
    162 
    163 /* ===========================================================================
    164 * Flush the bit buffer, keeping at most 7 bits in it.
    165 */
    166 local void bi_flush(deflate_state *s) {
    167    if (s->bi_valid == 16) {
    168        put_short(s, s->bi_buf);
    169        s->bi_buf = 0;
    170        s->bi_valid = 0;
    171    } else if (s->bi_valid >= 8) {
    172        put_byte(s, (Byte)s->bi_buf);
    173        s->bi_buf >>= 8;
    174        s->bi_valid -= 8;
    175    }
    176 }
    177 
    178 /* ===========================================================================
    179 * Flush the bit buffer and align the output on a byte boundary
    180 */
    181 local void bi_windup(deflate_state *s) {
    182    if (s->bi_valid > 8) {
    183        put_short(s, s->bi_buf);
    184    } else if (s->bi_valid > 0) {
    185        put_byte(s, (Byte)s->bi_buf);
    186    }
    187    s->bi_buf = 0;
    188    s->bi_valid = 0;
    189 #ifdef ZLIB_DEBUG
    190    s->bits_sent = (s->bits_sent + 7) & ~7;
    191 #endif
    192 }
    193 
    194 /* ===========================================================================
    195 * Generate the codes for a given tree and bit counts (which need not be
    196 * optimal).
    197 * IN assertion: the array bl_count contains the bit length statistics for
    198 * the given tree and the field len is set for all tree elements.
    199 * OUT assertion: the field code is set for all tree elements of non
    200 *     zero code length.
    201 */
    202 local void gen_codes(ct_data *tree, int max_code, ushf *bl_count) {
    203    ush next_code[MAX_BITS+1]; /* next code value for each bit length */
    204    unsigned code = 0;         /* running code value */
    205    int bits;                  /* bit index */
    206    int n;                     /* code index */
    207 
    208    /* The distribution counts are first used to generate the code values
    209     * without bit reversal.
    210     */
    211    for (bits = 1; bits <= MAX_BITS; bits++) {
    212        code = (code + bl_count[bits - 1]) << 1;
    213        next_code[bits] = (ush)code;
    214    }
    215    /* Check that the bit counts in bl_count are consistent. The last code
    216     * must be all ones.
    217     */
    218    Assert (code + bl_count[MAX_BITS] - 1 == (1 << MAX_BITS) - 1,
    219            "inconsistent bit counts");
    220    Tracev((stderr,"\ngen_codes: max_code %d ", max_code));
    221 
    222    for (n = 0;  n <= max_code; n++) {
    223        int len = tree[n].Len;
    224        if (len == 0) continue;
    225        /* Now reverse the bits */
    226        tree[n].Code = (ush)bi_reverse(next_code[len]++, len);
    227 
    228        Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",
    229            n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len] - 1));
    230    }
    231 }
    232 
    233 #ifdef GEN_TREES_H
    234 local void gen_trees_header(void);
    235 #endif
    236 
    237 #ifndef ZLIB_DEBUG
    238 #  define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len)
    239   /* Send a code of the given tree. c and tree must not have side effects */
    240 
    241 #else /* !ZLIB_DEBUG */
    242 #  define send_code(s, c, tree) \
    243     { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \
    244       send_bits(s, tree[c].Code, tree[c].Len); }
    245 #endif
    246 
    247 /* ===========================================================================
    248 * Send a value on a given number of bits.
    249 * IN assertion: length <= 16 and value fits in length bits.
    250 */
    251 #ifdef ZLIB_DEBUG
    252 local void send_bits(deflate_state *s, int value, int length) {
    253    Tracevv((stderr," l %2d v %4x ", length, value));
    254    Assert(length > 0 && length <= 15, "invalid length");
    255    s->bits_sent += (ulg)length;
    256 
    257    /* If not enough room in bi_buf, use (valid) bits from bi_buf and
    258     * (16 - bi_valid) bits from value, leaving (width - (16 - bi_valid))
    259     * unused bits in value.
    260     */
    261    if (s->bi_valid > (int)Buf_size - length) {
    262        s->bi_buf |= (ush)value << s->bi_valid;
    263        put_short(s, s->bi_buf);
    264        s->bi_buf = (ush)value >> (Buf_size - s->bi_valid);
    265        s->bi_valid += length - Buf_size;
    266    } else {
    267        s->bi_buf |= (ush)value << s->bi_valid;
    268        s->bi_valid += length;
    269    }
    270 }
    271 #else /* !ZLIB_DEBUG */
    272 
    273 #define send_bits(s, value, length) \
    274 { int len = length;\
    275  if (s->bi_valid > (int)Buf_size - len) {\
    276    int val = (int)value;\
    277    s->bi_buf |= (ush)val << s->bi_valid;\
    278    put_short(s, s->bi_buf);\
    279    s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\
    280    s->bi_valid += len - Buf_size;\
    281  } else {\
    282    s->bi_buf |= (ush)(value) << s->bi_valid;\
    283    s->bi_valid += len;\
    284  }\
    285 }
    286 #endif /* ZLIB_DEBUG */
    287 
    288 
    289 /* the arguments must not have side effects */
    290 
    291 /* ===========================================================================
    292 * Initialize the various 'constant' tables.
    293 */
    294 local void tr_static_init(void) {
    295 #if defined(GEN_TREES_H) || !defined(STDC)
    296    static int static_init_done = 0;
    297    int n;        /* iterates over tree elements */
    298    int bits;     /* bit counter */
    299    int length;   /* length value */
    300    int code;     /* code value */
    301    int dist;     /* distance index */
    302    ush bl_count[MAX_BITS+1];
    303    /* number of codes at each bit length for an optimal tree */
    304 
    305    if (static_init_done) return;
    306 
    307    /* For some embedded targets, global variables are not initialized: */
    308 #ifdef NO_INIT_GLOBAL_POINTERS
    309    static_l_desc.static_tree = static_ltree;
    310    static_l_desc.extra_bits = extra_lbits;
    311    static_d_desc.static_tree = static_dtree;
    312    static_d_desc.extra_bits = extra_dbits;
    313    static_bl_desc.extra_bits = extra_blbits;
    314 #endif
    315 
    316    /* Initialize the mapping length (0..255) -> length code (0..28) */
    317    length = 0;
    318    for (code = 0; code < LENGTH_CODES-1; code++) {
    319        base_length[code] = length;
    320        for (n = 0; n < (1 << extra_lbits[code]); n++) {
    321            _length_code[length++] = (uch)code;
    322        }
    323    }
    324    Assert (length == 256, "tr_static_init: length != 256");
    325    /* Note that the length 255 (match length 258) can be represented
    326     * in two different ways: code 284 + 5 bits or code 285, so we
    327     * overwrite length_code[255] to use the best encoding:
    328     */
    329    _length_code[length - 1] = (uch)code;
    330 
    331    /* Initialize the mapping dist (0..32K) -> dist code (0..29) */
    332    dist = 0;
    333    for (code = 0 ; code < 16; code++) {
    334        base_dist[code] = dist;
    335        for (n = 0; n < (1 << extra_dbits[code]); n++) {
    336            _dist_code[dist++] = (uch)code;
    337        }
    338    }
    339    Assert (dist == 256, "tr_static_init: dist != 256");
    340    dist >>= 7; /* from now on, all distances are divided by 128 */
    341    for ( ; code < D_CODES; code++) {
    342        base_dist[code] = dist << 7;
    343        for (n = 0; n < (1 << (extra_dbits[code] - 7)); n++) {
    344            _dist_code[256 + dist++] = (uch)code;
    345        }
    346    }
    347    Assert (dist == 256, "tr_static_init: 256 + dist != 512");
    348 
    349    /* Construct the codes of the static literal tree */
    350    for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0;
    351    n = 0;
    352    while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++;
    353    while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++;
    354    while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++;
    355    while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++;
    356    /* Codes 286 and 287 do not exist, but we must include them in the
    357     * tree construction to get a canonical Huffman tree (longest code
    358     * all ones)
    359     */
    360    gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count);
    361 
    362    /* The static distance tree is trivial: */
    363    for (n = 0; n < D_CODES; n++) {
    364        static_dtree[n].Len = 5;
    365        static_dtree[n].Code = bi_reverse((unsigned)n, 5);
    366    }
    367    static_init_done = 1;
    368 
    369 #  ifdef GEN_TREES_H
    370    gen_trees_header();
    371 #  endif
    372 #endif /* defined(GEN_TREES_H) || !defined(STDC) */
    373 }
    374 
    375 /* ===========================================================================
    376 * Generate the file trees.h describing the static trees.
    377 */
    378 #ifdef GEN_TREES_H
    379 #  ifndef ZLIB_DEBUG
    380 #    include <stdio.h>
    381 #  endif
    382 
    383 #  define SEPARATOR(i, last, width) \
    384      ((i) == (last)? "\n};\n\n" :    \
    385       ((i) % (width) == (width) - 1 ? ",\n" : ", "))
    386 
    387 void gen_trees_header(void) {
    388    FILE *header = fopen("trees.h", "w");
    389    int i;
    390 
    391    Assert (header != NULL, "Can't open trees.h");
    392    fprintf(header,
    393            "/* header created automatically with -DGEN_TREES_H */\n\n");
    394 
    395    fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n");
    396    for (i = 0; i < L_CODES+2; i++) {
    397        fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code,
    398                static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5));
    399    }
    400 
    401    fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n");
    402    for (i = 0; i < D_CODES; i++) {
    403        fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code,
    404                static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5));
    405    }
    406 
    407    fprintf(header, "const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {\n");
    408    for (i = 0; i < DIST_CODE_LEN; i++) {
    409        fprintf(header, "%2u%s", _dist_code[i],
    410                SEPARATOR(i, DIST_CODE_LEN-1, 20));
    411    }
    412 
    413    fprintf(header,
    414        "const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {\n");
    415    for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) {
    416        fprintf(header, "%2u%s", _length_code[i],
    417                SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20));
    418    }
    419 
    420    fprintf(header, "local const int base_length[LENGTH_CODES] = {\n");
    421    for (i = 0; i < LENGTH_CODES; i++) {
    422        fprintf(header, "%1u%s", base_length[i],
    423                SEPARATOR(i, LENGTH_CODES-1, 20));
    424    }
    425 
    426    fprintf(header, "local const int base_dist[D_CODES] = {\n");
    427    for (i = 0; i < D_CODES; i++) {
    428        fprintf(header, "%5u%s", base_dist[i],
    429                SEPARATOR(i, D_CODES-1, 10));
    430    }
    431 
    432    fclose(header);
    433 }
    434 #endif /* GEN_TREES_H */
    435 
    436 /* ===========================================================================
    437 * Initialize a new block.
    438 */
    439 local void init_block(deflate_state *s) {
    440    int n; /* iterates over tree elements */
    441 
    442    /* Initialize the trees. */
    443    for (n = 0; n < L_CODES;  n++) s->dyn_ltree[n].Freq = 0;
    444    for (n = 0; n < D_CODES;  n++) s->dyn_dtree[n].Freq = 0;
    445    for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0;
    446 
    447    s->dyn_ltree[END_BLOCK].Freq = 1;
    448    s->opt_len = s->static_len = 0L;
    449    s->sym_next = s->matches = 0;
    450 }
    451 
    452 /* ===========================================================================
    453 * Initialize the tree data structures for a new zlib stream.
    454 */
    455 void ZLIB_INTERNAL _tr_init(deflate_state *s) {
    456    tr_static_init();
    457 
    458    s->l_desc.dyn_tree = s->dyn_ltree;
    459    s->l_desc.stat_desc = &static_l_desc;
    460 
    461    s->d_desc.dyn_tree = s->dyn_dtree;
    462    s->d_desc.stat_desc = &static_d_desc;
    463 
    464    s->bl_desc.dyn_tree = s->bl_tree;
    465    s->bl_desc.stat_desc = &static_bl_desc;
    466 
    467    s->bi_buf = 0;
    468    s->bi_valid = 0;
    469 #ifdef ZLIB_DEBUG
    470    s->compressed_len = 0L;
    471    s->bits_sent = 0L;
    472 #endif
    473 
    474    /* Initialize the first block of the first file: */
    475    init_block(s);
    476 }
    477 
    478 #define SMALLEST 1
    479 /* Index within the heap array of least frequent node in the Huffman tree */
    480 
    481 
    482 /* ===========================================================================
    483 * Remove the smallest element from the heap and recreate the heap with
    484 * one less element. Updates heap and heap_len.
    485 */
    486 #define pqremove(s, tree, top) \
    487 {\
    488    top = s->heap[SMALLEST]; \
    489    s->heap[SMALLEST] = s->heap[s->heap_len--]; \
    490    pqdownheap(s, tree, SMALLEST); \
    491 }
    492 
    493 /* ===========================================================================
    494 * Compares to subtrees, using the tree depth as tie breaker when
    495 * the subtrees have equal frequency. This minimizes the worst case length.
    496 */
    497 #define smaller(tree, n, m, depth) \
    498   (tree[n].Freq < tree[m].Freq || \
    499   (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m]))
    500 
    501 /* ===========================================================================
    502 * Restore the heap property by moving down the tree starting at node k,
    503 * exchanging a node with the smallest of its two sons if necessary, stopping
    504 * when the heap property is re-established (each father smaller than its
    505 * two sons).
    506 */
    507 local void pqdownheap(deflate_state *s, ct_data *tree, int k) {
    508    int v = s->heap[k];
    509    int j = k << 1;  /* left son of k */
    510    while (j <= s->heap_len) {
    511        /* Set j to the smallest of the two sons: */
    512        if (j < s->heap_len &&
    513            smaller(tree, s->heap[j + 1], s->heap[j], s->depth)) {
    514            j++;
    515        }
    516        /* Exit if v is smaller than both sons */
    517        if (smaller(tree, v, s->heap[j], s->depth)) break;
    518 
    519        /* Exchange v with the smallest son */
    520        s->heap[k] = s->heap[j];  k = j;
    521 
    522        /* And continue down the tree, setting j to the left son of k */
    523        j <<= 1;
    524    }
    525    s->heap[k] = v;
    526 }
    527 
    528 /* ===========================================================================
    529 * Compute the optimal bit lengths for a tree and update the total bit length
    530 * for the current block.
    531 * IN assertion: the fields freq and dad are set, heap[heap_max] and
    532 *    above are the tree nodes sorted by increasing frequency.
    533 * OUT assertions: the field len is set to the optimal bit length, the
    534 *     array bl_count contains the frequencies for each bit length.
    535 *     The length opt_len is updated; static_len is also updated if stree is
    536 *     not null.
    537 */
    538 local void gen_bitlen(deflate_state *s, tree_desc *desc) {
    539    ct_data *tree        = desc->dyn_tree;
    540    int max_code         = desc->max_code;
    541    const ct_data *stree = desc->stat_desc->static_tree;
    542    const intf *extra    = desc->stat_desc->extra_bits;
    543    int base             = desc->stat_desc->extra_base;
    544    int max_length       = desc->stat_desc->max_length;
    545    int h;              /* heap index */
    546    int n, m;           /* iterate over the tree elements */
    547    int bits;           /* bit length */
    548    int xbits;          /* extra bits */
    549    ush f;              /* frequency */
    550    int overflow = 0;   /* number of elements with bit length too large */
    551 
    552    for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0;
    553 
    554    /* In a first pass, compute the optimal bit lengths (which may
    555     * overflow in the case of the bit length tree).
    556     */
    557    tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */
    558 
    559    for (h = s->heap_max + 1; h < HEAP_SIZE; h++) {
    560        n = s->heap[h];
    561        bits = tree[tree[n].Dad].Len + 1;
    562        if (bits > max_length) bits = max_length, overflow++;
    563        tree[n].Len = (ush)bits;
    564        /* We overwrite tree[n].Dad which is no longer needed */
    565 
    566        if (n > max_code) continue; /* not a leaf node */
    567 
    568        s->bl_count[bits]++;
    569        xbits = 0;
    570        if (n >= base) xbits = extra[n - base];
    571        f = tree[n].Freq;
    572        s->opt_len += (ulg)f * (unsigned)(bits + xbits);
    573        if (stree) s->static_len += (ulg)f * (unsigned)(stree[n].Len + xbits);
    574    }
    575    if (overflow == 0) return;
    576 
    577    Tracev((stderr,"\nbit length overflow\n"));
    578    /* This happens for example on obj2 and pic of the Calgary corpus */
    579 
    580    /* Find the first bit length which could increase: */
    581    do {
    582        bits = max_length - 1;
    583        while (s->bl_count[bits] == 0) bits--;
    584        s->bl_count[bits]--;        /* move one leaf down the tree */
    585        s->bl_count[bits + 1] += 2; /* move one overflow item as its brother */
    586        s->bl_count[max_length]--;
    587        /* The brother of the overflow item also moves one step up,
    588         * but this does not affect bl_count[max_length]
    589         */
    590        overflow -= 2;
    591    } while (overflow > 0);
    592 
    593    /* Now recompute all bit lengths, scanning in increasing frequency.
    594     * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
    595     * lengths instead of fixing only the wrong ones. This idea is taken
    596     * from 'ar' written by Haruhiko Okumura.)
    597     */
    598    for (bits = max_length; bits != 0; bits--) {
    599        n = s->bl_count[bits];
    600        while (n != 0) {
    601            m = s->heap[--h];
    602            if (m > max_code) continue;
    603            if ((unsigned) tree[m].Len != (unsigned) bits) {
    604                Tracev((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits));
    605                s->opt_len += ((ulg)bits - tree[m].Len) * tree[m].Freq;
    606                tree[m].Len = (ush)bits;
    607            }
    608            n--;
    609        }
    610    }
    611 }
    612 
    613 #ifdef DUMP_BL_TREE
    614 #  include <stdio.h>
    615 #endif
    616 
    617 /* ===========================================================================
    618 * Construct one Huffman tree and assigns the code bit strings and lengths.
    619 * Update the total bit length for the current block.
    620 * IN assertion: the field freq is set for all tree elements.
    621 * OUT assertions: the fields len and code are set to the optimal bit length
    622 *     and corresponding code. The length opt_len is updated; static_len is
    623 *     also updated if stree is not null. The field max_code is set.
    624 */
    625 local void build_tree(deflate_state *s, tree_desc *desc) {
    626    ct_data *tree         = desc->dyn_tree;
    627    const ct_data *stree  = desc->stat_desc->static_tree;
    628    int elems             = desc->stat_desc->elems;
    629    int n, m;          /* iterate over heap elements */
    630    int max_code = -1; /* largest code with non zero frequency */
    631    int node;          /* new node being created */
    632 
    633    /* Construct the initial heap, with least frequent element in
    634     * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n + 1].
    635     * heap[0] is not used.
    636     */
    637    s->heap_len = 0, s->heap_max = HEAP_SIZE;
    638 
    639    for (n = 0; n < elems; n++) {
    640        if (tree[n].Freq != 0) {
    641            s->heap[++(s->heap_len)] = max_code = n;
    642            s->depth[n] = 0;
    643        } else {
    644            tree[n].Len = 0;
    645        }
    646    }
    647 
    648    /* The pkzip format requires that at least one distance code exists,
    649     * and that at least one bit should be sent even if there is only one
    650     * possible code. So to avoid special checks later on we force at least
    651     * two codes of non zero frequency.
    652     */
    653    while (s->heap_len < 2) {
    654        node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0);
    655        tree[node].Freq = 1;
    656        s->depth[node] = 0;
    657        s->opt_len--; if (stree) s->static_len -= stree[node].Len;
    658        /* node is 0 or 1 so it does not have extra bits */
    659    }
    660    desc->max_code = max_code;
    661 
    662    /* The elements heap[heap_len/2 + 1 .. heap_len] are leaves of the tree,
    663     * establish sub-heaps of increasing lengths:
    664     */
    665    for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n);
    666 
    667    /* Construct the Huffman tree by repeatedly combining the least two
    668     * frequent nodes.
    669     */
    670    node = elems;              /* next internal node of the tree */
    671    do {
    672        pqremove(s, tree, n);  /* n = node of least frequency */
    673        m = s->heap[SMALLEST]; /* m = node of next least frequency */
    674 
    675        s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */
    676        s->heap[--(s->heap_max)] = m;
    677 
    678        /* Create a new node father of n and m */
    679        tree[node].Freq = tree[n].Freq + tree[m].Freq;
    680        s->depth[node] = (uch)((s->depth[n] >= s->depth[m] ?
    681                                s->depth[n] : s->depth[m]) + 1);
    682        tree[n].Dad = tree[m].Dad = (ush)node;
    683 #ifdef DUMP_BL_TREE
    684        if (tree == s->bl_tree) {
    685            fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)",
    686                    node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq);
    687        }
    688 #endif
    689        /* and insert the new node in the heap */
    690        s->heap[SMALLEST] = node++;
    691        pqdownheap(s, tree, SMALLEST);
    692 
    693    } while (s->heap_len >= 2);
    694 
    695    s->heap[--(s->heap_max)] = s->heap[SMALLEST];
    696 
    697    /* At this point, the fields freq and dad are set. We can now
    698     * generate the bit lengths.
    699     */
    700    gen_bitlen(s, (tree_desc *)desc);
    701 
    702    /* The field len is now set, we can generate the bit codes */
    703    gen_codes ((ct_data *)tree, max_code, s->bl_count);
    704 }
    705 
    706 /* ===========================================================================
    707 * Scan a literal or distance tree to determine the frequencies of the codes
    708 * in the bit length tree.
    709 */
    710 local void scan_tree(deflate_state *s, ct_data *tree, int max_code) {
    711    int n;                     /* iterates over all tree elements */
    712    int prevlen = -1;          /* last emitted length */
    713    int curlen;                /* length of current code */
    714    int nextlen = tree[0].Len; /* length of next code */
    715    int count = 0;             /* repeat count of the current code */
    716    int max_count = 7;         /* max repeat count */
    717    int min_count = 4;         /* min repeat count */
    718 
    719    if (nextlen == 0) max_count = 138, min_count = 3;
    720    tree[max_code + 1].Len = (ush)0xffff; /* guard */
    721 
    722    for (n = 0; n <= max_code; n++) {
    723        curlen = nextlen; nextlen = tree[n + 1].Len;
    724        if (++count < max_count && curlen == nextlen) {
    725            continue;
    726        } else if (count < min_count) {
    727            s->bl_tree[curlen].Freq += count;
    728        } else if (curlen != 0) {
    729            if (curlen != prevlen) s->bl_tree[curlen].Freq++;
    730            s->bl_tree[REP_3_6].Freq++;
    731        } else if (count <= 10) {
    732            s->bl_tree[REPZ_3_10].Freq++;
    733        } else {
    734            s->bl_tree[REPZ_11_138].Freq++;
    735        }
    736        count = 0; prevlen = curlen;
    737        if (nextlen == 0) {
    738            max_count = 138, min_count = 3;
    739        } else if (curlen == nextlen) {
    740            max_count = 6, min_count = 3;
    741        } else {
    742            max_count = 7, min_count = 4;
    743        }
    744    }
    745 }
    746 
    747 /* ===========================================================================
    748 * Send a literal or distance tree in compressed form, using the codes in
    749 * bl_tree.
    750 */
    751 local void send_tree(deflate_state *s, ct_data *tree, int max_code) {
    752    int n;                     /* iterates over all tree elements */
    753    int prevlen = -1;          /* last emitted length */
    754    int curlen;                /* length of current code */
    755    int nextlen = tree[0].Len; /* length of next code */
    756    int count = 0;             /* repeat count of the current code */
    757    int max_count = 7;         /* max repeat count */
    758    int min_count = 4;         /* min repeat count */
    759 
    760    /* tree[max_code + 1].Len = -1; */  /* guard already set */
    761    if (nextlen == 0) max_count = 138, min_count = 3;
    762 
    763    for (n = 0; n <= max_code; n++) {
    764        curlen = nextlen; nextlen = tree[n + 1].Len;
    765        if (++count < max_count && curlen == nextlen) {
    766            continue;
    767        } else if (count < min_count) {
    768            do { send_code(s, curlen, s->bl_tree); } while (--count != 0);
    769 
    770        } else if (curlen != 0) {
    771            if (curlen != prevlen) {
    772                send_code(s, curlen, s->bl_tree); count--;
    773            }
    774            Assert(count >= 3 && count <= 6, " 3_6?");
    775            send_code(s, REP_3_6, s->bl_tree); send_bits(s, count - 3, 2);
    776 
    777        } else if (count <= 10) {
    778            send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count - 3, 3);
    779 
    780        } else {
    781            send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count - 11, 7);
    782        }
    783        count = 0; prevlen = curlen;
    784        if (nextlen == 0) {
    785            max_count = 138, min_count = 3;
    786        } else if (curlen == nextlen) {
    787            max_count = 6, min_count = 3;
    788        } else {
    789            max_count = 7, min_count = 4;
    790        }
    791    }
    792 }
    793 
    794 /* ===========================================================================
    795 * Construct the Huffman tree for the bit lengths and return the index in
    796 * bl_order of the last bit length code to send.
    797 */
    798 local int build_bl_tree(deflate_state *s) {
    799    int max_blindex;  /* index of last bit length code of non zero freq */
    800 
    801    /* Determine the bit length frequencies for literal and distance trees */
    802    scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code);
    803    scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code);
    804 
    805    /* Build the bit length tree: */
    806    build_tree(s, (tree_desc *)(&(s->bl_desc)));
    807    /* opt_len now includes the length of the tree representations, except the
    808     * lengths of the bit lengths codes and the 5 + 5 + 4 bits for the counts.
    809     */
    810 
    811    /* Determine the number of bit length codes to send. The pkzip format
    812     * requires that at least 4 bit length codes be sent. (appnote.txt says
    813     * 3 but the actual value used is 4.)
    814     */
    815    for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) {
    816        if (s->bl_tree[bl_order[max_blindex]].Len != 0) break;
    817    }
    818    /* Update opt_len to include the bit length tree and counts */
    819    s->opt_len += 3*((ulg)max_blindex + 1) + 5 + 5 + 4;
    820    Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld",
    821            s->opt_len, s->static_len));
    822 
    823    return max_blindex;
    824 }
    825 
    826 /* ===========================================================================
    827 * Send the header for a block using dynamic Huffman trees: the counts, the
    828 * lengths of the bit length codes, the literal tree and the distance tree.
    829 * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
    830 */
    831 local void send_all_trees(deflate_state *s, int lcodes, int dcodes,
    832                          int blcodes) {
    833    int rank;                    /* index in bl_order */
    834 
    835    Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes");
    836    Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES,
    837            "too many codes");
    838    Tracev((stderr, "\nbl counts: "));
    839    send_bits(s, lcodes - 257, 5);  /* not +255 as stated in appnote.txt */
    840    send_bits(s, dcodes - 1,   5);
    841    send_bits(s, blcodes - 4,  4);  /* not -3 as stated in appnote.txt */
    842    for (rank = 0; rank < blcodes; rank++) {
    843        Tracev((stderr, "\nbl code %2d ", bl_order[rank]));
    844        send_bits(s, s->bl_tree[bl_order[rank]].Len, 3);
    845    }
    846    Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent));
    847 
    848    send_tree(s, (ct_data *)s->dyn_ltree, lcodes - 1);  /* literal tree */
    849    Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent));
    850 
    851    send_tree(s, (ct_data *)s->dyn_dtree, dcodes - 1);  /* distance tree */
    852    Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent));
    853 }
    854 
    855 /* ===========================================================================
    856 * Send a stored block
    857 */
    858 void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf,
    859                                    ulg stored_len, int last) {
    860    send_bits(s, (STORED_BLOCK<<1) + last, 3);  /* send block type */
    861    bi_windup(s);        /* align on byte boundary */
    862    put_short(s, (ush)stored_len);
    863    put_short(s, (ush)~stored_len);
    864    if (stored_len)
    865        zmemcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len);
    866    s->pending += stored_len;
    867 #ifdef ZLIB_DEBUG
    868    s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L;
    869    s->compressed_len += (stored_len + 4) << 3;
    870    s->bits_sent += 2*16;
    871    s->bits_sent += stored_len << 3;
    872 #endif
    873 }
    874 
    875 /* ===========================================================================
    876 * Flush the bits in the bit buffer to pending output (leaves at most 7 bits)
    877 */
    878 void ZLIB_INTERNAL _tr_flush_bits(deflate_state *s) {
    879    bi_flush(s);
    880 }
    881 
    882 /* ===========================================================================
    883 * Send one empty static block to give enough lookahead for inflate.
    884 * This takes 10 bits, of which 7 may remain in the bit buffer.
    885 */
    886 void ZLIB_INTERNAL _tr_align(deflate_state *s) {
    887    send_bits(s, STATIC_TREES<<1, 3);
    888    send_code(s, END_BLOCK, static_ltree);
    889 #ifdef ZLIB_DEBUG
    890    s->compressed_len += 10L; /* 3 for block type, 7 for EOB */
    891 #endif
    892    bi_flush(s);
    893 }
    894 
    895 /* ===========================================================================
    896 * Send the block data compressed using the given Huffman trees
    897 */
    898 local void compress_block(deflate_state *s, const ct_data *ltree,
    899                          const ct_data *dtree) {
    900    unsigned dist;      /* distance of matched string */
    901    int lc;             /* match length or unmatched char (if dist == 0) */
    902    unsigned sx = 0;    /* running index in symbol buffers */
    903    unsigned code;      /* the code to send */
    904    int extra;          /* number of extra bits to send */
    905 
    906    if (s->sym_next != 0) do {
    907 #ifdef LIT_MEM
    908        dist = s->d_buf[sx];
    909        lc = s->l_buf[sx++];
    910 #else
    911        dist = s->sym_buf[sx++] & 0xff;
    912        dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8;
    913        lc = s->sym_buf[sx++];
    914 #endif
    915        if (dist == 0) {
    916            send_code(s, lc, ltree); /* send a literal byte */
    917            Tracecv(isgraph(lc), (stderr," '%c' ", lc));
    918        } else {
    919            /* Here, lc is the match length - MIN_MATCH */
    920            code = _length_code[lc];
    921            send_code(s, code + LITERALS + 1, ltree);   /* send length code */
    922            extra = extra_lbits[code];
    923            if (extra != 0) {
    924                lc -= base_length[code];
    925                send_bits(s, lc, extra);       /* send the extra length bits */
    926            }
    927            dist--; /* dist is now the match distance - 1 */
    928            code = d_code(dist);
    929            Assert (code < D_CODES, "bad d_code");
    930 
    931            send_code(s, code, dtree);       /* send the distance code */
    932            extra = extra_dbits[code];
    933            if (extra != 0) {
    934                dist -= (unsigned)base_dist[code];
    935                send_bits(s, dist, extra);   /* send the extra distance bits */
    936            }
    937        } /* literal or match pair ? */
    938 
    939        /* Check for no overlay of pending_buf on needed symbols */
    940 #ifdef LIT_MEM
    941        Assert(s->pending < 2 * (s->lit_bufsize + sx), "pendingBuf overflow");
    942 #else
    943        Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow");
    944 #endif
    945 
    946    } while (sx < s->sym_next);
    947 
    948    send_code(s, END_BLOCK, ltree);
    949 }
    950 
    951 /* ===========================================================================
    952 * Check if the data type is TEXT or BINARY, using the following algorithm:
    953 * - TEXT if the two conditions below are satisfied:
    954 *    a) There are no non-portable control characters belonging to the
    955 *       "block list" (0..6, 14..25, 28..31).
    956 *    b) There is at least one printable character belonging to the
    957 *       "allow list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255).
    958 * - BINARY otherwise.
    959 * - The following partially-portable control characters form a
    960 *   "gray list" that is ignored in this detection algorithm:
    961 *   (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}).
    962 * IN assertion: the fields Freq of dyn_ltree are set.
    963 */
    964 local int detect_data_type(deflate_state *s) {
    965    /* block_mask is the bit mask of block-listed bytes
    966     * set bits 0..6, 14..25, and 28..31
    967     * 0xf3ffc07f = binary 11110011111111111100000001111111
    968     */
    969    unsigned long block_mask = 0xf3ffc07fUL;
    970    int n;
    971 
    972    /* Check for non-textual ("block-listed") bytes. */
    973    for (n = 0; n <= 31; n++, block_mask >>= 1)
    974        if ((block_mask & 1) && (s->dyn_ltree[n].Freq != 0))
    975            return Z_BINARY;
    976 
    977    /* Check for textual ("allow-listed") bytes. */
    978    if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0
    979            || s->dyn_ltree[13].Freq != 0)
    980        return Z_TEXT;
    981    for (n = 32; n < LITERALS; n++)
    982        if (s->dyn_ltree[n].Freq != 0)
    983            return Z_TEXT;
    984 
    985    /* There are no "block-listed" or "allow-listed" bytes:
    986     * this stream either is empty or has tolerated ("gray-listed") bytes only.
    987     */
    988    return Z_BINARY;
    989 }
    990 
    991 /* ===========================================================================
    992 * Determine the best encoding for the current block: dynamic trees, static
    993 * trees or store, and write out the encoded block.
    994 */
    995 void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, charf *buf,
    996                                   ulg stored_len, int last) {
    997    ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
    998    int max_blindex = 0;  /* index of last bit length code of non zero freq */
    999 
   1000    /* Build the Huffman trees unless a stored block is forced */
   1001    if (s->level > 0) {
   1002 
   1003        /* Check if the file is binary or text */
   1004        if (s->strm->data_type == Z_UNKNOWN)
   1005            s->strm->data_type = detect_data_type(s);
   1006 
   1007        /* Construct the literal and distance trees */
   1008        build_tree(s, (tree_desc *)(&(s->l_desc)));
   1009        Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len,
   1010                s->static_len));
   1011 
   1012        build_tree(s, (tree_desc *)(&(s->d_desc)));
   1013        Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len,
   1014                s->static_len));
   1015        /* At this point, opt_len and static_len are the total bit lengths of
   1016         * the compressed block data, excluding the tree representations.
   1017         */
   1018 
   1019        /* Build the bit length tree for the above two trees, and get the index
   1020         * in bl_order of the last bit length code to send.
   1021         */
   1022        max_blindex = build_bl_tree(s);
   1023 
   1024        /* Determine the best encoding. Compute the block lengths in bytes. */
   1025        opt_lenb = (s->opt_len + 3 + 7) >> 3;
   1026        static_lenb = (s->static_len + 3 + 7) >> 3;
   1027 
   1028        Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ",
   1029                opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,
   1030                s->sym_next / 3));
   1031 
   1032 #ifndef FORCE_STATIC
   1033        if (static_lenb <= opt_lenb || s->strategy == Z_FIXED)
   1034 #endif
   1035            opt_lenb = static_lenb;
   1036 
   1037    } else {
   1038        Assert(buf != (char*)0, "lost buf");
   1039        opt_lenb = static_lenb = stored_len + 5; /* force a stored block */
   1040    }
   1041 
   1042 #ifdef FORCE_STORED
   1043    if (buf != (char*)0) { /* force stored block */
   1044 #else
   1045    if (stored_len + 4 <= opt_lenb && buf != (char*)0) {
   1046                       /* 4: two words for the lengths */
   1047 #endif
   1048        /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
   1049         * Otherwise we can't have processed more than WSIZE input bytes since
   1050         * the last block flush, because compression would have been
   1051         * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
   1052         * transform a block into a stored block.
   1053         */
   1054        _tr_stored_block(s, buf, stored_len, last);
   1055 
   1056    } else if (static_lenb == opt_lenb) {
   1057        send_bits(s, (STATIC_TREES<<1) + last, 3);
   1058        compress_block(s, (const ct_data *)static_ltree,
   1059                       (const ct_data *)static_dtree);
   1060 #ifdef ZLIB_DEBUG
   1061        s->compressed_len += 3 + s->static_len;
   1062 #endif
   1063    } else {
   1064        send_bits(s, (DYN_TREES<<1) + last, 3);
   1065        send_all_trees(s, s->l_desc.max_code + 1, s->d_desc.max_code + 1,
   1066                       max_blindex + 1);
   1067        compress_block(s, (const ct_data *)s->dyn_ltree,
   1068                       (const ct_data *)s->dyn_dtree);
   1069 #ifdef ZLIB_DEBUG
   1070        s->compressed_len += 3 + s->opt_len;
   1071 #endif
   1072    }
   1073    Assert (s->compressed_len == s->bits_sent, "bad compressed size");
   1074    /* The above check is made mod 2^32, for files larger than 512 MB
   1075     * and uLong implemented on 32 bits.
   1076     */
   1077    init_block(s);
   1078 
   1079    if (last) {
   1080        bi_windup(s);
   1081 #ifdef ZLIB_DEBUG
   1082        s->compressed_len += 7;  /* align on byte boundary */
   1083 #endif
   1084    }
   1085    Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len >> 3,
   1086           s->compressed_len - 7*last));
   1087 }
   1088 
   1089 /* ===========================================================================
   1090 * Save the match info and tally the frequency counts. Return true if
   1091 * the current block must be flushed.
   1092 */
   1093 int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc) {
   1094 #ifdef LIT_MEM
   1095    s->d_buf[s->sym_next] = (ush)dist;
   1096    s->l_buf[s->sym_next++] = (uch)lc;
   1097 #else
   1098    s->sym_buf[s->sym_next++] = (uch)dist;
   1099    s->sym_buf[s->sym_next++] = (uch)(dist >> 8);
   1100    s->sym_buf[s->sym_next++] = (uch)lc;
   1101 #endif
   1102    if (dist == 0) {
   1103        /* lc is the unmatched char */
   1104        s->dyn_ltree[lc].Freq++;
   1105    } else {
   1106        s->matches++;
   1107        /* Here, lc is the match length - MIN_MATCH */
   1108        dist--;             /* dist = match distance - 1 */
   1109        Assert((ush)dist < (ush)MAX_DIST(s) &&
   1110               (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) &&
   1111               (ush)d_code(dist) < (ush)D_CODES,  "_tr_tally: bad match");
   1112 
   1113        s->dyn_ltree[_length_code[lc] + LITERALS + 1].Freq++;
   1114        s->dyn_dtree[d_code(dist)].Freq++;
   1115    }
   1116    return (s->sym_next == s->sym_end);
   1117 }