tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

bytestrie.cpp (13947B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *   Copyright (C) 2010-2011, International Business Machines
      6 *   Corporation and others.  All Rights Reserved.
      7 *******************************************************************************
      8 *   file name:  bytestrie.cpp
      9 *   encoding:   UTF-8
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2010sep25
     14 *   created by: Markus W. Scherer
     15 */
     16 
     17 #include "unicode/utypes.h"
     18 #include "unicode/bytestream.h"
     19 #include "unicode/bytestrie.h"
     20 #include "unicode/uobject.h"
     21 #include "cmemory.h"
     22 #include "uassert.h"
     23 
     24 U_NAMESPACE_BEGIN
     25 
     26 BytesTrie::~BytesTrie() {
     27    uprv_free(ownedArray_);
     28 }
     29 
     30 // lead byte already shifted right by 1.
     31 int32_t
     32 BytesTrie::readValue(const uint8_t *pos, int32_t leadByte) {
     33    int32_t value;
     34    if(leadByte<kMinTwoByteValueLead) {
     35        value=leadByte-kMinOneByteValueLead;
     36    } else if(leadByte<kMinThreeByteValueLead) {
     37        value=((leadByte-kMinTwoByteValueLead)<<8)|*pos;
     38    } else if(leadByte<kFourByteValueLead) {
     39        value=((leadByte-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
     40    } else if(leadByte==kFourByteValueLead) {
     41        value=(pos[0]<<16)|(pos[1]<<8)|pos[2];
     42    } else {
     43        value=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
     44    }
     45    return value;
     46 }
     47 
     48 const uint8_t *
     49 BytesTrie::jumpByDelta(const uint8_t *pos) {
     50    int32_t delta=*pos++;
     51    if(delta<kMinTwoByteDeltaLead) {
     52        // nothing to do
     53    } else if(delta<kMinThreeByteDeltaLead) {
     54        delta=((delta-kMinTwoByteDeltaLead)<<8)|*pos++;
     55    } else if(delta<kFourByteDeltaLead) {
     56        delta=((delta-kMinThreeByteDeltaLead)<<16)|(pos[0]<<8)|pos[1];
     57        pos+=2;
     58    } else if(delta==kFourByteDeltaLead) {
     59        delta=(pos[0]<<16)|(pos[1]<<8)|pos[2];
     60        pos+=3;
     61    } else {
     62        delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
     63        pos+=4;
     64    }
     65    return pos+delta;
     66 }
     67 
     68 UStringTrieResult
     69 BytesTrie::current() const {
     70    const uint8_t *pos=pos_;
     71    if(pos==nullptr) {
     72        return USTRINGTRIE_NO_MATCH;
     73    } else {
     74        int32_t node;
     75        return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ?
     76                valueResult(node) : USTRINGTRIE_NO_VALUE;
     77    }
     78 }
     79 
     80 UStringTrieResult
     81 BytesTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) {
     82    // Branch according to the current byte.
     83    if(length==0) {
     84        length=*pos++;
     85    }
     86    ++length;
     87    // The length of the branch is the number of bytes to select from.
     88    // The data structure encodes a binary search.
     89    while(length>kMaxBranchLinearSubNodeLength) {
     90        if(inByte<*pos++) {
     91            length>>=1;
     92            pos=jumpByDelta(pos);
     93        } else {
     94            length=length-(length>>1);
     95            pos=skipDelta(pos);
     96        }
     97    }
     98    // Drop down to linear search for the last few bytes.
     99    // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
    100    // and divides length by 2.
    101    do {
    102        if(inByte==*pos++) {
    103            UStringTrieResult result;
    104            int32_t node=*pos;
    105            U_ASSERT(node>=kMinValueLead);
    106            if(node&kValueIsFinal) {
    107                // Leave the final value for getValue() to read.
    108                result=USTRINGTRIE_FINAL_VALUE;
    109            } else {
    110                // Use the non-final value as the jump delta.
    111                ++pos;
    112                // int32_t delta=readValue(pos, node>>1);
    113                node>>=1;
    114                int32_t delta;
    115                if(node<kMinTwoByteValueLead) {
    116                    delta=node-kMinOneByteValueLead;
    117                } else if(node<kMinThreeByteValueLead) {
    118                    delta=((node-kMinTwoByteValueLead)<<8)|*pos++;
    119                } else if(node<kFourByteValueLead) {
    120                    delta=((node-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
    121                    pos+=2;
    122                } else if(node==kFourByteValueLead) {
    123                    delta=(pos[0]<<16)|(pos[1]<<8)|pos[2];
    124                    pos+=3;
    125                } else {
    126                    delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
    127                    pos+=4;
    128                }
    129                // end readValue()
    130                pos+=delta;
    131                node=*pos;
    132                result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
    133            }
    134            pos_=pos;
    135            return result;
    136        }
    137        --length;
    138        pos=skipValue(pos);
    139    } while(length>1);
    140    if(inByte==*pos++) {
    141        pos_=pos;
    142        int32_t node=*pos;
    143        return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
    144    } else {
    145        stop();
    146        return USTRINGTRIE_NO_MATCH;
    147    }
    148 }
    149 
    150 UStringTrieResult
    151 BytesTrie::nextImpl(const uint8_t *pos, int32_t inByte) {
    152    for(;;) {
    153        int32_t node=*pos++;
    154        if(node<kMinLinearMatch) {
    155            return branchNext(pos, node, inByte);
    156        } else if(node<kMinValueLead) {
    157            // Match the first of length+1 bytes.
    158            int32_t length=node-kMinLinearMatch;  // Actual match length minus 1.
    159            if(inByte==*pos++) {
    160                remainingMatchLength_=--length;
    161                pos_=pos;
    162                return (length<0 && (node=*pos)>=kMinValueLead) ?
    163                        valueResult(node) : USTRINGTRIE_NO_VALUE;
    164            } else {
    165                // No match.
    166                break;
    167            }
    168        } else if(node&kValueIsFinal) {
    169            // No further matching bytes.
    170            break;
    171        } else {
    172            // Skip intermediate value.
    173            pos=skipValue(pos, node);
    174            // The next node must not also be a value node.
    175            U_ASSERT(*pos<kMinValueLead);
    176        }
    177    }
    178    stop();
    179    return USTRINGTRIE_NO_MATCH;
    180 }
    181 
    182 UStringTrieResult
    183 BytesTrie::next(int32_t inByte) {
    184    const uint8_t *pos=pos_;
    185    if(pos==nullptr) {
    186        return USTRINGTRIE_NO_MATCH;
    187    }
    188    if(inByte<0) {
    189        inByte+=0x100;
    190    }
    191    int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
    192    if(length>=0) {
    193        // Remaining part of a linear-match node.
    194        if(inByte==*pos++) {
    195            remainingMatchLength_=--length;
    196            pos_=pos;
    197            int32_t node;
    198            return (length<0 && (node=*pos)>=kMinValueLead) ?
    199                    valueResult(node) : USTRINGTRIE_NO_VALUE;
    200        } else {
    201            stop();
    202            return USTRINGTRIE_NO_MATCH;
    203        }
    204    }
    205    return nextImpl(pos, inByte);
    206 }
    207 
    208 UStringTrieResult
    209 BytesTrie::next(const char *s, int32_t sLength) {
    210    if(sLength<0 ? *s==0 : sLength==0) {
    211        // Empty input.
    212        return current();
    213    }
    214    const uint8_t *pos=pos_;
    215    if(pos==nullptr) {
    216        return USTRINGTRIE_NO_MATCH;
    217    }
    218    int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
    219    for(;;) {
    220        // Fetch the next input byte, if there is one.
    221        // Continue a linear-match node without rechecking sLength<0.
    222        int32_t inByte;
    223        if(sLength<0) {
    224            for(;;) {
    225                if((inByte=*s++)==0) {
    226                    remainingMatchLength_=length;
    227                    pos_=pos;
    228                    int32_t node;
    229                    return (length<0 && (node=*pos)>=kMinValueLead) ?
    230                            valueResult(node) : USTRINGTRIE_NO_VALUE;
    231                }
    232                if(length<0) {
    233                    remainingMatchLength_=length;
    234                    break;
    235                }
    236                if(inByte!=*pos) {
    237                    stop();
    238                    return USTRINGTRIE_NO_MATCH;
    239                }
    240                ++pos;
    241                --length;
    242            }
    243        } else {
    244            for(;;) {
    245                if(sLength==0) {
    246                    remainingMatchLength_=length;
    247                    pos_=pos;
    248                    int32_t node;
    249                    return (length<0 && (node=*pos)>=kMinValueLead) ?
    250                            valueResult(node) : USTRINGTRIE_NO_VALUE;
    251                }
    252                inByte=*s++;
    253                --sLength;
    254                if(length<0) {
    255                    remainingMatchLength_=length;
    256                    break;
    257                }
    258                if(inByte!=*pos) {
    259                    stop();
    260                    return USTRINGTRIE_NO_MATCH;
    261                }
    262                ++pos;
    263                --length;
    264            }
    265        }
    266        for(;;) {
    267            int32_t node=*pos++;
    268            if(node<kMinLinearMatch) {
    269                UStringTrieResult result=branchNext(pos, node, inByte);
    270                if(result==USTRINGTRIE_NO_MATCH) {
    271                    return USTRINGTRIE_NO_MATCH;
    272                }
    273                // Fetch the next input byte, if there is one.
    274                if(sLength<0) {
    275                    if((inByte=*s++)==0) {
    276                        return result;
    277                    }
    278                } else {
    279                    if(sLength==0) {
    280                        return result;
    281                    }
    282                    inByte=*s++;
    283                    --sLength;
    284                }
    285                if(result==USTRINGTRIE_FINAL_VALUE) {
    286                    // No further matching bytes.
    287                    stop();
    288                    return USTRINGTRIE_NO_MATCH;
    289                }
    290                pos=pos_;  // branchNext() advanced pos and wrote it to pos_ .
    291            } else if(node<kMinValueLead) {
    292                // Match length+1 bytes.
    293                length=node-kMinLinearMatch;  // Actual match length minus 1.
    294                if(inByte!=*pos) {
    295                    stop();
    296                    return USTRINGTRIE_NO_MATCH;
    297                }
    298                ++pos;
    299                --length;
    300                break;
    301            } else if(node&kValueIsFinal) {
    302                // No further matching bytes.
    303                stop();
    304                return USTRINGTRIE_NO_MATCH;
    305            } else {
    306                // Skip intermediate value.
    307                pos=skipValue(pos, node);
    308                // The next node must not also be a value node.
    309                U_ASSERT(*pos<kMinValueLead);
    310            }
    311        }
    312    }
    313 }
    314 
    315 const uint8_t *
    316 BytesTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
    317                                     UBool haveUniqueValue, int32_t &uniqueValue) {
    318    while(length>kMaxBranchLinearSubNodeLength) {
    319        ++pos;  // ignore the comparison byte
    320        if(nullptr==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) {
    321            return nullptr;
    322        }
    323        length=length-(length>>1);
    324        pos=skipDelta(pos);
    325    }
    326    do {
    327        ++pos;  // ignore a comparison byte
    328        // handle its value
    329        int32_t node=*pos++;
    330        UBool isFinal = static_cast<UBool>(node & kValueIsFinal);
    331        int32_t value=readValue(pos, node>>1);
    332        pos=skipValue(pos, node);
    333        if(isFinal) {
    334            if(haveUniqueValue) {
    335                if(value!=uniqueValue) {
    336                    return nullptr;
    337                }
    338            } else {
    339                uniqueValue=value;
    340                haveUniqueValue=true;
    341            }
    342        } else {
    343            if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) {
    344                return nullptr;
    345            }
    346            haveUniqueValue=true;
    347        }
    348    } while(--length>1);
    349    return pos+1;  // ignore the last comparison byte
    350 }
    351 
    352 UBool
    353 BytesTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
    354    for(;;) {
    355        int32_t node=*pos++;
    356        if(node<kMinLinearMatch) {
    357            if(node==0) {
    358                node=*pos++;
    359            }
    360            pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue);
    361            if(pos==nullptr) {
    362                return false;
    363            }
    364            haveUniqueValue=true;
    365        } else if(node<kMinValueLead) {
    366            // linear-match node
    367            pos+=node-kMinLinearMatch+1;  // Ignore the match bytes.
    368        } else {
    369            UBool isFinal = static_cast<UBool>(node & kValueIsFinal);
    370            int32_t value=readValue(pos, node>>1);
    371            if(haveUniqueValue) {
    372                if(value!=uniqueValue) {
    373                    return false;
    374                }
    375            } else {
    376                uniqueValue=value;
    377                haveUniqueValue=true;
    378            }
    379            if(isFinal) {
    380                return true;
    381            }
    382            pos=skipValue(pos, node);
    383        }
    384    }
    385 }
    386 
    387 int32_t
    388 BytesTrie::getNextBytes(ByteSink &out) const {
    389    const uint8_t *pos=pos_;
    390    if(pos==nullptr) {
    391        return 0;
    392    }
    393    if(remainingMatchLength_>=0) {
    394        append(out, *pos);  // Next byte of a pending linear-match node.
    395        return 1;
    396    }
    397    int32_t node=*pos++;
    398    if(node>=kMinValueLead) {
    399        if(node&kValueIsFinal) {
    400            return 0;
    401        } else {
    402            pos=skipValue(pos, node);
    403            node=*pos++;
    404            U_ASSERT(node<kMinValueLead);
    405        }
    406    }
    407    if(node<kMinLinearMatch) {
    408        if(node==0) {
    409            node=*pos++;
    410        }
    411        getNextBranchBytes(pos, ++node, out);
    412        return node;
    413    } else {
    414        // First byte of the linear-match node.
    415        append(out, *pos);
    416        return 1;
    417    }
    418 }
    419 
    420 void
    421 BytesTrie::getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out) {
    422    while(length>kMaxBranchLinearSubNodeLength) {
    423        ++pos;  // ignore the comparison byte
    424        getNextBranchBytes(jumpByDelta(pos), length>>1, out);
    425        length=length-(length>>1);
    426        pos=skipDelta(pos);
    427    }
    428    do {
    429        append(out, *pos++);
    430        pos=skipValue(pos);
    431    } while(--length>1);
    432    append(out, *pos);
    433 }
    434 
    435 void
    436 BytesTrie::append(ByteSink &out, int c) {
    437    char ch = static_cast<char>(c);
    438    out.Append(&ch, 1);
    439 }
    440 
    441 U_NAMESPACE_END