tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

search.cpp (14876B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (C) 2001-2008,2010 IBM and others. All rights reserved.
      6 **********************************************************************
      7 *   Date        Name        Description
      8 *  03/22/2000   helena      Creation.
      9 **********************************************************************
     10 */
     11 
     12 #include "unicode/utypes.h"
     13 
     14 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
     15 
     16 #include "unicode/brkiter.h"
     17 #include "unicode/schriter.h"
     18 #include "unicode/search.h"
     19 #include "usrchimp.h"
     20 #include "cmemory.h"
     21 
     22 // public constructors and destructors -----------------------------------
     23 U_NAMESPACE_BEGIN
     24 
     25 SearchIterator::SearchIterator(const SearchIterator &other)
     26    : UObject(other)
     27 {   
     28    m_breakiterator_            = other.m_breakiterator_;
     29    m_text_                     = other.m_text_;
     30    m_search_ = static_cast<USearch*>(uprv_malloc(sizeof(USearch)));
     31    m_search_->breakIter        = other.m_search_->breakIter;
     32    m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch;
     33    m_search_->isOverlap        = other.m_search_->isOverlap;
     34    m_search_->elementComparisonType = other.m_search_->elementComparisonType;
     35    m_search_->matchedIndex     = other.m_search_->matchedIndex;
     36    m_search_->matchedLength    = other.m_search_->matchedLength;
     37    m_search_->text             = other.m_search_->text;
     38    m_search_->textLength       = other.m_search_->textLength;
     39 }
     40 
     41 SearchIterator::~SearchIterator()
     42 {
     43    if (m_search_ != nullptr) {
     44        uprv_free(m_search_);
     45    }
     46 }
     47 
     48 // public get and set methods ----------------------------------------
     49 
     50 void SearchIterator::setAttribute(USearchAttribute       attribute,
     51                                  USearchAttributeValue  value,
     52                                  UErrorCode            &status)
     53 {
     54    if (U_SUCCESS(status)) {
     55        switch (attribute)
     56        {
     57        case USEARCH_OVERLAP :
     58            m_search_->isOverlap = (value == USEARCH_ON ? true : false);
     59            break;
     60        case USEARCH_CANONICAL_MATCH :
     61            m_search_->isCanonicalMatch = (value == USEARCH_ON ? true : false);
     62            break;
     63        case USEARCH_ELEMENT_COMPARISON :
     64            if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
     65                m_search_->elementComparisonType = static_cast<int16_t>(value);
     66            } else {
     67                m_search_->elementComparisonType = 0;
     68            }
     69            break;
     70        default:
     71            status = U_ILLEGAL_ARGUMENT_ERROR;
     72        }
     73    }
     74    if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) {
     75        status = U_ILLEGAL_ARGUMENT_ERROR;
     76    }
     77 }
     78 
     79 USearchAttributeValue SearchIterator::getAttribute(
     80                                          USearchAttribute  attribute) const
     81 {
     82    switch (attribute) {
     83    case USEARCH_OVERLAP :
     84        return (m_search_->isOverlap ? USEARCH_ON : USEARCH_OFF);
     85    case USEARCH_CANONICAL_MATCH :
     86        return (m_search_->isCanonicalMatch ? USEARCH_ON : USEARCH_OFF);
     87    case USEARCH_ELEMENT_COMPARISON :
     88        {
     89            int16_t value = m_search_->elementComparisonType;
     90            if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
     91                return static_cast<USearchAttributeValue>(value);
     92            } else {
     93                return USEARCH_STANDARD_ELEMENT_COMPARISON;
     94            }
     95        }
     96    default :
     97        return USEARCH_DEFAULT;
     98    }
     99 }
    100    
    101 int32_t SearchIterator::getMatchedStart() const
    102 {
    103    return m_search_->matchedIndex;
    104 }
    105 
    106 int32_t SearchIterator::getMatchedLength() const
    107 {
    108    return m_search_->matchedLength;
    109 }
    110    
    111 void SearchIterator::getMatchedText(UnicodeString &result) const
    112 {
    113    int32_t matchedindex  = m_search_->matchedIndex;
    114    int32_t     matchedlength = m_search_->matchedLength;
    115    if (matchedindex != USEARCH_DONE && matchedlength != 0) {
    116        result.setTo(m_search_->text + matchedindex, matchedlength); 
    117    }
    118    else {
    119        result.remove();
    120    }
    121 }
    122    
    123 void SearchIterator::setBreakIterator(BreakIterator *breakiter, 
    124                                      UErrorCode &status)
    125 {
    126    if (U_SUCCESS(status)) {
    127 #if 0
    128        m_search_->breakIter = nullptr;
    129        // the c++ breakiterator may not make use of ubreakiterator.
    130        // so we'll have to keep track of it ourselves.
    131 #else
    132        // Well, gee... the Constructors that take a BreakIterator
    133        // all cast the BreakIterator to a UBreakIterator and
    134        // pass it to the corresponding usearch_openFromXXX
    135        // routine, so there's no reason not to do this.
    136        //
    137        // Besides, a UBreakIterator is a BreakIterator, so
    138        // any subclass of BreakIterator should work fine here...
    139        m_search_->breakIter = reinterpret_cast<UBreakIterator*>(breakiter);
    140 #endif
    141        
    142        m_breakiterator_ = breakiter;
    143    }
    144 }
    145    
    146 const BreakIterator * SearchIterator::getBreakIterator() const
    147 {
    148    return m_breakiterator_;
    149 }
    150 
    151 void SearchIterator::setText(const UnicodeString &text, UErrorCode &status)
    152 {
    153    if (U_SUCCESS(status)) {
    154        if (text.length() == 0) {
    155            status = U_ILLEGAL_ARGUMENT_ERROR;
    156        }
    157        else {
    158            m_text_        = text;
    159            m_search_->text = m_text_.getBuffer();
    160            m_search_->textLength = m_text_.length();
    161        }
    162    }
    163 }
    164 
    165 void SearchIterator::setText(CharacterIterator &text, UErrorCode &status)
    166 {
    167    if (U_SUCCESS(status)) {
    168        text.getText(m_text_);
    169        setText(m_text_, status);
    170    }
    171 }
    172    
    173 const UnicodeString & SearchIterator::getText() const
    174 {
    175    return m_text_;
    176 }
    177 
    178 // operator overloading ----------------------------------------------
    179 
    180 bool SearchIterator::operator==(const SearchIterator &that) const
    181 {
    182    if (this == &that) {
    183        return true;
    184    }
    185    return (m_breakiterator_            == that.m_breakiterator_ &&
    186            m_search_->isCanonicalMatch == that.m_search_->isCanonicalMatch &&
    187            m_search_->isOverlap        == that.m_search_->isOverlap &&
    188            m_search_->elementComparisonType == that.m_search_->elementComparisonType &&
    189            m_search_->matchedIndex     == that.m_search_->matchedIndex &&
    190            m_search_->matchedLength    == that.m_search_->matchedLength &&
    191            m_search_->textLength       == that.m_search_->textLength &&
    192            getOffset() == that.getOffset() &&
    193            (m_search_->textLength == 0 ||
    194            (uprv_memcmp(m_search_->text, that.m_search_->text, 
    195                              m_search_->textLength * sizeof(char16_t)) == 0)));
    196 }
    197 
    198 // public methods ----------------------------------------------------
    199 
    200 int32_t SearchIterator::first(UErrorCode &status)
    201 {
    202    if (U_FAILURE(status)) {
    203        return USEARCH_DONE;
    204    }
    205    setOffset(0, status);
    206    return handleNext(0, status);
    207 }
    208 
    209 int32_t SearchIterator::following(int32_t position, 
    210                                      UErrorCode &status)
    211 {
    212    if (U_FAILURE(status)) {
    213        return USEARCH_DONE;
    214    }
    215    setOffset(position, status);
    216    return handleNext(position, status);
    217 }
    218    
    219 int32_t SearchIterator::last(UErrorCode &status)
    220 {
    221    if (U_FAILURE(status)) {
    222        return USEARCH_DONE;
    223    }
    224    setOffset(m_search_->textLength, status);
    225    return handlePrev(m_search_->textLength, status);
    226 }
    227 
    228 int32_t SearchIterator::preceding(int32_t position, 
    229                                      UErrorCode &status)
    230 {
    231    if (U_FAILURE(status)) {
    232        return USEARCH_DONE;
    233    }
    234    setOffset(position, status);
    235    return handlePrev(position, status);
    236 }
    237 
    238 int32_t SearchIterator::next(UErrorCode &status)
    239 {
    240    if (U_SUCCESS(status)) {
    241        int32_t offset = getOffset();
    242        int32_t matchindex  = m_search_->matchedIndex;
    243        int32_t     matchlength = m_search_->matchedLength;
    244        m_search_->reset = false;
    245        if (m_search_->isForwardSearching) {
    246            int32_t textlength = m_search_->textLength;
    247            if (offset == textlength || matchindex == textlength || 
    248                (matchindex != USEARCH_DONE && 
    249                matchindex + matchlength >= textlength)) {
    250                // not enough characters to match
    251                setMatchNotFound();
    252                return USEARCH_DONE; 
    253            }
    254        }
    255        else {
    256            // switching direction. 
    257            // if matchedIndex == USEARCH_DONE, it means that either a 
    258            // setOffset has been called or that previous ran off the text
    259            // string. the iterator would have been set to offset 0 if a 
    260            // match is not found.
    261            m_search_->isForwardSearching = true;
    262            if (m_search_->matchedIndex != USEARCH_DONE) {
    263                // there's no need to set the collation element iterator
    264                // the next call to next will set the offset.
    265                return matchindex;
    266            }
    267        }
    268 
    269        if (matchlength > 0) {
    270            // if matchlength is 0 we are at the start of the iteration
    271            if (m_search_->isOverlap) {
    272                offset ++;
    273            }
    274            else {
    275                offset += matchlength;
    276            }
    277        }
    278        return handleNext(offset, status);
    279    }
    280    return USEARCH_DONE;
    281 }
    282 
    283 int32_t SearchIterator::previous(UErrorCode &status)
    284 {
    285    if (U_SUCCESS(status)) {
    286        int32_t offset;
    287        if (m_search_->reset) {
    288            offset                       = m_search_->textLength;
    289            m_search_->isForwardSearching = false;
    290            m_search_->reset              = false;
    291            setOffset(offset, status);
    292        }
    293        else {
    294            offset = getOffset();
    295        }
    296        
    297        int32_t matchindex = m_search_->matchedIndex;
    298        if (m_search_->isForwardSearching) {
    299            // switching direction. 
    300            // if matchedIndex == USEARCH_DONE, it means that either a 
    301            // setOffset has been called or that next ran off the text
    302            // string. the iterator would have been set to offset textLength if 
    303            // a match is not found.
    304            m_search_->isForwardSearching = false;
    305            if (matchindex != USEARCH_DONE) {
    306                return matchindex;
    307            }
    308        }
    309        else {
    310            if (offset == 0 || matchindex == 0) {
    311                // not enough characters to match
    312                setMatchNotFound();
    313                return USEARCH_DONE; 
    314            }
    315        }
    316 
    317        if (matchindex != USEARCH_DONE) {
    318            if (m_search_->isOverlap) {
    319                matchindex += m_search_->matchedLength - 2;
    320            }
    321 
    322            return handlePrev(matchindex, status); 
    323        }
    324 
    325        return handlePrev(offset, status);
    326    }
    327 
    328    return USEARCH_DONE;
    329 }
    330 
    331 void SearchIterator::reset()
    332 {
    333    UErrorCode status = U_ZERO_ERROR;
    334    setMatchNotFound();
    335    setOffset(0, status);
    336    m_search_->isOverlap          = false;
    337    m_search_->isCanonicalMatch   = false;
    338    m_search_->elementComparisonType = 0;
    339    m_search_->isForwardSearching = true;
    340    m_search_->reset              = true;
    341 }
    342 
    343 // protected constructors and destructors -----------------------------
    344 
    345 SearchIterator::SearchIterator()
    346 {
    347    m_search_ = static_cast<USearch*>(uprv_malloc(sizeof(USearch)));
    348    m_search_->breakIter          = nullptr;
    349    m_search_->isOverlap          = false;
    350    m_search_->isCanonicalMatch   = false;
    351    m_search_->elementComparisonType = 0;
    352    m_search_->isForwardSearching = true;
    353    m_search_->reset              = true;
    354    m_search_->matchedIndex       = USEARCH_DONE;
    355    m_search_->matchedLength      = 0;
    356    m_search_->text               = nullptr;
    357    m_search_->textLength         = 0;
    358    m_breakiterator_              = nullptr;
    359 }
    360 
    361 SearchIterator::SearchIterator(const UnicodeString &text, 
    362                                     BreakIterator *breakiter) :
    363                                     m_breakiterator_(breakiter),
    364                                     m_text_(text)
    365 {
    366    m_search_ = static_cast<USearch*>(uprv_malloc(sizeof(USearch)));
    367    m_search_->breakIter          = nullptr;
    368    m_search_->isOverlap          = false;
    369    m_search_->isCanonicalMatch   = false;
    370    m_search_->elementComparisonType = 0;
    371    m_search_->isForwardSearching = true;
    372    m_search_->reset              = true;
    373    m_search_->matchedIndex       = USEARCH_DONE;
    374    m_search_->matchedLength      = 0;
    375    m_search_->text               = m_text_.getBuffer();
    376    m_search_->textLength         = text.length();
    377 }
    378 
    379 SearchIterator::SearchIterator(CharacterIterator &text, 
    380                               BreakIterator     *breakiter) :
    381                               m_breakiterator_(breakiter)
    382 {
    383    m_search_ = static_cast<USearch*>(uprv_malloc(sizeof(USearch)));
    384    m_search_->breakIter          = nullptr;
    385    m_search_->isOverlap          = false;
    386    m_search_->isCanonicalMatch   = false;
    387    m_search_->elementComparisonType = 0;
    388    m_search_->isForwardSearching = true;
    389    m_search_->reset              = true;
    390    m_search_->matchedIndex       = USEARCH_DONE;
    391    m_search_->matchedLength      = 0;
    392    text.getText(m_text_);
    393    m_search_->text               = m_text_.getBuffer();
    394    m_search_->textLength         = m_text_.length();
    395    m_breakiterator_             = breakiter;
    396 }
    397 
    398 // protected methods ------------------------------------------------------
    399 
    400 SearchIterator & SearchIterator::operator=(const SearchIterator &that)
    401 {
    402    if (this != &that) {
    403        m_breakiterator_            = that.m_breakiterator_;
    404        m_text_                     = that.m_text_;
    405        m_search_->breakIter        = that.m_search_->breakIter;
    406        m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch;
    407        m_search_->isOverlap        = that.m_search_->isOverlap;
    408        m_search_->elementComparisonType = that.m_search_->elementComparisonType;
    409        m_search_->matchedIndex     = that.m_search_->matchedIndex;
    410        m_search_->matchedLength    = that.m_search_->matchedLength;
    411        m_search_->text             = that.m_search_->text;
    412        m_search_->textLength       = that.m_search_->textLength;
    413    }
    414    return *this;
    415 }
    416 
    417 void SearchIterator::setMatchLength(int32_t length)
    418 {
    419    m_search_->matchedLength = length;
    420 }
    421 
    422 void SearchIterator::setMatchStart(int32_t position)
    423 {
    424    m_search_->matchedIndex = position;
    425 }
    426 
    427 void SearchIterator::setMatchNotFound() 
    428 {
    429    setMatchStart(USEARCH_DONE);
    430    setMatchLength(0);
    431    UErrorCode status = U_ZERO_ERROR;
    432    // by default no errors should be returned here since offsets are within 
    433    // range.
    434    if (m_search_->isForwardSearching) {
    435        setOffset(m_search_->textLength, status);
    436    }
    437    else {
    438        setOffset(0, status);
    439    }
    440 }
    441 
    442 
    443 U_NAMESPACE_END
    444 
    445 #endif /* #if !UCONFIG_NO_COLLATION */