tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

stsearch.cpp (17351B)


      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 **********************************************************************
      5 *   Copyright (C) 2001-2014 IBM and others. All rights reserved.
      6 **********************************************************************
      7 *   Date        Name        Description
      8 *  03/22/2000   helena      Creation.
      9 **********************************************************************
     10 */
     11 
     12 #include "unicode/utypes.h"
     13 
     14 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
     15 
     16 #include "unicode/stsearch.h"
     17 #include "usrchimp.h"
     18 #include "cmemory.h"
     19 
     20 U_NAMESPACE_BEGIN
     21 
     22 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)
     23 
     24 // public constructors and destructors -----------------------------------
     25 
     26 StringSearch::StringSearch(const UnicodeString &pattern,
     27                           const UnicodeString &text,
     28                           const Locale        &locale,
     29                                 BreakIterator *breakiter,
     30                                 UErrorCode    &status) :
     31                           SearchIterator(text, breakiter),
     32                           m_pattern_(pattern)
     33 {
     34    if (U_FAILURE(status)) {
     35        m_strsrch_ = nullptr;
     36        return;
     37    }
     38 
     39    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
     40                              m_text_.getBuffer(), m_text_.length(),
     41                              locale.getName(), reinterpret_cast<UBreakIterator*>(breakiter),
     42                              &status);
     43    uprv_free(m_search_);
     44    m_search_ = nullptr;
     45 
     46    if (U_SUCCESS(status)) {
     47        // m_search_ has been created by the base SearchIterator class
     48        m_search_        = m_strsrch_->search;
     49    }
     50 }
     51 
     52 StringSearch::StringSearch(const UnicodeString     &pattern,
     53                           const UnicodeString     &text,
     54                                 RuleBasedCollator *coll,
     55                                 BreakIterator     *breakiter,
     56                                 UErrorCode        &status) :
     57                           SearchIterator(text, breakiter),
     58                           m_pattern_(pattern)
     59 {
     60    if (U_FAILURE(status)) {
     61        m_strsrch_ = nullptr;
     62        return;
     63    }
     64    if (coll == nullptr) {
     65        status     = U_ILLEGAL_ARGUMENT_ERROR;
     66        m_strsrch_ = nullptr;
     67        return;
     68    }
     69    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
     70                                          m_pattern_.length(),
     71                                          m_text_.getBuffer(),
     72                                          m_text_.length(), coll->toUCollator(),
     73                                          reinterpret_cast<UBreakIterator*>(breakiter),
     74                                          &status);
     75    uprv_free(m_search_);
     76    m_search_ = nullptr;
     77 
     78    if (U_SUCCESS(status)) {
     79        // m_search_ has been created by the base SearchIterator class
     80        m_search_ = m_strsrch_->search;
     81    }
     82 }
     83 
     84 StringSearch::StringSearch(const UnicodeString     &pattern,
     85                                 CharacterIterator &text,
     86                           const Locale            &locale,
     87                                 BreakIterator     *breakiter,
     88                                 UErrorCode        &status) :
     89                           SearchIterator(text, breakiter),
     90                           m_pattern_(pattern)
     91 {
     92    if (U_FAILURE(status)) {
     93        m_strsrch_ = nullptr;
     94        return;
     95    }
     96    m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
     97                              m_text_.getBuffer(), m_text_.length(),
     98                              locale.getName(), reinterpret_cast<UBreakIterator*>(breakiter),
     99                              &status);
    100    uprv_free(m_search_);
    101    m_search_ = nullptr;
    102 
    103    if (U_SUCCESS(status)) {
    104        // m_search_ has been created by the base SearchIterator class
    105        m_search_ = m_strsrch_->search;
    106    }
    107 }
    108 
    109 StringSearch::StringSearch(const UnicodeString     &pattern,
    110                                 CharacterIterator &text,
    111                                 RuleBasedCollator *coll,
    112                                 BreakIterator     *breakiter,
    113                                 UErrorCode        &status) :
    114                           SearchIterator(text, breakiter),
    115                           m_pattern_(pattern)
    116 {
    117    if (U_FAILURE(status)) {
    118        m_strsrch_ = nullptr;
    119        return;
    120    }
    121    if (coll == nullptr) {
    122        status     = U_ILLEGAL_ARGUMENT_ERROR;
    123        m_strsrch_ = nullptr;
    124        return;
    125    }
    126    m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
    127                                          m_pattern_.length(),
    128                                          m_text_.getBuffer(),
    129                                          m_text_.length(), coll->toUCollator(),
    130                                          reinterpret_cast<UBreakIterator*>(breakiter),
    131                                          &status);
    132    uprv_free(m_search_);
    133    m_search_ = nullptr;
    134 
    135    if (U_SUCCESS(status)) {
    136        // m_search_ has been created by the base SearchIterator class
    137        m_search_ = m_strsrch_->search;
    138    }
    139 }
    140 
    141 StringSearch::StringSearch(const StringSearch &that) :
    142                       SearchIterator(that.m_text_, that.m_breakiterator_),
    143                       m_pattern_(that.m_pattern_)
    144 {
    145    UErrorCode status = U_ZERO_ERROR;
    146 
    147    // Free m_search_ from the superclass
    148    uprv_free(m_search_);
    149    m_search_ = nullptr;
    150 
    151    if (that.m_strsrch_ == nullptr) {
    152        // This was not a good copy
    153        m_strsrch_ = nullptr;
    154    }
    155    else {
    156        // Make a deep copy
    157        m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
    158                                              m_pattern_.length(),
    159                                              m_text_.getBuffer(),
    160                                              m_text_.length(),
    161                                              that.m_strsrch_->collator,
    162                                              reinterpret_cast<UBreakIterator*>(that.m_breakiterator_),
    163                                              &status);
    164        if (U_SUCCESS(status)) {
    165            // m_search_ has been created by the base SearchIterator class
    166            m_search_        = m_strsrch_->search;
    167        }
    168    }
    169 }
    170 
    171 StringSearch::~StringSearch()
    172 {
    173    if (m_strsrch_ != nullptr) {
    174        usearch_close(m_strsrch_);
    175        m_search_ = nullptr;
    176    }
    177 }
    178 
    179 StringSearch *
    180 StringSearch::clone() const {
    181    return new StringSearch(*this);
    182 }
    183 
    184 // operator overloading ---------------------------------------------
    185 StringSearch & StringSearch::operator=(const StringSearch &that)
    186 {
    187    if (this != &that) {
    188        UErrorCode status = U_ZERO_ERROR;
    189        m_text_          = that.m_text_;
    190        m_breakiterator_ = that.m_breakiterator_;
    191        m_pattern_       = that.m_pattern_;
    192        // all m_search_ in the parent class is linked up with m_strsrch_
    193        usearch_close(m_strsrch_);
    194        m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
    195                                              m_pattern_.length(),
    196                                              m_text_.getBuffer(),
    197                                              m_text_.length(),
    198                                              that.m_strsrch_->collator,
    199                                              nullptr, &status);
    200        // Check null pointer
    201        if (m_strsrch_ != nullptr) {
    202            m_search_ = m_strsrch_->search;
    203        }
    204    }
    205    return *this;
    206 }
    207 
    208 bool StringSearch::operator==(const SearchIterator &that) const
    209 {
    210    if (this == &that) {
    211        return true;
    212    }
    213    if (SearchIterator::operator ==(that)) {
    214        const StringSearch *thatsrch = dynamic_cast<const StringSearch *>(&that);
    215        if (thatsrch == nullptr) return false;
    216        return (this->m_pattern_ == thatsrch->m_pattern_ &&
    217                this->m_strsrch_->collator == thatsrch->m_strsrch_->collator);
    218    }
    219    return false;
    220 }
    221 
    222 // public get and set methods ----------------------------------------
    223 
    224 void StringSearch::setOffset(int32_t position, UErrorCode &status)
    225 {
    226    // status checked in usearch_setOffset
    227    usearch_setOffset(m_strsrch_, position, &status);
    228 }
    229 
    230 int32_t StringSearch::getOffset() const
    231 {
    232    return usearch_getOffset(m_strsrch_);
    233 }
    234 
    235 void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
    236 {
    237    if (U_SUCCESS(status)) {
    238        m_text_ = text;
    239        usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status);
    240    }
    241 }
    242 
    243 void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
    244 {
    245    if (U_SUCCESS(status)) {
    246        text.getText(m_text_);
    247        usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
    248    }
    249 }
    250 
    251 RuleBasedCollator * StringSearch::getCollator() const
    252 {
    253    // Note the const_cast. It would be cleaner if this const method returned a const collator.
    254    return RuleBasedCollator::rbcFromUCollator(const_cast<UCollator *>(m_strsrch_->collator));
    255 }
    256 
    257 void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
    258 {
    259    if (U_SUCCESS(status)) {
    260        usearch_setCollator(m_strsrch_, coll->toUCollator(), &status);
    261    }
    262 }
    263 
    264 void StringSearch::setPattern(const UnicodeString &pattern,
    265                                    UErrorCode    &status)
    266 {
    267    if (U_SUCCESS(status)) {
    268        m_pattern_ = pattern;
    269        usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(),
    270                           &status);
    271    }
    272 }
    273 
    274 const UnicodeString & StringSearch::getPattern() const
    275 {
    276    return m_pattern_;
    277 }
    278 
    279 // public methods ----------------------------------------------------
    280 
    281 void StringSearch::reset()
    282 {
    283    usearch_reset(m_strsrch_);
    284 }
    285 
    286 StringSearch * StringSearch::safeClone() const
    287 {
    288    UErrorCode status = U_ZERO_ERROR;
    289    StringSearch *result = new StringSearch(m_pattern_, m_text_,
    290                                            getCollator(),
    291                                            m_breakiterator_,
    292                                            status);
    293    /* test for nullptr */
    294    if (result == nullptr) {
    295        status = U_MEMORY_ALLOCATION_ERROR;
    296        return nullptr;
    297    }
    298    result->setOffset(getOffset(), status);
    299    result->setMatchStart(m_strsrch_->search->matchedIndex);
    300    result->setMatchLength(m_strsrch_->search->matchedLength);
    301    if (U_FAILURE(status)) {
    302        return nullptr;
    303    }
    304    return result;
    305 }
    306 
    307 // protected method -------------------------------------------------
    308 
    309 int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
    310 {
    311    // values passed here are already in the pre-shift position
    312    if (U_SUCCESS(status)) {
    313        if (m_strsrch_->pattern.cesLength == 0) {
    314            m_search_->matchedIndex =
    315                                    m_search_->matchedIndex == USEARCH_DONE ?
    316                                    getOffset() : m_search_->matchedIndex + 1;
    317            m_search_->matchedLength = 0;
    318            ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
    319                           &status);
    320            if (m_search_->matchedIndex == m_search_->textLength) {
    321                m_search_->matchedIndex = USEARCH_DONE;
    322            }
    323        }
    324        else {
    325            // looking at usearch.cpp, this part is shifted out to
    326            // StringSearch instead of SearchIterator because m_strsrch_ is
    327            // not accessible in SearchIterator
    328 #if 0
    329            if (position + m_strsrch_->pattern.defaultShiftSize
    330                > m_search_->textLength) {
    331                setMatchNotFound();
    332                return USEARCH_DONE;
    333            }
    334 #endif
    335            if (m_search_->matchedLength <= 0) {
    336                // the flipping direction issue has already been handled
    337                // in next()
    338                // for boundary check purposes. this will ensure that the
    339                // next match will not precede the current offset
    340                // note search->matchedIndex will always be set to something
    341                // in the code
    342                m_search_->matchedIndex = position - 1;
    343            }
    344 
    345            ucol_setOffset(m_strsrch_->textIter, position, &status);
    346            
    347 #if 0
    348            for (;;) {
    349                if (m_search_->isCanonicalMatch) {
    350                    // can't use exact here since extra accents are allowed.
    351                    usearch_handleNextCanonical(m_strsrch_, &status);
    352                }
    353                else {
    354                    usearch_handleNextExact(m_strsrch_, &status);
    355                }
    356                if (U_FAILURE(status)) {
    357                    return USEARCH_DONE;
    358                }
    359                if (m_breakiterator_ == nullptr
    360 #if !UCONFIG_NO_BREAK_ITERATION
    361                    ||
    362                    m_search_->matchedIndex == USEARCH_DONE ||
    363                    (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
    364                     m_breakiterator_->isBoundary(m_search_->matchedIndex +
    365                                                  m_search_->matchedLength))
    366 #endif
    367                ) {
    368                    if (m_search_->matchedIndex == USEARCH_DONE) {
    369                        ucol_setOffset(m_strsrch_->textIter,
    370                                       m_search_->textLength, &status);
    371                    }
    372                    else {
    373                        ucol_setOffset(m_strsrch_->textIter,
    374                                       m_search_->matchedIndex, &status);
    375                    }
    376                    return m_search_->matchedIndex;
    377                }
    378            }
    379 #else
    380            // if m_strsrch_->breakIter is always the same as m_breakiterator_
    381            // then we don't need to check the match boundaries here because
    382            // usearch_handleNextXXX will already have done it.
    383            if (m_search_->isCanonicalMatch) {
    384            	// *could* actually use exact here 'cause no extra accents allowed...
    385            	usearch_handleNextCanonical(m_strsrch_, &status);
    386            } else {
    387            	usearch_handleNextExact(m_strsrch_, &status);
    388            }
    389            
    390            if (U_FAILURE(status)) {
    391            	return USEARCH_DONE;
    392            }
    393            
    394            if (m_search_->matchedIndex == USEARCH_DONE) {
    395            	ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status);
    396            } else {
    397            	ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status);
    398            }
    399            
    400            return m_search_->matchedIndex;
    401 #endif
    402        }
    403    }
    404    return USEARCH_DONE;
    405 }
    406 
    407 int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
    408 {
    409    // values passed here are already in the pre-shift position
    410    if (U_SUCCESS(status)) {
    411        if (m_strsrch_->pattern.cesLength == 0) {
    412            m_search_->matchedIndex =
    413                  (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
    414                   m_search_->matchedIndex);
    415            if (m_search_->matchedIndex == 0) {
    416                setMatchNotFound();
    417            }
    418            else {
    419                m_search_->matchedIndex --;
    420                ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
    421                               &status);
    422                m_search_->matchedLength = 0;
    423            }
    424        }
    425        else {
    426            // looking at usearch.cpp, this part is shifted out to
    427            // StringSearch instead of SearchIterator because m_strsrch_ is
    428            // not accessible in SearchIterator
    429 #if 0
    430            if (!m_search_->isOverlap &&
    431                position - m_strsrch_->pattern.defaultShiftSize < 0) {
    432                setMatchNotFound();
    433                return USEARCH_DONE;
    434            }
    435            
    436            for (;;) {
    437                if (m_search_->isCanonicalMatch) {
    438                    // can't use exact here since extra accents are allowed.
    439                    usearch_handlePreviousCanonical(m_strsrch_, &status);
    440                }
    441                else {
    442                    usearch_handlePreviousExact(m_strsrch_, &status);
    443                }
    444                if (U_FAILURE(status)) {
    445                    return USEARCH_DONE;
    446                }
    447                if (m_breakiterator_ == nullptr
    448 #if !UCONFIG_NO_BREAK_ITERATION
    449                    ||
    450                    m_search_->matchedIndex == USEARCH_DONE ||
    451                    (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
    452                     m_breakiterator_->isBoundary(m_search_->matchedIndex +
    453                                                  m_search_->matchedLength))
    454 #endif
    455                ) {
    456                    return m_search_->matchedIndex;
    457                }
    458            }
    459 #else
    460            ucol_setOffset(m_strsrch_->textIter, position, &status);
    461            
    462            if (m_search_->isCanonicalMatch) {
    463            	// *could* use exact match here since extra accents *not* allowed!
    464            	usearch_handlePreviousCanonical(m_strsrch_, &status);
    465            } else {
    466            	usearch_handlePreviousExact(m_strsrch_, &status);
    467            }
    468            
    469            if (U_FAILURE(status)) {
    470            	return USEARCH_DONE;
    471            }
    472            
    473            return m_search_->matchedIndex;
    474 #endif
    475        }
    476 
    477        return m_search_->matchedIndex;
    478    }
    479    return USEARCH_DONE;
    480 }
    481 
    482 U_NAMESPACE_END
    483 
    484 #endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */