tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

string_segment.cpp (3831B)


      1 // © 2018 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 #include "unicode/utypes.h"
      5 
      6 #if !UCONFIG_NO_FORMATTING
      7 
      8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
      9 // Helpful in toString methods and elsewhere.
     10 #define UNISTR_FROM_STRING_EXPLICIT
     11 
     12 #include "numparse_types.h"
     13 #include "string_segment.h"
     14 #include "putilimp.h"
     15 #include "unicode/utf16.h"
     16 #include "unicode/uniset.h"
     17 
     18 U_NAMESPACE_BEGIN
     19 
     20 
     21 StringSegment::StringSegment(const UnicodeString& str, bool ignoreCase)
     22        : fStr(str), fStart(0), fEnd(str.length()),
     23          fFoldCase(ignoreCase) {}
     24 
     25 int32_t StringSegment::getOffset() const {
     26    return fStart;
     27 }
     28 
     29 void StringSegment::setOffset(int32_t start) {
     30    fStart = start;
     31 }
     32 
     33 void StringSegment::adjustOffset(int32_t delta) {
     34    fStart += delta;
     35 }
     36 
     37 void StringSegment::adjustOffsetByCodePoint() {
     38    fStart += U16_LENGTH(getCodePoint());
     39 }
     40 
     41 void StringSegment::setLength(int32_t length) {
     42    fEnd = fStart + length;
     43 }
     44 
     45 void StringSegment::resetLength() {
     46    fEnd = fStr.length();
     47 }
     48 
     49 int32_t StringSegment::length() const {
     50    return fEnd - fStart;
     51 }
     52 
     53 char16_t StringSegment::charAt(int32_t index) const {
     54    return fStr.charAt(index + fStart);
     55 }
     56 
     57 UChar32 StringSegment::codePointAt(int32_t index) const {
     58    return fStr.char32At(index + fStart);
     59 }
     60 
     61 UnicodeString StringSegment::toUnicodeString() const {
     62    return UnicodeString(fStr.getBuffer() + fStart, fEnd - fStart);
     63 }
     64 
     65 UnicodeString StringSegment::toTempUnicodeString() const {
     66    // Use the readonly-aliasing constructor for efficiency.
     67    return UnicodeString(false, fStr.getBuffer() + fStart, fEnd - fStart);
     68 }
     69 
     70 UChar32 StringSegment::getCodePoint() const {
     71    char16_t lead = fStr.charAt(fStart);
     72    if (U16_IS_LEAD(lead) && fStart + 1 < fEnd) {
     73        return fStr.char32At(fStart);
     74    } else if (U16_IS_SURROGATE(lead)) {
     75        return -1;
     76    } else {
     77        return lead;
     78    }
     79 }
     80 
     81 bool StringSegment::startsWith(UChar32 otherCp) const {
     82    return codePointsEqual(getCodePoint(), otherCp, fFoldCase);
     83 }
     84 
     85 bool StringSegment::startsWith(const UnicodeSet& uniset) const {
     86    // TODO: Move UnicodeSet case-folding logic here.
     87    // TODO: Handle string matches here instead of separately.
     88    UChar32 cp = getCodePoint();
     89    if (cp == -1) {
     90        return false;
     91    }
     92    return uniset.contains(cp);
     93 }
     94 
     95 bool StringSegment::startsWith(const UnicodeString& other) const {
     96    if (other.isBogus() || other.length() == 0 || length() == 0) {
     97        return false;
     98    }
     99    int cp1 = getCodePoint();
    100    int cp2 = other.char32At(0);
    101    return codePointsEqual(cp1, cp2, fFoldCase);
    102 }
    103 
    104 int32_t StringSegment::getCommonPrefixLength(const UnicodeString& other) {
    105    return getPrefixLengthInternal(other, fFoldCase);
    106 }
    107 
    108 int32_t StringSegment::getCaseSensitivePrefixLength(const UnicodeString& other) {
    109    return getPrefixLengthInternal(other, false);
    110 }
    111 
    112 int32_t StringSegment::getPrefixLengthInternal(const UnicodeString& other, bool foldCase) {
    113    U_ASSERT(other.length() > 0);
    114    int32_t offset = 0;
    115    for (; offset < uprv_min(length(), other.length());) {
    116        // TODO: case-fold code points, not chars
    117        char16_t c1 = charAt(offset);
    118        char16_t c2 = other.charAt(offset);
    119        if (!codePointsEqual(c1, c2, foldCase)) {
    120            break;
    121        }
    122        offset++;
    123    }
    124    return offset;
    125 }
    126 
    127 bool StringSegment::codePointsEqual(UChar32 cp1, UChar32 cp2, bool foldCase) {
    128    if (cp1 == cp2) {
    129        return true;
    130    }
    131    if (!foldCase) {
    132        return false;
    133    }
    134    cp1 = u_foldCase(cp1, true);
    135    cp2 = u_foldCase(cp2, true);
    136    return cp1 == cp2;
    137 }
    138 
    139 bool StringSegment::operator==(const UnicodeString& other) const {
    140    return toTempUnicodeString() == other;
    141 }
    142 
    143 
    144 U_NAMESPACE_END
    145 #endif /* #if !UCONFIG_NO_FORMATTING */