tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

numparse_scientific.cpp (5637B)


      1 // © 2018 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 
      4 #include "unicode/utypes.h"
      5 
      6 #if !UCONFIG_NO_FORMATTING
      7 
      8 // Allow implicit conversion from char16_t* to UnicodeString for this file:
      9 // Helpful in toString methods and elsewhere.
     10 #define UNISTR_FROM_STRING_EXPLICIT
     11 
     12 #include "numparse_types.h"
     13 #include "numparse_scientific.h"
     14 #include "static_unicode_sets.h"
     15 #include "string_segment.h"
     16 
     17 using namespace icu;
     18 using namespace icu::numparse;
     19 using namespace icu::numparse::impl;
     20 
     21 
     22 namespace {
     23 
     24 inline const UnicodeSet& minusSignSet() {
     25    return *unisets::get(unisets::MINUS_SIGN);
     26 }
     27 
     28 inline const UnicodeSet& plusSignSet() {
     29    return *unisets::get(unisets::PLUS_SIGN);
     30 }
     31 
     32 } // namespace
     33 
     34 
     35 ScientificMatcher::ScientificMatcher(const DecimalFormatSymbols& dfs, const Grouper& grouper)
     36        : fExponentSeparatorString(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol)),
     37          fExponentMatcher(dfs, grouper, PARSE_FLAG_INTEGER_ONLY | PARSE_FLAG_GROUPING_DISABLED),
     38          fIgnorablesMatcher(PARSE_FLAG_STRICT_IGNORABLES) {
     39 
     40    const UnicodeString& minusSign = dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);
     41    if (minusSignSet().contains(minusSign)) {
     42        fCustomMinusSign.setToBogus();
     43    } else {
     44        fCustomMinusSign = minusSign;
     45    }
     46 
     47    const UnicodeString& plusSign = dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);
     48    if (plusSignSet().contains(plusSign)) {
     49        fCustomPlusSign.setToBogus();
     50    } else {
     51        fCustomPlusSign = plusSign;
     52    }
     53 }
     54 
     55 bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
     56    // Only accept scientific notation after the mantissa.
     57    if (!result.seenNumber()) {
     58        return false;
     59    }
     60 
     61    // Only accept one exponent per string.
     62    if (0 != (result.flags & FLAG_HAS_EXPONENT)) {
     63        return false;
     64    }
     65 
     66    // First match the scientific separator, and then match another number after it.
     67    // NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again.
     68    int32_t initialOffset = segment.getOffset();
     69    int32_t overlap = segment.getCommonPrefixLength(fExponentSeparatorString);
     70    if (overlap == fExponentSeparatorString.length()) {
     71        // Full exponent separator match.
     72 
     73        // First attempt to get a code point, returning true if we can't get one.
     74        if (segment.length() == overlap) {
     75            return true;
     76        }
     77        segment.adjustOffset(overlap);
     78 
     79        // Allow ignorables before the sign.
     80        // Note: call site is guarded by the segment.length() check above.
     81        // Note: the ignorables matcher should not touch the result.
     82        fIgnorablesMatcher.match(segment, result, status);
     83        if (segment.length() == 0) {
     84            segment.setOffset(initialOffset);
     85            return true;
     86        }
     87 
     88        // Allow a sign, and then try to match digits.
     89        int8_t exponentSign = 1;
     90        if (segment.startsWith(minusSignSet())) {
     91            exponentSign = -1;
     92            segment.adjustOffsetByCodePoint();
     93        } else if (segment.startsWith(plusSignSet())) {
     94            segment.adjustOffsetByCodePoint();
     95        } else if (segment.startsWith(fCustomMinusSign)) {
     96            overlap = segment.getCommonPrefixLength(fCustomMinusSign);
     97            if (overlap != fCustomMinusSign.length()) {
     98                // Partial custom sign match
     99                segment.setOffset(initialOffset);
    100                return true;
    101            }
    102            exponentSign = -1;
    103            segment.adjustOffset(overlap);
    104        } else if (segment.startsWith(fCustomPlusSign)) {
    105            overlap = segment.getCommonPrefixLength(fCustomPlusSign);
    106            if (overlap != fCustomPlusSign.length()) {
    107                // Partial custom sign match
    108                segment.setOffset(initialOffset);
    109                return true;
    110            }
    111            segment.adjustOffset(overlap);
    112        }
    113 
    114        // Return true if the segment is empty.
    115        if (segment.length() == 0) {
    116            segment.setOffset(initialOffset);
    117            return true;
    118        }
    119 
    120        // Allow ignorables after the sign.
    121        // Note: call site is guarded by the segment.length() check above.
    122        // Note: the ignorables matcher should not touch the result.
    123        fIgnorablesMatcher.match(segment, result, status);
    124        if (segment.length() == 0) {
    125            segment.setOffset(initialOffset);
    126            return true;
    127        }
    128 
    129        // We are supposed to accept E0 after NaN, so we need to make sure result.quantity is available.
    130        bool wasBogus = result.quantity.bogus;
    131        result.quantity.bogus = false;
    132        int digitsOffset = segment.getOffset();
    133        bool digitsReturnValue = fExponentMatcher.match(segment, result, exponentSign, status);
    134        result.quantity.bogus = wasBogus;
    135 
    136        if (segment.getOffset() != digitsOffset) {
    137            // At least one exponent digit was matched.
    138            result.flags |= FLAG_HAS_EXPONENT;
    139        } else {
    140            // No exponent digits were matched
    141            segment.setOffset(initialOffset);
    142        }
    143        return digitsReturnValue;
    144 
    145    } else if (overlap == segment.length()) {
    146        // Partial exponent separator match
    147        return true;
    148    }
    149 
    150    // No match
    151    return false;
    152 }
    153 
    154 bool ScientificMatcher::smokeTest(const StringSegment& segment) const {
    155    return segment.startsWith(fExponentSeparatorString);
    156 }
    157 
    158 UnicodeString ScientificMatcher::toString() const {
    159    return u"<Scientific>";
    160 }
    161 
    162 
    163 #endif /* #if !UCONFIG_NO_FORMATTING */