numparse_scientific.cpp (5637B)
1 // © 2018 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include "unicode/utypes.h" 5 6 #if !UCONFIG_NO_FORMATTING 7 8 // Allow implicit conversion from char16_t* to UnicodeString for this file: 9 // Helpful in toString methods and elsewhere. 10 #define UNISTR_FROM_STRING_EXPLICIT 11 12 #include "numparse_types.h" 13 #include "numparse_scientific.h" 14 #include "static_unicode_sets.h" 15 #include "string_segment.h" 16 17 using namespace icu; 18 using namespace icu::numparse; 19 using namespace icu::numparse::impl; 20 21 22 namespace { 23 24 inline const UnicodeSet& minusSignSet() { 25 return *unisets::get(unisets::MINUS_SIGN); 26 } 27 28 inline const UnicodeSet& plusSignSet() { 29 return *unisets::get(unisets::PLUS_SIGN); 30 } 31 32 } // namespace 33 34 35 ScientificMatcher::ScientificMatcher(const DecimalFormatSymbols& dfs, const Grouper& grouper) 36 : fExponentSeparatorString(dfs.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol)), 37 fExponentMatcher(dfs, grouper, PARSE_FLAG_INTEGER_ONLY | PARSE_FLAG_GROUPING_DISABLED), 38 fIgnorablesMatcher(PARSE_FLAG_STRICT_IGNORABLES) { 39 40 const UnicodeString& minusSign = dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol); 41 if (minusSignSet().contains(minusSign)) { 42 fCustomMinusSign.setToBogus(); 43 } else { 44 fCustomMinusSign = minusSign; 45 } 46 47 const UnicodeString& plusSign = dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol); 48 if (plusSignSet().contains(plusSign)) { 49 fCustomPlusSign.setToBogus(); 50 } else { 51 fCustomPlusSign = plusSign; 52 } 53 } 54 55 bool ScientificMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const { 56 // Only accept scientific notation after the mantissa. 57 if (!result.seenNumber()) { 58 return false; 59 } 60 61 // Only accept one exponent per string. 62 if (0 != (result.flags & FLAG_HAS_EXPONENT)) { 63 return false; 64 } 65 66 // First match the scientific separator, and then match another number after it. 67 // NOTE: This is guarded by the smoke test; no need to check fExponentSeparatorString length again. 68 int32_t initialOffset = segment.getOffset(); 69 int32_t overlap = segment.getCommonPrefixLength(fExponentSeparatorString); 70 if (overlap == fExponentSeparatorString.length()) { 71 // Full exponent separator match. 72 73 // First attempt to get a code point, returning true if we can't get one. 74 if (segment.length() == overlap) { 75 return true; 76 } 77 segment.adjustOffset(overlap); 78 79 // Allow ignorables before the sign. 80 // Note: call site is guarded by the segment.length() check above. 81 // Note: the ignorables matcher should not touch the result. 82 fIgnorablesMatcher.match(segment, result, status); 83 if (segment.length() == 0) { 84 segment.setOffset(initialOffset); 85 return true; 86 } 87 88 // Allow a sign, and then try to match digits. 89 int8_t exponentSign = 1; 90 if (segment.startsWith(minusSignSet())) { 91 exponentSign = -1; 92 segment.adjustOffsetByCodePoint(); 93 } else if (segment.startsWith(plusSignSet())) { 94 segment.adjustOffsetByCodePoint(); 95 } else if (segment.startsWith(fCustomMinusSign)) { 96 overlap = segment.getCommonPrefixLength(fCustomMinusSign); 97 if (overlap != fCustomMinusSign.length()) { 98 // Partial custom sign match 99 segment.setOffset(initialOffset); 100 return true; 101 } 102 exponentSign = -1; 103 segment.adjustOffset(overlap); 104 } else if (segment.startsWith(fCustomPlusSign)) { 105 overlap = segment.getCommonPrefixLength(fCustomPlusSign); 106 if (overlap != fCustomPlusSign.length()) { 107 // Partial custom sign match 108 segment.setOffset(initialOffset); 109 return true; 110 } 111 segment.adjustOffset(overlap); 112 } 113 114 // Return true if the segment is empty. 115 if (segment.length() == 0) { 116 segment.setOffset(initialOffset); 117 return true; 118 } 119 120 // Allow ignorables after the sign. 121 // Note: call site is guarded by the segment.length() check above. 122 // Note: the ignorables matcher should not touch the result. 123 fIgnorablesMatcher.match(segment, result, status); 124 if (segment.length() == 0) { 125 segment.setOffset(initialOffset); 126 return true; 127 } 128 129 // We are supposed to accept E0 after NaN, so we need to make sure result.quantity is available. 130 bool wasBogus = result.quantity.bogus; 131 result.quantity.bogus = false; 132 int digitsOffset = segment.getOffset(); 133 bool digitsReturnValue = fExponentMatcher.match(segment, result, exponentSign, status); 134 result.quantity.bogus = wasBogus; 135 136 if (segment.getOffset() != digitsOffset) { 137 // At least one exponent digit was matched. 138 result.flags |= FLAG_HAS_EXPONENT; 139 } else { 140 // No exponent digits were matched 141 segment.setOffset(initialOffset); 142 } 143 return digitsReturnValue; 144 145 } else if (overlap == segment.length()) { 146 // Partial exponent separator match 147 return true; 148 } 149 150 // No match 151 return false; 152 } 153 154 bool ScientificMatcher::smokeTest(const StringSegment& segment) const { 155 return segment.startsWith(fExponentSeparatorString); 156 } 157 158 UnicodeString ScientificMatcher::toString() const { 159 return u"<Scientific>"; 160 } 161 162 163 #endif /* #if !UCONFIG_NO_FORMATTING */