numparse_compositions.cpp (3599B)
1 // © 2018 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include "unicode/utypes.h" 5 6 #if !UCONFIG_NO_FORMATTING 7 8 // Allow implicit conversion from char16_t* to UnicodeString for this file: 9 // Helpful in toString methods and elsewhere. 10 #define UNISTR_FROM_STRING_EXPLICIT 11 12 #include "numparse_types.h" 13 #include "numparse_compositions.h" 14 #include "string_segment.h" 15 #include "unicode/uniset.h" 16 17 using namespace icu; 18 using namespace icu::numparse; 19 using namespace icu::numparse::impl; 20 21 22 bool SeriesMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const { 23 ParsedNumber backup(result); 24 25 int32_t initialOffset = segment.getOffset(); 26 bool maybeMore = true; 27 for (const auto* it = begin(); it < end();) { 28 const NumberParseMatcher* matcher = *it; 29 int matcherOffset = segment.getOffset(); 30 if (segment.length() != 0) { 31 maybeMore = matcher->match(segment, result, status); 32 } else { 33 // Nothing for this matcher to match; ask for more. 34 maybeMore = true; 35 } 36 37 bool success = (segment.getOffset() != matcherOffset); 38 bool isFlexible = matcher->isFlexible(); 39 if (success && isFlexible) { 40 // Match succeeded, and this is a flexible matcher. Re-run it. 41 } else if (success) { 42 // Match succeeded, and this is NOT a flexible matcher. Proceed to the next matcher. 43 it++; 44 // Small hack: if there is another matcher coming, do not accept trailing weak chars. 45 // Needed for proper handling of currency spacing. 46 if (it < end() && segment.getOffset() != result.charEnd && result.charEnd > matcherOffset) { 47 segment.setOffset(result.charEnd); 48 } 49 } else if (isFlexible) { 50 // Match failed, and this is a flexible matcher. Try again with the next matcher. 51 it++; 52 } else { 53 // Match failed, and this is NOT a flexible matcher. Exit. 54 segment.setOffset(initialOffset); 55 result = backup; 56 return maybeMore; 57 } 58 } 59 60 // All matchers in the series succeeded. 61 return maybeMore; 62 } 63 64 bool SeriesMatcher::smokeTest(const StringSegment& segment) const { 65 // NOTE: The range-based for loop calls the virtual begin() and end() methods. 66 // NOTE: We only want the first element. Use the for loop for boundary checking. 67 for (const auto& matcher : *this) { 68 // SeriesMatchers are never allowed to start with a Flexible matcher. 69 U_ASSERT(!matcher->isFlexible()); 70 return matcher->smokeTest(segment); 71 } 72 return false; 73 } 74 75 void SeriesMatcher::postProcess(ParsedNumber& result) const { 76 // NOTE: The range-based for loop calls the virtual begin() and end() methods. 77 for (const auto* matcher : *this) { 78 matcher->postProcess(result); 79 } 80 } 81 82 83 ArraySeriesMatcher::ArraySeriesMatcher() 84 : fMatchersLen(0) { 85 } 86 87 ArraySeriesMatcher::ArraySeriesMatcher(MatcherArray& matchers, int32_t matchersLen) 88 : fMatchers(std::move(matchers)), fMatchersLen(matchersLen) { 89 } 90 91 int32_t ArraySeriesMatcher::length() const { 92 return fMatchersLen; 93 } 94 95 const NumberParseMatcher* const* ArraySeriesMatcher::begin() const { 96 return fMatchers.getAlias(); 97 } 98 99 const NumberParseMatcher* const* ArraySeriesMatcher::end() const { 100 return fMatchers.getAlias() + fMatchersLen; 101 } 102 103 UnicodeString ArraySeriesMatcher::toString() const { 104 return u"<ArraySeries>"; 105 } 106 107 108 #endif /* #if !UCONFIG_NO_FORMATTING */