ustr_titlecase_brkiter.cpp (8395B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2011, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * file name: ustr_titlecase_brkiter.cpp 9 * encoding: UTF-8 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2011may30 14 * created by: Markus W. Scherer 15 * 16 * Titlecasing functions that are based on BreakIterator 17 * were moved here to break dependency cycles among parts of the common library. 18 */ 19 20 #include "unicode/utypes.h" 21 22 #if !UCONFIG_NO_BREAK_ITERATION 23 24 #include "unicode/brkiter.h" 25 #include "unicode/casemap.h" 26 #include "unicode/chariter.h" 27 #include "unicode/localpointer.h" 28 #include "unicode/ubrk.h" 29 #include "unicode/ucasemap.h" 30 #include "unicode/utext.h" 31 #include "cmemory.h" 32 #include "uassert.h" 33 #include "ucase.h" 34 #include "ucasemap_imp.h" 35 36 U_NAMESPACE_BEGIN 37 38 /** 39 * Whole-string BreakIterator. 40 * Titlecasing only calls setText(), first(), and next(). 41 * We implement the rest only to satisfy the abstract interface. 42 */ 43 class WholeStringBreakIterator : public BreakIterator { 44 public: 45 WholeStringBreakIterator() : BreakIterator(), length(0) {} 46 ~WholeStringBreakIterator() override; 47 bool operator==(const BreakIterator&) const override; 48 WholeStringBreakIterator *clone() const override; 49 static UClassID U_EXPORT2 getStaticClassID(); 50 UClassID getDynamicClassID() const override; 51 CharacterIterator &getText() const override; 52 UText *getUText(UText *fillIn, UErrorCode &errorCode) const override; 53 void setText(const UnicodeString &text) override; 54 void setText(UText *text, UErrorCode &errorCode) override; 55 void adoptText(CharacterIterator* it) override; 56 int32_t first() override; 57 int32_t last() override; 58 int32_t previous() override; 59 int32_t next() override; 60 int32_t current() const override; 61 int32_t following(int32_t offset) override; 62 int32_t preceding(int32_t offset) override; 63 UBool isBoundary(int32_t offset) override; 64 int32_t next(int32_t n) override; 65 WholeStringBreakIterator *createBufferClone(void *stackBuffer, int32_t &BufferSize, 66 UErrorCode &errorCode) override; 67 WholeStringBreakIterator &refreshInputText(UText *input, UErrorCode &errorCode) override; 68 69 private: 70 int32_t length; 71 }; 72 73 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(WholeStringBreakIterator) 74 75 WholeStringBreakIterator::~WholeStringBreakIterator() {} 76 bool WholeStringBreakIterator::operator==(const BreakIterator&) const { return false; } 77 WholeStringBreakIterator *WholeStringBreakIterator::clone() const { return nullptr; } 78 79 CharacterIterator &WholeStringBreakIterator::getText() const { 80 UPRV_UNREACHABLE_EXIT; // really should not be called 81 } 82 UText *WholeStringBreakIterator::getUText(UText * /*fillIn*/, UErrorCode &errorCode) const { 83 if (U_SUCCESS(errorCode)) { 84 errorCode = U_UNSUPPORTED_ERROR; 85 } 86 return nullptr; 87 } 88 89 void WholeStringBreakIterator::setText(const UnicodeString &text) { 90 length = text.length(); 91 } 92 void WholeStringBreakIterator::setText(UText *text, UErrorCode &errorCode) { 93 if (U_SUCCESS(errorCode)) { 94 int64_t length64 = utext_nativeLength(text); 95 if (length64 <= INT32_MAX) { 96 length = static_cast<int32_t>(length64); 97 } else { 98 errorCode = U_INDEX_OUTOFBOUNDS_ERROR; 99 } 100 } 101 } 102 void WholeStringBreakIterator::adoptText(CharacterIterator*) { 103 UPRV_UNREACHABLE_EXIT; // should not be called 104 } 105 106 int32_t WholeStringBreakIterator::first() { return 0; } 107 int32_t WholeStringBreakIterator::last() { return length; } 108 int32_t WholeStringBreakIterator::previous() { return 0; } 109 int32_t WholeStringBreakIterator::next() { return length; } 110 int32_t WholeStringBreakIterator::current() const { return 0; } 111 int32_t WholeStringBreakIterator::following(int32_t /*offset*/) { return length; } 112 int32_t WholeStringBreakIterator::preceding(int32_t /*offset*/) { return 0; } 113 UBool WholeStringBreakIterator::isBoundary(int32_t /*offset*/) { return false; } 114 int32_t WholeStringBreakIterator::next(int32_t /*n*/) { return length; } 115 116 WholeStringBreakIterator *WholeStringBreakIterator::createBufferClone( 117 void * /*stackBuffer*/, int32_t & /*BufferSize*/, UErrorCode &errorCode) { 118 if (U_SUCCESS(errorCode)) { 119 errorCode = U_UNSUPPORTED_ERROR; 120 } 121 return nullptr; 122 } 123 WholeStringBreakIterator &WholeStringBreakIterator::refreshInputText( 124 UText * /*input*/, UErrorCode &errorCode) { 125 if (U_SUCCESS(errorCode)) { 126 errorCode = U_UNSUPPORTED_ERROR; 127 } 128 return *this; 129 } 130 131 U_CFUNC 132 BreakIterator *ustrcase_getTitleBreakIterator( 133 const Locale *locale, const char *locID, uint32_t options, BreakIterator *iter, 134 LocalPointer<BreakIterator> &ownedIter, UErrorCode &errorCode) { 135 if (U_FAILURE(errorCode)) { return nullptr; } 136 options &= U_TITLECASE_ITERATOR_MASK; 137 if (options != 0 && iter != nullptr) { 138 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 139 return nullptr; 140 } 141 if (iter == nullptr) { 142 switch (options) { 143 case 0: 144 iter = BreakIterator::createWordInstance( 145 locale != nullptr ? *locale : Locale(locID), errorCode); 146 break; 147 case U_TITLECASE_WHOLE_STRING: 148 iter = new WholeStringBreakIterator(); 149 if (iter == nullptr) { 150 errorCode = U_MEMORY_ALLOCATION_ERROR; 151 } 152 break; 153 case U_TITLECASE_SENTENCES: 154 iter = BreakIterator::createSentenceInstance( 155 locale != nullptr ? *locale : Locale(locID), errorCode); 156 break; 157 default: 158 errorCode = U_ILLEGAL_ARGUMENT_ERROR; 159 break; 160 } 161 ownedIter.adoptInstead(iter); 162 } 163 return iter; 164 } 165 166 int32_t CaseMap::toTitle( 167 const char *locale, uint32_t options, BreakIterator *iter, 168 const char16_t *src, int32_t srcLength, 169 char16_t *dest, int32_t destCapacity, Edits *edits, 170 UErrorCode &errorCode) { 171 LocalPointer<BreakIterator> ownedIter; 172 iter = ustrcase_getTitleBreakIterator(nullptr, locale, options, iter, ownedIter, errorCode); 173 if(iter==nullptr) { 174 return 0; 175 } 176 UnicodeString s(srcLength<0, src, srcLength); 177 iter->setText(s); 178 return ustrcase_map( 179 ustrcase_getCaseLocale(locale), options, iter, 180 dest, destCapacity, 181 src, srcLength, 182 ustrcase_internalToTitle, edits, errorCode); 183 } 184 185 U_NAMESPACE_END 186 187 U_NAMESPACE_USE 188 189 U_CAPI int32_t U_EXPORT2 190 u_strToTitle(char16_t *dest, int32_t destCapacity, 191 const char16_t *src, int32_t srcLength, 192 UBreakIterator *titleIter, 193 const char *locale, 194 UErrorCode *pErrorCode) { 195 LocalPointer<BreakIterator> ownedIter; 196 BreakIterator *iter = ustrcase_getTitleBreakIterator( 197 nullptr, locale, 0, reinterpret_cast<BreakIterator *>(titleIter), 198 ownedIter, *pErrorCode); 199 if (iter == nullptr) { 200 return 0; 201 } 202 UnicodeString s(srcLength<0, src, srcLength); 203 iter->setText(s); 204 return ustrcase_mapWithOverlap( 205 ustrcase_getCaseLocale(locale), 0, iter, 206 dest, destCapacity, 207 src, srcLength, 208 ustrcase_internalToTitle, *pErrorCode); 209 } 210 211 U_CAPI int32_t U_EXPORT2 212 ucasemap_toTitle(UCaseMap *csm, 213 char16_t *dest, int32_t destCapacity, 214 const char16_t *src, int32_t srcLength, 215 UErrorCode *pErrorCode) { 216 if (U_FAILURE(*pErrorCode)) { 217 return 0; 218 } 219 if (csm->iter == nullptr) { 220 LocalPointer<BreakIterator> ownedIter; 221 BreakIterator *iter = ustrcase_getTitleBreakIterator( 222 nullptr, csm->locale, csm->options, nullptr, ownedIter, *pErrorCode); 223 if (iter == nullptr) { 224 return 0; 225 } 226 csm->iter = ownedIter.orphan(); 227 } 228 UnicodeString s(srcLength<0, src, srcLength); 229 csm->iter->setText(s); 230 return ustrcase_map( 231 csm->caseLocale, csm->options, csm->iter, 232 dest, destCapacity, 233 src, srcLength, 234 ustrcase_internalToTitle, nullptr, *pErrorCode); 235 } 236 237 #endif // !UCONFIG_NO_BREAK_ITERATION