casetrn.cpp (5083B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2001-2011, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: casetrn.cpp 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2004sep03 16 * created by: Markus W. Scherer 17 * 18 * Implementation class for lower-/upper-/title-casing transliterators. 19 */ 20 21 #include "unicode/utypes.h" 22 23 #if !UCONFIG_NO_TRANSLITERATION 24 25 #include "unicode/uchar.h" 26 #include "unicode/ustring.h" 27 #include "unicode/utf.h" 28 #include "unicode/utf16.h" 29 #include "tolowtrn.h" 30 #include "ucase.h" 31 #include "cpputils.h" 32 33 /* case context iterator using a Replaceable */ 34 U_CFUNC UChar32 U_CALLCONV 35 utrans_rep_caseContextIterator(void *context, int8_t dir) 36 { 37 U_NAMESPACE_USE 38 39 UCaseContext *csc=(UCaseContext *)context; 40 Replaceable *rep=(Replaceable *)csc->p; 41 UChar32 c; 42 43 if(dir<0) { 44 /* reset for backward iteration */ 45 csc->index=csc->cpStart; 46 csc->dir=dir; 47 } else if(dir>0) { 48 /* reset for forward iteration */ 49 csc->index=csc->cpLimit; 50 csc->dir=dir; 51 } else { 52 /* continue current iteration direction */ 53 dir=csc->dir; 54 } 55 56 // automatically adjust start and limit if the Replaceable disagrees 57 // with the original values 58 if(dir<0) { 59 if(csc->start<csc->index) { 60 c=rep->char32At(csc->index-1); 61 if(c<0) { 62 csc->start=csc->index; 63 } else { 64 csc->index-=U16_LENGTH(c); 65 return c; 66 } 67 } 68 } else { 69 // detect, and store in csc->b1, if we hit the limit 70 if(csc->index<csc->limit) { 71 c=rep->char32At(csc->index); 72 if(c<0) { 73 csc->limit=csc->index; 74 csc->b1=true; 75 } else { 76 csc->index+=U16_LENGTH(c); 77 return c; 78 } 79 } else { 80 csc->b1=true; 81 } 82 } 83 return U_SENTINEL; 84 } 85 86 U_NAMESPACE_BEGIN 87 88 UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(CaseMapTransliterator) 89 90 /** 91 * Constructs a transliterator. 92 */ 93 CaseMapTransliterator::CaseMapTransliterator(const UnicodeString &id, UCaseMapFull *map) : 94 Transliterator(id, nullptr), 95 fMap(map) 96 { 97 // TODO test incremental mode with context-sensitive text (e.g. greek sigma) 98 // TODO need to call setMaximumContextLength()?! 99 } 100 101 /** 102 * Destructor. 103 */ 104 CaseMapTransliterator::~CaseMapTransliterator() { 105 } 106 107 /** 108 * Copy constructor. 109 */ 110 CaseMapTransliterator::CaseMapTransliterator(const CaseMapTransliterator& o) : 111 Transliterator(o), 112 fMap(o.fMap) 113 { 114 } 115 116 /** 117 * Assignment operator. 118 */ 119 /*CaseMapTransliterator& CaseMapTransliterator::operator=(const CaseMapTransliterator& o) { 120 Transliterator::operator=(o); 121 fMap = o.fMap; 122 return *this; 123 }*/ 124 125 /** 126 * Transliterator API. 127 */ 128 /*CaseMapTransliterator* CaseMapTransliterator::clone() const { 129 return new CaseMapTransliterator(*this); 130 }*/ 131 132 /** 133 * Implements {@link Transliterator#handleTransliterate}. 134 */ 135 void CaseMapTransliterator::handleTransliterate(Replaceable& text, 136 UTransPosition& offsets, 137 UBool isIncremental) const 138 { 139 if (offsets.start >= offsets.limit) { 140 return; 141 } 142 143 UCaseContext csc; 144 uprv_memset(&csc, 0, sizeof(csc)); 145 csc.p = &text; 146 csc.start = offsets.contextStart; 147 csc.limit = offsets.contextLimit; 148 149 UnicodeString tmp; 150 const char16_t *s; 151 UChar32 c; 152 int32_t textPos, delta, result; 153 154 for(textPos=offsets.start; textPos<offsets.limit;) { 155 csc.cpStart=textPos; 156 c=text.char32At(textPos); 157 csc.cpLimit=textPos+=U16_LENGTH(c); 158 159 result=fMap(c, utrans_rep_caseContextIterator, &csc, &s, UCASE_LOC_ROOT); 160 161 if(csc.b1 && isIncremental) { 162 // fMap() tried to look beyond the context limit 163 // wait for more input 164 offsets.start=csc.cpStart; 165 return; 166 } 167 168 if(result>=0) { 169 // replace the current code point with its full case mapping result 170 // see UCASE_MAX_STRING_LENGTH 171 if(result<=UCASE_MAX_STRING_LENGTH) { 172 // string s[result] 173 tmp.setTo(false, s, result); 174 delta=result-U16_LENGTH(c); 175 } else { 176 // single code point 177 tmp.setTo(result); 178 delta=tmp.length()-U16_LENGTH(c); 179 } 180 text.handleReplaceBetween(csc.cpStart, textPos, tmp); 181 if(delta!=0) { 182 textPos+=delta; 183 csc.limit=offsets.contextLimit+=delta; 184 offsets.limit+=delta; 185 } 186 } 187 } 188 offsets.start=textPos; 189 } 190 191 U_NAMESPACE_END 192 193 #endif /* #if !UCONFIG_NO_TRANSLITERATION */