scriptset.cpp (8672B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (C) 2014, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 * 9 * scriptset.cpp 10 * 11 * created on: 2013 Jan 7 12 * created by: Andy Heninger 13 */ 14 15 #include "unicode/utypes.h" 16 17 #include "unicode/uchar.h" 18 #include "unicode/unistr.h" 19 20 #include "scriptset.h" 21 #include "uassert.h" 22 #include "cmemory.h" 23 24 U_NAMESPACE_BEGIN 25 26 //---------------------------------------------------------------------------- 27 // 28 // ScriptSet implementation 29 // 30 //---------------------------------------------------------------------------- 31 ScriptSet::ScriptSet() { 32 uprv_memset(bits, 0, sizeof(bits)); 33 } 34 35 ScriptSet::~ScriptSet() { 36 } 37 38 ScriptSet::ScriptSet(const ScriptSet &other) { 39 *this = other; 40 } 41 42 ScriptSet & ScriptSet::operator =(const ScriptSet &other) { 43 if (this != &other) { 44 uprv_memcpy(bits, other.bits, sizeof(bits)); 45 } 46 return *this; 47 } 48 49 bool ScriptSet::operator == (const ScriptSet &other) const { 50 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 51 if (bits[i] != other.bits[i]) { 52 return false; 53 } 54 } 55 return true; 56 } 57 58 UBool ScriptSet::test(UScriptCode script, UErrorCode &status) const { 59 if (U_FAILURE(status)) { 60 return false; 61 } 62 if (script < 0 || static_cast<int32_t>(script) >= SCRIPT_LIMIT) { 63 status = U_ILLEGAL_ARGUMENT_ERROR; 64 return false; 65 } 66 uint32_t index = script / 32; 67 uint32_t bit = 1 << (script & 31); 68 return ((bits[index] & bit) != 0); 69 } 70 71 72 ScriptSet &ScriptSet::set(UScriptCode script, UErrorCode &status) { 73 if (U_FAILURE(status)) { 74 return *this; 75 } 76 if (script < 0 || static_cast<int32_t>(script) >= SCRIPT_LIMIT) { 77 status = U_ILLEGAL_ARGUMENT_ERROR; 78 return *this; 79 } 80 uint32_t index = script / 32; 81 uint32_t bit = 1 << (script & 31); 82 bits[index] |= bit; 83 return *this; 84 } 85 86 ScriptSet &ScriptSet::reset(UScriptCode script, UErrorCode &status) { 87 if (U_FAILURE(status)) { 88 return *this; 89 } 90 if (script < 0 || static_cast<int32_t>(script) >= SCRIPT_LIMIT) { 91 status = U_ILLEGAL_ARGUMENT_ERROR; 92 return *this; 93 } 94 uint32_t index = script / 32; 95 uint32_t bit = 1 << (script & 31); 96 bits[index] &= ~bit; 97 return *this; 98 } 99 100 101 102 ScriptSet &ScriptSet::Union(const ScriptSet &other) { 103 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 104 bits[i] |= other.bits[i]; 105 } 106 return *this; 107 } 108 109 ScriptSet &ScriptSet::intersect(const ScriptSet &other) { 110 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 111 bits[i] &= other.bits[i]; 112 } 113 return *this; 114 } 115 116 ScriptSet &ScriptSet::intersect(UScriptCode script, UErrorCode &status) { 117 ScriptSet t; 118 t.set(script, status); 119 if (U_SUCCESS(status)) { 120 this->intersect(t); 121 } 122 return *this; 123 } 124 125 UBool ScriptSet::intersects(const ScriptSet &other) const { 126 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 127 if ((bits[i] & other.bits[i]) != 0) { 128 return true; 129 } 130 } 131 return false; 132 } 133 134 UBool ScriptSet::contains(const ScriptSet &other) const { 135 ScriptSet t(*this); 136 t.intersect(other); 137 return (t == other); 138 } 139 140 141 ScriptSet &ScriptSet::setAll() { 142 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 143 bits[i] = 0xffffffffu; 144 } 145 return *this; 146 } 147 148 149 ScriptSet &ScriptSet::resetAll() { 150 uprv_memset(bits, 0, sizeof(bits)); 151 return *this; 152 } 153 154 int32_t ScriptSet::countMembers() const { 155 // This bit counter is good for sparse numbers of '1's, which is 156 // very much the case that we will usually have. 157 int32_t count = 0; 158 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 159 uint32_t x = bits[i]; 160 while (x > 0) { 161 count++; 162 x &= (x - 1); // and off the least significant one bit. 163 } 164 } 165 return count; 166 } 167 168 int32_t ScriptSet::hashCode() const { 169 int32_t hash = 0; 170 for (int32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 171 hash ^= bits[i]; 172 } 173 return hash; 174 } 175 176 int32_t ScriptSet::nextSetBit(int32_t fromIndex) const { 177 // TODO: Wants a better implementation. 178 if (fromIndex < 0) { 179 return -1; 180 } 181 UErrorCode status = U_ZERO_ERROR; 182 for (int32_t scriptIndex = fromIndex; scriptIndex < SCRIPT_LIMIT; scriptIndex++) { 183 if (test(static_cast<UScriptCode>(scriptIndex), status)) { 184 return scriptIndex; 185 } 186 } 187 return -1; 188 } 189 190 UBool ScriptSet::isEmpty() const { 191 for (uint32_t i=0; i<UPRV_LENGTHOF(bits); i++) { 192 if (bits[i] != 0) { 193 return false; 194 } 195 } 196 return true; 197 } 198 199 UnicodeString &ScriptSet::displayScripts(UnicodeString &dest) const { 200 UBool firstTime = true; 201 for (int32_t i = nextSetBit(0); i >= 0; i = nextSetBit(i + 1)) { 202 if (!firstTime) { 203 dest.append(static_cast<char16_t>(0x20)); 204 } 205 firstTime = false; 206 const char* scriptName = uscript_getShortName(static_cast<UScriptCode>(i)); 207 dest.append(UnicodeString(scriptName, -1, US_INV)); 208 } 209 return dest; 210 } 211 212 ScriptSet &ScriptSet::parseScripts(const UnicodeString &scriptString, UErrorCode &status) { 213 resetAll(); 214 if (U_FAILURE(status)) { 215 return *this; 216 } 217 UnicodeString oneScriptName; 218 for (int32_t i=0; i<scriptString.length();) { 219 UChar32 c = scriptString.char32At(i); 220 i = scriptString.moveIndex32(i, 1); 221 if (!u_isUWhiteSpace(c)) { 222 oneScriptName.append(c); 223 if (i < scriptString.length()) { 224 continue; 225 } 226 } 227 if (oneScriptName.length() > 0) { 228 char buf[40]; 229 oneScriptName.extract(0, oneScriptName.length(), buf, sizeof(buf)-1, US_INV); 230 buf[sizeof(buf)-1] = 0; 231 int32_t sc = u_getPropertyValueEnum(UCHAR_SCRIPT, buf); 232 if (sc == UCHAR_INVALID_CODE) { 233 status = U_ILLEGAL_ARGUMENT_ERROR; 234 } else { 235 this->set(static_cast<UScriptCode>(sc), status); 236 } 237 if (U_FAILURE(status)) { 238 return *this; 239 } 240 oneScriptName.remove(); 241 } 242 } 243 return *this; 244 } 245 246 void ScriptSet::setScriptExtensions(UChar32 codePoint, UErrorCode& status) { 247 if (U_FAILURE(status)) { return; } 248 static const int32_t FIRST_GUESS_SCRIPT_CAPACITY = 20; 249 MaybeStackArray<UScriptCode,FIRST_GUESS_SCRIPT_CAPACITY> scripts; 250 UErrorCode internalStatus = U_ZERO_ERROR; 251 int32_t script_count = -1; 252 253 while (true) { 254 script_count = uscript_getScriptExtensions( 255 codePoint, scripts.getAlias(), scripts.getCapacity(), &internalStatus); 256 if (internalStatus == U_BUFFER_OVERFLOW_ERROR) { 257 // Need to allocate more space 258 if (scripts.resize(script_count) == nullptr) { 259 status = U_MEMORY_ALLOCATION_ERROR; 260 return; 261 } 262 internalStatus = U_ZERO_ERROR; 263 } else { 264 break; 265 } 266 } 267 268 // Check if we failed for some reason other than buffer overflow 269 if (U_FAILURE(internalStatus)) { 270 status = internalStatus; 271 return; 272 } 273 274 // Load the scripts into the ScriptSet and return 275 for (int32_t i = 0; i < script_count; i++) { 276 this->set(scripts[i], status); 277 if (U_FAILURE(status)) { return; } 278 } 279 } 280 281 U_NAMESPACE_END 282 283 U_CAPI UBool U_EXPORT2 284 uhash_equalsScriptSet(const UElement key1, const UElement key2) { 285 icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer); 286 icu::ScriptSet *s2 = static_cast<icu::ScriptSet *>(key2.pointer); 287 return (*s1 == *s2); 288 } 289 290 U_CAPI int32_t U_EXPORT2 291 uhash_compareScriptSet(UElement key0, UElement key1) { 292 icu::ScriptSet *s0 = static_cast<icu::ScriptSet *>(key0.pointer); 293 icu::ScriptSet *s1 = static_cast<icu::ScriptSet *>(key1.pointer); 294 int32_t diff = s0->countMembers() - s1->countMembers(); 295 if (diff != 0) return diff; 296 int32_t i0 = s0->nextSetBit(0); 297 int32_t i1 = s1->nextSetBit(0); 298 while ((diff = i0-i1) == 0 && i0 > 0) { 299 i0 = s0->nextSetBit(i0+1); 300 i1 = s1->nextSetBit(i1+1); 301 } 302 return diff; 303 } 304 305 U_CAPI int32_t U_EXPORT2 306 uhash_hashScriptSet(const UElement key) { 307 icu::ScriptSet *s = static_cast<icu::ScriptSet *>(key.pointer); 308 return s->hashCode(); 309 } 310 311 U_CAPI void U_EXPORT2 312 uhash_deleteScriptSet(void *obj) { 313 icu::ScriptSet *s = static_cast<icu::ScriptSet *>(obj); 314 delete s; 315 }