utrans.cpp (15712B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 1997-2009,2014 International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * Date Name Description 9 * 06/21/00 aliu Creation. 10 ******************************************************************************* 11 */ 12 13 #include "unicode/utypes.h" 14 15 #if !UCONFIG_NO_TRANSLITERATION 16 17 #include "unicode/utrans.h" 18 #include "unicode/putil.h" 19 #include "unicode/rep.h" 20 #include "unicode/translit.h" 21 #include "unicode/unifilt.h" 22 #include "unicode/uniset.h" 23 #include "unicode/ustring.h" 24 #include "unicode/uenum.h" 25 #include "unicode/uset.h" 26 #include "uenumimp.h" 27 #include "cpputils.h" 28 #include "rbt.h" 29 30 // Following macro is to be followed by <return value>';' or just ';' 31 #define utrans_ENTRY(s) if ((s)==nullptr || U_FAILURE(*(s))) return 32 33 /******************************************************************** 34 * Replaceable-UReplaceableCallbacks glue 35 ********************************************************************/ 36 37 /** 38 * Make a UReplaceable + UReplaceableCallbacks into a Replaceable object. 39 */ 40 U_NAMESPACE_BEGIN 41 class ReplaceableGlue : public Replaceable { 42 43 UReplaceable *rep; 44 const UReplaceableCallbacks *func; 45 46 public: 47 48 ReplaceableGlue(UReplaceable *replaceable, 49 const UReplaceableCallbacks *funcCallback); 50 51 virtual ~ReplaceableGlue(); 52 53 virtual void handleReplaceBetween(int32_t start, 54 int32_t limit, 55 const UnicodeString& text) override; 56 57 virtual void extractBetween(int32_t start, 58 int32_t limit, 59 UnicodeString& target) const override; 60 61 virtual void copy(int32_t start, int32_t limit, int32_t dest) override; 62 63 // virtual Replaceable *clone() const { return nullptr; } same as default 64 65 /** 66 * ICU "poor man's RTTI", returns a UClassID for the actual class. 67 * 68 * @draft ICU 2.2 69 */ 70 virtual UClassID getDynamicClassID() const override; 71 72 /** 73 * ICU "poor man's RTTI", returns a UClassID for this class. 74 * 75 * @draft ICU 2.2 76 */ 77 static UClassID U_EXPORT2 getStaticClassID(); 78 79 protected: 80 81 virtual int32_t getLength() const override; 82 83 virtual char16_t getCharAt(int32_t offset) const override; 84 85 virtual UChar32 getChar32At(int32_t offset) const override; 86 }; 87 88 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(ReplaceableGlue) 89 90 ReplaceableGlue::ReplaceableGlue(UReplaceable *replaceable, 91 const UReplaceableCallbacks *funcCallback) 92 : Replaceable() 93 { 94 this->rep = replaceable; 95 this->func = funcCallback; 96 } 97 98 ReplaceableGlue::~ReplaceableGlue() {} 99 100 int32_t ReplaceableGlue::getLength() const { 101 return (*func->length)(rep); 102 } 103 104 char16_t ReplaceableGlue::getCharAt(int32_t offset) const { 105 return (*func->charAt)(rep, offset); 106 } 107 108 UChar32 ReplaceableGlue::getChar32At(int32_t offset) const { 109 return (*func->char32At)(rep, offset); 110 } 111 112 void ReplaceableGlue::handleReplaceBetween(int32_t start, 113 int32_t limit, 114 const UnicodeString& text) { 115 (*func->replace)(rep, start, limit, text.getBuffer(), text.length()); 116 } 117 118 void ReplaceableGlue::extractBetween(int32_t start, 119 int32_t limit, 120 UnicodeString& target) const { 121 (*func->extract)(rep, start, limit, target.getBuffer(limit-start)); 122 target.releaseBuffer(limit-start); 123 } 124 125 void ReplaceableGlue::copy(int32_t start, int32_t limit, int32_t dest) { 126 (*func->copy)(rep, start, limit, dest); 127 } 128 U_NAMESPACE_END 129 /******************************************************************** 130 * General API 131 ********************************************************************/ 132 U_NAMESPACE_USE 133 134 U_CAPI UTransliterator* U_EXPORT2 135 utrans_openU(const char16_t *id, 136 int32_t idLength, 137 UTransDirection dir, 138 const char16_t *rules, 139 int32_t rulesLength, 140 UParseError *parseError, 141 UErrorCode *status) { 142 if(status==nullptr || U_FAILURE(*status)) { 143 return nullptr; 144 } 145 if (id == nullptr) { 146 *status = U_ILLEGAL_ARGUMENT_ERROR; 147 return nullptr; 148 } 149 UParseError temp; 150 151 if(parseError == nullptr){ 152 parseError = &temp; 153 } 154 155 UnicodeString ID(idLength<0, id, idLength); // r-o alias 156 157 if(rules==nullptr){ 158 159 Transliterator *trans = nullptr; 160 161 trans = Transliterator::createInstance(ID, dir, *parseError, *status); 162 163 if(U_FAILURE(*status)){ 164 return nullptr; 165 } 166 return (UTransliterator*) trans; 167 }else{ 168 UnicodeString ruleStr(rulesLength < 0, 169 rules, 170 rulesLength); // r-o alias 171 172 Transliterator *trans = nullptr; 173 trans = Transliterator::createFromRules(ID, ruleStr, dir, *parseError, *status); 174 if(U_FAILURE(*status)) { 175 return nullptr; 176 } 177 178 return (UTransliterator*) trans; 179 } 180 } 181 182 U_CAPI UTransliterator* U_EXPORT2 183 utrans_open(const char* id, 184 UTransDirection dir, 185 const char16_t* rules, /* may be Null */ 186 int32_t rulesLength, /* -1 if null-terminated */ 187 UParseError* parseError, /* may be Null */ 188 UErrorCode* status) { 189 UnicodeString ID(id, -1, US_INV); // use invariant converter 190 return utrans_openU(ID.getBuffer(), ID.length(), dir, 191 rules, rulesLength, 192 parseError, status); 193 } 194 195 U_CAPI UTransliterator* U_EXPORT2 196 utrans_openInverse(const UTransliterator* trans, 197 UErrorCode* status) { 198 199 utrans_ENTRY(status) nullptr; 200 201 UTransliterator* result = 202 (UTransliterator*) ((Transliterator*) trans)->createInverse(*status); 203 204 return result; 205 } 206 207 U_CAPI UTransliterator* U_EXPORT2 208 utrans_clone(const UTransliterator* trans, 209 UErrorCode* status) { 210 211 utrans_ENTRY(status) nullptr; 212 213 if (trans == nullptr) { 214 *status = U_ILLEGAL_ARGUMENT_ERROR; 215 return nullptr; 216 } 217 218 Transliterator *t = ((Transliterator*) trans)->clone(); 219 if (t == nullptr) { 220 *status = U_MEMORY_ALLOCATION_ERROR; 221 } 222 return (UTransliterator*) t; 223 } 224 225 U_CAPI void U_EXPORT2 226 utrans_close(UTransliterator* trans) { 227 delete (Transliterator*) trans; 228 } 229 230 U_CAPI const char16_t * U_EXPORT2 231 utrans_getUnicodeID(const UTransliterator *trans, 232 int32_t *resultLength) { 233 // Transliterator keeps its ID NUL-terminated 234 const UnicodeString &ID=((Transliterator*) trans)->getID(); 235 if(resultLength!=nullptr) { 236 *resultLength=ID.length(); 237 } 238 return ID.getBuffer(); 239 } 240 241 U_CAPI int32_t U_EXPORT2 242 utrans_getID(const UTransliterator* trans, 243 char* buf, 244 int32_t bufCapacity) { 245 return ((Transliterator*) trans)->getID().extract(0, 0x7fffffff, buf, bufCapacity, US_INV); 246 } 247 248 U_CAPI void U_EXPORT2 249 utrans_register(UTransliterator* adoptedTrans, 250 UErrorCode* status) { 251 utrans_ENTRY(status); 252 // status currently ignored; may remove later 253 Transliterator::registerInstance((Transliterator*) adoptedTrans); 254 } 255 256 U_CAPI void U_EXPORT2 257 utrans_unregisterID(const char16_t* id, int32_t idLength) { 258 UnicodeString ID(idLength<0, id, idLength); // r-o alias 259 Transliterator::unregister(ID); 260 } 261 262 U_CAPI void U_EXPORT2 263 utrans_unregister(const char* id) { 264 UnicodeString ID(id, -1, US_INV); // use invariant converter 265 Transliterator::unregister(ID); 266 } 267 268 U_CAPI void U_EXPORT2 269 utrans_setFilter(UTransliterator* trans, 270 const char16_t* filterPattern, 271 int32_t filterPatternLen, 272 UErrorCode* status) { 273 274 utrans_ENTRY(status); 275 UnicodeFilter* filter = nullptr; 276 if (filterPattern != nullptr && *filterPattern != 0) { 277 // Create read only alias of filterPattern: 278 UnicodeString pat(filterPatternLen < 0, filterPattern, filterPatternLen); 279 filter = new UnicodeSet(pat, *status); 280 /* test for nullptr */ 281 if (filter == nullptr) { 282 *status = U_MEMORY_ALLOCATION_ERROR; 283 return; 284 } 285 if (U_FAILURE(*status)) { 286 delete filter; 287 filter = nullptr; 288 } 289 } 290 ((Transliterator*) trans)->adoptFilter(filter); 291 } 292 293 U_CAPI int32_t U_EXPORT2 294 utrans_countAvailableIDs() { 295 return Transliterator::countAvailableIDs(); 296 } 297 298 U_CAPI int32_t U_EXPORT2 299 utrans_getAvailableID(int32_t index, 300 char* buf, // may be nullptr 301 int32_t bufCapacity) { 302 return Transliterator::getAvailableID(index).extract(0, 0x7fffffff, buf, bufCapacity, US_INV); 303 } 304 305 /* Transliterator UEnumeration ---------------------------------------------- */ 306 307 typedef struct UTransEnumeration { 308 UEnumeration uenum; 309 int32_t index, count; 310 } UTransEnumeration; 311 312 U_CDECL_BEGIN 313 static int32_t U_CALLCONV 314 utrans_enum_count(UEnumeration *uenum, UErrorCode *pErrorCode) { 315 if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) { 316 return 0; 317 } 318 return ((UTransEnumeration *)uenum)->count; 319 } 320 321 static const char16_t* U_CALLCONV 322 utrans_enum_unext(UEnumeration *uenum, 323 int32_t* resultLength, 324 UErrorCode *pErrorCode) { 325 if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) { 326 return nullptr; 327 } 328 329 UTransEnumeration *ute=(UTransEnumeration *)uenum; 330 int32_t index=ute->index; 331 if(index<ute->count) { 332 const UnicodeString &ID=Transliterator::getAvailableID(index); 333 ute->index=index+1; 334 if(resultLength!=nullptr) { 335 *resultLength=ID.length(); 336 } 337 // Transliterator keeps its ID NUL-terminated 338 return ID.getBuffer(); 339 } 340 341 if(resultLength!=nullptr) { 342 *resultLength=0; 343 } 344 return nullptr; 345 } 346 347 static void U_CALLCONV 348 utrans_enum_reset(UEnumeration *uenum, UErrorCode *pErrorCode) { 349 if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) { 350 return; 351 } 352 353 UTransEnumeration *ute=(UTransEnumeration *)uenum; 354 ute->index=0; 355 ute->count=Transliterator::countAvailableIDs(); 356 } 357 358 static void U_CALLCONV 359 utrans_enum_close(UEnumeration *uenum) { 360 uprv_free(uenum); 361 } 362 U_CDECL_END 363 364 static const UEnumeration utransEnumeration={ 365 nullptr, 366 nullptr, 367 utrans_enum_close, 368 utrans_enum_count, 369 utrans_enum_unext, 370 uenum_nextDefault, 371 utrans_enum_reset 372 }; 373 374 U_CAPI UEnumeration * U_EXPORT2 375 utrans_openIDs(UErrorCode *pErrorCode) { 376 UTransEnumeration *ute; 377 378 if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) { 379 return nullptr; 380 } 381 382 ute=(UTransEnumeration *)uprv_malloc(sizeof(UTransEnumeration)); 383 if(ute==nullptr) { 384 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 385 return nullptr; 386 } 387 388 ute->uenum=utransEnumeration; 389 ute->index=0; 390 ute->count=Transliterator::countAvailableIDs(); 391 return (UEnumeration *)ute; 392 } 393 394 /******************************************************************** 395 * Transliteration API 396 ********************************************************************/ 397 398 U_CAPI void U_EXPORT2 399 utrans_trans(const UTransliterator* trans, 400 UReplaceable* rep, 401 const UReplaceableCallbacks* repFunc, 402 int32_t start, 403 int32_t* limit, 404 UErrorCode* status) { 405 406 utrans_ENTRY(status); 407 408 if (trans == nullptr || rep == nullptr || repFunc == nullptr || limit == nullptr) { 409 *status = U_ILLEGAL_ARGUMENT_ERROR; 410 return; 411 } 412 413 ReplaceableGlue r(rep, repFunc); 414 415 *limit = ((Transliterator*) trans)->transliterate(r, start, *limit); 416 } 417 418 U_CAPI void U_EXPORT2 419 utrans_transIncremental(const UTransliterator* trans, 420 UReplaceable* rep, 421 const UReplaceableCallbacks* repFunc, 422 UTransPosition* pos, 423 UErrorCode* status) { 424 425 utrans_ENTRY(status); 426 427 if (trans == nullptr || rep == nullptr || repFunc == nullptr || pos == nullptr) { 428 *status = U_ILLEGAL_ARGUMENT_ERROR; 429 return; 430 } 431 432 ReplaceableGlue r(rep, repFunc); 433 434 ((Transliterator*) trans)->transliterate(r, *pos, *status); 435 } 436 437 U_CAPI void U_EXPORT2 438 utrans_transUChars(const UTransliterator* trans, 439 char16_t* text, 440 int32_t* textLength, 441 int32_t textCapacity, 442 int32_t start, 443 int32_t* limit, 444 UErrorCode* status) { 445 446 utrans_ENTRY(status); 447 448 if (trans == nullptr || text == nullptr || limit == nullptr) { 449 *status = U_ILLEGAL_ARGUMENT_ERROR; 450 return; 451 } 452 453 int32_t textLen = (textLength == nullptr || *textLength < 0) 454 ? u_strlen(text) : *textLength; 455 // writeable alias: for this ct, len CANNOT be -1 (why?) 456 UnicodeString str(text, textLen, textCapacity); 457 458 *limit = ((Transliterator*) trans)->transliterate(str, start, *limit); 459 460 // Copy the string buffer back to text (only if necessary) 461 // and fill in *neededCapacity (if neededCapacity != nullptr). 462 textLen = str.extract(text, textCapacity, *status); 463 if(textLength != nullptr) { 464 *textLength = textLen; 465 } 466 } 467 468 U_CAPI void U_EXPORT2 469 utrans_transIncrementalUChars(const UTransliterator* trans, 470 char16_t* text, 471 int32_t* textLength, 472 int32_t textCapacity, 473 UTransPosition* pos, 474 UErrorCode* status) { 475 476 utrans_ENTRY(status); 477 478 if (trans == nullptr || text == nullptr || pos == nullptr) { 479 *status = U_ILLEGAL_ARGUMENT_ERROR; 480 return; 481 } 482 483 int32_t textLen = (textLength == nullptr || *textLength < 0) 484 ? u_strlen(text) : *textLength; 485 // writeable alias: for this ct, len CANNOT be -1 (why?) 486 UnicodeString str(text, textLen, textCapacity); 487 488 ((Transliterator*) trans)->transliterate(str, *pos, *status); 489 490 // Copy the string buffer back to text (only if necessary) 491 // and fill in *neededCapacity (if neededCapacity != nullptr). 492 textLen = str.extract(text, textCapacity, *status); 493 if(textLength != nullptr) { 494 *textLength = textLen; 495 } 496 } 497 498 U_CAPI int32_t U_EXPORT2 499 utrans_toRules( const UTransliterator* trans, 500 UBool escapeUnprintable, 501 char16_t* result, int32_t resultLength, 502 UErrorCode* status) { 503 utrans_ENTRY(status) 0; 504 if ( (result==nullptr)? resultLength!=0: resultLength<0 ) { 505 *status = U_ILLEGAL_ARGUMENT_ERROR; 506 return 0; 507 } 508 509 UnicodeString res; 510 res.setTo(result, 0, resultLength); 511 ((Transliterator*) trans)->toRules(res, escapeUnprintable); 512 return res.extract(result, resultLength, *status); 513 } 514 515 U_CAPI USet* U_EXPORT2 516 utrans_getSourceSet(const UTransliterator* trans, 517 UBool ignoreFilter, 518 USet* fillIn, 519 UErrorCode* status) { 520 utrans_ENTRY(status) fillIn; 521 522 if (fillIn == nullptr) { 523 fillIn = uset_openEmpty(); 524 } 525 if (ignoreFilter) { 526 ((Transliterator*) trans)->handleGetSourceSet(*((UnicodeSet*)fillIn)); 527 } else { 528 ((Transliterator*) trans)->getSourceSet(*((UnicodeSet*)fillIn)); 529 } 530 return fillIn; 531 } 532 533 #endif /* #if !UCONFIG_NO_TRANSLITERATION */