uidna.cpp (28986B)
1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2003-2014, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: uidna.cpp 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2003feb1 16 * created by: Ram Viswanadha 17 */ 18 19 #include "unicode/utypes.h" 20 21 #if !UCONFIG_NO_IDNA 22 23 #include "unicode/uidna.h" 24 #include "unicode/ustring.h" 25 #include "unicode/usprep.h" 26 #include "punycode.h" 27 #include "ustr_imp.h" 28 #include "cmemory.h" 29 #include "uassert.h" 30 #include "sprpimpl.h" 31 32 /* it is official IDNA ACE Prefix is "xn--" */ 33 static const char16_t ACE_PREFIX[] ={ 0x0078,0x006E,0x002d,0x002d } ; 34 #define ACE_PREFIX_LENGTH 4 35 36 #define MAX_LABEL_LENGTH 63 37 /* The Max length of the labels should not be more than MAX_LABEL_LENGTH */ 38 #define MAX_LABEL_BUFFER_SIZE 100 39 40 #define MAX_DOMAIN_NAME_LENGTH 255 41 /* The Max length of the domain names should not be more than MAX_DOMAIN_NAME_LENGTH */ 42 #define MAX_IDN_BUFFER_SIZE MAX_DOMAIN_NAME_LENGTH+1 43 44 #define LOWER_CASE_DELTA 0x0020 45 #define HYPHEN 0x002D 46 #define FULL_STOP 0x002E 47 #define CAPITAL_A 0x0041 48 #define CAPITAL_Z 0x005A 49 50 inline static char16_t 51 toASCIILower(char16_t ch){ 52 if(CAPITAL_A <= ch && ch <= CAPITAL_Z){ 53 return ch + LOWER_CASE_DELTA; 54 } 55 return ch; 56 } 57 58 inline static UBool 59 startsWithPrefix(const char16_t* src , int32_t srcLength){ 60 if(srcLength < ACE_PREFIX_LENGTH){ 61 return false; 62 } 63 64 for(int8_t i=0; i< ACE_PREFIX_LENGTH; i++){ 65 if(toASCIILower(src[i]) != ACE_PREFIX[i]){ 66 return false; 67 } 68 } 69 return true; 70 } 71 72 73 inline static int32_t 74 compareCaseInsensitiveASCII(const char16_t* s1, int32_t s1Len, 75 const char16_t* s2, int32_t s2Len){ 76 77 int32_t minLength; 78 int32_t lengthResult; 79 80 // are we comparing different lengths? 81 if(s1Len != s2Len) { 82 if(s1Len < s2Len) { 83 minLength = s1Len; 84 lengthResult = -1; 85 } else { 86 minLength = s2Len; 87 lengthResult = 1; 88 } 89 } else { 90 // ok the lengths are equal 91 minLength = s1Len; 92 lengthResult = 0; 93 } 94 95 char16_t c1,c2; 96 int32_t rc; 97 98 for(int32_t i =0;/* no condition */;i++) { 99 100 /* If we reach the ends of both strings then they match */ 101 if(i == minLength) { 102 return lengthResult; 103 } 104 105 c1 = s1[i]; 106 c2 = s2[i]; 107 108 /* Case-insensitive comparison */ 109 if(c1!=c2) { 110 rc = static_cast<int32_t>(toASCIILower(c1)) - static_cast<int32_t>(toASCIILower(c2)); 111 if(rc!=0) { 112 lengthResult=rc; 113 break; 114 } 115 } 116 } 117 return lengthResult; 118 } 119 120 121 /** 122 * Ascertain if the given code point is a label separator as 123 * defined by the IDNA RFC 124 * 125 * @param ch The code point to be ascertained 126 * @return true if the char is a label separator 127 * @stable ICU 2.8 128 */ 129 static inline UBool isLabelSeparator(char16_t ch){ 130 switch(ch){ 131 case 0x002e: 132 case 0x3002: 133 case 0xFF0E: 134 case 0xFF61: 135 return true; 136 default: 137 return false; 138 } 139 } 140 141 // returns the length of the label excluding the separator 142 // if *limit == separator then the length returned does not include 143 // the separtor. 144 static inline int32_t 145 getNextSeparator(char16_t *src, int32_t srcLength, 146 char16_t **limit, UBool *done){ 147 if(srcLength == -1){ 148 int32_t i; 149 for(i=0 ; ;i++){ 150 if(src[i] == 0){ 151 *limit = src + i; // point to null 152 *done = true; 153 return i; 154 } 155 if(isLabelSeparator(src[i])){ 156 *limit = src + (i+1); // go past the delimiter 157 return i; 158 159 } 160 } 161 }else{ 162 int32_t i; 163 for(i=0;i<srcLength;i++){ 164 if(isLabelSeparator(src[i])){ 165 *limit = src + (i+1); // go past the delimiter 166 return i; 167 } 168 } 169 // we have not found the delimiter 170 // if(i==srcLength) 171 *limit = src+srcLength; 172 *done = true; 173 174 return i; 175 } 176 } 177 static inline UBool isLDHChar(char16_t ch){ 178 // high runner case 179 if(ch>0x007A){ 180 return false; 181 } 182 //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A] 183 if( (ch==0x002D) || 184 (0x0030 <= ch && ch <= 0x0039) || 185 (0x0041 <= ch && ch <= 0x005A) || 186 (0x0061 <= ch && ch <= 0x007A) 187 ){ 188 return true; 189 } 190 return false; 191 } 192 193 static int32_t 194 _internal_toASCII(const char16_t* src, int32_t srcLength, 195 char16_t* dest, int32_t destCapacity, 196 int32_t options, 197 UStringPrepProfile* nameprep, 198 UParseError* parseError, 199 UErrorCode* status) 200 { 201 202 // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too. 203 char16_t b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE]; 204 //initialize pointers to stack buffers 205 char16_t *b1 = b1Stack, *b2 = b2Stack; 206 int32_t b1Len=0, b2Len, 207 b1Capacity = MAX_LABEL_BUFFER_SIZE, 208 b2Capacity = MAX_LABEL_BUFFER_SIZE , 209 reqLength=0; 210 211 int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0; 212 UBool* caseFlags = nullptr; 213 214 // the source contains all ascii codepoints 215 UBool srcIsASCII = true; 216 // assume the source contains all LDH codepoints 217 UBool srcIsLDH = true; 218 219 int32_t j=0; 220 221 //get the options 222 UBool useSTD3ASCIIRules = static_cast<UBool>((options & UIDNA_USE_STD3_RULES) != 0); 223 224 int32_t failPos = -1; 225 226 if(srcLength == -1){ 227 srcLength = u_strlen(src); 228 } 229 230 if(srcLength > b1Capacity){ 231 b1 = static_cast<char16_t*>(uprv_malloc(srcLength * U_SIZEOF_UCHAR)); 232 if(b1==nullptr){ 233 *status = U_MEMORY_ALLOCATION_ERROR; 234 goto CLEANUP; 235 } 236 b1Capacity = srcLength; 237 } 238 239 // step 1 240 for( j=0;j<srcLength;j++){ 241 if(src[j] > 0x7F){ 242 srcIsASCII = false; 243 } 244 b1[b1Len++] = src[j]; 245 } 246 247 // step 2 is performed only if the source contains non ASCII 248 if(srcIsASCII == false){ 249 250 // step 2 251 UErrorCode bufferStatus = U_ZERO_ERROR; 252 b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, &bufferStatus); 253 254 if(bufferStatus == U_BUFFER_OVERFLOW_ERROR){ 255 // redo processing of string 256 // we do not have enough room so grow the buffer 257 if(b1 != b1Stack){ 258 uprv_free(b1); 259 } 260 b1 = static_cast<char16_t*>(uprv_malloc(b1Len * U_SIZEOF_UCHAR)); 261 if(b1==nullptr){ 262 *status = U_MEMORY_ALLOCATION_ERROR; 263 goto CLEANUP; 264 } 265 266 bufferStatus = U_ZERO_ERROR; // reset error 267 268 b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, &bufferStatus); 269 } 270 if (U_FAILURE(bufferStatus)) { 271 *status = bufferStatus; 272 } 273 } 274 // error bail out 275 if(U_FAILURE(*status)){ 276 goto CLEANUP; 277 } 278 if(b1Len == 0){ 279 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; 280 goto CLEANUP; 281 } 282 283 // for step 3 & 4 284 srcIsASCII = true; 285 for( j=0;j<b1Len;j++){ 286 // check if output of usprep_prepare is all ASCII 287 if(b1[j] > 0x7F){ 288 srcIsASCII = false; 289 }else if(isLDHChar(b1[j])==false){ // if the char is in ASCII range verify that it is an LDH character 290 srcIsLDH = false; 291 failPos = j; 292 } 293 } 294 if(useSTD3ASCIIRules){ 295 // verify 3a and 3b 296 // 3(a) Verify the absence of non-LDH ASCII code points; that is, the 297 // absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F. 298 // 3(b) Verify the absence of leading and trailing hyphen-minus; that 299 // is, the absence of U+002D at the beginning and end of the 300 // sequence. 301 if( srcIsLDH == false /* source at this point should not contain anyLDH characters */ 302 || b1[0] == HYPHEN || b1[b1Len-1] == HYPHEN){ 303 *status = U_IDNA_STD3_ASCII_RULES_ERROR; 304 305 /* populate the parseError struct */ 306 if(srcIsLDH==false){ 307 // failPos is always set the index of failure 308 uprv_syntaxError(b1,failPos, b1Len,parseError); 309 }else if(b1[0] == HYPHEN){ 310 // fail position is 0 311 uprv_syntaxError(b1,0,b1Len,parseError); 312 }else{ 313 // the last index in the source is always length-1 314 uprv_syntaxError(b1, (b1Len>0) ? b1Len-1 : b1Len, b1Len,parseError); 315 } 316 317 goto CLEANUP; 318 } 319 } 320 // Step 4: if the source is ASCII then proceed to step 8 321 if(srcIsASCII){ 322 if(b1Len <= destCapacity){ 323 u_memmove(dest, b1, b1Len); 324 reqLength = b1Len; 325 }else{ 326 reqLength = b1Len; 327 goto CLEANUP; 328 } 329 }else{ 330 // step 5 : verify the sequence does not begin with ACE prefix 331 if(!startsWithPrefix(b1,b1Len)){ 332 333 //step 6: encode the sequence with punycode 334 335 // do not preserve the case flags for now! 336 // TODO: Preserve the case while implementing the RFE 337 // caseFlags = (UBool*) uprv_malloc(b1Len * sizeof(UBool)); 338 // uprv_memset(caseFlags,true,b1Len); 339 340 UErrorCode bufferStatus = U_ZERO_ERROR; 341 b2Len = u_strToPunycode(b1,b1Len,b2,b2Capacity,caseFlags,&bufferStatus); 342 343 if(bufferStatus == U_BUFFER_OVERFLOW_ERROR){ 344 // redo processing of string 345 /* we do not have enough room so grow the buffer*/ 346 b2 = static_cast<char16_t*>(uprv_malloc(b2Len * U_SIZEOF_UCHAR)); 347 if(b2 == nullptr){ 348 *status = U_MEMORY_ALLOCATION_ERROR; 349 goto CLEANUP; 350 } 351 352 bufferStatus = U_ZERO_ERROR; // reset error 353 354 b2Len = u_strToPunycode(b1,b1Len,b2,b2Len,caseFlags,&bufferStatus); 355 } 356 //error bail out 357 if(U_FAILURE(bufferStatus)){ 358 *status = bufferStatus; 359 goto CLEANUP; 360 } 361 // TODO : Reconsider while implementing the case preserve RFE 362 // convert all codepoints to lower case ASCII 363 // toASCIILower(b2,b2Len); 364 reqLength = b2Len+ACE_PREFIX_LENGTH; 365 366 if(reqLength > destCapacity){ 367 *status = U_BUFFER_OVERFLOW_ERROR; 368 goto CLEANUP; 369 } 370 //Step 7: prepend the ACE prefix 371 u_memcpy(dest, ACE_PREFIX, ACE_PREFIX_LENGTH); 372 //Step 6: copy the contents in b2 into dest 373 u_memcpy(dest+ACE_PREFIX_LENGTH, b2, b2Len); 374 375 }else{ 376 *status = U_IDNA_ACE_PREFIX_ERROR; 377 //position of failure is 0 378 uprv_syntaxError(b1,0,b1Len,parseError); 379 goto CLEANUP; 380 } 381 } 382 // step 8: verify the length of label 383 if(reqLength > MAX_LABEL_LENGTH){ 384 *status = U_IDNA_LABEL_TOO_LONG_ERROR; 385 } 386 387 CLEANUP: 388 if(b1 != b1Stack){ 389 uprv_free(b1); 390 } 391 if(b2 != b2Stack){ 392 uprv_free(b2); 393 } 394 uprv_free(caseFlags); 395 396 return u_terminateUChars(dest, destCapacity, reqLength, status); 397 } 398 399 static int32_t 400 _internal_toUnicode(const char16_t* src, int32_t srcLength, 401 char16_t* dest, int32_t destCapacity, 402 int32_t options, 403 UStringPrepProfile* nameprep, 404 UParseError* parseError, 405 UErrorCode* status) 406 { 407 408 //get the options 409 //UBool useSTD3ASCIIRules = (UBool)((options & UIDNA_USE_STD3_RULES) != 0); 410 int32_t namePrepOptions = ((options & UIDNA_ALLOW_UNASSIGNED) != 0) ? USPREP_ALLOW_UNASSIGNED: 0; 411 412 // TODO Revisit buffer handling. The label should not be over 63 ASCII characters. ICU4J may need to be updated too. 413 char16_t b1Stack[MAX_LABEL_BUFFER_SIZE], b2Stack[MAX_LABEL_BUFFER_SIZE], b3Stack[MAX_LABEL_BUFFER_SIZE]; 414 415 //initialize pointers to stack buffers 416 char16_t *b1 = b1Stack, *b2 = b2Stack, *b1Prime=nullptr, *b3=b3Stack; 417 int32_t b1Len = 0, b2Len, b1PrimeLen, b3Len, 418 b1Capacity = MAX_LABEL_BUFFER_SIZE, 419 b2Capacity = MAX_LABEL_BUFFER_SIZE, 420 b3Capacity = MAX_LABEL_BUFFER_SIZE, 421 reqLength=0; 422 423 UBool* caseFlags = nullptr; 424 425 UBool srcIsASCII = true; 426 /*UBool srcIsLDH = true; 427 int32_t failPos =0;*/ 428 429 // step 1: find out if all the codepoints in src are ASCII 430 if(srcLength==-1){ 431 srcLength = 0; 432 for(;src[srcLength]!=0;){ 433 if(src[srcLength]> 0x7f){ 434 srcIsASCII = false; 435 }/*else if(isLDHChar(src[srcLength])==false){ 436 // here we do not assemble surrogates 437 // since we know that LDH code points 438 // are in the ASCII range only 439 srcIsLDH = false; 440 failPos = srcLength; 441 }*/ 442 srcLength++; 443 } 444 }else if(srcLength > 0){ 445 for(int32_t j=0; j<srcLength; j++){ 446 if(src[j]> 0x7f){ 447 srcIsASCII = false; 448 break; 449 }/*else if(isLDHChar(src[j])==false){ 450 // here we do not assemble surrogates 451 // since we know that LDH code points 452 // are in the ASCII range only 453 srcIsLDH = false; 454 failPos = j; 455 }*/ 456 } 457 }else{ 458 return 0; 459 } 460 461 if(srcIsASCII == false){ 462 // step 2: process the string 463 UErrorCode bufferStatus = U_ZERO_ERROR; 464 b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Capacity, namePrepOptions, parseError, &bufferStatus); 465 if(bufferStatus == U_BUFFER_OVERFLOW_ERROR){ 466 // redo processing of string 467 /* we do not have enough room so grow the buffer*/ 468 b1 = static_cast<char16_t*>(uprv_malloc(b1Len * U_SIZEOF_UCHAR)); 469 if(b1==nullptr){ 470 *status = U_MEMORY_ALLOCATION_ERROR; 471 goto CLEANUP; 472 } 473 474 bufferStatus = U_ZERO_ERROR; // reset error 475 476 b1Len = usprep_prepare(nameprep, src, srcLength, b1, b1Len, namePrepOptions, parseError, &bufferStatus); 477 } 478 //bail out on error 479 if(U_FAILURE(bufferStatus)){ 480 *status = bufferStatus; 481 goto CLEANUP; 482 } 483 }else{ 484 485 //just point src to b1 486 b1 = const_cast<char16_t*>(src); 487 b1Len = srcLength; 488 } 489 490 // The RFC states that 491 // <quote> 492 // ToUnicode never fails. If any step fails, then the original input 493 // is returned immediately in that step. 494 // </quote> 495 496 //step 3: verify ACE Prefix 497 if(startsWithPrefix(b1,b1Len)){ 498 499 //step 4: Remove the ACE Prefix 500 b1Prime = b1 + ACE_PREFIX_LENGTH; 501 b1PrimeLen = b1Len - ACE_PREFIX_LENGTH; 502 503 //step 5: Decode using punycode 504 UErrorCode bufferStatus = U_ZERO_ERROR; 505 b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Capacity, caseFlags, &bufferStatus); 506 507 if(bufferStatus == U_BUFFER_OVERFLOW_ERROR){ 508 // redo processing of string 509 /* we do not have enough room so grow the buffer*/ 510 b2 = static_cast<char16_t*>(uprv_malloc(b2Len * U_SIZEOF_UCHAR)); 511 if(b2==nullptr){ 512 *status = U_MEMORY_ALLOCATION_ERROR; 513 goto CLEANUP; 514 } 515 516 bufferStatus = U_ZERO_ERROR; // reset error 517 518 b2Len = u_strFromPunycode(b1Prime, b1PrimeLen, b2, b2Len, caseFlags, &bufferStatus); 519 } 520 521 522 //step 6:Apply toASCII 523 b3Len = uidna_toASCII(b2, b2Len, b3, b3Capacity, options, parseError, &bufferStatus); 524 525 if(bufferStatus == U_BUFFER_OVERFLOW_ERROR){ 526 // redo processing of string 527 /* we do not have enough room so grow the buffer*/ 528 b3 = static_cast<char16_t*>(uprv_malloc(b3Len * U_SIZEOF_UCHAR)); 529 if(b3==nullptr){ 530 *status = U_MEMORY_ALLOCATION_ERROR; 531 goto CLEANUP; 532 } 533 534 bufferStatus = U_ZERO_ERROR; // reset error 535 536 b3Len = uidna_toASCII(b2,b2Len,b3,b3Len,options,parseError,&bufferStatus); 537 538 } 539 //bail out on error 540 if(U_FAILURE(bufferStatus)){ 541 *status = bufferStatus; 542 goto CLEANUP; 543 } 544 545 //step 7: verify 546 if(compareCaseInsensitiveASCII(b1, b1Len, b3, b3Len) !=0){ 547 // Cause the original to be returned. 548 *status = U_IDNA_VERIFICATION_ERROR; 549 goto CLEANUP; 550 } 551 552 //step 8: return output of step 5 553 reqLength = b2Len; 554 if(b2Len <= destCapacity) { 555 u_memmove(dest, b2, b2Len); 556 } 557 } 558 else{ 559 // See the start of this if statement for why this is commented out. 560 // verify that STD3 ASCII rules are satisfied 561 /*if(useSTD3ASCIIRules == true){ 562 if( srcIsLDH == false // source contains some non-LDH characters 563 || src[0] == HYPHEN || src[srcLength-1] == HYPHEN){ 564 *status = U_IDNA_STD3_ASCII_RULES_ERROR; 565 566 // populate the parseError struct 567 if(srcIsLDH==false){ 568 // failPos is always set the index of failure 569 uprv_syntaxError(src,failPos, srcLength,parseError); 570 }else if(src[0] == HYPHEN){ 571 // fail position is 0 572 uprv_syntaxError(src,0,srcLength,parseError); 573 }else{ 574 // the last index in the source is always length-1 575 uprv_syntaxError(src, (srcLength>0) ? srcLength-1 : srcLength, srcLength,parseError); 576 } 577 578 goto CLEANUP; 579 } 580 }*/ 581 // just return the source 582 //copy the source to destination 583 if(srcLength <= destCapacity){ 584 u_memmove(dest, src, srcLength); 585 } 586 reqLength = srcLength; 587 } 588 589 590 CLEANUP: 591 592 if(b1 != b1Stack && b1!=src){ 593 uprv_free(b1); 594 } 595 if(b2 != b2Stack){ 596 uprv_free(b2); 597 } 598 if(b3 != b3Stack){ 599 uprv_free(b3); 600 } 601 uprv_free(caseFlags); 602 603 // The RFC states that 604 // <quote> 605 // ToUnicode never fails. If any step fails, then the original input 606 // is returned immediately in that step. 607 // </quote> 608 // So if any step fails lets copy source to destination 609 if(U_FAILURE(*status)){ 610 //copy the source to destination 611 if(dest && srcLength <= destCapacity){ 612 // srcLength should have already been set earlier. 613 U_ASSERT(srcLength >= 0); 614 u_memmove(dest, src, srcLength); 615 } 616 reqLength = srcLength; 617 *status = U_ZERO_ERROR; 618 } 619 620 return u_terminateUChars(dest, destCapacity, reqLength, status); 621 } 622 623 U_CAPI int32_t U_EXPORT2 624 uidna_toASCII(const char16_t* src, int32_t srcLength, 625 char16_t* dest, int32_t destCapacity, 626 int32_t options, 627 UParseError* parseError, 628 UErrorCode* status){ 629 630 if(status == nullptr || U_FAILURE(*status)){ 631 return 0; 632 } 633 if((src==nullptr) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ 634 *status = U_ILLEGAL_ARGUMENT_ERROR; 635 return 0; 636 } 637 638 UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); 639 640 if(U_FAILURE(*status)){ 641 return -1; 642 } 643 644 int32_t retLen = _internal_toASCII(src, srcLength, dest, destCapacity, options, nameprep, parseError, status); 645 646 /* close the profile*/ 647 usprep_close(nameprep); 648 649 return retLen; 650 } 651 652 U_CAPI int32_t U_EXPORT2 653 uidna_toUnicode(const char16_t* src, int32_t srcLength, 654 char16_t* dest, int32_t destCapacity, 655 int32_t options, 656 UParseError* parseError, 657 UErrorCode* status){ 658 659 if(status == nullptr || U_FAILURE(*status)){ 660 return 0; 661 } 662 if( (src==nullptr) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ 663 *status = U_ILLEGAL_ARGUMENT_ERROR; 664 return 0; 665 } 666 667 UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); 668 669 if(U_FAILURE(*status)){ 670 return -1; 671 } 672 673 int32_t retLen = _internal_toUnicode(src, srcLength, dest, destCapacity, options, nameprep, parseError, status); 674 675 usprep_close(nameprep); 676 677 return retLen; 678 } 679 680 681 U_CAPI int32_t U_EXPORT2 682 uidna_IDNToASCII( const char16_t *src, int32_t srcLength, 683 char16_t* dest, int32_t destCapacity, 684 int32_t options, 685 UParseError *parseError, 686 UErrorCode *status){ 687 688 if(status == nullptr || U_FAILURE(*status)){ 689 return 0; 690 } 691 if((src==nullptr) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ 692 *status = U_ILLEGAL_ARGUMENT_ERROR; 693 return 0; 694 } 695 696 int32_t reqLength = 0; 697 698 UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); 699 700 if(U_FAILURE(*status)){ 701 return 0; 702 } 703 704 //initialize pointers 705 char16_t* delimiter = const_cast<char16_t*>(src); 706 char16_t* labelStart = const_cast<char16_t*>(src); 707 char16_t* currentDest = dest; 708 int32_t remainingLen = srcLength; 709 int32_t remainingDestCapacity = destCapacity; 710 int32_t labelLen = 0, labelReqLength = 0; 711 UBool done = false; 712 713 714 for(;;){ 715 716 labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done); 717 labelReqLength = 0; 718 if(!(labelLen==0 && done)){// make sure this is not a root label separator. 719 720 UErrorCode bufferStatus = U_ZERO_ERROR; 721 labelReqLength = _internal_toASCII( labelStart, labelLen, 722 currentDest, remainingDestCapacity, 723 options, nameprep, 724 parseError, &bufferStatus); 725 726 if (bufferStatus == U_BUFFER_OVERFLOW_ERROR) { 727 remainingDestCapacity = 0; 728 } else if (U_FAILURE(bufferStatus)) { 729 *status = bufferStatus; 730 break; 731 } 732 } 733 734 reqLength +=labelReqLength; 735 // adjust the destination pointer 736 if(labelReqLength < remainingDestCapacity){ 737 currentDest = currentDest + labelReqLength; 738 remainingDestCapacity -= labelReqLength; 739 }else{ 740 // should never occur 741 remainingDestCapacity = 0; 742 } 743 744 if(done){ 745 break; 746 } 747 748 // add the label separator 749 if(remainingDestCapacity > 0){ 750 *currentDest++ = FULL_STOP; 751 remainingDestCapacity--; 752 } 753 reqLength++; 754 755 labelStart = delimiter; 756 if(remainingLen >0 ){ 757 remainingLen = (int32_t)(srcLength - (delimiter - src)); 758 } 759 760 } 761 762 if(reqLength > MAX_DOMAIN_NAME_LENGTH){ 763 *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR; 764 } 765 766 usprep_close(nameprep); 767 768 return u_terminateUChars(dest, destCapacity, reqLength, status); 769 } 770 771 U_CAPI int32_t U_EXPORT2 772 uidna_IDNToUnicode( const char16_t* src, int32_t srcLength, 773 char16_t* dest, int32_t destCapacity, 774 int32_t options, 775 UParseError* parseError, 776 UErrorCode* status){ 777 778 if(status == nullptr || U_FAILURE(*status)){ 779 return 0; 780 } 781 if((src==nullptr) || (srcLength < -1) || (destCapacity<0) || (!dest && destCapacity > 0)){ 782 *status = U_ILLEGAL_ARGUMENT_ERROR; 783 return 0; 784 } 785 786 int32_t reqLength = 0; 787 788 UStringPrepProfile* nameprep = usprep_openByType(USPREP_RFC3491_NAMEPREP, status); 789 790 if(U_FAILURE(*status)){ 791 return 0; 792 } 793 794 //initialize pointers 795 char16_t* delimiter = const_cast<char16_t*>(src); 796 char16_t* labelStart = const_cast<char16_t*>(src); 797 char16_t* currentDest = dest; 798 int32_t remainingLen = srcLength; 799 int32_t remainingDestCapacity = destCapacity; 800 int32_t labelLen = 0, labelReqLength = 0; 801 UBool done = false; 802 803 for(;;){ 804 805 labelLen = getNextSeparator(labelStart,remainingLen, &delimiter,&done); 806 807 // The RFC states that 808 // <quote> 809 // ToUnicode never fails. If any step fails, then the original input 810 // is returned immediately in that step. 811 // </quote> 812 // _internal_toUnicode will copy the label. 813 /*if(labelLen==0 && done==false){ 814 *status = U_IDNA_ZERO_LENGTH_LABEL_ERROR; 815 break; 816 }*/ 817 818 labelReqLength = _internal_toUnicode(labelStart, labelLen, 819 currentDest, remainingDestCapacity, 820 options, nameprep, 821 parseError, status); 822 823 if(*status == U_BUFFER_OVERFLOW_ERROR){ 824 *status = U_ZERO_ERROR; // reset error 825 remainingDestCapacity = 0; 826 } 827 828 if(U_FAILURE(*status)){ 829 break; 830 } 831 832 reqLength +=labelReqLength; 833 // adjust the destination pointer 834 if(labelReqLength < remainingDestCapacity){ 835 currentDest = currentDest + labelReqLength; 836 remainingDestCapacity -= labelReqLength; 837 }else{ 838 // should never occur 839 remainingDestCapacity = 0; 840 } 841 842 if(done){ 843 break; 844 } 845 846 // add the label separator 847 // Unlike the ToASCII operation we don't normalize the label separators 848 if(remainingDestCapacity > 0){ 849 *currentDest++ = *(labelStart + labelLen); 850 remainingDestCapacity--; 851 } 852 reqLength++; 853 854 labelStart = delimiter; 855 if(remainingLen >0 ){ 856 remainingLen = (int32_t)(srcLength - (delimiter - src)); 857 } 858 859 } 860 861 if(reqLength > MAX_DOMAIN_NAME_LENGTH){ 862 *status = U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR; 863 } 864 865 usprep_close(nameprep); 866 867 return u_terminateUChars(dest, destCapacity, reqLength, status); 868 } 869 870 U_CAPI int32_t U_EXPORT2 871 uidna_compare( const char16_t *s1, int32_t length1, 872 const char16_t *s2, int32_t length2, 873 int32_t options, 874 UErrorCode* status){ 875 876 if(status == nullptr || U_FAILURE(*status)){ 877 return -1; 878 } 879 880 char16_t b1Stack[MAX_IDN_BUFFER_SIZE], b2Stack[MAX_IDN_BUFFER_SIZE]; 881 char16_t *b1 = b1Stack, *b2 = b2Stack; 882 int32_t b1Len, b2Len, b1Capacity = MAX_IDN_BUFFER_SIZE, b2Capacity = MAX_IDN_BUFFER_SIZE; 883 int32_t result=-1; 884 885 UParseError parseError; 886 887 UErrorCode bufferStatus = U_ZERO_ERROR; 888 b1Len = uidna_IDNToASCII(s1, length1, b1, b1Capacity, options, &parseError, &bufferStatus); 889 if(bufferStatus == U_BUFFER_OVERFLOW_ERROR){ 890 // redo processing of string 891 b1 = (char16_t*) uprv_malloc(b1Len * U_SIZEOF_UCHAR); 892 if(b1==nullptr){ 893 *status = U_MEMORY_ALLOCATION_ERROR; 894 goto CLEANUP; 895 } 896 897 bufferStatus = U_ZERO_ERROR; // reset error 898 899 b1Len = uidna_IDNToASCII(s1,length1,b1,b1Len, options, &parseError, &bufferStatus); 900 } 901 902 b2Len = uidna_IDNToASCII(s2,length2, b2,b2Capacity, options, &parseError, &bufferStatus); 903 if(bufferStatus == U_BUFFER_OVERFLOW_ERROR){ 904 // redo processing of string 905 b2 = (char16_t*) uprv_malloc(b2Len * U_SIZEOF_UCHAR); 906 if(b2==nullptr){ 907 *status = U_MEMORY_ALLOCATION_ERROR; 908 goto CLEANUP; 909 } 910 911 bufferStatus = U_ZERO_ERROR; // reset error 912 913 b2Len = uidna_IDNToASCII(s2, length2, b2, b2Len, options, &parseError, &bufferStatus); 914 } 915 916 if (U_FAILURE(bufferStatus)) { 917 *status = bufferStatus; 918 } 919 920 // when toASCII is applied all label separators are replaced with FULL_STOP 921 result = compareCaseInsensitiveASCII(b1,b1Len,b2,b2Len); 922 923 CLEANUP: 924 if(b1 != b1Stack){ 925 uprv_free(b1); 926 } 927 928 if(b2 != b2Stack){ 929 uprv_free(b2); 930 } 931 932 return result; 933 } 934 935 #endif /* #if !UCONFIG_NO_IDNA */