utf8.c (18892B)
1 /* This Source Code Form is subject to the terms of the Mozilla Public 2 * License, v. 2.0. If a copy of the MPL was not distributed with this 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 4 5 /* 6 * utf8.c 7 * 8 * This file contains some additional utility routines required for 9 * handling UTF8 strings. 10 */ 11 12 #ifndef BASE_H 13 #include "base.h" 14 #endif /* BASE_H */ 15 16 #include "plstr.h" 17 18 /* 19 * NOTES: 20 * 21 * There's an "is hex string" function in pki1/atav.c. If we need 22 * it in more places, pull that one out. 23 */ 24 25 /* 26 * nssUTF8_CaseIgnoreMatch 27 * 28 * Returns true if the two UTF8-encoded strings pointed to by the 29 * two specified NSSUTF8 pointers differ only in typcase. 30 * 31 * The error may be one of the following values: 32 * NSS_ERROR_INVALID_POINTER 33 * 34 * Return value: 35 * PR_TRUE if the strings match, ignoring case 36 * PR_FALSE if they don't 37 * PR_FALSE upon error 38 */ 39 40 NSS_IMPLEMENT PRBool 41 nssUTF8_CaseIgnoreMatch(const NSSUTF8 *a, const NSSUTF8 *b, PRStatus *statusOpt) 42 { 43 #ifdef NSSDEBUG 44 if (((const NSSUTF8 *)NULL == a) || ((const NSSUTF8 *)NULL == b)) { 45 nss_SetError(NSS_ERROR_INVALID_POINTER); 46 if ((PRStatus *)NULL != statusOpt) { 47 *statusOpt = PR_FAILURE; 48 } 49 return PR_FALSE; 50 } 51 #endif /* NSSDEBUG */ 52 53 if ((PRStatus *)NULL != statusOpt) { 54 *statusOpt = PR_SUCCESS; 55 } 56 57 /* 58 * XXX fgmr 59 * 60 * This is, like, so wrong! 61 */ 62 if (0 == PL_strcasecmp((const char *)a, (const char *)b)) { 63 return PR_TRUE; 64 } else { 65 return PR_FALSE; 66 } 67 } 68 69 /* 70 * nssUTF8_PrintableMatch 71 * 72 * Returns true if the two Printable strings pointed to by the 73 * two specified NSSUTF8 pointers match when compared with the 74 * rules for Printable String (leading and trailing spaces are 75 * disregarded, extents of whitespace match irregardless of length, 76 * and case is not significant), then PR_TRUE will be returned. 77 * Otherwise, PR_FALSE will be returned. Upon failure, PR_FALSE 78 * will be returned. If the optional statusOpt argument is not 79 * NULL, then PR_SUCCESS or PR_FAILURE will be stored in that 80 * location. 81 * 82 * The error may be one of the following values: 83 * NSS_ERROR_INVALID_POINTER 84 * 85 * Return value: 86 * PR_TRUE if the strings match, ignoring case 87 * PR_FALSE if they don't 88 * PR_FALSE upon error 89 */ 90 91 NSS_IMPLEMENT PRBool 92 nssUTF8_PrintableMatch(const NSSUTF8 *a, const NSSUTF8 *b, PRStatus *statusOpt) 93 { 94 PRUint8 *c; 95 PRUint8 *d; 96 97 #ifdef NSSDEBUG 98 if (((const NSSUTF8 *)NULL == a) || ((const NSSUTF8 *)NULL == b)) { 99 nss_SetError(NSS_ERROR_INVALID_POINTER); 100 if ((PRStatus *)NULL != statusOpt) { 101 *statusOpt = PR_FAILURE; 102 } 103 return PR_FALSE; 104 } 105 #endif /* NSSDEBUG */ 106 107 if ((PRStatus *)NULL != statusOpt) { 108 *statusOpt = PR_SUCCESS; 109 } 110 111 c = (PRUint8 *)a; 112 d = (PRUint8 *)b; 113 114 while (' ' == *c) { 115 c++; 116 } 117 118 while (' ' == *d) { 119 d++; 120 } 121 122 while (('\0' != *c) && ('\0' != *d)) { 123 PRUint8 e, f; 124 125 e = *c; 126 f = *d; 127 128 if (('a' <= e) && (e <= 'z')) { 129 e -= ('a' - 'A'); 130 } 131 132 if (('a' <= f) && (f <= 'z')) { 133 f -= ('a' - 'A'); 134 } 135 136 if (e != f) { 137 return PR_FALSE; 138 } 139 140 c++; 141 d++; 142 143 if (' ' == *c) { 144 while (' ' == *c) { 145 c++; 146 } 147 c--; 148 } 149 150 if (' ' == *d) { 151 while (' ' == *d) { 152 d++; 153 } 154 d--; 155 } 156 } 157 158 while (' ' == *c) { 159 c++; 160 } 161 162 while (' ' == *d) { 163 d++; 164 } 165 166 if (*c == *d) { 167 /* And both '\0', btw */ 168 return PR_TRUE; 169 } else { 170 return PR_FALSE; 171 } 172 } 173 174 /* 175 * nssUTF8_Duplicate 176 * 177 * This routine duplicates the UTF8-encoded string pointed to by the 178 * specified NSSUTF8 pointer. If the optional arenaOpt argument is 179 * not null, the memory required will be obtained from that arena; 180 * otherwise, the memory required will be obtained from the heap. 181 * A pointer to the new string will be returned. In case of error, 182 * an error will be placed on the error stack and NULL will be 183 * returned. 184 * 185 * The error may be one of the following values: 186 * NSS_ERROR_INVALID_POINTER 187 * NSS_ERROR_INVALID_ARENA 188 * NSS_ERROR_NO_MEMORY 189 */ 190 191 NSS_IMPLEMENT NSSUTF8 * 192 nssUTF8_Duplicate(const NSSUTF8 *s, NSSArena *arenaOpt) 193 { 194 NSSUTF8 *rv; 195 PRUint32 len; 196 197 #ifdef NSSDEBUG 198 if ((const NSSUTF8 *)NULL == s) { 199 nss_SetError(NSS_ERROR_INVALID_POINTER); 200 return (NSSUTF8 *)NULL; 201 } 202 203 if ((NSSArena *)NULL != arenaOpt) { 204 if (PR_SUCCESS != nssArena_verifyPointer(arenaOpt)) { 205 return (NSSUTF8 *)NULL; 206 } 207 } 208 #endif /* NSSDEBUG */ 209 210 len = PL_strlen((const char *)s); 211 #ifdef PEDANTIC 212 if ('\0' != ((const char *)s)[len]) { 213 /* must have wrapped, e.g., too big for PRUint32 */ 214 nss_SetError(NSS_ERROR_NO_MEMORY); 215 return (NSSUTF8 *)NULL; 216 } 217 #endif /* PEDANTIC */ 218 len++; /* zero termination */ 219 220 rv = nss_ZAlloc(arenaOpt, len); 221 if ((void *)NULL == rv) { 222 return (NSSUTF8 *)NULL; 223 } 224 225 (void)nsslibc_memcpy(rv, s, len); 226 return rv; 227 } 228 229 /* 230 * nssUTF8_Size 231 * 232 * This routine returns the length in bytes (including the terminating 233 * null) of the UTF8-encoded string pointed to by the specified 234 * NSSUTF8 pointer. Zero is returned on error. 235 * 236 * The error may be one of the following values: 237 * NSS_ERROR_INVALID_POINTER 238 * NSS_ERROR_VALUE_TOO_LARGE 239 * 240 * Return value: 241 * 0 on error 242 * nonzero length of the string. 243 */ 244 245 NSS_IMPLEMENT PRUint32 246 nssUTF8_Size(const NSSUTF8 *s, PRStatus *statusOpt) 247 { 248 PRUint32 sv; 249 250 #ifdef NSSDEBUG 251 if ((const NSSUTF8 *)NULL == s) { 252 nss_SetError(NSS_ERROR_INVALID_POINTER); 253 if ((PRStatus *)NULL != statusOpt) { 254 *statusOpt = PR_FAILURE; 255 } 256 return 0; 257 } 258 #endif /* NSSDEBUG */ 259 260 sv = PL_strlen((const char *)s) + 1; 261 #ifdef PEDANTIC 262 if ('\0' != ((const char *)s)[sv - 1]) { 263 /* wrapped */ 264 nss_SetError(NSS_ERROR_VALUE_TOO_LARGE); 265 if ((PRStatus *)NULL != statusOpt) { 266 *statusOpt = PR_FAILURE; 267 } 268 return 0; 269 } 270 #endif /* PEDANTIC */ 271 272 if ((PRStatus *)NULL != statusOpt) { 273 *statusOpt = PR_SUCCESS; 274 } 275 276 return sv; 277 } 278 279 /* 280 * nssUTF8_Length 281 * 282 * This routine returns the length in characters (not including the 283 * terminating null) of the UTF8-encoded string pointed to by the 284 * specified NSSUTF8 pointer. 285 * 286 * The error may be one of the following values: 287 * NSS_ERROR_INVALID_POINTER 288 * NSS_ERROR_VALUE_TOO_LARGE 289 * NSS_ERROR_INVALID_STRING 290 * 291 * Return value: 292 * length of the string (which may be zero) 293 * 0 on error 294 */ 295 296 NSS_IMPLEMENT PRUint32 297 nssUTF8_Length(const NSSUTF8 *s, PRStatus *statusOpt) 298 { 299 PRUint32 l = 0; 300 const PRUint8 *c = (const PRUint8 *)s; 301 302 #ifdef NSSDEBUG 303 if ((const NSSUTF8 *)NULL == s) { 304 nss_SetError(NSS_ERROR_INVALID_POINTER); 305 goto loser; 306 } 307 #endif /* NSSDEBUG */ 308 309 /* 310 * From RFC 3629: 311 * 312 * UTF8-octets = *( UTF8-char ) 313 * UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4 314 * UTF8-1 = %x00-7F 315 * UTF8-2 = %xC2-DF UTF8-tail 316 * UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) / 317 * %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail ) 318 * UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) / 319 * %xF4 %x80-8F 2( UTF8-tail ) 320 * UTF8-tail = %x80-BF 321 */ 322 323 while (0 != *c) { 324 PRUint32 incr; 325 if (*c < 0x80) { 326 incr = 1; 327 } else if (*c < 0xC2) { 328 nss_SetError(NSS_ERROR_INVALID_STRING); 329 goto loser; 330 } else if (*c < 0xE0) { 331 incr = 2; 332 } else if (*c == 0xE0) { 333 if (c[1] < 0xA0) { 334 nss_SetError(NSS_ERROR_INVALID_STRING); 335 goto loser; 336 } 337 incr = 3; 338 } else if (*c < 0xF0) { 339 if (*c == 0xED && c[1] > 0x9F) { 340 nss_SetError(NSS_ERROR_INVALID_STRING); 341 goto loser; 342 } 343 incr = 3; 344 } else if (*c == 0xF0) { 345 if (c[1] < 0x90) { 346 nss_SetError(NSS_ERROR_INVALID_STRING); 347 goto loser; 348 } 349 incr = 4; 350 } else if (*c < 0xF4) { 351 incr = 4; 352 } else if (*c == 0xF4) { 353 if (c[1] > 0x8F) { 354 nss_SetError(NSS_ERROR_INVALID_STRING); 355 goto loser; 356 } 357 incr = 4; 358 } else { 359 nss_SetError(NSS_ERROR_INVALID_STRING); 360 goto loser; 361 } 362 363 l += incr; 364 365 #ifdef PEDANTIC 366 if (l < incr) { 367 /* Wrapped-- too big */ 368 nss_SetError(NSS_ERROR_VALUE_TOO_LARGE); 369 goto loser; 370 } 371 #endif /* PEDANTIC */ 372 373 { 374 const PRUint8 *d; 375 for (d = &c[1]; d < &c[incr]; d++) { 376 if ((*d & 0xC0) != 0x80) { 377 nss_SetError(NSS_ERROR_INVALID_STRING); 378 goto loser; 379 } 380 } 381 } 382 383 c += incr; 384 } 385 386 if ((PRStatus *)NULL != statusOpt) { 387 *statusOpt = PR_SUCCESS; 388 } 389 390 return l; 391 392 loser: 393 if ((PRStatus *)NULL != statusOpt) { 394 *statusOpt = PR_FAILURE; 395 } 396 397 return 0; 398 } 399 400 /* 401 * nssUTF8_Create 402 * 403 * This routine creates a UTF8 string from a string in some other 404 * format. Some types of string may include embedded null characters, 405 * so for them the length parameter must be used. For string types 406 * that are null-terminated, the length parameter is optional; if it 407 * is zero, it will be ignored. If the optional arena argument is 408 * non-null, the memory used for the new string will be obtained from 409 * that arena, otherwise it will be obtained from the heap. This 410 * routine may return NULL upon error, in which case it will have 411 * placed an error on the error stack. 412 * 413 * The error may be one of the following: 414 * NSS_ERROR_INVALID_POINTER 415 * NSS_ERROR_NO_MEMORY 416 * NSS_ERROR_UNSUPPORTED_TYPE 417 * 418 * Return value: 419 * NULL upon error 420 * A non-null pointer to a new UTF8 string otherwise 421 */ 422 423 extern const NSSError NSS_ERROR_INTERNAL_ERROR; /* XXX fgmr */ 424 425 NSS_IMPLEMENT NSSUTF8 * 426 nssUTF8_Create(NSSArena *arenaOpt, nssStringType type, const void *inputString, 427 PRUint32 size /* in bytes, not characters */ 428 ) 429 { 430 NSSUTF8 *rv = NULL; 431 432 #ifdef NSSDEBUG 433 if ((NSSArena *)NULL != arenaOpt) { 434 if (PR_SUCCESS != nssArena_verifyPointer(arenaOpt)) { 435 return (NSSUTF8 *)NULL; 436 } 437 } 438 439 if ((const void *)NULL == inputString) { 440 nss_SetError(NSS_ERROR_INVALID_POINTER); 441 return (NSSUTF8 *)NULL; 442 } 443 #endif /* NSSDEBUG */ 444 445 switch (type) { 446 case nssStringType_DirectoryString: 447 /* This is a composite type requiring BER */ 448 nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE); 449 break; 450 case nssStringType_TeletexString: 451 /* 452 * draft-ietf-pkix-ipki-part1-11 says in part: 453 * 454 * In addition, many legacy implementations support names encoded 455 * in the ISO 8859-1 character set (Latin1String) but tag them as 456 * TeletexString. The Latin1String includes characters used in 457 * Western European countries which are not part of the 458 * TeletexString charcter set. Implementations that process 459 * TeletexString SHOULD be prepared to handle the entire ISO 460 * 8859-1 character set.[ISO 8859-1]. 461 */ 462 nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ 463 break; 464 case nssStringType_PrintableString: 465 /* 466 * PrintableString consists of A-Za-z0-9 ,()+,-./:=? 467 * This is a subset of ASCII, which is a subset of UTF8. 468 * So we can just duplicate the string over. 469 */ 470 471 if (0 == size) { 472 rv = nssUTF8_Duplicate((const NSSUTF8 *)inputString, arenaOpt); 473 } else { 474 rv = nss_ZAlloc(arenaOpt, size + 1); 475 if ((NSSUTF8 *)NULL == rv) { 476 return (NSSUTF8 *)NULL; 477 } 478 479 (void)nsslibc_memcpy(rv, inputString, size); 480 } 481 482 break; 483 case nssStringType_UniversalString: 484 /* 4-byte unicode */ 485 nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ 486 break; 487 case nssStringType_BMPString: 488 /* Base Multilingual Plane of Unicode */ 489 nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ 490 break; 491 case nssStringType_UTF8String: 492 if (0 == size) { 493 rv = nssUTF8_Duplicate((const NSSUTF8 *)inputString, arenaOpt); 494 } else { 495 rv = nss_ZAlloc(arenaOpt, size + 1); 496 if ((NSSUTF8 *)NULL == rv) { 497 return (NSSUTF8 *)NULL; 498 } 499 500 (void)nsslibc_memcpy(rv, inputString, size); 501 } 502 503 break; 504 case nssStringType_PHGString: 505 /* 506 * PHGString is an IA5String (with case-insensitive comparisons). 507 * IA5 is ~almost~ ascii; ascii has dollar-sign where IA5 has 508 * currency symbol. 509 */ 510 nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ 511 break; 512 case nssStringType_GeneralString: 513 nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ 514 break; 515 default: 516 nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE); 517 break; 518 } 519 520 return rv; 521 } 522 523 NSS_IMPLEMENT NSSItem * 524 nssUTF8_GetEncoding(NSSArena *arenaOpt, NSSItem *rvOpt, nssStringType type, 525 NSSUTF8 *string) 526 { 527 NSSItem *rv = (NSSItem *)NULL; 528 PRStatus status = PR_SUCCESS; 529 530 #ifdef NSSDEBUG 531 if ((NSSArena *)NULL != arenaOpt) { 532 if (PR_SUCCESS != nssArena_verifyPointer(arenaOpt)) { 533 return (NSSItem *)NULL; 534 } 535 } 536 537 if ((NSSUTF8 *)NULL == string) { 538 nss_SetError(NSS_ERROR_INVALID_POINTER); 539 return (NSSItem *)NULL; 540 } 541 #endif /* NSSDEBUG */ 542 543 switch (type) { 544 case nssStringType_DirectoryString: 545 nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ 546 break; 547 case nssStringType_TeletexString: 548 nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ 549 break; 550 case nssStringType_PrintableString: 551 nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ 552 break; 553 case nssStringType_UniversalString: 554 nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ 555 break; 556 case nssStringType_BMPString: 557 nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ 558 break; 559 case nssStringType_UTF8String: { 560 NSSUTF8 *dup = nssUTF8_Duplicate(string, arenaOpt); 561 if ((NSSUTF8 *)NULL == dup) { 562 return (NSSItem *)NULL; 563 } 564 565 if ((NSSItem *)NULL == rvOpt) { 566 rv = nss_ZNEW(arenaOpt, NSSItem); 567 if ((NSSItem *)NULL == rv) { 568 (void)nss_ZFreeIf(dup); 569 return (NSSItem *)NULL; 570 } 571 } else { 572 rv = rvOpt; 573 } 574 575 rv->data = dup; 576 dup = (NSSUTF8 *)NULL; 577 rv->size = nssUTF8_Size(rv->data, &status); 578 if ((0 == rv->size) && (PR_SUCCESS != status)) { 579 if ((NSSItem *)NULL == rvOpt) { 580 (void)nss_ZFreeIf(rv); 581 } 582 return (NSSItem *)NULL; 583 } 584 } break; 585 case nssStringType_PHGString: 586 nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ 587 break; 588 default: 589 nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE); 590 break; 591 } 592 593 return rv; 594 } 595 596 /* 597 * nssUTF8_CopyIntoFixedBuffer 598 * 599 * This will copy a UTF8 string into a fixed-length buffer, making 600 * sure that the all characters are valid. Any remaining space will 601 * be padded with the specified ASCII character, typically either 602 * null or space. 603 * 604 * Blah, blah, blah. 605 */ 606 607 NSS_IMPLEMENT PRStatus 608 nssUTF8_CopyIntoFixedBuffer(NSSUTF8 *string, char *buffer, PRUint32 bufferSize, 609 char pad) 610 { 611 PRUint32 stringSize = 0; 612 613 #ifdef NSSDEBUG 614 if ((char *)NULL == buffer) { 615 nss_SetError(NSS_ERROR_INVALID_POINTER); 616 return PR_FALSE; 617 } 618 619 if (0 == bufferSize) { 620 nss_SetError(NSS_ERROR_INVALID_ARGUMENT); 621 return PR_FALSE; 622 } 623 624 if ((pad & 0x80) != 0x00) { 625 nss_SetError(NSS_ERROR_INVALID_ARGUMENT); 626 return PR_FALSE; 627 } 628 #endif /* NSSDEBUG */ 629 630 if ((NSSUTF8 *)NULL == string) { 631 string = (NSSUTF8 *)""; 632 } 633 634 stringSize = nssUTF8_Size(string, (PRStatus *)NULL); 635 stringSize--; /* don't count the trailing null */ 636 if (stringSize > bufferSize) { 637 PRUint32 bs = bufferSize; 638 (void)nsslibc_memcpy(buffer, string, bufferSize); 639 640 if ((((buffer[bs - 1] & 0x80) == 0x00)) || 641 ((bs > 1) && ((buffer[bs - 2] & 0xE0) == 0xC0)) || 642 ((bs > 2) && ((buffer[bs - 3] & 0xF0) == 0xE0)) || 643 ((bs > 3) && ((buffer[bs - 4] & 0xF8) == 0xF0)) || 644 ((bs > 4) && ((buffer[bs - 5] & 0xFC) == 0xF8)) || 645 ((bs > 5) && ((buffer[bs - 6] & 0xFE) == 0xFC))) { 646 /* It fit exactly */ 647 return PR_SUCCESS; 648 } 649 650 /* Too long. We have to trim the last character */ 651 for (/*bs*/; bs != 0; bs--) { 652 if ((buffer[bs - 1] & 0xC0) != 0x80) { 653 buffer[bs - 1] = pad; 654 break; 655 } else { 656 buffer[bs - 1] = pad; 657 } 658 } 659 } else { 660 (void)nsslibc_memset(buffer, pad, bufferSize); 661 (void)nsslibc_memcpy(buffer, string, stringSize); 662 } 663 664 return PR_SUCCESS; 665 } 666 667 /* 668 * nssUTF8_Equal 669 * 670 */ 671 672 NSS_IMPLEMENT PRBool 673 nssUTF8_Equal(const NSSUTF8 *a, const NSSUTF8 *b, PRStatus *statusOpt) 674 { 675 PRUint32 la, lb; 676 677 #ifdef NSSDEBUG 678 if (((const NSSUTF8 *)NULL == a) || ((const NSSUTF8 *)NULL == b)) { 679 nss_SetError(NSS_ERROR_INVALID_POINTER); 680 if ((PRStatus *)NULL != statusOpt) { 681 *statusOpt = PR_FAILURE; 682 } 683 return PR_FALSE; 684 } 685 #endif /* NSSDEBUG */ 686 687 la = nssUTF8_Size(a, statusOpt); 688 if (0 == la) { 689 return PR_FALSE; 690 } 691 692 lb = nssUTF8_Size(b, statusOpt); 693 if (0 == lb) { 694 return PR_FALSE; 695 } 696 697 if (la != lb) { 698 return PR_FALSE; 699 } 700 701 return nsslibc_memequal(a, b, la, statusOpt); 702 }