nsURLParsers.cpp (20037B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 #include <string.h> 7 8 #include "mozilla/RangedPtr.h" 9 #include "mozilla/TextUtils.h" 10 11 #include "nsCRTGlue.h" 12 #include "nsURLParsers.h" 13 #include "nsURLHelper.h" 14 #include "nsString.h" 15 16 using namespace mozilla; 17 18 //---------------------------------------------------------------------------- 19 20 static uint32_t CountConsecutiveSlashes(const char* str, int32_t len) { 21 RangedPtr<const char> p(str, len); 22 uint32_t count = 0; 23 while (len-- && *p++ == '/') ++count; 24 return count; 25 } 26 27 //---------------------------------------------------------------------------- 28 // nsBaseURLParser implementation 29 //---------------------------------------------------------------------------- 30 31 NS_IMPL_ISUPPORTS(nsAuthURLParser, nsIURLParser) 32 NS_IMPL_ISUPPORTS(nsNoAuthURLParser, nsIURLParser) 33 34 #define SET_RESULT(component, pos, len) \ 35 PR_BEGIN_MACRO \ 36 if (component##Pos) *component##Pos = uint32_t(pos); \ 37 if (component##Len) *component##Len = int32_t(len); \ 38 PR_END_MACRO 39 40 #define OFFSET_RESULT(component, offset) \ 41 PR_BEGIN_MACRO \ 42 if (component##Pos) *component##Pos += (offset); \ 43 PR_END_MACRO 44 45 NS_IMETHODIMP 46 nsBaseURLParser::ParseURL(const char* spec, int32_t specLen, 47 uint32_t* schemePos, int32_t* schemeLen, 48 uint32_t* authorityPos, int32_t* authorityLen, 49 uint32_t* pathPos, int32_t* pathLen) { 50 if (NS_WARN_IF(!spec)) { 51 return NS_ERROR_INVALID_POINTER; 52 } 53 54 if (specLen < 0) specLen = strlen(spec); 55 56 const char* stop = nullptr; 57 const char* colon = nullptr; 58 const char* slash = nullptr; 59 const char* p = spec; 60 uint32_t offset = 0; 61 int32_t len = specLen; 62 63 // skip leading whitespace 64 while (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') { 65 spec++; 66 specLen--; 67 offset++; 68 69 p++; 70 len--; 71 } 72 73 for (; len && *p && !colon && !slash; ++p, --len) { 74 switch (*p) { 75 case ':': 76 if (!colon) colon = p; 77 break; 78 case '/': // start of filepath 79 case '?': // start of query 80 case '#': // start of ref 81 if (!slash) slash = p; 82 break; 83 case '@': // username@hostname 84 case '[': // start of IPv6 address literal 85 if (!stop) stop = p; 86 break; 87 } 88 } 89 // disregard the first colon if it follows an '@' or a '[' 90 if (colon && stop && colon > stop) colon = nullptr; 91 92 // if the spec only contained whitespace ... 93 if (specLen == 0) { 94 SET_RESULT(scheme, 0, -1); 95 SET_RESULT(authority, 0, 0); 96 SET_RESULT(path, 0, 0); 97 return NS_OK; 98 } 99 100 // ignore trailing whitespace and control characters 101 for (p = spec + specLen - 1; ((unsigned char)*p <= ' ') && (p != spec); --p) { 102 ; 103 } 104 105 specLen = p - spec + 1; 106 107 if (colon && (colon < slash || !slash)) { 108 // 109 // spec = <scheme>:/<the-rest> 110 // 111 // or 112 // 113 // spec = <scheme>:<authority> 114 // spec = <scheme>:<path-no-slashes> 115 // 116 if (!net_IsValidScheme(nsDependentCSubstring(spec, colon - spec))) { 117 return NS_ERROR_MALFORMED_URI; 118 } 119 SET_RESULT(scheme, offset, colon - spec); 120 if (authorityLen || pathLen) { 121 uint32_t schemeLen = colon + 1 - spec; 122 offset += schemeLen; 123 ParseAfterScheme(colon + 1, specLen - schemeLen, authorityPos, 124 authorityLen, pathPos, pathLen); 125 OFFSET_RESULT(authority, offset); 126 OFFSET_RESULT(path, offset); 127 } 128 } else { 129 // 130 // spec = <authority-no-port-or-password>/<path> 131 // spec = <path> 132 // 133 // or 134 // 135 // spec = <authority-no-port-or-password>/<path-with-colon> 136 // spec = <path-with-colon> 137 // 138 // or 139 // 140 // spec = <authority-no-port-or-password> 141 // spec = <path-no-slashes-or-colon> 142 // 143 SET_RESULT(scheme, 0, -1); 144 if (authorityLen || pathLen) { 145 ParseAfterScheme(spec, specLen, authorityPos, authorityLen, pathPos, 146 pathLen); 147 OFFSET_RESULT(authority, offset); 148 OFFSET_RESULT(path, offset); 149 } 150 } 151 return NS_OK; 152 } 153 154 NS_IMETHODIMP 155 nsBaseURLParser::ParseAuthority(const char* auth, int32_t authLen, 156 uint32_t* usernamePos, int32_t* usernameLen, 157 uint32_t* passwordPos, int32_t* passwordLen, 158 uint32_t* hostnamePos, int32_t* hostnameLen, 159 int32_t* port) { 160 if (NS_WARN_IF(!auth)) { 161 return NS_ERROR_INVALID_POINTER; 162 } 163 164 if (authLen < 0) authLen = strlen(auth); 165 166 SET_RESULT(username, 0, -1); 167 SET_RESULT(password, 0, -1); 168 SET_RESULT(hostname, 0, authLen); 169 if (port) *port = -1; 170 return NS_OK; 171 } 172 173 NS_IMETHODIMP 174 nsBaseURLParser::ParseUserInfo(const char* userinfo, int32_t userinfoLen, 175 uint32_t* usernamePos, int32_t* usernameLen, 176 uint32_t* passwordPos, int32_t* passwordLen) { 177 SET_RESULT(username, 0, -1); 178 SET_RESULT(password, 0, -1); 179 return NS_OK; 180 } 181 182 NS_IMETHODIMP 183 nsBaseURLParser::ParseServerInfo(const char* serverinfo, int32_t serverinfoLen, 184 uint32_t* hostnamePos, int32_t* hostnameLen, 185 int32_t* port) { 186 SET_RESULT(hostname, 0, -1); 187 if (port) *port = -1; 188 return NS_OK; 189 } 190 191 NS_IMETHODIMP 192 nsBaseURLParser::ParsePath(const char* path, int32_t pathLen, 193 uint32_t* filepathPos, int32_t* filepathLen, 194 uint32_t* queryPos, int32_t* queryLen, 195 uint32_t* refPos, int32_t* refLen) { 196 if (NS_WARN_IF(!path)) { 197 return NS_ERROR_INVALID_POINTER; 198 } 199 200 if (pathLen < 0) pathLen = strlen(path); 201 202 // path = [/]<segment1>/<segment2>/<...>/<segmentN>?<query>#<ref> 203 204 // XXX PL_strnpbrk would be nice, but it's buggy 205 206 // search for first occurrence of either ? or # 207 const char *query_beg = nullptr, *query_end = nullptr; 208 const char* ref_beg = nullptr; 209 const char* p = nullptr; 210 for (p = path; p < path + pathLen; ++p) { 211 // only match the query string if it precedes the reference fragment 212 if (!ref_beg && !query_beg && *p == '?') { 213 query_beg = p + 1; 214 } else if (*p == '#') { 215 ref_beg = p + 1; 216 if (query_beg) query_end = p; 217 break; 218 } 219 } 220 221 if (query_beg) { 222 if (query_end) { 223 SET_RESULT(query, query_beg - path, query_end - query_beg); 224 } else { 225 SET_RESULT(query, query_beg - path, pathLen - (query_beg - path)); 226 } 227 } else { 228 SET_RESULT(query, 0, -1); 229 } 230 231 if (ref_beg) { 232 SET_RESULT(ref, ref_beg - path, pathLen - (ref_beg - path)); 233 } else { 234 SET_RESULT(ref, 0, -1); 235 } 236 237 const char* end; 238 if (query_beg) { 239 end = query_beg - 1; 240 } else if (ref_beg) { 241 end = ref_beg - 1; 242 } else { 243 end = path + pathLen; 244 } 245 246 // an empty file path is no file path 247 if (end != path) { 248 SET_RESULT(filepath, 0, end - path); 249 } else { 250 SET_RESULT(filepath, 0, -1); 251 } 252 return NS_OK; 253 } 254 255 NS_IMETHODIMP 256 nsBaseURLParser::ParseFilePath(const char* filepath, int32_t filepathLen, 257 uint32_t* directoryPos, int32_t* directoryLen, 258 uint32_t* basenamePos, int32_t* basenameLen, 259 uint32_t* extensionPos, int32_t* extensionLen) { 260 if (NS_WARN_IF(!filepath)) { 261 return NS_ERROR_INVALID_POINTER; 262 } 263 264 if (filepathLen < 0) filepathLen = strlen(filepath); 265 266 if (filepathLen == 0) { 267 SET_RESULT(directory, 0, -1); 268 SET_RESULT(basename, 0, 0); // assume a zero length file basename 269 SET_RESULT(extension, 0, -1); 270 return NS_OK; 271 } 272 273 const char* p; 274 const char* end = filepath + filepathLen; 275 276 // search backwards for filename 277 for (p = end - 1; *p != '/' && p > filepath; --p) { 278 ; 279 } 280 if (*p == '/') { 281 // catch /.. and /. 282 if ((p + 1 < end && *(p + 1) == '.') && 283 (p + 2 == end || (*(p + 2) == '.' && p + 3 == end))) { 284 p = end - 1; 285 } 286 // filepath = <directory><filename>.<extension> 287 SET_RESULT(directory, 0, p - filepath + 1); 288 ParseFileName(p + 1, end - (p + 1), basenamePos, basenameLen, extensionPos, 289 extensionLen); 290 OFFSET_RESULT(basename, p + 1 - filepath); 291 OFFSET_RESULT(extension, p + 1 - filepath); 292 } else { 293 // filepath = <filename>.<extension> 294 SET_RESULT(directory, 0, -1); 295 ParseFileName(filepath, filepathLen, basenamePos, basenameLen, extensionPos, 296 extensionLen); 297 } 298 return NS_OK; 299 } 300 301 nsresult nsBaseURLParser::ParseFileName( 302 const char* filename, int32_t filenameLen, uint32_t* basenamePos, 303 int32_t* basenameLen, uint32_t* extensionPos, int32_t* extensionLen) { 304 if (NS_WARN_IF(!filename)) { 305 return NS_ERROR_INVALID_POINTER; 306 } 307 308 if (filenameLen < 0) filenameLen = strlen(filename); 309 310 // no extension if filename ends with a '.' 311 if (filename[filenameLen - 1] != '.') { 312 // ignore '.' at the beginning 313 for (const char* p = filename + filenameLen - 1; p > filename; --p) { 314 if (*p == '.') { 315 // filename = <basename.extension> 316 SET_RESULT(basename, 0, p - filename); 317 SET_RESULT(extension, p + 1 - filename, 318 filenameLen - (p - filename + 1)); 319 return NS_OK; 320 } 321 } 322 } 323 // filename = <basename> 324 SET_RESULT(basename, 0, filenameLen); 325 SET_RESULT(extension, 0, -1); 326 return NS_OK; 327 } 328 329 //---------------------------------------------------------------------------- 330 // nsNoAuthURLParser implementation 331 //---------------------------------------------------------------------------- 332 333 NS_IMETHODIMP 334 nsNoAuthURLParser::ParseAuthority(const char* auth, int32_t authLen, 335 uint32_t* usernamePos, int32_t* usernameLen, 336 uint32_t* passwordPos, int32_t* passwordLen, 337 uint32_t* hostnamePos, int32_t* hostnameLen, 338 int32_t* port) { 339 MOZ_ASSERT_UNREACHABLE("Shouldn't parse auth in a NoAuthURL!"); 340 return NS_ERROR_UNEXPECTED; 341 } 342 343 void nsNoAuthURLParser::ParseAfterScheme(const char* spec, int32_t specLen, 344 uint32_t* authPos, int32_t* authLen, 345 uint32_t* pathPos, int32_t* pathLen) { 346 MOZ_ASSERT(specLen >= 0, "unexpected"); 347 348 // everything is the path 349 uint32_t pos = 0; 350 switch (CountConsecutiveSlashes(spec, specLen)) { 351 case 0: 352 case 1: 353 break; 354 case 2: { 355 const char* p = nullptr; 356 if (specLen > 2) { 357 // looks like there is an authority section 358 359 // if the authority looks like a drive number then we 360 // really want to treat it as part of the path 361 // [a-zA-Z][:|]{/\} 362 // i.e one of: c: c:\foo c:/foo c| c|\foo c|/foo 363 if ((specLen > 3) && (spec[3] == ':' || spec[3] == '|') && 364 IsAsciiAlpha(spec[2]) && 365 ((specLen == 4) || (spec[4] == '/') || (spec[4] == '\\'))) { 366 pos = 1; 367 break; 368 } 369 // Ignore apparent authority; path is everything after it 370 for (p = spec + 2; p < spec + specLen; ++p) { 371 if (*p == '/' || *p == '?' || *p == '#') break; 372 } 373 } 374 SET_RESULT(auth, 0, -1); 375 if (p && p != spec + specLen) { 376 SET_RESULT(path, p - spec, specLen - (p - spec)); 377 } else { 378 SET_RESULT(path, 0, -1); 379 } 380 return; 381 } 382 default: 383 pos = 2; 384 break; 385 } 386 SET_RESULT(auth, pos, 0); 387 SET_RESULT(path, pos, specLen - pos); 388 } 389 390 #if defined(XP_WIN) 391 NS_IMETHODIMP 392 nsNoAuthURLParser::ParseFilePath(const char* filepath, int32_t filepathLen, 393 uint32_t* directoryPos, int32_t* directoryLen, 394 uint32_t* basenamePos, int32_t* basenameLen, 395 uint32_t* extensionPos, 396 int32_t* extensionLen) { 397 if (NS_WARN_IF(!filepath)) { 398 return NS_ERROR_INVALID_POINTER; 399 } 400 401 if (filepathLen < 0) filepathLen = strlen(filepath); 402 403 // look for a filepath consisting of only a drive number, which may or 404 // may not have a leading slash. 405 if (filepathLen > 1 && filepathLen < 4) { 406 const char* end = filepath + filepathLen; 407 const char* p = filepath; 408 if (*p == '/') p++; 409 if ((end - p == 2) && (p[1] == ':' || p[1] == '|') && IsAsciiAlpha(*p)) { 410 // filepath = <drive-number>: 411 SET_RESULT(directory, 0, filepathLen); 412 SET_RESULT(basename, 0, -1); 413 SET_RESULT(extension, 0, -1); 414 return NS_OK; 415 } 416 } 417 418 // otherwise fallback on common implementation 419 return nsBaseURLParser::ParseFilePath(filepath, filepathLen, directoryPos, 420 directoryLen, basenamePos, basenameLen, 421 extensionPos, extensionLen); 422 } 423 #endif 424 425 //---------------------------------------------------------------------------- 426 // nsAuthURLParser implementation 427 //---------------------------------------------------------------------------- 428 429 NS_IMETHODIMP 430 nsAuthURLParser::ParseAuthority(const char* auth, int32_t authLen, 431 uint32_t* usernamePos, int32_t* usernameLen, 432 uint32_t* passwordPos, int32_t* passwordLen, 433 uint32_t* hostnamePos, int32_t* hostnameLen, 434 int32_t* port) { 435 nsresult rv; 436 437 if (NS_WARN_IF(!auth)) { 438 return NS_ERROR_INVALID_POINTER; 439 } 440 441 if (authLen < 0) authLen = strlen(auth); 442 443 if (authLen == 0) { 444 SET_RESULT(username, 0, -1); 445 SET_RESULT(password, 0, -1); 446 SET_RESULT(hostname, 0, 0); 447 if (port) *port = -1; 448 return NS_OK; 449 } 450 451 // search backwards for @ 452 const char* p = auth + authLen - 1; 453 for (; (*p != '@') && (p > auth); --p) { 454 } 455 if (*p == '@') { 456 // auth = <user-info@server-info> 457 rv = ParseUserInfo(auth, p - auth, usernamePos, usernameLen, passwordPos, 458 passwordLen); 459 if (NS_FAILED(rv)) return rv; 460 rv = ParseServerInfo(p + 1, authLen - (p - auth + 1), hostnamePos, 461 hostnameLen, port); 462 if (NS_FAILED(rv)) return rv; 463 OFFSET_RESULT(hostname, p + 1 - auth); 464 465 // malformed if has a username or password 466 // but no host info, such as: http://u:p@/ 467 if ((usernamePos || passwordPos) && (!hostnamePos || !*hostnameLen)) { 468 return NS_ERROR_MALFORMED_URI; 469 } 470 } else { 471 // auth = <server-info> 472 SET_RESULT(username, 0, -1); 473 SET_RESULT(password, 0, -1); 474 rv = ParseServerInfo(auth, authLen, hostnamePos, hostnameLen, port); 475 if (NS_FAILED(rv)) return rv; 476 } 477 return NS_OK; 478 } 479 480 NS_IMETHODIMP 481 nsAuthURLParser::ParseUserInfo(const char* userinfo, int32_t userinfoLen, 482 uint32_t* usernamePos, int32_t* usernameLen, 483 uint32_t* passwordPos, int32_t* passwordLen) { 484 if (NS_WARN_IF(!userinfo)) { 485 return NS_ERROR_INVALID_POINTER; 486 } 487 488 if (userinfoLen < 0) userinfoLen = strlen(userinfo); 489 490 if (userinfoLen == 0) { 491 SET_RESULT(username, 0, -1); 492 SET_RESULT(password, 0, -1); 493 return NS_OK; 494 } 495 496 const char* p = (const char*)memchr(userinfo, ':', userinfoLen); 497 if (p) { 498 // userinfo = <username:password> 499 SET_RESULT(username, 0, p - userinfo); 500 SET_RESULT(password, p - userinfo + 1, userinfoLen - (p - userinfo + 1)); 501 } else { 502 // userinfo = <username> 503 SET_RESULT(username, 0, userinfoLen); 504 SET_RESULT(password, 0, -1); 505 } 506 return NS_OK; 507 } 508 509 NS_IMETHODIMP 510 nsAuthURLParser::ParseServerInfo(const char* serverinfo, int32_t serverinfoLen, 511 uint32_t* hostnamePos, int32_t* hostnameLen, 512 int32_t* port) { 513 if (NS_WARN_IF(!serverinfo)) { 514 return NS_ERROR_INVALID_POINTER; 515 } 516 517 if (serverinfoLen < 0) serverinfoLen = strlen(serverinfo); 518 519 if (serverinfoLen == 0) { 520 SET_RESULT(hostname, 0, 0); 521 if (port) *port = -1; 522 return NS_OK; 523 } 524 525 // search backwards for a ':' but stop on ']' (IPv6 address literal 526 // delimiter). check for illegal characters in the hostname. 527 const char* p = serverinfo + serverinfoLen - 1; 528 const char *colon = nullptr, *bracket = nullptr; 529 for (; p > serverinfo; --p) { 530 switch (*p) { 531 case ']': 532 bracket = p; 533 break; 534 case ':': 535 if (bracket == nullptr) colon = p; 536 break; 537 case ' ': 538 // hostname must not contain a space 539 return NS_ERROR_MALFORMED_URI; 540 } 541 } 542 543 if (colon) { 544 // serverinfo = <hostname:port> 545 SET_RESULT(hostname, 0, colon - serverinfo); 546 if (port) { 547 // XXX unfortunately ToInteger is not defined for substrings 548 nsAutoCString buf(colon + 1, serverinfoLen - (colon + 1 - serverinfo)); 549 if (buf.Length() == 0) { 550 *port = -1; 551 } else { 552 const char* nondigit = NS_strspnp("0123456789", buf.get()); 553 if (nondigit && *nondigit) return NS_ERROR_MALFORMED_URI; 554 555 nsresult err; 556 *port = buf.ToInteger(&err); 557 if (NS_FAILED(err) || *port < 0 || 558 *port > std::numeric_limits<uint16_t>::max()) { 559 return NS_ERROR_MALFORMED_URI; 560 } 561 } 562 } 563 } else { 564 // serverinfo = <hostname> 565 SET_RESULT(hostname, 0, serverinfoLen); 566 if (port) *port = -1; 567 } 568 569 // In case of IPv6 address check its validity 570 if (*hostnameLen > 1 && *(serverinfo + *hostnamePos) == '[' && 571 *(serverinfo + *hostnamePos + *hostnameLen - 1) == ']' && 572 !net_IsValidIPv6Addr( 573 Substring(serverinfo + *hostnamePos + 1, *hostnameLen - 2))) { 574 return NS_ERROR_MALFORMED_URI; 575 } 576 577 return NS_OK; 578 } 579 580 void nsAuthURLParser::ParseAfterScheme(const char* spec, int32_t specLen, 581 uint32_t* authPos, int32_t* authLen, 582 uint32_t* pathPos, int32_t* pathLen) { 583 MOZ_ASSERT(specLen >= 0, "unexpected"); 584 585 uint32_t nslash = CountConsecutiveSlashes(spec, specLen); 586 587 // search for the end of the authority section 588 const char* end = spec + specLen; 589 const char* p; 590 for (p = spec + nslash; p < end; ++p) { 591 if (*p == '/' || *p == '?' || *p == '#') break; 592 } 593 if (p < end) { 594 // spec = [/]<auth><path> 595 SET_RESULT(auth, nslash, p - (spec + nslash)); 596 SET_RESULT(path, p - spec, specLen - (p - spec)); 597 } else { 598 // spec = [/]<auth> 599 SET_RESULT(auth, nslash, specLen - nslash); 600 SET_RESULT(path, 0, -1); 601 } 602 } 603 604 //---------------------------------------------------------------------------- 605 // nsStdURLParser implementation 606 //---------------------------------------------------------------------------- 607 608 void nsStdURLParser::ParseAfterScheme(const char* spec, int32_t specLen, 609 uint32_t* authPos, int32_t* authLen, 610 uint32_t* pathPos, int32_t* pathLen) { 611 MOZ_ASSERT(specLen >= 0, "unexpected"); 612 613 uint32_t nslash = CountConsecutiveSlashes(spec, specLen); 614 615 // search for the end of the authority section 616 const char* end = spec + specLen; 617 const char* p; 618 for (p = spec + nslash; p < end; ++p) { 619 if (strchr("/?#;", *p)) break; 620 } 621 switch (nslash) { 622 case 0: 623 case 2: 624 if (p < end) { 625 // spec = (//)<auth><path> 626 SET_RESULT(auth, nslash, p - (spec + nslash)); 627 SET_RESULT(path, p - spec, specLen - (p - spec)); 628 } else { 629 // spec = (//)<auth> 630 SET_RESULT(auth, nslash, specLen - nslash); 631 SET_RESULT(path, 0, -1); 632 } 633 break; 634 case 1: 635 // spec = /<path> 636 SET_RESULT(auth, 0, -1); 637 SET_RESULT(path, 0, specLen); 638 break; 639 default: 640 // spec = ///[/]<path> 641 SET_RESULT(auth, 2, 0); 642 SET_RESULT(path, 2, specLen - 2); 643 } 644 }