tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

IPv4Parser.cpp (9364B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this
      4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 
      6 #include "IPv4Parser.h"
      7 #include "mozilla/EndianUtils.h"
      8 #include "nsPrintfCString.h"
      9 #include "nsTArray.h"
     10 
     11 namespace mozilla::net::IPv4Parser {
     12 
     13 // https://url.spec.whatwg.org/#ends-in-a-number-checker
     14 bool EndsInANumber(const nsCString& input) {
     15  // 1. Let parts be the result of strictly splitting input on U+002E (.).
     16  nsTArray<nsDependentCSubstring> parts;
     17  for (const nsDependentCSubstring& part : input.Split('.')) {
     18    parts.AppendElement(part);
     19  }
     20 
     21  if (parts.Length() == 0) {
     22    return false;
     23  }
     24 
     25  // 2.If the last item in parts is the empty string, then:
     26  //    1. If parts’s size is 1, then return false.
     27  //    2. Remove the last item from parts.
     28  if (parts.LastElement().IsEmpty()) {
     29    if (parts.Length() == 1) {
     30      return false;
     31    }
     32    (void)parts.PopLastElement();
     33  }
     34 
     35  // 3. Let last be the last item in parts.
     36  const nsDependentCSubstring& last = parts.LastElement();
     37 
     38  // 4. If last is non-empty and contains only ASCII digits, then return true.
     39  // The erroneous input "09" will be caught by the IPv4 parser at a later
     40  // stage.
     41  if (!last.IsEmpty()) {
     42    if (ContainsOnlyAsciiDigits(last)) {
     43      return true;
     44    }
     45  }
     46 
     47  // 5. If parsing last as an IPv4 number does not return failure, then return
     48  // true. This is equivalent to checking that last is "0X" or "0x", followed by
     49  // zero or more ASCII hex digits.
     50  if (StringBeginsWith(last, "0x"_ns) || StringBeginsWith(last, "0X"_ns)) {
     51    if (ContainsOnlyAsciiHexDigits(Substring(last, 2))) {
     52      return true;
     53    }
     54  }
     55 
     56  return false;
     57 }
     58 
     59 nsresult ParseIPv4Number10(const nsACString& input, uint32_t& number,
     60                           uint32_t maxNumber) {
     61  uint64_t value = 0;
     62  const char* current = input.BeginReading();
     63  const char* end = input.EndReading();
     64  for (; current < end; ++current) {
     65    char c = *current;
     66    MOZ_ASSERT(c >= '0' && c <= '9');
     67    value *= 10;
     68    value += c - '0';
     69  }
     70  if (value <= maxNumber) {
     71    number = value;
     72    return NS_OK;
     73  }
     74 
     75  // The error case
     76  number = 0;
     77  return NS_ERROR_FAILURE;
     78 }
     79 
     80 nsresult ParseIPv4Number(const nsACString& input, int32_t base,
     81                         uint32_t& number, uint32_t maxNumber) {
     82  // Accumulate in the 64-bit value
     83  uint64_t value = 0;
     84  const char* current = input.BeginReading();
     85  const char* end = input.EndReading();
     86  switch (base) {
     87    case 16:
     88      ++current;
     89      [[fallthrough]];
     90    case 8:
     91      ++current;
     92      break;
     93    case 10:
     94    default:
     95      break;
     96  }
     97  for (; current < end; ++current) {
     98    value *= base;
     99    char c = *current;
    100    MOZ_ASSERT((base == 10 && IsAsciiDigit(c)) ||
    101               (base == 8 && c >= '0' && c <= '7') ||
    102               (base == 16 && IsAsciiHexDigit(c)));
    103    if (IsAsciiDigit(c)) {
    104      value += c - '0';
    105    } else if (c >= 'a' && c <= 'f') {
    106      value += c - 'a' + 10;
    107    } else if (c >= 'A' && c <= 'F') {
    108      value += c - 'A' + 10;
    109    }
    110  }
    111 
    112  if (value <= maxNumber) {
    113    number = value;
    114    return NS_OK;
    115  }
    116 
    117  // The error case
    118  number = 0;
    119  return NS_ERROR_FAILURE;
    120 }
    121 
    122 // IPv4 parser spec: https://url.spec.whatwg.org/#concept-ipv4-parser
    123 nsresult NormalizeIPv4(const nsACString& host, nsCString& result) {
    124  int32_t bases[4] = {10, 10, 10, 10};
    125  bool onlyBase10 = true;  // Track this as a special case
    126  int32_t dotIndex[3];     // The positions of the dots in the string
    127 
    128  // Use "length" rather than host.Length() after call to
    129  // ValidateIPv4Number because of potential trailing period.
    130  nsDependentCSubstring filteredHost;
    131  bool trailingDot = false;
    132  if (host.Length() > 0 && host.Last() == '.') {
    133    trailingDot = true;
    134    filteredHost.Rebind(host.BeginReading(), host.Length() - 1);
    135  } else {
    136    filteredHost.Rebind(host.BeginReading(), host.Length());
    137  }
    138 
    139  int32_t length = static_cast<int32_t>(filteredHost.Length());
    140  int32_t dotCount = ValidateIPv4Number(filteredHost, bases, dotIndex,
    141                                        onlyBase10, length, trailingDot);
    142  if (dotCount < 0 || length <= 0) {
    143    return NS_ERROR_FAILURE;
    144  }
    145 
    146  // Max values specified by the spec
    147  static const uint32_t upperBounds[] = {0xffffffffu, 0xffffffu, 0xffffu,
    148                                         0xffu};
    149  uint32_t ipv4;
    150  int32_t start = (dotCount > 0 ? dotIndex[dotCount - 1] + 1 : 0);
    151 
    152  // parse the last part first
    153  nsresult res;
    154  // Doing a special case for all items being base 10 gives ~35% speedup
    155  res = (onlyBase10
    156             ? ParseIPv4Number10(Substring(host, start, length - start), ipv4,
    157                                 upperBounds[dotCount])
    158             : ParseIPv4Number(Substring(host, start, length - start),
    159                               bases[dotCount], ipv4, upperBounds[dotCount]));
    160  if (NS_FAILED(res)) {
    161    return NS_ERROR_FAILURE;
    162  }
    163 
    164  // parse remaining parts starting from first part
    165  int32_t lastUsed = -1;
    166  for (int32_t i = 0; i < dotCount; i++) {
    167    uint32_t number;
    168    start = lastUsed + 1;
    169    lastUsed = dotIndex[i];
    170    res =
    171        (onlyBase10 ? ParseIPv4Number10(
    172                          Substring(host, start, lastUsed - start), number, 255)
    173                    : ParseIPv4Number(Substring(host, start, lastUsed - start),
    174                                      bases[i], number, 255));
    175    if (NS_FAILED(res)) {
    176      return NS_ERROR_FAILURE;
    177    }
    178    ipv4 += number << (8 * (3 - i));
    179  }
    180 
    181  // A special case for ipv4 URL like "127." should have the same result as
    182  // "127".
    183  if (dotCount == 1 && dotIndex[0] == length - 1) {
    184    ipv4 = (ipv4 & 0xff000000) >> 24;
    185  }
    186 
    187  uint8_t ipSegments[4];
    188  NetworkEndian::writeUint32(ipSegments, ipv4);
    189  result = nsPrintfCString("%d.%d.%d.%d", ipSegments[0], ipSegments[1],
    190                           ipSegments[2], ipSegments[3]);
    191  return NS_OK;
    192 }
    193 
    194 // Return the number of "dots" in the string, or -1 if invalid.  Note that the
    195 // number of relevant entries in the bases/starts/ends arrays is number of
    196 // dots + 1.
    197 //
    198 // length is assumed to be <= host.Length(); the caller is responsible for that
    199 //
    200 // Note that the value returned is guaranteed to be in [-1, 3] range.
    201 int32_t ValidateIPv4Number(const nsACString& host, int32_t bases[4],
    202                           int32_t dotIndex[3], bool& onlyBase10,
    203                           int32_t length, bool trailingDot) {
    204  MOZ_ASSERT(length <= (int32_t)host.Length());
    205  if (length <= 0) {
    206    return -1;
    207  }
    208 
    209  bool lastWasNumber = false;  // We count on this being false for i == 0
    210  int32_t dotCount = 0;
    211  onlyBase10 = true;
    212 
    213  for (int32_t i = 0; i < length; i++) {
    214    char current = host[i];
    215    if (current == '.') {
    216      // A dot should not follow a dot, or be first - it can follow an x though.
    217      if (!(lastWasNumber ||
    218            (i >= 2 && (host[i - 1] == 'X' || host[i - 1] == 'x') &&
    219             host[i - 2] == '0')) ||
    220          (i == (length - 1) && trailingDot)) {
    221        return -1;
    222      }
    223 
    224      if (dotCount > 2) {
    225        return -1;
    226      }
    227      lastWasNumber = false;
    228      dotIndex[dotCount] = i;
    229      dotCount++;
    230    } else if (current == 'X' || current == 'x') {
    231      if (!lastWasNumber ||  // An X should not follow an X or a dot or be first
    232          i == (length - 1) ||  // No trailing Xs allowed
    233          (dotCount == 0 &&
    234           i != 1) ||            // If we had no dots, an X should be second
    235          host[i - 1] != '0' ||  // X should always follow a 0.  Guaranteed i >
    236                                 // 0 as lastWasNumber is true
    237          (dotCount > 0 &&
    238           host[i - 2] != '.')) {  // And that zero follows a dot if it exists
    239        return -1;
    240      }
    241      lastWasNumber = false;
    242      bases[dotCount] = 16;
    243      onlyBase10 = false;
    244 
    245    } else if (current == '0') {
    246      if (i < length - 1 &&      // Trailing zero doesn't signal octal
    247          host[i + 1] != '.' &&  // Lone zero is not octal
    248          (i == 0 || host[i - 1] == '.')) {  // Zero at start or following a dot
    249                                             // is a candidate for octal
    250        bases[dotCount] = 8;  // This will turn to 16 above if X shows up
    251        onlyBase10 = false;
    252      }
    253      lastWasNumber = true;
    254 
    255    } else if (current >= '1' && current <= '7') {
    256      lastWasNumber = true;
    257 
    258    } else if (current >= '8' && current <= '9') {
    259      if (bases[dotCount] == 8) {
    260        return -1;
    261      }
    262      lastWasNumber = true;
    263 
    264    } else if ((current >= 'a' && current <= 'f') ||
    265               (current >= 'A' && current <= 'F')) {
    266      if (bases[dotCount] != 16) {
    267        return -1;
    268      }
    269      lastWasNumber = true;
    270 
    271    } else {
    272      return -1;
    273    }
    274  }
    275 
    276  return dotCount;
    277 }
    278 
    279 bool ContainsOnlyAsciiDigits(const nsDependentCSubstring& input) {
    280  for (const auto* c = input.BeginReading(); c < input.EndReading(); c++) {
    281    if (!IsAsciiDigit(*c)) {
    282      return false;
    283    }
    284  }
    285 
    286  return true;
    287 }
    288 
    289 bool ContainsOnlyAsciiHexDigits(const nsDependentCSubstring& input) {
    290  for (const auto* c = input.BeginReading(); c < input.EndReading(); c++) {
    291    if (!IsAsciiHexDigit(*c)) {
    292      return false;
    293    }
    294  }
    295  return true;
    296 }
    297 
    298 }  // namespace mozilla::net::IPv4Parser