IPv4Parser.cpp (9364B)
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* This Source Code Form is subject to the terms of the Mozilla Public 3 * License, v. 2.0. If a copy of the MPL was not distributed with this 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 5 6 #include "IPv4Parser.h" 7 #include "mozilla/EndianUtils.h" 8 #include "nsPrintfCString.h" 9 #include "nsTArray.h" 10 11 namespace mozilla::net::IPv4Parser { 12 13 // https://url.spec.whatwg.org/#ends-in-a-number-checker 14 bool EndsInANumber(const nsCString& input) { 15 // 1. Let parts be the result of strictly splitting input on U+002E (.). 16 nsTArray<nsDependentCSubstring> parts; 17 for (const nsDependentCSubstring& part : input.Split('.')) { 18 parts.AppendElement(part); 19 } 20 21 if (parts.Length() == 0) { 22 return false; 23 } 24 25 // 2.If the last item in parts is the empty string, then: 26 // 1. If parts’s size is 1, then return false. 27 // 2. Remove the last item from parts. 28 if (parts.LastElement().IsEmpty()) { 29 if (parts.Length() == 1) { 30 return false; 31 } 32 (void)parts.PopLastElement(); 33 } 34 35 // 3. Let last be the last item in parts. 36 const nsDependentCSubstring& last = parts.LastElement(); 37 38 // 4. If last is non-empty and contains only ASCII digits, then return true. 39 // The erroneous input "09" will be caught by the IPv4 parser at a later 40 // stage. 41 if (!last.IsEmpty()) { 42 if (ContainsOnlyAsciiDigits(last)) { 43 return true; 44 } 45 } 46 47 // 5. If parsing last as an IPv4 number does not return failure, then return 48 // true. This is equivalent to checking that last is "0X" or "0x", followed by 49 // zero or more ASCII hex digits. 50 if (StringBeginsWith(last, "0x"_ns) || StringBeginsWith(last, "0X"_ns)) { 51 if (ContainsOnlyAsciiHexDigits(Substring(last, 2))) { 52 return true; 53 } 54 } 55 56 return false; 57 } 58 59 nsresult ParseIPv4Number10(const nsACString& input, uint32_t& number, 60 uint32_t maxNumber) { 61 uint64_t value = 0; 62 const char* current = input.BeginReading(); 63 const char* end = input.EndReading(); 64 for (; current < end; ++current) { 65 char c = *current; 66 MOZ_ASSERT(c >= '0' && c <= '9'); 67 value *= 10; 68 value += c - '0'; 69 } 70 if (value <= maxNumber) { 71 number = value; 72 return NS_OK; 73 } 74 75 // The error case 76 number = 0; 77 return NS_ERROR_FAILURE; 78 } 79 80 nsresult ParseIPv4Number(const nsACString& input, int32_t base, 81 uint32_t& number, uint32_t maxNumber) { 82 // Accumulate in the 64-bit value 83 uint64_t value = 0; 84 const char* current = input.BeginReading(); 85 const char* end = input.EndReading(); 86 switch (base) { 87 case 16: 88 ++current; 89 [[fallthrough]]; 90 case 8: 91 ++current; 92 break; 93 case 10: 94 default: 95 break; 96 } 97 for (; current < end; ++current) { 98 value *= base; 99 char c = *current; 100 MOZ_ASSERT((base == 10 && IsAsciiDigit(c)) || 101 (base == 8 && c >= '0' && c <= '7') || 102 (base == 16 && IsAsciiHexDigit(c))); 103 if (IsAsciiDigit(c)) { 104 value += c - '0'; 105 } else if (c >= 'a' && c <= 'f') { 106 value += c - 'a' + 10; 107 } else if (c >= 'A' && c <= 'F') { 108 value += c - 'A' + 10; 109 } 110 } 111 112 if (value <= maxNumber) { 113 number = value; 114 return NS_OK; 115 } 116 117 // The error case 118 number = 0; 119 return NS_ERROR_FAILURE; 120 } 121 122 // IPv4 parser spec: https://url.spec.whatwg.org/#concept-ipv4-parser 123 nsresult NormalizeIPv4(const nsACString& host, nsCString& result) { 124 int32_t bases[4] = {10, 10, 10, 10}; 125 bool onlyBase10 = true; // Track this as a special case 126 int32_t dotIndex[3]; // The positions of the dots in the string 127 128 // Use "length" rather than host.Length() after call to 129 // ValidateIPv4Number because of potential trailing period. 130 nsDependentCSubstring filteredHost; 131 bool trailingDot = false; 132 if (host.Length() > 0 && host.Last() == '.') { 133 trailingDot = true; 134 filteredHost.Rebind(host.BeginReading(), host.Length() - 1); 135 } else { 136 filteredHost.Rebind(host.BeginReading(), host.Length()); 137 } 138 139 int32_t length = static_cast<int32_t>(filteredHost.Length()); 140 int32_t dotCount = ValidateIPv4Number(filteredHost, bases, dotIndex, 141 onlyBase10, length, trailingDot); 142 if (dotCount < 0 || length <= 0) { 143 return NS_ERROR_FAILURE; 144 } 145 146 // Max values specified by the spec 147 static const uint32_t upperBounds[] = {0xffffffffu, 0xffffffu, 0xffffu, 148 0xffu}; 149 uint32_t ipv4; 150 int32_t start = (dotCount > 0 ? dotIndex[dotCount - 1] + 1 : 0); 151 152 // parse the last part first 153 nsresult res; 154 // Doing a special case for all items being base 10 gives ~35% speedup 155 res = (onlyBase10 156 ? ParseIPv4Number10(Substring(host, start, length - start), ipv4, 157 upperBounds[dotCount]) 158 : ParseIPv4Number(Substring(host, start, length - start), 159 bases[dotCount], ipv4, upperBounds[dotCount])); 160 if (NS_FAILED(res)) { 161 return NS_ERROR_FAILURE; 162 } 163 164 // parse remaining parts starting from first part 165 int32_t lastUsed = -1; 166 for (int32_t i = 0; i < dotCount; i++) { 167 uint32_t number; 168 start = lastUsed + 1; 169 lastUsed = dotIndex[i]; 170 res = 171 (onlyBase10 ? ParseIPv4Number10( 172 Substring(host, start, lastUsed - start), number, 255) 173 : ParseIPv4Number(Substring(host, start, lastUsed - start), 174 bases[i], number, 255)); 175 if (NS_FAILED(res)) { 176 return NS_ERROR_FAILURE; 177 } 178 ipv4 += number << (8 * (3 - i)); 179 } 180 181 // A special case for ipv4 URL like "127." should have the same result as 182 // "127". 183 if (dotCount == 1 && dotIndex[0] == length - 1) { 184 ipv4 = (ipv4 & 0xff000000) >> 24; 185 } 186 187 uint8_t ipSegments[4]; 188 NetworkEndian::writeUint32(ipSegments, ipv4); 189 result = nsPrintfCString("%d.%d.%d.%d", ipSegments[0], ipSegments[1], 190 ipSegments[2], ipSegments[3]); 191 return NS_OK; 192 } 193 194 // Return the number of "dots" in the string, or -1 if invalid. Note that the 195 // number of relevant entries in the bases/starts/ends arrays is number of 196 // dots + 1. 197 // 198 // length is assumed to be <= host.Length(); the caller is responsible for that 199 // 200 // Note that the value returned is guaranteed to be in [-1, 3] range. 201 int32_t ValidateIPv4Number(const nsACString& host, int32_t bases[4], 202 int32_t dotIndex[3], bool& onlyBase10, 203 int32_t length, bool trailingDot) { 204 MOZ_ASSERT(length <= (int32_t)host.Length()); 205 if (length <= 0) { 206 return -1; 207 } 208 209 bool lastWasNumber = false; // We count on this being false for i == 0 210 int32_t dotCount = 0; 211 onlyBase10 = true; 212 213 for (int32_t i = 0; i < length; i++) { 214 char current = host[i]; 215 if (current == '.') { 216 // A dot should not follow a dot, or be first - it can follow an x though. 217 if (!(lastWasNumber || 218 (i >= 2 && (host[i - 1] == 'X' || host[i - 1] == 'x') && 219 host[i - 2] == '0')) || 220 (i == (length - 1) && trailingDot)) { 221 return -1; 222 } 223 224 if (dotCount > 2) { 225 return -1; 226 } 227 lastWasNumber = false; 228 dotIndex[dotCount] = i; 229 dotCount++; 230 } else if (current == 'X' || current == 'x') { 231 if (!lastWasNumber || // An X should not follow an X or a dot or be first 232 i == (length - 1) || // No trailing Xs allowed 233 (dotCount == 0 && 234 i != 1) || // If we had no dots, an X should be second 235 host[i - 1] != '0' || // X should always follow a 0. Guaranteed i > 236 // 0 as lastWasNumber is true 237 (dotCount > 0 && 238 host[i - 2] != '.')) { // And that zero follows a dot if it exists 239 return -1; 240 } 241 lastWasNumber = false; 242 bases[dotCount] = 16; 243 onlyBase10 = false; 244 245 } else if (current == '0') { 246 if (i < length - 1 && // Trailing zero doesn't signal octal 247 host[i + 1] != '.' && // Lone zero is not octal 248 (i == 0 || host[i - 1] == '.')) { // Zero at start or following a dot 249 // is a candidate for octal 250 bases[dotCount] = 8; // This will turn to 16 above if X shows up 251 onlyBase10 = false; 252 } 253 lastWasNumber = true; 254 255 } else if (current >= '1' && current <= '7') { 256 lastWasNumber = true; 257 258 } else if (current >= '8' && current <= '9') { 259 if (bases[dotCount] == 8) { 260 return -1; 261 } 262 lastWasNumber = true; 263 264 } else if ((current >= 'a' && current <= 'f') || 265 (current >= 'A' && current <= 'F')) { 266 if (bases[dotCount] != 16) { 267 return -1; 268 } 269 lastWasNumber = true; 270 271 } else { 272 return -1; 273 } 274 } 275 276 return dotCount; 277 } 278 279 bool ContainsOnlyAsciiDigits(const nsDependentCSubstring& input) { 280 for (const auto* c = input.BeginReading(); c < input.EndReading(); c++) { 281 if (!IsAsciiDigit(*c)) { 282 return false; 283 } 284 } 285 286 return true; 287 } 288 289 bool ContainsOnlyAsciiHexDigits(const nsDependentCSubstring& input) { 290 for (const auto* c = input.BeginReading(); c < input.EndReading(); c++) { 291 if (!IsAsciiHexDigit(*c)) { 292 return false; 293 } 294 } 295 return true; 296 } 297 298 } // namespace mozilla::net::IPv4Parser