test_tokenizer.js (15617B)
/* Any copyright is dedicated to the Public Domain.
 * http://creativecommons.org/publicdomain/zero/1.0/ */

/**
 * Table-driven test for UrlbarTokenizer.tokenize().
 *
 * Each entry of `testContexts` is used directly as the query context passed
 * to the tokenizer and has the following shape:
 *   - desc: human readable description, logged through info().
 *   - searchString: the raw string to tokenize.
 *   - expectedTokens: the { value, type } pairs tokenize() must return. A
 *     `lowerCaseValue` property is added to each of them before comparing.
 *   - searchMode (optional): search mode object stored on the context.
 *   - allowSearchSuggestionsForSimpleOrigins (optional): when present, the
 *     browser.urlbar.allowSearchSuggestionsForSimpleOrigins pref is set to
 *     this value while the entry is tokenized.
 *
 * Every pref touched by the test is cleared again, even when tokenize() or
 * the assertion throws, so no state outlives the task.
 */
add_task(async function test_tokenizer() {
  const WHITELISTED_HOST_PREF = "browser.fixup.domainwhitelist.whitelisted";
  const SIMPLE_ORIGINS_PREF =
    "browser.urlbar.allowSearchSuggestionsForSimpleOrigins";
  const { RESTRICT, TYPE } = UrlbarTokenizer;

  const testContexts = [
    { desc: "Empty string", searchString: "", expectedTokens: [] },
    { desc: "Spaces string", searchString: " ", expectedTokens: [] },
    {
      desc: "Single word string",
      searchString: "test",
      expectedTokens: [{ value: "test", type: TYPE.TEXT }],
    },
    {
      desc: "Multi word string with mixed whitespace types",
      searchString: " test1 test2\u1680test3\u2004test4\u1680",
      expectedTokens: [
        { value: "test1", type: TYPE.TEXT },
        { value: "test2", type: TYPE.TEXT },
        { value: "test3", type: TYPE.TEXT },
        { value: "test4", type: TYPE.TEXT },
      ],
    },
    {
      desc: "separate restriction char at beginning",
      searchString: `${RESTRICT.BOOKMARK} test`,
      expectedTokens: [
        { value: RESTRICT.BOOKMARK, type: TYPE.RESTRICT_BOOKMARK },
        { value: "test", type: TYPE.TEXT },
      ],
    },
    {
      desc: "do not separate restriction char at beginning in search mode",
      searchMode: { engineName: "testEngine" },
      searchString: `${RESTRICT.SEARCH}test`,
      expectedTokens: [{ value: "?test", type: TYPE.TEXT }],
    },
    {
      desc: "separate restriction char at end",
      searchString: `test ${RESTRICT.BOOKMARK}`,
      expectedTokens: [
        { value: "test", type: TYPE.TEXT },
        { value: RESTRICT.BOOKMARK, type: TYPE.RESTRICT_BOOKMARK },
      ],
    },
    {
      desc: "boundary restriction char at end",
      searchString: `test${RESTRICT.BOOKMARK}`,
      expectedTokens: [
        { value: `test${RESTRICT.BOOKMARK}`, type: TYPE.TEXT },
      ],
    },
    {
      desc: "do not separate boundary search restriction char at end",
      searchString: `test${RESTRICT.SEARCH}`,
      expectedTokens: [{ value: "test?", type: TYPE.TEXT }],
    },
    {
      desc: "separate restriction char in the middle",
      searchString: `test ${RESTRICT.BOOKMARK} test`,
      expectedTokens: [
        { value: "test", type: TYPE.TEXT },
        { value: RESTRICT.BOOKMARK, type: TYPE.TEXT },
        { value: "test", type: TYPE.TEXT },
      ],
    },
    {
      desc: "restriction char in the middle",
      searchString: `test${RESTRICT.BOOKMARK}test`,
      expectedTokens: [
        { value: `test${RESTRICT.BOOKMARK}test`, type: TYPE.TEXT },
      ],
    },
    {
      desc: "restriction char in the middle 2",
      searchString: `test${RESTRICT.BOOKMARK} test`,
      expectedTokens: [
        { value: `test${RESTRICT.BOOKMARK}`, type: TYPE.TEXT },
        { value: `test`, type: TYPE.TEXT },
      ],
    },
    {
      desc: "double boundary restriction char",
      searchString: `${RESTRICT.BOOKMARK}test${RESTRICT.TITLE}`,
      expectedTokens: [
        { value: RESTRICT.BOOKMARK, type: TYPE.RESTRICT_BOOKMARK },
        { value: `test${RESTRICT.TITLE}`, type: TYPE.TEXT },
      ],
    },
    {
      desc: "do not separate boundary search restriction char at end when using using a double non-combinable restriction char with a single-character string",
      searchString: `t${RESTRICT.BOOKMARK}${RESTRICT.SEARCH}`,
      expectedTokens: [
        { value: `t${RESTRICT.BOOKMARK}${RESTRICT.SEARCH}`, type: TYPE.TEXT },
      ],
    },
    {
      desc: "only boundary restriction chars",
      searchString: `${RESTRICT.BOOKMARK}${RESTRICT.TITLE}`,
      expectedTokens: [
        { value: RESTRICT.BOOKMARK, type: TYPE.RESTRICT_BOOKMARK },
        { value: RESTRICT.TITLE, type: TYPE.RESTRICT_TITLE },
      ],
    },
    {
      desc: "only the boundary restriction char",
      searchString: RESTRICT.BOOKMARK,
      expectedTokens: [
        { value: RESTRICT.BOOKMARK, type: TYPE.RESTRICT_BOOKMARK },
      ],
    },
    // Some restriction chars may be # or ?, that are also valid path parts.
    // The next 2 tests will check we consider those as part of url paths.
    {
      desc: "boundary # char on path",
      searchString: "test/#",
      expectedTokens: [{ value: "test/#", type: TYPE.POSSIBLE_URL }],
    },
    {
      desc: "boundary ? char on path",
      searchString: "test/?",
      expectedTokens: [{ value: "test/?", type: TYPE.POSSIBLE_URL }],
    },
    {
      desc: "multiple boundary restriction chars suffix",
      searchString: `test ${RESTRICT.HISTORY} ${RESTRICT.TAG}`,
      expectedTokens: [
        { value: "test", type: TYPE.TEXT },
        { value: RESTRICT.HISTORY, type: TYPE.TEXT },
        { value: RESTRICT.TAG, type: TYPE.RESTRICT_TAG },
      ],
    },
    {
      desc: "multiple boundary restriction chars prefix",
      searchString: `${RESTRICT.HISTORY} ${RESTRICT.TAG} test`,
      expectedTokens: [
        { value: RESTRICT.HISTORY, type: TYPE.RESTRICT_HISTORY },
        { value: RESTRICT.TAG, type: TYPE.TEXT },
        { value: "test", type: TYPE.TEXT },
      ],
    },
    {
      desc: "Math with division",
      searchString: "3.6/1.2",
      expectedTokens: [{ value: "3.6/1.2", type: TYPE.TEXT }],
    },
    {
      desc: "ipv4 in bookmarks",
      searchString: `${RESTRICT.BOOKMARK} 192.168.1.1:8`,
      expectedTokens: [
        { value: RESTRICT.BOOKMARK, type: TYPE.RESTRICT_BOOKMARK },
        { value: "192.168.1.1:8", type: TYPE.POSSIBLE_ORIGIN },
      ],
    },
    {
      desc: "email",
      searchString: "test@mozilla.com",
      expectedTokens: [{ value: "test@mozilla.com", type: TYPE.TEXT }],
    },
    {
      desc: "email2",
      searchString: "test.test@mozilla.co.uk",
      expectedTokens: [{ value: "test.test@mozilla.co.uk", type: TYPE.TEXT }],
    },
    {
      desc: "protocol",
      searchString: "http://test",
      expectedTokens: [{ value: "http://test", type: TYPE.POSSIBLE_URL }],
    },
    {
      desc: "bogus protocol with host (we allow visits to http://///example.com)",
      searchString: "http:///test",
      expectedTokens: [{ value: "http:///test", type: TYPE.POSSIBLE_URL }],
    },
    {
      desc: "file protocol with path",
      searchString: "file:///home",
      expectedTokens: [{ value: "file:///home", type: TYPE.POSSIBLE_URL }],
    },
    {
      desc: "almost a protocol",
      searchString: "http:",
      expectedTokens: [{ value: "http:", type: TYPE.POSSIBLE_URL }],
    },
    {
      desc: "almost a protocol 2",
      searchString: "http:/",
      expectedTokens: [{ value: "http:/", type: TYPE.POSSIBLE_URL }],
    },
    {
      desc: "bogus protocol (we allow visits to http://///example.com)",
      searchString: "http:///",
      expectedTokens: [{ value: "http:///", type: TYPE.POSSIBLE_URL }],
    },
    {
      desc: "file protocol",
      searchString: "file:///",
      expectedTokens: [{ value: "file:///", type: TYPE.POSSIBLE_URL }],
    },
    {
      desc: "userinfo",
      searchString: "user:pass@test",
      expectedTokens: [{ value: "user:pass@test", type: TYPE.POSSIBLE_ORIGIN }],
    },
    {
      desc: "domain with two dots",
      searchString: "www.mozilla.org",
      expectedTokens: [
        {
          value: "www.mozilla.org",
          type: TYPE.POSSIBLE_ORIGIN_BUT_SEARCH_ALLOWED,
        },
      ],
    },
    {
      desc: "domain with two dots and allowSearchSuggestionsForSimpleOrigins = false",
      searchString: "www.mozilla.org",
      allowSearchSuggestionsForSimpleOrigins: false,
      expectedTokens: [
        { value: "www.mozilla.org", type: TYPE.POSSIBLE_ORIGIN },
      ],
    },
    {
      desc: "domain with one dot",
      searchString: "mozilla.org",
      expectedTokens: [
        { value: "mozilla.org", type: TYPE.POSSIBLE_ORIGIN_BUT_SEARCH_ALLOWED },
      ],
    },
    {
      desc: "domain with one dot and allowSearchSuggestionsForSimpleOrigins = false",
      searchString: "mozilla.org",
      allowSearchSuggestionsForSimpleOrigins: false,
      expectedTokens: [{ value: "mozilla.org", type: TYPE.POSSIBLE_ORIGIN }],
    },
    {
      desc: "looks like simple origin",
      searchString: "mozilla.o",
      expectedTokens: [
        { value: "mozilla.o", type: TYPE.POSSIBLE_ORIGIN_BUT_SEARCH_ALLOWED },
      ],
    },
    {
      desc: "looks like simple origin with allowSearchSuggestionsForSimpleOrigins = false",
      searchString: "mozilla.o",
      allowSearchSuggestionsForSimpleOrigins: false,
      expectedTokens: [{ value: "mozilla.o", type: TYPE.POSSIBLE_ORIGIN }],
    },
    {
      desc: "query ends with dot",
      searchString: "mozilla.",
      expectedTokens: [
        { value: "mozilla.", type: TYPE.POSSIBLE_ORIGIN_BUT_SEARCH_ALLOWED },
      ],
    },
    {
      desc: "query ends with dot with allowSearchSuggestionsForSimpleOrigins = false",
      searchString: "mozilla.",
      allowSearchSuggestionsForSimpleOrigins: false,
      expectedTokens: [{ value: "mozilla.", type: TYPE.POSSIBLE_ORIGIN }],
    },
    {
      desc: "data uri",
      searchString: "data:text/plain,Content",
      expectedTokens: [
        { value: "data:text/plain,Content", type: TYPE.POSSIBLE_URL },
      ],
    },
    {
      desc: "ipv6",
      searchString: "[2001:db8::1]",
      expectedTokens: [{ value: "[2001:db8::1]", type: TYPE.POSSIBLE_ORIGIN }],
    },
    {
      desc: "numeric domain",
      searchString: "test1001.com",
      expectedTokens: [
        {
          value: "test1001.com",
          type: TYPE.POSSIBLE_ORIGIN_BUT_SEARCH_ALLOWED,
        },
      ],
    },
    {
      desc: "invalid ip",
      searchString: "192.2134.1.2",
      expectedTokens: [{ value: "192.2134.1.2", type: TYPE.TEXT }],
    },
    {
      desc: "ipv4",
      searchString: "1.2.3.4",
      expectedTokens: [{ value: "1.2.3.4", type: TYPE.POSSIBLE_ORIGIN }],
    },
    {
      desc: "host/path",
      searchString: "test/test",
      expectedTokens: [{ value: "test/test", type: TYPE.POSSIBLE_URL }],
    },
    {
      desc: "percent encoded string",
      searchString: "%E6%97%A5%E6%9C%AC",
      expectedTokens: [{ value: "%E6%97%A5%E6%9C%AC", type: TYPE.TEXT }],
    },
    {
      desc: "Uppercase",
      searchString: "TEST",
      expectedTokens: [{ value: "TEST", type: TYPE.TEXT }],
    },
    {
      desc: "Mixed case 1",
      searchString: "TeSt",
      expectedTokens: [{ value: "TeSt", type: TYPE.TEXT }],
    },
    {
      desc: "Mixed case 2",
      searchString: "tEsT",
      expectedTokens: [{ value: "tEsT", type: TYPE.TEXT }],
    },
    {
      desc: "Uppercase with spaces",
      searchString: "TEST EXAMPLE",
      expectedTokens: [
        { value: "TEST", type: TYPE.TEXT },
        { value: "EXAMPLE", type: TYPE.TEXT },
      ],
    },
    {
      desc: "Mixed case with spaces",
      searchString: "TeSt eXaMpLe",
      expectedTokens: [
        { value: "TeSt", type: TYPE.TEXT },
        { value: "eXaMpLe", type: TYPE.TEXT },
      ],
    },
    {
      desc: "plain number",
      searchString: "1001",
      expectedTokens: [{ value: "1001", type: TYPE.TEXT }],
    },
    {
      desc: "data uri with spaces",
      searchString: "data:text/html,oh hi?",
      expectedTokens: [
        { value: "data:text/html,oh hi?", type: TYPE.POSSIBLE_URL },
      ],
    },
    {
      desc: "data uri with spaces ignored with other tokens",
      searchString: "hi data:text/html,oh hi?",
      expectedTokens: [
        { value: "hi", type: TYPE.TEXT },
        { value: "data:text/html,oh", type: TYPE.POSSIBLE_URL },
        { value: "hi?", type: TYPE.TEXT },
      ],
    },
    {
      desc: "whitelisted host",
      searchString: "test whitelisted",
      expectedTokens: [
        { value: "test", type: TYPE.TEXT },
        { value: "whitelisted", type: TYPE.POSSIBLE_ORIGIN },
      ],
    },
  ];

  // Needed by the "whitelisted host" entry; cleared in the outer `finally`
  // so the pref does not stay set once this task is over.
  Services.prefs.setBoolPref(WHITELISTED_HOST_PREF, true);
  try {
    for (const queryContext of testContexts) {
      info(queryContext.desc);
      queryContext.trimmedSearchString = queryContext.searchString.trim();
      // The expected tokens are compared with deepEqual, so they must carry
      // the same lowerCaseValue property as the tokens under test.
      for (const token of queryContext.expectedTokens) {
        token.lowerCaseValue = token.value.toLocaleLowerCase();
      }

      if ("allowSearchSuggestionsForSimpleOrigins" in queryContext) {
        Services.prefs.setBoolPref(
          SIMPLE_ORIGINS_PREF,
          queryContext.allowSearchSuggestionsForSimpleOrigins
        );
      }

      try {
        const tokens = UrlbarTokenizer.tokenize(queryContext);
        Assert.deepEqual(
          tokens,
          queryContext.expectedTokens,
          "Check the expected tokens"
        );
      } finally {
        // Always reset the per-entry pref, otherwise a throwing entry would
        // leave its value behind.
        Services.prefs.clearUserPref(SIMPLE_ORIGINS_PREF);
      }
    }
  } finally {
    Services.prefs.clearUserPref(WHITELISTED_HOST_PREF);
  }
});