tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

test_tokenizer.js (15617B)


      1 /* Any copyright is dedicated to the Public Domain.
      2 * http://creativecommons.org/publicdomain/zero/1.0/ */
      3 
      4 add_task(async function test_tokenizer() {
      5  let testContexts = [
      6    { desc: "Empty string", searchString: "", expectedTokens: [] },
      7    { desc: "Spaces string", searchString: "      ", expectedTokens: [] },
      8    {
      9      desc: "Single word string",
     10      searchString: "test",
     11      expectedTokens: [{ value: "test", type: UrlbarTokenizer.TYPE.TEXT }],
     12    },
     13    {
     14      desc: "Multi word string with mixed whitespace types",
     15      searchString: " test1 test2\u1680test3\u2004test4\u1680",
     16      expectedTokens: [
     17        { value: "test1", type: UrlbarTokenizer.TYPE.TEXT },
     18        { value: "test2", type: UrlbarTokenizer.TYPE.TEXT },
     19        { value: "test3", type: UrlbarTokenizer.TYPE.TEXT },
     20        { value: "test4", type: UrlbarTokenizer.TYPE.TEXT },
     21      ],
     22    },
     23    {
     24      desc: "separate restriction char at beginning",
     25      searchString: `${UrlbarTokenizer.RESTRICT.BOOKMARK} test`,
     26      expectedTokens: [
     27        {
     28          value: UrlbarTokenizer.RESTRICT.BOOKMARK,
     29          type: UrlbarTokenizer.TYPE.RESTRICT_BOOKMARK,
     30        },
     31        { value: "test", type: UrlbarTokenizer.TYPE.TEXT },
     32      ],
     33    },
     34    {
     35      desc: "do not separate restriction char at beginning in search mode",
     36      searchMode: { engineName: "testEngine" },
     37      searchString: `${UrlbarTokenizer.RESTRICT.SEARCH}test`,
     38      expectedTokens: [{ value: "?test", type: UrlbarTokenizer.TYPE.TEXT }],
     39    },
     40    {
     41      desc: "separate restriction char at end",
     42      searchString: `test ${UrlbarTokenizer.RESTRICT.BOOKMARK}`,
     43      expectedTokens: [
     44        { value: "test", type: UrlbarTokenizer.TYPE.TEXT },
     45        {
     46          value: UrlbarTokenizer.RESTRICT.BOOKMARK,
     47          type: UrlbarTokenizer.TYPE.RESTRICT_BOOKMARK,
     48        },
     49      ],
     50    },
     51    {
     52      desc: "boundary restriction char at end",
     53      searchString: `test${UrlbarTokenizer.RESTRICT.BOOKMARK}`,
     54      expectedTokens: [
     55        {
     56          value: `test${UrlbarTokenizer.RESTRICT.BOOKMARK}`,
     57          type: UrlbarTokenizer.TYPE.TEXT,
     58        },
     59      ],
     60    },
     61    {
     62      desc: "do not separate boundary search restriction char at end",
     63      searchString: `test${UrlbarTokenizer.RESTRICT.SEARCH}`,
     64      expectedTokens: [{ value: "test?", type: UrlbarTokenizer.TYPE.TEXT }],
     65    },
     66    {
     67      desc: "separate restriction char in the middle",
     68      searchString: `test ${UrlbarTokenizer.RESTRICT.BOOKMARK} test`,
     69      expectedTokens: [
     70        { value: "test", type: UrlbarTokenizer.TYPE.TEXT },
     71        {
     72          value: UrlbarTokenizer.RESTRICT.BOOKMARK,
     73          type: UrlbarTokenizer.TYPE.TEXT,
     74        },
     75        { value: "test", type: UrlbarTokenizer.TYPE.TEXT },
     76      ],
     77    },
     78    {
     79      desc: "restriction char in the middle",
     80      searchString: `test${UrlbarTokenizer.RESTRICT.BOOKMARK}test`,
     81      expectedTokens: [
     82        {
     83          value: `test${UrlbarTokenizer.RESTRICT.BOOKMARK}test`,
     84          type: UrlbarTokenizer.TYPE.TEXT,
     85        },
     86      ],
     87    },
     88    {
     89      desc: "restriction char in the middle 2",
     90      searchString: `test${UrlbarTokenizer.RESTRICT.BOOKMARK} test`,
     91      expectedTokens: [
     92        {
     93          value: `test${UrlbarTokenizer.RESTRICT.BOOKMARK}`,
     94          type: UrlbarTokenizer.TYPE.TEXT,
     95        },
     96        { value: `test`, type: UrlbarTokenizer.TYPE.TEXT },
     97      ],
     98    },
     99    {
    100      desc: "double boundary restriction char",
    101      searchString: `${UrlbarTokenizer.RESTRICT.BOOKMARK}test${UrlbarTokenizer.RESTRICT.TITLE}`,
    102      expectedTokens: [
    103        {
    104          value: UrlbarTokenizer.RESTRICT.BOOKMARK,
    105          type: UrlbarTokenizer.TYPE.RESTRICT_BOOKMARK,
    106        },
    107        {
    108          value: `test${UrlbarTokenizer.RESTRICT.TITLE}`,
    109          type: UrlbarTokenizer.TYPE.TEXT,
    110        },
    111      ],
    112    },
    113    {
    114      desc: "do not separate boundary search restriction char at end when using using a double non-combinable restriction char with a single-character string",
    115      searchString: `t${UrlbarTokenizer.RESTRICT.BOOKMARK}${UrlbarTokenizer.RESTRICT.SEARCH}`,
    116      expectedTokens: [
    117        {
    118          value: `t${UrlbarTokenizer.RESTRICT.BOOKMARK}${UrlbarTokenizer.RESTRICT.SEARCH}`,
    119          type: UrlbarTokenizer.TYPE.TEXT,
    120        },
    121      ],
    122    },
    123    {
    124      desc: "only boundary restriction chars",
    125      searchString: `${UrlbarTokenizer.RESTRICT.BOOKMARK}${UrlbarTokenizer.RESTRICT.TITLE}`,
    126      expectedTokens: [
    127        {
    128          value: UrlbarTokenizer.RESTRICT.BOOKMARK,
    129          type: UrlbarTokenizer.TYPE.RESTRICT_BOOKMARK,
    130        },
    131        {
    132          value: UrlbarTokenizer.RESTRICT.TITLE,
    133          type: UrlbarTokenizer.TYPE.RESTRICT_TITLE,
    134        },
    135      ],
    136    },
    137    {
    138      desc: "only the boundary restriction char",
    139      searchString: UrlbarTokenizer.RESTRICT.BOOKMARK,
    140      expectedTokens: [
    141        {
    142          value: UrlbarTokenizer.RESTRICT.BOOKMARK,
    143          type: UrlbarTokenizer.TYPE.RESTRICT_BOOKMARK,
    144        },
    145      ],
    146    },
    147    // Some restriction chars may be # or ?, that are also valid path parts.
    148    // The next 2 tests will check we consider those as part of url paths.
    149    {
    150      desc: "boundary # char on path",
    151      searchString: "test/#",
    152      expectedTokens: [
    153        { value: "test/#", type: UrlbarTokenizer.TYPE.POSSIBLE_URL },
    154      ],
    155    },
    156    {
    157      desc: "boundary ? char on path",
    158      searchString: "test/?",
    159      expectedTokens: [
    160        { value: "test/?", type: UrlbarTokenizer.TYPE.POSSIBLE_URL },
    161      ],
    162    },
    163    {
    164      desc: "multiple boundary restriction chars suffix",
    165      searchString: `test ${UrlbarTokenizer.RESTRICT.HISTORY} ${UrlbarTokenizer.RESTRICT.TAG}`,
    166      expectedTokens: [
    167        { value: "test", type: UrlbarTokenizer.TYPE.TEXT },
    168        {
    169          value: UrlbarTokenizer.RESTRICT.HISTORY,
    170          type: UrlbarTokenizer.TYPE.TEXT,
    171        },
    172        {
    173          value: UrlbarTokenizer.RESTRICT.TAG,
    174          type: UrlbarTokenizer.TYPE.RESTRICT_TAG,
    175        },
    176      ],
    177    },
    178    {
    179      desc: "multiple boundary restriction chars prefix",
    180      searchString: `${UrlbarTokenizer.RESTRICT.HISTORY} ${UrlbarTokenizer.RESTRICT.TAG} test`,
    181      expectedTokens: [
    182        {
    183          value: UrlbarTokenizer.RESTRICT.HISTORY,
    184          type: UrlbarTokenizer.TYPE.RESTRICT_HISTORY,
    185        },
    186        {
    187          value: UrlbarTokenizer.RESTRICT.TAG,
    188          type: UrlbarTokenizer.TYPE.TEXT,
    189        },
    190        { value: "test", type: UrlbarTokenizer.TYPE.TEXT },
    191      ],
    192    },
    193    {
    194      desc: "Math with division",
    195      searchString: "3.6/1.2",
    196      expectedTokens: [{ value: "3.6/1.2", type: UrlbarTokenizer.TYPE.TEXT }],
    197    },
    198    {
    199      desc: "ipv4 in bookmarks",
    200      searchString: `${UrlbarTokenizer.RESTRICT.BOOKMARK} 192.168.1.1:8`,
    201      expectedTokens: [
    202        {
    203          value: UrlbarTokenizer.RESTRICT.BOOKMARK,
    204          type: UrlbarTokenizer.TYPE.RESTRICT_BOOKMARK,
    205        },
    206        { value: "192.168.1.1:8", type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN },
    207      ],
    208    },
    209    {
    210      desc: "email",
    211      searchString: "test@mozilla.com",
    212      expectedTokens: [
    213        { value: "test@mozilla.com", type: UrlbarTokenizer.TYPE.TEXT },
    214      ],
    215    },
    216    {
    217      desc: "email2",
    218      searchString: "test.test@mozilla.co.uk",
    219      expectedTokens: [
    220        { value: "test.test@mozilla.co.uk", type: UrlbarTokenizer.TYPE.TEXT },
    221      ],
    222    },
    223    {
    224      desc: "protocol",
    225      searchString: "http://test",
    226      expectedTokens: [
    227        { value: "http://test", type: UrlbarTokenizer.TYPE.POSSIBLE_URL },
    228      ],
    229    },
    230    {
    231      desc: "bogus protocol with host (we allow visits to http://///example.com)",
    232      searchString: "http:///test",
    233      expectedTokens: [
    234        { value: "http:///test", type: UrlbarTokenizer.TYPE.POSSIBLE_URL },
    235      ],
    236    },
    237    {
    238      desc: "file protocol with path",
    239      searchString: "file:///home",
    240      expectedTokens: [
    241        { value: "file:///home", type: UrlbarTokenizer.TYPE.POSSIBLE_URL },
    242      ],
    243    },
    244    {
    245      desc: "almost a protocol",
    246      searchString: "http:",
    247      expectedTokens: [
    248        { value: "http:", type: UrlbarTokenizer.TYPE.POSSIBLE_URL },
    249      ],
    250    },
    251    {
    252      desc: "almost a protocol 2",
    253      searchString: "http:/",
    254      expectedTokens: [
    255        { value: "http:/", type: UrlbarTokenizer.TYPE.POSSIBLE_URL },
    256      ],
    257    },
    258    {
    259      desc: "bogus protocol (we allow visits to http://///example.com)",
    260      searchString: "http:///",
    261      expectedTokens: [
    262        { value: "http:///", type: UrlbarTokenizer.TYPE.POSSIBLE_URL },
    263      ],
    264    },
    265    {
    266      desc: "file protocol",
    267      searchString: "file:///",
    268      expectedTokens: [
    269        { value: "file:///", type: UrlbarTokenizer.TYPE.POSSIBLE_URL },
    270      ],
    271    },
    272    {
    273      desc: "userinfo",
    274      searchString: "user:pass@test",
    275      expectedTokens: [
    276        { value: "user:pass@test", type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN },
    277      ],
    278    },
    279    {
    280      desc: "domain with two dots",
    281      searchString: "www.mozilla.org",
    282      expectedTokens: [
    283        {
    284          value: "www.mozilla.org",
    285          type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN_BUT_SEARCH_ALLOWED,
    286        },
    287      ],
    288    },
    289    {
    290      desc: "domain with two dots and allowSearchSuggestionsForSimpleOrigins = false",
    291      searchString: "www.mozilla.org",
    292      allowSearchSuggestionsForSimpleOrigins: false,
    293      expectedTokens: [
    294        {
    295          value: "www.mozilla.org",
    296          type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN,
    297        },
    298      ],
    299    },
    300    {
    301      desc: "domain with one dot",
    302      searchString: "mozilla.org",
    303      expectedTokens: [
    304        {
    305          value: "mozilla.org",
    306          type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN_BUT_SEARCH_ALLOWED,
    307        },
    308      ],
    309    },
    310    {
    311      desc: "domain with one dot and allowSearchSuggestionsForSimpleOrigins = false",
    312      searchString: "mozilla.org",
    313      allowSearchSuggestionsForSimpleOrigins: false,
    314      expectedTokens: [
    315        {
    316          value: "mozilla.org",
    317          type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN,
    318        },
    319      ],
    320    },
    321    {
    322      desc: "looks like simple origin",
    323      searchString: "mozilla.o",
    324      expectedTokens: [
    325        {
    326          value: "mozilla.o",
    327          type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN_BUT_SEARCH_ALLOWED,
    328        },
    329      ],
    330    },
    331    {
    332      desc: "looks like simple origin with allowSearchSuggestionsForSimpleOrigins = false",
    333      searchString: "mozilla.o",
    334      allowSearchSuggestionsForSimpleOrigins: false,
    335      expectedTokens: [
    336        {
    337          value: "mozilla.o",
    338          type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN,
    339        },
    340      ],
    341    },
    342    {
    343      desc: "query ends with dot",
    344      searchString: "mozilla.",
    345      expectedTokens: [
    346        {
    347          value: "mozilla.",
    348          type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN_BUT_SEARCH_ALLOWED,
    349        },
    350      ],
    351    },
    352    {
    353      desc: "query ends with dot with allowSearchSuggestionsForSimpleOrigins = false",
    354      searchString: "mozilla.",
    355      allowSearchSuggestionsForSimpleOrigins: false,
    356      expectedTokens: [
    357        {
    358          value: "mozilla.",
    359          type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN,
    360        },
    361      ],
    362    },
    363    {
    364      desc: "data uri",
    365      searchString: "data:text/plain,Content",
    366      expectedTokens: [
    367        {
    368          value: "data:text/plain,Content",
    369          type: UrlbarTokenizer.TYPE.POSSIBLE_URL,
    370        },
    371      ],
    372    },
    373    {
    374      desc: "ipv6",
    375      searchString: "[2001:db8::1]",
    376      expectedTokens: [
    377        { value: "[2001:db8::1]", type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN },
    378      ],
    379    },
    380    {
    381      desc: "numeric domain",
    382      searchString: "test1001.com",
    383      expectedTokens: [
    384        {
    385          value: "test1001.com",
    386          type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN_BUT_SEARCH_ALLOWED,
    387        },
    388      ],
    389    },
    390    {
    391      desc: "invalid ip",
    392      searchString: "192.2134.1.2",
    393      expectedTokens: [
    394        { value: "192.2134.1.2", type: UrlbarTokenizer.TYPE.TEXT },
    395      ],
    396    },
    397    {
    398      desc: "ipv4",
    399      searchString: "1.2.3.4",
    400      expectedTokens: [
    401        { value: "1.2.3.4", type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN },
    402      ],
    403    },
    404    {
    405      desc: "host/path",
    406      searchString: "test/test",
    407      expectedTokens: [
    408        { value: "test/test", type: UrlbarTokenizer.TYPE.POSSIBLE_URL },
    409      ],
    410    },
    411    {
    412      desc: "percent encoded string",
    413      searchString: "%E6%97%A5%E6%9C%AC",
    414      expectedTokens: [
    415        { value: "%E6%97%A5%E6%9C%AC", type: UrlbarTokenizer.TYPE.TEXT },
    416      ],
    417    },
    418    {
    419      desc: "Uppercase",
    420      searchString: "TEST",
    421      expectedTokens: [{ value: "TEST", type: UrlbarTokenizer.TYPE.TEXT }],
    422    },
    423    {
    424      desc: "Mixed case 1",
    425      searchString: "TeSt",
    426      expectedTokens: [{ value: "TeSt", type: UrlbarTokenizer.TYPE.TEXT }],
    427    },
    428    {
    429      desc: "Mixed case 2",
    430      searchString: "tEsT",
    431      expectedTokens: [{ value: "tEsT", type: UrlbarTokenizer.TYPE.TEXT }],
    432    },
    433    {
    434      desc: "Uppercase with spaces",
    435      searchString: "TEST EXAMPLE",
    436      expectedTokens: [
    437        { value: "TEST", type: UrlbarTokenizer.TYPE.TEXT },
    438        { value: "EXAMPLE", type: UrlbarTokenizer.TYPE.TEXT },
    439      ],
    440    },
    441    {
    442      desc: "Mixed case with spaces",
    443      searchString: "TeSt eXaMpLe",
    444      expectedTokens: [
    445        { value: "TeSt", type: UrlbarTokenizer.TYPE.TEXT },
    446        { value: "eXaMpLe", type: UrlbarTokenizer.TYPE.TEXT },
    447      ],
    448    },
    449    {
    450      desc: "plain number",
    451      searchString: "1001",
    452      expectedTokens: [{ value: "1001", type: UrlbarTokenizer.TYPE.TEXT }],
    453    },
    454    {
    455      desc: "data uri with spaces",
    456      searchString: "data:text/html,oh hi?",
    457      expectedTokens: [
    458        {
    459          value: "data:text/html,oh hi?",
    460          type: UrlbarTokenizer.TYPE.POSSIBLE_URL,
    461        },
    462      ],
    463    },
    464    {
    465      desc: "data uri with spaces ignored with other tokens",
    466      searchString: "hi data:text/html,oh hi?",
    467      expectedTokens: [
    468        {
    469          value: "hi",
    470          type: UrlbarTokenizer.TYPE.TEXT,
    471        },
    472        {
    473          value: "data:text/html,oh",
    474          type: UrlbarTokenizer.TYPE.POSSIBLE_URL,
    475        },
    476        {
    477          value: "hi?",
    478          type: UrlbarTokenizer.TYPE.TEXT,
    479        },
    480      ],
    481    },
    482    {
    483      desc: "whitelisted host",
    484      searchString: "test whitelisted",
    485      expectedTokens: [
    486        {
    487          value: "test",
    488          type: UrlbarTokenizer.TYPE.TEXT,
    489        },
    490        {
    491          value: "whitelisted",
    492          type: UrlbarTokenizer.TYPE.POSSIBLE_ORIGIN,
    493        },
    494      ],
    495    },
    496  ];
    497 
    498  Services.prefs.setBoolPref("browser.fixup.domainwhitelist.whitelisted", true);
    499 
    500  for (let queryContext of testContexts) {
    501    info(queryContext.desc);
    502    queryContext.trimmedSearchString = queryContext.searchString.trim();
    503    for (let token of queryContext.expectedTokens) {
    504      token.lowerCaseValue = token.value.toLocaleLowerCase();
    505    }
    506 
    507    if (queryContext.hasOwnProperty("allowSearchSuggestionsForSimpleOrigins")) {
    508      Services.prefs.setBoolPref(
    509        "browser.urlbar.allowSearchSuggestionsForSimpleOrigins",
    510        queryContext.allowSearchSuggestionsForSimpleOrigins
    511      );
    512    }
    513 
    514    let tokens = UrlbarTokenizer.tokenize(queryContext);
    515    Assert.deepEqual(
    516      tokens,
    517      queryContext.expectedTokens,
    518      "Check the expected tokens"
    519    );
    520 
    521    Services.prefs.clearUserPref(
    522      "browser.urlbar.allowSearchSuggestionsForSimpleOrigins"
    523    );
    524  }
    525 });