tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

test_logistic_regression_utils.js (5964B)


      1 /* Any copyright is dedicated to the Public Domain.
      2 * http://creativecommons.org/publicdomain/zero/1.0/ */
      3 
      4 const { SmartTabGroupingManager } = ChromeUtils.importESModule(
      5  "moz-src:///browser/components/tabbrowser/SmartTabGrouping.sys.mjs"
      6 );
      7 
      8 add_task(function test_logistic_regression_get_base_domain() {
      9  // Basic HTTPS URL with www
     10  Assert.equal(
     11    SmartTabGroupingManager.getBaseDomain("https://www.example.com/path"),
     12    "example.com",
     13    "www.example.com should normalize to example.com"
     14  );
     15 
     16  // Multiple subdomains
     17  Assert.equal(
     18    SmartTabGroupingManager.getBaseDomain("https://docs.example.com"),
     19    "docs.example.com",
     20    "Should keep last subdomain + baseDomain"
     21  );
     22 
     23  // Hosted services like blogs
     24  Assert.equal(
     25    SmartTabGroupingManager.getBaseDomain("https://myblog.example.com/"),
     26    "myblog.example.com",
     27    "Should bucket per hosted subdomain (blog, docs, etc.)"
     28  );
     29 
     30  // Host without dots
     31  Assert.equal(
     32    SmartTabGroupingManager.getBaseDomain("http://localhost"),
     33    "localhost",
     34    "Should return hostname as-is when there is no dot"
     35  );
     36 
     37  // Invalid / empty URL should be handled gracefully
     38  Assert.equal(
     39    SmartTabGroupingManager.getBaseDomain(""),
     40    "",
     41    "Invalid URL should return empty string"
     42  );
     43 });
     44 
     45 add_task(function test_logistic_regression_domain_match_fractions() {
     46  const mgr = new SmartTabGroupingManager();
     47 
     48  const anchors = [
     49    { url: "https://a.com/foo" },
     50    { url: "https://www.a.com/bar" },
     51    { url: "https://b.com/baz" },
     52  ];
     53  const candidates = [
     54    { url: "https://a.com/other" }, // matches 2 of 3 anchors
     55    { url: "https://b.com/other" }, // matches 1 of 3 anchors
     56    { url: "https://c.com/other" }, // matches 0 of 3 anchors
     57    { url: "" }, // invalid / empty URL
     58  ];
     59 
     60  const fractions = mgr.getDomainMatchFractions(anchors, candidates);
     61 
     62  Assert.equal(
     63    fractions.length,
     64    candidates.length,
     65    "Should return one value per candidate"
     66  );
     67 
     68  Assert.less(
     69    Math.abs(fractions[0] - 2 / 3),
     70    1e-6,
     71    "Candidate with domain matching two of three anchors should have fraction 2/3"
     72  );
     73 
     74  Assert.less(
     75    Math.abs(fractions[1] - 1 / 3),
     76    1e-6,
     77    "Candidate with domain matching one of three anchors should have fraction 1/3"
     78  );
     79 
     80  Assert.equal(
     81    fractions[2],
     82    0,
     83    "Candidate with domain not matching any anchor should have fraction 0"
     84  );
     85 
     86  Assert.equal(
     87    fractions[3],
     88    0,
     89    "Candidate with invalid URL should have fraction 0"
     90  );
     91 });
     92 
     93 add_task(function test_logistic_regression_get_max_similarity() {
     94  const mgr = new SmartTabGroupingManager();
     95 
     96  const anchors = [
     97    [1, 0],
     98    [0, 1],
     99  ];
    100  const candidates = [
    101    [1, 0], // identical to first anchor -> cos ~ 1
    102    [0.5, 0.5], // at 45 degrees -> cos ~ 0.707 with either anchor
    103  ];
    104 
    105  const maxSims = mgr.getMaxSimilarity(anchors, candidates);
    106 
    107  Assert.equal(
    108    maxSims.length,
    109    candidates.length,
    110    "Should return one max similarity per candidate"
    111  );
    112 
    113  Assert.less(
    114    Math.abs(maxSims[0] - 1),
    115    1e-6,
    116    "First candidate identical to first anchor should have cosine similarity ~1"
    117  );
    118 
    119  Assert.ok(
    120    maxSims[1] > 0.7 && maxSims[1] < 0.8,
    121    "Second candidate should have cosine similarity ~sqrt(1/2) ≈ 0.707 with at least one anchor"
    122  );
    123 });
    124 
    125 add_task(function test_logistic_regression_sigmoid_and_calculate_probability() {
    126  const mgr = new SmartTabGroupingManager();
    127 
    128  // Basic sigmoid sanity checks
    129  Assert.less(Math.abs(mgr.sigmoid(0) - 0.5), 1e-6, "sigmoid(0) should be 0.5");
    130 
    131  Assert.greater(
    132    mgr.sigmoid(10),
    133    0.99,
    134    "sigmoid of large positive number should be close to 1"
    135  );
    136 
    137  Assert.less(
    138    mgr.sigmoid(-10),
    139    0.01,
    140    "sigmoid of large negative number should be close to 0"
    141  );
    142 
    143  // Check that calculateProbability matches explicit linear combination + sigmoid
    144  const params = {
    145    GROUP_SIMILARITY_WEIGHT: 1,
    146    TITLE_SIMILARITY_WEIGHT: 2,
    147    DOMAIN_SIMILARITY_WEIGHT: 3,
    148    INTERCEPT: 0,
    149  };
    150 
    151  const s_gc = 0.5;
    152  const s_tt = 0.5;
    153  const s_dd = 0.5;
    154 
    155  const prob = mgr.calculateProbability(s_gc, s_tt, s_dd, params);
    156  const expectedZ = s_gc * 1 + s_tt * 2 + s_dd * 3; // 3
    157  const expectedProb = mgr.sigmoid(expectedZ);
    158 
    159  Assert.less(
    160    Math.abs(prob - expectedProb),
    161    1e-6,
    162    "calculateProbability should equal sigmoid(linear combination of features and weights)"
    163  );
    164 });
    165 
    166 add_task(
    167  function test_logistic_regression_calculate_all_probabilities_with_group() {
    168    const mgr = new SmartTabGroupingManager();
    169 
    170    // cos = 0 for both candidates -> s_gc = s_tt_max = 0.5 for both
    171    const groupSimilaritiesCos = [0, 0];
    172    const titleSimilaritiesCos = [0, 0];
    173 
    174    // Candidate 0 has full domain match, candidate 1 has none.
    175    const domainSimilarities = [1, 0];
    176 
    177    const probs = mgr.calculateAllProbabilities(
    178      groupSimilaritiesCos,
    179      titleSimilaritiesCos,
    180      domainSimilarities
    181    );
    182 
    183    Assert.equal(
    184      probs.length,
    185      2,
    186      "Should return one probability per candidate"
    187    );
    188 
    189    Assert.greater(
    190      probs[0],
    191      probs[1],
    192      "With group present, candidate with higher domain match fraction should have higher probability"
    193    );
    194  }
    195 );
    196 
    197 add_task(
    198  function test_logistic_regression_calculate_all_probabilities_without_group() {
    199    const mgr = new SmartTabGroupingManager();
    200 
    201    // cos = 0 for both candidates -> s_tt_max = 0.5 for both
    202    const titleSimilaritiesCos = [0, 0];
    203 
    204    // Candidate 0 has full domain match, candidate 1 has none.
    205    const domainSimilarities = [1, 0];
    206 
    207    const probs = mgr.calculateAllProbabilities(
    208      null, // no group similarities -> TITLE_ONLY params
    209      titleSimilaritiesCos,
    210      domainSimilarities
    211    );
    212 
    213    Assert.equal(
    214      probs.length,
    215      2,
    216      "Should return one probability per candidate"
    217    );
    218 
    219    Assert.greater(
    220      probs[0],
    221      probs[1],
    222      "Without group, candidate with higher domain match fraction should have higher probability"
    223    );
    224  }
    225 );